aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJeremy Fitzhardinge <jeremy@xensource.com>2007-07-17 21:37:05 -0400
committerJeremy Fitzhardinge <jeremy@goop.org>2007-07-18 11:47:43 -0400
commitf91a8b447b9af64f589f6e13fec7f09b5927563d (patch)
tree9e220f5049d9963173464dd51906eb210c744fdb
parent9a4029fd3409eb224eb62c32d9792071382694ec (diff)
xen: Account for stolen time
This patch accounts for the time stolen from our VCPUs. Stolen time is time where a vcpu is runnable and could be running, but all available physical CPUs are being used for something else. This accounting gets run on each timer interrupt, just as a way to get it run relatively often, and when interesting things are going on. Stolen time is not really used by much in the kernel; it is reported in /proc/stat, and that's about it. Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com> Acked-by: Chris Wright <chrisw@sous-sol.org> Cc: john stultz <johnstul@us.ibm.com> Cc: Rik van Riel <riel@redhat.com>
-rw-r--r--arch/i386/xen/time.c159
1 files changed, 150 insertions, 9 deletions
diff --git a/arch/i386/xen/time.c b/arch/i386/xen/time.c
index b457980ff3c2..acbfd9969462 100644
--- a/arch/i386/xen/time.c
+++ b/arch/i386/xen/time.c
@@ -11,6 +11,7 @@
11#include <linux/interrupt.h> 11#include <linux/interrupt.h>
12#include <linux/clocksource.h> 12#include <linux/clocksource.h>
13#include <linux/clockchips.h> 13#include <linux/clockchips.h>
14#include <linux/kernel_stat.h>
14 15
15#include <asm/xen/hypervisor.h> 16#include <asm/xen/hypervisor.h>
16#include <asm/xen/hypercall.h> 17#include <asm/xen/hypercall.h>
@@ -25,6 +26,7 @@
25 26
26/* Xen may fire a timer up to this many ns early */ 27/* Xen may fire a timer up to this many ns early */
27#define TIMER_SLOP 100000 28#define TIMER_SLOP 100000
29#define NS_PER_TICK (1000000000LL / HZ)
28 30
29/* These are periodically updated in shared_info, and then copied here. */ 31/* These are periodically updated in shared_info, and then copied here. */
30struct shadow_time_info { 32struct shadow_time_info {
@@ -37,6 +39,139 @@ struct shadow_time_info {
37 39
38static DEFINE_PER_CPU(struct shadow_time_info, shadow_time); 40static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);
39 41
42/* runstate info updated by Xen */
43static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
44
45/* snapshots of runstate info */
46static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate_snapshot);
47
48/* unused ns of stolen and blocked time */
49static DEFINE_PER_CPU(u64, residual_stolen);
50static DEFINE_PER_CPU(u64, residual_blocked);
51
52/* return an consistent snapshot of 64-bit time/counter value */
53static u64 get64(const u64 *p)
54{
55 u64 ret;
56
57 if (BITS_PER_LONG < 64) {
58 u32 *p32 = (u32 *)p;
59 u32 h, l;
60
61 /*
62 * Read high then low, and then make sure high is
63 * still the same; this will only loop if low wraps
64 * and carries into high.
65 * XXX some clean way to make this endian-proof?
66 */
67 do {
68 h = p32[1];
69 barrier();
70 l = p32[0];
71 barrier();
72 } while (p32[1] != h);
73
74 ret = (((u64)h) << 32) | l;
75 } else
76 ret = *p;
77
78 return ret;
79}
80
81/*
82 * Runstate accounting
83 */
84static void get_runstate_snapshot(struct vcpu_runstate_info *res)
85{
86 u64 state_time;
87 struct vcpu_runstate_info *state;
88
89 preempt_disable();
90
91 state = &__get_cpu_var(runstate);
92
93 /*
94 * The runstate info is always updated by the hypervisor on
95 * the current CPU, so there's no need to use anything
96 * stronger than a compiler barrier when fetching it.
97 */
98 do {
99 state_time = get64(&state->state_entry_time);
100 barrier();
101 *res = *state;
102 barrier();
103 } while (get64(&state->state_entry_time) != state_time);
104
105 preempt_enable();
106}
107
108static void setup_runstate_info(int cpu)
109{
110 struct vcpu_register_runstate_memory_area area;
111
112 area.addr.v = &per_cpu(runstate, cpu);
113
114 if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
115 cpu, &area))
116 BUG();
117}
118
119static void do_stolen_accounting(void)
120{
121 struct vcpu_runstate_info state;
122 struct vcpu_runstate_info *snap;
123 s64 blocked, runnable, offline, stolen;
124 cputime_t ticks;
125
126 get_runstate_snapshot(&state);
127
128 WARN_ON(state.state != RUNSTATE_running);
129
130 snap = &__get_cpu_var(runstate_snapshot);
131
132 /* work out how much time the VCPU has not been runn*ing* */
133 blocked = state.time[RUNSTATE_blocked] - snap->time[RUNSTATE_blocked];
134 runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable];
135 offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline];
136
137 *snap = state;
138
139 /* Add the appropriate number of ticks of stolen time,
140 including any left-overs from last time. Passing NULL to
141 account_steal_time accounts the time as stolen. */
142 stolen = runnable + offline + __get_cpu_var(residual_stolen);
143
144 if (stolen < 0)
145 stolen = 0;
146
147 ticks = 0;
148 while (stolen >= NS_PER_TICK) {
149 ticks++;
150 stolen -= NS_PER_TICK;
151 }
152 __get_cpu_var(residual_stolen) = stolen;
153 account_steal_time(NULL, ticks);
154
155 /* Add the appropriate number of ticks of blocked time,
156 including any left-overs from last time. Passing idle to
157 account_steal_time accounts the time as idle/wait. */
158 blocked += __get_cpu_var(residual_blocked);
159
160 if (blocked < 0)
161 blocked = 0;
162
163 ticks = 0;
164 while (blocked >= NS_PER_TICK) {
165 ticks++;
166 blocked -= NS_PER_TICK;
167 }
168 __get_cpu_var(residual_blocked) = blocked;
169 account_steal_time(idle_task(smp_processor_id()), ticks);
170}
171
172
173
174/* Get the CPU speed from Xen */
40unsigned long xen_cpu_khz(void) 175unsigned long xen_cpu_khz(void)
41{ 176{
42 u64 cpu_khz = 1000000ULL << 32; 177 u64 cpu_khz = 1000000ULL << 32;
@@ -56,13 +191,11 @@ unsigned long xen_cpu_khz(void)
56 * Reads a consistent set of time-base values from Xen, into a shadow data 191 * Reads a consistent set of time-base values from Xen, into a shadow data
57 * area. 192 * area.
58 */ 193 */
59static void get_time_values_from_xen(void) 194static unsigned get_time_values_from_xen(void)
60{ 195{
61 struct vcpu_time_info *src; 196 struct vcpu_time_info *src;
62 struct shadow_time_info *dst; 197 struct shadow_time_info *dst;
63 198
64 preempt_disable();
65
66 /* src is shared memory with the hypervisor, so we need to 199 /* src is shared memory with the hypervisor, so we need to
67 make sure we get a consistent snapshot, even in the face of 200 make sure we get a consistent snapshot, even in the face of
68 being preempted. */ 201 being preempted. */
@@ -79,7 +212,7 @@ static void get_time_values_from_xen(void)
79 rmb(); /* test version after fetching data */ 212 rmb(); /* test version after fetching data */
80 } while ((src->version & 1) | (dst->version ^ src->version)); 213 } while ((src->version & 1) | (dst->version ^ src->version));
81 214
82 preempt_enable(); 215 return dst->version;
83} 216}
84 217
85/* 218/*
@@ -123,7 +256,7 @@ static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
123static u64 get_nsec_offset(struct shadow_time_info *shadow) 256static u64 get_nsec_offset(struct shadow_time_info *shadow)
124{ 257{
125 u64 now, delta; 258 u64 now, delta;
126 rdtscll(now); 259 now = native_read_tsc();
127 delta = now - shadow->tsc_timestamp; 260 delta = now - shadow->tsc_timestamp;
128 return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift); 261 return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
129} 262}
@@ -132,10 +265,14 @@ cycle_t xen_clocksource_read(void)
132{ 265{
133 struct shadow_time_info *shadow = &get_cpu_var(shadow_time); 266 struct shadow_time_info *shadow = &get_cpu_var(shadow_time);
134 cycle_t ret; 267 cycle_t ret;
268 unsigned version;
135 269
136 get_time_values_from_xen(); 270 do {
137 271 version = get_time_values_from_xen();
138 ret = shadow->system_timestamp + get_nsec_offset(shadow); 272 barrier();
273 ret = shadow->system_timestamp + get_nsec_offset(shadow);
274 barrier();
275 } while (version != __get_cpu_var(xen_vcpu)->time.version);
139 276
140 put_cpu_var(shadow_time); 277 put_cpu_var(shadow_time);
141 278
@@ -352,6 +489,8 @@ static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
352 ret = IRQ_HANDLED; 489 ret = IRQ_HANDLED;
353 } 490 }
354 491
492 do_stolen_accounting();
493
355 return ret; 494 return ret;
356} 495}
357 496
@@ -378,6 +517,8 @@ static void xen_setup_timer(int cpu)
378 evt->irq = irq; 517 evt->irq = irq;
379 clockevents_register_device(evt); 518 clockevents_register_device(evt);
380 519
520 setup_runstate_info(cpu);
521
381 put_cpu_var(xen_clock_events); 522 put_cpu_var(xen_clock_events);
382} 523}
383 524
@@ -390,7 +531,7 @@ __init void xen_time_init(void)
390 clocksource_register(&xen_clocksource); 531 clocksource_register(&xen_clocksource);
391 532
392 if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) { 533 if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) {
393 /* Successfully turned off 100hz tick, so we have the 534 /* Successfully turned off 100Hz tick, so we have the
394 vcpuop-based timer interface */ 535 vcpuop-based timer interface */
395 printk(KERN_DEBUG "Xen: using vcpuop timer interface\n"); 536 printk(KERN_DEBUG "Xen: using vcpuop timer interface\n");
396 xen_clockevent = &xen_vcpuop_clockevent; 537 xen_clockevent = &xen_vcpuop_clockevent;