aboutsummaryrefslogtreecommitdiffstats
path: root/arch/i386/kernel/vmitime.c
diff options
context:
space:
mode:
authorZachary Amsden <zach@vmware.com>2007-02-13 07:26:21 -0500
committerAndi Kleen <andi@basil.nowhere.org>2007-02-13 07:26:21 -0500
commitbbab4f3bb7f528d2b8ccb5de9ae5f6ff3fb29684 (patch)
tree141d035b9d79711e6679fadc31c9583f908dfedb /arch/i386/kernel/vmitime.c
parent7ce0bcfd1667736f1293cff845139bbee53186de (diff)
[PATCH] i386: VMI timer patches
VMI timer code. It works by taking over the local APIC clock when APIC is configured, which requires a couple hooks into the APIC code. The backend timer code could be commonized into the timer infrastructure, but there are some pieces missing (stolen time, in particular), and the exact semantics of when to do accounting for NO_IDLE need to be shared between different hypervisors as well. So for now, VMI timer is a separate module. [Adrian Bunk: cleanups] Subject: VMI timer patches Signed-off-by: Zachary Amsden <zach@vmware.com> Signed-off-by: Andi Kleen <ak@suse.de> Cc: Andi Kleen <ak@suse.de> Cc: Jeremy Fitzhardinge <jeremy@xensource.com> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Chris Wright <chrisw@sous-sol.org> Signed-off-by: Andrew Morton <akpm@osdl.org>
Diffstat (limited to 'arch/i386/kernel/vmitime.c')
-rw-r--r--arch/i386/kernel/vmitime.c495
1 files changed, 495 insertions, 0 deletions
diff --git a/arch/i386/kernel/vmitime.c b/arch/i386/kernel/vmitime.c
new file mode 100644
index 000000000000..7c3033dbe5f5
--- /dev/null
+++ b/arch/i386/kernel/vmitime.c
@@ -0,0 +1,495 @@
1/*
2 * VMI paravirtual timer support routines.
3 *
4 * Copyright (C) 2005, VMware, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
14 * NON INFRINGEMENT. See the GNU General Public License for more
15 * details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 *
21 * Send feedback to dhecht@vmware.com
22 *
23 */
24
25/*
26 * Portions of this code from arch/i386/kernel/timers/timer_tsc.c.
27 * Portions of the CONFIG_NO_IDLE_HZ code from arch/s390/kernel/time.c.
28 * See comments there for proper credits.
29 */
30
31#include <linux/spinlock.h>
32#include <linux/init.h>
33#include <linux/errno.h>
34#include <linux/jiffies.h>
35#include <linux/interrupt.h>
36#include <linux/kernel_stat.h>
37#include <linux/rcupdate.h>
38#include <linux/clocksource.h>
39
40#include <asm/timer.h>
41#include <asm/io.h>
42#include <asm/apic.h>
43#include <asm/div64.h>
44#include <asm/timer.h>
45#include <asm/desc.h>
46
47#include <asm/vmi.h>
48#include <asm/vmi_time.h>
49
50#include <mach_timer.h>
51#include <io_ports.h>
52
53#ifdef CONFIG_X86_LOCAL_APIC
54#define VMI_ALARM_WIRING VMI_ALARM_WIRED_LVTT
55#else
56#define VMI_ALARM_WIRING VMI_ALARM_WIRED_IRQ0
57#endif
58
59/* Cached VMI operations */
60struct vmi_timer_ops vmi_timer_ops;
61
62#ifdef CONFIG_NO_IDLE_HZ
63
64/* /proc/sys/kernel/hz_timer state. */
65int sysctl_hz_timer;
66
67/* Some stats */
68static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_irqs);
69static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_jiffies);
70static DEFINE_PER_CPU(unsigned long, idle_start_jiffies);
71
72#endif /* CONFIG_NO_IDLE_HZ */
73
74/* Number of alarms per second. By default this is CONFIG_VMI_ALARM_HZ. */
75static int alarm_hz = CONFIG_VMI_ALARM_HZ;
76
77/* Cache of the value get_cycle_frequency / HZ. */
78static signed long long cycles_per_jiffy;
79
80/* Cache of the value get_cycle_frequency / alarm_hz. */
81static signed long long cycles_per_alarm;
82
83/* The number of cycles accounted for by the 'jiffies'/'xtime' count.
84 * Protected by xtime_lock. */
85static unsigned long long real_cycles_accounted_system;
86
87/* The number of cycles accounted for by update_process_times(), per cpu. */
88static DEFINE_PER_CPU(unsigned long long, process_times_cycles_accounted_cpu);
89
90/* The number of stolen cycles accounted, per cpu. */
91static DEFINE_PER_CPU(unsigned long long, stolen_cycles_accounted_cpu);
92
93/* Clock source. */
94static cycle_t read_real_cycles(void)
95{
96 return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL);
97}
98
99static cycle_t read_available_cycles(void)
100{
101 return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE);
102}
103
#if 0
/* Currently compiled out: fetch the VMI_CYCLES_STOLEN counter. */
static cycle_t read_stolen_cycles(void)
{
	cycle_t stolen_now = vmi_timer_ops.get_cycle_counter(VMI_CYCLES_STOLEN);

	return stolen_now;
}
#endif /* 0 */
110
111static struct clocksource clocksource_vmi = {
112 .name = "vmi-timer",
113 .rating = 450,
114 .read = read_real_cycles,
115 .mask = CLOCKSOURCE_MASK(64),
116 .mult = 0, /* to be set */
117 .shift = 22,
118 .is_continuous = 1,
119};
120
121
122/* Timer interrupt handler. */
123static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id);
124
125static struct irqaction vmi_timer_irq = {
126 vmi_timer_interrupt,
127 SA_INTERRUPT,
128 CPU_MASK_NONE,
129 "VMI-alarm",
130 NULL,
131 NULL
132};
133
134/* Alarm rate */
135static int __init vmi_timer_alarm_rate_setup(char* str)
136{
137 int alarm_rate;
138 if (get_option(&str, &alarm_rate) == 1 && alarm_rate > 0) {
139 alarm_hz = alarm_rate;
140 printk(KERN_WARNING "VMI timer alarm HZ set to %d\n", alarm_hz);
141 }
142 return 1;
143}
144__setup("vmi_timer_alarm_hz=", vmi_timer_alarm_rate_setup);
145
146
147/* Initialization */
148static void vmi_get_wallclock_ts(struct timespec *ts)
149{
150 unsigned long long wallclock;
151 wallclock = vmi_timer_ops.get_wallclock(); // nsec units
152 ts->tv_nsec = do_div(wallclock, 1000000000);
153 ts->tv_sec = wallclock;
154}
155
156static void update_xtime_from_wallclock(void)
157{
158 struct timespec ts;
159 vmi_get_wallclock_ts(&ts);
160 do_settimeofday(&ts);
161}
162
163unsigned long vmi_get_wallclock(void)
164{
165 struct timespec ts;
166 vmi_get_wallclock_ts(&ts);
167 return ts.tv_sec;
168}
169
/*
 * Setting the wallclock is not implemented: the requested time is
 * ignored and -1 is returned unconditionally.
 */
int vmi_set_wallclock(unsigned long now)
{
	(void)now;	/* deliberately unused */

	return -1;
}
174
/*
 * Scheduler clock hook: return the available-cycles counter.
 *
 * NOTE(review): the value is a raw cycle count, not nanoseconds --
 * confirm that the caller this gets hooked into expects cycles.
 */
unsigned long long vmi_sched_clock(void)
{
	unsigned long long avail_now = read_available_cycles();

	return avail_now;
}
179
180void __init vmi_time_init(void)
181{
182 unsigned long long cycles_per_sec, cycles_per_msec;
183
184 setup_irq(0, &vmi_timer_irq);
185#ifdef CONFIG_X86_LOCAL_APIC
186 set_intr_gate(LOCAL_TIMER_VECTOR, apic_vmi_timer_interrupt);
187#endif
188
189 no_sync_cmos_clock = 1;
190
191 vmi_get_wallclock_ts(&xtime);
192 set_normalized_timespec(&wall_to_monotonic,
193 -xtime.tv_sec, -xtime.tv_nsec);
194
195 real_cycles_accounted_system = read_real_cycles();
196 update_xtime_from_wallclock();
197 per_cpu(process_times_cycles_accounted_cpu, 0) = read_available_cycles();
198
199 cycles_per_sec = vmi_timer_ops.get_cycle_frequency();
200
201 cycles_per_jiffy = cycles_per_sec;
202 (void)do_div(cycles_per_jiffy, HZ);
203 cycles_per_alarm = cycles_per_sec;
204 (void)do_div(cycles_per_alarm, alarm_hz);
205 cycles_per_msec = cycles_per_sec;
206 (void)do_div(cycles_per_msec, 1000);
207 cpu_khz = cycles_per_msec;
208
209 printk(KERN_WARNING "VMI timer cycles/sec = %llu ; cycles/jiffy = %llu ;"
210 "cycles/alarm = %llu\n", cycles_per_sec, cycles_per_jiffy,
211 cycles_per_alarm);
212
213 clocksource_vmi.mult = clocksource_khz2mult(cycles_per_msec,
214 clocksource_vmi.shift);
215 if (clocksource_register(&clocksource_vmi))
216 printk(KERN_WARNING "Error registering VMITIME clocksource.");
217
218 /* Disable PIT. */
219 outb_p(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */
220
221 /* schedule the alarm. do this in phase with process_times_cycles_accounted_cpu
222 * reduce the latency calling update_process_times. */
223 vmi_timer_ops.set_alarm(
224 VMI_ALARM_WIRED_IRQ0 | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
225 per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm,
226 cycles_per_alarm);
227}
228
229#ifdef CONFIG_X86_LOCAL_APIC
230
231void __init vmi_timer_setup_boot_alarm(void)
232{
233 local_irq_disable();
234
235 /* Route the interrupt to the correct vector. */
236 apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR);
237
238 /* Cancel the IRQ0 wired alarm, and setup the LVTT alarm. */
239 vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
240 vmi_timer_ops.set_alarm(
241 VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
242 per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm,
243 cycles_per_alarm);
244 local_irq_enable();
245}
246
247/* Initialize the time accounting variables for an AP on an SMP system.
248 * Also, set the local alarm for the AP. */
249void __init vmi_timer_setup_secondary_alarm(void)
250{
251 int cpu = smp_processor_id();
252
253 /* Route the interrupt to the correct vector. */
254 apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR);
255
256 per_cpu(process_times_cycles_accounted_cpu, cpu) = read_available_cycles();
257
258 vmi_timer_ops.set_alarm(
259 VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
260 per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm,
261 cycles_per_alarm);
262}
263
264#endif
265
266/* Update system wide (real) time accounting (e.g. jiffies, xtime). */
267static void vmi_account_real_cycles(unsigned long long cur_real_cycles)
268{
269 long long cycles_not_accounted;
270
271 write_seqlock(&xtime_lock);
272
273 cycles_not_accounted = cur_real_cycles - real_cycles_accounted_system;
274 while (cycles_not_accounted >= cycles_per_jiffy) {
275 /* systems wide jiffies and wallclock. */
276 do_timer(1);
277
278 cycles_not_accounted -= cycles_per_jiffy;
279 real_cycles_accounted_system += cycles_per_jiffy;
280 }
281
282 if (vmi_timer_ops.wallclock_updated())
283 update_xtime_from_wallclock();
284
285 write_sequnlock(&xtime_lock);
286}
287
288/* Update per-cpu process times. */
289static void vmi_account_process_times_cycles(struct pt_regs *regs, int cpu,
290 unsigned long long cur_process_times_cycles)
291{
292 long long cycles_not_accounted;
293 cycles_not_accounted = cur_process_times_cycles -
294 per_cpu(process_times_cycles_accounted_cpu, cpu);
295
296 while (cycles_not_accounted >= cycles_per_jiffy) {
297 /* Account time to the current process. This includes
298 * calling into the scheduler to decrement the timeslice
299 * and possibly reschedule.*/
300 update_process_times(user_mode(regs));
301 /* XXX handle /proc/profile multiplier. */
302 profile_tick(CPU_PROFILING);
303
304 cycles_not_accounted -= cycles_per_jiffy;
305 per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
306 }
307}
308
309#ifdef CONFIG_NO_IDLE_HZ
310/* Update per-cpu idle times. Used when a no-hz halt is ended. */
311static void vmi_account_no_hz_idle_cycles(int cpu,
312 unsigned long long cur_process_times_cycles)
313{
314 long long cycles_not_accounted;
315 unsigned long no_idle_hz_jiffies = 0;
316
317 cycles_not_accounted = cur_process_times_cycles -
318 per_cpu(process_times_cycles_accounted_cpu, cpu);
319
320 while (cycles_not_accounted >= cycles_per_jiffy) {
321 no_idle_hz_jiffies++;
322 cycles_not_accounted -= cycles_per_jiffy;
323 per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
324 }
325 /* Account time to the idle process. */
326 account_steal_time(idle_task(cpu), jiffies_to_cputime(no_idle_hz_jiffies));
327}
328#endif
329
330/* Update per-cpu stolen time. */
331static void vmi_account_stolen_cycles(int cpu,
332 unsigned long long cur_real_cycles,
333 unsigned long long cur_avail_cycles)
334{
335 long long stolen_cycles_not_accounted;
336 unsigned long stolen_jiffies = 0;
337
338 if (cur_real_cycles < cur_avail_cycles)
339 return;
340
341 stolen_cycles_not_accounted = cur_real_cycles - cur_avail_cycles -
342 per_cpu(stolen_cycles_accounted_cpu, cpu);
343
344 while (stolen_cycles_not_accounted >= cycles_per_jiffy) {
345 stolen_jiffies++;
346 stolen_cycles_not_accounted -= cycles_per_jiffy;
347 per_cpu(stolen_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
348 }
349 /* HACK: pass NULL to force time onto cpustat->steal. */
350 account_steal_time(NULL, jiffies_to_cputime(stolen_jiffies));
351}
352
/*
 * Body of either IRQ0 interrupt handler (UP no local-APIC) or
 * local-APIC LVTT interrupt handler (UP & local-APIC or SMP).
 *
 * Both counters are sampled once up front so that all three accounting
 * steps work from the same readings.
 */
static void vmi_local_timer_interrupt(int cpu)
{
	unsigned long long real_now = read_real_cycles();
	unsigned long long avail_now = read_available_cycles();

	/* System wide (real) time state: xtime, jiffies. */
	vmi_account_real_cycles(real_now);

	/* Per-cpu process times. */
	vmi_account_process_times_cycles(get_irq_regs(), cpu, avail_now);

	/* Time stolen from this cpu by the hypervisor. */
	vmi_account_stolen_cycles(cpu, real_now, avail_now);
}
368
369#ifdef CONFIG_NO_IDLE_HZ
370
371/* Must be called only from idle loop, with interrupts disabled. */
372int vmi_stop_hz_timer(void)
373{
374 /* Note that cpu_set, cpu_clear are (SMP safe) atomic on x86. */
375
376 unsigned long seq, next;
377 unsigned long long real_cycles_expiry;
378 int cpu = smp_processor_id();
379 int idle;
380
381 BUG_ON(!irqs_disabled());
382 if (sysctl_hz_timer != 0)
383 return 0;
384
385 cpu_set(cpu, nohz_cpu_mask);
386 smp_mb();
387 if (rcu_needs_cpu(cpu) || local_softirq_pending() ||
388 (next = next_timer_interrupt(), time_before_eq(next, jiffies))) {
389 cpu_clear(cpu, nohz_cpu_mask);
390 next = jiffies;
391 idle = 0;
392 } else
393 idle = 1;
394
395 /* Convert jiffies to the real cycle counter. */
396 do {
397 seq = read_seqbegin(&xtime_lock);
398 real_cycles_expiry = real_cycles_accounted_system +
399 (long)(next - jiffies) * cycles_per_jiffy;
400 } while (read_seqretry(&xtime_lock, seq));
401
402 /* This cpu is going idle. Disable the periodic alarm. */
403 if (idle) {
404 vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
405 per_cpu(idle_start_jiffies, cpu) = jiffies;
406 }
407
408 /* Set the real time alarm to expire at the next event. */
409 vmi_timer_ops.set_alarm(
410 VMI_ALARM_WIRING | VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL,
411 real_cycles_expiry, 0);
412
413 return idle;
414}
415
416static void vmi_reenable_hz_timer(int cpu)
417{
418 /* For /proc/vmi/info idle_hz stat. */
419 per_cpu(vmi_idle_no_hz_jiffies, cpu) += jiffies - per_cpu(idle_start_jiffies, cpu);
420 per_cpu(vmi_idle_no_hz_irqs, cpu)++;
421
422 /* Don't bother explicitly cancelling the one-shot alarm -- at
423 * worse we will receive a spurious timer interrupt. */
424 vmi_timer_ops.set_alarm(
425 VMI_ALARM_WIRING | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
426 per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm,
427 cycles_per_alarm);
428 /* Indicate this cpu is no longer nohz idle. */
429 cpu_clear(cpu, nohz_cpu_mask);
430}
431
/*
 * Called from interrupt handlers when (local) HZ timer is disabled.
 * Interrupts must be off.  Accounts the time that passed while the HZ
 * timer was stopped on the calling cpu and then restarts it.
 */
void vmi_account_time_restart_hz_timer(void)
{
	int this_cpu = smp_processor_id();
	unsigned long long real_now, avail_now;

	BUG_ON(!irqs_disabled());

	/* Sample both counters once; every step below uses these values. */
	real_now = read_real_cycles();
	avail_now = read_available_cycles();

	/* System wide (real) time state: xtime, jiffies. */
	vmi_account_real_cycles(real_now);

	/* Per-cpu idle times. */
	vmi_account_no_hz_idle_cycles(this_cpu, avail_now);

	/* Time stolen from this cpu by the hypervisor. */
	vmi_account_stolen_cycles(this_cpu, real_now, avail_now);

	/* Back to the periodic hz timer. */
	vmi_reenable_hz_timer(this_cpu);
}
451
452#endif /* CONFIG_NO_IDLE_HZ */
453
454/* UP (and no local-APIC) VMI-timer alarm interrupt handler.
455 * Handler for IRQ0. Not used when SMP or X86_LOCAL_APIC after
456 * APIC setup and setup_boot_vmi_alarm() is called. */
457static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id)
458{
459 vmi_local_timer_interrupt(smp_processor_id());
460 return IRQ_HANDLED;
461}
462
463#ifdef CONFIG_X86_LOCAL_APIC
464
465/* SMP VMI-timer alarm interrupt handler. Handler for LVTT vector.
466 * Also used in UP when CONFIG_X86_LOCAL_APIC.
467 * The wrapper code is from arch/i386/kernel/apic.c#smp_apic_timer_interrupt. */
468void smp_apic_vmi_timer_interrupt(struct pt_regs *regs)
469{
470 struct pt_regs *old_regs = set_irq_regs(regs);
471 int cpu = smp_processor_id();
472
473 /*
474 * the NMI deadlock-detector uses this.
475 */
476 per_cpu(irq_stat,cpu).apic_timer_irqs++;
477
478 /*
479 * NOTE! We'd better ACK the irq immediately,
480 * because timer handling can be slow.
481 */
482 ack_APIC_irq();
483
484 /*
485 * update_process_times() expects us to have done irq_enter().
486 * Besides, if we don't timer interrupts ignore the global
487 * interrupt lock, which is the WrongThing (tm) to do.
488 */
489 irq_enter();
490 vmi_local_timer_interrupt(cpu);
491 irq_exit();
492 set_irq_regs(old_regs);
493}
494
495#endif /* CONFIG_X86_LOCAL_APIC */