 arch/i386/Kconfig           |   9 +++++++++
 arch/i386/kernel/Makefile   |   2 +-
 arch/i386/kernel/apic.c     |   2 +-
 arch/i386/kernel/entry.S    |   5 +++++
 arch/i386/kernel/paravirt.c |   2 ++
 arch/i386/kernel/smpboot.c  |   4 ++--
 arch/i386/kernel/time.c     |   4 +++-
 arch/i386/kernel/tsc.c      |   4 ++++
 arch/i386/kernel/vmi.c      |  45 +++++++++++++++++++++++++++-
 arch/i386/kernel/vmitime.c  | 495 ++++++++++++++++++++++++++++++++
 include/asm-i386/apic.h     |   2 ++
 include/asm-i386/paravirt.h |  12 ++++++++
 include/asm-i386/time.h     |   1 +
 include/asm-i386/timer.h    |   2 ++
 include/asm-i386/vmi_time.h | 103 ++++++++++
 15 files changed, 687 insertions(+), 5 deletions(-)
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index a3b3f6ee3642..595fb771366e 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -1272,3 +1272,12 @@ config X86_TRAMPOLINE
 config KTIME_SCALAR
 	bool
 	default y
+
+config NO_IDLE_HZ
+	bool
+	depends on PARAVIRT
+	default y
+	help
+	  Switches the regular HZ timer off when the system is going idle.
+	  This helps a hypervisor detect that the Linux system is idle,
+	  reducing the overhead of idle systems.
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index 9cfb58911f14..97f1e961d684 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -40,7 +40,7 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
 obj-$(CONFIG_HPET_TIMER)	+= hpet.o
 obj-$(CONFIG_K8_NB)		+= k8.o
 
-obj-$(CONFIG_VMI)		+= vmi.o
+obj-$(CONFIG_VMI)		+= vmi.o vmitime.o
 
 # Make sure this is linked after any other paravirt_ops structs: see head.S
 obj-$(CONFIG_PARAVIRT)		+= paravirt.o
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index 776d9be26af9..629c5ed94260 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -1395,7 +1395,7 @@ int __init APIC_init_uniprocessor (void)
 	if (!skip_ioapic_setup && nr_ioapics)
 		setup_IO_APIC();
 #endif
-	setup_boot_APIC_clock();
+	setup_boot_clock();
 
 	return 0;
 }
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 8c6a22a42d2e..d4b4ffc9eacb 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -626,6 +626,11 @@ ENTRY(name) \
 /* The include is where all of the SMP etc. interrupts come from */
 #include "entry_arch.h"
 
+/* This alternate entry is needed because we hijack the apic LVTT */
+#if defined(CONFIG_VMI) && defined(CONFIG_X86_LOCAL_APIC)
+BUILD_INTERRUPT(apic_vmi_timer_interrupt,LOCAL_TIMER_VECTOR)
+#endif
+
 KPROBE_ENTRY(page_fault)
 	RING0_EC_FRAME
 	pushl $do_page_fault
diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c
index 5bf81059a7e6..2003733310dc 100644
--- a/arch/i386/kernel/paravirt.c
+++ b/arch/i386/kernel/paravirt.c
@@ -544,6 +544,8 @@ struct paravirt_ops paravirt_ops = {
 	.apic_write = native_apic_write,
 	.apic_write_atomic = native_apic_write_atomic,
 	.apic_read = native_apic_read,
+	.setup_boot_clock = setup_boot_APIC_clock,
+	.setup_secondary_clock = setup_secondary_APIC_clock,
 #endif
 	.set_lazy_mode = (void *)native_nop,
 
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 42502d820e4f..5a00b07e7194 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -554,7 +554,7 @@ static void __cpuinit start_secondary(void *unused)
 	smp_callin();
 	while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
 		rep_nop();
-	setup_secondary_APIC_clock();
+	setup_secondary_clock();
 	if (nmi_watchdog == NMI_IO_APIC) {
 		disable_8259A_irq(0);
 		enable_NMI_through_LVT0(NULL);
@@ -1331,7 +1331,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
 
 	smpboot_setup_io_apic();
 
-	setup_boot_APIC_clock();
+	setup_boot_clock();
 
 	/*
 	 * Synchronize the TSC with the AP
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index c505b16c0990..9603ccaba997 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -232,6 +232,7 @@ EXPORT_SYMBOL(get_cmos_time);
 static void sync_cmos_clock(unsigned long dummy);
 
 static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0);
+int no_sync_cmos_clock;
 
 static void sync_cmos_clock(unsigned long dummy)
 {
@@ -275,7 +276,8 @@ static void sync_cmos_clock(unsigned long dummy)
 
 void notify_arch_cmos_timer(void)
 {
-	mod_timer(&sync_cmos_timer, jiffies + 1);
+	if (!no_sync_cmos_clock)
+		mod_timer(&sync_cmos_timer, jiffies + 1);
 }
 
 static long clock_cmos_diff;
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
index 2cfc7b09b925..12fef14995a5 100644
--- a/arch/i386/kernel/tsc.c
+++ b/arch/i386/kernel/tsc.c
@@ -23,6 +23,7 @@
  * an extra value to store the TSC freq
  */
 unsigned int tsc_khz;
+unsigned long long (*custom_sched_clock)(void);
 
 int tsc_disable;
 
@@ -107,6 +108,9 @@ unsigned long long sched_clock(void)
 {
 	unsigned long long this_offset;
 
+	if (unlikely(custom_sched_clock))
+		return (*custom_sched_clock)();
+
 	/*
 	 * in the NUMA case we dont use the TSC as they are not
 	 * synchronized across all CPUs.
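
A quick user-space sketch of the sched_clock() override pattern this hunk
introduces. Only the custom_sched_clock pointer comes from the patch; the
stand-in clock functions are invented for illustration (the vmi.c change
further down installs vmi_sched_clock into the same pointer):

	#include <stdio.h>

	static unsigned long long (*custom_sched_clock)(void);

	/* Stand-ins: the kernel's TSC-based path and a hypervisor clock. */
	static unsigned long long tsc_sched_clock(void)   { return 1000; }
	static unsigned long long hyper_sched_clock(void) { return 2000; }

	static unsigned long long sched_clock(void)
	{
		if (custom_sched_clock)		/* unlikely() in the kernel */
			return (*custom_sched_clock)();
		return tsc_sched_clock();
	}

	int main(void)
	{
		printf("%llu\n", sched_clock());	/* TSC path: 1000 */
		custom_sched_clock = hyper_sched_clock;	/* as activate_vmi() does */
		printf("%llu\n", sched_clock());	/* override: 2000 */
		return 0;
	}
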
diff --git a/arch/i386/kernel/vmi.c b/arch/i386/kernel/vmi.c
index a94d64b10f75..bb5a7abf949c 100644
--- a/arch/i386/kernel/vmi.c
+++ b/arch/i386/kernel/vmi.c
@@ -34,6 +34,7 @@
 #include <asm/apic.h>
 #include <asm/processor.h>
 #include <asm/timer.h>
+#include <asm/vmi_time.h>
 
 /* Convenient for calling VMI functions indirectly in the ROM */
 typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void);
@@ -67,6 +68,7 @@ struct {
 	void (*set_linear_mapping)(int, u32, u32, u32);
 	void (*flush_tlb)(int);
 	void (*set_initial_ap_state)(int, int);
+	void (*halt)(void);
 } vmi_ops;
 
 /* XXX move this to alternative.h */
@@ -252,6 +254,19 @@ static void vmi_nop(void)
 {
 }
 
+/* For NO_IDLE_HZ, we stop the clock when halting the kernel */
+#ifdef CONFIG_NO_IDLE_HZ
+static fastcall void vmi_safe_halt(void)
+{
+	int idle = vmi_stop_hz_timer();
+	vmi_ops.halt();
+	if (idle) {
+		local_irq_disable();
+		vmi_account_time_restart_hz_timer();
+		local_irq_enable();
+	}
+}
+#endif
 
 #ifdef CONFIG_DEBUG_PAGE_TYPE
 
@@ -727,7 +742,12 @@ static inline int __init activate_vmi(void)
 		     (char *)paravirt_ops.save_fl);
 	patch_offset(&irq_save_disable_callout[IRQ_PATCH_DISABLE],
 		     (char *)paravirt_ops.irq_disable);
+#ifndef CONFIG_NO_IDLE_HZ
 	para_fill(safe_halt, Halt);
+#else
+	vmi_ops.halt = vmi_get_function(VMI_CALL_Halt);
+	paravirt_ops.safe_halt = vmi_safe_halt;
+#endif
 	para_fill(wbinvd, WBINVD);
 	/* paravirt_ops.read_msr = vmi_rdmsr */
 	/* paravirt_ops.write_msr = vmi_wrmsr */
@@ -838,6 +858,31 @@ static inline int __init activate_vmi(void)
 #endif
 
 	/*
+	 * Check for VMI timer functionality by probing for a cycle frequency method
+	 */
+	reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_GetCycleFrequency);
+	if (rel->type != VMI_RELOCATION_NONE) {
+		vmi_timer_ops.get_cycle_frequency = (void *)rel->eip;
+		vmi_timer_ops.get_cycle_counter =
+			vmi_get_function(VMI_CALL_GetCycleCounter);
+		vmi_timer_ops.get_wallclock =
+			vmi_get_function(VMI_CALL_GetWallclockTime);
+		vmi_timer_ops.wallclock_updated =
+			vmi_get_function(VMI_CALL_WallclockUpdated);
+		vmi_timer_ops.set_alarm = vmi_get_function(VMI_CALL_SetAlarm);
+		vmi_timer_ops.cancel_alarm =
+			vmi_get_function(VMI_CALL_CancelAlarm);
+		paravirt_ops.time_init = vmi_time_init;
+		paravirt_ops.get_wallclock = vmi_get_wallclock;
+		paravirt_ops.set_wallclock = vmi_set_wallclock;
+#ifdef CONFIG_X86_LOCAL_APIC
+		paravirt_ops.setup_boot_clock = vmi_timer_setup_boot_alarm;
+		paravirt_ops.setup_secondary_clock = vmi_timer_setup_secondary_alarm;
+#endif
+		custom_sched_clock = vmi_sched_clock;
+	}
+
+	/*
 	 * Alternative instruction rewriting doesn't happen soon enough
 	 * to convert VMI_IRET to a call instead of a jump; so we have
 	 * to do this before IRQs get reenabled. Fortunately, it is
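
For context on the probe above: rel aliases the 64-bit reloc value through a
struct pointer; the declarations live earlier in activate_vmi(), outside this
hunk. A self-contained sketch of the aliasing idiom, with an invented struct
layout (little-endian, as on i386) standing in for vmi_relocation_info:

	#include <stdio.h>
	#include <stdint.h>

	struct reloc_info {		/* invented stand-in layout */
		uint32_t eip;		/* entry point supplied by the ROM */
		uint8_t  type;		/* e.g. VMI_RELOCATION_NONE */
		uint8_t  reserved[3];
	};

	int main(void)
	{
		uint64_t reloc;
		const struct reloc_info *rel = (const struct reloc_info *)&reloc;

		/* Pretend ROM answer: type 1, entry point 0xc0dec0de. */
		reloc = 0x00000001c0dec0deULL;
		printf("type=%u eip=0x%x\n", rel->type, rel->eip);
		return 0;
	}
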
diff --git a/arch/i386/kernel/vmitime.c b/arch/i386/kernel/vmitime.c
new file mode 100644
index 000000000000..7c3033dbe5f5
--- /dev/null
+++ b/arch/i386/kernel/vmitime.c
@@ -0,0 +1,495 @@
+/*
+ * VMI paravirtual timer support routines.
+ *
+ * Copyright (C) 2005, VMware, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Send feedback to dhecht@vmware.com
+ *
+ */
+
+/*
+ * Portions of this code from arch/i386/kernel/timers/timer_tsc.c.
+ * Portions of the CONFIG_NO_IDLE_HZ code from arch/s390/kernel/time.c.
+ * See comments there for proper credits.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/jiffies.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+#include <linux/rcupdate.h>
+#include <linux/clocksource.h>
+
+#include <asm/timer.h>
+#include <asm/io.h>
+#include <asm/apic.h>
+#include <asm/div64.h>
+#include <asm/timer.h>
+#include <asm/desc.h>
+
+#include <asm/vmi.h>
+#include <asm/vmi_time.h>
+
+#include <mach_timer.h>
+#include <io_ports.h>
+
+#ifdef CONFIG_X86_LOCAL_APIC
+#define VMI_ALARM_WIRING VMI_ALARM_WIRED_LVTT
+#else
+#define VMI_ALARM_WIRING VMI_ALARM_WIRED_IRQ0
+#endif
+
+/* Cached VMI operations */
+struct vmi_timer_ops vmi_timer_ops;
+
+#ifdef CONFIG_NO_IDLE_HZ
+
+/* /proc/sys/kernel/hz_timer state. */
+int sysctl_hz_timer;
+
+/* Some stats */
+static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_irqs);
+static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_jiffies);
+static DEFINE_PER_CPU(unsigned long, idle_start_jiffies);
+
+#endif /* CONFIG_NO_IDLE_HZ */
+
+/* Number of alarms per second. By default this is CONFIG_VMI_ALARM_HZ. */
+static int alarm_hz = CONFIG_VMI_ALARM_HZ;
+
+/* Cache of the value get_cycle_frequency / HZ. */
+static signed long long cycles_per_jiffy;
+
+/* Cache of the value get_cycle_frequency / alarm_hz. */
+static signed long long cycles_per_alarm;
+
+/* The number of cycles accounted for by the 'jiffies'/'xtime' count.
+ * Protected by xtime_lock. */
+static unsigned long long real_cycles_accounted_system;
+
+/* The number of cycles accounted for by update_process_times(), per cpu. */
+static DEFINE_PER_CPU(unsigned long long, process_times_cycles_accounted_cpu);
+
+/* The number of stolen cycles accounted, per cpu. */
+static DEFINE_PER_CPU(unsigned long long, stolen_cycles_accounted_cpu);
+
+/* Clock source. */
+static cycle_t read_real_cycles(void)
+{
+	return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL);
+}
+
+static cycle_t read_available_cycles(void)
+{
+	return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE);
+}
+
+#if 0
+static cycle_t read_stolen_cycles(void)
+{
+	return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_STOLEN);
+}
+#endif /* 0 */
+
+static struct clocksource clocksource_vmi = {
+	.name		= "vmi-timer",
+	.rating		= 450,
+	.read		= read_real_cycles,
+	.mask		= CLOCKSOURCE_MASK(64),
+	.mult		= 0, /* to be set */
+	.shift		= 22,
+	.is_continuous	= 1,
+};
+
+
+/* Timer interrupt handler. */
+static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id);
+
+static struct irqaction vmi_timer_irq = {
+	vmi_timer_interrupt,
+	SA_INTERRUPT,
+	CPU_MASK_NONE,
+	"VMI-alarm",
+	NULL,
+	NULL
+};
+
+/* Alarm rate */
+static int __init vmi_timer_alarm_rate_setup(char* str)
+{
+	int alarm_rate;
+	if (get_option(&str, &alarm_rate) == 1 && alarm_rate > 0) {
+		alarm_hz = alarm_rate;
+		printk(KERN_WARNING "VMI timer alarm HZ set to %d\n", alarm_hz);
+	}
+	return 1;
+}
+__setup("vmi_timer_alarm_hz=", vmi_timer_alarm_rate_setup);
+
+
+/* Initialization */
+static void vmi_get_wallclock_ts(struct timespec *ts)
+{
+	unsigned long long wallclock;
+	wallclock = vmi_timer_ops.get_wallclock(); /* nsec units */
+	ts->tv_nsec = do_div(wallclock, 1000000000);
+	ts->tv_sec = wallclock;
+}
+
+static void update_xtime_from_wallclock(void)
+{
+	struct timespec ts;
+	vmi_get_wallclock_ts(&ts);
+	do_settimeofday(&ts);
+}
+
+unsigned long vmi_get_wallclock(void)
+{
+	struct timespec ts;
+	vmi_get_wallclock_ts(&ts);
+	return ts.tv_sec;
+}
+
+int vmi_set_wallclock(unsigned long now)
+{
+	return -1;
+}
+
+unsigned long long vmi_sched_clock(void)
+{
+	return read_available_cycles();
+}
+
+void __init vmi_time_init(void)
+{
+	unsigned long long cycles_per_sec, cycles_per_msec;
+
+	setup_irq(0, &vmi_timer_irq);
+#ifdef CONFIG_X86_LOCAL_APIC
+	set_intr_gate(LOCAL_TIMER_VECTOR, apic_vmi_timer_interrupt);
+#endif
+
+	no_sync_cmos_clock = 1;
+
+	vmi_get_wallclock_ts(&xtime);
+	set_normalized_timespec(&wall_to_monotonic,
+		-xtime.tv_sec, -xtime.tv_nsec);
+
+	real_cycles_accounted_system = read_real_cycles();
+	update_xtime_from_wallclock();
+	per_cpu(process_times_cycles_accounted_cpu, 0) = read_available_cycles();
+
+	cycles_per_sec = vmi_timer_ops.get_cycle_frequency();
+
+	cycles_per_jiffy = cycles_per_sec;
+	(void)do_div(cycles_per_jiffy, HZ);
+	cycles_per_alarm = cycles_per_sec;
+	(void)do_div(cycles_per_alarm, alarm_hz);
+	cycles_per_msec = cycles_per_sec;
+	(void)do_div(cycles_per_msec, 1000);
+	cpu_khz = cycles_per_msec;
+
+	printk(KERN_WARNING "VMI timer cycles/sec = %llu ; cycles/jiffy = %llu ; "
+	       "cycles/alarm = %llu\n", cycles_per_sec, cycles_per_jiffy,
+	       cycles_per_alarm);
+
+	clocksource_vmi.mult = clocksource_khz2mult(cycles_per_msec,
+						    clocksource_vmi.shift);
+	if (clocksource_register(&clocksource_vmi))
+		printk(KERN_WARNING "Error registering VMITIME clocksource.");
+
+	/* Disable PIT. */
+	outb_p(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */
+	/* Schedule the alarm, in phase with process_times_cycles_accounted_cpu
+	 * to reduce the latency of calling update_process_times. */
+	vmi_timer_ops.set_alarm(
+		VMI_ALARM_WIRED_IRQ0 | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
+		per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm,
+		cycles_per_alarm);
+}
+
+#ifdef CONFIG_X86_LOCAL_APIC
+
+void __init vmi_timer_setup_boot_alarm(void)
+{
+	local_irq_disable();
+
+	/* Route the interrupt to the correct vector. */
+	apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR);
+
+	/* Cancel the IRQ0 wired alarm, and setup the LVTT alarm. */
+	vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
+	vmi_timer_ops.set_alarm(
+		VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
+		per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm,
+		cycles_per_alarm);
+	local_irq_enable();
+}
+
+/* Initialize the time accounting variables for an AP on an SMP system.
+ * Also, set the local alarm for the AP. */
+void __init vmi_timer_setup_secondary_alarm(void)
+{
+	int cpu = smp_processor_id();
+
+	/* Route the interrupt to the correct vector. */
+	apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR);
+
+	per_cpu(process_times_cycles_accounted_cpu, cpu) = read_available_cycles();
+
+	vmi_timer_ops.set_alarm(
+		VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
+		per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm,
+		cycles_per_alarm);
+}
+
+#endif
+
+/* Update system wide (real) time accounting (e.g. jiffies, xtime). */
+static void vmi_account_real_cycles(unsigned long long cur_real_cycles)
+{
+	long long cycles_not_accounted;
+
+	write_seqlock(&xtime_lock);
+
+	cycles_not_accounted = cur_real_cycles - real_cycles_accounted_system;
+	while (cycles_not_accounted >= cycles_per_jiffy) {
+		/* System wide jiffies and wallclock. */
+		do_timer(1);
+
+		cycles_not_accounted -= cycles_per_jiffy;
+		real_cycles_accounted_system += cycles_per_jiffy;
+	}
+
+	if (vmi_timer_ops.wallclock_updated())
+		update_xtime_from_wallclock();
+
+	write_sequnlock(&xtime_lock);
+}
+
+/* Update per-cpu process times. */
+static void vmi_account_process_times_cycles(struct pt_regs *regs, int cpu,
+					     unsigned long long cur_process_times_cycles)
+{
+	long long cycles_not_accounted;
+	cycles_not_accounted = cur_process_times_cycles -
+		per_cpu(process_times_cycles_accounted_cpu, cpu);
+
+	while (cycles_not_accounted >= cycles_per_jiffy) {
+		/* Account time to the current process. This includes
+		 * calling into the scheduler to decrement the timeslice
+		 * and possibly reschedule. */
+		update_process_times(user_mode(regs));
+		/* XXX handle /proc/profile multiplier. */
+		profile_tick(CPU_PROFILING);
+
+		cycles_not_accounted -= cycles_per_jiffy;
+		per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
+	}
+}
+
+#ifdef CONFIG_NO_IDLE_HZ
+/* Update per-cpu idle times. Used when a no-hz halt is ended. */
+static void vmi_account_no_hz_idle_cycles(int cpu,
+					  unsigned long long cur_process_times_cycles)
+{
+	long long cycles_not_accounted;
+	unsigned long no_idle_hz_jiffies = 0;
+
+	cycles_not_accounted = cur_process_times_cycles -
+		per_cpu(process_times_cycles_accounted_cpu, cpu);
+
+	while (cycles_not_accounted >= cycles_per_jiffy) {
+		no_idle_hz_jiffies++;
+		cycles_not_accounted -= cycles_per_jiffy;
+		per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
+	}
+	/* Account time to the idle process. */
+	account_steal_time(idle_task(cpu), jiffies_to_cputime(no_idle_hz_jiffies));
+}
+#endif
+
+/* Update per-cpu stolen time. */
+static void vmi_account_stolen_cycles(int cpu,
+				      unsigned long long cur_real_cycles,
+				      unsigned long long cur_avail_cycles)
+{
+	long long stolen_cycles_not_accounted;
+	unsigned long stolen_jiffies = 0;
+
+	if (cur_real_cycles < cur_avail_cycles)
+		return;
+
+	stolen_cycles_not_accounted = cur_real_cycles - cur_avail_cycles -
+		per_cpu(stolen_cycles_accounted_cpu, cpu);
+
+	while (stolen_cycles_not_accounted >= cycles_per_jiffy) {
+		stolen_jiffies++;
+		stolen_cycles_not_accounted -= cycles_per_jiffy;
+		per_cpu(stolen_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
+	}
+	/* HACK: pass NULL to force time onto cpustat->steal. */
+	account_steal_time(NULL, jiffies_to_cputime(stolen_jiffies));
+}
+
+/* Body of either IRQ0 interrupt handler (UP no local-APIC) or
+ * local-APIC LVTT interrupt handler (UP & local-APIC or SMP). */
+static void vmi_local_timer_interrupt(int cpu)
+{
+	unsigned long long cur_real_cycles, cur_process_times_cycles;
+
+	cur_real_cycles = read_real_cycles();
+	cur_process_times_cycles = read_available_cycles();
+	/* Update system wide (real) time state (xtime, jiffies). */
+	vmi_account_real_cycles(cur_real_cycles);
+	/* Update per-cpu process times. */
+	vmi_account_process_times_cycles(get_irq_regs(), cpu, cur_process_times_cycles);
+	/* Update time stolen from this cpu by the hypervisor. */
+	vmi_account_stolen_cycles(cpu, cur_real_cycles, cur_process_times_cycles);
+}
+
+#ifdef CONFIG_NO_IDLE_HZ
+
+/* Must be called only from idle loop, with interrupts disabled. */
+int vmi_stop_hz_timer(void)
+{
+	/* Note that cpu_set, cpu_clear are (SMP safe) atomic on x86. */
+
+	unsigned long seq, next;
+	unsigned long long real_cycles_expiry;
+	int cpu = smp_processor_id();
+	int idle;
+
+	BUG_ON(!irqs_disabled());
+	if (sysctl_hz_timer != 0)
+		return 0;
+
+	cpu_set(cpu, nohz_cpu_mask);
+	smp_mb();
+	if (rcu_needs_cpu(cpu) || local_softirq_pending() ||
+	    (next = next_timer_interrupt(), time_before_eq(next, jiffies))) {
+		cpu_clear(cpu, nohz_cpu_mask);
+		next = jiffies;
+		idle = 0;
+	} else
+		idle = 1;
+
+	/* Convert jiffies to the real cycle counter. */
+	do {
+		seq = read_seqbegin(&xtime_lock);
+		real_cycles_expiry = real_cycles_accounted_system +
+			(long)(next - jiffies) * cycles_per_jiffy;
+	} while (read_seqretry(&xtime_lock, seq));
+
+	/* This cpu is going idle. Disable the periodic alarm. */
+	if (idle) {
+		vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
+		per_cpu(idle_start_jiffies, cpu) = jiffies;
+	}
+
+	/* Set the real time alarm to expire at the next event. */
+	vmi_timer_ops.set_alarm(
+		VMI_ALARM_WIRING | VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL,
+		real_cycles_expiry, 0);
+
+	return idle;
+}
+
+static void vmi_reenable_hz_timer(int cpu)
+{
+	/* For /proc/vmi/info idle_hz stat. */
+	per_cpu(vmi_idle_no_hz_jiffies, cpu) += jiffies - per_cpu(idle_start_jiffies, cpu);
+	per_cpu(vmi_idle_no_hz_irqs, cpu)++;
+
+	/* Don't bother explicitly cancelling the one-shot alarm -- at
+	 * worst we will receive a spurious timer interrupt. */
+	vmi_timer_ops.set_alarm(
+		VMI_ALARM_WIRING | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
+		per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm,
+		cycles_per_alarm);
+	/* Indicate this cpu is no longer nohz idle. */
+	cpu_clear(cpu, nohz_cpu_mask);
+}
+
+/* Called from interrupt handlers when (local) HZ timer is disabled. */
+void vmi_account_time_restart_hz_timer(void)
+{
+	unsigned long long cur_real_cycles, cur_process_times_cycles;
+	int cpu = smp_processor_id();
+
+	BUG_ON(!irqs_disabled());
+	/* Account the time during which the HZ timer was disabled. */
+	cur_real_cycles = read_real_cycles();
+	cur_process_times_cycles = read_available_cycles();
+	/* Update system wide (real) time state (xtime, jiffies). */
+	vmi_account_real_cycles(cur_real_cycles);
+	/* Update per-cpu idle times. */
+	vmi_account_no_hz_idle_cycles(cpu, cur_process_times_cycles);
+	/* Update time stolen from this cpu by the hypervisor. */
+	vmi_account_stolen_cycles(cpu, cur_real_cycles, cur_process_times_cycles);
+	/* Reenable the hz timer. */
+	vmi_reenable_hz_timer(cpu);
+}
+
+#endif /* CONFIG_NO_IDLE_HZ */
+
+/* UP (and no local-APIC) VMI-timer alarm interrupt handler.
+ * Handler for IRQ0. Not used on SMP, or with X86_LOCAL_APIC once
+ * APIC setup is done and vmi_timer_setup_boot_alarm() has been called. */
+static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id)
+{
+	vmi_local_timer_interrupt(smp_processor_id());
+	return IRQ_HANDLED;
+}
+
+#ifdef CONFIG_X86_LOCAL_APIC
+
+/* SMP VMI-timer alarm interrupt handler. Handler for LVTT vector.
+ * Also used in UP when CONFIG_X86_LOCAL_APIC.
+ * The wrapper code is from arch/i386/kernel/apic.c#smp_apic_timer_interrupt. */
+void smp_apic_vmi_timer_interrupt(struct pt_regs *regs)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
+	int cpu = smp_processor_id();
+
+	/*
+	 * the NMI deadlock-detector uses this.
+	 */
+	per_cpu(irq_stat,cpu).apic_timer_irqs++;
+
+	/*
+	 * NOTE! We'd better ACK the irq immediately,
+	 * because timer handling can be slow.
+	 */
+	ack_APIC_irq();
+
+	/*
+	 * update_process_times() expects us to have done irq_enter().
+	 * Besides, if we don't, timer interrupts ignore the global
+	 * interrupt lock, which is the WrongThing (tm) to do.
+	 */
+	irq_enter();
+	vmi_local_timer_interrupt(cpu);
+	irq_exit();
+	set_irq_regs(old_regs);
+}
+
+#endif /* CONFIG_X86_LOCAL_APIC */
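
All of the accounting helpers in this file share one idiom: fold the
unaccounted cycle delta into whole jiffies and carry the remainder forward,
so no time is lost between interrupts. A self-contained user-space sketch of
that arithmetic, with invented frequency and counter values:

	#include <stdio.h>

	#define HZ 100ULL				/* invented config value */

	int main(void)
	{
		unsigned long long cycles_per_sec = 2000000000ULL;   /* 2 GHz, invented */
		unsigned long long cycles_per_jiffy = cycles_per_sec / HZ;
		unsigned long long accounted = 0, ticks = 0;
		unsigned long long now = 5 * cycles_per_jiffy + 777; /* counter read */

		/* Same loop shape as vmi_account_real_cycles() above. */
		while (now - accounted >= cycles_per_jiffy) {
			ticks++;			/* do_timer(1) in the kernel */
			accounted += cycles_per_jiffy;
		}
		printf("advanced %llu jiffies; %llu cycles carried forward\n",
		       ticks, now - accounted);		/* 5 jiffies; 777 cycles */
		return 0;
	}
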
diff --git a/include/asm-i386/apic.h b/include/asm-i386/apic.h
index 41a44319905f..3a61206fd108 100644
--- a/include/asm-i386/apic.h
+++ b/include/asm-i386/apic.h
@@ -43,6 +43,8 @@ extern void generic_apic_probe(void);
 #define apic_write native_apic_write
 #define apic_write_atomic native_apic_write_atomic
 #define apic_read native_apic_read
+#define setup_boot_clock setup_boot_APIC_clock
+#define setup_secondary_clock setup_secondary_APIC_clock
 #endif
 
 static __inline fastcall void native_apic_write(unsigned long reg,
diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h
index 6ccf36499b2a..12ef95924da6 100644
--- a/include/asm-i386/paravirt.h
+++ b/include/asm-i386/paravirt.h
@@ -121,6 +121,8 @@ struct paravirt_ops
 	void (fastcall *apic_write)(unsigned long reg, unsigned long v);
 	void (fastcall *apic_write_atomic)(unsigned long reg, unsigned long v);
 	unsigned long (fastcall *apic_read)(unsigned long reg);
+	void (*setup_boot_clock)(void);
+	void (*setup_secondary_clock)(void);
 #endif
 
 	void (fastcall *flush_tlb_user)(void);
@@ -323,6 +325,16 @@ static inline unsigned long apic_read(unsigned long reg)
 {
 	return paravirt_ops.apic_read(reg);
 }
+
+static inline void setup_boot_clock(void)
+{
+	paravirt_ops.setup_boot_clock();
+}
+
+static inline void setup_secondary_clock(void)
+{
+	paravirt_ops.setup_secondary_clock();
+}
 #endif
 
 #ifdef CONFIG_SMP
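
The setup_boot_clock()/setup_secondary_clock() wrappers added here follow the
usual paravirt_ops pattern: the slot defaults to the native APIC routine (see
the paravirt.c hunk above) and a hypervisor probe may repoint it. A
user-space sketch of that dispatch, with invented function bodies:

	#include <stdio.h>

	struct pv_ops {
		void (*setup_boot_clock)(void);
	};

	static void native_boot_clock(void) { puts("program local APIC timer"); }
	static void vmi_boot_alarm(void)    { puts("wire VMI alarm to LVTT"); }

	static struct pv_ops paravirt_ops = {
		.setup_boot_clock = native_boot_clock,	/* native default */
	};

	static inline void setup_boot_clock(void)
	{
		paravirt_ops.setup_boot_clock();
	}

	int main(void)
	{
		setup_boot_clock();			/* native path */
		paravirt_ops.setup_boot_clock = vmi_boot_alarm;	/* probe rewires */
		setup_boot_clock();			/* VMI path */
		return 0;
	}
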
diff --git a/include/asm-i386/time.h b/include/asm-i386/time.h
index ea8065af825a..571b4294dc2e 100644
--- a/include/asm-i386/time.h
+++ b/include/asm-i386/time.h
@@ -30,6 +30,7 @@ static inline int native_set_wallclock(unsigned long nowtime)
 
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
+extern unsigned long long native_sched_clock(void);
 #else /* !CONFIG_PARAVIRT */
 
 #define get_wallclock() native_get_wallclock()
diff --git a/include/asm-i386/timer.h b/include/asm-i386/timer.h
index 1ee64e34cd35..4752c3a6a708 100644
--- a/include/asm-i386/timer.h
+++ b/include/asm-i386/timer.h
@@ -9,6 +9,8 @@ void setup_pit_timer(void);
 extern int pit_latch_buggy;
 extern int timer_ack;
 extern int no_timer_check;
+extern unsigned long long (*custom_sched_clock)(void);
+extern int no_sync_cmos_clock;
 extern int recalibrate_cpu_khz(void);
 
 #endif
diff --git a/include/asm-i386/vmi_time.h b/include/asm-i386/vmi_time.h
new file mode 100644
index 000000000000..c12931211007
--- /dev/null
+++ b/include/asm-i386/vmi_time.h
@@ -0,0 +1,103 @@
+/*
+ * VMI Time wrappers
+ *
+ * Copyright (C) 2006, VMware, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Send feedback to dhecht@vmware.com
+ *
+ */
+
+#ifndef __VMI_TIME_H
+#define __VMI_TIME_H
+
+/*
+ * Raw VMI call indices for timer functions
+ */
+#define VMI_CALL_GetCycleFrequency	66
+#define VMI_CALL_GetCycleCounter	67
+#define VMI_CALL_SetAlarm		68
+#define VMI_CALL_CancelAlarm		69
+#define VMI_CALL_GetWallclockTime	70
+#define VMI_CALL_WallclockUpdated	71
+
+/* Cached VMI timer operations */
+extern struct vmi_timer_ops {
+	u64 (*get_cycle_frequency)(void);
+	u64 (*get_cycle_counter)(int);
+	u64 (*get_wallclock)(void);
+	int (*wallclock_updated)(void);
+	void (*set_alarm)(u32 flags, u64 expiry, u64 period);
+	void (*cancel_alarm)(u32 flags);
+} vmi_timer_ops;
+
+/* Prototypes */
+extern void __init vmi_time_init(void);
+extern unsigned long vmi_get_wallclock(void);
+extern int vmi_set_wallclock(unsigned long now);
+extern unsigned long long vmi_sched_clock(void);
+
+#ifdef CONFIG_X86_LOCAL_APIC
+extern void __init vmi_timer_setup_boot_alarm(void);
+extern void __init vmi_timer_setup_secondary_alarm(void);
+extern void apic_vmi_timer_interrupt(void);
+#endif
+
+#ifdef CONFIG_NO_IDLE_HZ
+extern int vmi_stop_hz_timer(void);
+extern void vmi_account_time_restart_hz_timer(void);
+#endif
+
+/*
+ * When run under a hypervisor, a vcpu is always in one of three states:
+ * running, halted, or ready. The vcpu is in the 'running' state if it
+ * is executing. When the vcpu executes the halt interface, the vcpu
+ * enters the 'halted' state and remains halted until there is some work
+ * pending for the vcpu (e.g. an alarm expires, host I/O completes on
+ * behalf of virtual I/O). At this point, the vcpu enters the 'ready'
+ * state (waiting for the hypervisor to reschedule it). Finally, at any
+ * time when the vcpu is not in the 'running' state nor the 'halted'
+ * state, it is in the 'ready' state.
+ *
+ * Real time advances while the vcpu is 'running', 'ready', or
+ * 'halted'. Stolen time is the time in which the vcpu is in the
+ * 'ready' state. Available time is the remaining time -- the vcpu is
+ * either 'running' or 'halted'.
+ *
+ * All three views of time are accessible through the VMI cycle
+ * counters.
+ */
+
+/* The cycle counters. */
+#define VMI_CYCLES_REAL		0
+#define VMI_CYCLES_AVAILABLE	1
+#define VMI_CYCLES_STOLEN	2
+
+/* The alarm interface 'flags' bits */
+#define VMI_ALARM_COUNTERS	2
+
+#define VMI_ALARM_COUNTER_MASK	0x000000ff
+
+#define VMI_ALARM_WIRED_IRQ0	0x00000000
+#define VMI_ALARM_WIRED_LVTT	0x00010000
+
+#define VMI_ALARM_IS_ONESHOT	0x00000000
+#define VMI_ALARM_IS_PERIODIC	0x00000100
+
+#define CONFIG_VMI_ALARM_HZ	100
+
+#endif
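
To make the counter semantics above concrete: real time splits into available
plus stolen time, and a set_alarm() flags word is the bitwise OR of a wiring,
a mode, and a counter selector. A small user-space sketch using the constants
from this header; the counter readings are invented:

	#include <stdio.h>
	#include <stdint.h>

	#define VMI_CYCLES_AVAILABLE	1
	#define VMI_ALARM_IS_PERIODIC	0x00000100
	#define VMI_ALARM_WIRED_LVTT	0x00010000

	int main(void)
	{
		/* Invented readings of the REAL and AVAILABLE counters. */
		uint64_t real = 1000000, available = 940000;
		uint64_t stolen = real - available;	/* cycles spent 'ready' */

		/* Flag word as vmitime.c composes it for the per-cpu alarm. */
		uint32_t flags = VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC |
				 VMI_CYCLES_AVAILABLE;

		printf("stolen = %llu cycles, flags = 0x%08x\n",
		       (unsigned long long)stolen, flags);	/* 0x00010101 */
		return 0;
	}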