diff options
-rw-r--r-- | arch/i386/kernel/Makefile | 2 | ||||
-rw-r--r-- | arch/i386/kernel/entry.S | 5 | ||||
-rw-r--r-- | arch/i386/kernel/vmi.c | 26 | ||||
-rw-r--r-- | arch/i386/kernel/vmiclock.c | 318 | ||||
-rw-r--r-- | arch/i386/kernel/vmitime.c | 482 | ||||
-rw-r--r-- | include/asm-i386/vmi_time.h | 18 |
6 files changed, 327 insertions, 524 deletions
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index bd7753cb9e69..4f98516b9f94 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile | |||
@@ -39,7 +39,7 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o | |||
39 | obj-$(CONFIG_HPET_TIMER) += hpet.o | 39 | obj-$(CONFIG_HPET_TIMER) += hpet.o |
40 | obj-$(CONFIG_K8_NB) += k8.o | 40 | obj-$(CONFIG_K8_NB) += k8.o |
41 | 41 | ||
42 | obj-$(CONFIG_VMI) += vmi.o vmitime.o | 42 | obj-$(CONFIG_VMI) += vmi.o vmiclock.o |
43 | obj-$(CONFIG_PARAVIRT) += paravirt.o | 43 | obj-$(CONFIG_PARAVIRT) += paravirt.o |
44 | obj-y += pcspeaker.o | 44 | obj-y += pcspeaker.o |
45 | 45 | ||
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 7f92ceb428ad..90ffcdb69838 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S | |||
@@ -637,11 +637,6 @@ ENDPROC(name) | |||
637 | /* The include is where all of the SMP etc. interrupts come from */ | 637 | /* The include is where all of the SMP etc. interrupts come from */ |
638 | #include "entry_arch.h" | 638 | #include "entry_arch.h" |
639 | 639 | ||
640 | /* This alternate entry is needed because we hijack the apic LVTT */ | ||
641 | #if defined(CONFIG_VMI) && defined(CONFIG_X86_LOCAL_APIC) | ||
642 | BUILD_INTERRUPT(apic_vmi_timer_interrupt,LOCAL_TIMER_VECTOR) | ||
643 | #endif | ||
644 | |||
645 | KPROBE_ENTRY(page_fault) | 640 | KPROBE_ENTRY(page_fault) |
646 | RING0_EC_FRAME | 641 | RING0_EC_FRAME |
647 | pushl $do_page_fault | 642 | pushl $do_page_fault |
diff --git a/arch/i386/kernel/vmi.c b/arch/i386/kernel/vmi.c index 0df0b2cd3617..0fae15dee765 100644 --- a/arch/i386/kernel/vmi.c +++ b/arch/i386/kernel/vmi.c | |||
@@ -77,6 +77,9 @@ static struct { | |||
77 | extern struct paravirt_patch __start_parainstructions[], | 77 | extern struct paravirt_patch __start_parainstructions[], |
78 | __stop_parainstructions[]; | 78 | __stop_parainstructions[]; |
79 | 79 | ||
80 | /* Cached VMI operations */ | ||
81 | struct vmi_timer_ops vmi_timer_ops; | ||
82 | |||
80 | /* | 83 | /* |
81 | * VMI patching routines. | 84 | * VMI patching routines. |
82 | */ | 85 | */ |
@@ -235,18 +238,6 @@ static void vmi_nop(void) | |||
235 | { | 238 | { |
236 | } | 239 | } |
237 | 240 | ||
238 | /* For NO_IDLE_HZ, we stop the clock when halting the kernel */ | ||
239 | static fastcall void vmi_safe_halt(void) | ||
240 | { | ||
241 | int idle = vmi_stop_hz_timer(); | ||
242 | vmi_ops.halt(); | ||
243 | if (idle) { | ||
244 | local_irq_disable(); | ||
245 | vmi_account_time_restart_hz_timer(); | ||
246 | local_irq_enable(); | ||
247 | } | ||
248 | } | ||
249 | |||
250 | #ifdef CONFIG_DEBUG_PAGE_TYPE | 241 | #ifdef CONFIG_DEBUG_PAGE_TYPE |
251 | 242 | ||
252 | #ifdef CONFIG_X86_PAE | 243 | #ifdef CONFIG_X86_PAE |
@@ -722,7 +713,6 @@ do { \ | |||
722 | } \ | 713 | } \ |
723 | } while (0) | 714 | } while (0) |
724 | 715 | ||
725 | |||
726 | /* | 716 | /* |
727 | * Activate the VMI interface and switch into paravirtualized mode | 717 | * Activate the VMI interface and switch into paravirtualized mode |
728 | */ | 718 | */ |
@@ -901,8 +891,8 @@ static inline int __init activate_vmi(void) | |||
901 | paravirt_ops.get_wallclock = vmi_get_wallclock; | 891 | paravirt_ops.get_wallclock = vmi_get_wallclock; |
902 | paravirt_ops.set_wallclock = vmi_set_wallclock; | 892 | paravirt_ops.set_wallclock = vmi_set_wallclock; |
903 | #ifdef CONFIG_X86_LOCAL_APIC | 893 | #ifdef CONFIG_X86_LOCAL_APIC |
904 | paravirt_ops.setup_boot_clock = vmi_timer_setup_boot_alarm; | 894 | paravirt_ops.setup_boot_clock = vmi_time_bsp_init; |
905 | paravirt_ops.setup_secondary_clock = vmi_timer_setup_secondary_alarm; | 895 | paravirt_ops.setup_secondary_clock = vmi_time_ap_init; |
906 | #endif | 896 | #endif |
907 | paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles; | 897 | paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles; |
908 | paravirt_ops.get_cpu_khz = vmi_cpu_khz; | 898 | paravirt_ops.get_cpu_khz = vmi_cpu_khz; |
@@ -914,11 +904,7 @@ static inline int __init activate_vmi(void) | |||
914 | disable_vmi_timer = 1; | 904 | disable_vmi_timer = 1; |
915 | } | 905 | } |
916 | 906 | ||
917 | /* No idle HZ mode only works if VMI timer and no idle is enabled */ | 907 | para_fill(safe_halt, Halt); |
918 | if (disable_noidle || disable_vmi_timer) | ||
919 | para_fill(safe_halt, Halt); | ||
920 | else | ||
921 | para_wrap(safe_halt, vmi_safe_halt, halt, Halt); | ||
922 | 908 | ||
923 | /* | 909 | /* |
924 | * Alternative instruction rewriting doesn't happen soon enough | 910 | * Alternative instruction rewriting doesn't happen soon enough |
diff --git a/arch/i386/kernel/vmiclock.c b/arch/i386/kernel/vmiclock.c new file mode 100644 index 000000000000..26a37f8a8762 --- /dev/null +++ b/arch/i386/kernel/vmiclock.c | |||
@@ -0,0 +1,318 @@ | |||
1 | /* | ||
2 | * VMI paravirtual timer support routines. | ||
3 | * | ||
4 | * Copyright (C) 2007, VMware, Inc. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, but | ||
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
14 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
15 | * details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
20 | * | ||
21 | */ | ||
22 | |||
23 | #include <linux/smp.h> | ||
24 | #include <linux/interrupt.h> | ||
25 | #include <linux/cpumask.h> | ||
26 | #include <linux/clocksource.h> | ||
27 | #include <linux/clockchips.h> | ||
28 | |||
29 | #include <asm/vmi.h> | ||
30 | #include <asm/vmi_time.h> | ||
31 | #include <asm/arch_hooks.h> | ||
32 | #include <asm/apicdef.h> | ||
33 | #include <asm/apic.h> | ||
34 | #include <asm/timer.h> | ||
35 | |||
36 | #include <irq_vectors.h> | ||
37 | #include "io_ports.h" | ||
38 | |||
39 | #define VMI_ONESHOT (VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL | vmi_get_alarm_wiring()) | ||
40 | #define VMI_PERIODIC (VMI_ALARM_IS_PERIODIC | VMI_CYCLES_REAL | vmi_get_alarm_wiring()) | ||
41 | |||
42 | static DEFINE_PER_CPU(struct clock_event_device, local_events); | ||
43 | |||
44 | static inline u32 vmi_counter(u32 flags) | ||
45 | { | ||
46 | /* Given VMI_ONESHOT or VMI_PERIODIC, return the corresponding | ||
47 | * cycle counter. */ | ||
48 | return flags & VMI_ALARM_COUNTER_MASK; | ||
49 | } | ||
50 | |||
51 | /* paravirt_ops.get_wallclock = vmi_get_wallclock */ | ||
52 | unsigned long vmi_get_wallclock(void) | ||
53 | { | ||
54 | unsigned long long wallclock; | ||
55 | wallclock = vmi_timer_ops.get_wallclock(); // nsec | ||
56 | (void)do_div(wallclock, 1000000000); // sec | ||
57 | |||
58 | return wallclock; | ||
59 | } | ||
60 | |||
61 | /* paravirt_ops.set_wallclock = vmi_set_wallclock */ | ||
62 | int vmi_set_wallclock(unsigned long now) | ||
63 | { | ||
64 | return 0; | ||
65 | } | ||
66 | |||
67 | /* paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles */ | ||
68 | unsigned long long vmi_get_sched_cycles(void) | ||
69 | { | ||
70 | return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE); | ||
71 | } | ||
72 | |||
73 | /* paravirt_ops.get_cpu_khz = vmi_cpu_khz */ | ||
74 | unsigned long vmi_cpu_khz(void) | ||
75 | { | ||
76 | unsigned long long khz; | ||
77 | khz = vmi_timer_ops.get_cycle_frequency(); | ||
78 | (void)do_div(khz, 1000); | ||
79 | return khz; | ||
80 | } | ||
81 | |||
82 | static inline unsigned int vmi_get_timer_vector(void) | ||
83 | { | ||
84 | #ifdef CONFIG_X86_IO_APIC | ||
85 | return FIRST_DEVICE_VECTOR; | ||
86 | #else | ||
87 | return FIRST_EXTERNAL_VECTOR; | ||
88 | #endif | ||
89 | } | ||
90 | |||
91 | /** vmi clockchip */ | ||
92 | #ifdef CONFIG_X86_LOCAL_APIC | ||
93 | static unsigned int startup_timer_irq(unsigned int irq) | ||
94 | { | ||
95 | unsigned long val = apic_read(APIC_LVTT); | ||
96 | apic_write(APIC_LVTT, vmi_get_timer_vector()); | ||
97 | |||
98 | return (val & APIC_SEND_PENDING); | ||
99 | } | ||
100 | |||
101 | static void mask_timer_irq(unsigned int irq) | ||
102 | { | ||
103 | unsigned long val = apic_read(APIC_LVTT); | ||
104 | apic_write(APIC_LVTT, val | APIC_LVT_MASKED); | ||
105 | } | ||
106 | |||
107 | static void unmask_timer_irq(unsigned int irq) | ||
108 | { | ||
109 | unsigned long val = apic_read(APIC_LVTT); | ||
110 | apic_write(APIC_LVTT, val & ~APIC_LVT_MASKED); | ||
111 | } | ||
112 | |||
113 | static void ack_timer_irq(unsigned int irq) | ||
114 | { | ||
115 | ack_APIC_irq(); | ||
116 | } | ||
117 | |||
118 | static struct irq_chip vmi_chip __read_mostly = { | ||
119 | .name = "VMI-LOCAL", | ||
120 | .startup = startup_timer_irq, | ||
121 | .mask = mask_timer_irq, | ||
122 | .unmask = unmask_timer_irq, | ||
123 | .ack = ack_timer_irq | ||
124 | }; | ||
125 | #endif | ||
126 | |||
127 | /** vmi clockevent */ | ||
128 | #define VMI_ALARM_WIRED_IRQ0 0x00000000 | ||
129 | #define VMI_ALARM_WIRED_LVTT 0x00010000 | ||
130 | static int vmi_wiring = VMI_ALARM_WIRED_IRQ0; | ||
131 | |||
132 | static inline int vmi_get_alarm_wiring(void) | ||
133 | { | ||
134 | return vmi_wiring; | ||
135 | } | ||
136 | |||
137 | static void vmi_timer_set_mode(enum clock_event_mode mode, | ||
138 | struct clock_event_device *evt) | ||
139 | { | ||
140 | cycle_t now, cycles_per_hz; | ||
141 | BUG_ON(!irqs_disabled()); | ||
142 | |||
143 | switch (mode) { | ||
144 | case CLOCK_EVT_MODE_ONESHOT: | ||
145 | break; | ||
146 | case CLOCK_EVT_MODE_PERIODIC: | ||
147 | cycles_per_hz = vmi_timer_ops.get_cycle_frequency(); | ||
148 | (void)do_div(cycles_per_hz, HZ); | ||
149 | now = vmi_timer_ops.get_cycle_counter(vmi_counter(VMI_PERIODIC)); | ||
150 | vmi_timer_ops.set_alarm(VMI_PERIODIC, now, cycles_per_hz); | ||
151 | break; | ||
152 | case CLOCK_EVT_MODE_UNUSED: | ||
153 | case CLOCK_EVT_MODE_SHUTDOWN: | ||
154 | switch (evt->mode) { | ||
155 | case CLOCK_EVT_MODE_ONESHOT: | ||
156 | vmi_timer_ops.cancel_alarm(VMI_ONESHOT); | ||
157 | break; | ||
158 | case CLOCK_EVT_MODE_PERIODIC: | ||
159 | vmi_timer_ops.cancel_alarm(VMI_PERIODIC); | ||
160 | break; | ||
161 | default: | ||
162 | break; | ||
163 | } | ||
164 | break; | ||
165 | default: | ||
166 | break; | ||
167 | } | ||
168 | } | ||
169 | |||
170 | static int vmi_timer_next_event(unsigned long delta, | ||
171 | struct clock_event_device *evt) | ||
172 | { | ||
173 | /* Unfortunately, set_next_event interface only passes relative | ||
174 | * expiry, but we want absolute expiry. It'd be better if we | | |
175 | * were passed an absolute expiry, since a bunch of time may | | |
176 | * have been stolen between the time the delta is computed and | ||
177 | * when we set the alarm below. */ | ||
178 | cycle_t now = vmi_timer_ops.get_cycle_counter(vmi_counter(VMI_ONESHOT)); | ||
179 | |||
180 | BUG_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT); | ||
181 | vmi_timer_ops.set_alarm(VMI_ONESHOT, now + delta, 0); | ||
182 | return 0; | ||
183 | } | ||
184 | |||
185 | static struct clock_event_device vmi_clockevent = { | ||
186 | .name = "vmi-timer", | ||
187 | .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, | ||
188 | .shift = 22, | ||
189 | .set_mode = vmi_timer_set_mode, | ||
190 | .set_next_event = vmi_timer_next_event, | ||
191 | .rating = 1000, | ||
192 | .irq = 0, | ||
193 | }; | ||
194 | |||
195 | static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id) | ||
196 | { | ||
197 | struct clock_event_device *evt = &__get_cpu_var(local_events); | ||
198 | evt->event_handler(evt); | ||
199 | return IRQ_HANDLED; | ||
200 | } | ||
201 | |||
202 | static struct irqaction vmi_clock_action = { | ||
203 | .name = "vmi-timer", | ||
204 | .handler = vmi_timer_interrupt, | ||
205 | .flags = IRQF_DISABLED | IRQF_NOBALANCING, | ||
206 | .mask = CPU_MASK_ALL, | ||
207 | }; | ||
208 | |||
209 | static void __devinit vmi_time_init_clockevent(void) | ||
210 | { | ||
211 | cycle_t cycles_per_msec; | ||
212 | struct clock_event_device *evt; | ||
213 | |||
214 | int cpu = smp_processor_id(); | ||
215 | evt = &__get_cpu_var(local_events); | ||
216 | |||
217 | /* Use cycles_per_msec since div_sc params are 32-bits. */ | ||
218 | cycles_per_msec = vmi_timer_ops.get_cycle_frequency(); | ||
219 | (void)do_div(cycles_per_msec, 1000); | ||
220 | |||
221 | memcpy(evt, &vmi_clockevent, sizeof(*evt)); | ||
222 | /* Must pick .shift such that .mult fits in 32-bits. Choosing | ||
223 | * .shift to be 22 allows 2^(32-22) cycles per nanosecond | | |
224 | * before overflow. */ | ||
225 | evt->mult = div_sc(cycles_per_msec, NSEC_PER_MSEC, evt->shift); | ||
226 | /* Upper bound is clockevent's use of ulong for cycle deltas. */ | ||
227 | evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt); | ||
228 | evt->min_delta_ns = clockevent_delta2ns(1, evt); | ||
229 | evt->cpumask = cpumask_of_cpu(cpu); | ||
230 | |||
231 | printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n", | ||
232 | evt->name, evt->mult, evt->shift); | ||
233 | clockevents_register_device(evt); | ||
234 | } | ||
235 | |||
236 | void __init vmi_time_init(void) | ||
237 | { | ||
238 | /* Disable PIT: BIOSes start PIT CH0 with 18.2hz periodic. */ | | |
239 | outb_p(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */ | ||
240 | |||
241 | vmi_time_init_clockevent(); | ||
242 | setup_irq(0, &vmi_clock_action); | ||
243 | } | ||
244 | |||
245 | #ifdef CONFIG_X86_LOCAL_APIC | ||
246 | void __devinit vmi_time_bsp_init(void) | ||
247 | { | ||
248 | /* | ||
249 | * On APIC systems, we want local timers to fire on each cpu. We do | ||
250 | * this by programming LVTT to deliver timer events to the IRQ handler | ||
251 | * for IRQ-0, since we can't re-use the APIC local timer handler | ||
252 | * without interfering with that code. | ||
253 | */ | ||
254 | clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); | ||
255 | local_irq_disable(); | ||
256 | #ifdef CONFIG_X86_SMP | ||
257 | /* | ||
258 | * XXX handle_percpu_irq only defined for SMP; we need to switch over | ||
259 | * to using it, since this is a local interrupt, which each CPU must | ||
260 | * handle individually without locking out or dropping simultaneous | ||
261 | * local timers on other CPUs. We also don't want to trigger the | ||
262 | * quirk workaround code for interrupts which gets invoked from | ||
263 | * handle_percpu_irq via eoi, so we use our own IRQ chip. | ||
264 | */ | ||
265 | set_irq_chip_and_handler_name(0, &vmi_chip, handle_percpu_irq, "lvtt"); | ||
266 | #else | ||
267 | set_irq_chip_and_handler_name(0, &vmi_chip, handle_edge_irq, "lvtt"); | ||
268 | #endif | ||
269 | vmi_wiring = VMI_ALARM_WIRED_LVTT; | ||
270 | apic_write(APIC_LVTT, vmi_get_timer_vector()); | ||
271 | local_irq_enable(); | ||
272 | clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL); | ||
273 | } | ||
274 | |||
275 | void __devinit vmi_time_ap_init(void) | ||
276 | { | ||
277 | vmi_time_init_clockevent(); | ||
278 | apic_write(APIC_LVTT, vmi_get_timer_vector()); | ||
279 | } | ||
280 | #endif | ||
281 | |||
282 | /** vmi clocksource */ | ||
283 | |||
284 | static cycle_t read_real_cycles(void) | ||
285 | { | ||
286 | return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL); | ||
287 | } | ||
288 | |||
289 | static struct clocksource clocksource_vmi = { | ||
290 | .name = "vmi-timer", | ||
291 | .rating = 450, | ||
292 | .read = read_real_cycles, | ||
293 | .mask = CLOCKSOURCE_MASK(64), | ||
294 | .mult = 0, /* to be set */ | ||
295 | .shift = 22, | ||
296 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | ||
297 | }; | ||
298 | |||
299 | static int __init init_vmi_clocksource(void) | ||
300 | { | ||
301 | cycle_t cycles_per_msec; | ||
302 | |||
303 | if (!vmi_timer_ops.get_cycle_frequency) | ||
304 | return 0; | ||
305 | /* Use khz2mult rather than hz2mult since hz arg is only 32-bits. */ | ||
306 | cycles_per_msec = vmi_timer_ops.get_cycle_frequency(); | ||
307 | (void)do_div(cycles_per_msec, 1000); | ||
308 | |||
309 | /* Note that clocksource.{mult, shift} converts in the opposite direction | ||
310 | * as clockevents. */ | ||
311 | clocksource_vmi.mult = clocksource_khz2mult(cycles_per_msec, | ||
312 | clocksource_vmi.shift); | ||
313 | |||
314 | printk(KERN_WARNING "vmi: registering clock source khz=%lld\n", cycles_per_msec); | ||
315 | return clocksource_register(&clocksource_vmi); | ||
316 | |||
317 | } | ||
318 | module_init(init_vmi_clocksource); | ||
diff --git a/arch/i386/kernel/vmitime.c b/arch/i386/kernel/vmitime.c deleted file mode 100644 index 9dfb17739b67..000000000000 --- a/arch/i386/kernel/vmitime.c +++ /dev/null | |||
@@ -1,482 +0,0 @@ | |||
1 | /* | ||
2 | * VMI paravirtual timer support routines. | ||
3 | * | ||
4 | * Copyright (C) 2005, VMware, Inc. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, but | ||
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
14 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
15 | * details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
20 | * | ||
21 | * Send feedback to dhecht@vmware.com | ||
22 | * | ||
23 | */ | ||
24 | |||
25 | /* | ||
26 | * Portions of this code from arch/i386/kernel/timers/timer_tsc.c. | ||
27 | * Portions of the CONFIG_NO_IDLE_HZ code from arch/s390/kernel/time.c. | ||
28 | * See comments there for proper credits. | ||
29 | */ | ||
30 | |||
31 | #include <linux/spinlock.h> | ||
32 | #include <linux/init.h> | ||
33 | #include <linux/errno.h> | ||
34 | #include <linux/jiffies.h> | ||
35 | #include <linux/interrupt.h> | ||
36 | #include <linux/kernel_stat.h> | ||
37 | #include <linux/rcupdate.h> | ||
38 | #include <linux/clocksource.h> | ||
39 | |||
40 | #include <asm/timer.h> | ||
41 | #include <asm/io.h> | ||
42 | #include <asm/apic.h> | ||
43 | #include <asm/div64.h> | ||
44 | #include <asm/timer.h> | ||
45 | #include <asm/desc.h> | ||
46 | |||
47 | #include <asm/vmi.h> | ||
48 | #include <asm/vmi_time.h> | ||
49 | |||
50 | #include <mach_timer.h> | ||
51 | #include <io_ports.h> | ||
52 | |||
53 | #ifdef CONFIG_X86_LOCAL_APIC | ||
54 | #define VMI_ALARM_WIRING VMI_ALARM_WIRED_LVTT | ||
55 | #else | ||
56 | #define VMI_ALARM_WIRING VMI_ALARM_WIRED_IRQ0 | ||
57 | #endif | ||
58 | |||
59 | /* Cached VMI operations */ | ||
60 | struct vmi_timer_ops vmi_timer_ops; | ||
61 | |||
62 | #ifdef CONFIG_NO_IDLE_HZ | ||
63 | |||
64 | /* /proc/sys/kernel/hz_timer state. */ | ||
65 | int sysctl_hz_timer; | ||
66 | |||
67 | /* Some stats */ | ||
68 | static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_irqs); | ||
69 | static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_jiffies); | ||
70 | static DEFINE_PER_CPU(unsigned long, idle_start_jiffies); | ||
71 | |||
72 | #endif /* CONFIG_NO_IDLE_HZ */ | ||
73 | |||
74 | /* Number of alarms per second. By default this is CONFIG_VMI_ALARM_HZ. */ | ||
75 | static int alarm_hz = CONFIG_VMI_ALARM_HZ; | ||
76 | |||
77 | /* Cache of the value get_cycle_frequency / HZ. */ | ||
78 | static signed long long cycles_per_jiffy; | ||
79 | |||
80 | /* Cache of the value get_cycle_frequency / alarm_hz. */ | ||
81 | static signed long long cycles_per_alarm; | ||
82 | |||
83 | /* The number of cycles accounted for by the 'jiffies'/'xtime' count. | ||
84 | * Protected by xtime_lock. */ | ||
85 | static unsigned long long real_cycles_accounted_system; | ||
86 | |||
87 | /* The number of cycles accounted for by update_process_times(), per cpu. */ | ||
88 | static DEFINE_PER_CPU(unsigned long long, process_times_cycles_accounted_cpu); | ||
89 | |||
90 | /* The number of stolen cycles accounted, per cpu. */ | ||
91 | static DEFINE_PER_CPU(unsigned long long, stolen_cycles_accounted_cpu); | ||
92 | |||
93 | /* Clock source. */ | ||
94 | static cycle_t read_real_cycles(void) | ||
95 | { | ||
96 | return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL); | ||
97 | } | ||
98 | |||
99 | static cycle_t read_available_cycles(void) | ||
100 | { | ||
101 | return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE); | ||
102 | } | ||
103 | |||
104 | #if 0 | ||
105 | static cycle_t read_stolen_cycles(void) | ||
106 | { | ||
107 | return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_STOLEN); | ||
108 | } | ||
109 | #endif /* 0 */ | ||
110 | |||
111 | static struct clocksource clocksource_vmi = { | ||
112 | .name = "vmi-timer", | ||
113 | .rating = 450, | ||
114 | .read = read_real_cycles, | ||
115 | .mask = CLOCKSOURCE_MASK(64), | ||
116 | .mult = 0, /* to be set */ | ||
117 | .shift = 22, | ||
118 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | ||
119 | }; | ||
120 | |||
121 | |||
122 | /* Timer interrupt handler. */ | ||
123 | static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id); | ||
124 | |||
125 | static struct irqaction vmi_timer_irq = { | ||
126 | .handler = vmi_timer_interrupt, | ||
127 | .flags = IRQF_DISABLED, | ||
128 | .mask = CPU_MASK_NONE, | ||
129 | .name = "VMI-alarm", | ||
130 | }; | ||
131 | |||
132 | /* Alarm rate */ | ||
133 | static int __init vmi_timer_alarm_rate_setup(char* str) | ||
134 | { | ||
135 | int alarm_rate; | ||
136 | if (get_option(&str, &alarm_rate) == 1 && alarm_rate > 0) { | ||
137 | alarm_hz = alarm_rate; | ||
138 | printk(KERN_WARNING "VMI timer alarm HZ set to %d\n", alarm_hz); | ||
139 | } | ||
140 | return 1; | ||
141 | } | ||
142 | __setup("vmi_timer_alarm_hz=", vmi_timer_alarm_rate_setup); | ||
143 | |||
144 | |||
145 | /* Initialization */ | ||
146 | static void vmi_get_wallclock_ts(struct timespec *ts) | ||
147 | { | ||
148 | unsigned long long wallclock; | ||
149 | wallclock = vmi_timer_ops.get_wallclock(); // nsec units | ||
150 | ts->tv_nsec = do_div(wallclock, 1000000000); | ||
151 | ts->tv_sec = wallclock; | ||
152 | } | ||
153 | |||
154 | unsigned long vmi_get_wallclock(void) | ||
155 | { | ||
156 | struct timespec ts; | ||
157 | vmi_get_wallclock_ts(&ts); | ||
158 | return ts.tv_sec; | ||
159 | } | ||
160 | |||
161 | int vmi_set_wallclock(unsigned long now) | ||
162 | { | ||
163 | return -1; | ||
164 | } | ||
165 | |||
166 | unsigned long long vmi_get_sched_cycles(void) | ||
167 | { | ||
168 | return read_available_cycles(); | ||
169 | } | ||
170 | |||
171 | unsigned long vmi_cpu_khz(void) | ||
172 | { | ||
173 | unsigned long long khz; | ||
174 | |||
175 | khz = vmi_timer_ops.get_cycle_frequency(); | ||
176 | (void)do_div(khz, 1000); | ||
177 | return khz; | ||
178 | } | ||
179 | |||
180 | void __init vmi_time_init(void) | ||
181 | { | ||
182 | unsigned long long cycles_per_sec, cycles_per_msec; | ||
183 | unsigned long flags; | ||
184 | |||
185 | local_irq_save(flags); | ||
186 | setup_irq(0, &vmi_timer_irq); | ||
187 | #ifdef CONFIG_X86_LOCAL_APIC | ||
188 | set_intr_gate(LOCAL_TIMER_VECTOR, apic_vmi_timer_interrupt); | ||
189 | #endif | ||
190 | |||
191 | real_cycles_accounted_system = read_real_cycles(); | ||
192 | per_cpu(process_times_cycles_accounted_cpu, 0) = read_available_cycles(); | ||
193 | |||
194 | cycles_per_sec = vmi_timer_ops.get_cycle_frequency(); | ||
195 | cycles_per_jiffy = cycles_per_sec; | ||
196 | (void)do_div(cycles_per_jiffy, HZ); | ||
197 | cycles_per_alarm = cycles_per_sec; | ||
198 | (void)do_div(cycles_per_alarm, alarm_hz); | ||
199 | cycles_per_msec = cycles_per_sec; | ||
200 | (void)do_div(cycles_per_msec, 1000); | ||
201 | |||
202 | printk(KERN_WARNING "VMI timer cycles/sec = %llu ; cycles/jiffy = %llu ;" | ||
203 | "cycles/alarm = %llu\n", cycles_per_sec, cycles_per_jiffy, | ||
204 | cycles_per_alarm); | ||
205 | |||
206 | clocksource_vmi.mult = clocksource_khz2mult(cycles_per_msec, | ||
207 | clocksource_vmi.shift); | ||
208 | if (clocksource_register(&clocksource_vmi)) | ||
209 | printk(KERN_WARNING "Error registering VMITIME clocksource."); | ||
210 | |||
211 | /* Disable PIT. */ | ||
212 | outb_p(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */ | ||
213 | |||
214 | /* schedule the alarm. do this in phase with process_times_cycles_accounted_cpu | ||
215 | * to reduce the latency of calling update_process_times. */ | | |
216 | vmi_timer_ops.set_alarm( | ||
217 | VMI_ALARM_WIRED_IRQ0 | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE, | ||
218 | per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm, | ||
219 | cycles_per_alarm); | ||
220 | |||
221 | local_irq_restore(flags); | ||
222 | } | ||
223 | |||
224 | #ifdef CONFIG_X86_LOCAL_APIC | ||
225 | |||
226 | void __init vmi_timer_setup_boot_alarm(void) | ||
227 | { | ||
228 | local_irq_disable(); | ||
229 | |||
230 | /* Route the interrupt to the correct vector. */ | ||
231 | apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR); | ||
232 | |||
233 | /* Cancel the IRQ0 wired alarm, and setup the LVTT alarm. */ | ||
234 | vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE); | ||
235 | vmi_timer_ops.set_alarm( | ||
236 | VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE, | ||
237 | per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm, | ||
238 | cycles_per_alarm); | ||
239 | local_irq_enable(); | ||
240 | } | ||
241 | |||
242 | /* Initialize the time accounting variables for an AP on an SMP system. | ||
243 | * Also, set the local alarm for the AP. */ | ||
244 | void __devinit vmi_timer_setup_secondary_alarm(void) | ||
245 | { | ||
246 | int cpu = smp_processor_id(); | ||
247 | |||
248 | /* Route the interrupt to the correct vector. */ | ||
249 | apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR); | ||
250 | |||
251 | per_cpu(process_times_cycles_accounted_cpu, cpu) = read_available_cycles(); | ||
252 | |||
253 | vmi_timer_ops.set_alarm( | ||
254 | VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE, | ||
255 | per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm, | ||
256 | cycles_per_alarm); | ||
257 | } | ||
258 | |||
259 | #endif | ||
260 | |||
261 | /* Update system wide (real) time accounting (e.g. jiffies, xtime). */ | ||
262 | static void vmi_account_real_cycles(unsigned long long cur_real_cycles) | ||
263 | { | ||
264 | long long cycles_not_accounted; | ||
265 | |||
266 | write_seqlock(&xtime_lock); | ||
267 | |||
268 | cycles_not_accounted = cur_real_cycles - real_cycles_accounted_system; | ||
269 | while (cycles_not_accounted >= cycles_per_jiffy) { | ||
270 | /* systems wide jiffies. */ | ||
271 | do_timer(1); | ||
272 | |||
273 | cycles_not_accounted -= cycles_per_jiffy; | ||
274 | real_cycles_accounted_system += cycles_per_jiffy; | ||
275 | } | ||
276 | |||
277 | write_sequnlock(&xtime_lock); | ||
278 | } | ||
279 | |||
280 | /* Update per-cpu process times. */ | ||
281 | static void vmi_account_process_times_cycles(struct pt_regs *regs, int cpu, | ||
282 | unsigned long long cur_process_times_cycles) | ||
283 | { | ||
284 | long long cycles_not_accounted; | ||
285 | cycles_not_accounted = cur_process_times_cycles - | ||
286 | per_cpu(process_times_cycles_accounted_cpu, cpu); | ||
287 | |||
288 | while (cycles_not_accounted >= cycles_per_jiffy) { | ||
289 | /* Account time to the current process. This includes | ||
290 | * calling into the scheduler to decrement the timeslice | ||
291 | * and possibly reschedule.*/ | ||
292 | update_process_times(user_mode(regs)); | ||
293 | /* XXX handle /proc/profile multiplier. */ | ||
294 | profile_tick(CPU_PROFILING); | ||
295 | |||
296 | cycles_not_accounted -= cycles_per_jiffy; | ||
297 | per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy; | ||
298 | } | ||
299 | } | ||
300 | |||
301 | #ifdef CONFIG_NO_IDLE_HZ | ||
302 | /* Update per-cpu idle times. Used when a no-hz halt is ended. */ | ||
303 | static void vmi_account_no_hz_idle_cycles(int cpu, | ||
304 | unsigned long long cur_process_times_cycles) | ||
305 | { | ||
306 | long long cycles_not_accounted; | ||
307 | unsigned long no_idle_hz_jiffies = 0; | ||
308 | |||
309 | cycles_not_accounted = cur_process_times_cycles - | ||
310 | per_cpu(process_times_cycles_accounted_cpu, cpu); | ||
311 | |||
312 | while (cycles_not_accounted >= cycles_per_jiffy) { | ||
313 | no_idle_hz_jiffies++; | ||
314 | cycles_not_accounted -= cycles_per_jiffy; | ||
315 | per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy; | ||
316 | } | ||
317 | /* Account time to the idle process. */ | ||
318 | account_steal_time(idle_task(cpu), jiffies_to_cputime(no_idle_hz_jiffies)); | ||
319 | } | ||
320 | #endif | ||
321 | |||
322 | /* Update per-cpu stolen time. */ | ||
323 | static void vmi_account_stolen_cycles(int cpu, | ||
324 | unsigned long long cur_real_cycles, | ||
325 | unsigned long long cur_avail_cycles) | ||
326 | { | ||
327 | long long stolen_cycles_not_accounted; | ||
328 | unsigned long stolen_jiffies = 0; | ||
329 | |||
330 | if (cur_real_cycles < cur_avail_cycles) | ||
331 | return; | ||
332 | |||
333 | stolen_cycles_not_accounted = cur_real_cycles - cur_avail_cycles - | ||
334 | per_cpu(stolen_cycles_accounted_cpu, cpu); | ||
335 | |||
336 | while (stolen_cycles_not_accounted >= cycles_per_jiffy) { | ||
337 | stolen_jiffies++; | ||
338 | stolen_cycles_not_accounted -= cycles_per_jiffy; | ||
339 | per_cpu(stolen_cycles_accounted_cpu, cpu) += cycles_per_jiffy; | ||
340 | } | ||
341 | /* HACK: pass NULL to force time onto cpustat->steal. */ | ||
342 | account_steal_time(NULL, jiffies_to_cputime(stolen_jiffies)); | ||
343 | } | ||
344 | |||
345 | /* Body of either IRQ0 interrupt handler (UP no local-APIC) or | ||
346 | * local-APIC LVTT interrupt handler (UP & local-APIC or SMP). */ | ||
347 | static void vmi_local_timer_interrupt(int cpu) | ||
348 | { | ||
349 | unsigned long long cur_real_cycles, cur_process_times_cycles; | ||
350 | |||
351 | cur_real_cycles = read_real_cycles(); | ||
352 | cur_process_times_cycles = read_available_cycles(); | ||
353 | /* Update system wide (real) time state (xtime, jiffies). */ | ||
354 | vmi_account_real_cycles(cur_real_cycles); | ||
355 | /* Update per-cpu process times. */ | ||
356 | vmi_account_process_times_cycles(get_irq_regs(), cpu, cur_process_times_cycles); | ||
357 | /* Update time stolen from this cpu by the hypervisor. */ | ||
358 | vmi_account_stolen_cycles(cpu, cur_real_cycles, cur_process_times_cycles); | ||
359 | } | ||
360 | |||
361 | #ifdef CONFIG_NO_IDLE_HZ | ||
362 | |||
363 | /* Must be called only from idle loop, with interrupts disabled. */ | ||
364 | int vmi_stop_hz_timer(void) | ||
365 | { | ||
366 | /* Note that cpu_set, cpu_clear are (SMP safe) atomic on x86. */ | ||
367 | |||
368 | unsigned long seq, next; | ||
369 | unsigned long long real_cycles_expiry; | ||
370 | int cpu = smp_processor_id(); | ||
371 | |||
372 | BUG_ON(!irqs_disabled()); | ||
373 | if (sysctl_hz_timer != 0) | ||
374 | return 0; | ||
375 | |||
376 | cpu_set(cpu, nohz_cpu_mask); | ||
377 | smp_mb(); | ||
378 | |||
379 | if (rcu_needs_cpu(cpu) || local_softirq_pending() || | ||
380 | (next = next_timer_interrupt(), | ||
381 | time_before_eq(next, jiffies + HZ/CONFIG_VMI_ALARM_HZ))) { | ||
382 | cpu_clear(cpu, nohz_cpu_mask); | ||
383 | return 0; | ||
384 | } | ||
385 | |||
386 | /* Convert jiffies to the real cycle counter. */ | ||
387 | do { | ||
388 | seq = read_seqbegin(&xtime_lock); | ||
389 | real_cycles_expiry = real_cycles_accounted_system + | ||
390 | (long)(next - jiffies) * cycles_per_jiffy; | ||
391 | } while (read_seqretry(&xtime_lock, seq)); | ||
392 | |||
393 | /* This cpu is going idle. Disable the periodic alarm. */ | ||
394 | vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE); | ||
395 | per_cpu(idle_start_jiffies, cpu) = jiffies; | ||
396 | /* Set the real time alarm to expire at the next event. */ | ||
397 | vmi_timer_ops.set_alarm( | ||
398 | VMI_ALARM_WIRING | VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL, | ||
399 | real_cycles_expiry, 0); | ||
400 | return 1; | ||
401 | } | ||
402 | |||
403 | static void vmi_reenable_hz_timer(int cpu) | ||
404 | { | ||
405 | /* For /proc/vmi/info idle_hz stat. */ | ||
406 | per_cpu(vmi_idle_no_hz_jiffies, cpu) += jiffies - per_cpu(idle_start_jiffies, cpu); | ||
407 | per_cpu(vmi_idle_no_hz_irqs, cpu)++; | ||
408 | |||
409 | /* Don't bother explicitly cancelling the one-shot alarm -- at | ||
410 | * worse we will receive a spurious timer interrupt. */ | ||
411 | vmi_timer_ops.set_alarm( | ||
412 | VMI_ALARM_WIRING | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE, | ||
413 | per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm, | ||
414 | cycles_per_alarm); | ||
415 | /* Indicate this cpu is no longer nohz idle. */ | ||
416 | cpu_clear(cpu, nohz_cpu_mask); | ||
417 | } | ||
418 | |||
/*
 * Called from interrupt handlers when the (local) HZ timer is disabled.
 * Interrupts must be disabled.
 *
 * Accounts the time that passed while the HZ timer was off (real
 * time, nohz idle time, stolen time) and then re-enables the HZ timer
 * on the current cpu.
 */
void vmi_account_time_restart_hz_timer(void)
{
	int this_cpu = smp_processor_id();
	unsigned long long real_now, avail_now;

	BUG_ON(!irqs_disabled());

	/* Sample both counters once for all of the accounting below. */
	real_now = read_real_cycles();
	avail_now = read_available_cycles();

	/* System wide (real) time state: xtime, jiffies. */
	vmi_account_real_cycles(real_now);

	/* Idle time of this cpu. */
	vmi_account_no_hz_idle_cycles(this_cpu, avail_now);

	/* Time the hypervisor stole from this cpu. */
	vmi_account_stolen_cycles(this_cpu, real_now, avail_now);

	/* Back to the periodic HZ timer. */
	vmi_reenable_hz_timer(this_cpu);
}
438 | |||
439 | #endif /* CONFIG_NO_IDLE_HZ */ | ||
440 | |||
441 | /* UP (and no local-APIC) VMI-timer alarm interrupt handler. | ||
442 | * Handler for IRQ0. Not used when SMP or X86_LOCAL_APIC after | ||
443 | * APIC setup and setup_boot_vmi_alarm() is called. */ | ||
444 | static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id) | ||
445 | { | ||
446 | vmi_local_timer_interrupt(smp_processor_id()); | ||
447 | return IRQ_HANDLED; | ||
448 | } | ||
449 | |||
450 | #ifdef CONFIG_X86_LOCAL_APIC | ||
451 | |||
452 | /* SMP VMI-timer alarm interrupt handler. Handler for LVTT vector. | ||
453 | * Also used in UP when CONFIG_X86_LOCAL_APIC. | ||
454 | * The wrapper code is from arch/i386/kernel/apic.c#smp_apic_timer_interrupt. */ | ||
455 | void smp_apic_vmi_timer_interrupt(struct pt_regs *regs) | ||
456 | { | ||
457 | struct pt_regs *old_regs = set_irq_regs(regs); | ||
458 | int cpu = smp_processor_id(); | ||
459 | |||
460 | /* | ||
461 | * the NMI deadlock-detector uses this. | ||
462 | */ | ||
463 | per_cpu(irq_stat,cpu).apic_timer_irqs++; | ||
464 | |||
465 | /* | ||
466 | * NOTE! We'd better ACK the irq immediately, | ||
467 | * because timer handling can be slow. | ||
468 | */ | ||
469 | ack_APIC_irq(); | ||
470 | |||
471 | /* | ||
472 | * update_process_times() expects us to have done irq_enter(). | ||
473 | * Besides, if we don't timer interrupts ignore the global | ||
474 | * interrupt lock, which is the WrongThing (tm) to do. | ||
475 | */ | ||
476 | irq_enter(); | ||
477 | vmi_local_timer_interrupt(cpu); | ||
478 | irq_exit(); | ||
479 | set_irq_regs(old_regs); | ||
480 | } | ||
481 | |||
482 | #endif /* CONFIG_X86_LOCAL_APIC */ | ||
diff --git a/include/asm-i386/vmi_time.h b/include/asm-i386/vmi_time.h index c3a1fcf66c96..213930b995cb 100644 --- a/include/asm-i386/vmi_time.h +++ b/include/asm-i386/vmi_time.h | |||
@@ -53,22 +53,8 @@ extern unsigned long long vmi_get_sched_cycles(void); | |||
53 | extern unsigned long vmi_cpu_khz(void); | 53 | extern unsigned long vmi_cpu_khz(void); |
54 | 54 | ||
55 | #ifdef CONFIG_X86_LOCAL_APIC | 55 | #ifdef CONFIG_X86_LOCAL_APIC |
56 | extern void __init vmi_timer_setup_boot_alarm(void); | 56 | extern void __devinit vmi_time_bsp_init(void); |
57 | extern void __devinit vmi_timer_setup_secondary_alarm(void); | 57 | extern void __devinit vmi_time_ap_init(void); |
58 | extern void apic_vmi_timer_interrupt(void); | ||
59 | #endif | ||
60 | |||
61 | #ifdef CONFIG_NO_IDLE_HZ | ||
62 | extern int vmi_stop_hz_timer(void); | ||
63 | extern void vmi_account_time_restart_hz_timer(void); | ||
64 | #else | ||
65 | static inline int vmi_stop_hz_timer(void) | ||
66 | { | ||
67 | return 0; | ||
68 | } | ||
69 | static inline void vmi_account_time_restart_hz_timer(void) | ||
70 | { | ||
71 | } | ||
72 | #endif | 58 | #endif |
73 | 59 | ||
74 | /* | 60 | /* |