aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorZachary Amsden <zach@vmware.com>2007-05-02 13:27:16 -0400
committerAndi Kleen <andi@basil.nowhere.org>2007-05-02 13:27:16 -0400
commite0bb8643974397a8d36670e06e6a54bb84f3289f (patch)
tree196c6929fb8ccfe839f044a73be2f9861a88f175 /arch
parenteeef9c68aae2f4f21ab810d0339e0f22d30b0cd8 (diff)
[PATCH] i386: Convert VMI timer to use clock events
Convert VMI timer to use clock events, making it properly able to use the NO_HZ infrastructure. On UP systems, with no local APIC, we just continue to route these events through the PIT. On systems with a local APIC, or SMP, we provide a single source interrupt chip which creates the local timer IRQ. It actually gets delivered by the APIC hardware, but we don't want to use the same local APIC clocksource processing, so we create our own handler here. Signed-off-by: Zachary Amsden <zach@vmware.com> Signed-off-by: Andi Kleen <ak@suse.de> CC: Dan Hecht <dhecht@vmware.com> CC: Ingo Molnar <mingo@elte.hu> CC: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'arch')
-rw-r--r--arch/i386/kernel/Makefile2
-rw-r--r--arch/i386/kernel/entry.S5
-rw-r--r--arch/i386/kernel/vmi.c26
-rw-r--r--arch/i386/kernel/vmiclock.c318
-rw-r--r--arch/i386/kernel/vmitime.c482
5 files changed, 325 insertions, 508 deletions
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index bd7753cb9e69..4f98516b9f94 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -39,7 +39,7 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
39obj-$(CONFIG_HPET_TIMER) += hpet.o 39obj-$(CONFIG_HPET_TIMER) += hpet.o
40obj-$(CONFIG_K8_NB) += k8.o 40obj-$(CONFIG_K8_NB) += k8.o
41 41
42obj-$(CONFIG_VMI) += vmi.o vmitime.o 42obj-$(CONFIG_VMI) += vmi.o vmiclock.o
43obj-$(CONFIG_PARAVIRT) += paravirt.o 43obj-$(CONFIG_PARAVIRT) += paravirt.o
44obj-y += pcspeaker.o 44obj-y += pcspeaker.o
45 45
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 7f92ceb428ad..90ffcdb69838 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -637,11 +637,6 @@ ENDPROC(name)
637/* The include is where all of the SMP etc. interrupts come from */ 637/* The include is where all of the SMP etc. interrupts come from */
638#include "entry_arch.h" 638#include "entry_arch.h"
639 639
640/* This alternate entry is needed because we hijack the apic LVTT */
641#if defined(CONFIG_VMI) && defined(CONFIG_X86_LOCAL_APIC)
642BUILD_INTERRUPT(apic_vmi_timer_interrupt,LOCAL_TIMER_VECTOR)
643#endif
644
645KPROBE_ENTRY(page_fault) 640KPROBE_ENTRY(page_fault)
646 RING0_EC_FRAME 641 RING0_EC_FRAME
647 pushl $do_page_fault 642 pushl $do_page_fault
diff --git a/arch/i386/kernel/vmi.c b/arch/i386/kernel/vmi.c
index 0df0b2cd3617..0fae15dee765 100644
--- a/arch/i386/kernel/vmi.c
+++ b/arch/i386/kernel/vmi.c
@@ -77,6 +77,9 @@ static struct {
77extern struct paravirt_patch __start_parainstructions[], 77extern struct paravirt_patch __start_parainstructions[],
78 __stop_parainstructions[]; 78 __stop_parainstructions[];
79 79
80/* Cached VMI operations */
81struct vmi_timer_ops vmi_timer_ops;
82
80/* 83/*
81 * VMI patching routines. 84 * VMI patching routines.
82 */ 85 */
@@ -235,18 +238,6 @@ static void vmi_nop(void)
235{ 238{
236} 239}
237 240
238/* For NO_IDLE_HZ, we stop the clock when halting the kernel */
239static fastcall void vmi_safe_halt(void)
240{
241 int idle = vmi_stop_hz_timer();
242 vmi_ops.halt();
243 if (idle) {
244 local_irq_disable();
245 vmi_account_time_restart_hz_timer();
246 local_irq_enable();
247 }
248}
249
250#ifdef CONFIG_DEBUG_PAGE_TYPE 241#ifdef CONFIG_DEBUG_PAGE_TYPE
251 242
252#ifdef CONFIG_X86_PAE 243#ifdef CONFIG_X86_PAE
@@ -722,7 +713,6 @@ do { \
722 } \ 713 } \
723} while (0) 714} while (0)
724 715
725
726/* 716/*
727 * Activate the VMI interface and switch into paravirtualized mode 717 * Activate the VMI interface and switch into paravirtualized mode
728 */ 718 */
@@ -901,8 +891,8 @@ static inline int __init activate_vmi(void)
901 paravirt_ops.get_wallclock = vmi_get_wallclock; 891 paravirt_ops.get_wallclock = vmi_get_wallclock;
902 paravirt_ops.set_wallclock = vmi_set_wallclock; 892 paravirt_ops.set_wallclock = vmi_set_wallclock;
903#ifdef CONFIG_X86_LOCAL_APIC 893#ifdef CONFIG_X86_LOCAL_APIC
904 paravirt_ops.setup_boot_clock = vmi_timer_setup_boot_alarm; 894 paravirt_ops.setup_boot_clock = vmi_time_bsp_init;
905 paravirt_ops.setup_secondary_clock = vmi_timer_setup_secondary_alarm; 895 paravirt_ops.setup_secondary_clock = vmi_time_ap_init;
906#endif 896#endif
907 paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles; 897 paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles;
908 paravirt_ops.get_cpu_khz = vmi_cpu_khz; 898 paravirt_ops.get_cpu_khz = vmi_cpu_khz;
@@ -914,11 +904,7 @@ static inline int __init activate_vmi(void)
914 disable_vmi_timer = 1; 904 disable_vmi_timer = 1;
915 } 905 }
916 906
917 /* No idle HZ mode only works if VMI timer and no idle is enabled */ 907 para_fill(safe_halt, Halt);
918 if (disable_noidle || disable_vmi_timer)
919 para_fill(safe_halt, Halt);
920 else
921 para_wrap(safe_halt, vmi_safe_halt, halt, Halt);
922 908
923 /* 909 /*
924 * Alternative instruction rewriting doesn't happen soon enough 910 * Alternative instruction rewriting doesn't happen soon enough
diff --git a/arch/i386/kernel/vmiclock.c b/arch/i386/kernel/vmiclock.c
new file mode 100644
index 000000000000..26a37f8a8762
--- /dev/null
+++ b/arch/i386/kernel/vmiclock.c
@@ -0,0 +1,318 @@
1/*
2 * VMI paravirtual timer support routines.
3 *
4 * Copyright (C) 2007, VMware, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
14 * NON INFRINGEMENT. See the GNU General Public License for more
15 * details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 *
21 */
22
23#include <linux/smp.h>
24#include <linux/interrupt.h>
25#include <linux/cpumask.h>
26#include <linux/clocksource.h>
27#include <linux/clockchips.h>
28
29#include <asm/vmi.h>
30#include <asm/vmi_time.h>
31#include <asm/arch_hooks.h>
32#include <asm/apicdef.h>
33#include <asm/apic.h>
34#include <asm/timer.h>
35
36#include <irq_vectors.h>
37#include "io_ports.h"
38
39#define VMI_ONESHOT (VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL | vmi_get_alarm_wiring())
40#define VMI_PERIODIC (VMI_ALARM_IS_PERIODIC | VMI_CYCLES_REAL | vmi_get_alarm_wiring())
41
42static DEFINE_PER_CPU(struct clock_event_device, local_events);
43
44static inline u32 vmi_counter(u32 flags)
45{
46 /* Given VMI_ONESHOT or VMI_PERIODIC, return the corresponding
47 * cycle counter. */
48 return flags & VMI_ALARM_COUNTER_MASK;
49}
50
51/* paravirt_ops.get_wallclock = vmi_get_wallclock */
52unsigned long vmi_get_wallclock(void)
53{
54 unsigned long long wallclock;
55 wallclock = vmi_timer_ops.get_wallclock(); // nsec
56 (void)do_div(wallclock, 1000000000); // sec
57
58 return wallclock;
59}
60
61/* paravirt_ops.set_wallclock = vmi_set_wallclock */
62int vmi_set_wallclock(unsigned long now)
63{
64 return 0;
65}
66
67/* paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles */
68unsigned long long vmi_get_sched_cycles(void)
69{
70 return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE);
71}
72
73/* paravirt_ops.get_cpu_khz = vmi_cpu_khz */
74unsigned long vmi_cpu_khz(void)
75{
76 unsigned long long khz;
77 khz = vmi_timer_ops.get_cycle_frequency();
78 (void)do_div(khz, 1000);
79 return khz;
80}
81
82static inline unsigned int vmi_get_timer_vector(void)
83{
84#ifdef CONFIG_X86_IO_APIC
85 return FIRST_DEVICE_VECTOR;
86#else
87 return FIRST_EXTERNAL_VECTOR;
88#endif
89}
90
91/** vmi clockchip */
92#ifdef CONFIG_X86_LOCAL_APIC
93static unsigned int startup_timer_irq(unsigned int irq)
94{
95 unsigned long val = apic_read(APIC_LVTT);
96 apic_write(APIC_LVTT, vmi_get_timer_vector());
97
98 return (val & APIC_SEND_PENDING);
99}
100
101static void mask_timer_irq(unsigned int irq)
102{
103 unsigned long val = apic_read(APIC_LVTT);
104 apic_write(APIC_LVTT, val | APIC_LVT_MASKED);
105}
106
107static void unmask_timer_irq(unsigned int irq)
108{
109 unsigned long val = apic_read(APIC_LVTT);
110 apic_write(APIC_LVTT, val & ~APIC_LVT_MASKED);
111}
112
113static void ack_timer_irq(unsigned int irq)
114{
115 ack_APIC_irq();
116}
117
118static struct irq_chip vmi_chip __read_mostly = {
119 .name = "VMI-LOCAL",
120 .startup = startup_timer_irq,
121 .mask = mask_timer_irq,
122 .unmask = unmask_timer_irq,
123 .ack = ack_timer_irq
124};
125#endif
126
127/** vmi clockevent */
128#define VMI_ALARM_WIRED_IRQ0 0x00000000
129#define VMI_ALARM_WIRED_LVTT 0x00010000
130static int vmi_wiring = VMI_ALARM_WIRED_IRQ0;
131
132static inline int vmi_get_alarm_wiring(void)
133{
134 return vmi_wiring;
135}
136
137static void vmi_timer_set_mode(enum clock_event_mode mode,
138 struct clock_event_device *evt)
139{
140 cycle_t now, cycles_per_hz;
141 BUG_ON(!irqs_disabled());
142
143 switch (mode) {
144 case CLOCK_EVT_MODE_ONESHOT:
145 break;
146 case CLOCK_EVT_MODE_PERIODIC:
147 cycles_per_hz = vmi_timer_ops.get_cycle_frequency();
148 (void)do_div(cycles_per_hz, HZ);
149 now = vmi_timer_ops.get_cycle_counter(vmi_counter(VMI_PERIODIC));
150 vmi_timer_ops.set_alarm(VMI_PERIODIC, now, cycles_per_hz);
151 break;
152 case CLOCK_EVT_MODE_UNUSED:
153 case CLOCK_EVT_MODE_SHUTDOWN:
154 switch (evt->mode) {
155 case CLOCK_EVT_MODE_ONESHOT:
156 vmi_timer_ops.cancel_alarm(VMI_ONESHOT);
157 break;
158 case CLOCK_EVT_MODE_PERIODIC:
159 vmi_timer_ops.cancel_alarm(VMI_PERIODIC);
160 break;
161 default:
162 break;
163 }
164 break;
165 default:
166 break;
167 }
168}
169
170static int vmi_timer_next_event(unsigned long delta,
171 struct clock_event_device *evt)
172{
173	/* Unfortunately, set_next_event interface only passes relative
174	 * expiry, but we want absolute expiry. It'd be better if we
175	 * were passed an absolute expiry, since a bunch of time may
176	 * have been stolen between the time the delta is computed and
177	 * when we set the alarm below. */
178 cycle_t now = vmi_timer_ops.get_cycle_counter(vmi_counter(VMI_ONESHOT));
179
180 BUG_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);
181 vmi_timer_ops.set_alarm(VMI_ONESHOT, now + delta, 0);
182 return 0;
183}
184
185static struct clock_event_device vmi_clockevent = {
186 .name = "vmi-timer",
187 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
188 .shift = 22,
189 .set_mode = vmi_timer_set_mode,
190 .set_next_event = vmi_timer_next_event,
191 .rating = 1000,
192 .irq = 0,
193};
194
195static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id)
196{
197 struct clock_event_device *evt = &__get_cpu_var(local_events);
198 evt->event_handler(evt);
199 return IRQ_HANDLED;
200}
201
202static struct irqaction vmi_clock_action = {
203 .name = "vmi-timer",
204 .handler = vmi_timer_interrupt,
205 .flags = IRQF_DISABLED | IRQF_NOBALANCING,
206 .mask = CPU_MASK_ALL,
207};
208
209static void __devinit vmi_time_init_clockevent(void)
210{
211 cycle_t cycles_per_msec;
212 struct clock_event_device *evt;
213
214 int cpu = smp_processor_id();
215 evt = &__get_cpu_var(local_events);
216
217 /* Use cycles_per_msec since div_sc params are 32-bits. */
218 cycles_per_msec = vmi_timer_ops.get_cycle_frequency();
219 (void)do_div(cycles_per_msec, 1000);
220
221 memcpy(evt, &vmi_clockevent, sizeof(*evt));
222 /* Must pick .shift such that .mult fits in 32-bits. Choosing
223 * .shift to be 22 allows 2^(32-22) cycles per nano-seconds
224 * before overflow. */
225 evt->mult = div_sc(cycles_per_msec, NSEC_PER_MSEC, evt->shift);
226 /* Upper bound is clockevent's use of ulong for cycle deltas. */
227 evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt);
228 evt->min_delta_ns = clockevent_delta2ns(1, evt);
229 evt->cpumask = cpumask_of_cpu(cpu);
230
231 printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n",
232 evt->name, evt->mult, evt->shift);
233 clockevents_register_device(evt);
234}
235
236void __init vmi_time_init(void)
237{
238	/* Disable PIT: BIOSes start PIT CH0 with 18.2hz periodic. */
239 outb_p(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */
240
241 vmi_time_init_clockevent();
242 setup_irq(0, &vmi_clock_action);
243}
244
245#ifdef CONFIG_X86_LOCAL_APIC
246void __devinit vmi_time_bsp_init(void)
247{
248 /*
249 * On APIC systems, we want local timers to fire on each cpu. We do
250 * this by programming LVTT to deliver timer events to the IRQ handler
251 * for IRQ-0, since we can't re-use the APIC local timer handler
252 * without interfering with that code.
253 */
254 clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
255 local_irq_disable();
256#ifdef CONFIG_X86_SMP
257 /*
258 * XXX handle_percpu_irq only defined for SMP; we need to switch over
259 * to using it, since this is a local interrupt, which each CPU must
260 * handle individually without locking out or dropping simultaneous
261 * local timers on other CPUs. We also don't want to trigger the
262 * quirk workaround code for interrupts which gets invoked from
263 * handle_percpu_irq via eoi, so we use our own IRQ chip.
264 */
265 set_irq_chip_and_handler_name(0, &vmi_chip, handle_percpu_irq, "lvtt");
266#else
267 set_irq_chip_and_handler_name(0, &vmi_chip, handle_edge_irq, "lvtt");
268#endif
269 vmi_wiring = VMI_ALARM_WIRED_LVTT;
270 apic_write(APIC_LVTT, vmi_get_timer_vector());
271 local_irq_enable();
272 clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL);
273}
274
275void __devinit vmi_time_ap_init(void)
276{
277 vmi_time_init_clockevent();
278 apic_write(APIC_LVTT, vmi_get_timer_vector());
279}
280#endif
281
282/** vmi clocksource */
283
284static cycle_t read_real_cycles(void)
285{
286 return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL);
287}
288
289static struct clocksource clocksource_vmi = {
290 .name = "vmi-timer",
291 .rating = 450,
292 .read = read_real_cycles,
293 .mask = CLOCKSOURCE_MASK(64),
294 .mult = 0, /* to be set */
295 .shift = 22,
296 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
297};
298
299static int __init init_vmi_clocksource(void)
300{
301 cycle_t cycles_per_msec;
302
303 if (!vmi_timer_ops.get_cycle_frequency)
304 return 0;
305 /* Use khz2mult rather than hz2mult since hz arg is only 32-bits. */
306 cycles_per_msec = vmi_timer_ops.get_cycle_frequency();
307 (void)do_div(cycles_per_msec, 1000);
308
309 /* Note that clocksource.{mult, shift} converts in the opposite direction
310 * as clockevents. */
311 clocksource_vmi.mult = clocksource_khz2mult(cycles_per_msec,
312 clocksource_vmi.shift);
313
314 printk(KERN_WARNING "vmi: registering clock source khz=%lld\n", cycles_per_msec);
315 return clocksource_register(&clocksource_vmi);
316
317}
318module_init(init_vmi_clocksource);
diff --git a/arch/i386/kernel/vmitime.c b/arch/i386/kernel/vmitime.c
deleted file mode 100644
index 9dfb17739b67..000000000000
--- a/arch/i386/kernel/vmitime.c
+++ /dev/null
@@ -1,482 +0,0 @@
1/*
2 * VMI paravirtual timer support routines.
3 *
4 * Copyright (C) 2005, VMware, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
14 * NON INFRINGEMENT. See the GNU General Public License for more
15 * details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 *
21 * Send feedback to dhecht@vmware.com
22 *
23 */
24
25/*
26 * Portions of this code from arch/i386/kernel/timers/timer_tsc.c.
27 * Portions of the CONFIG_NO_IDLE_HZ code from arch/s390/kernel/time.c.
28 * See comments there for proper credits.
29 */
30
31#include <linux/spinlock.h>
32#include <linux/init.h>
33#include <linux/errno.h>
34#include <linux/jiffies.h>
35#include <linux/interrupt.h>
36#include <linux/kernel_stat.h>
37#include <linux/rcupdate.h>
38#include <linux/clocksource.h>
39
40#include <asm/timer.h>
41#include <asm/io.h>
42#include <asm/apic.h>
43#include <asm/div64.h>
44#include <asm/timer.h>
45#include <asm/desc.h>
46
47#include <asm/vmi.h>
48#include <asm/vmi_time.h>
49
50#include <mach_timer.h>
51#include <io_ports.h>
52
53#ifdef CONFIG_X86_LOCAL_APIC
54#define VMI_ALARM_WIRING VMI_ALARM_WIRED_LVTT
55#else
56#define VMI_ALARM_WIRING VMI_ALARM_WIRED_IRQ0
57#endif
58
59/* Cached VMI operations */
60struct vmi_timer_ops vmi_timer_ops;
61
62#ifdef CONFIG_NO_IDLE_HZ
63
64/* /proc/sys/kernel/hz_timer state. */
65int sysctl_hz_timer;
66
67/* Some stats */
68static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_irqs);
69static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_jiffies);
70static DEFINE_PER_CPU(unsigned long, idle_start_jiffies);
71
72#endif /* CONFIG_NO_IDLE_HZ */
73
74/* Number of alarms per second. By default this is CONFIG_VMI_ALARM_HZ. */
75static int alarm_hz = CONFIG_VMI_ALARM_HZ;
76
77/* Cache of the value get_cycle_frequency / HZ. */
78static signed long long cycles_per_jiffy;
79
80/* Cache of the value get_cycle_frequency / alarm_hz. */
81static signed long long cycles_per_alarm;
82
83/* The number of cycles accounted for by the 'jiffies'/'xtime' count.
84 * Protected by xtime_lock. */
85static unsigned long long real_cycles_accounted_system;
86
87/* The number of cycles accounted for by update_process_times(), per cpu. */
88static DEFINE_PER_CPU(unsigned long long, process_times_cycles_accounted_cpu);
89
90/* The number of stolen cycles accounted, per cpu. */
91static DEFINE_PER_CPU(unsigned long long, stolen_cycles_accounted_cpu);
92
93/* Clock source. */
94static cycle_t read_real_cycles(void)
95{
96 return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL);
97}
98
99static cycle_t read_available_cycles(void)
100{
101 return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE);
102}
103
104#if 0
105static cycle_t read_stolen_cycles(void)
106{
107 return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_STOLEN);
108}
109#endif /* 0 */
110
111static struct clocksource clocksource_vmi = {
112 .name = "vmi-timer",
113 .rating = 450,
114 .read = read_real_cycles,
115 .mask = CLOCKSOURCE_MASK(64),
116 .mult = 0, /* to be set */
117 .shift = 22,
118 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
119};
120
121
122/* Timer interrupt handler. */
123static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id);
124
125static struct irqaction vmi_timer_irq = {
126 .handler = vmi_timer_interrupt,
127 .flags = IRQF_DISABLED,
128 .mask = CPU_MASK_NONE,
129 .name = "VMI-alarm",
130};
131
132/* Alarm rate */
133static int __init vmi_timer_alarm_rate_setup(char* str)
134{
135 int alarm_rate;
136 if (get_option(&str, &alarm_rate) == 1 && alarm_rate > 0) {
137 alarm_hz = alarm_rate;
138 printk(KERN_WARNING "VMI timer alarm HZ set to %d\n", alarm_hz);
139 }
140 return 1;
141}
142__setup("vmi_timer_alarm_hz=", vmi_timer_alarm_rate_setup);
143
144
145/* Initialization */
146static void vmi_get_wallclock_ts(struct timespec *ts)
147{
148 unsigned long long wallclock;
149 wallclock = vmi_timer_ops.get_wallclock(); // nsec units
150 ts->tv_nsec = do_div(wallclock, 1000000000);
151 ts->tv_sec = wallclock;
152}
153
154unsigned long vmi_get_wallclock(void)
155{
156 struct timespec ts;
157 vmi_get_wallclock_ts(&ts);
158 return ts.tv_sec;
159}
160
161int vmi_set_wallclock(unsigned long now)
162{
163 return -1;
164}
165
166unsigned long long vmi_get_sched_cycles(void)
167{
168 return read_available_cycles();
169}
170
171unsigned long vmi_cpu_khz(void)
172{
173 unsigned long long khz;
174
175 khz = vmi_timer_ops.get_cycle_frequency();
176 (void)do_div(khz, 1000);
177 return khz;
178}
179
180void __init vmi_time_init(void)
181{
182 unsigned long long cycles_per_sec, cycles_per_msec;
183 unsigned long flags;
184
185 local_irq_save(flags);
186 setup_irq(0, &vmi_timer_irq);
187#ifdef CONFIG_X86_LOCAL_APIC
188 set_intr_gate(LOCAL_TIMER_VECTOR, apic_vmi_timer_interrupt);
189#endif
190
191 real_cycles_accounted_system = read_real_cycles();
192 per_cpu(process_times_cycles_accounted_cpu, 0) = read_available_cycles();
193
194 cycles_per_sec = vmi_timer_ops.get_cycle_frequency();
195 cycles_per_jiffy = cycles_per_sec;
196 (void)do_div(cycles_per_jiffy, HZ);
197 cycles_per_alarm = cycles_per_sec;
198 (void)do_div(cycles_per_alarm, alarm_hz);
199 cycles_per_msec = cycles_per_sec;
200 (void)do_div(cycles_per_msec, 1000);
201
202 printk(KERN_WARNING "VMI timer cycles/sec = %llu ; cycles/jiffy = %llu ;"
203 "cycles/alarm = %llu\n", cycles_per_sec, cycles_per_jiffy,
204 cycles_per_alarm);
205
206 clocksource_vmi.mult = clocksource_khz2mult(cycles_per_msec,
207 clocksource_vmi.shift);
208 if (clocksource_register(&clocksource_vmi))
209 printk(KERN_WARNING "Error registering VMITIME clocksource.");
210
211 /* Disable PIT. */
212 outb_p(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */
213
214 /* schedule the alarm. do this in phase with process_times_cycles_accounted_cpu
215 * reduce the latency calling update_process_times. */
216 vmi_timer_ops.set_alarm(
217 VMI_ALARM_WIRED_IRQ0 | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
218 per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm,
219 cycles_per_alarm);
220
221 local_irq_restore(flags);
222}
223
224#ifdef CONFIG_X86_LOCAL_APIC
225
226void __init vmi_timer_setup_boot_alarm(void)
227{
228 local_irq_disable();
229
230 /* Route the interrupt to the correct vector. */
231 apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR);
232
233 /* Cancel the IRQ0 wired alarm, and setup the LVTT alarm. */
234 vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
235 vmi_timer_ops.set_alarm(
236 VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
237 per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm,
238 cycles_per_alarm);
239 local_irq_enable();
240}
241
242/* Initialize the time accounting variables for an AP on an SMP system.
243 * Also, set the local alarm for the AP. */
244void __devinit vmi_timer_setup_secondary_alarm(void)
245{
246 int cpu = smp_processor_id();
247
248 /* Route the interrupt to the correct vector. */
249 apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR);
250
251 per_cpu(process_times_cycles_accounted_cpu, cpu) = read_available_cycles();
252
253 vmi_timer_ops.set_alarm(
254 VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
255 per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm,
256 cycles_per_alarm);
257}
258
259#endif
260
261/* Update system wide (real) time accounting (e.g. jiffies, xtime). */
262static void vmi_account_real_cycles(unsigned long long cur_real_cycles)
263{
264 long long cycles_not_accounted;
265
266 write_seqlock(&xtime_lock);
267
268 cycles_not_accounted = cur_real_cycles - real_cycles_accounted_system;
269 while (cycles_not_accounted >= cycles_per_jiffy) {
270 /* systems wide jiffies. */
271 do_timer(1);
272
273 cycles_not_accounted -= cycles_per_jiffy;
274 real_cycles_accounted_system += cycles_per_jiffy;
275 }
276
277 write_sequnlock(&xtime_lock);
278}
279
280/* Update per-cpu process times. */
281static void vmi_account_process_times_cycles(struct pt_regs *regs, int cpu,
282 unsigned long long cur_process_times_cycles)
283{
284 long long cycles_not_accounted;
285 cycles_not_accounted = cur_process_times_cycles -
286 per_cpu(process_times_cycles_accounted_cpu, cpu);
287
288 while (cycles_not_accounted >= cycles_per_jiffy) {
289 /* Account time to the current process. This includes
290 * calling into the scheduler to decrement the timeslice
291 * and possibly reschedule.*/
292 update_process_times(user_mode(regs));
293 /* XXX handle /proc/profile multiplier. */
294 profile_tick(CPU_PROFILING);
295
296 cycles_not_accounted -= cycles_per_jiffy;
297 per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
298 }
299}
300
301#ifdef CONFIG_NO_IDLE_HZ
302/* Update per-cpu idle times. Used when a no-hz halt is ended. */
303static void vmi_account_no_hz_idle_cycles(int cpu,
304 unsigned long long cur_process_times_cycles)
305{
306 long long cycles_not_accounted;
307 unsigned long no_idle_hz_jiffies = 0;
308
309 cycles_not_accounted = cur_process_times_cycles -
310 per_cpu(process_times_cycles_accounted_cpu, cpu);
311
312 while (cycles_not_accounted >= cycles_per_jiffy) {
313 no_idle_hz_jiffies++;
314 cycles_not_accounted -= cycles_per_jiffy;
315 per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
316 }
317 /* Account time to the idle process. */
318 account_steal_time(idle_task(cpu), jiffies_to_cputime(no_idle_hz_jiffies));
319}
320#endif
321
322/* Update per-cpu stolen time. */
323static void vmi_account_stolen_cycles(int cpu,
324 unsigned long long cur_real_cycles,
325 unsigned long long cur_avail_cycles)
326{
327 long long stolen_cycles_not_accounted;
328 unsigned long stolen_jiffies = 0;
329
330 if (cur_real_cycles < cur_avail_cycles)
331 return;
332
333 stolen_cycles_not_accounted = cur_real_cycles - cur_avail_cycles -
334 per_cpu(stolen_cycles_accounted_cpu, cpu);
335
336 while (stolen_cycles_not_accounted >= cycles_per_jiffy) {
337 stolen_jiffies++;
338 stolen_cycles_not_accounted -= cycles_per_jiffy;
339 per_cpu(stolen_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
340 }
341 /* HACK: pass NULL to force time onto cpustat->steal. */
342 account_steal_time(NULL, jiffies_to_cputime(stolen_jiffies));
343}
344
345/* Body of either IRQ0 interrupt handler (UP no local-APIC) or
346 * local-APIC LVTT interrupt handler (UP & local-APIC or SMP). */
347static void vmi_local_timer_interrupt(int cpu)
348{
349 unsigned long long cur_real_cycles, cur_process_times_cycles;
350
351 cur_real_cycles = read_real_cycles();
352 cur_process_times_cycles = read_available_cycles();
353 /* Update system wide (real) time state (xtime, jiffies). */
354 vmi_account_real_cycles(cur_real_cycles);
355 /* Update per-cpu process times. */
356 vmi_account_process_times_cycles(get_irq_regs(), cpu, cur_process_times_cycles);
357 /* Update time stolen from this cpu by the hypervisor. */
358 vmi_account_stolen_cycles(cpu, cur_real_cycles, cur_process_times_cycles);
359}
360
361#ifdef CONFIG_NO_IDLE_HZ
362
363/* Must be called only from idle loop, with interrupts disabled. */
364int vmi_stop_hz_timer(void)
365{
366 /* Note that cpu_set, cpu_clear are (SMP safe) atomic on x86. */
367
368 unsigned long seq, next;
369 unsigned long long real_cycles_expiry;
370 int cpu = smp_processor_id();
371
372 BUG_ON(!irqs_disabled());
373 if (sysctl_hz_timer != 0)
374 return 0;
375
376 cpu_set(cpu, nohz_cpu_mask);
377 smp_mb();
378
379 if (rcu_needs_cpu(cpu) || local_softirq_pending() ||
380 (next = next_timer_interrupt(),
381 time_before_eq(next, jiffies + HZ/CONFIG_VMI_ALARM_HZ))) {
382 cpu_clear(cpu, nohz_cpu_mask);
383 return 0;
384 }
385
386 /* Convert jiffies to the real cycle counter. */
387 do {
388 seq = read_seqbegin(&xtime_lock);
389 real_cycles_expiry = real_cycles_accounted_system +
390 (long)(next - jiffies) * cycles_per_jiffy;
391 } while (read_seqretry(&xtime_lock, seq));
392
393 /* This cpu is going idle. Disable the periodic alarm. */
394 vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
395 per_cpu(idle_start_jiffies, cpu) = jiffies;
396 /* Set the real time alarm to expire at the next event. */
397 vmi_timer_ops.set_alarm(
398 VMI_ALARM_WIRING | VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL,
399 real_cycles_expiry, 0);
400 return 1;
401}
402
403static void vmi_reenable_hz_timer(int cpu)
404{
405 /* For /proc/vmi/info idle_hz stat. */
406 per_cpu(vmi_idle_no_hz_jiffies, cpu) += jiffies - per_cpu(idle_start_jiffies, cpu);
407 per_cpu(vmi_idle_no_hz_irqs, cpu)++;
408
409	/* Don't bother explicitly cancelling the one-shot alarm -- at
410	 * worst we will receive a spurious timer interrupt. */
411 vmi_timer_ops.set_alarm(
412 VMI_ALARM_WIRING | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
413 per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm,
414 cycles_per_alarm);
415 /* Indicate this cpu is no longer nohz idle. */
416 cpu_clear(cpu, nohz_cpu_mask);
417}
418
419/* Called from interrupt handlers when (local) HZ timer is disabled. */
420void vmi_account_time_restart_hz_timer(void)
421{
422 unsigned long long cur_real_cycles, cur_process_times_cycles;
423 int cpu = smp_processor_id();
424
425 BUG_ON(!irqs_disabled());
426 /* Account the time during which the HZ timer was disabled. */
427 cur_real_cycles = read_real_cycles();
428 cur_process_times_cycles = read_available_cycles();
429 /* Update system wide (real) time state (xtime, jiffies). */
430 vmi_account_real_cycles(cur_real_cycles);
431 /* Update per-cpu idle times. */
432 vmi_account_no_hz_idle_cycles(cpu, cur_process_times_cycles);
433 /* Update time stolen from this cpu by the hypervisor. */
434 vmi_account_stolen_cycles(cpu, cur_real_cycles, cur_process_times_cycles);
435 /* Reenable the hz timer. */
436 vmi_reenable_hz_timer(cpu);
437}
438
439#endif /* CONFIG_NO_IDLE_HZ */
440
441/* UP (and no local-APIC) VMI-timer alarm interrupt handler.
442 * Handler for IRQ0. Not used when SMP or X86_LOCAL_APIC after
443 * APIC setup and setup_boot_vmi_alarm() is called. */
444static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id)
445{
446 vmi_local_timer_interrupt(smp_processor_id());
447 return IRQ_HANDLED;
448}
449
450#ifdef CONFIG_X86_LOCAL_APIC
451
452/* SMP VMI-timer alarm interrupt handler. Handler for LVTT vector.
453 * Also used in UP when CONFIG_X86_LOCAL_APIC.
454 * The wrapper code is from arch/i386/kernel/apic.c#smp_apic_timer_interrupt. */
455void smp_apic_vmi_timer_interrupt(struct pt_regs *regs)
456{
457 struct pt_regs *old_regs = set_irq_regs(regs);
458 int cpu = smp_processor_id();
459
460 /*
461 * the NMI deadlock-detector uses this.
462 */
463 per_cpu(irq_stat,cpu).apic_timer_irqs++;
464
465 /*
466 * NOTE! We'd better ACK the irq immediately,
467 * because timer handling can be slow.
468 */
469 ack_APIC_irq();
470
471 /*
472 * update_process_times() expects us to have done irq_enter().
473 * Besides, if we don't timer interrupts ignore the global
474 * interrupt lock, which is the WrongThing (tm) to do.
475 */
476 irq_enter();
477 vmi_local_timer_interrupt(cpu);
478 irq_exit();
479 set_irq_regs(old_regs);
480}
481
482#endif /* CONFIG_X86_LOCAL_APIC */