aboutsummaryrefslogtreecommitdiffstats
path: root/arch/i386/kernel/vmitime.c
diff options
context:
space:
mode:
author: Zachary Amsden <zach@vmware.com> 2007-05-02 13:27:16 -0400
committer: Andi Kleen <andi@basil.nowhere.org> 2007-05-02 13:27:16 -0400
commit: e0bb8643974397a8d36670e06e6a54bb84f3289f (patch)
tree: 196c6929fb8ccfe839f044a73be2f9861a88f175 /arch/i386/kernel/vmitime.c
parent: eeef9c68aae2f4f21ab810d0339e0f22d30b0cd8 (diff)
[PATCH] i386: Convert VMI timer to use clock events
Convert VMI timer to use clock events, making it properly able to use the NO_HZ infrastructure. On UP systems, with no local APIC, we just continue to route these events through the PIT. On systems with a local APIC, or SMP, we provide a single source interrupt chip which creates the local timer IRQ. It actually gets delivered by the APIC hardware, but we don't want to use the same local APIC clocksource processing, so we create our own handler here. Signed-off-by: Zachary Amsden <zach@vmware.com> Signed-off-by: Andi Kleen <ak@suse.de> CC: Dan Hecht <dhecht@vmware.com> CC: Ingo Molnar <mingo@elte.hu> CC: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'arch/i386/kernel/vmitime.c')
-rw-r--r-- arch/i386/kernel/vmitime.c 482
1 files changed, 0 insertions, 482 deletions
diff --git a/arch/i386/kernel/vmitime.c b/arch/i386/kernel/vmitime.c
deleted file mode 100644
index 9dfb17739b67..000000000000
--- a/arch/i386/kernel/vmitime.c
+++ /dev/null
@@ -1,482 +0,0 @@
1/*
2 * VMI paravirtual timer support routines.
3 *
4 * Copyright (C) 2005, VMware, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
14 * NON INFRINGEMENT. See the GNU General Public License for more
15 * details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 *
21 * Send feedback to dhecht@vmware.com
22 *
23 */
24
25/*
26 * Portions of this code from arch/i386/kernel/timers/timer_tsc.c.
27 * Portions of the CONFIG_NO_IDLE_HZ code from arch/s390/kernel/time.c.
28 * See comments there for proper credits.
29 */
30
31#include <linux/spinlock.h>
32#include <linux/init.h>
33#include <linux/errno.h>
34#include <linux/jiffies.h>
35#include <linux/interrupt.h>
36#include <linux/kernel_stat.h>
37#include <linux/rcupdate.h>
38#include <linux/clocksource.h>
39
40#include <asm/timer.h>
41#include <asm/io.h>
42#include <asm/apic.h>
43#include <asm/div64.h>
44#include <asm/timer.h>
45#include <asm/desc.h>
46
47#include <asm/vmi.h>
48#include <asm/vmi_time.h>
49
50#include <mach_timer.h>
51#include <io_ports.h>
52
53#ifdef CONFIG_X86_LOCAL_APIC
54#define VMI_ALARM_WIRING VMI_ALARM_WIRED_LVTT
55#else
56#define VMI_ALARM_WIRING VMI_ALARM_WIRED_IRQ0
57#endif
58
59/* Cached VMI operations */
60struct vmi_timer_ops vmi_timer_ops;
61
62#ifdef CONFIG_NO_IDLE_HZ
63
64/* /proc/sys/kernel/hz_timer state. */
65int sysctl_hz_timer;
66
67/* Some stats */
68static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_irqs);
69static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_jiffies);
70static DEFINE_PER_CPU(unsigned long, idle_start_jiffies);
71
72#endif /* CONFIG_NO_IDLE_HZ */
73
74/* Number of alarms per second. By default this is CONFIG_VMI_ALARM_HZ. */
75static int alarm_hz = CONFIG_VMI_ALARM_HZ;
76
77/* Cache of the value get_cycle_frequency / HZ. */
78static signed long long cycles_per_jiffy;
79
80/* Cache of the value get_cycle_frequency / alarm_hz. */
81static signed long long cycles_per_alarm;
82
83/* The number of cycles accounted for by the 'jiffies'/'xtime' count.
84 * Protected by xtime_lock. */
85static unsigned long long real_cycles_accounted_system;
86
87/* The number of cycles accounted for by update_process_times(), per cpu. */
88static DEFINE_PER_CPU(unsigned long long, process_times_cycles_accounted_cpu);
89
90/* The number of stolen cycles accounted, per cpu. */
91static DEFINE_PER_CPU(unsigned long long, stolen_cycles_accounted_cpu);
92
93/* Clock source. */
94static cycle_t read_real_cycles(void)
95{
96 return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL);
97}
98
99static cycle_t read_available_cycles(void)
100{
101 return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE);
102}
103
#if 0
/*
 * Currently compiled out.  Would return the current value of the VMI
 * VMI_CYCLES_STOLEN cycle counter.
 */
static cycle_t read_stolen_cycles(void)
{
	cycle_t now = vmi_timer_ops.get_cycle_counter(VMI_CYCLES_STOLEN);

	return now;
}
#endif /* 0 */
110
111static struct clocksource clocksource_vmi = {
112 .name = "vmi-timer",
113 .rating = 450,
114 .read = read_real_cycles,
115 .mask = CLOCKSOURCE_MASK(64),
116 .mult = 0, /* to be set */
117 .shift = 22,
118 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
119};
120
121
122/* Timer interrupt handler. */
123static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id);
124
125static struct irqaction vmi_timer_irq = {
126 .handler = vmi_timer_interrupt,
127 .flags = IRQF_DISABLED,
128 .mask = CPU_MASK_NONE,
129 .name = "VMI-alarm",
130};
131
132/* Alarm rate */
133static int __init vmi_timer_alarm_rate_setup(char* str)
134{
135 int alarm_rate;
136 if (get_option(&str, &alarm_rate) == 1 && alarm_rate > 0) {
137 alarm_hz = alarm_rate;
138 printk(KERN_WARNING "VMI timer alarm HZ set to %d\n", alarm_hz);
139 }
140 return 1;
141}
142__setup("vmi_timer_alarm_hz=", vmi_timer_alarm_rate_setup);
143
144
145/* Initialization */
146static void vmi_get_wallclock_ts(struct timespec *ts)
147{
148 unsigned long long wallclock;
149 wallclock = vmi_timer_ops.get_wallclock(); // nsec units
150 ts->tv_nsec = do_div(wallclock, 1000000000);
151 ts->tv_sec = wallclock;
152}
153
154unsigned long vmi_get_wallclock(void)
155{
156 struct timespec ts;
157 vmi_get_wallclock_ts(&ts);
158 return ts.tv_sec;
159}
160
/*
 * Setting the wallclock is not implemented: the requested time is
 * ignored and -1 is always returned.
 */
int vmi_set_wallclock(unsigned long now)
{
	(void)now;

	return -1;
}
165
/* Return the current VMI_CYCLES_AVAILABLE cycle counter value. */
unsigned long long vmi_get_sched_cycles(void)
{
	unsigned long long avail = read_available_cycles();

	return avail;
}
170
171unsigned long vmi_cpu_khz(void)
172{
173 unsigned long long khz;
174
175 khz = vmi_timer_ops.get_cycle_frequency();
176 (void)do_div(khz, 1000);
177 return khz;
178}
179
180void __init vmi_time_init(void)
181{
182 unsigned long long cycles_per_sec, cycles_per_msec;
183 unsigned long flags;
184
185 local_irq_save(flags);
186 setup_irq(0, &vmi_timer_irq);
187#ifdef CONFIG_X86_LOCAL_APIC
188 set_intr_gate(LOCAL_TIMER_VECTOR, apic_vmi_timer_interrupt);
189#endif
190
191 real_cycles_accounted_system = read_real_cycles();
192 per_cpu(process_times_cycles_accounted_cpu, 0) = read_available_cycles();
193
194 cycles_per_sec = vmi_timer_ops.get_cycle_frequency();
195 cycles_per_jiffy = cycles_per_sec;
196 (void)do_div(cycles_per_jiffy, HZ);
197 cycles_per_alarm = cycles_per_sec;
198 (void)do_div(cycles_per_alarm, alarm_hz);
199 cycles_per_msec = cycles_per_sec;
200 (void)do_div(cycles_per_msec, 1000);
201
202 printk(KERN_WARNING "VMI timer cycles/sec = %llu ; cycles/jiffy = %llu ;"
203 "cycles/alarm = %llu\n", cycles_per_sec, cycles_per_jiffy,
204 cycles_per_alarm);
205
206 clocksource_vmi.mult = clocksource_khz2mult(cycles_per_msec,
207 clocksource_vmi.shift);
208 if (clocksource_register(&clocksource_vmi))
209 printk(KERN_WARNING "Error registering VMITIME clocksource.");
210
211 /* Disable PIT. */
212 outb_p(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */
213
214 /* schedule the alarm. do this in phase with process_times_cycles_accounted_cpu
215 * reduce the latency calling update_process_times. */
216 vmi_timer_ops.set_alarm(
217 VMI_ALARM_WIRED_IRQ0 | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
218 per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm,
219 cycles_per_alarm);
220
221 local_irq_restore(flags);
222}
223
224#ifdef CONFIG_X86_LOCAL_APIC
225
226void __init vmi_timer_setup_boot_alarm(void)
227{
228 local_irq_disable();
229
230 /* Route the interrupt to the correct vector. */
231 apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR);
232
233 /* Cancel the IRQ0 wired alarm, and setup the LVTT alarm. */
234 vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
235 vmi_timer_ops.set_alarm(
236 VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
237 per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm,
238 cycles_per_alarm);
239 local_irq_enable();
240}
241
242/* Initialize the time accounting variables for an AP on an SMP system.
243 * Also, set the local alarm for the AP. */
244void __devinit vmi_timer_setup_secondary_alarm(void)
245{
246 int cpu = smp_processor_id();
247
248 /* Route the interrupt to the correct vector. */
249 apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR);
250
251 per_cpu(process_times_cycles_accounted_cpu, cpu) = read_available_cycles();
252
253 vmi_timer_ops.set_alarm(
254 VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
255 per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm,
256 cycles_per_alarm);
257}
258
259#endif
260
261/* Update system wide (real) time accounting (e.g. jiffies, xtime). */
262static void vmi_account_real_cycles(unsigned long long cur_real_cycles)
263{
264 long long cycles_not_accounted;
265
266 write_seqlock(&xtime_lock);
267
268 cycles_not_accounted = cur_real_cycles - real_cycles_accounted_system;
269 while (cycles_not_accounted >= cycles_per_jiffy) {
270 /* systems wide jiffies. */
271 do_timer(1);
272
273 cycles_not_accounted -= cycles_per_jiffy;
274 real_cycles_accounted_system += cycles_per_jiffy;
275 }
276
277 write_sequnlock(&xtime_lock);
278}
279
280/* Update per-cpu process times. */
281static void vmi_account_process_times_cycles(struct pt_regs *regs, int cpu,
282 unsigned long long cur_process_times_cycles)
283{
284 long long cycles_not_accounted;
285 cycles_not_accounted = cur_process_times_cycles -
286 per_cpu(process_times_cycles_accounted_cpu, cpu);
287
288 while (cycles_not_accounted >= cycles_per_jiffy) {
289 /* Account time to the current process. This includes
290 * calling into the scheduler to decrement the timeslice
291 * and possibly reschedule.*/
292 update_process_times(user_mode(regs));
293 /* XXX handle /proc/profile multiplier. */
294 profile_tick(CPU_PROFILING);
295
296 cycles_not_accounted -= cycles_per_jiffy;
297 per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
298 }
299}
300
301#ifdef CONFIG_NO_IDLE_HZ
302/* Update per-cpu idle times. Used when a no-hz halt is ended. */
303static void vmi_account_no_hz_idle_cycles(int cpu,
304 unsigned long long cur_process_times_cycles)
305{
306 long long cycles_not_accounted;
307 unsigned long no_idle_hz_jiffies = 0;
308
309 cycles_not_accounted = cur_process_times_cycles -
310 per_cpu(process_times_cycles_accounted_cpu, cpu);
311
312 while (cycles_not_accounted >= cycles_per_jiffy) {
313 no_idle_hz_jiffies++;
314 cycles_not_accounted -= cycles_per_jiffy;
315 per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
316 }
317 /* Account time to the idle process. */
318 account_steal_time(idle_task(cpu), jiffies_to_cputime(no_idle_hz_jiffies));
319}
320#endif
321
322/* Update per-cpu stolen time. */
323static void vmi_account_stolen_cycles(int cpu,
324 unsigned long long cur_real_cycles,
325 unsigned long long cur_avail_cycles)
326{
327 long long stolen_cycles_not_accounted;
328 unsigned long stolen_jiffies = 0;
329
330 if (cur_real_cycles < cur_avail_cycles)
331 return;
332
333 stolen_cycles_not_accounted = cur_real_cycles - cur_avail_cycles -
334 per_cpu(stolen_cycles_accounted_cpu, cpu);
335
336 while (stolen_cycles_not_accounted >= cycles_per_jiffy) {
337 stolen_jiffies++;
338 stolen_cycles_not_accounted -= cycles_per_jiffy;
339 per_cpu(stolen_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
340 }
341 /* HACK: pass NULL to force time onto cpustat->steal. */
342 account_steal_time(NULL, jiffies_to_cputime(stolen_jiffies));
343}
344
/*
 * Common body of the IRQ0 handler (UP, no local APIC) and the
 * local-APIC LVTT handler (UP with local APIC, or SMP).
 *
 * Samples both cycle counters once, then brings the system-wide,
 * per-cpu process and per-cpu stolen time accounting up to date from
 * those samples.
 */
static void vmi_local_timer_interrupt(int cpu)
{
	unsigned long long real_now, avail_now;

	real_now = read_real_cycles();
	avail_now = read_available_cycles();

	/* System wide (real) time state: xtime, jiffies. */
	vmi_account_real_cycles(real_now);

	/* Per-cpu process times. */
	vmi_account_process_times_cycles(get_irq_regs(), cpu, avail_now);

	/* Time stolen from this cpu by the hypervisor. */
	vmi_account_stolen_cycles(cpu, real_now, avail_now);
}
360
361#ifdef CONFIG_NO_IDLE_HZ
362
363/* Must be called only from idle loop, with interrupts disabled. */
364int vmi_stop_hz_timer(void)
365{
366 /* Note that cpu_set, cpu_clear are (SMP safe) atomic on x86. */
367
368 unsigned long seq, next;
369 unsigned long long real_cycles_expiry;
370 int cpu = smp_processor_id();
371
372 BUG_ON(!irqs_disabled());
373 if (sysctl_hz_timer != 0)
374 return 0;
375
376 cpu_set(cpu, nohz_cpu_mask);
377 smp_mb();
378
379 if (rcu_needs_cpu(cpu) || local_softirq_pending() ||
380 (next = next_timer_interrupt(),
381 time_before_eq(next, jiffies + HZ/CONFIG_VMI_ALARM_HZ))) {
382 cpu_clear(cpu, nohz_cpu_mask);
383 return 0;
384 }
385
386 /* Convert jiffies to the real cycle counter. */
387 do {
388 seq = read_seqbegin(&xtime_lock);
389 real_cycles_expiry = real_cycles_accounted_system +
390 (long)(next - jiffies) * cycles_per_jiffy;
391 } while (read_seqretry(&xtime_lock, seq));
392
393 /* This cpu is going idle. Disable the periodic alarm. */
394 vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
395 per_cpu(idle_start_jiffies, cpu) = jiffies;
396 /* Set the real time alarm to expire at the next event. */
397 vmi_timer_ops.set_alarm(
398 VMI_ALARM_WIRING | VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL,
399 real_cycles_expiry, 0);
400 return 1;
401}
402
403static void vmi_reenable_hz_timer(int cpu)
404{
405 /* For /proc/vmi/info idle_hz stat. */
406 per_cpu(vmi_idle_no_hz_jiffies, cpu) += jiffies - per_cpu(idle_start_jiffies, cpu);
407 per_cpu(vmi_idle_no_hz_irqs, cpu)++;
408
409 /* Don't bother explicitly cancelling the one-shot alarm -- at
410 * worse we will receive a spurious timer interrupt. */
411 vmi_timer_ops.set_alarm(
412 VMI_ALARM_WIRING | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
413 per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm,
414 cycles_per_alarm);
415 /* Indicate this cpu is no longer nohz idle. */
416 cpu_clear(cpu, nohz_cpu_mask);
417}
418
/*
 * Called from interrupt handlers when the (local) HZ timer is disabled.
 *
 * Accounts the time that passed while the periodic alarm was stopped
 * (system-wide real time, per-cpu idle time, per-cpu stolen time) and
 * then re-arms the periodic alarm.  Interrupts must be disabled.
 */
void vmi_account_time_restart_hz_timer(void)
{
	int cpu = smp_processor_id();
	unsigned long long real_now, avail_now;

	BUG_ON(!irqs_disabled());

	/* Sample both counters once for all the accounting below. */
	real_now = read_real_cycles();
	avail_now = read_available_cycles();

	/* System wide (real) time state: xtime, jiffies. */
	vmi_account_real_cycles(real_now);

	/* Per-cpu idle times. */
	vmi_account_no_hz_idle_cycles(cpu, avail_now);

	/* Time stolen from this cpu by the hypervisor. */
	vmi_account_stolen_cycles(cpu, real_now, avail_now);

	/* Reenable the hz timer. */
	vmi_reenable_hz_timer(cpu);
}
438
439#endif /* CONFIG_NO_IDLE_HZ */
440
441/* UP (and no local-APIC) VMI-timer alarm interrupt handler.
442 * Handler for IRQ0. Not used when SMP or X86_LOCAL_APIC after
443 * APIC setup and setup_boot_vmi_alarm() is called. */
444static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id)
445{
446 vmi_local_timer_interrupt(smp_processor_id());
447 return IRQ_HANDLED;
448}
449
450#ifdef CONFIG_X86_LOCAL_APIC
451
452/* SMP VMI-timer alarm interrupt handler. Handler for LVTT vector.
453 * Also used in UP when CONFIG_X86_LOCAL_APIC.
454 * The wrapper code is from arch/i386/kernel/apic.c#smp_apic_timer_interrupt. */
455void smp_apic_vmi_timer_interrupt(struct pt_regs *regs)
456{
457 struct pt_regs *old_regs = set_irq_regs(regs);
458 int cpu = smp_processor_id();
459
460 /*
461 * the NMI deadlock-detector uses this.
462 */
463 per_cpu(irq_stat,cpu).apic_timer_irqs++;
464
465 /*
466 * NOTE! We'd better ACK the irq immediately,
467 * because timer handling can be slow.
468 */
469 ack_APIC_irq();
470
471 /*
472 * update_process_times() expects us to have done irq_enter().
473 * Besides, if we don't timer interrupts ignore the global
474 * interrupt lock, which is the WrongThing (tm) to do.
475 */
476 irq_enter();
477 vmi_local_timer_interrupt(cpu);
478 irq_exit();
479 set_irq_regs(old_regs);
480}
481
482#endif /* CONFIG_X86_LOCAL_APIC */