Diffstat (limited to 'arch/i386')
 -rw-r--r--  arch/i386/Kconfig           |   9
 -rw-r--r--  arch/i386/kernel/Makefile   |   2
 -rw-r--r--  arch/i386/kernel/apic.c     |   2
 -rw-r--r--  arch/i386/kernel/entry.S    |   5
 -rw-r--r--  arch/i386/kernel/paravirt.c |   2
 -rw-r--r--  arch/i386/kernel/smpboot.c  |   4
 -rw-r--r--  arch/i386/kernel/time.c     |   4
 -rw-r--r--  arch/i386/kernel/tsc.c      |   4
 -rw-r--r--  arch/i386/kernel/vmi.c      |  45
 -rw-r--r--  arch/i386/kernel/vmitime.c  | 495
 10 files changed, 567 insertions(+), 5 deletions(-)
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index a3b3f6ee3642..595fb771366e 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -1272,3 +1272,12 @@ config X86_TRAMPOLINE
 config KTIME_SCALAR
 	bool
 	default y
+
+config NO_IDLE_HZ
+	bool
+	depends on PARAVIRT
+	default y
+	help
+	  Switches the regular HZ timer off when the system is going idle.
+	  This helps a hypervisor detect that the Linux system is idle,
+	  reducing the overhead of idle systems.
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index 9cfb58911f14..97f1e961d684 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -40,7 +40,7 @@ obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 obj-$(CONFIG_HPET_TIMER)	+= hpet.o
 obj-$(CONFIG_K8_NB)		+= k8.o
 
-obj-$(CONFIG_VMI)		+= vmi.o
+obj-$(CONFIG_VMI)		+= vmi.o vmitime.o
 
 # Make sure this is linked after any other paravirt_ops structs: see head.S
 obj-$(CONFIG_PARAVIRT)		+= paravirt.o
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index 776d9be26af9..629c5ed94260 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -1395,7 +1395,7 @@ int __init APIC_init_uniprocessor (void)
 	if (!skip_ioapic_setup && nr_ioapics)
 		setup_IO_APIC();
 #endif
-	setup_boot_APIC_clock();
+	setup_boot_clock();
 
 	return 0;
 }
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 8c6a22a42d2e..d4b4ffc9eacb 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -626,6 +626,11 @@ ENTRY(name) \
 /* The include is where all of the SMP etc. interrupts come from */
 #include "entry_arch.h"
 
+/* This alternate entry is needed because we hijack the apic LVTT */
+#if defined(CONFIG_VMI) && defined(CONFIG_X86_LOCAL_APIC)
+BUILD_INTERRUPT(apic_vmi_timer_interrupt,LOCAL_TIMER_VECTOR)
+#endif
+
 KPROBE_ENTRY(page_fault)
 	RING0_EC_FRAME
 	pushl $do_page_fault
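BUILD_INTERRUPT pairs an assembly stub with a C body by naming convention: the hunk context above shows the stub is emitted as `name` and hands off to `smp_##name`. That pairing is what this hunk leans on; vmitime.c at the bottom of this commit supplies the C half and installs the stub with set_intr_gate(). A declaration-level sketch of the assumed contract (the macro internals are not part of this diff):

```c
/* Sketch only: the naming contract assumed by the hunk above. The asm
 * stub generated by BUILD_INTERRUPT(apic_vmi_timer_interrupt,
 * LOCAL_TIMER_VECTOR) saves register state and then calls the C handler
 * named smp_apic_vmi_timer_interrupt(). */
struct pt_regs;

extern void apic_vmi_timer_interrupt(void);             /* asm stub, entry.S */
extern void smp_apic_vmi_timer_interrupt(struct pt_regs *regs); /* C body, vmitime.c */
```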
diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c
index 5bf81059a7e6..2003733310dc 100644
--- a/arch/i386/kernel/paravirt.c
+++ b/arch/i386/kernel/paravirt.c
@@ -544,6 +544,8 @@ struct paravirt_ops paravirt_ops = {
 	.apic_write = native_apic_write,
 	.apic_write_atomic = native_apic_write_atomic,
 	.apic_read = native_apic_read,
+	.setup_boot_clock = setup_boot_APIC_clock,
+	.setup_secondary_clock = setup_secondary_APIC_clock,
 #endif
 	.set_lazy_mode = (void *)native_nop,
 
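With these defaults in place, the setup_boot_clock()/setup_secondary_clock() calls in apic.c above and smpboot.c below resolve to the native APIC routines unless a backend overrides the two ops, as the VMI probe further down does. The header side of the indirection lives under include/asm-i386 and falls outside this diffstat (which is limited to arch/i386); a minimal sketch of its assumed shape:

```c
/* Sketch only (not part of this diff): how the renamed calls can
 * dispatch. The op names mirror the fields added to paravirt_ops above;
 * the exact header layout is assumed. */
#ifdef CONFIG_PARAVIRT
static inline void setup_boot_clock(void)
{
	paravirt_ops.setup_boot_clock();	/* native or hypervisor hook */
}
static inline void setup_secondary_clock(void)
{
	paravirt_ops.setup_secondary_clock();
}
#else
#define setup_boot_clock	setup_boot_APIC_clock
#define setup_secondary_clock	setup_secondary_APIC_clock
#endif
```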
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 42502d820e4f..5a00b07e7194 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -554,7 +554,7 @@ static void __cpuinit start_secondary(void *unused)
 	smp_callin();
 	while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
 		rep_nop();
-	setup_secondary_APIC_clock();
+	setup_secondary_clock();
 	if (nmi_watchdog == NMI_IO_APIC) {
 		disable_8259A_irq(0);
 		enable_NMI_through_LVT0(NULL);
@@ -1331,7 +1331,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
 
 	smpboot_setup_io_apic();
 
-	setup_boot_APIC_clock();
+	setup_boot_clock();
 
 	/*
 	 * Synchronize the TSC with the AP
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index c505b16c0990..9603ccaba997 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -232,6 +232,7 @@ EXPORT_SYMBOL(get_cmos_time);
 static void sync_cmos_clock(unsigned long dummy);
 
 static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0);
+int no_sync_cmos_clock;
 
 static void sync_cmos_clock(unsigned long dummy)
 {
@@ -275,7 +276,8 @@ static void sync_cmos_clock(unsigned long dummy)
 
 void notify_arch_cmos_timer(void)
 {
-	mod_timer(&sync_cmos_timer, jiffies + 1);
+	if (!no_sync_cmos_clock)
+		mod_timer(&sync_cmos_timer, jiffies + 1);
 }
 
 static long clock_cmos_diff;
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
index 2cfc7b09b925..12fef14995a5 100644
--- a/arch/i386/kernel/tsc.c
+++ b/arch/i386/kernel/tsc.c
@@ -23,6 +23,7 @@
  * an extra value to store the TSC freq
  */
 unsigned int tsc_khz;
+unsigned long long (*custom_sched_clock)(void);
 
 int tsc_disable;
 
@@ -107,6 +108,9 @@ unsigned long long sched_clock(void)
 {
 	unsigned long long this_offset;
 
+	if (unlikely(custom_sched_clock))
+		return (*custom_sched_clock)();
+
 	/*
 	 * in the NUMA case we dont use the TSC as they are not
 	 * synchronized across all CPUs.
diff --git a/arch/i386/kernel/vmi.c b/arch/i386/kernel/vmi.c
index a94d64b10f75..bb5a7abf949c 100644
--- a/arch/i386/kernel/vmi.c
+++ b/arch/i386/kernel/vmi.c
@@ -34,6 +34,7 @@
 #include <asm/apic.h>
 #include <asm/processor.h>
 #include <asm/timer.h>
+#include <asm/vmi_time.h>
 
 /* Convenient for calling VMI functions indirectly in the ROM */
 typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void);
@@ -67,6 +68,7 @@ struct {
 	void (*set_linear_mapping)(int, u32, u32, u32);
 	void (*flush_tlb)(int);
 	void (*set_initial_ap_state)(int, int);
+	void (*halt)(void);
 } vmi_ops;
 
 /* XXX move this to alternative.h */
@@ -252,6 +254,19 @@ static void vmi_nop(void)
 {
 }
 
+/* For NO_IDLE_HZ, we stop the clock when halting the kernel */
+#ifdef CONFIG_NO_IDLE_HZ
+static fastcall void vmi_safe_halt(void)
+{
+	int idle = vmi_stop_hz_timer();
+	vmi_ops.halt();
+	if (idle) {
+		local_irq_disable();
+		vmi_account_time_restart_hz_timer();
+		local_irq_enable();
+	}
+}
+#endif
 
 #ifdef CONFIG_DEBUG_PAGE_TYPE
 
@@ -727,7 +742,12 @@ static inline int __init activate_vmi(void)
 		     (char *)paravirt_ops.save_fl);
 	patch_offset(&irq_save_disable_callout[IRQ_PATCH_DISABLE],
 		     (char *)paravirt_ops.irq_disable);
+#ifndef CONFIG_NO_IDLE_HZ
 	para_fill(safe_halt, Halt);
+#else
+	vmi_ops.halt = vmi_get_function(VMI_CALL_Halt);
+	paravirt_ops.safe_halt = vmi_safe_halt;
+#endif
 	para_fill(wbinvd, WBINVD);
 	/* paravirt_ops.read_msr = vmi_rdmsr */
 	/* paravirt_ops.write_msr = vmi_wrmsr */
@@ -838,6 +858,31 @@ static inline int __init activate_vmi(void)
 #endif
 
 	/*
+	 * Check for VMI timer functionality by probing for a cycle frequency method
+	 */
+	reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_GetCycleFrequency);
+	if (rel->type != VMI_RELOCATION_NONE) {
+		vmi_timer_ops.get_cycle_frequency = (void *)rel->eip;
+		vmi_timer_ops.get_cycle_counter =
+			vmi_get_function(VMI_CALL_GetCycleCounter);
+		vmi_timer_ops.get_wallclock =
+			vmi_get_function(VMI_CALL_GetWallclockTime);
+		vmi_timer_ops.wallclock_updated =
+			vmi_get_function(VMI_CALL_WallclockUpdated);
+		vmi_timer_ops.set_alarm = vmi_get_function(VMI_CALL_SetAlarm);
+		vmi_timer_ops.cancel_alarm =
+			vmi_get_function(VMI_CALL_CancelAlarm);
+		paravirt_ops.time_init = vmi_time_init;
+		paravirt_ops.get_wallclock = vmi_get_wallclock;
+		paravirt_ops.set_wallclock = vmi_set_wallclock;
+#ifdef CONFIG_X86_LOCAL_APIC
+		paravirt_ops.setup_boot_clock = vmi_timer_setup_boot_alarm;
+		paravirt_ops.setup_secondary_clock = vmi_timer_setup_secondary_alarm;
+#endif
+		custom_sched_clock = vmi_sched_clock;
+	}
+
+	/*
 	 * Alternative instruction rewriting doesn't happen soon enough
 	 * to convert VMI_IRET to a call instead of a jump; so we have
 	 * to do this before IRQs get reenabled.  Fortunately, it is
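Reading rel->type right after assigning reloc looks like a typo, but it follows the idiom used throughout activate_vmi(): earlier in the function (outside these hunks) `rel` is declared as a vmi_relocation_info view of `reloc`'s storage, so the 64-bit value returned by the ROM call is inspected field by field. A standalone sketch of that aliasing, with the record layout assumed for illustration rather than taken from the VMI headers:

```c
#include <stdint.h>
#include <stdio.h>

/* Assumed shape of a VMI relocation record on i386 (sketch). */
struct vmi_relocation_info {
	uint32_t eip;           /* target address of the patched call */
	uint8_t  type;          /* VMI_RELOCATION_* */
	uint8_t  reserved[3];
};

/* activate_vmi() receives the record as a 64-bit scalar from
 * call_vrom_long_func() and reads it through a struct-typed pointer.
 * Modeled here as an explicit union so the aliasing is visible. */
union reloc_union {
	uint64_t                    scalar;
	struct vmi_relocation_info  rel;
};

int main(void)
{
	union reloc_union reloc = { .scalar = 0 }; /* would be the ROM's return */
	struct vmi_relocation_info *rel = &reloc.rel;

	/* Same storage, two views: testing rel->type inspects the byte
	 * layout of the scalar just assigned. */
	printf("relocation type byte = %u\n", rel->type);
	return 0;
}
```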
diff --git a/arch/i386/kernel/vmitime.c b/arch/i386/kernel/vmitime.c
new file mode 100644
index 000000000000..7c3033dbe5f5
--- /dev/null
+++ b/arch/i386/kernel/vmitime.c
@@ -0,0 +1,495 @@
+/*
+ * VMI paravirtual timer support routines.
+ *
+ * Copyright (C) 2005, VMware, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Send feedback to dhecht@vmware.com
+ *
+ */
+
+/*
+ * Portions of this code from arch/i386/kernel/timers/timer_tsc.c.
+ * Portions of the CONFIG_NO_IDLE_HZ code from arch/s390/kernel/time.c.
+ * See comments there for proper credits.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/jiffies.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+#include <linux/rcupdate.h>
+#include <linux/clocksource.h>
+
+#include <asm/timer.h>
+#include <asm/io.h>
+#include <asm/apic.h>
+#include <asm/div64.h>
+#include <asm/timer.h>
+#include <asm/desc.h>
+
+#include <asm/vmi.h>
+#include <asm/vmi_time.h>
+
+#include <mach_timer.h>
+#include <io_ports.h>
+
+#ifdef CONFIG_X86_LOCAL_APIC
+#define VMI_ALARM_WIRING VMI_ALARM_WIRED_LVTT
+#else
+#define VMI_ALARM_WIRING VMI_ALARM_WIRED_IRQ0
+#endif
+
+/* Cached VMI operations */
+struct vmi_timer_ops vmi_timer_ops;
+
+#ifdef CONFIG_NO_IDLE_HZ
+
+/* /proc/sys/kernel/hz_timer state. */
+int sysctl_hz_timer;
+
+/* Some stats */
+static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_irqs);
+static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_jiffies);
+static DEFINE_PER_CPU(unsigned long, idle_start_jiffies);
+
+#endif /* CONFIG_NO_IDLE_HZ */
+
+/* Number of alarms per second. By default this is CONFIG_VMI_ALARM_HZ. */
+static int alarm_hz = CONFIG_VMI_ALARM_HZ;
+
+/* Cache of the value get_cycle_frequency / HZ. */
+static signed long long cycles_per_jiffy;
+
+/* Cache of the value get_cycle_frequency / alarm_hz. */
+static signed long long cycles_per_alarm;
+
+/* The number of cycles accounted for by the 'jiffies'/'xtime' count.
+ * Protected by xtime_lock. */
+static unsigned long long real_cycles_accounted_system;
+
+/* The number of cycles accounted for by update_process_times(), per cpu. */
+static DEFINE_PER_CPU(unsigned long long, process_times_cycles_accounted_cpu);
+
+/* The number of stolen cycles accounted, per cpu. */
+static DEFINE_PER_CPU(unsigned long long, stolen_cycles_accounted_cpu);
+
+/* Clock source. */
+static cycle_t read_real_cycles(void)
+{
+	return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL);
+}
+
+static cycle_t read_available_cycles(void)
+{
+	return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE);
+}
+
+#if 0
+static cycle_t read_stolen_cycles(void)
+{
+	return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_STOLEN);
+}
+#endif /* 0 */
+
+static struct clocksource clocksource_vmi = {
+	.name		= "vmi-timer",
+	.rating		= 450,
+	.read		= read_real_cycles,
+	.mask		= CLOCKSOURCE_MASK(64),
+	.mult		= 0, /* to be set */
+	.shift		= 22,
+	.is_continuous	= 1,
+};
+
+
+/* Timer interrupt handler. */
+static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id);
+
+static struct irqaction vmi_timer_irq = {
+	vmi_timer_interrupt,
+	SA_INTERRUPT,
+	CPU_MASK_NONE,
+	"VMI-alarm",
+	NULL,
+	NULL
+};
+
+/* Alarm rate */
+static int __init vmi_timer_alarm_rate_setup(char *str)
+{
+	int alarm_rate;
+	if (get_option(&str, &alarm_rate) == 1 && alarm_rate > 0) {
+		alarm_hz = alarm_rate;
+		printk(KERN_WARNING "VMI timer alarm HZ set to %d\n", alarm_hz);
+	}
+	return 1;
+}
+__setup("vmi_timer_alarm_hz=", vmi_timer_alarm_rate_setup);
+
+
+/* Initialization */
+static void vmi_get_wallclock_ts(struct timespec *ts)
+{
+	unsigned long long wallclock;
+	wallclock = vmi_timer_ops.get_wallclock(); /* nsec units */
+	ts->tv_nsec = do_div(wallclock, 1000000000);
+	ts->tv_sec = wallclock;
+}
+
+static void update_xtime_from_wallclock(void)
+{
+	struct timespec ts;
+	vmi_get_wallclock_ts(&ts);
+	do_settimeofday(&ts);
+}
+
+unsigned long vmi_get_wallclock(void)
+{
+	struct timespec ts;
+	vmi_get_wallclock_ts(&ts);
+	return ts.tv_sec;
+}
+
+int vmi_set_wallclock(unsigned long now)
+{
+	return -1;
+}
+
+unsigned long long vmi_sched_clock(void)
+{
+	return read_available_cycles();
+}
+
+void __init vmi_time_init(void)
+{
+	unsigned long long cycles_per_sec, cycles_per_msec;
+
+	setup_irq(0, &vmi_timer_irq);
+#ifdef CONFIG_X86_LOCAL_APIC
+	set_intr_gate(LOCAL_TIMER_VECTOR, apic_vmi_timer_interrupt);
+#endif
+
+	no_sync_cmos_clock = 1;
+
+	vmi_get_wallclock_ts(&xtime);
+	set_normalized_timespec(&wall_to_monotonic,
+		-xtime.tv_sec, -xtime.tv_nsec);
+
+	real_cycles_accounted_system = read_real_cycles();
+	update_xtime_from_wallclock();
+	per_cpu(process_times_cycles_accounted_cpu, 0) = read_available_cycles();
+
+	cycles_per_sec = vmi_timer_ops.get_cycle_frequency();
+
+	cycles_per_jiffy = cycles_per_sec;
+	(void)do_div(cycles_per_jiffy, HZ);
+	cycles_per_alarm = cycles_per_sec;
+	(void)do_div(cycles_per_alarm, alarm_hz);
+	cycles_per_msec = cycles_per_sec;
+	(void)do_div(cycles_per_msec, 1000);
+	cpu_khz = cycles_per_msec;
+
+	printk(KERN_WARNING "VMI timer cycles/sec = %llu ; cycles/jiffy = %llu ;"
+	       " cycles/alarm = %llu\n", cycles_per_sec, cycles_per_jiffy,
+	       cycles_per_alarm);
+
+	clocksource_vmi.mult = clocksource_khz2mult(cycles_per_msec,
+						    clocksource_vmi.shift);
+	if (clocksource_register(&clocksource_vmi))
+		printk(KERN_WARNING "Error registering VMITIME clocksource.");
+
+	/* Disable PIT. */
+	outb_p(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */
+
+	/* Schedule the alarm. Do this in phase with process_times_cycles_accounted_cpu
+	 * to reduce the latency of calling update_process_times(). */
+	vmi_timer_ops.set_alarm(
+		VMI_ALARM_WIRED_IRQ0 | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
+		per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm,
+		cycles_per_alarm);
+}
+
+#ifdef CONFIG_X86_LOCAL_APIC
+
+void __init vmi_timer_setup_boot_alarm(void)
+{
+	local_irq_disable();
+
+	/* Route the interrupt to the correct vector. */
+	apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR);
+
+	/* Cancel the IRQ0 wired alarm, and setup the LVTT alarm. */
+	vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
+	vmi_timer_ops.set_alarm(
+		VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
+		per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm,
+		cycles_per_alarm);
+	local_irq_enable();
+}
+
+/* Initialize the time accounting variables for an AP on an SMP system.
+ * Also, set the local alarm for the AP. */
+void __init vmi_timer_setup_secondary_alarm(void)
+{
+	int cpu = smp_processor_id();
+
+	/* Route the interrupt to the correct vector. */
+	apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR);
+
+	per_cpu(process_times_cycles_accounted_cpu, cpu) = read_available_cycles();
+
+	vmi_timer_ops.set_alarm(
+		VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
+		per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm,
+		cycles_per_alarm);
+}
+
+#endif
+
+/* Update system-wide (real) time accounting (e.g. jiffies, xtime). */
+static void vmi_account_real_cycles(unsigned long long cur_real_cycles)
+{
+	long long cycles_not_accounted;
+
+	write_seqlock(&xtime_lock);
+
+	cycles_not_accounted = cur_real_cycles - real_cycles_accounted_system;
+	while (cycles_not_accounted >= cycles_per_jiffy) {
+		/* System-wide jiffies and wallclock. */
+		do_timer(1);
+
+		cycles_not_accounted -= cycles_per_jiffy;
+		real_cycles_accounted_system += cycles_per_jiffy;
+	}
+
+	if (vmi_timer_ops.wallclock_updated())
+		update_xtime_from_wallclock();
+
+	write_sequnlock(&xtime_lock);
+}
+
+/* Update per-cpu process times. */
+static void vmi_account_process_times_cycles(struct pt_regs *regs, int cpu,
+					     unsigned long long cur_process_times_cycles)
+{
+	long long cycles_not_accounted;
+	cycles_not_accounted = cur_process_times_cycles -
+		per_cpu(process_times_cycles_accounted_cpu, cpu);
+
+	while (cycles_not_accounted >= cycles_per_jiffy) {
+		/* Account time to the current process. This includes
+		 * calling into the scheduler to decrement the timeslice
+		 * and possibly reschedule. */
+		update_process_times(user_mode(regs));
+		/* XXX handle /proc/profile multiplier. */
+		profile_tick(CPU_PROFILING);
+
+		cycles_not_accounted -= cycles_per_jiffy;
+		per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
+	}
+}
+
+#ifdef CONFIG_NO_IDLE_HZ
+/* Update per-cpu idle times. Used when a no-hz halt is ended. */
+static void vmi_account_no_hz_idle_cycles(int cpu,
+					  unsigned long long cur_process_times_cycles)
+{
+	long long cycles_not_accounted;
+	unsigned long no_idle_hz_jiffies = 0;
+
+	cycles_not_accounted = cur_process_times_cycles -
+		per_cpu(process_times_cycles_accounted_cpu, cpu);
+
+	while (cycles_not_accounted >= cycles_per_jiffy) {
+		no_idle_hz_jiffies++;
+		cycles_not_accounted -= cycles_per_jiffy;
+		per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
+	}
+	/* Account time to the idle process. */
+	account_steal_time(idle_task(cpu), jiffies_to_cputime(no_idle_hz_jiffies));
+}
+#endif
+
+/* Update per-cpu stolen time. */
+static void vmi_account_stolen_cycles(int cpu,
+				      unsigned long long cur_real_cycles,
+				      unsigned long long cur_avail_cycles)
+{
+	long long stolen_cycles_not_accounted;
+	unsigned long stolen_jiffies = 0;
+
+	if (cur_real_cycles < cur_avail_cycles)
+		return;
+
+	stolen_cycles_not_accounted = cur_real_cycles - cur_avail_cycles -
+		per_cpu(stolen_cycles_accounted_cpu, cpu);
+
+	while (stolen_cycles_not_accounted >= cycles_per_jiffy) {
+		stolen_jiffies++;
+		stolen_cycles_not_accounted -= cycles_per_jiffy;
+		per_cpu(stolen_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
+	}
+	/* HACK: pass NULL to force time onto cpustat->steal. */
+	account_steal_time(NULL, jiffies_to_cputime(stolen_jiffies));
+}
+
+/* Body of either IRQ0 interrupt handler (UP no local-APIC) or
+ * local-APIC LVTT interrupt handler (UP & local-APIC or SMP). */
+static void vmi_local_timer_interrupt(int cpu)
+{
+	unsigned long long cur_real_cycles, cur_process_times_cycles;
+
+	cur_real_cycles = read_real_cycles();
+	cur_process_times_cycles = read_available_cycles();
+	/* Update system-wide (real) time state (xtime, jiffies). */
+	vmi_account_real_cycles(cur_real_cycles);
+	/* Update per-cpu process times. */
+	vmi_account_process_times_cycles(get_irq_regs(), cpu, cur_process_times_cycles);
+	/* Update time stolen from this cpu by the hypervisor. */
+	vmi_account_stolen_cycles(cpu, cur_real_cycles, cur_process_times_cycles);
+}
+
+#ifdef CONFIG_NO_IDLE_HZ
+
+/* Must be called only from idle loop, with interrupts disabled. */
+int vmi_stop_hz_timer(void)
+{
+	/* Note that cpu_set, cpu_clear are (SMP safe) atomic on x86. */
+
+	unsigned long seq, next;
+	unsigned long long real_cycles_expiry;
+	int cpu = smp_processor_id();
+	int idle;
+
+	BUG_ON(!irqs_disabled());
+	if (sysctl_hz_timer != 0)
+		return 0;
+
+	cpu_set(cpu, nohz_cpu_mask);
+	smp_mb();
+	if (rcu_needs_cpu(cpu) || local_softirq_pending() ||
+	    (next = next_timer_interrupt(), time_before_eq(next, jiffies))) {
+		cpu_clear(cpu, nohz_cpu_mask);
+		next = jiffies;
+		idle = 0;
+	} else
+		idle = 1;
+
+	/* Convert jiffies to the real cycle counter. */
+	do {
+		seq = read_seqbegin(&xtime_lock);
+		real_cycles_expiry = real_cycles_accounted_system +
+			(long)(next - jiffies) * cycles_per_jiffy;
+	} while (read_seqretry(&xtime_lock, seq));
+
+	/* This cpu is going idle. Disable the periodic alarm. */
+	if (idle) {
+		vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
+		per_cpu(idle_start_jiffies, cpu) = jiffies;
+	}
+
+	/* Set the real time alarm to expire at the next event. */
+	vmi_timer_ops.set_alarm(
+		VMI_ALARM_WIRING | VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL,
+		real_cycles_expiry, 0);
+
+	return idle;
+}
+
+static void vmi_reenable_hz_timer(int cpu)
+{
+	/* For /proc/vmi/info idle_hz stat. */
+	per_cpu(vmi_idle_no_hz_jiffies, cpu) += jiffies - per_cpu(idle_start_jiffies, cpu);
+	per_cpu(vmi_idle_no_hz_irqs, cpu)++;
+
+	/* Don't bother explicitly cancelling the one-shot alarm -- at
+	 * worst we will receive a spurious timer interrupt. */
+	vmi_timer_ops.set_alarm(
+		VMI_ALARM_WIRING | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
+		per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm,
+		cycles_per_alarm);
+	/* Indicate this cpu is no longer nohz idle. */
+	cpu_clear(cpu, nohz_cpu_mask);
+}
+
+/* Called from interrupt handlers when (local) HZ timer is disabled. */
+void vmi_account_time_restart_hz_timer(void)
+{
+	unsigned long long cur_real_cycles, cur_process_times_cycles;
+	int cpu = smp_processor_id();
+
+	BUG_ON(!irqs_disabled());
+	/* Account the time during which the HZ timer was disabled. */
+	cur_real_cycles = read_real_cycles();
+	cur_process_times_cycles = read_available_cycles();
+	/* Update system-wide (real) time state (xtime, jiffies). */
+	vmi_account_real_cycles(cur_real_cycles);
+	/* Update per-cpu idle times. */
+	vmi_account_no_hz_idle_cycles(cpu, cur_process_times_cycles);
+	/* Update time stolen from this cpu by the hypervisor. */
+	vmi_account_stolen_cycles(cpu, cur_real_cycles, cur_process_times_cycles);
+	/* Reenable the hz timer. */
+	vmi_reenable_hz_timer(cpu);
+}
+
+#endif /* CONFIG_NO_IDLE_HZ */
+
+/* UP (and no local-APIC) VMI-timer alarm interrupt handler.
+ * Handler for IRQ0. Not used when SMP or X86_LOCAL_APIC after
+ * APIC setup and vmi_timer_setup_boot_alarm() is called. */
+static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id)
+{
+	vmi_local_timer_interrupt(smp_processor_id());
+	return IRQ_HANDLED;
+}
+
+#ifdef CONFIG_X86_LOCAL_APIC
+
+/* SMP VMI-timer alarm interrupt handler. Handler for LVTT vector.
+ * Also used in UP when CONFIG_X86_LOCAL_APIC.
+ * The wrapper code is from arch/i386/kernel/apic.c#smp_apic_timer_interrupt. */
+void smp_apic_vmi_timer_interrupt(struct pt_regs *regs)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
+	int cpu = smp_processor_id();
+
+	/*
+	 * The NMI deadlock-detector uses this.
+	 */
+	per_cpu(irq_stat, cpu).apic_timer_irqs++;
+
+	/*
+	 * NOTE! We'd better ACK the irq immediately,
+	 * because timer handling can be slow.
+	 */
+	ack_APIC_irq();
+
+	/*
+	 * update_process_times() expects us to have done irq_enter().
+	 * Besides, if we don't, timer interrupts ignore the global
+	 * interrupt lock, which is the WrongThing (tm) to do.
+	 */
+	irq_enter();
+	vmi_local_timer_interrupt(cpu);
+	irq_exit();
+	set_irq_regs(old_regs);
+}
+
+#endif /* CONFIG_X86_LOCAL_APIC */
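
Nearly every routine in vmitime.c reduces to the same arithmetic: take a cycle-counter delta, drain it in whole multiples of cycles_per_jiffy, and carry the remainder to the next interrupt; vmi_stop_hz_timer() runs the conversion in the other direction to place a one-shot alarm. A standalone worked example (the cycle frequency and HZ values are assumed for illustration, not taken from the VMI ROM):

```c
#include <stdio.h>

int main(void)
{
	/* Assumed: 1 GHz cycle frequency, HZ=250 (illustrative only). */
	const unsigned long long cycle_freq = 1000000000ULL; /* get_cycle_frequency() */
	const unsigned long hz = 250;
	const long long cycles_per_jiffy = cycle_freq / hz;  /* 4,000,000 */

	/* vmi_account_real_cycles(): drain whole jiffies from the delta. */
	unsigned long long accounted = 0;        /* real_cycles_accounted_system */
	unsigned long long now = 10500000;       /* read_real_cycles() */
	long long not_accounted = now - accounted;
	unsigned long jiffies_ticked = 0;

	while (not_accounted >= cycles_per_jiffy) {
		jiffies_ticked++;                /* stands in for do_timer(1) */
		not_accounted -= cycles_per_jiffy;
		accounted += cycles_per_jiffy;
	}
	/* Prints: ticked 2 jiffies, 2500000 cycles carried over. */
	printf("ticked %lu jiffies, %lld cycles carried over\n",
	       jiffies_ticked, not_accounted);

	/* vmi_stop_hz_timer(): if the next timer event is 5 jiffies out,
	 * the one-shot real-time alarm expires at
	 * accounted + 5 * cycles_per_jiffy. */
	unsigned long next_minus_jiffies = 5;
	unsigned long long expiry = accounted +
		(long long)next_minus_jiffies * cycles_per_jiffy;
	printf("one-shot alarm expiry = %llu cycles\n", expiry);
	return 0;
}
```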