diff options
Diffstat (limited to 'arch/i386/kernel')
52 files changed, 4141 insertions, 2355 deletions
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index 1e8988e558c5..4ae3dcf1d2f0 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile | |||
@@ -18,7 +18,7 @@ obj-$(CONFIG_X86_MSR) += msr.o | |||
18 | obj-$(CONFIG_X86_CPUID) += cpuid.o | 18 | obj-$(CONFIG_X86_CPUID) += cpuid.o |
19 | obj-$(CONFIG_MICROCODE) += microcode.o | 19 | obj-$(CONFIG_MICROCODE) += microcode.o |
20 | obj-$(CONFIG_APM) += apm.o | 20 | obj-$(CONFIG_APM) += apm.o |
21 | obj-$(CONFIG_X86_SMP) += smp.o smpboot.o | 21 | obj-$(CONFIG_X86_SMP) += smp.o smpboot.o tsc_sync.o |
22 | obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o | 22 | obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o |
23 | obj-$(CONFIG_X86_MPPARSE) += mpparse.o | 23 | obj-$(CONFIG_X86_MPPARSE) += mpparse.o |
24 | obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o | 24 | obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o |
@@ -32,7 +32,6 @@ obj-$(CONFIG_KPROBES) += kprobes.o | |||
32 | obj-$(CONFIG_MODULES) += module.o | 32 | obj-$(CONFIG_MODULES) += module.o |
33 | obj-y += sysenter.o vsyscall.o | 33 | obj-y += sysenter.o vsyscall.o |
34 | obj-$(CONFIG_ACPI_SRAT) += srat.o | 34 | obj-$(CONFIG_ACPI_SRAT) += srat.o |
35 | obj-$(CONFIG_HPET_TIMER) += time_hpet.o | ||
36 | obj-$(CONFIG_EFI) += efi.o efi_stub.o | 35 | obj-$(CONFIG_EFI) += efi.o efi_stub.o |
37 | obj-$(CONFIG_DOUBLEFAULT) += doublefault.o | 36 | obj-$(CONFIG_DOUBLEFAULT) += doublefault.o |
38 | obj-$(CONFIG_VM86) += vm86.o | 37 | obj-$(CONFIG_VM86) += vm86.o |
@@ -40,8 +39,9 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o | |||
40 | obj-$(CONFIG_HPET_TIMER) += hpet.o | 39 | obj-$(CONFIG_HPET_TIMER) += hpet.o |
41 | obj-$(CONFIG_K8_NB) += k8.o | 40 | obj-$(CONFIG_K8_NB) += k8.o |
42 | 41 | ||
43 | # Make sure this is linked after any other paravirt_ops structs: see head.S | 42 | obj-$(CONFIG_VMI) += vmi.o vmitime.o |
44 | obj-$(CONFIG_PARAVIRT) += paravirt.o | 43 | obj-$(CONFIG_PARAVIRT) += paravirt.o |
44 | obj-y += pcspeaker.o | ||
45 | 45 | ||
46 | EXTRA_AFLAGS := -traditional | 46 | EXTRA_AFLAGS := -traditional |
47 | 47 | ||
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index e94aff6888ca..fb3e72328a5a 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c | |||
@@ -25,6 +25,7 @@ | |||
25 | 25 | ||
26 | #include <linux/init.h> | 26 | #include <linux/init.h> |
27 | #include <linux/acpi.h> | 27 | #include <linux/acpi.h> |
28 | #include <linux/acpi_pmtmr.h> | ||
28 | #include <linux/efi.h> | 29 | #include <linux/efi.h> |
29 | #include <linux/cpumask.h> | 30 | #include <linux/cpumask.h> |
30 | #include <linux/module.h> | 31 | #include <linux/module.h> |
@@ -615,6 +616,7 @@ static int __init acpi_parse_sbf(struct acpi_table_header *table) | |||
615 | } | 616 | } |
616 | 617 | ||
617 | #ifdef CONFIG_HPET_TIMER | 618 | #ifdef CONFIG_HPET_TIMER |
619 | #include <asm/hpet.h> | ||
618 | 620 | ||
619 | static int __init acpi_parse_hpet(struct acpi_table_header *table) | 621 | static int __init acpi_parse_hpet(struct acpi_table_header *table) |
620 | { | 622 | { |
@@ -645,24 +647,11 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table) | |||
645 | hpet_res->end = (1 * 1024) - 1; | 647 | hpet_res->end = (1 * 1024) - 1; |
646 | } | 648 | } |
647 | 649 | ||
648 | #ifdef CONFIG_X86_64 | 650 | hpet_address = hpet_tbl->address.address; |
649 | vxtime.hpet_address = hpet_tbl->address.address; | ||
650 | |||
651 | printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", | 651 | printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", |
652 | hpet_tbl->id, vxtime.hpet_address); | 652 | hpet_tbl->id, hpet_address); |
653 | |||
654 | res_start = vxtime.hpet_address; | ||
655 | #else /* X86 */ | ||
656 | { | ||
657 | extern unsigned long hpet_address; | ||
658 | |||
659 | hpet_address = hpet_tbl->address.address; | ||
660 | printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", | ||
661 | hpet_tbl->id, hpet_address); | ||
662 | 653 | ||
663 | res_start = hpet_address; | 654 | res_start = hpet_address; |
664 | } | ||
665 | #endif /* X86 */ | ||
666 | 655 | ||
667 | if (hpet_res) { | 656 | if (hpet_res) { |
668 | hpet_res->start = res_start; | 657 | hpet_res->start = res_start; |
@@ -676,10 +665,6 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table) | |||
676 | #define acpi_parse_hpet NULL | 665 | #define acpi_parse_hpet NULL |
677 | #endif | 666 | #endif |
678 | 667 | ||
679 | #ifdef CONFIG_X86_PM_TIMER | ||
680 | extern u32 pmtmr_ioport; | ||
681 | #endif | ||
682 | |||
683 | static int __init acpi_parse_fadt(struct acpi_table_header *table) | 668 | static int __init acpi_parse_fadt(struct acpi_table_header *table) |
684 | { | 669 | { |
685 | 670 | ||
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 776d9be26af9..9655c233e6f1 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c | |||
@@ -25,6 +25,8 @@ | |||
25 | #include <linux/kernel_stat.h> | 25 | #include <linux/kernel_stat.h> |
26 | #include <linux/sysdev.h> | 26 | #include <linux/sysdev.h> |
27 | #include <linux/cpu.h> | 27 | #include <linux/cpu.h> |
28 | #include <linux/clockchips.h> | ||
29 | #include <linux/acpi_pmtmr.h> | ||
28 | #include <linux/module.h> | 30 | #include <linux/module.h> |
29 | 31 | ||
30 | #include <asm/atomic.h> | 32 | #include <asm/atomic.h> |
@@ -36,6 +38,7 @@ | |||
36 | #include <asm/hpet.h> | 38 | #include <asm/hpet.h> |
37 | #include <asm/i8253.h> | 39 | #include <asm/i8253.h> |
38 | #include <asm/nmi.h> | 40 | #include <asm/nmi.h> |
41 | #include <asm/idle.h> | ||
39 | 42 | ||
40 | #include <mach_apic.h> | 43 | #include <mach_apic.h> |
41 | #include <mach_apicdef.h> | 44 | #include <mach_apicdef.h> |
@@ -44,128 +47,549 @@ | |||
44 | #include "io_ports.h" | 47 | #include "io_ports.h" |
45 | 48 | ||
46 | /* | 49 | /* |
47 | * cpu_mask that denotes the CPUs that needs timer interrupt coming in as | 50 | * Sanity check |
48 | * IPIs in place of local APIC timers | ||
49 | */ | 51 | */ |
50 | static cpumask_t timer_bcast_ipi; | 52 | #if (SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F |
53 | # error SPURIOUS_APIC_VECTOR definition error | ||
54 | #endif | ||
51 | 55 | ||
52 | /* | 56 | /* |
53 | * Knob to control our willingness to enable the local APIC. | 57 | * Knob to control our willingness to enable the local APIC. |
58 | * | ||
59 | * -1=force-disable, +1=force-enable | ||
54 | */ | 60 | */ |
55 | static int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */ | 61 | static int enable_local_apic __initdata = 0; |
56 | |||
57 | static inline void lapic_disable(void) | ||
58 | { | ||
59 | enable_local_apic = -1; | ||
60 | clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); | ||
61 | } | ||
62 | 62 | ||
63 | static inline void lapic_enable(void) | 63 | /* Local APIC timer verification ok */ |
64 | { | 64 | static int local_apic_timer_verify_ok; |
65 | enable_local_apic = 1; | ||
66 | } | ||
67 | 65 | ||
68 | /* | 66 | /* |
69 | * Debug level | 67 | * Debug level, exported for io_apic.c |
70 | */ | 68 | */ |
71 | int apic_verbosity; | 69 | int apic_verbosity; |
72 | 70 | ||
71 | static unsigned int calibration_result; | ||
73 | 72 | ||
73 | static int lapic_next_event(unsigned long delta, | ||
74 | struct clock_event_device *evt); | ||
75 | static void lapic_timer_setup(enum clock_event_mode mode, | ||
76 | struct clock_event_device *evt); | ||
77 | static void lapic_timer_broadcast(cpumask_t mask); | ||
74 | static void apic_pm_activate(void); | 78 | static void apic_pm_activate(void); |
75 | 79 | ||
80 | /* | ||
81 | * The local apic timer can be used for any function which is CPU local. | ||
82 | */ | ||
83 | static struct clock_event_device lapic_clockevent = { | ||
84 | .name = "lapic", | ||
85 | .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT | ||
86 | | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY, | ||
87 | .shift = 32, | ||
88 | .set_mode = lapic_timer_setup, | ||
89 | .set_next_event = lapic_next_event, | ||
90 | .broadcast = lapic_timer_broadcast, | ||
91 | .rating = 100, | ||
92 | .irq = -1, | ||
93 | }; | ||
94 | static DEFINE_PER_CPU(struct clock_event_device, lapic_events); | ||
95 | |||
96 | /* Local APIC was disabled by the BIOS and enabled by the kernel */ | ||
97 | static int enabled_via_apicbase; | ||
98 | |||
99 | /* | ||
100 | * Get the LAPIC version | ||
101 | */ | ||
102 | static inline int lapic_get_version(void) | ||
103 | { | ||
104 | return GET_APIC_VERSION(apic_read(APIC_LVR)); | ||
105 | } | ||
106 | |||
107 | /* | ||
108 | * Check, if the APIC is integrated or a seperate chip | ||
109 | */ | ||
110 | static inline int lapic_is_integrated(void) | ||
111 | { | ||
112 | return APIC_INTEGRATED(lapic_get_version()); | ||
113 | } | ||
114 | |||
115 | /* | ||
116 | * Check, whether this is a modern or a first generation APIC | ||
117 | */ | ||
76 | static int modern_apic(void) | 118 | static int modern_apic(void) |
77 | { | 119 | { |
78 | unsigned int lvr, version; | ||
79 | /* AMD systems use old APIC versions, so check the CPU */ | 120 | /* AMD systems use old APIC versions, so check the CPU */ |
80 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && | 121 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && |
81 | boot_cpu_data.x86 >= 0xf) | 122 | boot_cpu_data.x86 >= 0xf) |
82 | return 1; | 123 | return 1; |
83 | lvr = apic_read(APIC_LVR); | 124 | return lapic_get_version() >= 0x14; |
84 | version = GET_APIC_VERSION(lvr); | ||
85 | return version >= 0x14; | ||
86 | } | 125 | } |
87 | 126 | ||
127 | /** | ||
128 | * enable_NMI_through_LVT0 - enable NMI through local vector table 0 | ||
129 | */ | ||
130 | void enable_NMI_through_LVT0 (void * dummy) | ||
131 | { | ||
132 | unsigned int v = APIC_DM_NMI; | ||
133 | |||
134 | /* Level triggered for 82489DX */ | ||
135 | if (!lapic_is_integrated()) | ||
136 | v |= APIC_LVT_LEVEL_TRIGGER; | ||
137 | apic_write_around(APIC_LVT0, v); | ||
138 | } | ||
139 | |||
140 | /** | ||
141 | * get_physical_broadcast - Get number of physical broadcast IDs | ||
142 | */ | ||
143 | int get_physical_broadcast(void) | ||
144 | { | ||
145 | return modern_apic() ? 0xff : 0xf; | ||
146 | } | ||
147 | |||
148 | /** | ||
149 | * lapic_get_maxlvt - get the maximum number of local vector table entries | ||
150 | */ | ||
151 | int lapic_get_maxlvt(void) | ||
152 | { | ||
153 | unsigned int v = apic_read(APIC_LVR); | ||
154 | |||
155 | /* 82489DXs do not report # of LVT entries. */ | ||
156 | return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; | ||
157 | } | ||
158 | |||
159 | /* | ||
160 | * Local APIC timer | ||
161 | */ | ||
162 | |||
163 | /* Clock divisor is set to 16 */ | ||
164 | #define APIC_DIVISOR 16 | ||
165 | |||
88 | /* | 166 | /* |
89 | * 'what should we do if we get a hw irq event on an illegal vector'. | 167 | * This function sets up the local APIC timer, with a timeout of |
90 | * each architecture has to answer this themselves. | 168 | * 'clocks' APIC bus clock. During calibration we actually call |
169 | * this function twice on the boot CPU, once with a bogus timeout | ||
170 | * value, second time for real. The other (noncalibrating) CPUs | ||
171 | * call this function only once, with the real, calibrated value. | ||
172 | * | ||
173 | * We do reads before writes even if unnecessary, to get around the | ||
174 | * P5 APIC double write bug. | ||
91 | */ | 175 | */ |
92 | void ack_bad_irq(unsigned int irq) | 176 | static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) |
93 | { | 177 | { |
94 | printk("unexpected IRQ trap at vector %02x\n", irq); | 178 | unsigned int lvtt_value, tmp_value; |
179 | |||
180 | lvtt_value = LOCAL_TIMER_VECTOR; | ||
181 | if (!oneshot) | ||
182 | lvtt_value |= APIC_LVT_TIMER_PERIODIC; | ||
183 | if (!lapic_is_integrated()) | ||
184 | lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); | ||
185 | |||
186 | if (!irqen) | ||
187 | lvtt_value |= APIC_LVT_MASKED; | ||
188 | |||
189 | apic_write_around(APIC_LVTT, lvtt_value); | ||
190 | |||
95 | /* | 191 | /* |
96 | * Currently unexpected vectors happen only on SMP and APIC. | 192 | * Divide PICLK by 16 |
97 | * We _must_ ack these because every local APIC has only N | ||
98 | * irq slots per priority level, and a 'hanging, unacked' IRQ | ||
99 | * holds up an irq slot - in excessive cases (when multiple | ||
100 | * unexpected vectors occur) that might lock up the APIC | ||
101 | * completely. | ||
102 | * But only ack when the APIC is enabled -AK | ||
103 | */ | 193 | */ |
104 | if (cpu_has_apic) | 194 | tmp_value = apic_read(APIC_TDCR); |
105 | ack_APIC_irq(); | 195 | apic_write_around(APIC_TDCR, (tmp_value |
196 | & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | ||
197 | | APIC_TDR_DIV_16); | ||
198 | |||
199 | if (!oneshot) | ||
200 | apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); | ||
106 | } | 201 | } |
107 | 202 | ||
108 | void __init apic_intr_init(void) | 203 | /* |
204 | * Program the next event, relative to now | ||
205 | */ | ||
206 | static int lapic_next_event(unsigned long delta, | ||
207 | struct clock_event_device *evt) | ||
208 | { | ||
209 | apic_write_around(APIC_TMICT, delta); | ||
210 | return 0; | ||
211 | } | ||
212 | |||
213 | /* | ||
214 | * Setup the lapic timer in periodic or oneshot mode | ||
215 | */ | ||
216 | static void lapic_timer_setup(enum clock_event_mode mode, | ||
217 | struct clock_event_device *evt) | ||
218 | { | ||
219 | unsigned long flags; | ||
220 | unsigned int v; | ||
221 | |||
222 | /* Lapic used for broadcast ? */ | ||
223 | if (!local_apic_timer_verify_ok) | ||
224 | return; | ||
225 | |||
226 | local_irq_save(flags); | ||
227 | |||
228 | switch (mode) { | ||
229 | case CLOCK_EVT_MODE_PERIODIC: | ||
230 | case CLOCK_EVT_MODE_ONESHOT: | ||
231 | __setup_APIC_LVTT(calibration_result, | ||
232 | mode != CLOCK_EVT_MODE_PERIODIC, 1); | ||
233 | break; | ||
234 | case CLOCK_EVT_MODE_UNUSED: | ||
235 | case CLOCK_EVT_MODE_SHUTDOWN: | ||
236 | v = apic_read(APIC_LVTT); | ||
237 | v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); | ||
238 | apic_write_around(APIC_LVTT, v); | ||
239 | break; | ||
240 | } | ||
241 | |||
242 | local_irq_restore(flags); | ||
243 | } | ||
244 | |||
245 | /* | ||
246 | * Local APIC timer broadcast function | ||
247 | */ | ||
248 | static void lapic_timer_broadcast(cpumask_t mask) | ||
109 | { | 249 | { |
110 | #ifdef CONFIG_SMP | 250 | #ifdef CONFIG_SMP |
111 | smp_intr_init(); | 251 | send_IPI_mask(mask, LOCAL_TIMER_VECTOR); |
112 | #endif | 252 | #endif |
113 | /* self generated IPI for local APIC timer */ | 253 | } |
114 | set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); | ||
115 | 254 | ||
116 | /* IPI vectors for APIC spurious and error interrupts */ | 255 | /* |
117 | set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); | 256 | * Setup the local APIC timer for this CPU. Copy the initilized values |
118 | set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); | 257 | * of the boot CPU and register the clock event in the framework. |
258 | */ | ||
259 | static void __devinit setup_APIC_timer(void) | ||
260 | { | ||
261 | struct clock_event_device *levt = &__get_cpu_var(lapic_events); | ||
119 | 262 | ||
120 | /* thermal monitor LVT interrupt */ | 263 | memcpy(levt, &lapic_clockevent, sizeof(*levt)); |
121 | #ifdef CONFIG_X86_MCE_P4THERMAL | 264 | levt->cpumask = cpumask_of_cpu(smp_processor_id()); |
122 | set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); | 265 | |
123 | #endif | 266 | clockevents_register_device(levt); |
124 | } | 267 | } |
125 | 268 | ||
126 | /* Using APIC to generate smp_local_timer_interrupt? */ | 269 | /* |
127 | int using_apic_timer __read_mostly = 0; | 270 | * In this functions we calibrate APIC bus clocks to the external timer. |
271 | * | ||
272 | * We want to do the calibration only once since we want to have local timer | ||
273 | * irqs syncron. CPUs connected by the same APIC bus have the very same bus | ||
274 | * frequency. | ||
275 | * | ||
276 | * This was previously done by reading the PIT/HPET and waiting for a wrap | ||
277 | * around to find out, that a tick has elapsed. I have a box, where the PIT | ||
278 | * readout is broken, so it never gets out of the wait loop again. This was | ||
279 | * also reported by others. | ||
280 | * | ||
281 | * Monitoring the jiffies value is inaccurate and the clockevents | ||
282 | * infrastructure allows us to do a simple substitution of the interrupt | ||
283 | * handler. | ||
284 | * | ||
285 | * The calibration routine also uses the pm_timer when possible, as the PIT | ||
286 | * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes | ||
287 | * back to normal later in the boot process). | ||
288 | */ | ||
289 | |||
290 | #define LAPIC_CAL_LOOPS (HZ/10) | ||
128 | 291 | ||
129 | static int enabled_via_apicbase; | 292 | static __initdata volatile int lapic_cal_loops = -1; |
293 | static __initdata long lapic_cal_t1, lapic_cal_t2; | ||
294 | static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2; | ||
295 | static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2; | ||
296 | static __initdata unsigned long lapic_cal_j1, lapic_cal_j2; | ||
130 | 297 | ||
131 | void enable_NMI_through_LVT0 (void * dummy) | 298 | /* |
299 | * Temporary interrupt handler. | ||
300 | */ | ||
301 | static void __init lapic_cal_handler(struct clock_event_device *dev) | ||
132 | { | 302 | { |
133 | unsigned int v, ver; | 303 | unsigned long long tsc = 0; |
304 | long tapic = apic_read(APIC_TMCCT); | ||
305 | unsigned long pm = acpi_pm_read_early(); | ||
134 | 306 | ||
135 | ver = apic_read(APIC_LVR); | 307 | if (cpu_has_tsc) |
136 | ver = GET_APIC_VERSION(ver); | 308 | rdtscll(tsc); |
137 | v = APIC_DM_NMI; /* unmask and set to NMI */ | 309 | |
138 | if (!APIC_INTEGRATED(ver)) /* 82489DX */ | 310 | switch (lapic_cal_loops++) { |
139 | v |= APIC_LVT_LEVEL_TRIGGER; | 311 | case 0: |
140 | apic_write_around(APIC_LVT0, v); | 312 | lapic_cal_t1 = tapic; |
313 | lapic_cal_tsc1 = tsc; | ||
314 | lapic_cal_pm1 = pm; | ||
315 | lapic_cal_j1 = jiffies; | ||
316 | break; | ||
317 | |||
318 | case LAPIC_CAL_LOOPS: | ||
319 | lapic_cal_t2 = tapic; | ||
320 | lapic_cal_tsc2 = tsc; | ||
321 | if (pm < lapic_cal_pm1) | ||
322 | pm += ACPI_PM_OVRRUN; | ||
323 | lapic_cal_pm2 = pm; | ||
324 | lapic_cal_j2 = jiffies; | ||
325 | break; | ||
326 | } | ||
141 | } | 327 | } |
142 | 328 | ||
143 | int get_physical_broadcast(void) | 329 | /* |
330 | * Setup the boot APIC | ||
331 | * | ||
332 | * Calibrate and verify the result. | ||
333 | */ | ||
334 | void __init setup_boot_APIC_clock(void) | ||
144 | { | 335 | { |
145 | if (modern_apic()) | 336 | struct clock_event_device *levt = &__get_cpu_var(lapic_events); |
146 | return 0xff; | 337 | const long pm_100ms = PMTMR_TICKS_PER_SEC/10; |
147 | else | 338 | const long pm_thresh = pm_100ms/100; |
148 | return 0xf; | 339 | void (*real_handler)(struct clock_event_device *dev); |
340 | unsigned long deltaj; | ||
341 | long delta, deltapm; | ||
342 | |||
343 | apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" | ||
344 | "calibrating APIC timer ...\n"); | ||
345 | |||
346 | local_irq_disable(); | ||
347 | |||
348 | /* Replace the global interrupt handler */ | ||
349 | real_handler = global_clock_event->event_handler; | ||
350 | global_clock_event->event_handler = lapic_cal_handler; | ||
351 | |||
352 | /* | ||
353 | * Setup the APIC counter to 1e9. There is no way the lapic | ||
354 | * can underflow in the 100ms detection time frame | ||
355 | */ | ||
356 | __setup_APIC_LVTT(1000000000, 0, 0); | ||
357 | |||
358 | /* Let the interrupts run */ | ||
359 | local_irq_enable(); | ||
360 | |||
361 | while(lapic_cal_loops <= LAPIC_CAL_LOOPS); | ||
362 | |||
363 | local_irq_disable(); | ||
364 | |||
365 | /* Restore the real event handler */ | ||
366 | global_clock_event->event_handler = real_handler; | ||
367 | |||
368 | /* Build delta t1-t2 as apic timer counts down */ | ||
369 | delta = lapic_cal_t1 - lapic_cal_t2; | ||
370 | apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); | ||
371 | |||
372 | /* Check, if the PM timer is available */ | ||
373 | deltapm = lapic_cal_pm2 - lapic_cal_pm1; | ||
374 | apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); | ||
375 | |||
376 | if (deltapm) { | ||
377 | unsigned long mult; | ||
378 | u64 res; | ||
379 | |||
380 | mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22); | ||
381 | |||
382 | if (deltapm > (pm_100ms - pm_thresh) && | ||
383 | deltapm < (pm_100ms + pm_thresh)) { | ||
384 | apic_printk(APIC_VERBOSE, "... PM timer result ok\n"); | ||
385 | } else { | ||
386 | res = (((u64) deltapm) * mult) >> 22; | ||
387 | do_div(res, 1000000); | ||
388 | printk(KERN_WARNING "APIC calibration not consistent " | ||
389 | "with PM Timer: %ldms instead of 100ms\n", | ||
390 | (long)res); | ||
391 | /* Correct the lapic counter value */ | ||
392 | res = (((u64) delta ) * pm_100ms); | ||
393 | do_div(res, deltapm); | ||
394 | printk(KERN_INFO "APIC delta adjusted to PM-Timer: " | ||
395 | "%lu (%ld)\n", (unsigned long) res, delta); | ||
396 | delta = (long) res; | ||
397 | } | ||
398 | } | ||
399 | |||
400 | /* Calculate the scaled math multiplication factor */ | ||
401 | lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, 32); | ||
402 | lapic_clockevent.max_delta_ns = | ||
403 | clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); | ||
404 | lapic_clockevent.min_delta_ns = | ||
405 | clockevent_delta2ns(0xF, &lapic_clockevent); | ||
406 | |||
407 | calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; | ||
408 | |||
409 | apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); | ||
410 | apic_printk(APIC_VERBOSE, "..... mult: %ld\n", lapic_clockevent.mult); | ||
411 | apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", | ||
412 | calibration_result); | ||
413 | |||
414 | if (cpu_has_tsc) { | ||
415 | delta = (long)(lapic_cal_tsc2 - lapic_cal_tsc1); | ||
416 | apic_printk(APIC_VERBOSE, "..... CPU clock speed is " | ||
417 | "%ld.%04ld MHz.\n", | ||
418 | (delta / LAPIC_CAL_LOOPS) / (1000000 / HZ), | ||
419 | (delta / LAPIC_CAL_LOOPS) % (1000000 / HZ)); | ||
420 | } | ||
421 | |||
422 | apic_printk(APIC_VERBOSE, "..... host bus clock speed is " | ||
423 | "%u.%04u MHz.\n", | ||
424 | calibration_result / (1000000 / HZ), | ||
425 | calibration_result % (1000000 / HZ)); | ||
426 | |||
427 | |||
428 | apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); | ||
429 | |||
430 | /* | ||
431 | * Setup the apic timer manually | ||
432 | */ | ||
433 | local_apic_timer_verify_ok = 1; | ||
434 | levt->event_handler = lapic_cal_handler; | ||
435 | lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt); | ||
436 | lapic_cal_loops = -1; | ||
437 | |||
438 | /* Let the interrupts run */ | ||
439 | local_irq_enable(); | ||
440 | |||
441 | while(lapic_cal_loops <= LAPIC_CAL_LOOPS); | ||
442 | |||
443 | local_irq_disable(); | ||
444 | |||
445 | /* Stop the lapic timer */ | ||
446 | lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt); | ||
447 | |||
448 | local_irq_enable(); | ||
449 | |||
450 | /* Jiffies delta */ | ||
451 | deltaj = lapic_cal_j2 - lapic_cal_j1; | ||
452 | apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj); | ||
453 | |||
454 | /* Check, if the PM timer is available */ | ||
455 | deltapm = lapic_cal_pm2 - lapic_cal_pm1; | ||
456 | apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); | ||
457 | |||
458 | local_apic_timer_verify_ok = 0; | ||
459 | |||
460 | if (deltapm) { | ||
461 | if (deltapm > (pm_100ms - pm_thresh) && | ||
462 | deltapm < (pm_100ms + pm_thresh)) { | ||
463 | apic_printk(APIC_VERBOSE, "... PM timer result ok\n"); | ||
464 | /* Check, if the jiffies result is consistent */ | ||
465 | if (deltaj < LAPIC_CAL_LOOPS-2 || | ||
466 | deltaj > LAPIC_CAL_LOOPS+2) { | ||
467 | /* | ||
468 | * Not sure, what we can do about this one. | ||
469 | * When high resultion timers are active | ||
470 | * and the lapic timer does not stop in C3 | ||
471 | * we are fine. Otherwise more trouble might | ||
472 | * be waiting. -- tglx | ||
473 | */ | ||
474 | printk(KERN_WARNING "Global event device %s " | ||
475 | "has wrong frequency " | ||
476 | "(%lu ticks instead of %d)\n", | ||
477 | global_clock_event->name, deltaj, | ||
478 | LAPIC_CAL_LOOPS); | ||
479 | } | ||
480 | local_apic_timer_verify_ok = 1; | ||
481 | } | ||
482 | } else { | ||
483 | /* Check, if the jiffies result is consistent */ | ||
484 | if (deltaj >= LAPIC_CAL_LOOPS-2 && | ||
485 | deltaj <= LAPIC_CAL_LOOPS+2) { | ||
486 | apic_printk(APIC_VERBOSE, "... jiffies result ok\n"); | ||
487 | local_apic_timer_verify_ok = 1; | ||
488 | } | ||
489 | } | ||
490 | |||
491 | if (!local_apic_timer_verify_ok) { | ||
492 | printk(KERN_WARNING | ||
493 | "APIC timer disabled due to verification failure.\n"); | ||
494 | /* No broadcast on UP ! */ | ||
495 | if (num_possible_cpus() == 1) | ||
496 | return; | ||
497 | } else | ||
498 | lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; | ||
499 | |||
500 | /* Setup the lapic or request the broadcast */ | ||
501 | setup_APIC_timer(); | ||
502 | } | ||
503 | |||
504 | void __devinit setup_secondary_APIC_clock(void) | ||
505 | { | ||
506 | setup_APIC_timer(); | ||
149 | } | 507 | } |
150 | 508 | ||
151 | int get_maxlvt(void) | 509 | /* |
510 | * The guts of the apic timer interrupt | ||
511 | */ | ||
512 | static void local_apic_timer_interrupt(void) | ||
152 | { | 513 | { |
153 | unsigned int v, ver, maxlvt; | 514 | int cpu = smp_processor_id(); |
515 | struct clock_event_device *evt = &per_cpu(lapic_events, cpu); | ||
154 | 516 | ||
155 | v = apic_read(APIC_LVR); | 517 | /* |
156 | ver = GET_APIC_VERSION(v); | 518 | * Normally we should not be here till LAPIC has been initialized but |
157 | /* 82489DXs do not report # of LVT entries. */ | 519 | * in some cases like kdump, its possible that there is a pending LAPIC |
158 | maxlvt = APIC_INTEGRATED(ver) ? GET_APIC_MAXLVT(v) : 2; | 520 | * timer interrupt from previous kernel's context and is delivered in |
159 | return maxlvt; | 521 | * new kernel the moment interrupts are enabled. |
522 | * | ||
523 | * Interrupts are enabled early and LAPIC is setup much later, hence | ||
524 | * its possible that when we get here evt->event_handler is NULL. | ||
525 | * Check for event_handler being NULL and discard the interrupt as | ||
526 | * spurious. | ||
527 | */ | ||
528 | if (!evt->event_handler) { | ||
529 | printk(KERN_WARNING | ||
530 | "Spurious LAPIC timer interrupt on cpu %d\n", cpu); | ||
531 | /* Switch it off */ | ||
532 | lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); | ||
533 | return; | ||
534 | } | ||
535 | |||
536 | per_cpu(irq_stat, cpu).apic_timer_irqs++; | ||
537 | |||
538 | evt->event_handler(evt); | ||
160 | } | 539 | } |
161 | 540 | ||
541 | /* | ||
542 | * Local APIC timer interrupt. This is the most natural way for doing | ||
543 | * local interrupts, but local timer interrupts can be emulated by | ||
544 | * broadcast interrupts too. [in case the hw doesn't support APIC timers] | ||
545 | * | ||
546 | * [ if a single-CPU system runs an SMP kernel then we call the local | ||
547 | * interrupt as well. Thus we cannot inline the local irq ... ] | ||
548 | */ | ||
549 | |||
550 | void fastcall smp_apic_timer_interrupt(struct pt_regs *regs) | ||
551 | { | ||
552 | struct pt_regs *old_regs = set_irq_regs(regs); | ||
553 | |||
554 | /* | ||
555 | * NOTE! We'd better ACK the irq immediately, | ||
556 | * because timer handling can be slow. | ||
557 | */ | ||
558 | ack_APIC_irq(); | ||
559 | /* | ||
560 | * update_process_times() expects us to have done irq_enter(). | ||
561 | * Besides, if we don't timer interrupts ignore the global | ||
562 | * interrupt lock, which is the WrongThing (tm) to do. | ||
563 | */ | ||
564 | exit_idle(); | ||
565 | irq_enter(); | ||
566 | local_apic_timer_interrupt(); | ||
567 | irq_exit(); | ||
568 | |||
569 | set_irq_regs(old_regs); | ||
570 | } | ||
571 | |||
572 | int setup_profiling_timer(unsigned int multiplier) | ||
573 | { | ||
574 | return -EINVAL; | ||
575 | } | ||
576 | |||
577 | /* | ||
578 | * Local APIC start and shutdown | ||
579 | */ | ||
580 | |||
581 | /** | ||
582 | * clear_local_APIC - shutdown the local APIC | ||
583 | * | ||
584 | * This is called, when a CPU is disabled and before rebooting, so the state of | ||
585 | * the local APIC has no dangling leftovers. Also used to cleanout any BIOS | ||
586 | * leftovers during boot. | ||
587 | */ | ||
162 | void clear_local_APIC(void) | 588 | void clear_local_APIC(void) |
163 | { | 589 | { |
164 | int maxlvt; | 590 | int maxlvt = lapic_get_maxlvt(); |
165 | unsigned long v; | 591 | unsigned long v; |
166 | 592 | ||
167 | maxlvt = get_maxlvt(); | ||
168 | |||
169 | /* | 593 | /* |
170 | * Masking an LVT entry can trigger a local APIC error | 594 | * Masking an LVT entry can trigger a local APIC error |
171 | * if the vector is zero. Mask LVTERR first to prevent this. | 595 | * if the vector is zero. Mask LVTERR first to prevent this. |
@@ -189,7 +613,7 @@ void clear_local_APIC(void) | |||
189 | apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED); | 613 | apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED); |
190 | } | 614 | } |
191 | 615 | ||
192 | /* lets not touch this if we didn't frob it */ | 616 | /* lets not touch this if we didn't frob it */ |
193 | #ifdef CONFIG_X86_MCE_P4THERMAL | 617 | #ifdef CONFIG_X86_MCE_P4THERMAL |
194 | if (maxlvt >= 5) { | 618 | if (maxlvt >= 5) { |
195 | v = apic_read(APIC_LVTTHMR); | 619 | v = apic_read(APIC_LVTTHMR); |
@@ -211,85 +635,18 @@ void clear_local_APIC(void) | |||
211 | if (maxlvt >= 5) | 635 | if (maxlvt >= 5) |
212 | apic_write_around(APIC_LVTTHMR, APIC_LVT_MASKED); | 636 | apic_write_around(APIC_LVTTHMR, APIC_LVT_MASKED); |
213 | #endif | 637 | #endif |
214 | v = GET_APIC_VERSION(apic_read(APIC_LVR)); | 638 | /* Integrated APIC (!82489DX) ? */ |
215 | if (APIC_INTEGRATED(v)) { /* !82489DX */ | 639 | if (lapic_is_integrated()) { |
216 | if (maxlvt > 3) /* Due to Pentium errata 3AP and 11AP. */ | 640 | if (maxlvt > 3) |
641 | /* Clear ESR due to Pentium errata 3AP and 11AP */ | ||
217 | apic_write(APIC_ESR, 0); | 642 | apic_write(APIC_ESR, 0); |
218 | apic_read(APIC_ESR); | 643 | apic_read(APIC_ESR); |
219 | } | 644 | } |
220 | } | 645 | } |
221 | 646 | ||
222 | void __init connect_bsp_APIC(void) | 647 | /** |
223 | { | 648 | * disable_local_APIC - clear and disable the local APIC |
224 | if (pic_mode) { | 649 | */ |
225 | /* | ||
226 | * Do not trust the local APIC being empty at bootup. | ||
227 | */ | ||
228 | clear_local_APIC(); | ||
229 | /* | ||
230 | * PIC mode, enable APIC mode in the IMCR, i.e. | ||
231 | * connect BSP's local APIC to INT and NMI lines. | ||
232 | */ | ||
233 | apic_printk(APIC_VERBOSE, "leaving PIC mode, " | ||
234 | "enabling APIC mode.\n"); | ||
235 | outb(0x70, 0x22); | ||
236 | outb(0x01, 0x23); | ||
237 | } | ||
238 | enable_apic_mode(); | ||
239 | } | ||
240 | |||
241 | void disconnect_bsp_APIC(int virt_wire_setup) | ||
242 | { | ||
243 | if (pic_mode) { | ||
244 | /* | ||
245 | * Put the board back into PIC mode (has an effect | ||
246 | * only on certain older boards). Note that APIC | ||
247 | * interrupts, including IPIs, won't work beyond | ||
248 | * this point! The only exception are INIT IPIs. | ||
249 | */ | ||
250 | apic_printk(APIC_VERBOSE, "disabling APIC mode, " | ||
251 | "entering PIC mode.\n"); | ||
252 | outb(0x70, 0x22); | ||
253 | outb(0x00, 0x23); | ||
254 | } | ||
255 | else { | ||
256 | /* Go back to Virtual Wire compatibility mode */ | ||
257 | unsigned long value; | ||
258 | |||
259 | /* For the spurious interrupt use vector F, and enable it */ | ||
260 | value = apic_read(APIC_SPIV); | ||
261 | value &= ~APIC_VECTOR_MASK; | ||
262 | value |= APIC_SPIV_APIC_ENABLED; | ||
263 | value |= 0xf; | ||
264 | apic_write_around(APIC_SPIV, value); | ||
265 | |||
266 | if (!virt_wire_setup) { | ||
267 | /* For LVT0 make it edge triggered, active high, external and enabled */ | ||
268 | value = apic_read(APIC_LVT0); | ||
269 | value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | | ||
270 | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | | ||
271 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED ); | ||
272 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; | ||
273 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); | ||
274 | apic_write_around(APIC_LVT0, value); | ||
275 | } | ||
276 | else { | ||
277 | /* Disable LVT0 */ | ||
278 | apic_write_around(APIC_LVT0, APIC_LVT_MASKED); | ||
279 | } | ||
280 | |||
281 | /* For LVT1 make it edge triggered, active high, nmi and enabled */ | ||
282 | value = apic_read(APIC_LVT1); | ||
283 | value &= ~( | ||
284 | APIC_MODE_MASK | APIC_SEND_PENDING | | ||
285 | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | | ||
286 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); | ||
287 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; | ||
288 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); | ||
289 | apic_write_around(APIC_LVT1, value); | ||
290 | } | ||
291 | } | ||
292 | |||
293 | void disable_local_APIC(void) | 650 | void disable_local_APIC(void) |
294 | { | 651 | { |
295 | unsigned long value; | 652 | unsigned long value; |
@@ -304,8 +661,13 @@ void disable_local_APIC(void) | |||
304 | value &= ~APIC_SPIV_APIC_ENABLED; | 661 | value &= ~APIC_SPIV_APIC_ENABLED; |
305 | apic_write_around(APIC_SPIV, value); | 662 | apic_write_around(APIC_SPIV, value); |
306 | 663 | ||
664 | /* | ||
665 | * When LAPIC was disabled by the BIOS and enabled by the kernel, | ||
666 | * restore the disabled state. | ||
667 | */ | ||
307 | if (enabled_via_apicbase) { | 668 | if (enabled_via_apicbase) { |
308 | unsigned int l, h; | 669 | unsigned int l, h; |
670 | |||
309 | rdmsr(MSR_IA32_APICBASE, l, h); | 671 | rdmsr(MSR_IA32_APICBASE, l, h); |
310 | l &= ~MSR_IA32_APICBASE_ENABLE; | 672 | l &= ~MSR_IA32_APICBASE_ENABLE; |
311 | wrmsr(MSR_IA32_APICBASE, l, h); | 673 | wrmsr(MSR_IA32_APICBASE, l, h); |
@@ -313,6 +675,28 @@ void disable_local_APIC(void) | |||
313 | } | 675 | } |
314 | 676 | ||
315 | /* | 677 | /* |
678 | * If Linux enabled the LAPIC against the BIOS default disable it down before | ||
679 | * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused and | ||
680 | * not power-off. Additionally clear all LVT entries before disable_local_APIC | ||
681 | * for the case where Linux didn't enable the LAPIC. | ||
682 | */ | ||
683 | void lapic_shutdown(void) | ||
684 | { | ||
685 | unsigned long flags; | ||
686 | |||
687 | if (!cpu_has_apic) | ||
688 | return; | ||
689 | |||
690 | local_irq_save(flags); | ||
691 | clear_local_APIC(); | ||
692 | |||
693 | if (enabled_via_apicbase) | ||
694 | disable_local_APIC(); | ||
695 | |||
696 | local_irq_restore(flags); | ||
697 | } | ||
698 | |||
699 | /* | ||
316 | * This is to verify that we're looking at a real local APIC. | 700 | * This is to verify that we're looking at a real local APIC. |
317 | * Check these against your board if the CPUs aren't getting | 701 | * Check these against your board if the CPUs aren't getting |
318 | * started for no apparent reason. | 702 | * started for no apparent reason. |
@@ -344,7 +728,7 @@ int __init verify_local_APIC(void) | |||
344 | reg1 = GET_APIC_VERSION(reg0); | 728 | reg1 = GET_APIC_VERSION(reg0); |
345 | if (reg1 == 0x00 || reg1 == 0xff) | 729 | if (reg1 == 0x00 || reg1 == 0xff) |
346 | return 0; | 730 | return 0; |
347 | reg1 = get_maxlvt(); | 731 | reg1 = lapic_get_maxlvt(); |
348 | if (reg1 < 0x02 || reg1 == 0xff) | 732 | if (reg1 < 0x02 || reg1 == 0xff) |
349 | return 0; | 733 | return 0; |
350 | 734 | ||
@@ -367,10 +751,15 @@ int __init verify_local_APIC(void) | |||
367 | return 1; | 751 | return 1; |
368 | } | 752 | } |
369 | 753 | ||
754 | /** | ||
755 | * sync_Arb_IDs - synchronize APIC bus arbitration IDs | ||
756 | */ | ||
370 | void __init sync_Arb_IDs(void) | 757 | void __init sync_Arb_IDs(void) |
371 | { | 758 | { |
372 | /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 | 759 | /* |
373 | And not needed on AMD */ | 760 | * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not |
761 | * needed on AMD. | ||
762 | */ | ||
374 | if (modern_apic()) | 763 | if (modern_apic()) |
375 | return; | 764 | return; |
376 | /* | 765 | /* |
@@ -383,14 +772,12 @@ void __init sync_Arb_IDs(void) | |||
383 | | APIC_DM_INIT); | 772 | | APIC_DM_INIT); |
384 | } | 773 | } |
385 | 774 | ||
386 | extern void __error_in_apic_c (void); | ||
387 | |||
388 | /* | 775 | /* |
389 | * An initial setup of the virtual wire mode. | 776 | * An initial setup of the virtual wire mode. |
390 | */ | 777 | */ |
391 | void __init init_bsp_APIC(void) | 778 | void __init init_bsp_APIC(void) |
392 | { | 779 | { |
393 | unsigned long value, ver; | 780 | unsigned long value; |
394 | 781 | ||
395 | /* | 782 | /* |
396 | * Don't do the setup now if we have a SMP BIOS as the | 783 | * Don't do the setup now if we have a SMP BIOS as the |
@@ -399,9 +786,6 @@ void __init init_bsp_APIC(void) | |||
399 | if (smp_found_config || !cpu_has_apic) | 786 | if (smp_found_config || !cpu_has_apic) |
400 | return; | 787 | return; |
401 | 788 | ||
402 | value = apic_read(APIC_LVR); | ||
403 | ver = GET_APIC_VERSION(value); | ||
404 | |||
405 | /* | 789 | /* |
406 | * Do not trust the local APIC being empty at bootup. | 790 | * Do not trust the local APIC being empty at bootup. |
407 | */ | 791 | */ |
@@ -413,9 +797,10 @@ void __init init_bsp_APIC(void) | |||
413 | value = apic_read(APIC_SPIV); | 797 | value = apic_read(APIC_SPIV); |
414 | value &= ~APIC_VECTOR_MASK; | 798 | value &= ~APIC_VECTOR_MASK; |
415 | value |= APIC_SPIV_APIC_ENABLED; | 799 | value |= APIC_SPIV_APIC_ENABLED; |
416 | 800 | ||
417 | /* This bit is reserved on P4/Xeon and should be cleared */ | 801 | /* This bit is reserved on P4/Xeon and should be cleared */ |
418 | if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 15)) | 802 | if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && |
803 | (boot_cpu_data.x86 == 15)) | ||
419 | value &= ~APIC_SPIV_FOCUS_DISABLED; | 804 | value &= ~APIC_SPIV_FOCUS_DISABLED; |
420 | else | 805 | else |
421 | value |= APIC_SPIV_FOCUS_DISABLED; | 806 | value |= APIC_SPIV_FOCUS_DISABLED; |
@@ -427,14 +812,17 @@ void __init init_bsp_APIC(void) | |||
427 | */ | 812 | */ |
428 | apic_write_around(APIC_LVT0, APIC_DM_EXTINT); | 813 | apic_write_around(APIC_LVT0, APIC_DM_EXTINT); |
429 | value = APIC_DM_NMI; | 814 | value = APIC_DM_NMI; |
430 | if (!APIC_INTEGRATED(ver)) /* 82489DX */ | 815 | if (!lapic_is_integrated()) /* 82489DX */ |
431 | value |= APIC_LVT_LEVEL_TRIGGER; | 816 | value |= APIC_LVT_LEVEL_TRIGGER; |
432 | apic_write_around(APIC_LVT1, value); | 817 | apic_write_around(APIC_LVT1, value); |
433 | } | 818 | } |
434 | 819 | ||
820 | /** | ||
821 | * setup_local_APIC - setup the local APIC | ||
822 | */ | ||
435 | void __devinit setup_local_APIC(void) | 823 | void __devinit setup_local_APIC(void) |
436 | { | 824 | { |
437 | unsigned long oldvalue, value, ver, maxlvt; | 825 | unsigned long oldvalue, value, maxlvt, integrated; |
438 | int i, j; | 826 | int i, j; |
439 | 827 | ||
440 | /* Pound the ESR really hard over the head with a big hammer - mbligh */ | 828 | /* Pound the ESR really hard over the head with a big hammer - mbligh */ |
@@ -445,11 +833,7 @@ void __devinit setup_local_APIC(void) | |||
445 | apic_write(APIC_ESR, 0); | 833 | apic_write(APIC_ESR, 0); |
446 | } | 834 | } |
447 | 835 | ||
448 | value = apic_read(APIC_LVR); | 836 | integrated = lapic_is_integrated(); |
449 | ver = GET_APIC_VERSION(value); | ||
450 | |||
451 | if ((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f) | ||
452 | __error_in_apic_c(); | ||
453 | 837 | ||
454 | /* | 838 | /* |
455 | * Double-check whether this APIC is really registered. | 839 | * Double-check whether this APIC is really registered. |
@@ -520,13 +904,10 @@ void __devinit setup_local_APIC(void) | |||
520 | * like LRU than MRU (the short-term load is more even across CPUs). | 904 | * like LRU than MRU (the short-term load is more even across CPUs). |
521 | * See also the comment in end_level_ioapic_irq(). --macro | 905 | * See also the comment in end_level_ioapic_irq(). --macro |
522 | */ | 906 | */ |
523 | #if 1 | 907 | |
524 | /* Enable focus processor (bit==0) */ | 908 | /* Enable focus processor (bit==0) */ |
525 | value &= ~APIC_SPIV_FOCUS_DISABLED; | 909 | value &= ~APIC_SPIV_FOCUS_DISABLED; |
526 | #else | 910 | |
527 | /* Disable focus processor (bit==1) */ | ||
528 | value |= APIC_SPIV_FOCUS_DISABLED; | ||
529 | #endif | ||
530 | /* | 911 | /* |
531 | * Set spurious IRQ vector | 912 | * Set spurious IRQ vector |
532 | */ | 913 | */ |
@@ -562,17 +943,18 @@ void __devinit setup_local_APIC(void) | |||
562 | value = APIC_DM_NMI; | 943 | value = APIC_DM_NMI; |
563 | else | 944 | else |
564 | value = APIC_DM_NMI | APIC_LVT_MASKED; | 945 | value = APIC_DM_NMI | APIC_LVT_MASKED; |
565 | if (!APIC_INTEGRATED(ver)) /* 82489DX */ | 946 | if (!integrated) /* 82489DX */ |
566 | value |= APIC_LVT_LEVEL_TRIGGER; | 947 | value |= APIC_LVT_LEVEL_TRIGGER; |
567 | apic_write_around(APIC_LVT1, value); | 948 | apic_write_around(APIC_LVT1, value); |
568 | 949 | ||
569 | if (APIC_INTEGRATED(ver) && !esr_disable) { /* !82489DX */ | 950 | if (integrated && !esr_disable) { /* !82489DX */ |
570 | maxlvt = get_maxlvt(); | 951 | maxlvt = lapic_get_maxlvt(); |
571 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ | 952 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ |
572 | apic_write(APIC_ESR, 0); | 953 | apic_write(APIC_ESR, 0); |
573 | oldvalue = apic_read(APIC_ESR); | 954 | oldvalue = apic_read(APIC_ESR); |
574 | 955 | ||
575 | value = ERROR_APIC_VECTOR; // enables sending errors | 956 | /* enables sending errors */ |
957 | value = ERROR_APIC_VECTOR; | ||
576 | apic_write_around(APIC_LVTERR, value); | 958 | apic_write_around(APIC_LVTERR, value); |
577 | /* | 959 | /* |
578 | * spec says clear errors after enabling vector. | 960 | * spec says clear errors after enabling vector. |
@@ -585,207 +967,30 @@ void __devinit setup_local_APIC(void) | |||
585 | "vector: 0x%08lx after: 0x%08lx\n", | 967 | "vector: 0x%08lx after: 0x%08lx\n", |
586 | oldvalue, value); | 968 | oldvalue, value); |
587 | } else { | 969 | } else { |
588 | if (esr_disable) | 970 | if (esr_disable) |
589 | /* | 971 | /* |
590 | * Something untraceble is creating bad interrupts on | 972 | * Something untraceble is creating bad interrupts on |
591 | * secondary quads ... for the moment, just leave the | 973 | * secondary quads ... for the moment, just leave the |
592 | * ESR disabled - we can't do anything useful with the | 974 | * ESR disabled - we can't do anything useful with the |
593 | * errors anyway - mbligh | 975 | * errors anyway - mbligh |
594 | */ | 976 | */ |
595 | printk("Leaving ESR disabled.\n"); | 977 | printk(KERN_INFO "Leaving ESR disabled.\n"); |
596 | else | 978 | else |
597 | printk("No ESR for 82489DX.\n"); | 979 | printk(KERN_INFO "No ESR for 82489DX.\n"); |
598 | } | 980 | } |
599 | 981 | ||
982 | /* Disable the local apic timer */ | ||
983 | value = apic_read(APIC_LVTT); | ||
984 | value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); | ||
985 | apic_write_around(APIC_LVTT, value); | ||
986 | |||
600 | setup_apic_nmi_watchdog(NULL); | 987 | setup_apic_nmi_watchdog(NULL); |
601 | apic_pm_activate(); | 988 | apic_pm_activate(); |
602 | } | 989 | } |
603 | 990 | ||
604 | /* | 991 | /* |
605 | * If Linux enabled the LAPIC against the BIOS default | 992 | * Detect and initialize APIC |
606 | * disable it down before re-entering the BIOS on shutdown. | ||
607 | * Otherwise the BIOS may get confused and not power-off. | ||
608 | * Additionally clear all LVT entries before disable_local_APIC | ||
609 | * for the case where Linux didn't enable the LAPIC. | ||
610 | */ | 993 | */ |
611 | void lapic_shutdown(void) | ||
612 | { | ||
613 | unsigned long flags; | ||
614 | |||
615 | if (!cpu_has_apic) | ||
616 | return; | ||
617 | |||
618 | local_irq_save(flags); | ||
619 | clear_local_APIC(); | ||
620 | |||
621 | if (enabled_via_apicbase) | ||
622 | disable_local_APIC(); | ||
623 | |||
624 | local_irq_restore(flags); | ||
625 | } | ||
626 | |||
627 | #ifdef CONFIG_PM | ||
628 | |||
629 | static struct { | ||
630 | int active; | ||
631 | /* r/w apic fields */ | ||
632 | unsigned int apic_id; | ||
633 | unsigned int apic_taskpri; | ||
634 | unsigned int apic_ldr; | ||
635 | unsigned int apic_dfr; | ||
636 | unsigned int apic_spiv; | ||
637 | unsigned int apic_lvtt; | ||
638 | unsigned int apic_lvtpc; | ||
639 | unsigned int apic_lvt0; | ||
640 | unsigned int apic_lvt1; | ||
641 | unsigned int apic_lvterr; | ||
642 | unsigned int apic_tmict; | ||
643 | unsigned int apic_tdcr; | ||
644 | unsigned int apic_thmr; | ||
645 | } apic_pm_state; | ||
646 | |||
647 | static int lapic_suspend(struct sys_device *dev, pm_message_t state) | ||
648 | { | ||
649 | unsigned long flags; | ||
650 | int maxlvt; | ||
651 | |||
652 | if (!apic_pm_state.active) | ||
653 | return 0; | ||
654 | |||
655 | maxlvt = get_maxlvt(); | ||
656 | |||
657 | apic_pm_state.apic_id = apic_read(APIC_ID); | ||
658 | apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); | ||
659 | apic_pm_state.apic_ldr = apic_read(APIC_LDR); | ||
660 | apic_pm_state.apic_dfr = apic_read(APIC_DFR); | ||
661 | apic_pm_state.apic_spiv = apic_read(APIC_SPIV); | ||
662 | apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); | ||
663 | if (maxlvt >= 4) | ||
664 | apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); | ||
665 | apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); | ||
666 | apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); | ||
667 | apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); | ||
668 | apic_pm_state.apic_tmict = apic_read(APIC_TMICT); | ||
669 | apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); | ||
670 | #ifdef CONFIG_X86_MCE_P4THERMAL | ||
671 | if (maxlvt >= 5) | ||
672 | apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); | ||
673 | #endif | ||
674 | |||
675 | local_irq_save(flags); | ||
676 | disable_local_APIC(); | ||
677 | local_irq_restore(flags); | ||
678 | return 0; | ||
679 | } | ||
680 | |||
681 | static int lapic_resume(struct sys_device *dev) | ||
682 | { | ||
683 | unsigned int l, h; | ||
684 | unsigned long flags; | ||
685 | int maxlvt; | ||
686 | |||
687 | if (!apic_pm_state.active) | ||
688 | return 0; | ||
689 | |||
690 | maxlvt = get_maxlvt(); | ||
691 | |||
692 | local_irq_save(flags); | ||
693 | |||
694 | /* | ||
695 | * Make sure the APICBASE points to the right address | ||
696 | * | ||
697 | * FIXME! This will be wrong if we ever support suspend on | ||
698 | * SMP! We'll need to do this as part of the CPU restore! | ||
699 | */ | ||
700 | rdmsr(MSR_IA32_APICBASE, l, h); | ||
701 | l &= ~MSR_IA32_APICBASE_BASE; | ||
702 | l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; | ||
703 | wrmsr(MSR_IA32_APICBASE, l, h); | ||
704 | |||
705 | apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); | ||
706 | apic_write(APIC_ID, apic_pm_state.apic_id); | ||
707 | apic_write(APIC_DFR, apic_pm_state.apic_dfr); | ||
708 | apic_write(APIC_LDR, apic_pm_state.apic_ldr); | ||
709 | apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri); | ||
710 | apic_write(APIC_SPIV, apic_pm_state.apic_spiv); | ||
711 | apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); | ||
712 | apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); | ||
713 | #ifdef CONFIG_X86_MCE_P4THERMAL | ||
714 | if (maxlvt >= 5) | ||
715 | apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); | ||
716 | #endif | ||
717 | if (maxlvt >= 4) | ||
718 | apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); | ||
719 | apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); | ||
720 | apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); | ||
721 | apic_write(APIC_TMICT, apic_pm_state.apic_tmict); | ||
722 | apic_write(APIC_ESR, 0); | ||
723 | apic_read(APIC_ESR); | ||
724 | apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); | ||
725 | apic_write(APIC_ESR, 0); | ||
726 | apic_read(APIC_ESR); | ||
727 | local_irq_restore(flags); | ||
728 | return 0; | ||
729 | } | ||
730 | |||
731 | /* | ||
732 | * This device has no shutdown method - fully functioning local APICs | ||
733 | * are needed on every CPU up until machine_halt/restart/poweroff. | ||
734 | */ | ||
735 | |||
736 | static struct sysdev_class lapic_sysclass = { | ||
737 | set_kset_name("lapic"), | ||
738 | .resume = lapic_resume, | ||
739 | .suspend = lapic_suspend, | ||
740 | }; | ||
741 | |||
742 | static struct sys_device device_lapic = { | ||
743 | .id = 0, | ||
744 | .cls = &lapic_sysclass, | ||
745 | }; | ||
746 | |||
747 | static void __devinit apic_pm_activate(void) | ||
748 | { | ||
749 | apic_pm_state.active = 1; | ||
750 | } | ||
751 | |||
752 | static int __init init_lapic_sysfs(void) | ||
753 | { | ||
754 | int error; | ||
755 | |||
756 | if (!cpu_has_apic) | ||
757 | return 0; | ||
758 | /* XXX: remove suspend/resume procs if !apic_pm_state.active? */ | ||
759 | |||
760 | error = sysdev_class_register(&lapic_sysclass); | ||
761 | if (!error) | ||
762 | error = sysdev_register(&device_lapic); | ||
763 | return error; | ||
764 | } | ||
765 | device_initcall(init_lapic_sysfs); | ||
766 | |||
767 | #else /* CONFIG_PM */ | ||
768 | |||
769 | static void apic_pm_activate(void) { } | ||
770 | |||
771 | #endif /* CONFIG_PM */ | ||
772 | |||
773 | /* | ||
774 | * Detect and enable local APICs on non-SMP boards. | ||
775 | * Original code written by Keir Fraser. | ||
776 | */ | ||
777 | |||
778 | static int __init apic_set_verbosity(char *str) | ||
779 | { | ||
780 | if (strcmp("debug", str) == 0) | ||
781 | apic_verbosity = APIC_DEBUG; | ||
782 | else if (strcmp("verbose", str) == 0) | ||
783 | apic_verbosity = APIC_VERBOSE; | ||
784 | return 1; | ||
785 | } | ||
786 | |||
787 | __setup("apic=", apic_set_verbosity); | ||
788 | |||
789 | static int __init detect_init_APIC (void) | 994 | static int __init detect_init_APIC (void) |
790 | { | 995 | { |
791 | u32 h, l, features; | 996 | u32 h, l, features; |
@@ -797,7 +1002,7 @@ static int __init detect_init_APIC (void) | |||
797 | switch (boot_cpu_data.x86_vendor) { | 1002 | switch (boot_cpu_data.x86_vendor) { |
798 | case X86_VENDOR_AMD: | 1003 | case X86_VENDOR_AMD: |
799 | if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) || | 1004 | if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) || |
800 | (boot_cpu_data.x86 == 15)) | 1005 | (boot_cpu_data.x86 == 15)) |
801 | break; | 1006 | break; |
802 | goto no_apic; | 1007 | goto no_apic; |
803 | case X86_VENDOR_INTEL: | 1008 | case X86_VENDOR_INTEL: |
@@ -811,23 +1016,23 @@ static int __init detect_init_APIC (void) | |||
811 | 1016 | ||
812 | if (!cpu_has_apic) { | 1017 | if (!cpu_has_apic) { |
813 | /* | 1018 | /* |
814 | * Over-ride BIOS and try to enable the local | 1019 | * Over-ride BIOS and try to enable the local APIC only if |
815 | * APIC only if "lapic" specified. | 1020 | * "lapic" specified. |
816 | */ | 1021 | */ |
817 | if (enable_local_apic <= 0) { | 1022 | if (enable_local_apic <= 0) { |
818 | printk("Local APIC disabled by BIOS -- " | 1023 | printk(KERN_INFO "Local APIC disabled by BIOS -- " |
819 | "you can enable it with \"lapic\"\n"); | 1024 | "you can enable it with \"lapic\"\n"); |
820 | return -1; | 1025 | return -1; |
821 | } | 1026 | } |
822 | /* | 1027 | /* |
823 | * Some BIOSes disable the local APIC in the | 1028 | * Some BIOSes disable the local APIC in the APIC_BASE |
824 | * APIC_BASE MSR. This can only be done in | 1029 | * MSR. This can only be done in software for Intel P6 or later |
825 | * software for Intel P6 or later and AMD K7 | 1030 | * and AMD K7 (Model > 1) or later. |
826 | * (Model > 1) or later. | ||
827 | */ | 1031 | */ |
828 | rdmsr(MSR_IA32_APICBASE, l, h); | 1032 | rdmsr(MSR_IA32_APICBASE, l, h); |
829 | if (!(l & MSR_IA32_APICBASE_ENABLE)) { | 1033 | if (!(l & MSR_IA32_APICBASE_ENABLE)) { |
830 | printk("Local APIC disabled by BIOS -- reenabling.\n"); | 1034 | printk(KERN_INFO |
1035 | "Local APIC disabled by BIOS -- reenabling.\n"); | ||
831 | l &= ~MSR_IA32_APICBASE_BASE; | 1036 | l &= ~MSR_IA32_APICBASE_BASE; |
832 | l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; | 1037 | l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; |
833 | wrmsr(MSR_IA32_APICBASE, l, h); | 1038 | wrmsr(MSR_IA32_APICBASE, l, h); |
@@ -840,7 +1045,7 @@ static int __init detect_init_APIC (void) | |||
840 | */ | 1045 | */ |
841 | features = cpuid_edx(1); | 1046 | features = cpuid_edx(1); |
842 | if (!(features & (1 << X86_FEATURE_APIC))) { | 1047 | if (!(features & (1 << X86_FEATURE_APIC))) { |
843 | printk("Could not enable APIC!\n"); | 1048 | printk(KERN_WARNING "Could not enable APIC!\n"); |
844 | return -1; | 1049 | return -1; |
845 | } | 1050 | } |
846 | set_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); | 1051 | set_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); |
@@ -854,17 +1059,20 @@ static int __init detect_init_APIC (void) | |||
854 | if (nmi_watchdog != NMI_NONE) | 1059 | if (nmi_watchdog != NMI_NONE) |
855 | nmi_watchdog = NMI_LOCAL_APIC; | 1060 | nmi_watchdog = NMI_LOCAL_APIC; |
856 | 1061 | ||
857 | printk("Found and enabled local APIC!\n"); | 1062 | printk(KERN_INFO "Found and enabled local APIC!\n"); |
858 | 1063 | ||
859 | apic_pm_activate(); | 1064 | apic_pm_activate(); |
860 | 1065 | ||
861 | return 0; | 1066 | return 0; |
862 | 1067 | ||
863 | no_apic: | 1068 | no_apic: |
864 | printk("No local APIC present or hardware disabled\n"); | 1069 | printk(KERN_INFO "No local APIC present or hardware disabled\n"); |
865 | return -1; | 1070 | return -1; |
866 | } | 1071 | } |
867 | 1072 | ||
1073 | /** | ||
1074 | * init_apic_mappings - initialize APIC mappings | ||
1075 | */ | ||
868 | void __init init_apic_mappings(void) | 1076 | void __init init_apic_mappings(void) |
869 | { | 1077 | { |
870 | unsigned long apic_phys; | 1078 | unsigned long apic_phys; |
@@ -924,387 +1132,96 @@ fake_ioapic_page: | |||
924 | } | 1132 | } |
925 | 1133 | ||
926 | /* | 1134 | /* |
927 | * This part sets up the APIC 32 bit clock in LVTT1, with HZ interrupts | 1135 | * This initializes the IO-APIC and APIC hardware if this is |
928 | * per second. We assume that the caller has already set up the local | 1136 | * a UP kernel. |
929 | * APIC. | ||
930 | * | ||
931 | * The APIC timer is not exactly sync with the external timer chip, it | ||
932 | * closely follows bus clocks. | ||
933 | */ | ||
934 | |||
935 | /* | ||
936 | * The timer chip is already set up at HZ interrupts per second here, | ||
937 | * but we do not accept timer interrupts yet. We only allow the BP | ||
938 | * to calibrate. | ||
939 | */ | ||
940 | static unsigned int __devinit get_8254_timer_count(void) | ||
941 | { | ||
942 | unsigned long flags; | ||
943 | |||
944 | unsigned int count; | ||
945 | |||
946 | spin_lock_irqsave(&i8253_lock, flags); | ||
947 | |||
948 | outb_p(0x00, PIT_MODE); | ||
949 | count = inb_p(PIT_CH0); | ||
950 | count |= inb_p(PIT_CH0) << 8; | ||
951 | |||
952 | spin_unlock_irqrestore(&i8253_lock, flags); | ||
953 | |||
954 | return count; | ||
955 | } | ||
956 | |||
957 | /* next tick in 8254 can be caught by catching timer wraparound */ | ||
958 | static void __devinit wait_8254_wraparound(void) | ||
959 | { | ||
960 | unsigned int curr_count, prev_count; | ||
961 | |||
962 | curr_count = get_8254_timer_count(); | ||
963 | do { | ||
964 | prev_count = curr_count; | ||
965 | curr_count = get_8254_timer_count(); | ||
966 | |||
967 | /* workaround for broken Mercury/Neptune */ | ||
968 | if (prev_count >= curr_count + 0x100) | ||
969 | curr_count = get_8254_timer_count(); | ||
970 | |||
971 | } while (prev_count >= curr_count); | ||
972 | } | ||
973 | |||
974 | /* | ||
975 | * Default initialization for 8254 timers. If we use other timers like HPET, | ||
976 | * we override this later | ||
977 | */ | ||
978 | void (*wait_timer_tick)(void) __devinitdata = wait_8254_wraparound; | ||
979 | |||
980 | /* | ||
981 | * This function sets up the local APIC timer, with a timeout of | ||
982 | * 'clocks' APIC bus clock. During calibration we actually call | ||
983 | * this function twice on the boot CPU, once with a bogus timeout | ||
984 | * value, second time for real. The other (noncalibrating) CPUs | ||
985 | * call this function only once, with the real, calibrated value. | ||
986 | * | ||
987 | * We do reads before writes even if unnecessary, to get around the | ||
988 | * P5 APIC double write bug. | ||
989 | */ | 1137 | */ |
990 | 1138 | int __init APIC_init_uniprocessor (void) | |
991 | #define APIC_DIVISOR 16 | ||
992 | |||
993 | static void __setup_APIC_LVTT(unsigned int clocks) | ||
994 | { | 1139 | { |
995 | unsigned int lvtt_value, tmp_value, ver; | 1140 | if (enable_local_apic < 0) |
996 | int cpu = smp_processor_id(); | 1141 | clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); |
997 | |||
998 | ver = GET_APIC_VERSION(apic_read(APIC_LVR)); | ||
999 | lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; | ||
1000 | if (!APIC_INTEGRATED(ver)) | ||
1001 | lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); | ||
1002 | |||
1003 | if (cpu_isset(cpu, timer_bcast_ipi)) | ||
1004 | lvtt_value |= APIC_LVT_MASKED; | ||
1005 | 1142 | ||
1006 | apic_write_around(APIC_LVTT, lvtt_value); | 1143 | if (!smp_found_config && !cpu_has_apic) |
1144 | return -1; | ||
1007 | 1145 | ||
1008 | /* | 1146 | /* |
1009 | * Divide PICLK by 16 | 1147 | * Complain if the BIOS pretends there is one. |
1010 | */ | 1148 | */ |
1011 | tmp_value = apic_read(APIC_TDCR); | 1149 | if (!cpu_has_apic && |
1012 | apic_write_around(APIC_TDCR, (tmp_value | 1150 | APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { |
1013 | & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | 1151 | printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", |
1014 | | APIC_TDR_DIV_16); | 1152 | boot_cpu_physical_apicid); |
1015 | 1153 | clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); | |
1016 | apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); | 1154 | return -1; |
1017 | } | 1155 | } |
1018 | 1156 | ||
1019 | static void __devinit setup_APIC_timer(unsigned int clocks) | 1157 | verify_local_APIC(); |
1020 | { | ||
1021 | unsigned long flags; | ||
1022 | 1158 | ||
1023 | local_irq_save(flags); | 1159 | connect_bsp_APIC(); |
1024 | 1160 | ||
1025 | /* | 1161 | /* |
1026 | * Wait for IRQ0's slice: | 1162 | * Hack: In case of kdump, after a crash, kernel might be booting |
1163 | * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid | ||
1164 | * might be zero if read from MP tables. Get it from LAPIC. | ||
1027 | */ | 1165 | */ |
1028 | wait_timer_tick(); | 1166 | #ifdef CONFIG_CRASH_DUMP |
1167 | boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); | ||
1168 | #endif | ||
1169 | phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); | ||
1029 | 1170 | ||
1030 | __setup_APIC_LVTT(clocks); | 1171 | setup_local_APIC(); |
1031 | 1172 | ||
1032 | local_irq_restore(flags); | 1173 | #ifdef CONFIG_X86_IO_APIC |
1174 | if (smp_found_config) | ||
1175 | if (!skip_ioapic_setup && nr_ioapics) | ||
1176 | setup_IO_APIC(); | ||
1177 | #endif | ||
1178 | setup_boot_clock(); | ||
1179 | |||
1180 | return 0; | ||
1033 | } | 1181 | } |
1034 | 1182 | ||
1035 | /* | 1183 | /* |
1036 | * In this function we calibrate APIC bus clocks to the external | 1184 | * APIC command line parameters |
1037 | * timer. Unfortunately we cannot use jiffies and the timer irq | ||
1038 | * to calibrate, since some later bootup code depends on getting | ||
1039 | * the first irq? Ugh. | ||
1040 | * | ||
1041 | * We want to do the calibration only once since we | ||
1042 | * want to have local timer irqs syncron. CPUs connected | ||
1043 | * by the same APIC bus have the very same bus frequency. | ||
1044 | * And we want to have irqs off anyways, no accidental | ||
1045 | * APIC irq that way. | ||
1046 | */ | 1185 | */ |
1047 | 1186 | static int __init parse_lapic(char *arg) | |
1048 | static int __init calibrate_APIC_clock(void) | ||
1049 | { | ||
1050 | unsigned long long t1 = 0, t2 = 0; | ||
1051 | long tt1, tt2; | ||
1052 | long result; | ||
1053 | int i; | ||
1054 | const int LOOPS = HZ/10; | ||
1055 | |||
1056 | apic_printk(APIC_VERBOSE, "calibrating APIC timer ...\n"); | ||
1057 | |||
1058 | /* | ||
1059 | * Put whatever arbitrary (but long enough) timeout | ||
1060 | * value into the APIC clock, we just want to get the | ||
1061 | * counter running for calibration. | ||
1062 | */ | ||
1063 | __setup_APIC_LVTT(1000000000); | ||
1064 | |||
1065 | /* | ||
1066 | * The timer chip counts down to zero. Let's wait | ||
1067 | * for a wraparound to start exact measurement: | ||
1068 | * (the current tick might have been already half done) | ||
1069 | */ | ||
1070 | |||
1071 | wait_timer_tick(); | ||
1072 | |||
1073 | /* | ||
1074 | * We wrapped around just now. Let's start: | ||
1075 | */ | ||
1076 | if (cpu_has_tsc) | ||
1077 | rdtscll(t1); | ||
1078 | tt1 = apic_read(APIC_TMCCT); | ||
1079 | |||
1080 | /* | ||
1081 | * Let's wait LOOPS wraprounds: | ||
1082 | */ | ||
1083 | for (i = 0; i < LOOPS; i++) | ||
1084 | wait_timer_tick(); | ||
1085 | |||
1086 | tt2 = apic_read(APIC_TMCCT); | ||
1087 | if (cpu_has_tsc) | ||
1088 | rdtscll(t2); | ||
1089 | |||
1090 | /* | ||
1091 | * The APIC bus clock counter is 32 bits only, it | ||
1092 | * might have overflown, but note that we use signed | ||
1093 | * longs, thus no extra care needed. | ||
1094 | * | ||
1095 | * underflown to be exact, as the timer counts down ;) | ||
1096 | */ | ||
1097 | |||
1098 | result = (tt1-tt2)*APIC_DIVISOR/LOOPS; | ||
1099 | |||
1100 | if (cpu_has_tsc) | ||
1101 | apic_printk(APIC_VERBOSE, "..... CPU clock speed is " | ||
1102 | "%ld.%04ld MHz.\n", | ||
1103 | ((long)(t2-t1)/LOOPS)/(1000000/HZ), | ||
1104 | ((long)(t2-t1)/LOOPS)%(1000000/HZ)); | ||
1105 | |||
1106 | apic_printk(APIC_VERBOSE, "..... host bus clock speed is " | ||
1107 | "%ld.%04ld MHz.\n", | ||
1108 | result/(1000000/HZ), | ||
1109 | result%(1000000/HZ)); | ||
1110 | |||
1111 | return result; | ||
1112 | } | ||
1113 | |||
1114 | static unsigned int calibration_result; | ||
1115 | |||
1116 | void __init setup_boot_APIC_clock(void) | ||
1117 | { | ||
1118 | unsigned long flags; | ||
1119 | apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"); | ||
1120 | using_apic_timer = 1; | ||
1121 | |||
1122 | local_irq_save(flags); | ||
1123 | |||
1124 | calibration_result = calibrate_APIC_clock(); | ||
1125 | /* | ||
1126 | * Now set up the timer for real. | ||
1127 | */ | ||
1128 | setup_APIC_timer(calibration_result); | ||
1129 | |||
1130 | local_irq_restore(flags); | ||
1131 | } | ||
1132 | |||
1133 | void __devinit setup_secondary_APIC_clock(void) | ||
1134 | { | ||
1135 | setup_APIC_timer(calibration_result); | ||
1136 | } | ||
1137 | |||
1138 | void disable_APIC_timer(void) | ||
1139 | { | ||
1140 | if (using_apic_timer) { | ||
1141 | unsigned long v; | ||
1142 | |||
1143 | v = apic_read(APIC_LVTT); | ||
1144 | /* | ||
1145 | * When an illegal vector value (0-15) is written to an LVT | ||
1146 | * entry and delivery mode is Fixed, the APIC may signal an | ||
1147 | * illegal vector error, with out regard to whether the mask | ||
1148 | * bit is set or whether an interrupt is actually seen on input. | ||
1149 | * | ||
1150 | * Boot sequence might call this function when the LVTT has | ||
1151 | * '0' vector value. So make sure vector field is set to | ||
1152 | * valid value. | ||
1153 | */ | ||
1154 | v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); | ||
1155 | apic_write_around(APIC_LVTT, v); | ||
1156 | } | ||
1157 | } | ||
1158 | |||
1159 | void enable_APIC_timer(void) | ||
1160 | { | 1187 | { |
1161 | int cpu = smp_processor_id(); | 1188 | enable_local_apic = 1; |
1162 | 1189 | return 0; | |
1163 | if (using_apic_timer && | ||
1164 | !cpu_isset(cpu, timer_bcast_ipi)) { | ||
1165 | unsigned long v; | ||
1166 | |||
1167 | v = apic_read(APIC_LVTT); | ||
1168 | apic_write_around(APIC_LVTT, v & ~APIC_LVT_MASKED); | ||
1169 | } | ||
1170 | } | 1190 | } |
1191 | early_param("lapic", parse_lapic); | ||
1171 | 1192 | ||
1172 | void switch_APIC_timer_to_ipi(void *cpumask) | 1193 | static int __init parse_nolapic(char *arg) |
1173 | { | 1194 | { |
1174 | cpumask_t mask = *(cpumask_t *)cpumask; | 1195 | enable_local_apic = -1; |
1175 | int cpu = smp_processor_id(); | 1196 | clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); |
1176 | 1197 | return 0; | |
1177 | if (cpu_isset(cpu, mask) && | ||
1178 | !cpu_isset(cpu, timer_bcast_ipi)) { | ||
1179 | disable_APIC_timer(); | ||
1180 | cpu_set(cpu, timer_bcast_ipi); | ||
1181 | } | ||
1182 | } | 1198 | } |
1183 | EXPORT_SYMBOL(switch_APIC_timer_to_ipi); | 1199 | early_param("nolapic", parse_nolapic); |
1184 | 1200 | ||
1185 | void switch_ipi_to_APIC_timer(void *cpumask) | 1201 | static int __init apic_set_verbosity(char *str) |
1186 | { | 1202 | { |
1187 | cpumask_t mask = *(cpumask_t *)cpumask; | 1203 | if (strcmp("debug", str) == 0) |
1188 | int cpu = smp_processor_id(); | 1204 | apic_verbosity = APIC_DEBUG; |
1189 | 1205 | else if (strcmp("verbose", str) == 0) | |
1190 | if (cpu_isset(cpu, mask) && | 1206 | apic_verbosity = APIC_VERBOSE; |
1191 | cpu_isset(cpu, timer_bcast_ipi)) { | 1207 | return 1; |
1192 | cpu_clear(cpu, timer_bcast_ipi); | ||
1193 | enable_APIC_timer(); | ||
1194 | } | ||
1195 | } | 1208 | } |
1196 | EXPORT_SYMBOL(switch_ipi_to_APIC_timer); | ||
1197 | |||
1198 | #undef APIC_DIVISOR | ||
1199 | 1209 | ||
1200 | /* | 1210 | __setup("apic=", apic_set_verbosity); |
1201 | * Local timer interrupt handler. It does both profiling and | ||
1202 | * process statistics/rescheduling. | ||
1203 | * | ||
1204 | * We do profiling in every local tick, statistics/rescheduling | ||
1205 | * happen only every 'profiling multiplier' ticks. The default | ||
1206 | * multiplier is 1 and it can be changed by writing the new multiplier | ||
1207 | * value into /proc/profile. | ||
1208 | */ | ||
1209 | |||
1210 | inline void smp_local_timer_interrupt(void) | ||
1211 | { | ||
1212 | profile_tick(CPU_PROFILING); | ||
1213 | #ifdef CONFIG_SMP | ||
1214 | update_process_times(user_mode_vm(get_irq_regs())); | ||
1215 | #endif | ||
1216 | 1211 | ||
1217 | /* | ||
1218 | * We take the 'long' return path, and there every subsystem | ||
1219 | * grabs the apropriate locks (kernel lock/ irq lock). | ||
1220 | * | ||
1221 | * we might want to decouple profiling from the 'long path', | ||
1222 | * and do the profiling totally in assembly. | ||
1223 | * | ||
1224 | * Currently this isn't too much of an issue (performance wise), | ||
1225 | * we can take more than 100K local irqs per second on a 100 MHz P5. | ||
1226 | */ | ||
1227 | } | ||
1228 | 1212 | ||
1229 | /* | 1213 | /* |
1230 | * Local APIC timer interrupt. This is the most natural way for doing | 1214 | * Local APIC interrupts |
1231 | * local interrupts, but local timer interrupts can be emulated by | ||
1232 | * broadcast interrupts too. [in case the hw doesn't support APIC timers] | ||
1233 | * | ||
1234 | * [ if a single-CPU system runs an SMP kernel then we call the local | ||
1235 | * interrupt as well. Thus we cannot inline the local irq ... ] | ||
1236 | */ | 1215 | */ |
1237 | 1216 | ||
1238 | fastcall void smp_apic_timer_interrupt(struct pt_regs *regs) | ||
1239 | { | ||
1240 | struct pt_regs *old_regs = set_irq_regs(regs); | ||
1241 | int cpu = smp_processor_id(); | ||
1242 | |||
1243 | /* | ||
1244 | * the NMI deadlock-detector uses this. | ||
1245 | */ | ||
1246 | per_cpu(irq_stat, cpu).apic_timer_irqs++; | ||
1247 | |||
1248 | /* | ||
1249 | * NOTE! We'd better ACK the irq immediately, | ||
1250 | * because timer handling can be slow. | ||
1251 | */ | ||
1252 | ack_APIC_irq(); | ||
1253 | /* | ||
1254 | * update_process_times() expects us to have done irq_enter(). | ||
1255 | * Besides, if we don't timer interrupts ignore the global | ||
1256 | * interrupt lock, which is the WrongThing (tm) to do. | ||
1257 | */ | ||
1258 | irq_enter(); | ||
1259 | smp_local_timer_interrupt(); | ||
1260 | irq_exit(); | ||
1261 | set_irq_regs(old_regs); | ||
1262 | } | ||
1263 | |||
1264 | #ifndef CONFIG_SMP | ||
1265 | static void up_apic_timer_interrupt_call(void) | ||
1266 | { | ||
1267 | int cpu = smp_processor_id(); | ||
1268 | |||
1269 | /* | ||
1270 | * the NMI deadlock-detector uses this. | ||
1271 | */ | ||
1272 | per_cpu(irq_stat, cpu).apic_timer_irqs++; | ||
1273 | |||
1274 | smp_local_timer_interrupt(); | ||
1275 | } | ||
1276 | #endif | ||
1277 | |||
1278 | void smp_send_timer_broadcast_ipi(void) | ||
1279 | { | ||
1280 | cpumask_t mask; | ||
1281 | |||
1282 | cpus_and(mask, cpu_online_map, timer_bcast_ipi); | ||
1283 | if (!cpus_empty(mask)) { | ||
1284 | #ifdef CONFIG_SMP | ||
1285 | send_IPI_mask(mask, LOCAL_TIMER_VECTOR); | ||
1286 | #else | ||
1287 | /* | ||
1288 | * We can directly call the apic timer interrupt handler | ||
1289 | * in UP case. Minus all irq related functions | ||
1290 | */ | ||
1291 | up_apic_timer_interrupt_call(); | ||
1292 | #endif | ||
1293 | } | ||
1294 | } | ||
1295 | |||
1296 | int setup_profiling_timer(unsigned int multiplier) | ||
1297 | { | ||
1298 | return -EINVAL; | ||
1299 | } | ||
1300 | |||
1301 | /* | 1217 | /* |
1302 | * This interrupt should _never_ happen with our APIC/SMP architecture | 1218 | * This interrupt should _never_ happen with our APIC/SMP architecture |
1303 | */ | 1219 | */ |
1304 | fastcall void smp_spurious_interrupt(struct pt_regs *regs) | 1220 | void smp_spurious_interrupt(struct pt_regs *regs) |
1305 | { | 1221 | { |
1306 | unsigned long v; | 1222 | unsigned long v; |
1307 | 1223 | ||
1224 | exit_idle(); | ||
1308 | irq_enter(); | 1225 | irq_enter(); |
1309 | /* | 1226 | /* |
1310 | * Check if this really is a spurious interrupt and ACK it | 1227 | * Check if this really is a spurious interrupt and ACK it |
@@ -1316,19 +1233,19 @@ fastcall void smp_spurious_interrupt(struct pt_regs *regs) | |||
1316 | ack_APIC_irq(); | 1233 | ack_APIC_irq(); |
1317 | 1234 | ||
1318 | /* see sw-dev-man vol 3, chapter 7.4.13.5 */ | 1235 | /* see sw-dev-man vol 3, chapter 7.4.13.5 */ |
1319 | printk(KERN_INFO "spurious APIC interrupt on CPU#%d, should never happen.\n", | 1236 | printk(KERN_INFO "spurious APIC interrupt on CPU#%d, " |
1320 | smp_processor_id()); | 1237 | "should never happen.\n", smp_processor_id()); |
1321 | irq_exit(); | 1238 | irq_exit(); |
1322 | } | 1239 | } |
1323 | 1240 | ||
1324 | /* | 1241 | /* |
1325 | * This interrupt should never happen with our APIC/SMP architecture | 1242 | * This interrupt should never happen with our APIC/SMP architecture |
1326 | */ | 1243 | */ |
1327 | 1244 | void smp_error_interrupt(struct pt_regs *regs) | |
1328 | fastcall void smp_error_interrupt(struct pt_regs *regs) | ||
1329 | { | 1245 | { |
1330 | unsigned long v, v1; | 1246 | unsigned long v, v1; |
1331 | 1247 | ||
1248 | exit_idle(); | ||
1332 | irq_enter(); | 1249 | irq_enter(); |
1333 | /* First tickle the hardware, only then report what went on. -- REW */ | 1250 | /* First tickle the hardware, only then report what went on. -- REW */ |
1334 | v = apic_read(APIC_ESR); | 1251 | v = apic_read(APIC_ESR); |
@@ -1348,69 +1265,261 @@ fastcall void smp_error_interrupt(struct pt_regs *regs) | |||
1348 | 7: Illegal register address | 1265 | 7: Illegal register address |
1349 | */ | 1266 | */ |
1350 | printk (KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n", | 1267 | printk (KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n", |
1351 | smp_processor_id(), v , v1); | 1268 | smp_processor_id(), v , v1); |
1352 | irq_exit(); | 1269 | irq_exit(); |
1353 | } | 1270 | } |
1354 | 1271 | ||
1355 | /* | 1272 | /* |
1356 | * This initializes the IO-APIC and APIC hardware if this is | 1273 | * Initialize APIC interrupts |
1357 | * a UP kernel. | ||
1358 | */ | 1274 | */ |
1359 | int __init APIC_init_uniprocessor (void) | 1275 | void __init apic_intr_init(void) |
1360 | { | 1276 | { |
1361 | if (enable_local_apic < 0) | 1277 | #ifdef CONFIG_SMP |
1362 | clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); | 1278 | smp_intr_init(); |
1279 | #endif | ||
1280 | /* self generated IPI for local APIC timer */ | ||
1281 | set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); | ||
1363 | 1282 | ||
1364 | if (!smp_found_config && !cpu_has_apic) | 1283 | /* IPI vectors for APIC spurious and error interrupts */ |
1365 | return -1; | 1284 | set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); |
1285 | set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); | ||
1366 | 1286 | ||
1367 | /* | 1287 | /* thermal monitor LVT interrupt */ |
1368 | * Complain if the BIOS pretends there is one. | 1288 | #ifdef CONFIG_X86_MCE_P4THERMAL |
1369 | */ | 1289 | set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); |
1370 | if (!cpu_has_apic && APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { | 1290 | #endif |
1371 | printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", | 1291 | } |
1372 | boot_cpu_physical_apicid); | 1292 | |
1373 | clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); | 1293 | /** |
1374 | return -1; | 1294 | * connect_bsp_APIC - attach the APIC to the interrupt system |
1295 | */ | ||
1296 | void __init connect_bsp_APIC(void) | ||
1297 | { | ||
1298 | if (pic_mode) { | ||
1299 | /* | ||
1300 | * Do not trust the local APIC being empty at bootup. | ||
1301 | */ | ||
1302 | clear_local_APIC(); | ||
1303 | /* | ||
1304 | * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's | ||
1305 | * local APIC to INT and NMI lines. | ||
1306 | */ | ||
1307 | apic_printk(APIC_VERBOSE, "leaving PIC mode, " | ||
1308 | "enabling APIC mode.\n"); | ||
1309 | outb(0x70, 0x22); | ||
1310 | outb(0x01, 0x23); | ||
1375 | } | 1311 | } |
1312 | enable_apic_mode(); | ||
1313 | } | ||
1376 | 1314 | ||
1377 | verify_local_APIC(); | 1315 | /** |
1316 | * disconnect_bsp_APIC - detach the APIC from the interrupt system | ||
1317 | * @virt_wire_setup: indicates, whether virtual wire mode is selected | ||
1318 | * | ||
1319 | * Virtual wire mode is necessary to deliver legacy interrupts even when the | ||
1320 | * APIC is disabled. | ||
1321 | */ | ||
1322 | void disconnect_bsp_APIC(int virt_wire_setup) | ||
1323 | { | ||
1324 | if (pic_mode) { | ||
1325 | /* | ||
1326 | * Put the board back into PIC mode (has an effect only on | ||
1327 | * certain older boards). Note that APIC interrupts, including | ||
1328 | * IPIs, won't work beyond this point! The only exception are | ||
1329 | * INIT IPIs. | ||
1330 | */ | ||
1331 | apic_printk(APIC_VERBOSE, "disabling APIC mode, " | ||
1332 | "entering PIC mode.\n"); | ||
1333 | outb(0x70, 0x22); | ||
1334 | outb(0x00, 0x23); | ||
1335 | } else { | ||
1336 | /* Go back to Virtual Wire compatibility mode */ | ||
1337 | unsigned long value; | ||
1378 | 1338 | ||
1379 | connect_bsp_APIC(); | 1339 | /* For the spurious interrupt use vector F, and enable it */ |
1340 | value = apic_read(APIC_SPIV); | ||
1341 | value &= ~APIC_VECTOR_MASK; | ||
1342 | value |= APIC_SPIV_APIC_ENABLED; | ||
1343 | value |= 0xf; | ||
1344 | apic_write_around(APIC_SPIV, value); | ||
1380 | 1345 | ||
1381 | /* | 1346 | if (!virt_wire_setup) { |
1382 | * Hack: In case of kdump, after a crash, kernel might be booting | 1347 | /* |
1383 | * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid | 1348 | * For LVT0 make it edge triggered, active high, |
1384 | * might be zero if read from MP tables. Get it from LAPIC. | 1349 | * external and enabled |
1385 | */ | 1350 | */ |
1386 | #ifdef CONFIG_CRASH_DUMP | 1351 | value = apic_read(APIC_LVT0); |
1387 | boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); | 1352 | value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | |
1388 | #endif | 1353 | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | |
1389 | phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); | 1354 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED ); |
1355 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; | ||
1356 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); | ||
1357 | apic_write_around(APIC_LVT0, value); | ||
1358 | } else { | ||
1359 | /* Disable LVT0 */ | ||
1360 | apic_write_around(APIC_LVT0, APIC_LVT_MASKED); | ||
1361 | } | ||
1390 | 1362 | ||
1391 | setup_local_APIC(); | 1363 | /* |
1364 | * For LVT1 make it edge triggered, active high, nmi and | ||
1365 | * enabled | ||
1366 | */ | ||
1367 | value = apic_read(APIC_LVT1); | ||
1368 | value &= ~( | ||
1369 | APIC_MODE_MASK | APIC_SEND_PENDING | | ||
1370 | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | | ||
1371 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); | ||
1372 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; | ||
1373 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); | ||
1374 | apic_write_around(APIC_LVT1, value); | ||
1375 | } | ||
1376 | } | ||
1392 | 1377 | ||
1393 | #ifdef CONFIG_X86_IO_APIC | 1378 | /* |
1394 | if (smp_found_config) | 1379 | * Power management |
1395 | if (!skip_ioapic_setup && nr_ioapics) | 1380 | */ |
1396 | setup_IO_APIC(); | 1381 | #ifdef CONFIG_PM |
1382 | |||
1383 | static struct { | ||
1384 | int active; | ||
1385 | /* r/w apic fields */ | ||
1386 | unsigned int apic_id; | ||
1387 | unsigned int apic_taskpri; | ||
1388 | unsigned int apic_ldr; | ||
1389 | unsigned int apic_dfr; | ||
1390 | unsigned int apic_spiv; | ||
1391 | unsigned int apic_lvtt; | ||
1392 | unsigned int apic_lvtpc; | ||
1393 | unsigned int apic_lvt0; | ||
1394 | unsigned int apic_lvt1; | ||
1395 | unsigned int apic_lvterr; | ||
1396 | unsigned int apic_tmict; | ||
1397 | unsigned int apic_tdcr; | ||
1398 | unsigned int apic_thmr; | ||
1399 | } apic_pm_state; | ||
1400 | |||
1401 | static int lapic_suspend(struct sys_device *dev, pm_message_t state) | ||
1402 | { | ||
1403 | unsigned long flags; | ||
1404 | int maxlvt; | ||
1405 | |||
1406 | if (!apic_pm_state.active) | ||
1407 | return 0; | ||
1408 | |||
1409 | maxlvt = lapic_get_maxlvt(); | ||
1410 | |||
1411 | apic_pm_state.apic_id = apic_read(APIC_ID); | ||
1412 | apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); | ||
1413 | apic_pm_state.apic_ldr = apic_read(APIC_LDR); | ||
1414 | apic_pm_state.apic_dfr = apic_read(APIC_DFR); | ||
1415 | apic_pm_state.apic_spiv = apic_read(APIC_SPIV); | ||
1416 | apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); | ||
1417 | if (maxlvt >= 4) | ||
1418 | apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); | ||
1419 | apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); | ||
1420 | apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); | ||
1421 | apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); | ||
1422 | apic_pm_state.apic_tmict = apic_read(APIC_TMICT); | ||
1423 | apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); | ||
1424 | #ifdef CONFIG_X86_MCE_P4THERMAL | ||
1425 | if (maxlvt >= 5) | ||
1426 | apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); | ||
1397 | #endif | 1427 | #endif |
1398 | setup_boot_APIC_clock(); | ||
1399 | 1428 | ||
1429 | local_irq_save(flags); | ||
1430 | disable_local_APIC(); | ||
1431 | local_irq_restore(flags); | ||
1400 | return 0; | 1432 | return 0; |
1401 | } | 1433 | } |
1402 | 1434 | ||
1403 | static int __init parse_lapic(char *arg) | 1435 | static int lapic_resume(struct sys_device *dev) |
1404 | { | 1436 | { |
1405 | lapic_enable(); | 1437 | unsigned int l, h; |
1438 | unsigned long flags; | ||
1439 | int maxlvt; | ||
1440 | |||
1441 | if (!apic_pm_state.active) | ||
1442 | return 0; | ||
1443 | |||
1444 | maxlvt = lapic_get_maxlvt(); | ||
1445 | |||
1446 | local_irq_save(flags); | ||
1447 | |||
1448 | /* | ||
1449 | * Make sure the APICBASE points to the right address | ||
1450 | * | ||
1451 | * FIXME! This will be wrong if we ever support suspend on | ||
1452 | * SMP! We'll need to do this as part of the CPU restore! | ||
1453 | */ | ||
1454 | rdmsr(MSR_IA32_APICBASE, l, h); | ||
1455 | l &= ~MSR_IA32_APICBASE_BASE; | ||
1456 | l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; | ||
1457 | wrmsr(MSR_IA32_APICBASE, l, h); | ||
1458 | |||
1459 | apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); | ||
1460 | apic_write(APIC_ID, apic_pm_state.apic_id); | ||
1461 | apic_write(APIC_DFR, apic_pm_state.apic_dfr); | ||
1462 | apic_write(APIC_LDR, apic_pm_state.apic_ldr); | ||
1463 | apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri); | ||
1464 | apic_write(APIC_SPIV, apic_pm_state.apic_spiv); | ||
1465 | apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); | ||
1466 | apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); | ||
1467 | #ifdef CONFIG_X86_MCE_P4THERMAL | ||
1468 | if (maxlvt >= 5) | ||
1469 | apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); | ||
1470 | #endif | ||
1471 | if (maxlvt >= 4) | ||
1472 | apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); | ||
1473 | apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); | ||
1474 | apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); | ||
1475 | apic_write(APIC_TMICT, apic_pm_state.apic_tmict); | ||
1476 | apic_write(APIC_ESR, 0); | ||
1477 | apic_read(APIC_ESR); | ||
1478 | apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); | ||
1479 | apic_write(APIC_ESR, 0); | ||
1480 | apic_read(APIC_ESR); | ||
1481 | local_irq_restore(flags); | ||
1406 | return 0; | 1482 | return 0; |
1407 | } | 1483 | } |
1408 | early_param("lapic", parse_lapic); | ||
1409 | 1484 | ||
1410 | static int __init parse_nolapic(char *arg) | 1485 | /* |
1486 | * This device has no shutdown method - fully functioning local APICs | ||
1487 | * are needed on every CPU up until machine_halt/restart/poweroff. | ||
1488 | */ | ||
1489 | |||
1490 | static struct sysdev_class lapic_sysclass = { | ||
1491 | set_kset_name("lapic"), | ||
1492 | .resume = lapic_resume, | ||
1493 | .suspend = lapic_suspend, | ||
1494 | }; | ||
1495 | |||
1496 | static struct sys_device device_lapic = { | ||
1497 | .id = 0, | ||
1498 | .cls = &lapic_sysclass, | ||
1499 | }; | ||
1500 | |||
1501 | static void __devinit apic_pm_activate(void) | ||
1411 | { | 1502 | { |
1412 | lapic_disable(); | 1503 | apic_pm_state.active = 1; |
1413 | return 0; | ||
1414 | } | 1504 | } |
1415 | early_param("nolapic", parse_nolapic); | ||
1416 | 1505 | ||
1506 | static int __init init_lapic_sysfs(void) | ||
1507 | { | ||
1508 | int error; | ||
1509 | |||
1510 | if (!cpu_has_apic) | ||
1511 | return 0; | ||
1512 | /* XXX: remove suspend/resume procs if !apic_pm_state.active? */ | ||
1513 | |||
1514 | error = sysdev_class_register(&lapic_sysclass); | ||
1515 | if (!error) | ||
1516 | error = sysdev_register(&device_lapic); | ||
1517 | return error; | ||
1518 | } | ||
1519 | device_initcall(init_lapic_sysfs); | ||
1520 | |||
1521 | #else /* CONFIG_PM */ | ||
1522 | |||
1523 | static void apic_pm_activate(void) { } | ||
1524 | |||
1525 | #endif /* CONFIG_PM */ | ||
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index db99a8948dae..064bbf2861f4 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c | |||
@@ -211,6 +211,7 @@ | |||
211 | #include <linux/slab.h> | 211 | #include <linux/slab.h> |
212 | #include <linux/stat.h> | 212 | #include <linux/stat.h> |
213 | #include <linux/proc_fs.h> | 213 | #include <linux/proc_fs.h> |
214 | #include <linux/seq_file.h> | ||
214 | #include <linux/miscdevice.h> | 215 | #include <linux/miscdevice.h> |
215 | #include <linux/apm_bios.h> | 216 | #include <linux/apm_bios.h> |
216 | #include <linux/init.h> | 217 | #include <linux/init.h> |
@@ -235,7 +236,6 @@ | |||
235 | 236 | ||
236 | #include "io_ports.h" | 237 | #include "io_ports.h" |
237 | 238 | ||
238 | extern unsigned long get_cmos_time(void); | ||
239 | extern void machine_real_restart(unsigned char *, int); | 239 | extern void machine_real_restart(unsigned char *, int); |
240 | 240 | ||
241 | #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) | 241 | #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) |
@@ -1175,28 +1175,6 @@ out: | |||
1175 | spin_unlock(&user_list_lock); | 1175 | spin_unlock(&user_list_lock); |
1176 | } | 1176 | } |
1177 | 1177 | ||
1178 | static void set_time(void) | ||
1179 | { | ||
1180 | struct timespec ts; | ||
1181 | if (got_clock_diff) { /* Must know time zone in order to set clock */ | ||
1182 | ts.tv_sec = get_cmos_time() + clock_cmos_diff; | ||
1183 | ts.tv_nsec = 0; | ||
1184 | do_settimeofday(&ts); | ||
1185 | } | ||
1186 | } | ||
1187 | |||
1188 | static void get_time_diff(void) | ||
1189 | { | ||
1190 | #ifndef CONFIG_APM_RTC_IS_GMT | ||
1191 | /* | ||
1192 | * Estimate time zone so that set_time can update the clock | ||
1193 | */ | ||
1194 | clock_cmos_diff = -get_cmos_time(); | ||
1195 | clock_cmos_diff += get_seconds(); | ||
1196 | got_clock_diff = 1; | ||
1197 | #endif | ||
1198 | } | ||
1199 | |||
1200 | static void reinit_timer(void) | 1178 | static void reinit_timer(void) |
1201 | { | 1179 | { |
1202 | #ifdef INIT_TIMER_AFTER_SUSPEND | 1180 | #ifdef INIT_TIMER_AFTER_SUSPEND |
@@ -1236,19 +1214,6 @@ static int suspend(int vetoable) | |||
1236 | local_irq_disable(); | 1214 | local_irq_disable(); |
1237 | device_power_down(PMSG_SUSPEND); | 1215 | device_power_down(PMSG_SUSPEND); |
1238 | 1216 | ||
1239 | /* serialize with the timer interrupt */ | ||
1240 | write_seqlock(&xtime_lock); | ||
1241 | |||
1242 | /* protect against access to timer chip registers */ | ||
1243 | spin_lock(&i8253_lock); | ||
1244 | |||
1245 | get_time_diff(); | ||
1246 | /* | ||
1247 | * Irq spinlock must be dropped around set_system_power_state. | ||
1248 | * We'll undo any timer changes due to interrupts below. | ||
1249 | */ | ||
1250 | spin_unlock(&i8253_lock); | ||
1251 | write_sequnlock(&xtime_lock); | ||
1252 | local_irq_enable(); | 1217 | local_irq_enable(); |
1253 | 1218 | ||
1254 | save_processor_state(); | 1219 | save_processor_state(); |
@@ -1257,7 +1222,6 @@ static int suspend(int vetoable) | |||
1257 | restore_processor_state(); | 1222 | restore_processor_state(); |
1258 | 1223 | ||
1259 | local_irq_disable(); | 1224 | local_irq_disable(); |
1260 | set_time(); | ||
1261 | reinit_timer(); | 1225 | reinit_timer(); |
1262 | 1226 | ||
1263 | if (err == APM_NO_ERROR) | 1227 | if (err == APM_NO_ERROR) |
@@ -1287,11 +1251,6 @@ static void standby(void) | |||
1287 | 1251 | ||
1288 | local_irq_disable(); | 1252 | local_irq_disable(); |
1289 | device_power_down(PMSG_SUSPEND); | 1253 | device_power_down(PMSG_SUSPEND); |
1290 | /* serialize with the timer interrupt */ | ||
1291 | write_seqlock(&xtime_lock); | ||
1292 | /* If needed, notify drivers here */ | ||
1293 | get_time_diff(); | ||
1294 | write_sequnlock(&xtime_lock); | ||
1295 | local_irq_enable(); | 1254 | local_irq_enable(); |
1296 | 1255 | ||
1297 | err = set_system_power_state(APM_STATE_STANDBY); | 1256 | err = set_system_power_state(APM_STATE_STANDBY); |
@@ -1385,7 +1344,6 @@ static void check_events(void) | |||
1385 | ignore_bounce = 1; | 1344 | ignore_bounce = 1; |
1386 | if ((event != APM_NORMAL_RESUME) | 1345 | if ((event != APM_NORMAL_RESUME) |
1387 | || (ignore_normal_resume == 0)) { | 1346 | || (ignore_normal_resume == 0)) { |
1388 | set_time(); | ||
1389 | device_resume(); | 1347 | device_resume(); |
1390 | pm_send_all(PM_RESUME, (void *)0); | 1348 | pm_send_all(PM_RESUME, (void *)0); |
1391 | queue_event(event, NULL); | 1349 | queue_event(event, NULL); |
@@ -1401,7 +1359,6 @@ static void check_events(void) | |||
1401 | break; | 1359 | break; |
1402 | 1360 | ||
1403 | case APM_UPDATE_TIME: | 1361 | case APM_UPDATE_TIME: |
1404 | set_time(); | ||
1405 | break; | 1362 | break; |
1406 | 1363 | ||
1407 | case APM_CRITICAL_SUSPEND: | 1364 | case APM_CRITICAL_SUSPEND: |
@@ -1636,9 +1593,8 @@ static int do_open(struct inode * inode, struct file * filp) | |||
1636 | return 0; | 1593 | return 0; |
1637 | } | 1594 | } |
1638 | 1595 | ||
1639 | static int apm_get_info(char *buf, char **start, off_t fpos, int length) | 1596 | static int proc_apm_show(struct seq_file *m, void *v) |
1640 | { | 1597 | { |
1641 | char * p; | ||
1642 | unsigned short bx; | 1598 | unsigned short bx; |
1643 | unsigned short cx; | 1599 | unsigned short cx; |
1644 | unsigned short dx; | 1600 | unsigned short dx; |
@@ -1650,8 +1606,6 @@ static int apm_get_info(char *buf, char **start, off_t fpos, int length) | |||
1650 | int time_units = -1; | 1606 | int time_units = -1; |
1651 | char *units = "?"; | 1607 | char *units = "?"; |
1652 | 1608 | ||
1653 | p = buf; | ||
1654 | |||
1655 | if ((num_online_cpus() == 1) && | 1609 | if ((num_online_cpus() == 1) && |
1656 | !(error = apm_get_power_status(&bx, &cx, &dx))) { | 1610 | !(error = apm_get_power_status(&bx, &cx, &dx))) { |
1657 | ac_line_status = (bx >> 8) & 0xff; | 1611 | ac_line_status = (bx >> 8) & 0xff; |
@@ -1705,7 +1659,7 @@ static int apm_get_info(char *buf, char **start, off_t fpos, int length) | |||
1705 | -1: Unknown | 1659 | -1: Unknown |
1706 | 8) min = minutes; sec = seconds */ | 1660 | 8) min = minutes; sec = seconds */ |
1707 | 1661 | ||
1708 | p += sprintf(p, "%s %d.%d 0x%02x 0x%02x 0x%02x 0x%02x %d%% %d %s\n", | 1662 | seq_printf(m, "%s %d.%d 0x%02x 0x%02x 0x%02x 0x%02x %d%% %d %s\n", |
1709 | driver_version, | 1663 | driver_version, |
1710 | (apm_info.bios.version >> 8) & 0xff, | 1664 | (apm_info.bios.version >> 8) & 0xff, |
1711 | apm_info.bios.version & 0xff, | 1665 | apm_info.bios.version & 0xff, |
@@ -1716,10 +1670,22 @@ static int apm_get_info(char *buf, char **start, off_t fpos, int length) | |||
1716 | percentage, | 1670 | percentage, |
1717 | time_units, | 1671 | time_units, |
1718 | units); | 1672 | units); |
1673 | return 0; | ||
1674 | } | ||
1719 | 1675 | ||
1720 | return p - buf; | 1676 | static int proc_apm_open(struct inode *inode, struct file *file) |
1677 | { | ||
1678 | return single_open(file, proc_apm_show, NULL); | ||
1721 | } | 1679 | } |
1722 | 1680 | ||
1681 | static const struct file_operations apm_file_ops = { | ||
1682 | .owner = THIS_MODULE, | ||
1683 | .open = proc_apm_open, | ||
1684 | .read = seq_read, | ||
1685 | .llseek = seq_lseek, | ||
1686 | .release = single_release, | ||
1687 | }; | ||
1688 | |||
1723 | static int apm(void *unused) | 1689 | static int apm(void *unused) |
1724 | { | 1690 | { |
1725 | unsigned short bx; | 1691 | unsigned short bx; |
@@ -2341,9 +2307,9 @@ static int __init apm_init(void) | |||
2341 | set_base(gdt[APM_DS >> 3], | 2307 | set_base(gdt[APM_DS >> 3], |
2342 | __va((unsigned long)apm_info.bios.dseg << 4)); | 2308 | __va((unsigned long)apm_info.bios.dseg << 4)); |
2343 | 2309 | ||
2344 | apm_proc = create_proc_info_entry("apm", 0, NULL, apm_get_info); | 2310 | apm_proc = create_proc_entry("apm", 0, NULL); |
2345 | if (apm_proc) | 2311 | if (apm_proc) |
2346 | apm_proc->owner = THIS_MODULE; | 2312 | apm_proc->proc_fops = &apm_file_ops; |
2347 | 2313 | ||
2348 | kapmd_task = kthread_create(apm, NULL, "kapmd"); | 2314 | kapmd_task = kthread_create(apm, NULL, "kapmd"); |
2349 | if (IS_ERR(kapmd_task)) { | 2315 | if (IS_ERR(kapmd_task)) { |
diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c index 1b2f3cd33270..c37535163bfc 100644 --- a/arch/i386/kernel/asm-offsets.c +++ b/arch/i386/kernel/asm-offsets.c | |||
@@ -72,7 +72,7 @@ void foo(void) | |||
72 | OFFSET(PT_EAX, pt_regs, eax); | 72 | OFFSET(PT_EAX, pt_regs, eax); |
73 | OFFSET(PT_DS, pt_regs, xds); | 73 | OFFSET(PT_DS, pt_regs, xds); |
74 | OFFSET(PT_ES, pt_regs, xes); | 74 | OFFSET(PT_ES, pt_regs, xes); |
75 | OFFSET(PT_GS, pt_regs, xgs); | 75 | OFFSET(PT_FS, pt_regs, xfs); |
76 | OFFSET(PT_ORIG_EAX, pt_regs, orig_eax); | 76 | OFFSET(PT_ORIG_EAX, pt_regs, orig_eax); |
77 | OFFSET(PT_EIP, pt_regs, eip); | 77 | OFFSET(PT_EIP, pt_regs, eip); |
78 | OFFSET(PT_CS, pt_regs, xcs); | 78 | OFFSET(PT_CS, pt_regs, xcs); |
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 8a8bbdaaf38a..dcbbd0a8bfc2 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c | |||
@@ -605,7 +605,7 @@ void __init early_cpu_init(void) | |||
605 | struct pt_regs * __devinit idle_regs(struct pt_regs *regs) | 605 | struct pt_regs * __devinit idle_regs(struct pt_regs *regs) |
606 | { | 606 | { |
607 | memset(regs, 0, sizeof(struct pt_regs)); | 607 | memset(regs, 0, sizeof(struct pt_regs)); |
608 | regs->xgs = __KERNEL_PDA; | 608 | regs->xfs = __KERNEL_PDA; |
609 | return regs; | 609 | return regs; |
610 | } | 610 | } |
611 | 611 | ||
@@ -662,12 +662,12 @@ struct i386_pda boot_pda = { | |||
662 | .pcurrent = &init_task, | 662 | .pcurrent = &init_task, |
663 | }; | 663 | }; |
664 | 664 | ||
665 | static inline void set_kernel_gs(void) | 665 | static inline void set_kernel_fs(void) |
666 | { | 666 | { |
667 | /* Set %gs for this CPU's PDA. Memory clobber is to create a | 667 | /* Set %fs for this CPU's PDA. Memory clobber is to create a |
668 | barrier with respect to any PDA operations, so the compiler | 668 | barrier with respect to any PDA operations, so the compiler |
669 | doesn't move any before here. */ | 669 | doesn't move any before here. */ |
670 | asm volatile ("mov %0, %%gs" : : "r" (__KERNEL_PDA) : "memory"); | 670 | asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_PDA) : "memory"); |
671 | } | 671 | } |
672 | 672 | ||
673 | /* Initialize the CPU's GDT and PDA. The boot CPU does this for | 673 | /* Initialize the CPU's GDT and PDA. The boot CPU does this for |
@@ -718,7 +718,7 @@ void __cpuinit cpu_set_gdt(int cpu) | |||
718 | the boot CPU, this will transition from the boot gdt+pda to | 718 | the boot CPU, this will transition from the boot gdt+pda to |
719 | the real ones). */ | 719 | the real ones). */ |
720 | load_gdt(cpu_gdt_descr); | 720 | load_gdt(cpu_gdt_descr); |
721 | set_kernel_gs(); | 721 | set_kernel_fs(); |
722 | } | 722 | } |
723 | 723 | ||
724 | /* Common CPU init for both boot and secondary CPUs */ | 724 | /* Common CPU init for both boot and secondary CPUs */ |
@@ -764,8 +764,8 @@ static void __cpuinit _cpu_init(int cpu, struct task_struct *curr) | |||
764 | __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); | 764 | __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); |
765 | #endif | 765 | #endif |
766 | 766 | ||
767 | /* Clear %fs. */ | 767 | /* Clear %gs. */ |
768 | asm volatile ("mov %0, %%fs" : : "r" (0)); | 768 | asm volatile ("mov %0, %%gs" : : "r" (0)); |
769 | 769 | ||
770 | /* Clear all 6 debug registers: */ | 770 | /* Clear all 6 debug registers: */ |
771 | set_debugreg(0, 0); | 771 | set_debugreg(0, 0); |
diff --git a/arch/i386/kernel/cpu/cpufreq/Kconfig b/arch/i386/kernel/cpu/cpufreq/Kconfig index 5299c5bf4454..6c52182ca323 100644 --- a/arch/i386/kernel/cpu/cpufreq/Kconfig +++ b/arch/i386/kernel/cpu/cpufreq/Kconfig | |||
@@ -217,6 +217,15 @@ config X86_LONGHAUL | |||
217 | 217 | ||
218 | If in doubt, say N. | 218 | If in doubt, say N. |
219 | 219 | ||
220 | config X86_E_POWERSAVER | ||
221 | tristate "VIA C7 Enhanced PowerSaver (EXPERIMENTAL)" | ||
222 | select CPU_FREQ_TABLE | ||
223 | depends on EXPERIMENTAL | ||
224 | help | ||
225 | This adds the CPUFreq driver for VIA C7 processors. | ||
226 | |||
227 | If in doubt, say N. | ||
228 | |||
220 | comment "shared options" | 229 | comment "shared options" |
221 | 230 | ||
222 | config X86_ACPI_CPUFREQ_PROC_INTF | 231 | config X86_ACPI_CPUFREQ_PROC_INTF |
diff --git a/arch/i386/kernel/cpu/cpufreq/Makefile b/arch/i386/kernel/cpu/cpufreq/Makefile index 8de3abe322a9..560f7760dae5 100644 --- a/arch/i386/kernel/cpu/cpufreq/Makefile +++ b/arch/i386/kernel/cpu/cpufreq/Makefile | |||
@@ -2,6 +2,7 @@ obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o | |||
2 | obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o | 2 | obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o |
3 | obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o | 3 | obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o |
4 | obj-$(CONFIG_X86_LONGHAUL) += longhaul.o | 4 | obj-$(CONFIG_X86_LONGHAUL) += longhaul.o |
5 | obj-$(CONFIG_X86_E_POWERSAVER) += e_powersaver.o | ||
5 | obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o | 6 | obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o |
6 | obj-$(CONFIG_SC520_CPUFREQ) += sc520_freq.o | 7 | obj-$(CONFIG_SC520_CPUFREQ) += sc520_freq.o |
7 | obj-$(CONFIG_X86_LONGRUN) += longrun.o | 8 | obj-$(CONFIG_X86_LONGRUN) += longrun.o |
diff --git a/arch/i386/kernel/cpu/cpufreq/e_powersaver.c b/arch/i386/kernel/cpu/cpufreq/e_powersaver.c new file mode 100644 index 000000000000..f43d98e11cc7 --- /dev/null +++ b/arch/i386/kernel/cpu/cpufreq/e_powersaver.c | |||
@@ -0,0 +1,334 @@ | |||
1 | /* | ||
2 | * Based on documentation provided by Dave Jones. Thanks! | ||
3 | * | ||
4 | * Licensed under the terms of the GNU GPL License version 2. | ||
5 | * | ||
6 | * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* | ||
7 | */ | ||
8 | |||
9 | #include <linux/kernel.h> | ||
10 | #include <linux/module.h> | ||
11 | #include <linux/init.h> | ||
12 | #include <linux/cpufreq.h> | ||
13 | #include <linux/ioport.h> | ||
14 | #include <linux/slab.h> | ||
15 | |||
16 | #include <asm/msr.h> | ||
17 | #include <asm/tsc.h> | ||
18 | #include <asm/timex.h> | ||
19 | #include <asm/io.h> | ||
20 | #include <asm/delay.h> | ||
21 | |||
22 | #define EPS_BRAND_C7M 0 | ||
23 | #define EPS_BRAND_C7 1 | ||
24 | #define EPS_BRAND_EDEN 2 | ||
25 | #define EPS_BRAND_C3 3 | ||
26 | |||
27 | struct eps_cpu_data { | ||
28 | u32 fsb; | ||
29 | struct cpufreq_frequency_table freq_table[]; | ||
30 | }; | ||
31 | |||
32 | static struct eps_cpu_data *eps_cpu[NR_CPUS]; | ||
33 | |||
34 | |||
35 | static unsigned int eps_get(unsigned int cpu) | ||
36 | { | ||
37 | struct eps_cpu_data *centaur; | ||
38 | u32 lo, hi; | ||
39 | |||
40 | if (cpu) | ||
41 | return 0; | ||
42 | centaur = eps_cpu[cpu]; | ||
43 | if (centaur == NULL) | ||
44 | return 0; | ||
45 | |||
46 | /* Return current frequency */ | ||
47 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | ||
48 | return centaur->fsb * ((lo >> 8) & 0xff); | ||
49 | } | ||
50 | |||
51 | static int eps_set_state(struct eps_cpu_data *centaur, | ||
52 | unsigned int cpu, | ||
53 | u32 dest_state) | ||
54 | { | ||
55 | struct cpufreq_freqs freqs; | ||
56 | u32 lo, hi; | ||
57 | int err = 0; | ||
58 | int i; | ||
59 | |||
60 | freqs.old = eps_get(cpu); | ||
61 | freqs.new = centaur->fsb * ((dest_state >> 8) & 0xff); | ||
62 | freqs.cpu = cpu; | ||
63 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
64 | |||
65 | /* Wait while CPU is busy */ | ||
66 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | ||
67 | i = 0; | ||
68 | while (lo & ((1 << 16) | (1 << 17))) { | ||
69 | udelay(16); | ||
70 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | ||
71 | i++; | ||
72 | if (unlikely(i > 64)) { | ||
73 | err = -ENODEV; | ||
74 | goto postchange; | ||
75 | } | ||
76 | } | ||
77 | /* Set new multiplier and voltage */ | ||
78 | wrmsr(MSR_IA32_PERF_CTL, dest_state & 0xffff, 0); | ||
79 | /* Wait until transition end */ | ||
80 | i = 0; | ||
81 | do { | ||
82 | udelay(16); | ||
83 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | ||
84 | i++; | ||
85 | if (unlikely(i > 64)) { | ||
86 | err = -ENODEV; | ||
87 | goto postchange; | ||
88 | } | ||
89 | } while (lo & ((1 << 16) | (1 << 17))); | ||
90 | |||
91 | /* Return current frequency */ | ||
92 | postchange: | ||
93 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | ||
94 | freqs.new = centaur->fsb * ((lo >> 8) & 0xff); | ||
95 | |||
96 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
97 | return err; | ||
98 | } | ||
99 | |||
100 | static int eps_target(struct cpufreq_policy *policy, | ||
101 | unsigned int target_freq, | ||
102 | unsigned int relation) | ||
103 | { | ||
104 | struct eps_cpu_data *centaur; | ||
105 | unsigned int newstate = 0; | ||
106 | unsigned int cpu = policy->cpu; | ||
107 | unsigned int dest_state; | ||
108 | int ret; | ||
109 | |||
110 | if (unlikely(eps_cpu[cpu] == NULL)) | ||
111 | return -ENODEV; | ||
112 | centaur = eps_cpu[cpu]; | ||
113 | |||
114 | if (unlikely(cpufreq_frequency_table_target(policy, | ||
115 | &eps_cpu[cpu]->freq_table[0], | ||
116 | target_freq, | ||
117 | relation, | ||
118 | &newstate))) { | ||
119 | return -EINVAL; | ||
120 | } | ||
121 | |||
122 | /* Make frequency transition */ | ||
123 | dest_state = centaur->freq_table[newstate].index & 0xffff; | ||
124 | ret = eps_set_state(centaur, cpu, dest_state); | ||
125 | if (ret) | ||
126 | printk(KERN_ERR "eps: Timeout!\n"); | ||
127 | return ret; | ||
128 | } | ||
129 | |||
130 | static int eps_verify(struct cpufreq_policy *policy) | ||
131 | { | ||
132 | return cpufreq_frequency_table_verify(policy, | ||
133 | &eps_cpu[policy->cpu]->freq_table[0]); | ||
134 | } | ||
135 | |||
136 | static int eps_cpu_init(struct cpufreq_policy *policy) | ||
137 | { | ||
138 | unsigned int i; | ||
139 | u32 lo, hi; | ||
140 | u64 val; | ||
141 | u8 current_multiplier, current_voltage; | ||
142 | u8 max_multiplier, max_voltage; | ||
143 | u8 min_multiplier, min_voltage; | ||
144 | u8 brand; | ||
145 | u32 fsb; | ||
146 | struct eps_cpu_data *centaur; | ||
147 | struct cpufreq_frequency_table *f_table; | ||
148 | int k, step, voltage; | ||
149 | int ret; | ||
150 | int states; | ||
151 | |||
152 | if (policy->cpu != 0) | ||
153 | return -ENODEV; | ||
154 | |||
155 | /* Check brand */ | ||
156 | printk("eps: Detected VIA "); | ||
157 | rdmsr(0x1153, lo, hi); | ||
158 | brand = (((lo >> 2) ^ lo) >> 18) & 3; | ||
159 | switch(brand) { | ||
160 | case EPS_BRAND_C7M: | ||
161 | printk("C7-M\n"); | ||
162 | break; | ||
163 | case EPS_BRAND_C7: | ||
164 | printk("C7\n"); | ||
165 | break; | ||
166 | case EPS_BRAND_EDEN: | ||
167 | printk("Eden\n"); | ||
168 | break; | ||
169 | case EPS_BRAND_C3: | ||
170 | printk("C3\n"); | ||
171 | return -ENODEV; | ||
172 | break; | ||
173 | } | ||
174 | /* Enable Enhanced PowerSaver */ | ||
175 | rdmsrl(MSR_IA32_MISC_ENABLE, val); | ||
176 | if (!(val & 1 << 16)) { | ||
177 | val |= 1 << 16; | ||
178 | wrmsrl(MSR_IA32_MISC_ENABLE, val); | ||
179 | /* Can be locked at 0 */ | ||
180 | rdmsrl(MSR_IA32_MISC_ENABLE, val); | ||
181 | if (!(val & 1 << 16)) { | ||
182 | printk("eps: Can't enable Enhanced PowerSaver\n"); | ||
183 | return -ENODEV; | ||
184 | } | ||
185 | } | ||
186 | |||
187 | /* Print voltage and multiplier */ | ||
188 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | ||
189 | current_voltage = lo & 0xff; | ||
190 | printk("eps: Current voltage = %dmV\n", current_voltage * 16 + 700); | ||
191 | current_multiplier = (lo >> 8) & 0xff; | ||
192 | printk("eps: Current multiplier = %d\n", current_multiplier); | ||
193 | |||
194 | /* Print limits */ | ||
195 | max_voltage = hi & 0xff; | ||
196 | printk("eps: Highest voltage = %dmV\n", max_voltage * 16 + 700); | ||
197 | max_multiplier = (hi >> 8) & 0xff; | ||
198 | printk("eps: Highest multiplier = %d\n", max_multiplier); | ||
199 | min_voltage = (hi >> 16) & 0xff; | ||
200 | printk("eps: Lowest voltage = %dmV\n", min_voltage * 16 + 700); | ||
201 | min_multiplier = (hi >> 24) & 0xff; | ||
202 | printk("eps: Lowest multiplier = %d\n", min_multiplier); | ||
203 | |||
204 | /* Sanity checks */ | ||
205 | if (current_multiplier == 0 || max_multiplier == 0 | ||
206 | || min_multiplier == 0) | ||
207 | return -EINVAL; | ||
208 | if (current_multiplier > max_multiplier | ||
209 | || max_multiplier <= min_multiplier) | ||
210 | return -EINVAL; | ||
211 | if (current_voltage > 0x1c || max_voltage > 0x1c) | ||
212 | return -EINVAL; | ||
213 | if (max_voltage < min_voltage) | ||
214 | return -EINVAL; | ||
215 | |||
216 | /* Calc FSB speed */ | ||
217 | fsb = cpu_khz / current_multiplier; | ||
218 | /* Calc number of p-states supported */ | ||
219 | if (brand == EPS_BRAND_C7M) | ||
220 | states = max_multiplier - min_multiplier + 1; | ||
221 | else | ||
222 | states = 2; | ||
223 | |||
224 | /* Allocate private data and frequency table for current cpu */ | ||
225 | centaur = kzalloc(sizeof(struct eps_cpu_data) | ||
226 | + (states + 1) * sizeof(struct cpufreq_frequency_table), | ||
227 | GFP_KERNEL); | ||
228 | if (!centaur) | ||
229 | return -ENOMEM; | ||
230 | eps_cpu[0] = centaur; | ||
231 | |||
232 | /* Copy basic values */ | ||
233 | centaur->fsb = fsb; | ||
234 | |||
235 | /* Fill frequency and MSR value table */ | ||
236 | f_table = ¢aur->freq_table[0]; | ||
237 | if (brand != EPS_BRAND_C7M) { | ||
238 | f_table[0].frequency = fsb * min_multiplier; | ||
239 | f_table[0].index = (min_multiplier << 8) | min_voltage; | ||
240 | f_table[1].frequency = fsb * max_multiplier; | ||
241 | f_table[1].index = (max_multiplier << 8) | max_voltage; | ||
242 | f_table[2].frequency = CPUFREQ_TABLE_END; | ||
243 | } else { | ||
244 | k = 0; | ||
245 | step = ((max_voltage - min_voltage) * 256) | ||
246 | / (max_multiplier - min_multiplier); | ||
247 | for (i = min_multiplier; i <= max_multiplier; i++) { | ||
248 | voltage = (k * step) / 256 + min_voltage; | ||
249 | f_table[k].frequency = fsb * i; | ||
250 | f_table[k].index = (i << 8) | voltage; | ||
251 | k++; | ||
252 | } | ||
253 | f_table[k].frequency = CPUFREQ_TABLE_END; | ||
254 | } | ||
255 | |||
256 | policy->governor = CPUFREQ_DEFAULT_GOVERNOR; | ||
257 | policy->cpuinfo.transition_latency = 140000; /* 844mV -> 700mV in ns */ | ||
258 | policy->cur = fsb * current_multiplier; | ||
259 | |||
260 | ret = cpufreq_frequency_table_cpuinfo(policy, ¢aur->freq_table[0]); | ||
261 | if (ret) { | ||
262 | kfree(centaur); | ||
263 | return ret; | ||
264 | } | ||
265 | |||
266 | cpufreq_frequency_table_get_attr(¢aur->freq_table[0], policy->cpu); | ||
267 | return 0; | ||
268 | } | ||
269 | |||
270 | static int eps_cpu_exit(struct cpufreq_policy *policy) | ||
271 | { | ||
272 | unsigned int cpu = policy->cpu; | ||
273 | struct eps_cpu_data *centaur; | ||
274 | u32 lo, hi; | ||
275 | |||
276 | if (eps_cpu[cpu] == NULL) | ||
277 | return -ENODEV; | ||
278 | centaur = eps_cpu[cpu]; | ||
279 | |||
280 | /* Get max frequency */ | ||
281 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | ||
282 | /* Set max frequency */ | ||
283 | eps_set_state(centaur, cpu, hi & 0xffff); | ||
284 | /* Bye */ | ||
285 | cpufreq_frequency_table_put_attr(policy->cpu); | ||
286 | kfree(eps_cpu[cpu]); | ||
287 | eps_cpu[cpu] = NULL; | ||
288 | return 0; | ||
289 | } | ||
290 | |||
291 | static struct freq_attr* eps_attr[] = { | ||
292 | &cpufreq_freq_attr_scaling_available_freqs, | ||
293 | NULL, | ||
294 | }; | ||
295 | |||
296 | static struct cpufreq_driver eps_driver = { | ||
297 | .verify = eps_verify, | ||
298 | .target = eps_target, | ||
299 | .init = eps_cpu_init, | ||
300 | .exit = eps_cpu_exit, | ||
301 | .get = eps_get, | ||
302 | .name = "e_powersaver", | ||
303 | .owner = THIS_MODULE, | ||
304 | .attr = eps_attr, | ||
305 | }; | ||
306 | |||
307 | static int __init eps_init(void) | ||
308 | { | ||
309 | struct cpuinfo_x86 *c = cpu_data; | ||
310 | |||
311 | /* This driver will work only on Centaur C7 processors with | ||
312 | * Enhanced SpeedStep/PowerSaver registers */ | ||
313 | if (c->x86_vendor != X86_VENDOR_CENTAUR | ||
314 | || c->x86 != 6 || c->x86_model != 10) | ||
315 | return -ENODEV; | ||
316 | if (!cpu_has(c, X86_FEATURE_EST)) | ||
317 | return -ENODEV; | ||
318 | |||
319 | if (cpufreq_register_driver(&eps_driver)) | ||
320 | return -EINVAL; | ||
321 | return 0; | ||
322 | } | ||
323 | |||
324 | static void __exit eps_exit(void) | ||
325 | { | ||
326 | cpufreq_unregister_driver(&eps_driver); | ||
327 | } | ||
328 | |||
329 | MODULE_AUTHOR("Rafa³ Bilski <rafalbilski@interia.pl>"); | ||
330 | MODULE_DESCRIPTION("Enhanced PowerSaver driver for VIA C7 CPU's."); | ||
331 | MODULE_LICENSE("GPL"); | ||
332 | |||
333 | module_init(eps_init); | ||
334 | module_exit(eps_exit); | ||
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c index a3db9332d652..b59878a0d9b3 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.c +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c | |||
@@ -8,12 +8,11 @@ | |||
8 | * VIA have currently 3 different versions of Longhaul. | 8 | * VIA have currently 3 different versions of Longhaul. |
9 | * Version 1 (Longhaul) uses the BCR2 MSR at 0x1147. | 9 | * Version 1 (Longhaul) uses the BCR2 MSR at 0x1147. |
10 | * It is present only in Samuel 1 (C5A), Samuel 2 (C5B) stepping 0. | 10 | * It is present only in Samuel 1 (C5A), Samuel 2 (C5B) stepping 0. |
11 | * Version 2 of longhaul is the same as v1, but adds voltage scaling. | 11 | * Version 2 of longhaul is backward compatible with v1, but adds |
12 | * Present in Samuel 2 (steppings 1-7 only) (C5B), and Ezra (C5C) | 12 | * LONGHAUL MSR for purpose of both frequency and voltage scaling. |
13 | * voltage scaling support has currently been disabled in this driver | 13 | * Present in Samuel 2 (steppings 1-7 only) (C5B), and Ezra (C5C). |
14 | * until we have code that gets it right. | ||
15 | * Version 3 of longhaul got renamed to Powersaver and redesigned | 14 | * Version 3 of longhaul got renamed to Powersaver and redesigned |
16 | * to use the POWERSAVER MSR at 0x110a. | 15 | * to use only the POWERSAVER MSR at 0x110a. |
17 | * It is present in Ezra-T (C5M), Nehemiah (C5X) and above. | 16 | * It is present in Ezra-T (C5M), Nehemiah (C5X) and above. |
18 | * It's pretty much the same feature wise to longhaul v2, though | 17 | * It's pretty much the same feature wise to longhaul v2, though |
19 | * there is provision for scaling FSB too, but this doesn't work | 18 | * there is provision for scaling FSB too, but this doesn't work |
@@ -51,10 +50,12 @@ | |||
51 | #define CPU_EZRA 3 | 50 | #define CPU_EZRA 3 |
52 | #define CPU_EZRA_T 4 | 51 | #define CPU_EZRA_T 4 |
53 | #define CPU_NEHEMIAH 5 | 52 | #define CPU_NEHEMIAH 5 |
53 | #define CPU_NEHEMIAH_C 6 | ||
54 | 54 | ||
55 | /* Flags */ | 55 | /* Flags */ |
56 | #define USE_ACPI_C3 (1 << 1) | 56 | #define USE_ACPI_C3 (1 << 1) |
57 | #define USE_NORTHBRIDGE (1 << 2) | 57 | #define USE_NORTHBRIDGE (1 << 2) |
58 | #define USE_VT8235 (1 << 3) | ||
58 | 59 | ||
59 | static int cpu_model; | 60 | static int cpu_model; |
60 | static unsigned int numscales=16; | 61 | static unsigned int numscales=16; |
@@ -63,7 +64,8 @@ static unsigned int fsb; | |||
63 | static struct mV_pos *vrm_mV_table; | 64 | static struct mV_pos *vrm_mV_table; |
64 | static unsigned char *mV_vrm_table; | 65 | static unsigned char *mV_vrm_table; |
65 | struct f_msr { | 66 | struct f_msr { |
66 | unsigned char vrm; | 67 | u8 vrm; |
68 | u8 pos; | ||
67 | }; | 69 | }; |
68 | static struct f_msr f_msr_table[32]; | 70 | static struct f_msr f_msr_table[32]; |
69 | 71 | ||
@@ -73,10 +75,10 @@ static int can_scale_voltage; | |||
73 | static struct acpi_processor *pr = NULL; | 75 | static struct acpi_processor *pr = NULL; |
74 | static struct acpi_processor_cx *cx = NULL; | 76 | static struct acpi_processor_cx *cx = NULL; |
75 | static u8 longhaul_flags; | 77 | static u8 longhaul_flags; |
78 | static u8 longhaul_pos; | ||
76 | 79 | ||
77 | /* Module parameters */ | 80 | /* Module parameters */ |
78 | static int scale_voltage; | 81 | static int scale_voltage; |
79 | static int ignore_latency; | ||
80 | 82 | ||
81 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg) | 83 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg) |
82 | 84 | ||
@@ -164,26 +166,47 @@ static void do_longhaul1(unsigned int clock_ratio_index) | |||
164 | static void do_powersaver(int cx_address, unsigned int clock_ratio_index) | 166 | static void do_powersaver(int cx_address, unsigned int clock_ratio_index) |
165 | { | 167 | { |
166 | union msr_longhaul longhaul; | 168 | union msr_longhaul longhaul; |
169 | u8 dest_pos; | ||
167 | u32 t; | 170 | u32 t; |
168 | 171 | ||
172 | dest_pos = f_msr_table[clock_ratio_index].pos; | ||
173 | |||
169 | rdmsrl(MSR_VIA_LONGHAUL, longhaul.val); | 174 | rdmsrl(MSR_VIA_LONGHAUL, longhaul.val); |
175 | /* Setup new frequency */ | ||
170 | longhaul.bits.RevisionKey = longhaul.bits.RevisionID; | 176 | longhaul.bits.RevisionKey = longhaul.bits.RevisionID; |
171 | longhaul.bits.SoftBusRatio = clock_ratio_index & 0xf; | 177 | longhaul.bits.SoftBusRatio = clock_ratio_index & 0xf; |
172 | longhaul.bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4; | 178 | longhaul.bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4; |
173 | longhaul.bits.EnableSoftBusRatio = 1; | 179 | /* Setup new voltage */ |
174 | 180 | if (can_scale_voltage) | |
175 | if (can_scale_voltage) { | ||
176 | longhaul.bits.SoftVID = f_msr_table[clock_ratio_index].vrm; | 181 | longhaul.bits.SoftVID = f_msr_table[clock_ratio_index].vrm; |
182 | /* Sync to timer tick */ | ||
183 | safe_halt(); | ||
184 | /* Raise voltage if necessary */ | ||
185 | if (can_scale_voltage && longhaul_pos < dest_pos) { | ||
177 | longhaul.bits.EnableSoftVID = 1; | 186 | longhaul.bits.EnableSoftVID = 1; |
187 | wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); | ||
188 | /* Change voltage */ | ||
189 | if (!cx_address) { | ||
190 | ACPI_FLUSH_CPU_CACHE(); | ||
191 | halt(); | ||
192 | } else { | ||
193 | ACPI_FLUSH_CPU_CACHE(); | ||
194 | /* Invoke C3 */ | ||
195 | inb(cx_address); | ||
196 | /* Dummy op - must do something useless after P_LVL3 | ||
197 | * read */ | ||
198 | t = inl(acpi_gbl_FADT.xpm_timer_block.address); | ||
199 | } | ||
200 | longhaul.bits.EnableSoftVID = 0; | ||
201 | wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); | ||
202 | longhaul_pos = dest_pos; | ||
178 | } | 203 | } |
179 | 204 | ||
180 | /* Sync to timer tick */ | ||
181 | safe_halt(); | ||
182 | /* Change frequency on next halt or sleep */ | 205 | /* Change frequency on next halt or sleep */ |
206 | longhaul.bits.EnableSoftBusRatio = 1; | ||
183 | wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); | 207 | wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); |
184 | if (!cx_address) { | 208 | if (!cx_address) { |
185 | ACPI_FLUSH_CPU_CACHE(); | 209 | ACPI_FLUSH_CPU_CACHE(); |
186 | /* Invoke C1 */ | ||
187 | halt(); | 210 | halt(); |
188 | } else { | 211 | } else { |
189 | ACPI_FLUSH_CPU_CACHE(); | 212 | ACPI_FLUSH_CPU_CACHE(); |
@@ -193,12 +216,29 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index) | |||
193 | t = inl(acpi_gbl_FADT.xpm_timer_block.address); | 216 | t = inl(acpi_gbl_FADT.xpm_timer_block.address); |
194 | } | 217 | } |
195 | /* Disable bus ratio bit */ | 218 | /* Disable bus ratio bit */ |
196 | local_irq_disable(); | ||
197 | longhaul.bits.RevisionKey = longhaul.bits.RevisionID; | ||
198 | longhaul.bits.EnableSoftBusRatio = 0; | 219 | longhaul.bits.EnableSoftBusRatio = 0; |
199 | longhaul.bits.EnableSoftBSEL = 0; | ||
200 | longhaul.bits.EnableSoftVID = 0; | ||
201 | wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); | 220 | wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); |
221 | |||
222 | /* Reduce voltage if necessary */ | ||
223 | if (can_scale_voltage && longhaul_pos > dest_pos) { | ||
224 | longhaul.bits.EnableSoftVID = 1; | ||
225 | wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); | ||
226 | /* Change voltage */ | ||
227 | if (!cx_address) { | ||
228 | ACPI_FLUSH_CPU_CACHE(); | ||
229 | halt(); | ||
230 | } else { | ||
231 | ACPI_FLUSH_CPU_CACHE(); | ||
232 | /* Invoke C3 */ | ||
233 | inb(cx_address); | ||
234 | /* Dummy op - must do something useless after P_LVL3 | ||
235 | * read */ | ||
236 | t = inl(acpi_gbl_FADT.xpm_timer_block.address); | ||
237 | } | ||
238 | longhaul.bits.EnableSoftVID = 0; | ||
239 | wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); | ||
240 | longhaul_pos = dest_pos; | ||
241 | } | ||
202 | } | 242 | } |
203 | 243 | ||
204 | /** | 244 | /** |
@@ -257,26 +297,19 @@ static void longhaul_setstate(unsigned int clock_ratio_index) | |||
257 | /* | 297 | /* |
258 | * Longhaul v1. (Samuel[C5A] and Samuel2 stepping 0[C5B]) | 298 | * Longhaul v1. (Samuel[C5A] and Samuel2 stepping 0[C5B]) |
259 | * Software controlled multipliers only. | 299 | * Software controlled multipliers only. |
260 | * | ||
261 | * *NB* Until we get voltage scaling working v1 & v2 are the same code. | ||
262 | * Longhaul v2 appears in Samuel2 Steppings 1->7 [C5b] and Ezra [C5C] | ||
263 | */ | 300 | */ |
264 | case TYPE_LONGHAUL_V1: | 301 | case TYPE_LONGHAUL_V1: |
265 | case TYPE_LONGHAUL_V2: | ||
266 | do_longhaul1(clock_ratio_index); | 302 | do_longhaul1(clock_ratio_index); |
267 | break; | 303 | break; |
268 | 304 | ||
269 | /* | 305 | /* |
306 | * Longhaul v2 appears in Samuel2 Steppings 1->7 [C5B] and Ezra [C5C] | ||
307 | * | ||
270 | * Longhaul v3 (aka Powersaver). (Ezra-T [C5M] & Nehemiah [C5N]) | 308 | * Longhaul v3 (aka Powersaver). (Ezra-T [C5M] & Nehemiah [C5N]) |
271 | * We can scale voltage with this too, but that's currently | ||
272 | * disabled until we come up with a decent 'match freq to voltage' | ||
273 | * algorithm. | ||
274 | * When we add voltage scaling, we will also need to do the | ||
275 | * voltage/freq setting in order depending on the direction | ||
276 | * of scaling (like we do in powernow-k7.c) | ||
277 | * Nehemiah can do FSB scaling too, but this has never been proven | 309 | * Nehemiah can do FSB scaling too, but this has never been proven |
278 | * to work in practice. | 310 | * to work in practice. |
279 | */ | 311 | */ |
312 | case TYPE_LONGHAUL_V2: | ||
280 | case TYPE_POWERSAVER: | 313 | case TYPE_POWERSAVER: |
281 | if (longhaul_flags & USE_ACPI_C3) { | 314 | if (longhaul_flags & USE_ACPI_C3) { |
282 | /* Don't allow wakeup */ | 315 | /* Don't allow wakeup */ |
@@ -301,6 +334,7 @@ static void longhaul_setstate(unsigned int clock_ratio_index) | |||
301 | local_irq_restore(flags); | 334 | local_irq_restore(flags); |
302 | preempt_enable(); | 335 | preempt_enable(); |
303 | 336 | ||
337 | freqs.new = calc_speed(longhaul_get_cpu_mult()); | ||
304 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 338 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
305 | } | 339 | } |
306 | 340 | ||
@@ -315,31 +349,19 @@ static void longhaul_setstate(unsigned int clock_ratio_index) | |||
315 | 349 | ||
316 | #define ROUNDING 0xf | 350 | #define ROUNDING 0xf |
317 | 351 | ||
318 | static int _guess(int guess, int mult) | ||
319 | { | ||
320 | int target; | ||
321 | |||
322 | target = ((mult/10)*guess); | ||
323 | if (mult%10 != 0) | ||
324 | target += (guess/2); | ||
325 | target += ROUNDING/2; | ||
326 | target &= ~ROUNDING; | ||
327 | return target; | ||
328 | } | ||
329 | |||
330 | |||
331 | static int guess_fsb(int mult) | 352 | static int guess_fsb(int mult) |
332 | { | 353 | { |
333 | int speed = (cpu_khz/1000); | 354 | int speed = cpu_khz / 1000; |
334 | int i; | 355 | int i; |
335 | int speeds[] = { 66, 100, 133, 200 }; | 356 | int speeds[] = { 666, 1000, 1333, 2000 }; |
336 | 357 | int f_max, f_min; | |
337 | speed += ROUNDING/2; | 358 | |
338 | speed &= ~ROUNDING; | 359 | for (i = 0; i < 4; i++) { |
339 | 360 | f_max = ((speeds[i] * mult) + 50) / 100; | |
340 | for (i=0; i<4; i++) { | 361 | f_max += (ROUNDING / 2); |
341 | if (_guess(speeds[i], mult) == speed) | 362 | f_min = f_max - ROUNDING; |
342 | return speeds[i]; | 363 | if ((speed <= f_max) && (speed >= f_min)) |
364 | return speeds[i] / 10; | ||
343 | } | 365 | } |
344 | return 0; | 366 | return 0; |
345 | } | 367 | } |
@@ -347,67 +369,40 @@ static int guess_fsb(int mult) | |||
347 | 369 | ||
348 | static int __init longhaul_get_ranges(void) | 370 | static int __init longhaul_get_ranges(void) |
349 | { | 371 | { |
350 | unsigned long invalue; | ||
351 | unsigned int ezra_t_multipliers[32]= { | ||
352 | 90, 30, 40, 100, 55, 35, 45, 95, | ||
353 | 50, 70, 80, 60, 120, 75, 85, 65, | ||
354 | -1, 110, 120, -1, 135, 115, 125, 105, | ||
355 | 130, 150, 160, 140, -1, 155, -1, 145 }; | ||
356 | unsigned int j, k = 0; | 372 | unsigned int j, k = 0; |
357 | union msr_longhaul longhaul; | 373 | int mult; |
358 | int mult = 0; | ||
359 | 374 | ||
360 | switch (longhaul_version) { | 375 | /* Get current frequency */ |
361 | case TYPE_LONGHAUL_V1: | 376 | mult = longhaul_get_cpu_mult(); |
362 | case TYPE_LONGHAUL_V2: | 377 | if (mult == -1) { |
363 | /* Ugh, Longhaul v1 didn't have the min/max MSRs. | 378 | printk(KERN_INFO PFX "Invalid (reserved) multiplier!\n"); |
364 | Assume min=3.0x & max = whatever we booted at. */ | 379 | return -EINVAL; |
380 | } | ||
381 | fsb = guess_fsb(mult); | ||
382 | if (fsb == 0) { | ||
383 | printk(KERN_INFO PFX "Invalid (reserved) FSB!\n"); | ||
384 | return -EINVAL; | ||
385 | } | ||
386 | /* Get max multiplier - as we always did. | ||
387 | * Longhaul MSR is usefull only when voltage scaling is enabled. | ||
388 | * C3 is booting at max anyway. */ | ||
389 | maxmult = mult; | ||
390 | /* Get min multiplier */ | ||
391 | switch (cpu_model) { | ||
392 | case CPU_NEHEMIAH: | ||
393 | minmult = 50; | ||
394 | break; | ||
395 | case CPU_NEHEMIAH_C: | ||
396 | minmult = 40; | ||
397 | break; | ||
398 | default: | ||
365 | minmult = 30; | 399 | minmult = 30; |
366 | maxmult = mult = longhaul_get_cpu_mult(); | ||
367 | break; | 400 | break; |
368 | |||
369 | case TYPE_POWERSAVER: | ||
370 | /* Ezra-T */ | ||
371 | if (cpu_model==CPU_EZRA_T) { | ||
372 | minmult = 30; | ||
373 | rdmsrl (MSR_VIA_LONGHAUL, longhaul.val); | ||
374 | invalue = longhaul.bits.MaxMHzBR; | ||
375 | if (longhaul.bits.MaxMHzBR4) | ||
376 | invalue += 16; | ||
377 | maxmult = mult = ezra_t_multipliers[invalue]; | ||
378 | break; | ||
379 | } | ||
380 | |||
381 | /* Nehemiah */ | ||
382 | if (cpu_model==CPU_NEHEMIAH) { | ||
383 | rdmsrl (MSR_VIA_LONGHAUL, longhaul.val); | ||
384 | |||
385 | /* | ||
386 | * TODO: This code works, but raises a lot of questions. | ||
387 | * - Some Nehemiah's seem to have broken Min/MaxMHzBR's. | ||
388 | * We get around this by using a hardcoded multiplier of 4.0x | ||
389 | * for the minimimum speed, and the speed we booted up at for the max. | ||
390 | * This is done in longhaul_get_cpu_mult() by reading the EBLCR register. | ||
391 | * - According to some VIA documentation EBLCR is only | ||
392 | * in pre-Nehemiah C3s. How this still works is a mystery. | ||
393 | * We're possibly using something undocumented and unsupported, | ||
394 | * But it works, so we don't grumble. | ||
395 | */ | ||
396 | minmult=40; | ||
397 | maxmult = mult = longhaul_get_cpu_mult(); | ||
398 | break; | ||
399 | } | ||
400 | } | 401 | } |
401 | fsb = guess_fsb(mult); | ||
402 | 402 | ||
403 | dprintk ("MinMult:%d.%dx MaxMult:%d.%dx\n", | 403 | dprintk ("MinMult:%d.%dx MaxMult:%d.%dx\n", |
404 | minmult/10, minmult%10, maxmult/10, maxmult%10); | 404 | minmult/10, minmult%10, maxmult/10, maxmult%10); |
405 | 405 | ||
406 | if (fsb == 0) { | ||
407 | printk (KERN_INFO PFX "Invalid (reserved) FSB!\n"); | ||
408 | return -EINVAL; | ||
409 | } | ||
410 | |||
411 | highest_speed = calc_speed(maxmult); | 406 | highest_speed = calc_speed(maxmult); |
412 | lowest_speed = calc_speed(minmult); | 407 | lowest_speed = calc_speed(minmult); |
413 | dprintk ("FSB:%dMHz Lowest speed: %s Highest speed:%s\n", fsb, | 408 | dprintk ("FSB:%dMHz Lowest speed: %s Highest speed:%s\n", fsb, |
@@ -455,6 +450,7 @@ static void __init longhaul_setup_voltagescaling(void) | |||
455 | union msr_longhaul longhaul; | 450 | union msr_longhaul longhaul; |
456 | struct mV_pos minvid, maxvid; | 451 | struct mV_pos minvid, maxvid; |
457 | unsigned int j, speed, pos, kHz_step, numvscales; | 452 | unsigned int j, speed, pos, kHz_step, numvscales; |
453 | int min_vid_speed; | ||
458 | 454 | ||
459 | rdmsrl(MSR_VIA_LONGHAUL, longhaul.val); | 455 | rdmsrl(MSR_VIA_LONGHAUL, longhaul.val); |
460 | if (!(longhaul.bits.RevisionID & 1)) { | 456 | if (!(longhaul.bits.RevisionID & 1)) { |
@@ -468,14 +464,14 @@ static void __init longhaul_setup_voltagescaling(void) | |||
468 | mV_vrm_table = &mV_vrm85[0]; | 464 | mV_vrm_table = &mV_vrm85[0]; |
469 | } else { | 465 | } else { |
470 | printk (KERN_INFO PFX "Mobile VRM\n"); | 466 | printk (KERN_INFO PFX "Mobile VRM\n"); |
467 | if (cpu_model < CPU_NEHEMIAH) | ||
468 | return; | ||
471 | vrm_mV_table = &mobilevrm_mV[0]; | 469 | vrm_mV_table = &mobilevrm_mV[0]; |
472 | mV_vrm_table = &mV_mobilevrm[0]; | 470 | mV_vrm_table = &mV_mobilevrm[0]; |
473 | } | 471 | } |
474 | 472 | ||
475 | minvid = vrm_mV_table[longhaul.bits.MinimumVID]; | 473 | minvid = vrm_mV_table[longhaul.bits.MinimumVID]; |
476 | maxvid = vrm_mV_table[longhaul.bits.MaximumVID]; | 474 | maxvid = vrm_mV_table[longhaul.bits.MaximumVID]; |
477 | numvscales = maxvid.pos - minvid.pos + 1; | ||
478 | kHz_step = (highest_speed - lowest_speed) / numvscales; | ||
479 | 475 | ||
480 | if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) { | 476 | if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) { |
481 | printk (KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. " | 477 | printk (KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. " |
@@ -491,20 +487,59 @@ static void __init longhaul_setup_voltagescaling(void) | |||
491 | return; | 487 | return; |
492 | } | 488 | } |
493 | 489 | ||
494 | printk(KERN_INFO PFX "Max VID=%d.%03d Min VID=%d.%03d, %d possible voltage scales\n", | 490 | /* How many voltage steps */ |
491 | numvscales = maxvid.pos - minvid.pos + 1; | ||
492 | printk(KERN_INFO PFX | ||
493 | "Max VID=%d.%03d " | ||
494 | "Min VID=%d.%03d, " | ||
495 | "%d possible voltage scales\n", | ||
495 | maxvid.mV/1000, maxvid.mV%1000, | 496 | maxvid.mV/1000, maxvid.mV%1000, |
496 | minvid.mV/1000, minvid.mV%1000, | 497 | minvid.mV/1000, minvid.mV%1000, |
497 | numvscales); | 498 | numvscales); |
498 | 499 | ||
500 | /* Calculate max frequency at min voltage */ | ||
501 | j = longhaul.bits.MinMHzBR; | ||
502 | if (longhaul.bits.MinMHzBR4) | ||
503 | j += 16; | ||
504 | min_vid_speed = eblcr_table[j]; | ||
505 | if (min_vid_speed == -1) | ||
506 | return; | ||
507 | switch (longhaul.bits.MinMHzFSB) { | ||
508 | case 0: | ||
509 | min_vid_speed *= 13333; | ||
510 | break; | ||
511 | case 1: | ||
512 | min_vid_speed *= 10000; | ||
513 | break; | ||
514 | case 3: | ||
515 | min_vid_speed *= 6666; | ||
516 | break; | ||
517 | default: | ||
518 | return; | ||
519 | break; | ||
520 | } | ||
521 | if (min_vid_speed >= highest_speed) | ||
522 | return; | ||
523 | /* Calculate kHz for one voltage step */ | ||
524 | kHz_step = (highest_speed - min_vid_speed) / numvscales; | ||
525 | |||
526 | |||
499 | j = 0; | 527 | j = 0; |
500 | while (longhaul_table[j].frequency != CPUFREQ_TABLE_END) { | 528 | while (longhaul_table[j].frequency != CPUFREQ_TABLE_END) { |
501 | speed = longhaul_table[j].frequency; | 529 | speed = longhaul_table[j].frequency; |
502 | pos = (speed - lowest_speed) / kHz_step + minvid.pos; | 530 | if (speed > min_vid_speed) |
531 | pos = (speed - min_vid_speed) / kHz_step + minvid.pos; | ||
532 | else | ||
533 | pos = minvid.pos; | ||
503 | f_msr_table[longhaul_table[j].index].vrm = mV_vrm_table[pos]; | 534 | f_msr_table[longhaul_table[j].index].vrm = mV_vrm_table[pos]; |
535 | f_msr_table[longhaul_table[j].index].pos = pos; | ||
504 | j++; | 536 | j++; |
505 | } | 537 | } |
506 | 538 | ||
539 | longhaul_pos = maxvid.pos; | ||
507 | can_scale_voltage = 1; | 540 | can_scale_voltage = 1; |
541 | printk(KERN_INFO PFX "Voltage scaling enabled. " | ||
542 | "Use of \"conservative\" governor is highly recommended.\n"); | ||
508 | } | 543 | } |
509 | 544 | ||
510 | 545 | ||
@@ -573,20 +608,51 @@ static int enable_arbiter_disable(void) | |||
573 | if (dev != NULL) { | 608 | if (dev != NULL) { |
574 | /* Enable access to port 0x22 */ | 609 | /* Enable access to port 0x22 */ |
575 | pci_read_config_byte(dev, reg, &pci_cmd); | 610 | pci_read_config_byte(dev, reg, &pci_cmd); |
576 | if ( !(pci_cmd & 1<<7) ) { | 611 | if (!(pci_cmd & 1<<7)) { |
577 | pci_cmd |= 1<<7; | 612 | pci_cmd |= 1<<7; |
578 | pci_write_config_byte(dev, reg, pci_cmd); | 613 | pci_write_config_byte(dev, reg, pci_cmd); |
614 | pci_read_config_byte(dev, reg, &pci_cmd); | ||
615 | if (!(pci_cmd & 1<<7)) { | ||
616 | printk(KERN_ERR PFX | ||
617 | "Can't enable access to port 0x22.\n"); | ||
618 | return 0; | ||
619 | } | ||
579 | } | 620 | } |
580 | return 1; | 621 | return 1; |
581 | } | 622 | } |
582 | return 0; | 623 | return 0; |
583 | } | 624 | } |
584 | 625 | ||
626 | static int longhaul_setup_vt8235(void) | ||
627 | { | ||
628 | struct pci_dev *dev; | ||
629 | u8 pci_cmd; | ||
630 | |||
631 | /* Find VT8235 southbridge */ | ||
632 | dev = pci_find_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, NULL); | ||
633 | if (dev != NULL) { | ||
634 | /* Set transition time to max */ | ||
635 | pci_read_config_byte(dev, 0xec, &pci_cmd); | ||
636 | pci_cmd &= ~(1 << 2); | ||
637 | pci_write_config_byte(dev, 0xec, pci_cmd); | ||
638 | pci_read_config_byte(dev, 0xe4, &pci_cmd); | ||
639 | pci_cmd &= ~(1 << 7); | ||
640 | pci_write_config_byte(dev, 0xe4, pci_cmd); | ||
641 | pci_read_config_byte(dev, 0xe5, &pci_cmd); | ||
642 | pci_cmd |= 1 << 7; | ||
643 | pci_write_config_byte(dev, 0xe5, pci_cmd); | ||
644 | return 1; | ||
645 | } | ||
646 | return 0; | ||
647 | } | ||
648 | |||
585 | static int __init longhaul_cpu_init(struct cpufreq_policy *policy) | 649 | static int __init longhaul_cpu_init(struct cpufreq_policy *policy) |
586 | { | 650 | { |
587 | struct cpuinfo_x86 *c = cpu_data; | 651 | struct cpuinfo_x86 *c = cpu_data; |
588 | char *cpuname=NULL; | 652 | char *cpuname=NULL; |
589 | int ret; | 653 | int ret; |
654 | u32 lo, hi; | ||
655 | int vt8235_present; | ||
590 | 656 | ||
591 | /* Check what we have on this motherboard */ | 657 | /* Check what we have on this motherboard */ |
592 | switch (c->x86_model) { | 658 | switch (c->x86_model) { |
@@ -599,16 +665,20 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) | |||
599 | break; | 665 | break; |
600 | 666 | ||
601 | case 7: | 667 | case 7: |
602 | longhaul_version = TYPE_LONGHAUL_V1; | ||
603 | switch (c->x86_mask) { | 668 | switch (c->x86_mask) { |
604 | case 0: | 669 | case 0: |
670 | longhaul_version = TYPE_LONGHAUL_V1; | ||
605 | cpu_model = CPU_SAMUEL2; | 671 | cpu_model = CPU_SAMUEL2; |
606 | cpuname = "C3 'Samuel 2' [C5B]"; | 672 | cpuname = "C3 'Samuel 2' [C5B]"; |
607 | /* Note, this is not a typo, early Samuel2's had Samuel1 ratios. */ | 673 | /* Note, this is not a typo, early Samuel2's had |
608 | memcpy (clock_ratio, samuel1_clock_ratio, sizeof(samuel1_clock_ratio)); | 674 | * Samuel1 ratios. */ |
609 | memcpy (eblcr_table, samuel2_eblcr, sizeof(samuel2_eblcr)); | 675 | memcpy(clock_ratio, samuel1_clock_ratio, |
676 | sizeof(samuel1_clock_ratio)); | ||
677 | memcpy(eblcr_table, samuel2_eblcr, | ||
678 | sizeof(samuel2_eblcr)); | ||
610 | break; | 679 | break; |
611 | case 1 ... 15: | 680 | case 1 ... 15: |
681 | longhaul_version = TYPE_LONGHAUL_V2; | ||
612 | if (c->x86_mask < 8) { | 682 | if (c->x86_mask < 8) { |
613 | cpu_model = CPU_SAMUEL2; | 683 | cpu_model = CPU_SAMUEL2; |
614 | cpuname = "C3 'Samuel 2' [C5B]"; | 684 | cpuname = "C3 'Samuel 2' [C5B]"; |
@@ -616,8 +686,10 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) | |||
616 | cpu_model = CPU_EZRA; | 686 | cpu_model = CPU_EZRA; |
617 | cpuname = "C3 'Ezra' [C5C]"; | 687 | cpuname = "C3 'Ezra' [C5C]"; |
618 | } | 688 | } |
619 | memcpy (clock_ratio, ezra_clock_ratio, sizeof(ezra_clock_ratio)); | 689 | memcpy(clock_ratio, ezra_clock_ratio, |
620 | memcpy (eblcr_table, ezra_eblcr, sizeof(ezra_eblcr)); | 690 | sizeof(ezra_clock_ratio)); |
691 | memcpy(eblcr_table, ezra_eblcr, | ||
692 | sizeof(ezra_eblcr)); | ||
621 | break; | 693 | break; |
622 | } | 694 | } |
623 | break; | 695 | break; |
@@ -632,24 +704,24 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) | |||
632 | break; | 704 | break; |
633 | 705 | ||
634 | case 9: | 706 | case 9: |
635 | cpu_model = CPU_NEHEMIAH; | ||
636 | longhaul_version = TYPE_POWERSAVER; | 707 | longhaul_version = TYPE_POWERSAVER; |
637 | numscales=32; | 708 | numscales = 32; |
709 | memcpy(clock_ratio, | ||
710 | nehemiah_clock_ratio, | ||
711 | sizeof(nehemiah_clock_ratio)); | ||
712 | memcpy(eblcr_table, nehemiah_eblcr, sizeof(nehemiah_eblcr)); | ||
638 | switch (c->x86_mask) { | 713 | switch (c->x86_mask) { |
639 | case 0 ... 1: | 714 | case 0 ... 1: |
640 | cpuname = "C3 'Nehemiah A' [C5N]"; | 715 | cpu_model = CPU_NEHEMIAH; |
641 | memcpy (clock_ratio, nehemiah_a_clock_ratio, sizeof(nehemiah_a_clock_ratio)); | 716 | cpuname = "C3 'Nehemiah A' [C5XLOE]"; |
642 | memcpy (eblcr_table, nehemiah_a_eblcr, sizeof(nehemiah_a_eblcr)); | ||
643 | break; | 717 | break; |
644 | case 2 ... 4: | 718 | case 2 ... 4: |
645 | cpuname = "C3 'Nehemiah B' [C5N]"; | 719 | cpu_model = CPU_NEHEMIAH; |
646 | memcpy (clock_ratio, nehemiah_b_clock_ratio, sizeof(nehemiah_b_clock_ratio)); | 720 | cpuname = "C3 'Nehemiah B' [C5XLOH]"; |
647 | memcpy (eblcr_table, nehemiah_b_eblcr, sizeof(nehemiah_b_eblcr)); | ||
648 | break; | 721 | break; |
649 | case 5 ... 15: | 722 | case 5 ... 15: |
650 | cpuname = "C3 'Nehemiah C' [C5N]"; | 723 | cpu_model = CPU_NEHEMIAH_C; |
651 | memcpy (clock_ratio, nehemiah_c_clock_ratio, sizeof(nehemiah_c_clock_ratio)); | 724 | cpuname = "C3 'Nehemiah C' [C5P]"; |
652 | memcpy (eblcr_table, nehemiah_c_eblcr, sizeof(nehemiah_c_eblcr)); | ||
653 | break; | 725 | break; |
654 | } | 726 | } |
655 | break; | 727 | break; |
@@ -658,6 +730,13 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) | |||
658 | cpuname = "Unknown"; | 730 | cpuname = "Unknown"; |
659 | break; | 731 | break; |
660 | } | 732 | } |
733 | /* Check Longhaul ver. 2 */ | ||
734 | if (longhaul_version == TYPE_LONGHAUL_V2) { | ||
735 | rdmsr(MSR_VIA_LONGHAUL, lo, hi); | ||
736 | if (lo == 0 && hi == 0) | ||
737 | /* Looks like MSR isn't present */ | ||
738 | longhaul_version = TYPE_LONGHAUL_V1; | ||
739 | } | ||
661 | 740 | ||
662 | printk (KERN_INFO PFX "VIA %s CPU detected. ", cpuname); | 741 | printk (KERN_INFO PFX "VIA %s CPU detected. ", cpuname); |
663 | switch (longhaul_version) { | 742 | switch (longhaul_version) { |
@@ -670,15 +749,18 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) | |||
670 | break; | 749 | break; |
671 | }; | 750 | }; |
672 | 751 | ||
752 | /* Doesn't hurt */ | ||
753 | vt8235_present = longhaul_setup_vt8235(); | ||
754 | |||
673 | /* Find ACPI data for processor */ | 755 | /* Find ACPI data for processor */ |
674 | acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, | 756 | acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, |
675 | &longhaul_walk_callback, NULL, (void *)&pr); | 757 | ACPI_UINT32_MAX, &longhaul_walk_callback, |
758 | NULL, (void *)&pr); | ||
676 | 759 | ||
677 | /* Check ACPI support for C3 state */ | 760 | /* Check ACPI support for C3 state */ |
678 | if ((pr != NULL) && (longhaul_version == TYPE_POWERSAVER)) { | 761 | if (pr != NULL && longhaul_version != TYPE_LONGHAUL_V1) { |
679 | cx = &pr->power.states[ACPI_STATE_C3]; | 762 | cx = &pr->power.states[ACPI_STATE_C3]; |
680 | if (cx->address > 0 && | 763 | if (cx->address > 0 && cx->latency <= 1000) { |
681 | (cx->latency <= 1000 || ignore_latency != 0) ) { | ||
682 | longhaul_flags |= USE_ACPI_C3; | 764 | longhaul_flags |= USE_ACPI_C3; |
683 | goto print_support_type; | 765 | goto print_support_type; |
684 | } | 766 | } |
@@ -688,8 +770,11 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) | |||
688 | longhaul_flags |= USE_NORTHBRIDGE; | 770 | longhaul_flags |= USE_NORTHBRIDGE; |
689 | goto print_support_type; | 771 | goto print_support_type; |
690 | } | 772 | } |
691 | 773 | /* Use VT8235 southbridge if present */ | |
692 | /* No ACPI C3 or we can't use it */ | 774 | if (longhaul_version == TYPE_POWERSAVER && vt8235_present) { |
775 | longhaul_flags |= USE_VT8235; | ||
776 | goto print_support_type; | ||
777 | } | ||
693 | /* Check ACPI support for bus master arbiter disable */ | 778 | /* Check ACPI support for bus master arbiter disable */ |
694 | if ((pr == NULL) || !(pr->flags.bm_control)) { | 779 | if ((pr == NULL) || !(pr->flags.bm_control)) { |
695 | printk(KERN_ERR PFX | 780 | printk(KERN_ERR PFX |
@@ -698,18 +783,18 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) | |||
698 | } | 783 | } |
699 | 784 | ||
700 | print_support_type: | 785 | print_support_type: |
701 | if (!(longhaul_flags & USE_NORTHBRIDGE)) { | 786 | if (longhaul_flags & USE_NORTHBRIDGE) |
702 | printk (KERN_INFO PFX "Using ACPI support.\n"); | ||
703 | } else { | ||
704 | printk (KERN_INFO PFX "Using northbridge support.\n"); | 787 | printk (KERN_INFO PFX "Using northbridge support.\n"); |
705 | } | 788 | else if (longhaul_flags & USE_VT8235) |
789 | printk (KERN_INFO PFX "Using VT8235 support.\n"); | ||
790 | else | ||
791 | printk (KERN_INFO PFX "Using ACPI support.\n"); | ||
706 | 792 | ||
707 | ret = longhaul_get_ranges(); | 793 | ret = longhaul_get_ranges(); |
708 | if (ret != 0) | 794 | if (ret != 0) |
709 | return ret; | 795 | return ret; |
710 | 796 | ||
711 | if ((longhaul_version==TYPE_LONGHAUL_V2 || longhaul_version==TYPE_POWERSAVER) && | 797 | if ((longhaul_version != TYPE_LONGHAUL_V1) && (scale_voltage != 0)) |
712 | (scale_voltage != 0)) | ||
713 | longhaul_setup_voltagescaling(); | 798 | longhaul_setup_voltagescaling(); |
714 | 799 | ||
715 | policy->governor = CPUFREQ_DEFAULT_GOVERNOR; | 800 | policy->governor = CPUFREQ_DEFAULT_GOVERNOR; |
@@ -797,8 +882,6 @@ static void __exit longhaul_exit(void) | |||
797 | 882 | ||
798 | module_param (scale_voltage, int, 0644); | 883 | module_param (scale_voltage, int, 0644); |
799 | MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor"); | 884 | MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor"); |
800 | module_param(ignore_latency, int, 0644); | ||
801 | MODULE_PARM_DESC(ignore_latency, "Skip ACPI C3 latency test"); | ||
802 | 885 | ||
803 | MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>"); | 886 | MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>"); |
804 | MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors."); | 887 | MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors."); |
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.h b/arch/i386/kernel/cpu/cpufreq/longhaul.h index bc4682aad69b..bb0a04b1d1ab 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.h +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.h | |||
@@ -235,84 +235,14 @@ static int __initdata ezrat_eblcr[32] = { | |||
235 | /* | 235 | /* |
236 | * VIA C3 Nehemiah */ | 236 | * VIA C3 Nehemiah */ |
237 | 237 | ||
238 | static int __initdata nehemiah_a_clock_ratio[32] = { | 238 | static int __initdata nehemiah_clock_ratio[32] = { |
239 | 100, /* 0000 -> 10.0x */ | 239 | 100, /* 0000 -> 10.0x */ |
240 | 160, /* 0001 -> 16.0x */ | 240 | 160, /* 0001 -> 16.0x */ |
241 | -1, /* 0010 -> RESERVED */ | 241 | 40, /* 0010 -> 4.0x */ |
242 | 90, /* 0011 -> 9.0x */ | ||
243 | 95, /* 0100 -> 9.5x */ | ||
244 | -1, /* 0101 -> RESERVED */ | ||
245 | -1, /* 0110 -> RESERVED */ | ||
246 | 55, /* 0111 -> 5.5x */ | ||
247 | 60, /* 1000 -> 6.0x */ | ||
248 | 70, /* 1001 -> 7.0x */ | ||
249 | 80, /* 1010 -> 8.0x */ | ||
250 | 50, /* 1011 -> 5.0x */ | ||
251 | 65, /* 1100 -> 6.5x */ | ||
252 | 75, /* 1101 -> 7.5x */ | ||
253 | 85, /* 1110 -> 8.5x */ | ||
254 | 120, /* 1111 -> 12.0x */ | ||
255 | 100, /* 0000 -> 10.0x */ | ||
256 | -1, /* 0001 -> RESERVED */ | ||
257 | 120, /* 0010 -> 12.0x */ | ||
258 | 90, /* 0011 -> 9.0x */ | ||
259 | 105, /* 0100 -> 10.5x */ | ||
260 | 115, /* 0101 -> 11.5x */ | ||
261 | 125, /* 0110 -> 12.5x */ | ||
262 | 135, /* 0111 -> 13.5x */ | ||
263 | 140, /* 1000 -> 14.0x */ | ||
264 | 150, /* 1001 -> 15.0x */ | ||
265 | 160, /* 1010 -> 16.0x */ | ||
266 | 130, /* 1011 -> 13.0x */ | ||
267 | 145, /* 1100 -> 14.5x */ | ||
268 | 155, /* 1101 -> 15.5x */ | ||
269 | -1, /* 1110 -> RESERVED (13.0x) */ | ||
270 | 120, /* 1111 -> 12.0x */ | ||
271 | }; | ||
272 | |||
273 | static int __initdata nehemiah_b_clock_ratio[32] = { | ||
274 | 100, /* 0000 -> 10.0x */ | ||
275 | 160, /* 0001 -> 16.0x */ | ||
276 | -1, /* 0010 -> RESERVED */ | ||
277 | 90, /* 0011 -> 9.0x */ | ||
278 | 95, /* 0100 -> 9.5x */ | ||
279 | -1, /* 0101 -> RESERVED */ | ||
280 | -1, /* 0110 -> RESERVED */ | ||
281 | 55, /* 0111 -> 5.5x */ | ||
282 | 60, /* 1000 -> 6.0x */ | ||
283 | 70, /* 1001 -> 7.0x */ | ||
284 | 80, /* 1010 -> 8.0x */ | ||
285 | 50, /* 1011 -> 5.0x */ | ||
286 | 65, /* 1100 -> 6.5x */ | ||
287 | 75, /* 1101 -> 7.5x */ | ||
288 | 85, /* 1110 -> 8.5x */ | ||
289 | 120, /* 1111 -> 12.0x */ | ||
290 | 100, /* 0000 -> 10.0x */ | ||
291 | 110, /* 0001 -> 11.0x */ | ||
292 | 120, /* 0010 -> 12.0x */ | ||
293 | 90, /* 0011 -> 9.0x */ | ||
294 | 105, /* 0100 -> 10.5x */ | ||
295 | 115, /* 0101 -> 11.5x */ | ||
296 | 125, /* 0110 -> 12.5x */ | ||
297 | 135, /* 0111 -> 13.5x */ | ||
298 | 140, /* 1000 -> 14.0x */ | ||
299 | 150, /* 1001 -> 15.0x */ | ||
300 | 160, /* 1010 -> 16.0x */ | ||
301 | 130, /* 1011 -> 13.0x */ | ||
302 | 145, /* 1100 -> 14.5x */ | ||
303 | 155, /* 1101 -> 15.5x */ | ||
304 | -1, /* 1110 -> RESERVED (13.0x) */ | ||
305 | 120, /* 1111 -> 12.0x */ | ||
306 | }; | ||
307 | |||
308 | static int __initdata nehemiah_c_clock_ratio[32] = { | ||
309 | 100, /* 0000 -> 10.0x */ | ||
310 | 160, /* 0001 -> 16.0x */ | ||
311 | 40, /* 0010 -> RESERVED */ | ||
312 | 90, /* 0011 -> 9.0x */ | 242 | 90, /* 0011 -> 9.0x */ |
313 | 95, /* 0100 -> 9.5x */ | 243 | 95, /* 0100 -> 9.5x */ |
314 | -1, /* 0101 -> RESERVED */ | 244 | -1, /* 0101 -> RESERVED */ |
315 | 45, /* 0110 -> RESERVED */ | 245 | 45, /* 0110 -> 4.5x */ |
316 | 55, /* 0111 -> 5.5x */ | 246 | 55, /* 0111 -> 5.5x */ |
317 | 60, /* 1000 -> 6.0x */ | 247 | 60, /* 1000 -> 6.0x */ |
318 | 70, /* 1001 -> 7.0x */ | 248 | 70, /* 1001 -> 7.0x */ |
@@ -340,84 +270,14 @@ static int __initdata nehemiah_c_clock_ratio[32] = { | |||
340 | 120, /* 1111 -> 12.0x */ | 270 | 120, /* 1111 -> 12.0x */ |
341 | }; | 271 | }; |
342 | 272 | ||
343 | static int __initdata nehemiah_a_eblcr[32] = { | 273 | static int __initdata nehemiah_eblcr[32] = { |
344 | 50, /* 0000 -> 5.0x */ | ||
345 | 160, /* 0001 -> 16.0x */ | ||
346 | -1, /* 0010 -> RESERVED */ | ||
347 | 100, /* 0011 -> 10.0x */ | ||
348 | 55, /* 0100 -> 5.5x */ | ||
349 | -1, /* 0101 -> RESERVED */ | ||
350 | -1, /* 0110 -> RESERVED */ | ||
351 | 95, /* 0111 -> 9.5x */ | ||
352 | 90, /* 1000 -> 9.0x */ | ||
353 | 70, /* 1001 -> 7.0x */ | ||
354 | 80, /* 1010 -> 8.0x */ | ||
355 | 60, /* 1011 -> 6.0x */ | ||
356 | 120, /* 1100 -> 12.0x */ | ||
357 | 75, /* 1101 -> 7.5x */ | ||
358 | 85, /* 1110 -> 8.5x */ | ||
359 | 65, /* 1111 -> 6.5x */ | ||
360 | 90, /* 0000 -> 9.0x */ | ||
361 | -1, /* 0001 -> RESERVED */ | ||
362 | 120, /* 0010 -> 12.0x */ | ||
363 | 100, /* 0011 -> 10.0x */ | ||
364 | 135, /* 0100 -> 13.5x */ | ||
365 | 115, /* 0101 -> 11.5x */ | ||
366 | 125, /* 0110 -> 12.5x */ | ||
367 | 105, /* 0111 -> 10.5x */ | ||
368 | 130, /* 1000 -> 13.0x */ | ||
369 | 150, /* 1001 -> 15.0x */ | ||
370 | 160, /* 1010 -> 16.0x */ | ||
371 | 140, /* 1011 -> 14.0x */ | ||
372 | 120, /* 1100 -> 12.0x */ | ||
373 | 155, /* 1101 -> 15.5x */ | ||
374 | -1, /* 1110 -> RESERVED (13.0x) */ | ||
375 | 145 /* 1111 -> 14.5x */ | ||
376 | /* end of table */ | ||
377 | }; | ||
378 | static int __initdata nehemiah_b_eblcr[32] = { | ||
379 | 50, /* 0000 -> 5.0x */ | ||
380 | 160, /* 0001 -> 16.0x */ | ||
381 | -1, /* 0010 -> RESERVED */ | ||
382 | 100, /* 0011 -> 10.0x */ | ||
383 | 55, /* 0100 -> 5.5x */ | ||
384 | -1, /* 0101 -> RESERVED */ | ||
385 | -1, /* 0110 -> RESERVED */ | ||
386 | 95, /* 0111 -> 9.5x */ | ||
387 | 90, /* 1000 -> 9.0x */ | ||
388 | 70, /* 1001 -> 7.0x */ | ||
389 | 80, /* 1010 -> 8.0x */ | ||
390 | 60, /* 1011 -> 6.0x */ | ||
391 | 120, /* 1100 -> 12.0x */ | ||
392 | 75, /* 1101 -> 7.5x */ | ||
393 | 85, /* 1110 -> 8.5x */ | ||
394 | 65, /* 1111 -> 6.5x */ | ||
395 | 90, /* 0000 -> 9.0x */ | ||
396 | 110, /* 0001 -> 11.0x */ | ||
397 | 120, /* 0010 -> 12.0x */ | ||
398 | 100, /* 0011 -> 10.0x */ | ||
399 | 135, /* 0100 -> 13.5x */ | ||
400 | 115, /* 0101 -> 11.5x */ | ||
401 | 125, /* 0110 -> 12.5x */ | ||
402 | 105, /* 0111 -> 10.5x */ | ||
403 | 130, /* 1000 -> 13.0x */ | ||
404 | 150, /* 1001 -> 15.0x */ | ||
405 | 160, /* 1010 -> 16.0x */ | ||
406 | 140, /* 1011 -> 14.0x */ | ||
407 | 120, /* 1100 -> 12.0x */ | ||
408 | 155, /* 1101 -> 15.5x */ | ||
409 | -1, /* 1110 -> RESERVED (13.0x) */ | ||
410 | 145 /* 1111 -> 14.5x */ | ||
411 | /* end of table */ | ||
412 | }; | ||
413 | static int __initdata nehemiah_c_eblcr[32] = { | ||
414 | 50, /* 0000 -> 5.0x */ | 274 | 50, /* 0000 -> 5.0x */ |
415 | 160, /* 0001 -> 16.0x */ | 275 | 160, /* 0001 -> 16.0x */ |
416 | 40, /* 0010 -> RESERVED */ | 276 | 40, /* 0010 -> 4.0x */ |
417 | 100, /* 0011 -> 10.0x */ | 277 | 100, /* 0011 -> 10.0x */ |
418 | 55, /* 0100 -> 5.5x */ | 278 | 55, /* 0100 -> 5.5x */ |
419 | -1, /* 0101 -> RESERVED */ | 279 | -1, /* 0101 -> RESERVED */ |
420 | 45, /* 0110 -> RESERVED */ | 280 | 45, /* 0110 -> 4.5x */ |
421 | 95, /* 0111 -> 9.5x */ | 281 | 95, /* 0111 -> 9.5x */ |
422 | 90, /* 1000 -> 9.0x */ | 282 | 90, /* 1000 -> 9.0x */ |
423 | 70, /* 1001 -> 7.0x */ | 283 | 70, /* 1001 -> 7.0x */ |
@@ -443,7 +303,6 @@ static int __initdata nehemiah_c_eblcr[32] = { | |||
443 | 155, /* 1101 -> 15.5x */ | 303 | 155, /* 1101 -> 15.5x */ |
444 | -1, /* 1110 -> RESERVED (13.0x) */ | 304 | -1, /* 1110 -> RESERVED (13.0x) */ |
445 | 145 /* 1111 -> 14.5x */ | 305 | 145 /* 1111 -> 14.5x */ |
446 | /* end of table */ | ||
447 | }; | 306 | }; |
448 | 307 | ||
449 | /* | 308 | /* |
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index 2d6491672559..fe3b67005ebb 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c | |||
@@ -1289,7 +1289,11 @@ static unsigned int powernowk8_get (unsigned int cpu) | |||
1289 | if (query_current_values_with_pending_wait(data)) | 1289 | if (query_current_values_with_pending_wait(data)) |
1290 | goto out; | 1290 | goto out; |
1291 | 1291 | ||
1292 | khz = find_khz_freq_from_fid(data->currfid); | 1292 | if (cpu_family == CPU_HW_PSTATE) |
1293 | khz = find_khz_freq_from_fiddid(data->currfid, data->currdid); | ||
1294 | else | ||
1295 | khz = find_khz_freq_from_fid(data->currfid); | ||
1296 | |||
1293 | 1297 | ||
1294 | out: | 1298 | out: |
1295 | set_cpus_allowed(current, oldmask); | 1299 | set_cpus_allowed(current, oldmask); |
diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c index c0c3b59de32c..de27bd07bc9c 100644 --- a/arch/i386/kernel/cpu/cyrix.c +++ b/arch/i386/kernel/cpu/cyrix.c | |||
@@ -6,6 +6,7 @@ | |||
6 | #include <asm/io.h> | 6 | #include <asm/io.h> |
7 | #include <asm/processor.h> | 7 | #include <asm/processor.h> |
8 | #include <asm/timer.h> | 8 | #include <asm/timer.h> |
9 | #include <asm/pci-direct.h> | ||
9 | 10 | ||
10 | #include "cpu.h" | 11 | #include "cpu.h" |
11 | 12 | ||
@@ -161,19 +162,19 @@ static void __cpuinit set_cx86_inc(void) | |||
161 | static void __cpuinit geode_configure(void) | 162 | static void __cpuinit geode_configure(void) |
162 | { | 163 | { |
163 | unsigned long flags; | 164 | unsigned long flags; |
164 | u8 ccr3, ccr4; | 165 | u8 ccr3; |
165 | local_irq_save(flags); | 166 | local_irq_save(flags); |
166 | 167 | ||
167 | /* Suspend on halt power saving and enable #SUSP pin */ | 168 | /* Suspend on halt power saving and enable #SUSP pin */ |
168 | setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); | 169 | setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); |
169 | 170 | ||
170 | ccr3 = getCx86(CX86_CCR3); | 171 | ccr3 = getCx86(CX86_CCR3); |
171 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* Enable */ | 172 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ |
172 | |||
173 | ccr4 = getCx86(CX86_CCR4); | ||
174 | ccr4 |= 0x38; /* FPU fast, DTE cache, Mem bypass */ | ||
175 | 173 | ||
176 | setCx86(CX86_CCR3, ccr3); | 174 | |
175 | /* FPU fast, DTE cache, Mem bypass */ | ||
176 | setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x38); | ||
177 | setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ | ||
177 | 178 | ||
178 | set_cx86_memwb(); | 179 | set_cx86_memwb(); |
179 | set_cx86_reorder(); | 180 | set_cx86_reorder(); |
@@ -183,14 +184,6 @@ static void __cpuinit geode_configure(void) | |||
183 | } | 184 | } |
184 | 185 | ||
185 | 186 | ||
186 | #ifdef CONFIG_PCI | ||
187 | static struct pci_device_id __cpuinitdata cyrix_55x0[] = { | ||
188 | { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510) }, | ||
189 | { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520) }, | ||
190 | { }, | ||
191 | }; | ||
192 | #endif | ||
193 | |||
194 | static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) | 187 | static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) |
195 | { | 188 | { |
196 | unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0; | 189 | unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0; |
@@ -258,6 +251,8 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) | |||
258 | 251 | ||
259 | case 4: /* MediaGX/GXm or Geode GXM/GXLV/GX1 */ | 252 | case 4: /* MediaGX/GXm or Geode GXM/GXLV/GX1 */ |
260 | #ifdef CONFIG_PCI | 253 | #ifdef CONFIG_PCI |
254 | { | ||
255 | u32 vendor, device; | ||
261 | /* It isn't really a PCI quirk directly, but the cure is the | 256 | /* It isn't really a PCI quirk directly, but the cure is the |
262 | same. The MediaGX has deep magic SMM stuff that handles the | 257 | same. The MediaGX has deep magic SMM stuff that handles the |
263 | SB emulation. It thows away the fifo on disable_dma() which | 258 | SB emulation. It thows away the fifo on disable_dma() which |
@@ -273,22 +268,34 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) | |||
273 | printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n"); | 268 | printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n"); |
274 | isa_dma_bridge_buggy = 2; | 269 | isa_dma_bridge_buggy = 2; |
275 | 270 | ||
271 | /* We do this before the PCI layer is running. However we | ||
272 | are safe here as we know the bridge must be a Cyrix | ||
273 | companion and must be present */ | ||
274 | vendor = read_pci_config_16(0, 0, 0x12, PCI_VENDOR_ID); | ||
275 | device = read_pci_config_16(0, 0, 0x12, PCI_DEVICE_ID); | ||
276 | 276 | ||
277 | /* | 277 | /* |
278 | * The 5510/5520 companion chips have a funky PIT. | 278 | * The 5510/5520 companion chips have a funky PIT. |
279 | */ | 279 | */ |
280 | if (pci_dev_present(cyrix_55x0)) | 280 | if (vendor == PCI_VENDOR_ID_CYRIX && |
281 | (device == PCI_DEVICE_ID_CYRIX_5510 || device == PCI_DEVICE_ID_CYRIX_5520)) | ||
281 | pit_latch_buggy = 1; | 282 | pit_latch_buggy = 1; |
283 | } | ||
282 | #endif | 284 | #endif |
283 | c->x86_cache_size=16; /* Yep 16K integrated cache thats it */ | 285 | c->x86_cache_size=16; /* Yep 16K integrated cache thats it */ |
284 | 286 | ||
285 | /* GXm supports extended cpuid levels 'ala' AMD */ | 287 | /* GXm supports extended cpuid levels 'ala' AMD */ |
286 | if (c->cpuid_level == 2) { | 288 | if (c->cpuid_level == 2) { |
287 | /* Enable cxMMX extensions (GX1 Datasheet 54) */ | 289 | /* Enable cxMMX extensions (GX1 Datasheet 54) */ |
288 | setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1); | 290 | setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1); |
289 | 291 | ||
290 | /* GXlv/GXm/GX1 */ | 292 | /* |
291 | if((dir1 >= 0x50 && dir1 <= 0x54) || dir1 >= 0x63) | 293 | * GXm : 0x30 ... 0x5f GXm datasheet 51 |
294 | * GXlv: 0x6x GXlv datasheet 54 | ||
295 | * ? : 0x7x | ||
296 | * GX1 : 0x8x GX1 datasheet 56 | ||
297 | */ | ||
298 | if((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <=dir1 && dir1 <= 0x8f)) | ||
292 | geode_configure(); | 299 | geode_configure(); |
293 | get_model_name(c); /* get CPU marketing name */ | 300 | get_model_name(c); /* get CPU marketing name */ |
294 | return; | 301 | return; |
@@ -415,15 +422,14 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 * c) | |||
415 | 422 | ||
416 | if (dir0 == 5 || dir0 == 3) | 423 | if (dir0 == 5 || dir0 == 3) |
417 | { | 424 | { |
418 | unsigned char ccr3, ccr4; | 425 | unsigned char ccr3; |
419 | unsigned long flags; | 426 | unsigned long flags; |
420 | printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n"); | 427 | printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n"); |
421 | local_irq_save(flags); | 428 | local_irq_save(flags); |
422 | ccr3 = getCx86(CX86_CCR3); | 429 | ccr3 = getCx86(CX86_CCR3); |
423 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ | 430 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ |
424 | ccr4 = getCx86(CX86_CCR4); | 431 | setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x80); /* enable cpuid */ |
425 | setCx86(CX86_CCR4, ccr4 | 0x80); /* enable cpuid */ | 432 | setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ |
426 | setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ | ||
427 | local_irq_restore(flags); | 433 | local_irq_restore(flags); |
428 | } | 434 | } |
429 | } | 435 | } |
diff --git a/arch/i386/kernel/cpu/mcheck/mce.c b/arch/i386/kernel/cpu/mcheck/mce.c index d555bec0db99..4f10c62d180c 100644 --- a/arch/i386/kernel/cpu/mcheck/mce.c +++ b/arch/i386/kernel/cpu/mcheck/mce.c | |||
@@ -12,6 +12,7 @@ | |||
12 | 12 | ||
13 | #include <asm/processor.h> | 13 | #include <asm/processor.h> |
14 | #include <asm/system.h> | 14 | #include <asm/system.h> |
15 | #include <asm/mce.h> | ||
15 | 16 | ||
16 | #include "mce.h" | 17 | #include "mce.h" |
17 | 18 | ||
diff --git a/arch/i386/kernel/cpu/mcheck/mce.h b/arch/i386/kernel/cpu/mcheck/mce.h index 84fd4cf7d0fb..81fb6e2d35f3 100644 --- a/arch/i386/kernel/cpu/mcheck/mce.h +++ b/arch/i386/kernel/cpu/mcheck/mce.h | |||
@@ -1,4 +1,5 @@ | |||
1 | #include <linux/init.h> | 1 | #include <linux/init.h> |
2 | #include <asm/mce.h> | ||
2 | 3 | ||
3 | void amd_mcheck_init(struct cpuinfo_x86 *c); | 4 | void amd_mcheck_init(struct cpuinfo_x86 *c); |
4 | void intel_p4_mcheck_init(struct cpuinfo_x86 *c); | 5 | void intel_p4_mcheck_init(struct cpuinfo_x86 *c); |
@@ -9,6 +10,5 @@ void winchip_mcheck_init(struct cpuinfo_x86 *c); | |||
9 | /* Call the installed machine check handler for this CPU setup. */ | 10 | /* Call the installed machine check handler for this CPU setup. */ |
10 | extern fastcall void (*machine_check_vector)(struct pt_regs *, long error_code); | 11 | extern fastcall void (*machine_check_vector)(struct pt_regs *, long error_code); |
11 | 12 | ||
12 | extern int mce_disabled; | ||
13 | extern int nr_mce_banks; | 13 | extern int nr_mce_banks; |
14 | 14 | ||
diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c index 504434a46011..8359c19d3a23 100644 --- a/arch/i386/kernel/cpu/mcheck/p4.c +++ b/arch/i386/kernel/cpu/mcheck/p4.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <asm/system.h> | 12 | #include <asm/system.h> |
13 | #include <asm/msr.h> | 13 | #include <asm/msr.h> |
14 | #include <asm/apic.h> | 14 | #include <asm/apic.h> |
15 | #include <asm/idle.h> | ||
15 | 16 | ||
16 | #include <asm/therm_throt.h> | 17 | #include <asm/therm_throt.h> |
17 | 18 | ||
@@ -59,6 +60,7 @@ static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_therm | |||
59 | 60 | ||
60 | fastcall void smp_thermal_interrupt(struct pt_regs *regs) | 61 | fastcall void smp_thermal_interrupt(struct pt_regs *regs) |
61 | { | 62 | { |
63 | exit_idle(); | ||
62 | irq_enter(); | 64 | irq_enter(); |
63 | vendor_thermal_interrupt(regs); | 65 | vendor_thermal_interrupt(regs); |
64 | irq_exit(); | 66 | irq_exit(); |
diff --git a/arch/i386/kernel/cpu/mtrr/if.c b/arch/i386/kernel/cpu/mtrr/if.c index ee771f305f96..c7d8f1756745 100644 --- a/arch/i386/kernel/cpu/mtrr/if.c +++ b/arch/i386/kernel/cpu/mtrr/if.c | |||
@@ -211,6 +211,9 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) | |||
211 | default: | 211 | default: |
212 | return -ENOTTY; | 212 | return -ENOTTY; |
213 | case MTRRIOC_ADD_ENTRY: | 213 | case MTRRIOC_ADD_ENTRY: |
214 | #ifdef CONFIG_COMPAT | ||
215 | case MTRRIOC32_ADD_ENTRY: | ||
216 | #endif | ||
214 | if (!capable(CAP_SYS_ADMIN)) | 217 | if (!capable(CAP_SYS_ADMIN)) |
215 | return -EPERM; | 218 | return -EPERM; |
216 | err = | 219 | err = |
@@ -218,21 +221,33 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) | |||
218 | file, 0); | 221 | file, 0); |
219 | break; | 222 | break; |
220 | case MTRRIOC_SET_ENTRY: | 223 | case MTRRIOC_SET_ENTRY: |
224 | #ifdef CONFIG_COMPAT | ||
225 | case MTRRIOC32_SET_ENTRY: | ||
226 | #endif | ||
221 | if (!capable(CAP_SYS_ADMIN)) | 227 | if (!capable(CAP_SYS_ADMIN)) |
222 | return -EPERM; | 228 | return -EPERM; |
223 | err = mtrr_add(sentry.base, sentry.size, sentry.type, 0); | 229 | err = mtrr_add(sentry.base, sentry.size, sentry.type, 0); |
224 | break; | 230 | break; |
225 | case MTRRIOC_DEL_ENTRY: | 231 | case MTRRIOC_DEL_ENTRY: |
232 | #ifdef CONFIG_COMPAT | ||
233 | case MTRRIOC32_DEL_ENTRY: | ||
234 | #endif | ||
226 | if (!capable(CAP_SYS_ADMIN)) | 235 | if (!capable(CAP_SYS_ADMIN)) |
227 | return -EPERM; | 236 | return -EPERM; |
228 | err = mtrr_file_del(sentry.base, sentry.size, file, 0); | 237 | err = mtrr_file_del(sentry.base, sentry.size, file, 0); |
229 | break; | 238 | break; |
230 | case MTRRIOC_KILL_ENTRY: | 239 | case MTRRIOC_KILL_ENTRY: |
240 | #ifdef CONFIG_COMPAT | ||
241 | case MTRRIOC32_KILL_ENTRY: | ||
242 | #endif | ||
231 | if (!capable(CAP_SYS_ADMIN)) | 243 | if (!capable(CAP_SYS_ADMIN)) |
232 | return -EPERM; | 244 | return -EPERM; |
233 | err = mtrr_del(-1, sentry.base, sentry.size); | 245 | err = mtrr_del(-1, sentry.base, sentry.size); |
234 | break; | 246 | break; |
235 | case MTRRIOC_GET_ENTRY: | 247 | case MTRRIOC_GET_ENTRY: |
248 | #ifdef CONFIG_COMPAT | ||
249 | case MTRRIOC32_GET_ENTRY: | ||
250 | #endif | ||
236 | if (gentry.regnum >= num_var_ranges) | 251 | if (gentry.regnum >= num_var_ranges) |
237 | return -EINVAL; | 252 | return -EINVAL; |
238 | mtrr_if->get(gentry.regnum, &gentry.base, &size, &type); | 253 | mtrr_if->get(gentry.regnum, &gentry.base, &size, &type); |
@@ -249,6 +264,9 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) | |||
249 | 264 | ||
250 | break; | 265 | break; |
251 | case MTRRIOC_ADD_PAGE_ENTRY: | 266 | case MTRRIOC_ADD_PAGE_ENTRY: |
267 | #ifdef CONFIG_COMPAT | ||
268 | case MTRRIOC32_ADD_PAGE_ENTRY: | ||
269 | #endif | ||
252 | if (!capable(CAP_SYS_ADMIN)) | 270 | if (!capable(CAP_SYS_ADMIN)) |
253 | return -EPERM; | 271 | return -EPERM; |
254 | err = | 272 | err = |
@@ -256,21 +274,33 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) | |||
256 | file, 1); | 274 | file, 1); |
257 | break; | 275 | break; |
258 | case MTRRIOC_SET_PAGE_ENTRY: | 276 | case MTRRIOC_SET_PAGE_ENTRY: |
277 | #ifdef CONFIG_COMPAT | ||
278 | case MTRRIOC32_SET_PAGE_ENTRY: | ||
279 | #endif | ||
259 | if (!capable(CAP_SYS_ADMIN)) | 280 | if (!capable(CAP_SYS_ADMIN)) |
260 | return -EPERM; | 281 | return -EPERM; |
261 | err = mtrr_add_page(sentry.base, sentry.size, sentry.type, 0); | 282 | err = mtrr_add_page(sentry.base, sentry.size, sentry.type, 0); |
262 | break; | 283 | break; |
263 | case MTRRIOC_DEL_PAGE_ENTRY: | 284 | case MTRRIOC_DEL_PAGE_ENTRY: |
285 | #ifdef CONFIG_COMPAT | ||
286 | case MTRRIOC32_DEL_PAGE_ENTRY: | ||
287 | #endif | ||
264 | if (!capable(CAP_SYS_ADMIN)) | 288 | if (!capable(CAP_SYS_ADMIN)) |
265 | return -EPERM; | 289 | return -EPERM; |
266 | err = mtrr_file_del(sentry.base, sentry.size, file, 1); | 290 | err = mtrr_file_del(sentry.base, sentry.size, file, 1); |
267 | break; | 291 | break; |
268 | case MTRRIOC_KILL_PAGE_ENTRY: | 292 | case MTRRIOC_KILL_PAGE_ENTRY: |
293 | #ifdef CONFIG_COMPAT | ||
294 | case MTRRIOC32_KILL_PAGE_ENTRY: | ||
295 | #endif | ||
269 | if (!capable(CAP_SYS_ADMIN)) | 296 | if (!capable(CAP_SYS_ADMIN)) |
270 | return -EPERM; | 297 | return -EPERM; |
271 | err = mtrr_del_page(-1, sentry.base, sentry.size); | 298 | err = mtrr_del_page(-1, sentry.base, sentry.size); |
272 | break; | 299 | break; |
273 | case MTRRIOC_GET_PAGE_ENTRY: | 300 | case MTRRIOC_GET_PAGE_ENTRY: |
301 | #ifdef CONFIG_COMPAT | ||
302 | case MTRRIOC32_GET_PAGE_ENTRY: | ||
303 | #endif | ||
274 | if (gentry.regnum >= num_var_ranges) | 304 | if (gentry.regnum >= num_var_ranges) |
275 | return -EINVAL; | 305 | return -EINVAL; |
276 | mtrr_if->get(gentry.regnum, &gentry.base, &size, &type); | 306 | mtrr_if->get(gentry.regnum, &gentry.base, &size, &type); |
diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c index 16bb7ea87145..0acfb6a5a220 100644 --- a/arch/i386/kernel/cpu/mtrr/main.c +++ b/arch/i386/kernel/cpu/mtrr/main.c | |||
@@ -50,7 +50,7 @@ u32 num_var_ranges = 0; | |||
50 | unsigned int *usage_table; | 50 | unsigned int *usage_table; |
51 | static DEFINE_MUTEX(mtrr_mutex); | 51 | static DEFINE_MUTEX(mtrr_mutex); |
52 | 52 | ||
53 | u32 size_or_mask, size_and_mask; | 53 | u64 size_or_mask, size_and_mask; |
54 | 54 | ||
55 | static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {}; | 55 | static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {}; |
56 | 56 | ||
@@ -662,8 +662,8 @@ void __init mtrr_bp_init(void) | |||
662 | boot_cpu_data.x86_mask == 0x4)) | 662 | boot_cpu_data.x86_mask == 0x4)) |
663 | phys_addr = 36; | 663 | phys_addr = 36; |
664 | 664 | ||
665 | size_or_mask = ~((1 << (phys_addr - PAGE_SHIFT)) - 1); | 665 | size_or_mask = ~((1ULL << (phys_addr - PAGE_SHIFT)) - 1); |
666 | size_and_mask = ~size_or_mask & 0xfff00000; | 666 | size_and_mask = ~size_or_mask & 0xfffff00000ULL; |
667 | } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR && | 667 | } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR && |
668 | boot_cpu_data.x86 == 6) { | 668 | boot_cpu_data.x86 == 6) { |
669 | /* VIA C* family have Intel style MTRRs, but | 669 | /* VIA C* family have Intel style MTRRs, but |
diff --git a/arch/i386/kernel/cpu/mtrr/mtrr.h b/arch/i386/kernel/cpu/mtrr/mtrr.h index d61ea9db6cfe..289dfe6030e3 100644 --- a/arch/i386/kernel/cpu/mtrr/mtrr.h +++ b/arch/i386/kernel/cpu/mtrr/mtrr.h | |||
@@ -84,7 +84,7 @@ void get_mtrr_state(void); | |||
84 | 84 | ||
85 | extern void set_mtrr_ops(struct mtrr_ops * ops); | 85 | extern void set_mtrr_ops(struct mtrr_ops * ops); |
86 | 86 | ||
87 | extern u32 size_or_mask, size_and_mask; | 87 | extern u64 size_or_mask, size_and_mask; |
88 | extern struct mtrr_ops * mtrr_if; | 88 | extern struct mtrr_ops * mtrr_if; |
89 | 89 | ||
90 | #define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) | 90 | #define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) |
diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index 6624d8583c42..47e3ebbfb28d 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c | |||
@@ -29,7 +29,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
29 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 29 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
30 | NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, | 30 | NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, |
31 | NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL, | 31 | NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL, |
32 | NULL, "fxsr_opt", "rdtscp", NULL, NULL, "lm", "3dnowext", "3dnow", | 32 | NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm", "3dnowext", "3dnow", |
33 | 33 | ||
34 | /* Transmeta-defined */ | 34 | /* Transmeta-defined */ |
35 | "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, | 35 | "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, |
@@ -47,7 +47,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
47 | /* Intel-defined (#2) */ | 47 | /* Intel-defined (#2) */ |
48 | "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", | 48 | "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", |
49 | "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, | 49 | "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, |
50 | NULL, NULL, "dca", NULL, NULL, NULL, NULL, NULL, | 50 | NULL, NULL, "dca", NULL, NULL, NULL, NULL, "popcnt", |
51 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 51 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
52 | 52 | ||
53 | /* VIA/Cyrix/Centaur-defined */ | 53 | /* VIA/Cyrix/Centaur-defined */ |
@@ -57,8 +57,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
57 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 57 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
58 | 58 | ||
59 | /* AMD-defined (#2) */ | 59 | /* AMD-defined (#2) */ |
60 | "lahf_lm", "cmp_legacy", "svm", NULL, "cr8legacy", NULL, NULL, NULL, | 60 | "lahf_lm", "cmp_legacy", "svm", "extapic", "cr8legacy", "abm", |
61 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 61 | "sse4a", "misalignsse", |
62 | "3dnowprefetch", "osvw", "ibs", NULL, NULL, NULL, NULL, NULL, | ||
62 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 63 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
63 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 64 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
64 | }; | 65 | }; |
@@ -69,8 +70,11 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
69 | "ttp", /* thermal trip */ | 70 | "ttp", /* thermal trip */ |
70 | "tm", | 71 | "tm", |
71 | "stc", | 72 | "stc", |
73 | "100mhzsteps", | ||
74 | "hwpstate", | ||
72 | NULL, | 75 | NULL, |
73 | /* nothing */ /* constant_tsc - moved to flags */ | 76 | NULL, /* constant_tsc - moved to flags */ |
77 | /* nothing */ | ||
74 | }; | 78 | }; |
75 | struct cpuinfo_x86 *c = v; | 79 | struct cpuinfo_x86 *c = v; |
76 | int i, n = c - cpu_data; | 80 | int i, n = c - cpu_data; |
diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c index 4056fb7d2cdf..5678d46863c6 100644 --- a/arch/i386/kernel/cpu/transmeta.c +++ b/arch/i386/kernel/cpu/transmeta.c | |||
@@ -9,7 +9,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) | |||
9 | { | 9 | { |
10 | unsigned int cap_mask, uk, max, dummy; | 10 | unsigned int cap_mask, uk, max, dummy; |
11 | unsigned int cms_rev1, cms_rev2; | 11 | unsigned int cms_rev1, cms_rev2; |
12 | unsigned int cpu_rev, cpu_freq, cpu_flags, new_cpu_rev; | 12 | unsigned int cpu_rev, cpu_freq = 0, cpu_flags, new_cpu_rev; |
13 | char cpu_info[65]; | 13 | char cpu_info[65]; |
14 | 14 | ||
15 | get_model_name(c); /* Same as AMD/Cyrix */ | 15 | get_model_name(c); /* Same as AMD/Cyrix */ |
@@ -72,6 +72,9 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) | |||
72 | wrmsr(0x80860004, ~0, uk); | 72 | wrmsr(0x80860004, ~0, uk); |
73 | c->x86_capability[0] = cpuid_edx(0x00000001); | 73 | c->x86_capability[0] = cpuid_edx(0x00000001); |
74 | wrmsr(0x80860004, cap_mask, uk); | 74 | wrmsr(0x80860004, cap_mask, uk); |
75 | |||
76 | /* All Transmeta CPUs have a constant TSC */ | ||
77 | set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); | ||
75 | 78 | ||
76 | /* If we can run i686 user-space code, call us an i686 */ | 79 | /* If we can run i686 user-space code, call us an i686 */ |
77 | #define USER686 (X86_FEATURE_TSC|X86_FEATURE_CX8|X86_FEATURE_CMOV) | 80 | #define USER686 (X86_FEATURE_TSC|X86_FEATURE_CX8|X86_FEATURE_CMOV) |
diff --git a/arch/i386/kernel/cpuid.c b/arch/i386/kernel/cpuid.c index 4da75fa3208d..eeae0d992337 100644 --- a/arch/i386/kernel/cpuid.c +++ b/arch/i386/kernel/cpuid.c | |||
@@ -48,7 +48,6 @@ static struct class *cpuid_class; | |||
48 | #ifdef CONFIG_SMP | 48 | #ifdef CONFIG_SMP |
49 | 49 | ||
50 | struct cpuid_command { | 50 | struct cpuid_command { |
51 | int cpu; | ||
52 | u32 reg; | 51 | u32 reg; |
53 | u32 *data; | 52 | u32 *data; |
54 | }; | 53 | }; |
@@ -57,8 +56,7 @@ static void cpuid_smp_cpuid(void *cmd_block) | |||
57 | { | 56 | { |
58 | struct cpuid_command *cmd = (struct cpuid_command *)cmd_block; | 57 | struct cpuid_command *cmd = (struct cpuid_command *)cmd_block; |
59 | 58 | ||
60 | if (cmd->cpu == smp_processor_id()) | 59 | cpuid(cmd->reg, &cmd->data[0], &cmd->data[1], &cmd->data[2], |
61 | cpuid(cmd->reg, &cmd->data[0], &cmd->data[1], &cmd->data[2], | ||
62 | &cmd->data[3]); | 60 | &cmd->data[3]); |
63 | } | 61 | } |
64 | 62 | ||
@@ -70,11 +68,10 @@ static inline void do_cpuid(int cpu, u32 reg, u32 * data) | |||
70 | if (cpu == smp_processor_id()) { | 68 | if (cpu == smp_processor_id()) { |
71 | cpuid(reg, &data[0], &data[1], &data[2], &data[3]); | 69 | cpuid(reg, &data[0], &data[1], &data[2], &data[3]); |
72 | } else { | 70 | } else { |
73 | cmd.cpu = cpu; | ||
74 | cmd.reg = reg; | 71 | cmd.reg = reg; |
75 | cmd.data = data; | 72 | cmd.data = data; |
76 | 73 | ||
77 | smp_call_function(cpuid_smp_cpuid, &cmd, 1, 1); | 74 | smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1); |
78 | } | 75 | } |
79 | preempt_enable(); | 76 | preempt_enable(); |
80 | } | 77 | } |
diff --git a/arch/i386/kernel/e820.c b/arch/i386/kernel/e820.c index f391abcf7da9..70f39560846a 100644 --- a/arch/i386/kernel/e820.c +++ b/arch/i386/kernel/e820.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <asm/pgtable.h> | 14 | #include <asm/pgtable.h> |
15 | #include <asm/page.h> | 15 | #include <asm/page.h> |
16 | #include <asm/e820.h> | 16 | #include <asm/e820.h> |
17 | #include <asm/setup.h> | ||
17 | 18 | ||
18 | #ifdef CONFIG_EFI | 19 | #ifdef CONFIG_EFI |
19 | int efi_enabled = 0; | 20 | int efi_enabled = 0; |
@@ -156,21 +157,22 @@ static struct resource standard_io_resources[] = { { | |||
156 | .flags = IORESOURCE_BUSY | IORESOURCE_IO | 157 | .flags = IORESOURCE_BUSY | IORESOURCE_IO |
157 | } }; | 158 | } }; |
158 | 159 | ||
159 | static int romsignature(const unsigned char *x) | 160 | #define ROMSIGNATURE 0xaa55 |
161 | |||
162 | static int __init romsignature(const unsigned char *rom) | ||
160 | { | 163 | { |
161 | unsigned short sig; | 164 | unsigned short sig; |
162 | int ret = 0; | 165 | |
163 | if (probe_kernel_address((const unsigned short *)x, sig) == 0) | 166 | return probe_kernel_address((const unsigned short *)rom, sig) == 0 && |
164 | ret = (sig == 0xaa55); | 167 | sig == ROMSIGNATURE; |
165 | return ret; | ||
166 | } | 168 | } |
167 | 169 | ||
168 | static int __init romchecksum(unsigned char *rom, unsigned long length) | 170 | static int __init romchecksum(unsigned char *rom, unsigned long length) |
169 | { | 171 | { |
170 | unsigned char *p, sum = 0; | 172 | unsigned char sum; |
171 | 173 | ||
172 | for (p = rom; p < rom + length; p++) | 174 | for (sum = 0; length; length--) |
173 | sum += *p; | 175 | sum += *rom++; |
174 | return sum == 0; | 176 | return sum == 0; |
175 | } | 177 | } |
176 | 178 | ||
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 5e47683fc63a..18bddcb8e9e8 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S | |||
@@ -30,7 +30,7 @@ | |||
30 | * 18(%esp) - %eax | 30 | * 18(%esp) - %eax |
31 | * 1C(%esp) - %ds | 31 | * 1C(%esp) - %ds |
32 | * 20(%esp) - %es | 32 | * 20(%esp) - %es |
33 | * 24(%esp) - %gs | 33 | * 24(%esp) - %fs |
34 | * 28(%esp) - orig_eax | 34 | * 28(%esp) - orig_eax |
35 | * 2C(%esp) - %eip | 35 | * 2C(%esp) - %eip |
36 | * 30(%esp) - %cs | 36 | * 30(%esp) - %cs |
@@ -99,9 +99,9 @@ VM_MASK = 0x00020000 | |||
99 | 99 | ||
100 | #define SAVE_ALL \ | 100 | #define SAVE_ALL \ |
101 | cld; \ | 101 | cld; \ |
102 | pushl %gs; \ | 102 | pushl %fs; \ |
103 | CFI_ADJUST_CFA_OFFSET 4;\ | 103 | CFI_ADJUST_CFA_OFFSET 4;\ |
104 | /*CFI_REL_OFFSET gs, 0;*/\ | 104 | /*CFI_REL_OFFSET fs, 0;*/\ |
105 | pushl %es; \ | 105 | pushl %es; \ |
106 | CFI_ADJUST_CFA_OFFSET 4;\ | 106 | CFI_ADJUST_CFA_OFFSET 4;\ |
107 | /*CFI_REL_OFFSET es, 0;*/\ | 107 | /*CFI_REL_OFFSET es, 0;*/\ |
@@ -133,7 +133,7 @@ VM_MASK = 0x00020000 | |||
133 | movl %edx, %ds; \ | 133 | movl %edx, %ds; \ |
134 | movl %edx, %es; \ | 134 | movl %edx, %es; \ |
135 | movl $(__KERNEL_PDA), %edx; \ | 135 | movl $(__KERNEL_PDA), %edx; \ |
136 | movl %edx, %gs | 136 | movl %edx, %fs |
137 | 137 | ||
138 | #define RESTORE_INT_REGS \ | 138 | #define RESTORE_INT_REGS \ |
139 | popl %ebx; \ | 139 | popl %ebx; \ |
@@ -166,9 +166,9 @@ VM_MASK = 0x00020000 | |||
166 | 2: popl %es; \ | 166 | 2: popl %es; \ |
167 | CFI_ADJUST_CFA_OFFSET -4;\ | 167 | CFI_ADJUST_CFA_OFFSET -4;\ |
168 | /*CFI_RESTORE es;*/\ | 168 | /*CFI_RESTORE es;*/\ |
169 | 3: popl %gs; \ | 169 | 3: popl %fs; \ |
170 | CFI_ADJUST_CFA_OFFSET -4;\ | 170 | CFI_ADJUST_CFA_OFFSET -4;\ |
171 | /*CFI_RESTORE gs;*/\ | 171 | /*CFI_RESTORE fs;*/\ |
172 | .pushsection .fixup,"ax"; \ | 172 | .pushsection .fixup,"ax"; \ |
173 | 4: movl $0,(%esp); \ | 173 | 4: movl $0,(%esp); \ |
174 | jmp 1b; \ | 174 | jmp 1b; \ |
@@ -227,6 +227,7 @@ ENTRY(ret_from_fork) | |||
227 | CFI_ADJUST_CFA_OFFSET -4 | 227 | CFI_ADJUST_CFA_OFFSET -4 |
228 | jmp syscall_exit | 228 | jmp syscall_exit |
229 | CFI_ENDPROC | 229 | CFI_ENDPROC |
230 | END(ret_from_fork) | ||
230 | 231 | ||
231 | /* | 232 | /* |
232 | * Return to user mode is not as complex as all this looks, | 233 | * Return to user mode is not as complex as all this looks, |
@@ -258,6 +259,7 @@ ENTRY(resume_userspace) | |||
258 | # int/exception return? | 259 | # int/exception return? |
259 | jne work_pending | 260 | jne work_pending |
260 | jmp restore_all | 261 | jmp restore_all |
262 | END(ret_from_exception) | ||
261 | 263 | ||
262 | #ifdef CONFIG_PREEMPT | 264 | #ifdef CONFIG_PREEMPT |
263 | ENTRY(resume_kernel) | 265 | ENTRY(resume_kernel) |
@@ -272,6 +274,7 @@ need_resched: | |||
272 | jz restore_all | 274 | jz restore_all |
273 | call preempt_schedule_irq | 275 | call preempt_schedule_irq |
274 | jmp need_resched | 276 | jmp need_resched |
277 | END(resume_kernel) | ||
275 | #endif | 278 | #endif |
276 | CFI_ENDPROC | 279 | CFI_ENDPROC |
277 | 280 | ||
@@ -349,16 +352,17 @@ sysenter_past_esp: | |||
349 | movl PT_OLDESP(%esp), %ecx | 352 | movl PT_OLDESP(%esp), %ecx |
350 | xorl %ebp,%ebp | 353 | xorl %ebp,%ebp |
351 | TRACE_IRQS_ON | 354 | TRACE_IRQS_ON |
352 | 1: mov PT_GS(%esp), %gs | 355 | 1: mov PT_FS(%esp), %fs |
353 | ENABLE_INTERRUPTS_SYSEXIT | 356 | ENABLE_INTERRUPTS_SYSEXIT |
354 | CFI_ENDPROC | 357 | CFI_ENDPROC |
355 | .pushsection .fixup,"ax" | 358 | .pushsection .fixup,"ax" |
356 | 2: movl $0,PT_GS(%esp) | 359 | 2: movl $0,PT_FS(%esp) |
357 | jmp 1b | 360 | jmp 1b |
358 | .section __ex_table,"a" | 361 | .section __ex_table,"a" |
359 | .align 4 | 362 | .align 4 |
360 | .long 1b,2b | 363 | .long 1b,2b |
361 | .popsection | 364 | .popsection |
365 | ENDPROC(sysenter_entry) | ||
362 | 366 | ||
363 | # system call handler stub | 367 | # system call handler stub |
364 | ENTRY(system_call) | 368 | ENTRY(system_call) |
@@ -459,6 +463,7 @@ ldt_ss: | |||
459 | CFI_ADJUST_CFA_OFFSET -8 | 463 | CFI_ADJUST_CFA_OFFSET -8 |
460 | jmp restore_nocheck | 464 | jmp restore_nocheck |
461 | CFI_ENDPROC | 465 | CFI_ENDPROC |
466 | ENDPROC(system_call) | ||
462 | 467 | ||
463 | # perform work that needs to be done immediately before resumption | 468 | # perform work that needs to be done immediately before resumption |
464 | ALIGN | 469 | ALIGN |
@@ -504,6 +509,7 @@ work_notifysig_v86: | |||
504 | xorl %edx, %edx | 509 | xorl %edx, %edx |
505 | call do_notify_resume | 510 | call do_notify_resume |
506 | jmp resume_userspace_sig | 511 | jmp resume_userspace_sig |
512 | END(work_pending) | ||
507 | 513 | ||
508 | # perform syscall exit tracing | 514 | # perform syscall exit tracing |
509 | ALIGN | 515 | ALIGN |
@@ -519,6 +525,7 @@ syscall_trace_entry: | |||
519 | cmpl $(nr_syscalls), %eax | 525 | cmpl $(nr_syscalls), %eax |
520 | jnae syscall_call | 526 | jnae syscall_call |
521 | jmp syscall_exit | 527 | jmp syscall_exit |
528 | END(syscall_trace_entry) | ||
522 | 529 | ||
523 | # perform syscall exit tracing | 530 | # perform syscall exit tracing |
524 | ALIGN | 531 | ALIGN |
@@ -532,6 +539,7 @@ syscall_exit_work: | |||
532 | movl $1, %edx | 539 | movl $1, %edx |
533 | call do_syscall_trace | 540 | call do_syscall_trace |
534 | jmp resume_userspace | 541 | jmp resume_userspace |
542 | END(syscall_exit_work) | ||
535 | CFI_ENDPROC | 543 | CFI_ENDPROC |
536 | 544 | ||
537 | RING0_INT_FRAME # can't unwind into user space anyway | 545 | RING0_INT_FRAME # can't unwind into user space anyway |
@@ -542,15 +550,17 @@ syscall_fault: | |||
542 | GET_THREAD_INFO(%ebp) | 550 | GET_THREAD_INFO(%ebp) |
543 | movl $-EFAULT,PT_EAX(%esp) | 551 | movl $-EFAULT,PT_EAX(%esp) |
544 | jmp resume_userspace | 552 | jmp resume_userspace |
553 | END(syscall_fault) | ||
545 | 554 | ||
546 | syscall_badsys: | 555 | syscall_badsys: |
547 | movl $-ENOSYS,PT_EAX(%esp) | 556 | movl $-ENOSYS,PT_EAX(%esp) |
548 | jmp resume_userspace | 557 | jmp resume_userspace |
558 | END(syscall_badsys) | ||
549 | CFI_ENDPROC | 559 | CFI_ENDPROC |
550 | 560 | ||
551 | #define FIXUP_ESPFIX_STACK \ | 561 | #define FIXUP_ESPFIX_STACK \ |
552 | /* since we are on a wrong stack, we cant make it a C code :( */ \ | 562 | /* since we are on a wrong stack, we cant make it a C code :( */ \ |
553 | movl %gs:PDA_cpu, %ebx; \ | 563 | movl %fs:PDA_cpu, %ebx; \ |
554 | PER_CPU(cpu_gdt_descr, %ebx); \ | 564 | PER_CPU(cpu_gdt_descr, %ebx); \ |
555 | movl GDS_address(%ebx), %ebx; \ | 565 | movl GDS_address(%ebx), %ebx; \ |
556 | GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ | 566 | GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ |
@@ -581,9 +591,9 @@ syscall_badsys: | |||
581 | ENTRY(interrupt) | 591 | ENTRY(interrupt) |
582 | .text | 592 | .text |
583 | 593 | ||
584 | vector=0 | ||
585 | ENTRY(irq_entries_start) | 594 | ENTRY(irq_entries_start) |
586 | RING0_INT_FRAME | 595 | RING0_INT_FRAME |
596 | vector=0 | ||
587 | .rept NR_IRQS | 597 | .rept NR_IRQS |
588 | ALIGN | 598 | ALIGN |
589 | .if vector | 599 | .if vector |
@@ -592,11 +602,16 @@ ENTRY(irq_entries_start) | |||
592 | 1: pushl $~(vector) | 602 | 1: pushl $~(vector) |
593 | CFI_ADJUST_CFA_OFFSET 4 | 603 | CFI_ADJUST_CFA_OFFSET 4 |
594 | jmp common_interrupt | 604 | jmp common_interrupt |
595 | .data | 605 | .previous |
596 | .long 1b | 606 | .long 1b |
597 | .text | 607 | .text |
598 | vector=vector+1 | 608 | vector=vector+1 |
599 | .endr | 609 | .endr |
610 | END(irq_entries_start) | ||
611 | |||
612 | .previous | ||
613 | END(interrupt) | ||
614 | .previous | ||
600 | 615 | ||
601 | /* | 616 | /* |
602 | * the CPU automatically disables interrupts when executing an IRQ vector, | 617 | * the CPU automatically disables interrupts when executing an IRQ vector, |
@@ -609,6 +624,7 @@ common_interrupt: | |||
609 | movl %esp,%eax | 624 | movl %esp,%eax |
610 | call do_IRQ | 625 | call do_IRQ |
611 | jmp ret_from_intr | 626 | jmp ret_from_intr |
627 | ENDPROC(common_interrupt) | ||
612 | CFI_ENDPROC | 628 | CFI_ENDPROC |
613 | 629 | ||
614 | #define BUILD_INTERRUPT(name, nr) \ | 630 | #define BUILD_INTERRUPT(name, nr) \ |
@@ -621,18 +637,24 @@ ENTRY(name) \ | |||
621 | movl %esp,%eax; \ | 637 | movl %esp,%eax; \ |
622 | call smp_/**/name; \ | 638 | call smp_/**/name; \ |
623 | jmp ret_from_intr; \ | 639 | jmp ret_from_intr; \ |
624 | CFI_ENDPROC | 640 | CFI_ENDPROC; \ |
641 | ENDPROC(name) | ||
625 | 642 | ||
626 | /* The include is where all of the SMP etc. interrupts come from */ | 643 | /* The include is where all of the SMP etc. interrupts come from */ |
627 | #include "entry_arch.h" | 644 | #include "entry_arch.h" |
628 | 645 | ||
646 | /* This alternate entry is needed because we hijack the apic LVTT */ | ||
647 | #if defined(CONFIG_VMI) && defined(CONFIG_X86_LOCAL_APIC) | ||
648 | BUILD_INTERRUPT(apic_vmi_timer_interrupt,LOCAL_TIMER_VECTOR) | ||
649 | #endif | ||
650 | |||
629 | KPROBE_ENTRY(page_fault) | 651 | KPROBE_ENTRY(page_fault) |
630 | RING0_EC_FRAME | 652 | RING0_EC_FRAME |
631 | pushl $do_page_fault | 653 | pushl $do_page_fault |
632 | CFI_ADJUST_CFA_OFFSET 4 | 654 | CFI_ADJUST_CFA_OFFSET 4 |
633 | ALIGN | 655 | ALIGN |
634 | error_code: | 656 | error_code: |
635 | /* the function address is in %gs's slot on the stack */ | 657 | /* the function address is in %fs's slot on the stack */ |
636 | pushl %es | 658 | pushl %es |
637 | CFI_ADJUST_CFA_OFFSET 4 | 659 | CFI_ADJUST_CFA_OFFSET 4 |
638 | /*CFI_REL_OFFSET es, 0*/ | 660 | /*CFI_REL_OFFSET es, 0*/ |
@@ -661,20 +683,20 @@ error_code: | |||
661 | CFI_ADJUST_CFA_OFFSET 4 | 683 | CFI_ADJUST_CFA_OFFSET 4 |
662 | CFI_REL_OFFSET ebx, 0 | 684 | CFI_REL_OFFSET ebx, 0 |
663 | cld | 685 | cld |
664 | pushl %gs | 686 | pushl %fs |
665 | CFI_ADJUST_CFA_OFFSET 4 | 687 | CFI_ADJUST_CFA_OFFSET 4 |
666 | /*CFI_REL_OFFSET gs, 0*/ | 688 | /*CFI_REL_OFFSET fs, 0*/ |
667 | movl $(__KERNEL_PDA), %ecx | 689 | movl $(__KERNEL_PDA), %ecx |
668 | movl %ecx, %gs | 690 | movl %ecx, %fs |
669 | UNWIND_ESPFIX_STACK | 691 | UNWIND_ESPFIX_STACK |
670 | popl %ecx | 692 | popl %ecx |
671 | CFI_ADJUST_CFA_OFFSET -4 | 693 | CFI_ADJUST_CFA_OFFSET -4 |
672 | /*CFI_REGISTER es, ecx*/ | 694 | /*CFI_REGISTER es, ecx*/ |
673 | movl PT_GS(%esp), %edi # get the function address | 695 | movl PT_FS(%esp), %edi # get the function address |
674 | movl PT_ORIG_EAX(%esp), %edx # get the error code | 696 | movl PT_ORIG_EAX(%esp), %edx # get the error code |
675 | movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart | 697 | movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart |
676 | mov %ecx, PT_GS(%esp) | 698 | mov %ecx, PT_FS(%esp) |
677 | /*CFI_REL_OFFSET gs, ES*/ | 699 | /*CFI_REL_OFFSET fs, ES*/ |
678 | movl $(__USER_DS), %ecx | 700 | movl $(__USER_DS), %ecx |
679 | movl %ecx, %ds | 701 | movl %ecx, %ds |
680 | movl %ecx, %es | 702 | movl %ecx, %es |
@@ -692,6 +714,7 @@ ENTRY(coprocessor_error) | |||
692 | CFI_ADJUST_CFA_OFFSET 4 | 714 | CFI_ADJUST_CFA_OFFSET 4 |
693 | jmp error_code | 715 | jmp error_code |
694 | CFI_ENDPROC | 716 | CFI_ENDPROC |
717 | END(coprocessor_error) | ||
695 | 718 | ||
696 | ENTRY(simd_coprocessor_error) | 719 | ENTRY(simd_coprocessor_error) |
697 | RING0_INT_FRAME | 720 | RING0_INT_FRAME |
@@ -701,6 +724,7 @@ ENTRY(simd_coprocessor_error) | |||
701 | CFI_ADJUST_CFA_OFFSET 4 | 724 | CFI_ADJUST_CFA_OFFSET 4 |
702 | jmp error_code | 725 | jmp error_code |
703 | CFI_ENDPROC | 726 | CFI_ENDPROC |
727 | END(simd_coprocessor_error) | ||
704 | 728 | ||
705 | ENTRY(device_not_available) | 729 | ENTRY(device_not_available) |
706 | RING0_INT_FRAME | 730 | RING0_INT_FRAME |
@@ -721,6 +745,7 @@ device_not_available_emulate: | |||
721 | CFI_ADJUST_CFA_OFFSET -4 | 745 | CFI_ADJUST_CFA_OFFSET -4 |
722 | jmp ret_from_exception | 746 | jmp ret_from_exception |
723 | CFI_ENDPROC | 747 | CFI_ENDPROC |
748 | END(device_not_available) | ||
724 | 749 | ||
725 | /* | 750 | /* |
726 | * Debug traps and NMI can happen at the one SYSENTER instruction | 751 | * Debug traps and NMI can happen at the one SYSENTER instruction |
@@ -864,10 +889,12 @@ ENTRY(native_iret) | |||
864 | .align 4 | 889 | .align 4 |
865 | .long 1b,iret_exc | 890 | .long 1b,iret_exc |
866 | .previous | 891 | .previous |
892 | END(native_iret) | ||
867 | 893 | ||
868 | ENTRY(native_irq_enable_sysexit) | 894 | ENTRY(native_irq_enable_sysexit) |
869 | sti | 895 | sti |
870 | sysexit | 896 | sysexit |
897 | END(native_irq_enable_sysexit) | ||
871 | #endif | 898 | #endif |
872 | 899 | ||
873 | KPROBE_ENTRY(int3) | 900 | KPROBE_ENTRY(int3) |
@@ -890,6 +917,7 @@ ENTRY(overflow) | |||
890 | CFI_ADJUST_CFA_OFFSET 4 | 917 | CFI_ADJUST_CFA_OFFSET 4 |
891 | jmp error_code | 918 | jmp error_code |
892 | CFI_ENDPROC | 919 | CFI_ENDPROC |
920 | END(overflow) | ||
893 | 921 | ||
894 | ENTRY(bounds) | 922 | ENTRY(bounds) |
895 | RING0_INT_FRAME | 923 | RING0_INT_FRAME |
@@ -899,6 +927,7 @@ ENTRY(bounds) | |||
899 | CFI_ADJUST_CFA_OFFSET 4 | 927 | CFI_ADJUST_CFA_OFFSET 4 |
900 | jmp error_code | 928 | jmp error_code |
901 | CFI_ENDPROC | 929 | CFI_ENDPROC |
930 | END(bounds) | ||
902 | 931 | ||
903 | ENTRY(invalid_op) | 932 | ENTRY(invalid_op) |
904 | RING0_INT_FRAME | 933 | RING0_INT_FRAME |
@@ -908,6 +937,7 @@ ENTRY(invalid_op) | |||
908 | CFI_ADJUST_CFA_OFFSET 4 | 937 | CFI_ADJUST_CFA_OFFSET 4 |
909 | jmp error_code | 938 | jmp error_code |
910 | CFI_ENDPROC | 939 | CFI_ENDPROC |
940 | END(invalid_op) | ||
911 | 941 | ||
912 | ENTRY(coprocessor_segment_overrun) | 942 | ENTRY(coprocessor_segment_overrun) |
913 | RING0_INT_FRAME | 943 | RING0_INT_FRAME |
@@ -917,6 +947,7 @@ ENTRY(coprocessor_segment_overrun) | |||
917 | CFI_ADJUST_CFA_OFFSET 4 | 947 | CFI_ADJUST_CFA_OFFSET 4 |
918 | jmp error_code | 948 | jmp error_code |
919 | CFI_ENDPROC | 949 | CFI_ENDPROC |
950 | END(coprocessor_segment_overrun) | ||
920 | 951 | ||
921 | ENTRY(invalid_TSS) | 952 | ENTRY(invalid_TSS) |
922 | RING0_EC_FRAME | 953 | RING0_EC_FRAME |
@@ -924,6 +955,7 @@ ENTRY(invalid_TSS) | |||
924 | CFI_ADJUST_CFA_OFFSET 4 | 955 | CFI_ADJUST_CFA_OFFSET 4 |
925 | jmp error_code | 956 | jmp error_code |
926 | CFI_ENDPROC | 957 | CFI_ENDPROC |
958 | END(invalid_TSS) | ||
927 | 959 | ||
928 | ENTRY(segment_not_present) | 960 | ENTRY(segment_not_present) |
929 | RING0_EC_FRAME | 961 | RING0_EC_FRAME |
@@ -931,6 +963,7 @@ ENTRY(segment_not_present) | |||
931 | CFI_ADJUST_CFA_OFFSET 4 | 963 | CFI_ADJUST_CFA_OFFSET 4 |
932 | jmp error_code | 964 | jmp error_code |
933 | CFI_ENDPROC | 965 | CFI_ENDPROC |
966 | END(segment_not_present) | ||
934 | 967 | ||
935 | ENTRY(stack_segment) | 968 | ENTRY(stack_segment) |
936 | RING0_EC_FRAME | 969 | RING0_EC_FRAME |
@@ -938,6 +971,7 @@ ENTRY(stack_segment) | |||
938 | CFI_ADJUST_CFA_OFFSET 4 | 971 | CFI_ADJUST_CFA_OFFSET 4 |
939 | jmp error_code | 972 | jmp error_code |
940 | CFI_ENDPROC | 973 | CFI_ENDPROC |
974 | END(stack_segment) | ||
941 | 975 | ||
942 | KPROBE_ENTRY(general_protection) | 976 | KPROBE_ENTRY(general_protection) |
943 | RING0_EC_FRAME | 977 | RING0_EC_FRAME |
@@ -953,6 +987,7 @@ ENTRY(alignment_check) | |||
953 | CFI_ADJUST_CFA_OFFSET 4 | 987 | CFI_ADJUST_CFA_OFFSET 4 |
954 | jmp error_code | 988 | jmp error_code |
955 | CFI_ENDPROC | 989 | CFI_ENDPROC |
990 | END(alignment_check) | ||
956 | 991 | ||
957 | ENTRY(divide_error) | 992 | ENTRY(divide_error) |
958 | RING0_INT_FRAME | 993 | RING0_INT_FRAME |
@@ -962,6 +997,7 @@ ENTRY(divide_error) | |||
962 | CFI_ADJUST_CFA_OFFSET 4 | 997 | CFI_ADJUST_CFA_OFFSET 4 |
963 | jmp error_code | 998 | jmp error_code |
964 | CFI_ENDPROC | 999 | CFI_ENDPROC |
1000 | END(divide_error) | ||
965 | 1001 | ||
966 | #ifdef CONFIG_X86_MCE | 1002 | #ifdef CONFIG_X86_MCE |
967 | ENTRY(machine_check) | 1003 | ENTRY(machine_check) |
@@ -972,6 +1008,7 @@ ENTRY(machine_check) | |||
972 | CFI_ADJUST_CFA_OFFSET 4 | 1008 | CFI_ADJUST_CFA_OFFSET 4 |
973 | jmp error_code | 1009 | jmp error_code |
974 | CFI_ENDPROC | 1010 | CFI_ENDPROC |
1011 | END(machine_check) | ||
975 | #endif | 1012 | #endif |
976 | 1013 | ||
977 | ENTRY(spurious_interrupt_bug) | 1014 | ENTRY(spurious_interrupt_bug) |
@@ -982,6 +1019,7 @@ ENTRY(spurious_interrupt_bug) | |||
982 | CFI_ADJUST_CFA_OFFSET 4 | 1019 | CFI_ADJUST_CFA_OFFSET 4 |
983 | jmp error_code | 1020 | jmp error_code |
984 | CFI_ENDPROC | 1021 | CFI_ENDPROC |
1022 | END(spurious_interrupt_bug) | ||
985 | 1023 | ||
986 | ENTRY(kernel_thread_helper) | 1024 | ENTRY(kernel_thread_helper) |
987 | pushl $0 # fake return address for unwinder | 1025 | pushl $0 # fake return address for unwinder |
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index cb9abdfced9b..3fa7f9389afe 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S | |||
@@ -53,6 +53,7 @@ | |||
53 | * any particular GDT layout, because we load our own as soon as we | 53 | * any particular GDT layout, because we load our own as soon as we |
54 | * can. | 54 | * can. |
55 | */ | 55 | */ |
56 | .section .text.head,"ax",@progbits | ||
56 | ENTRY(startup_32) | 57 | ENTRY(startup_32) |
57 | 58 | ||
58 | #ifdef CONFIG_PARAVIRT | 59 | #ifdef CONFIG_PARAVIRT |
@@ -141,16 +142,25 @@ page_pde_offset = (__PAGE_OFFSET >> 20); | |||
141 | jb 10b | 142 | jb 10b |
142 | movl %edi,(init_pg_tables_end - __PAGE_OFFSET) | 143 | movl %edi,(init_pg_tables_end - __PAGE_OFFSET) |
143 | 144 | ||
144 | #ifdef CONFIG_SMP | ||
145 | xorl %ebx,%ebx /* This is the boot CPU (BSP) */ | 145 | xorl %ebx,%ebx /* This is the boot CPU (BSP) */ |
146 | jmp 3f | 146 | jmp 3f |
147 | |||
148 | /* | 147 | /* |
149 | * Non-boot CPU entry point; entered from trampoline.S | 148 | * Non-boot CPU entry point; entered from trampoline.S |
150 | * We can't lgdt here, because lgdt itself uses a data segment, but | 149 | * We can't lgdt here, because lgdt itself uses a data segment, but |
151 | * we know the trampoline has already loaded the boot_gdt_table GDT | 150 | * we know the trampoline has already loaded the boot_gdt_table GDT |
152 | * for us. | 151 | * for us. |
152 | * | ||
153 | * If cpu hotplug is not supported then this code can go in init section | ||
154 | * which will be freed later | ||
153 | */ | 155 | */ |
156 | |||
157 | #ifdef CONFIG_HOTPLUG_CPU | ||
158 | .section .text,"ax",@progbits | ||
159 | #else | ||
160 | .section .init.text,"ax",@progbits | ||
161 | #endif | ||
162 | |||
163 | #ifdef CONFIG_SMP | ||
154 | ENTRY(startup_32_smp) | 164 | ENTRY(startup_32_smp) |
155 | cld | 165 | cld |
156 | movl $(__BOOT_DS),%eax | 166 | movl $(__BOOT_DS),%eax |
@@ -208,8 +218,8 @@ ENTRY(startup_32_smp) | |||
208 | xorl %ebx,%ebx | 218 | xorl %ebx,%ebx |
209 | incl %ebx | 219 | incl %ebx |
210 | 220 | ||
211 | 3: | ||
212 | #endif /* CONFIG_SMP */ | 221 | #endif /* CONFIG_SMP */ |
222 | 3: | ||
213 | 223 | ||
214 | /* | 224 | /* |
215 | * Enable paging | 225 | * Enable paging |
@@ -309,7 +319,7 @@ is386: movl $2,%ecx # set MP | |||
309 | 319 | ||
310 | call check_x87 | 320 | call check_x87 |
311 | call setup_pda | 321 | call setup_pda |
312 | lgdt cpu_gdt_descr | 322 | lgdt early_gdt_descr |
313 | lidt idt_descr | 323 | lidt idt_descr |
314 | ljmp $(__KERNEL_CS),$1f | 324 | ljmp $(__KERNEL_CS),$1f |
315 | 1: movl $(__KERNEL_DS),%eax # reload all the segment registers | 325 | 1: movl $(__KERNEL_DS),%eax # reload all the segment registers |
@@ -319,12 +329,12 @@ is386: movl $2,%ecx # set MP | |||
319 | movl %eax,%ds | 329 | movl %eax,%ds |
320 | movl %eax,%es | 330 | movl %eax,%es |
321 | 331 | ||
322 | xorl %eax,%eax # Clear FS and LDT | 332 | xorl %eax,%eax # Clear GS and LDT |
323 | movl %eax,%fs | 333 | movl %eax,%gs |
324 | lldt %ax | 334 | lldt %ax |
325 | 335 | ||
326 | movl $(__KERNEL_PDA),%eax | 336 | movl $(__KERNEL_PDA),%eax |
327 | mov %eax,%gs | 337 | mov %eax,%fs |
328 | 338 | ||
329 | cld # gcc2 wants the direction flag cleared at all times | 339 | cld # gcc2 wants the direction flag cleared at all times |
330 | pushl $0 # fake return address for unwinder | 340 | pushl $0 # fake return address for unwinder |
@@ -360,12 +370,12 @@ check_x87: | |||
360 | * cpu_gdt_table and boot_pda; for secondary CPUs, these will be | 370 | * cpu_gdt_table and boot_pda; for secondary CPUs, these will be |
361 | * that CPU's GDT and PDA. | 371 | * that CPU's GDT and PDA. |
362 | */ | 372 | */ |
363 | setup_pda: | 373 | ENTRY(setup_pda) |
364 | /* get the PDA pointer */ | 374 | /* get the PDA pointer */ |
365 | movl start_pda, %eax | 375 | movl start_pda, %eax |
366 | 376 | ||
367 | /* slot the PDA address into the GDT */ | 377 | /* slot the PDA address into the GDT */ |
368 | mov cpu_gdt_descr+2, %ecx | 378 | mov early_gdt_descr+2, %ecx |
369 | mov %ax, (__KERNEL_PDA+0+2)(%ecx) /* base & 0x0000ffff */ | 379 | mov %ax, (__KERNEL_PDA+0+2)(%ecx) /* base & 0x0000ffff */ |
370 | shr $16, %eax | 380 | shr $16, %eax |
371 | mov %al, (__KERNEL_PDA+4+0)(%ecx) /* base & 0x00ff0000 */ | 381 | mov %al, (__KERNEL_PDA+4+0)(%ecx) /* base & 0x00ff0000 */ |
@@ -492,6 +502,7 @@ ignore_int: | |||
492 | #endif | 502 | #endif |
493 | iret | 503 | iret |
494 | 504 | ||
505 | .section .text | ||
495 | #ifdef CONFIG_PARAVIRT | 506 | #ifdef CONFIG_PARAVIRT |
496 | startup_paravirt: | 507 | startup_paravirt: |
497 | cld | 508 | cld |
@@ -502,10 +513,11 @@ startup_paravirt: | |||
502 | pushl %ecx | 513 | pushl %ecx |
503 | pushl %eax | 514 | pushl %eax |
504 | 515 | ||
505 | /* paravirt.o is last in link, and that probe fn never returns */ | ||
506 | pushl $__start_paravirtprobe | 516 | pushl $__start_paravirtprobe |
507 | 1: | 517 | 1: |
508 | movl 0(%esp), %eax | 518 | movl 0(%esp), %eax |
519 | cmpl $__stop_paravirtprobe, %eax | ||
520 | je unhandled_paravirt | ||
509 | pushl (%eax) | 521 | pushl (%eax) |
510 | movl 8(%esp), %eax | 522 | movl 8(%esp), %eax |
511 | call *(%esp) | 523 | call *(%esp) |
@@ -517,6 +529,10 @@ startup_paravirt: | |||
517 | 529 | ||
518 | addl $4, (%esp) | 530 | addl $4, (%esp) |
519 | jmp 1b | 531 | jmp 1b |
532 | |||
533 | unhandled_paravirt: | ||
534 | /* Nothing wanted us: we're screwed. */ | ||
535 | ud2 | ||
520 | #endif | 536 | #endif |
521 | 537 | ||
522 | /* | 538 | /* |
@@ -581,7 +597,7 @@ idt_descr: | |||
581 | 597 | ||
582 | # boot GDT descriptor (later on used by CPU#0): | 598 | # boot GDT descriptor (later on used by CPU#0): |
583 | .word 0 # 32 bit align gdt_desc.address | 599 | .word 0 # 32 bit align gdt_desc.address |
584 | ENTRY(cpu_gdt_descr) | 600 | ENTRY(early_gdt_descr) |
585 | .word GDT_ENTRIES*8-1 | 601 | .word GDT_ENTRIES*8-1 |
586 | .long cpu_gdt_table | 602 | .long cpu_gdt_table |
587 | 603 | ||
diff --git a/arch/i386/kernel/hpet.c b/arch/i386/kernel/hpet.c index 0b29d41322a2..e1006b7acc9e 100644 --- a/arch/i386/kernel/hpet.c +++ b/arch/i386/kernel/hpet.c | |||
@@ -1,4 +1,5 @@ | |||
1 | #include <linux/clocksource.h> | 1 | #include <linux/clocksource.h> |
2 | #include <linux/clockchips.h> | ||
2 | #include <linux/errno.h> | 3 | #include <linux/errno.h> |
3 | #include <linux/hpet.h> | 4 | #include <linux/hpet.h> |
4 | #include <linux/init.h> | 5 | #include <linux/init.h> |
@@ -6,17 +7,278 @@ | |||
6 | #include <asm/hpet.h> | 7 | #include <asm/hpet.h> |
7 | #include <asm/io.h> | 8 | #include <asm/io.h> |
8 | 9 | ||
10 | extern struct clock_event_device *global_clock_event; | ||
11 | |||
9 | #define HPET_MASK CLOCKSOURCE_MASK(32) | 12 | #define HPET_MASK CLOCKSOURCE_MASK(32) |
10 | #define HPET_SHIFT 22 | 13 | #define HPET_SHIFT 22 |
11 | 14 | ||
12 | /* FSEC = 10^-15 NSEC = 10^-9 */ | 15 | /* FSEC = 10^-15 NSEC = 10^-9 */ |
13 | #define FSEC_PER_NSEC 1000000 | 16 | #define FSEC_PER_NSEC 1000000 |
14 | 17 | ||
15 | static void __iomem *hpet_ptr; | 18 | /* |
19 | * HPET address is set in acpi/boot.c, when an ACPI entry exists | ||
20 | */ | ||
21 | unsigned long hpet_address; | ||
22 | static void __iomem * hpet_virt_address; | ||
23 | |||
24 | static inline unsigned long hpet_readl(unsigned long a) | ||
25 | { | ||
26 | return readl(hpet_virt_address + a); | ||
27 | } | ||
28 | |||
29 | static inline void hpet_writel(unsigned long d, unsigned long a) | ||
30 | { | ||
31 | writel(d, hpet_virt_address + a); | ||
32 | } | ||
33 | |||
34 | /* | ||
35 | * HPET command line enable / disable | ||
36 | */ | ||
37 | static int boot_hpet_disable; | ||
38 | |||
39 | static int __init hpet_setup(char* str) | ||
40 | { | ||
41 | if (str) { | ||
42 | if (!strncmp("disable", str, 7)) | ||
43 | boot_hpet_disable = 1; | ||
44 | } | ||
45 | return 1; | ||
46 | } | ||
47 | __setup("hpet=", hpet_setup); | ||
48 | |||
49 | static inline int is_hpet_capable(void) | ||
50 | { | ||
51 | return (!boot_hpet_disable && hpet_address); | ||
52 | } | ||
53 | |||
54 | /* | ||
55 | * HPET timer interrupt enable / disable | ||
56 | */ | ||
57 | static int hpet_legacy_int_enabled; | ||
58 | |||
59 | /** | ||
60 | * is_hpet_enabled - check whether the hpet timer interrupt is enabled | ||
61 | */ | ||
62 | int is_hpet_enabled(void) | ||
63 | { | ||
64 | return is_hpet_capable() && hpet_legacy_int_enabled; | ||
65 | } | ||
66 | |||
67 | /* | ||
68 | * When the hpet driver (/dev/hpet) is enabled, we need to reserve | ||
69 | * timer 0 and timer 1 in case of RTC emulation. | ||
70 | */ | ||
71 | #ifdef CONFIG_HPET | ||
72 | static void hpet_reserve_platform_timers(unsigned long id) | ||
73 | { | ||
74 | struct hpet __iomem *hpet = hpet_virt_address; | ||
75 | struct hpet_timer __iomem *timer = &hpet->hpet_timers[2]; | ||
76 | unsigned int nrtimers, i; | ||
77 | struct hpet_data hd; | ||
78 | |||
79 | nrtimers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1; | ||
80 | |||
81 | memset(&hd, 0, sizeof (hd)); | ||
82 | hd.hd_phys_address = hpet_address; | ||
83 | hd.hd_address = hpet_virt_address; | ||
84 | hd.hd_nirqs = nrtimers; | ||
85 | hd.hd_flags = HPET_DATA_PLATFORM; | ||
86 | hpet_reserve_timer(&hd, 0); | ||
87 | |||
88 | #ifdef CONFIG_HPET_EMULATE_RTC | ||
89 | hpet_reserve_timer(&hd, 1); | ||
90 | #endif | ||
91 | |||
92 | hd.hd_irq[0] = HPET_LEGACY_8254; | ||
93 | hd.hd_irq[1] = HPET_LEGACY_RTC; | ||
94 | |||
95 | for (i = 2; i < nrtimers; timer++, i++) | ||
96 | hd.hd_irq[i] = (timer->hpet_config & Tn_INT_ROUTE_CNF_MASK) >> | ||
97 | Tn_INT_ROUTE_CNF_SHIFT; | ||
98 | |||
99 | hpet_alloc(&hd); | ||
100 | |||
101 | } | ||
102 | #else | ||
103 | static void hpet_reserve_platform_timers(unsigned long id) { } | ||
104 | #endif | ||
105 | |||
106 | /* | ||
107 | * Common hpet info | ||
108 | */ | ||
109 | static unsigned long hpet_period; | ||
110 | |||
111 | static void hpet_set_mode(enum clock_event_mode mode, | ||
112 | struct clock_event_device *evt); | ||
113 | static int hpet_next_event(unsigned long delta, | ||
114 | struct clock_event_device *evt); | ||
115 | |||
116 | /* | ||
117 | * The hpet clock event device | ||
118 | */ | ||
119 | static struct clock_event_device hpet_clockevent = { | ||
120 | .name = "hpet", | ||
121 | .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, | ||
122 | .set_mode = hpet_set_mode, | ||
123 | .set_next_event = hpet_next_event, | ||
124 | .shift = 32, | ||
125 | .irq = 0, | ||
126 | }; | ||
127 | |||
128 | static void hpet_start_counter(void) | ||
129 | { | ||
130 | unsigned long cfg = hpet_readl(HPET_CFG); | ||
131 | |||
132 | cfg &= ~HPET_CFG_ENABLE; | ||
133 | hpet_writel(cfg, HPET_CFG); | ||
134 | hpet_writel(0, HPET_COUNTER); | ||
135 | hpet_writel(0, HPET_COUNTER + 4); | ||
136 | cfg |= HPET_CFG_ENABLE; | ||
137 | hpet_writel(cfg, HPET_CFG); | ||
138 | } | ||
139 | |||
140 | static void hpet_enable_int(void) | ||
141 | { | ||
142 | unsigned long cfg = hpet_readl(HPET_CFG); | ||
143 | |||
144 | cfg |= HPET_CFG_LEGACY; | ||
145 | hpet_writel(cfg, HPET_CFG); | ||
146 | hpet_legacy_int_enabled = 1; | ||
147 | } | ||
148 | |||
149 | static void hpet_set_mode(enum clock_event_mode mode, | ||
150 | struct clock_event_device *evt) | ||
151 | { | ||
152 | unsigned long cfg, cmp, now; | ||
153 | uint64_t delta; | ||
154 | |||
155 | switch(mode) { | ||
156 | case CLOCK_EVT_MODE_PERIODIC: | ||
157 | delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * hpet_clockevent.mult; | ||
158 | delta >>= hpet_clockevent.shift; | ||
159 | now = hpet_readl(HPET_COUNTER); | ||
160 | cmp = now + (unsigned long) delta; | ||
161 | cfg = hpet_readl(HPET_T0_CFG); | ||
162 | cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | | ||
163 | HPET_TN_SETVAL | HPET_TN_32BIT; | ||
164 | hpet_writel(cfg, HPET_T0_CFG); | ||
165 | /* | ||
166 | * The first write after writing TN_SETVAL to the | ||
167 | * config register sets the counter value, the second | ||
168 | * write sets the period. | ||
169 | */ | ||
170 | hpet_writel(cmp, HPET_T0_CMP); | ||
171 | udelay(1); | ||
172 | hpet_writel((unsigned long) delta, HPET_T0_CMP); | ||
173 | break; | ||
174 | |||
175 | case CLOCK_EVT_MODE_ONESHOT: | ||
176 | cfg = hpet_readl(HPET_T0_CFG); | ||
177 | cfg &= ~HPET_TN_PERIODIC; | ||
178 | cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; | ||
179 | hpet_writel(cfg, HPET_T0_CFG); | ||
180 | break; | ||
181 | |||
182 | case CLOCK_EVT_MODE_UNUSED: | ||
183 | case CLOCK_EVT_MODE_SHUTDOWN: | ||
184 | cfg = hpet_readl(HPET_T0_CFG); | ||
185 | cfg &= ~HPET_TN_ENABLE; | ||
186 | hpet_writel(cfg, HPET_T0_CFG); | ||
187 | break; | ||
188 | } | ||
189 | } | ||
190 | |||
191 | static int hpet_next_event(unsigned long delta, | ||
192 | struct clock_event_device *evt) | ||
193 | { | ||
194 | unsigned long cnt; | ||
195 | |||
196 | cnt = hpet_readl(HPET_COUNTER); | ||
197 | cnt += delta; | ||
198 | hpet_writel(cnt, HPET_T0_CMP); | ||
199 | |||
200 | return ((long)(hpet_readl(HPET_COUNTER) - cnt ) > 0); | ||
201 | } | ||
202 | |||
203 | /* | ||
204 | * Try to setup the HPET timer | ||
205 | */ | ||
206 | int __init hpet_enable(void) | ||
207 | { | ||
208 | unsigned long id; | ||
209 | uint64_t hpet_freq; | ||
210 | |||
211 | if (!is_hpet_capable()) | ||
212 | return 0; | ||
213 | |||
214 | hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE); | ||
215 | |||
216 | /* | ||
217 | * Read the period and check for a sane value: | ||
218 | */ | ||
219 | hpet_period = hpet_readl(HPET_PERIOD); | ||
220 | if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD) | ||
221 | goto out_nohpet; | ||
222 | |||
223 | /* | ||
224 | * The period is a femto seconds value. We need to calculate the | ||
225 | * scaled math multiplication factor for nanosecond to hpet tick | ||
226 | * conversion. | ||
227 | */ | ||
228 | hpet_freq = 1000000000000000ULL; | ||
229 | do_div(hpet_freq, hpet_period); | ||
230 | hpet_clockevent.mult = div_sc((unsigned long) hpet_freq, | ||
231 | NSEC_PER_SEC, 32); | ||
232 | /* Calculate the min / max delta */ | ||
233 | hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, | ||
234 | &hpet_clockevent); | ||
235 | hpet_clockevent.min_delta_ns = clockevent_delta2ns(0x30, | ||
236 | &hpet_clockevent); | ||
237 | |||
238 | /* | ||
239 | * Read the HPET ID register to retrieve the IRQ routing | ||
240 | * information and the number of channels | ||
241 | */ | ||
242 | id = hpet_readl(HPET_ID); | ||
243 | |||
244 | #ifdef CONFIG_HPET_EMULATE_RTC | ||
245 | /* | ||
246 | * The legacy routing mode needs at least two channels, tick timer | ||
247 | * and the rtc emulation channel. | ||
248 | */ | ||
249 | if (!(id & HPET_ID_NUMBER)) | ||
250 | goto out_nohpet; | ||
251 | #endif | ||
252 | |||
253 | /* Start the counter */ | ||
254 | hpet_start_counter(); | ||
255 | |||
256 | if (id & HPET_ID_LEGSUP) { | ||
257 | hpet_enable_int(); | ||
258 | hpet_reserve_platform_timers(id); | ||
259 | /* | ||
260 | * Start hpet with the boot cpu mask and make it | ||
261 | * global after the IO_APIC has been initialized. | ||
262 | */ | ||
263 | hpet_clockevent.cpumask =cpumask_of_cpu(0); | ||
264 | clockevents_register_device(&hpet_clockevent); | ||
265 | global_clock_event = &hpet_clockevent; | ||
266 | return 1; | ||
267 | } | ||
268 | return 0; | ||
16 | 269 | ||
270 | out_nohpet: | ||
271 | iounmap(hpet_virt_address); | ||
272 | hpet_virt_address = NULL; | ||
273 | return 0; | ||
274 | } | ||
275 | |||
276 | /* | ||
277 | * Clock source related code | ||
278 | */ | ||
17 | static cycle_t read_hpet(void) | 279 | static cycle_t read_hpet(void) |
18 | { | 280 | { |
19 | return (cycle_t)readl(hpet_ptr); | 281 | return (cycle_t)hpet_readl(HPET_COUNTER); |
20 | } | 282 | } |
21 | 283 | ||
22 | static struct clocksource clocksource_hpet = { | 284 | static struct clocksource clocksource_hpet = { |
@@ -24,28 +286,17 @@ static struct clocksource clocksource_hpet = { | |||
24 | .rating = 250, | 286 | .rating = 250, |
25 | .read = read_hpet, | 287 | .read = read_hpet, |
26 | .mask = HPET_MASK, | 288 | .mask = HPET_MASK, |
27 | .mult = 0, /* set below */ | ||
28 | .shift = HPET_SHIFT, | 289 | .shift = HPET_SHIFT, |
29 | .is_continuous = 1, | 290 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
30 | }; | 291 | }; |
31 | 292 | ||
32 | static int __init init_hpet_clocksource(void) | 293 | static int __init init_hpet_clocksource(void) |
33 | { | 294 | { |
34 | unsigned long hpet_period; | ||
35 | void __iomem* hpet_base; | ||
36 | u64 tmp; | 295 | u64 tmp; |
37 | int err; | ||
38 | 296 | ||
39 | if (!is_hpet_enabled()) | 297 | if (!hpet_virt_address) |
40 | return -ENODEV; | 298 | return -ENODEV; |
41 | 299 | ||
42 | /* calculate the hpet address: */ | ||
43 | hpet_base = ioremap_nocache(hpet_address, HPET_MMAP_SIZE); | ||
44 | hpet_ptr = hpet_base + HPET_COUNTER; | ||
45 | |||
46 | /* calculate the frequency: */ | ||
47 | hpet_period = readl(hpet_base + HPET_PERIOD); | ||
48 | |||
49 | /* | 300 | /* |
50 | * hpet period is in femto seconds per cycle | 301 | * hpet period is in femto seconds per cycle |
51 | * so we need to convert this to ns/cyc units | 302 | * so we need to convert this to ns/cyc units |
@@ -61,11 +312,218 @@ static int __init init_hpet_clocksource(void) | |||
61 | do_div(tmp, FSEC_PER_NSEC); | 312 | do_div(tmp, FSEC_PER_NSEC); |
62 | clocksource_hpet.mult = (u32)tmp; | 313 | clocksource_hpet.mult = (u32)tmp; |
63 | 314 | ||
64 | err = clocksource_register(&clocksource_hpet); | 315 | return clocksource_register(&clocksource_hpet); |
65 | if (err) | ||
66 | iounmap(hpet_base); | ||
67 | |||
68 | return err; | ||
69 | } | 316 | } |
70 | 317 | ||
71 | module_init(init_hpet_clocksource); | 318 | module_init(init_hpet_clocksource); |
319 | |||
320 | #ifdef CONFIG_HPET_EMULATE_RTC | ||
321 | |||
322 | /* HPET in LegacyReplacement Mode eats up RTC interrupt line. When, HPET | ||
323 | * is enabled, we support RTC interrupt functionality in software. | ||
324 | * RTC has 3 kinds of interrupts: | ||
325 | * 1) Update Interrupt - generate an interrupt, every sec, when RTC clock | ||
326 | * is updated | ||
327 | * 2) Alarm Interrupt - generate an interrupt at a specific time of day | ||
328 | * 3) Periodic Interrupt - generate periodic interrupt, with frequencies | ||
329 | * 2Hz-8192Hz (2Hz-64Hz for non-root user) (all freqs in powers of 2) | ||
330 | * (1) and (2) above are implemented using polling at a frequency of | ||
331 | * 64 Hz. The exact frequency is a tradeoff between accuracy and interrupt | ||
332 | * overhead. (DEFAULT_RTC_INT_FREQ) | ||
333 | * For (3), we use interrupts at 64Hz or user specified periodic | ||
334 | * frequency, whichever is higher. | ||
335 | */ | ||
336 | #include <linux/mc146818rtc.h> | ||
337 | #include <linux/rtc.h> | ||
338 | |||
339 | #define DEFAULT_RTC_INT_FREQ 64 | ||
340 | #define DEFAULT_RTC_SHIFT 6 | ||
341 | #define RTC_NUM_INTS 1 | ||
342 | |||
343 | static unsigned long hpet_rtc_flags; | ||
344 | static unsigned long hpet_prev_update_sec; | ||
345 | static struct rtc_time hpet_alarm_time; | ||
346 | static unsigned long hpet_pie_count; | ||
347 | static unsigned long hpet_t1_cmp; | ||
348 | static unsigned long hpet_default_delta; | ||
349 | static unsigned long hpet_pie_delta; | ||
350 | static unsigned long hpet_pie_limit; | ||
351 | |||
352 | /* | ||
353 | * Timer 1 for RTC emulation. We use one shot mode, as periodic mode | ||
354 | * is not supported by all HPET implementations for timer 1. | ||
355 | * | ||
356 | * hpet_rtc_timer_init() is called when the rtc is initialized. | ||
357 | */ | ||
358 | int hpet_rtc_timer_init(void) | ||
359 | { | ||
360 | unsigned long cfg, cnt, delta, flags; | ||
361 | |||
362 | if (!is_hpet_enabled()) | ||
363 | return 0; | ||
364 | |||
365 | if (!hpet_default_delta) { | ||
366 | uint64_t clc; | ||
367 | |||
368 | clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC; | ||
369 | clc >>= hpet_clockevent.shift + DEFAULT_RTC_SHIFT; | ||
370 | hpet_default_delta = (unsigned long) clc; | ||
371 | } | ||
372 | |||
373 | if (!(hpet_rtc_flags & RTC_PIE) || hpet_pie_limit) | ||
374 | delta = hpet_default_delta; | ||
375 | else | ||
376 | delta = hpet_pie_delta; | ||
377 | |||
378 | local_irq_save(flags); | ||
379 | |||
380 | cnt = delta + hpet_readl(HPET_COUNTER); | ||
381 | hpet_writel(cnt, HPET_T1_CMP); | ||
382 | hpet_t1_cmp = cnt; | ||
383 | |||
384 | cfg = hpet_readl(HPET_T1_CFG); | ||
385 | cfg &= ~HPET_TN_PERIODIC; | ||
386 | cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; | ||
387 | hpet_writel(cfg, HPET_T1_CFG); | ||
388 | |||
389 | local_irq_restore(flags); | ||
390 | |||
391 | return 1; | ||
392 | } | ||
393 | |||
394 | /* | ||
395 | * The functions below are called from rtc driver. | ||
396 | * Return 0 if HPET is not being used. | ||
397 | * Otherwise do the necessary changes and return 1. | ||
398 | */ | ||
399 | int hpet_mask_rtc_irq_bit(unsigned long bit_mask) | ||
400 | { | ||
401 | if (!is_hpet_enabled()) | ||
402 | return 0; | ||
403 | |||
404 | hpet_rtc_flags &= ~bit_mask; | ||
405 | return 1; | ||
406 | } | ||
407 | |||
408 | int hpet_set_rtc_irq_bit(unsigned long bit_mask) | ||
409 | { | ||
410 | unsigned long oldbits = hpet_rtc_flags; | ||
411 | |||
412 | if (!is_hpet_enabled()) | ||
413 | return 0; | ||
414 | |||
415 | hpet_rtc_flags |= bit_mask; | ||
416 | |||
417 | if (!oldbits) | ||
418 | hpet_rtc_timer_init(); | ||
419 | |||
420 | return 1; | ||
421 | } | ||
422 | |||
423 | int hpet_set_alarm_time(unsigned char hrs, unsigned char min, | ||
424 | unsigned char sec) | ||
425 | { | ||
426 | if (!is_hpet_enabled()) | ||
427 | return 0; | ||
428 | |||
429 | hpet_alarm_time.tm_hour = hrs; | ||
430 | hpet_alarm_time.tm_min = min; | ||
431 | hpet_alarm_time.tm_sec = sec; | ||
432 | |||
433 | return 1; | ||
434 | } | ||
435 | |||
436 | int hpet_set_periodic_freq(unsigned long freq) | ||
437 | { | ||
438 | uint64_t clc; | ||
439 | |||
440 | if (!is_hpet_enabled()) | ||
441 | return 0; | ||
442 | |||
443 | if (freq <= DEFAULT_RTC_INT_FREQ) | ||
444 | hpet_pie_limit = DEFAULT_RTC_INT_FREQ / freq; | ||
445 | else { | ||
446 | clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC; | ||
447 | do_div(clc, freq); | ||
448 | clc >>= hpet_clockevent.shift; | ||
449 | hpet_pie_delta = (unsigned long) clc; | ||
450 | } | ||
451 | return 1; | ||
452 | } | ||
453 | |||
454 | int hpet_rtc_dropped_irq(void) | ||
455 | { | ||
456 | return is_hpet_enabled(); | ||
457 | } | ||
458 | |||
459 | static void hpet_rtc_timer_reinit(void) | ||
460 | { | ||
461 | unsigned long cfg, delta; | ||
462 | int lost_ints = -1; | ||
463 | |||
464 | if (unlikely(!hpet_rtc_flags)) { | ||
465 | cfg = hpet_readl(HPET_T1_CFG); | ||
466 | cfg &= ~HPET_TN_ENABLE; | ||
467 | hpet_writel(cfg, HPET_T1_CFG); | ||
468 | return; | ||
469 | } | ||
470 | |||
471 | if (!(hpet_rtc_flags & RTC_PIE) || hpet_pie_limit) | ||
472 | delta = hpet_default_delta; | ||
473 | else | ||
474 | delta = hpet_pie_delta; | ||
475 | |||
476 | /* | ||
477 | * Increment the comparator value until we are ahead of the | ||
478 | * current count. | ||
479 | */ | ||
480 | do { | ||
481 | hpet_t1_cmp += delta; | ||
482 | hpet_writel(hpet_t1_cmp, HPET_T1_CMP); | ||
483 | lost_ints++; | ||
484 | } while ((long)(hpet_readl(HPET_COUNTER) - hpet_t1_cmp) > 0); | ||
485 | |||
486 | if (lost_ints) { | ||
487 | if (hpet_rtc_flags & RTC_PIE) | ||
488 | hpet_pie_count += lost_ints; | ||
489 | if (printk_ratelimit()) | ||
490 | printk(KERN_WARNING "rtc: lost %d interrupts\n", | ||
491 | lost_ints); | ||
492 | } | ||
493 | } | ||
494 | |||
495 | irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id) | ||
496 | { | ||
497 | struct rtc_time curr_time; | ||
498 | unsigned long rtc_int_flag = 0; | ||
499 | |||
500 | hpet_rtc_timer_reinit(); | ||
501 | |||
502 | if (hpet_rtc_flags & (RTC_UIE | RTC_AIE)) | ||
503 | rtc_get_rtc_time(&curr_time); | ||
504 | |||
505 | if (hpet_rtc_flags & RTC_UIE && | ||
506 | curr_time.tm_sec != hpet_prev_update_sec) { | ||
507 | rtc_int_flag = RTC_UF; | ||
508 | hpet_prev_update_sec = curr_time.tm_sec; | ||
509 | } | ||
510 | |||
511 | if (hpet_rtc_flags & RTC_PIE && | ||
512 | ++hpet_pie_count >= hpet_pie_limit) { | ||
513 | rtc_int_flag |= RTC_PF; | ||
514 | hpet_pie_count = 0; | ||
515 | } | ||
516 | |||
517 | if (hpet_rtc_flags & RTC_PIE && | ||
518 | (curr_time.tm_sec == hpet_alarm_time.tm_sec) && | ||
519 | (curr_time.tm_min == hpet_alarm_time.tm_min) && | ||
520 | (curr_time.tm_hour == hpet_alarm_time.tm_hour)) | ||
521 | rtc_int_flag |= RTC_AF; | ||
522 | |||
523 | if (rtc_int_flag) { | ||
524 | rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8)); | ||
525 | rtc_interrupt(rtc_int_flag, dev_id); | ||
526 | } | ||
527 | return IRQ_HANDLED; | ||
528 | } | ||
529 | #endif | ||
diff --git a/arch/i386/kernel/i8253.c b/arch/i386/kernel/i8253.c index 9a0060b92e32..a6bc7bb38834 100644 --- a/arch/i386/kernel/i8253.c +++ b/arch/i386/kernel/i8253.c | |||
@@ -2,7 +2,7 @@ | |||
2 | * i8253.c 8253/PIT functions | 2 | * i8253.c 8253/PIT functions |
3 | * | 3 | * |
4 | */ | 4 | */ |
5 | #include <linux/clocksource.h> | 5 | #include <linux/clockchips.h> |
6 | #include <linux/spinlock.h> | 6 | #include <linux/spinlock.h> |
7 | #include <linux/jiffies.h> | 7 | #include <linux/jiffies.h> |
8 | #include <linux/sysdev.h> | 8 | #include <linux/sysdev.h> |
@@ -19,17 +19,97 @@ | |||
19 | DEFINE_SPINLOCK(i8253_lock); | 19 | DEFINE_SPINLOCK(i8253_lock); |
20 | EXPORT_SYMBOL(i8253_lock); | 20 | EXPORT_SYMBOL(i8253_lock); |
21 | 21 | ||
22 | void setup_pit_timer(void) | 22 | /* |
23 | * HPET replaces the PIT, when enabled. So we need to know, which of | ||
24 | * the two timers is used | ||
25 | */ | ||
26 | struct clock_event_device *global_clock_event; | ||
27 | |||
28 | /* | ||
29 | * Initialize the PIT timer. | ||
30 | * | ||
31 | * This is also called after resume to bring the PIT into operation again. | ||
32 | */ | ||
33 | static void init_pit_timer(enum clock_event_mode mode, | ||
34 | struct clock_event_device *evt) | ||
35 | { | ||
36 | unsigned long flags; | ||
37 | |||
38 | spin_lock_irqsave(&i8253_lock, flags); | ||
39 | |||
40 | switch(mode) { | ||
41 | case CLOCK_EVT_MODE_PERIODIC: | ||
42 | /* binary, mode 2, LSB/MSB, ch 0 */ | ||
43 | outb_p(0x34, PIT_MODE); | ||
44 | udelay(10); | ||
45 | outb_p(LATCH & 0xff , PIT_CH0); /* LSB */ | ||
46 | udelay(10); | ||
47 | outb(LATCH >> 8 , PIT_CH0); /* MSB */ | ||
48 | break; | ||
49 | |||
50 | case CLOCK_EVT_MODE_ONESHOT: | ||
51 | case CLOCK_EVT_MODE_SHUTDOWN: | ||
52 | case CLOCK_EVT_MODE_UNUSED: | ||
53 | /* One shot setup */ | ||
54 | outb_p(0x38, PIT_MODE); | ||
55 | udelay(10); | ||
56 | break; | ||
57 | } | ||
58 | spin_unlock_irqrestore(&i8253_lock, flags); | ||
59 | } | ||
60 | |||
61 | /* | ||
62 | * Program the next event in oneshot mode | ||
63 | * | ||
64 | * Delta is given in PIT ticks | ||
65 | */ | ||
66 | static int pit_next_event(unsigned long delta, struct clock_event_device *evt) | ||
23 | { | 67 | { |
24 | unsigned long flags; | 68 | unsigned long flags; |
25 | 69 | ||
26 | spin_lock_irqsave(&i8253_lock, flags); | 70 | spin_lock_irqsave(&i8253_lock, flags); |
27 | outb_p(0x34,PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */ | 71 | outb_p(delta & 0xff , PIT_CH0); /* LSB */ |
28 | udelay(10); | 72 | outb(delta >> 8 , PIT_CH0); /* MSB */ |
29 | outb_p(LATCH & 0xff , PIT_CH0); /* LSB */ | ||
30 | udelay(10); | ||
31 | outb(LATCH >> 8 , PIT_CH0); /* MSB */ | ||
32 | spin_unlock_irqrestore(&i8253_lock, flags); | 73 | spin_unlock_irqrestore(&i8253_lock, flags); |
74 | |||
75 | return 0; | ||
76 | } | ||
77 | |||
78 | /* | ||
79 | * On UP the PIT can serve all of the possible timer functions. On SMP systems | ||
80 | * it can be solely used for the global tick. | ||
81 | * | ||
82 | * The profiling and update capabilites are switched off once the local apic is | ||
83 | * registered. This mechanism replaces the previous #ifdef LOCAL_APIC - | ||
84 | * !using_apic_timer decisions in do_timer_interrupt_hook() | ||
85 | */ | ||
86 | struct clock_event_device pit_clockevent = { | ||
87 | .name = "pit", | ||
88 | .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, | ||
89 | .set_mode = init_pit_timer, | ||
90 | .set_next_event = pit_next_event, | ||
91 | .shift = 32, | ||
92 | .irq = 0, | ||
93 | }; | ||
94 | |||
95 | /* | ||
96 | * Initialize the conversion factor and the min/max deltas of the clock event | ||
97 | * structure and register the clock event source with the framework. | ||
98 | */ | ||
99 | void __init setup_pit_timer(void) | ||
100 | { | ||
101 | /* | ||
102 | * Start pit with the boot cpu mask and make it global after the | ||
103 | * IO_APIC has been initialized. | ||
104 | */ | ||
105 | pit_clockevent.cpumask = cpumask_of_cpu(0); | ||
106 | pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, 32); | ||
107 | pit_clockevent.max_delta_ns = | ||
108 | clockevent_delta2ns(0x7FFF, &pit_clockevent); | ||
109 | pit_clockevent.min_delta_ns = | ||
110 | clockevent_delta2ns(0xF, &pit_clockevent); | ||
111 | clockevents_register_device(&pit_clockevent); | ||
112 | global_clock_event = &pit_clockevent; | ||
33 | } | 113 | } |
34 | 114 | ||
35 | /* | 115 | /* |
@@ -46,7 +126,7 @@ static cycle_t pit_read(void) | |||
46 | static u32 old_jifs; | 126 | static u32 old_jifs; |
47 | 127 | ||
48 | spin_lock_irqsave(&i8253_lock, flags); | 128 | spin_lock_irqsave(&i8253_lock, flags); |
49 | /* | 129 | /* |
50 | * Although our caller may have the read side of xtime_lock, | 130 | * Although our caller may have the read side of xtime_lock, |
51 | * this is now a seqlock, and we are cheating in this routine | 131 | * this is now a seqlock, and we are cheating in this routine |
52 | * by having side effects on state that we cannot undo if | 132 | * by having side effects on state that we cannot undo if |
diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c index c8d45821c788..03abfdb1a6e4 100644 --- a/arch/i386/kernel/i8259.c +++ b/arch/i386/kernel/i8259.c | |||
@@ -41,6 +41,7 @@ static void mask_and_ack_8259A(unsigned int); | |||
41 | static struct irq_chip i8259A_chip = { | 41 | static struct irq_chip i8259A_chip = { |
42 | .name = "XT-PIC", | 42 | .name = "XT-PIC", |
43 | .mask = disable_8259A_irq, | 43 | .mask = disable_8259A_irq, |
44 | .disable = disable_8259A_irq, | ||
44 | .unmask = enable_8259A_irq, | 45 | .unmask = enable_8259A_irq, |
45 | .mask_ack = mask_and_ack_8259A, | 46 | .mask_ack = mask_and_ack_8259A, |
46 | }; | 47 | }; |
@@ -410,12 +411,6 @@ void __init native_init_IRQ(void) | |||
410 | intr_init_hook(); | 411 | intr_init_hook(); |
411 | 412 | ||
412 | /* | 413 | /* |
413 | * Set the clock to HZ Hz, we already have a valid | ||
414 | * vector now: | ||
415 | */ | ||
416 | setup_pit_timer(); | ||
417 | |||
418 | /* | ||
419 | * External FPU? Set up irq13 if so, for | 414 | * External FPU? Set up irq13 if so, for |
420 | * original braindamaged IBM FERR coupling. | 415 | * original braindamaged IBM FERR coupling. |
421 | */ | 416 | */ |
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index ba8d302a0b72..4ccebd454e25 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c | |||
@@ -482,8 +482,8 @@ static void do_irq_balance(void) | |||
482 | package_index = CPU_TO_PACKAGEINDEX(i); | 482 | package_index = CPU_TO_PACKAGEINDEX(i); |
483 | for (j = 0; j < NR_IRQS; j++) { | 483 | for (j = 0; j < NR_IRQS; j++) { |
484 | unsigned long value_now, delta; | 484 | unsigned long value_now, delta; |
485 | /* Is this an active IRQ? */ | 485 | /* Is this an active IRQ or balancing disabled ? */ |
486 | if (!irq_desc[j].action) | 486 | if (!irq_desc[j].action || irq_balancing_disabled(j)) |
487 | continue; | 487 | continue; |
488 | if ( package_index == i ) | 488 | if ( package_index == i ) |
489 | IRQ_DELTA(package_index,j) = 0; | 489 | IRQ_DELTA(package_index,j) = 0; |
@@ -1281,11 +1281,9 @@ static void ioapic_register_intr(int irq, int vector, unsigned long trigger) | |||
1281 | trigger == IOAPIC_LEVEL) | 1281 | trigger == IOAPIC_LEVEL) |
1282 | set_irq_chip_and_handler_name(irq, &ioapic_chip, | 1282 | set_irq_chip_and_handler_name(irq, &ioapic_chip, |
1283 | handle_fasteoi_irq, "fasteoi"); | 1283 | handle_fasteoi_irq, "fasteoi"); |
1284 | else { | 1284 | else |
1285 | irq_desc[irq].status |= IRQ_DELAYED_DISABLE; | ||
1286 | set_irq_chip_and_handler_name(irq, &ioapic_chip, | 1285 | set_irq_chip_and_handler_name(irq, &ioapic_chip, |
1287 | handle_edge_irq, "edge"); | 1286 | handle_edge_irq, "edge"); |
1288 | } | ||
1289 | set_intr_gate(vector, interrupt[irq]); | 1287 | set_intr_gate(vector, interrupt[irq]); |
1290 | } | 1288 | } |
1291 | 1289 | ||
@@ -1588,7 +1586,7 @@ void /*__init*/ print_local_APIC(void * dummy) | |||
1588 | v = apic_read(APIC_LVR); | 1586 | v = apic_read(APIC_LVR); |
1589 | printk(KERN_INFO "... APIC VERSION: %08x\n", v); | 1587 | printk(KERN_INFO "... APIC VERSION: %08x\n", v); |
1590 | ver = GET_APIC_VERSION(v); | 1588 | ver = GET_APIC_VERSION(v); |
1591 | maxlvt = get_maxlvt(); | 1589 | maxlvt = lapic_get_maxlvt(); |
1592 | 1590 | ||
1593 | v = apic_read(APIC_TASKPRI); | 1591 | v = apic_read(APIC_TASKPRI); |
1594 | printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); | 1592 | printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); |
@@ -1920,7 +1918,7 @@ static void __init setup_ioapic_ids_from_mpc(void) | |||
1920 | static void __init setup_ioapic_ids_from_mpc(void) { } | 1918 | static void __init setup_ioapic_ids_from_mpc(void) { } |
1921 | #endif | 1919 | #endif |
1922 | 1920 | ||
1923 | static int no_timer_check __initdata; | 1921 | int no_timer_check __initdata; |
1924 | 1922 | ||
1925 | static int __init notimercheck(char *s) | 1923 | static int __init notimercheck(char *s) |
1926 | { | 1924 | { |
@@ -2310,7 +2308,7 @@ static inline void __init check_timer(void) | |||
2310 | 2308 | ||
2311 | disable_8259A_irq(0); | 2309 | disable_8259A_irq(0); |
2312 | set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq, | 2310 | set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq, |
2313 | "fasteio"); | 2311 | "fasteoi"); |
2314 | apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ | 2312 | apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ |
2315 | enable_8259A_irq(0); | 2313 | enable_8259A_irq(0); |
2316 | 2314 | ||
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index 3201d421090a..0f2ca590bf23 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c | |||
@@ -10,7 +10,6 @@ | |||
10 | * io_apic.c.) | 10 | * io_apic.c.) |
11 | */ | 11 | */ |
12 | 12 | ||
13 | #include <asm/uaccess.h> | ||
14 | #include <linux/module.h> | 13 | #include <linux/module.h> |
15 | #include <linux/seq_file.h> | 14 | #include <linux/seq_file.h> |
16 | #include <linux/interrupt.h> | 15 | #include <linux/interrupt.h> |
@@ -19,19 +18,36 @@ | |||
19 | #include <linux/cpu.h> | 18 | #include <linux/cpu.h> |
20 | #include <linux/delay.h> | 19 | #include <linux/delay.h> |
21 | 20 | ||
21 | #include <asm/idle.h> | ||
22 | |||
23 | #include <asm/apic.h> | ||
24 | #include <asm/uaccess.h> | ||
25 | |||
22 | DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; | 26 | DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; |
23 | EXPORT_PER_CPU_SYMBOL(irq_stat); | 27 | EXPORT_PER_CPU_SYMBOL(irq_stat); |
24 | 28 | ||
25 | #ifndef CONFIG_X86_LOCAL_APIC | ||
26 | /* | 29 | /* |
27 | * 'what should we do if we get a hw irq event on an illegal vector'. | 30 | * 'what should we do if we get a hw irq event on an illegal vector'. |
28 | * each architecture has to answer this themselves. | 31 | * each architecture has to answer this themselves. |
29 | */ | 32 | */ |
30 | void ack_bad_irq(unsigned int irq) | 33 | void ack_bad_irq(unsigned int irq) |
31 | { | 34 | { |
32 | printk("unexpected IRQ trap at vector %02x\n", irq); | 35 | printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq); |
33 | } | 36 | |
37 | #ifdef CONFIG_X86_LOCAL_APIC | ||
38 | /* | ||
39 | * Currently unexpected vectors happen only on SMP and APIC. | ||
40 | * We _must_ ack these because every local APIC has only N | ||
41 | * irq slots per priority level, and a 'hanging, unacked' IRQ | ||
42 | * holds up an irq slot - in excessive cases (when multiple | ||
43 | * unexpected vectors occur) that might lock up the APIC | ||
44 | * completely. | ||
45 | * But only ack when the APIC is enabled -AK | ||
46 | */ | ||
47 | if (cpu_has_apic) | ||
48 | ack_APIC_irq(); | ||
34 | #endif | 49 | #endif |
50 | } | ||
35 | 51 | ||
36 | #ifdef CONFIG_4KSTACKS | 52 | #ifdef CONFIG_4KSTACKS |
37 | /* | 53 | /* |
@@ -61,6 +77,7 @@ fastcall unsigned int do_IRQ(struct pt_regs *regs) | |||
61 | union irq_ctx *curctx, *irqctx; | 77 | union irq_ctx *curctx, *irqctx; |
62 | u32 *isp; | 78 | u32 *isp; |
63 | #endif | 79 | #endif |
80 | exit_idle(); | ||
64 | 81 | ||
65 | if (unlikely((unsigned)irq >= NR_IRQS)) { | 82 | if (unlikely((unsigned)irq >= NR_IRQS)) { |
66 | printk(KERN_EMERG "%s: cannot handle IRQ %d\n", | 83 | printk(KERN_EMERG "%s: cannot handle IRQ %d\n", |
diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index af1d53344993..b545bc746fce 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c | |||
@@ -363,7 +363,7 @@ no_kprobe: | |||
363 | " pushf\n" | 363 | " pushf\n" |
364 | /* skip cs, eip, orig_eax */ | 364 | /* skip cs, eip, orig_eax */ |
365 | " subl $12, %esp\n" | 365 | " subl $12, %esp\n" |
366 | " pushl %gs\n" | 366 | " pushl %fs\n" |
367 | " pushl %ds\n" | 367 | " pushl %ds\n" |
368 | " pushl %es\n" | 368 | " pushl %es\n" |
369 | " pushl %eax\n" | 369 | " pushl %eax\n" |
@@ -387,7 +387,7 @@ no_kprobe: | |||
387 | " popl %edi\n" | 387 | " popl %edi\n" |
388 | " popl %ebp\n" | 388 | " popl %ebp\n" |
389 | " popl %eax\n" | 389 | " popl %eax\n" |
390 | /* skip eip, orig_eax, es, ds, gs */ | 390 | /* skip eip, orig_eax, es, ds, fs */ |
391 | " addl $20, %esp\n" | 391 | " addl $20, %esp\n" |
392 | " popf\n" | 392 | " popf\n" |
393 | " ret\n"); | 393 | " ret\n"); |
@@ -408,7 +408,7 @@ fastcall void *__kprobes trampoline_handler(struct pt_regs *regs) | |||
408 | spin_lock_irqsave(&kretprobe_lock, flags); | 408 | spin_lock_irqsave(&kretprobe_lock, flags); |
409 | head = kretprobe_inst_table_head(current); | 409 | head = kretprobe_inst_table_head(current); |
410 | /* fixup registers */ | 410 | /* fixup registers */ |
411 | regs->xcs = __KERNEL_CS; | 411 | regs->xcs = __KERNEL_CS | get_kernel_rpl(); |
412 | regs->eip = trampoline_address; | 412 | regs->eip = trampoline_address; |
413 | regs->orig_eax = 0xffffffff; | 413 | regs->orig_eax = 0xffffffff; |
414 | 414 | ||
diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c index 381252bae3d8..b8f16633a6ec 100644 --- a/arch/i386/kernel/microcode.c +++ b/arch/i386/kernel/microcode.c | |||
@@ -384,7 +384,7 @@ static int do_microcode_update (void) | |||
384 | { | 384 | { |
385 | long cursor = 0; | 385 | long cursor = 0; |
386 | int error = 0; | 386 | int error = 0; |
387 | void *new_mc; | 387 | void *new_mc = NULL; |
388 | int cpu; | 388 | int cpu; |
389 | cpumask_t old; | 389 | cpumask_t old; |
390 | 390 | ||
diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c index 4e14264f392a..bcaa6e9b6197 100644 --- a/arch/i386/kernel/msr.c +++ b/arch/i386/kernel/msr.c | |||
@@ -68,7 +68,6 @@ static inline int rdmsr_eio(u32 reg, u32 *eax, u32 *edx) | |||
68 | #ifdef CONFIG_SMP | 68 | #ifdef CONFIG_SMP |
69 | 69 | ||
70 | struct msr_command { | 70 | struct msr_command { |
71 | int cpu; | ||
72 | int err; | 71 | int err; |
73 | u32 reg; | 72 | u32 reg; |
74 | u32 data[2]; | 73 | u32 data[2]; |
@@ -78,16 +77,14 @@ static void msr_smp_wrmsr(void *cmd_block) | |||
78 | { | 77 | { |
79 | struct msr_command *cmd = (struct msr_command *)cmd_block; | 78 | struct msr_command *cmd = (struct msr_command *)cmd_block; |
80 | 79 | ||
81 | if (cmd->cpu == smp_processor_id()) | 80 | cmd->err = wrmsr_eio(cmd->reg, cmd->data[0], cmd->data[1]); |
82 | cmd->err = wrmsr_eio(cmd->reg, cmd->data[0], cmd->data[1]); | ||
83 | } | 81 | } |
84 | 82 | ||
85 | static void msr_smp_rdmsr(void *cmd_block) | 83 | static void msr_smp_rdmsr(void *cmd_block) |
86 | { | 84 | { |
87 | struct msr_command *cmd = (struct msr_command *)cmd_block; | 85 | struct msr_command *cmd = (struct msr_command *)cmd_block; |
88 | 86 | ||
89 | if (cmd->cpu == smp_processor_id()) | 87 | cmd->err = rdmsr_eio(cmd->reg, &cmd->data[0], &cmd->data[1]); |
90 | cmd->err = rdmsr_eio(cmd->reg, &cmd->data[0], &cmd->data[1]); | ||
91 | } | 88 | } |
92 | 89 | ||
93 | static inline int do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx) | 90 | static inline int do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx) |
@@ -99,12 +96,11 @@ static inline int do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx) | |||
99 | if (cpu == smp_processor_id()) { | 96 | if (cpu == smp_processor_id()) { |
100 | ret = wrmsr_eio(reg, eax, edx); | 97 | ret = wrmsr_eio(reg, eax, edx); |
101 | } else { | 98 | } else { |
102 | cmd.cpu = cpu; | ||
103 | cmd.reg = reg; | 99 | cmd.reg = reg; |
104 | cmd.data[0] = eax; | 100 | cmd.data[0] = eax; |
105 | cmd.data[1] = edx; | 101 | cmd.data[1] = edx; |
106 | 102 | ||
107 | smp_call_function(msr_smp_wrmsr, &cmd, 1, 1); | 103 | smp_call_function_single(cpu, msr_smp_wrmsr, &cmd, 1, 1); |
108 | ret = cmd.err; | 104 | ret = cmd.err; |
109 | } | 105 | } |
110 | preempt_enable(); | 106 | preempt_enable(); |
@@ -120,10 +116,9 @@ static inline int do_rdmsr(int cpu, u32 reg, u32 * eax, u32 * edx) | |||
120 | if (cpu == smp_processor_id()) { | 116 | if (cpu == smp_processor_id()) { |
121 | ret = rdmsr_eio(reg, eax, edx); | 117 | ret = rdmsr_eio(reg, eax, edx); |
122 | } else { | 118 | } else { |
123 | cmd.cpu = cpu; | ||
124 | cmd.reg = reg; | 119 | cmd.reg = reg; |
125 | 120 | ||
126 | smp_call_function(msr_smp_rdmsr, &cmd, 1, 1); | 121 | smp_call_function_single(cpu, msr_smp_rdmsr, &cmd, 1, 1); |
127 | 122 | ||
128 | *eax = cmd.data[0]; | 123 | *eax = cmd.data[0]; |
129 | *edx = cmd.data[1]; | 124 | *edx = cmd.data[1]; |
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index 1a6f8bb8881c..821df34d2b3a 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/dmi.h> | 23 | #include <linux/dmi.h> |
24 | #include <linux/kprobes.h> | 24 | #include <linux/kprobes.h> |
25 | #include <linux/cpumask.h> | 25 | #include <linux/cpumask.h> |
26 | #include <linux/kernel_stat.h> | ||
26 | 27 | ||
27 | #include <asm/smp.h> | 28 | #include <asm/smp.h> |
28 | #include <asm/nmi.h> | 29 | #include <asm/nmi.h> |
@@ -185,7 +186,8 @@ static __cpuinit inline int nmi_known_cpu(void) | |||
185 | { | 186 | { |
186 | switch (boot_cpu_data.x86_vendor) { | 187 | switch (boot_cpu_data.x86_vendor) { |
187 | case X86_VENDOR_AMD: | 188 | case X86_VENDOR_AMD: |
188 | return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6)); | 189 | return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6) |
190 | || (boot_cpu_data.x86 == 16)); | ||
189 | case X86_VENDOR_INTEL: | 191 | case X86_VENDOR_INTEL: |
190 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | 192 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) |
191 | return 1; | 193 | return 1; |
@@ -216,6 +218,28 @@ static __init void nmi_cpu_busy(void *data) | |||
216 | } | 218 | } |
217 | #endif | 219 | #endif |
218 | 220 | ||
221 | static unsigned int adjust_for_32bit_ctr(unsigned int hz) | ||
222 | { | ||
223 | u64 counter_val; | ||
224 | unsigned int retval = hz; | ||
225 | |||
226 | /* | ||
227 | * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter | ||
228 | * are writable, with higher bits sign extending from bit 31. | ||
229 | * So, we can only program the counter with 31 bit values and | ||
230 | * 32nd bit should be 1, for 33.. to be 1. | ||
231 | * Find the appropriate nmi_hz | ||
232 | */ | ||
233 | counter_val = (u64)cpu_khz * 1000; | ||
234 | do_div(counter_val, retval); | ||
235 | if (counter_val > 0x7fffffffULL) { | ||
236 | u64 count = (u64)cpu_khz * 1000; | ||
237 | do_div(count, 0x7fffffffUL); | ||
238 | retval = count + 1; | ||
239 | } | ||
240 | return retval; | ||
241 | } | ||
242 | |||
219 | static int __init check_nmi_watchdog(void) | 243 | static int __init check_nmi_watchdog(void) |
220 | { | 244 | { |
221 | unsigned int *prev_nmi_count; | 245 | unsigned int *prev_nmi_count; |
@@ -281,18 +305,10 @@ static int __init check_nmi_watchdog(void) | |||
281 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | 305 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); |
282 | 306 | ||
283 | nmi_hz = 1; | 307 | nmi_hz = 1; |
284 | /* | 308 | |
285 | * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter | 309 | if (wd->perfctr_msr == MSR_P6_PERFCTR0 || |
286 | * are writable, with higher bits sign extending from bit 31. | 310 | wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { |
287 | * So, we can only program the counter with 31 bit values and | 311 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); |
288 | * 32nd bit should be 1, for 33.. to be 1. | ||
289 | * Find the appropriate nmi_hz | ||
290 | */ | ||
291 | if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0 && | ||
292 | ((u64)cpu_khz * 1000) > 0x7fffffffULL) { | ||
293 | u64 count = (u64)cpu_khz * 1000; | ||
294 | do_div(count, 0x7fffffffUL); | ||
295 | nmi_hz = count + 1; | ||
296 | } | 312 | } |
297 | } | 313 | } |
298 | 314 | ||
@@ -369,6 +385,34 @@ void enable_timer_nmi_watchdog(void) | |||
369 | } | 385 | } |
370 | } | 386 | } |
371 | 387 | ||
388 | static void __acpi_nmi_disable(void *__unused) | ||
389 | { | ||
390 | apic_write_around(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); | ||
391 | } | ||
392 | |||
393 | /* | ||
394 | * Disable timer based NMIs on all CPUs: | ||
395 | */ | ||
396 | void acpi_nmi_disable(void) | ||
397 | { | ||
398 | if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) | ||
399 | on_each_cpu(__acpi_nmi_disable, NULL, 0, 1); | ||
400 | } | ||
401 | |||
402 | static void __acpi_nmi_enable(void *__unused) | ||
403 | { | ||
404 | apic_write_around(APIC_LVT0, APIC_DM_NMI); | ||
405 | } | ||
406 | |||
407 | /* | ||
408 | * Enable timer based NMIs on all CPUs: | ||
409 | */ | ||
410 | void acpi_nmi_enable(void) | ||
411 | { | ||
412 | if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) | ||
413 | on_each_cpu(__acpi_nmi_enable, NULL, 0, 1); | ||
414 | } | ||
415 | |||
372 | #ifdef CONFIG_PM | 416 | #ifdef CONFIG_PM |
373 | 417 | ||
374 | static int nmi_pm_active; /* nmi_active before suspend */ | 418 | static int nmi_pm_active; /* nmi_active before suspend */ |
@@ -442,6 +486,17 @@ static void write_watchdog_counter(unsigned int perfctr_msr, const char *descr) | |||
442 | wrmsrl(perfctr_msr, 0 - count); | 486 | wrmsrl(perfctr_msr, 0 - count); |
443 | } | 487 | } |
444 | 488 | ||
489 | static void write_watchdog_counter32(unsigned int perfctr_msr, | ||
490 | const char *descr) | ||
491 | { | ||
492 | u64 count = (u64)cpu_khz * 1000; | ||
493 | |||
494 | do_div(count, nmi_hz); | ||
495 | if(descr) | ||
496 | Dprintk("setting %s to -0x%08Lx\n", descr, count); | ||
497 | wrmsr(perfctr_msr, (u32)(-count), 0); | ||
498 | } | ||
499 | |||
445 | /* Note that these events don't tick when the CPU idles. This means | 500 | /* Note that these events don't tick when the CPU idles. This means |
446 | the frequency varies with CPU load. */ | 501 | the frequency varies with CPU load. */ |
447 | 502 | ||
@@ -531,7 +586,8 @@ static int setup_p6_watchdog(void) | |||
531 | 586 | ||
532 | /* setup the timer */ | 587 | /* setup the timer */ |
533 | wrmsr(evntsel_msr, evntsel, 0); | 588 | wrmsr(evntsel_msr, evntsel, 0); |
534 | write_watchdog_counter(perfctr_msr, "P6_PERFCTR0"); | 589 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); |
590 | write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0"); | ||
535 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 591 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
536 | evntsel |= P6_EVNTSEL0_ENABLE; | 592 | evntsel |= P6_EVNTSEL0_ENABLE; |
537 | wrmsr(evntsel_msr, evntsel, 0); | 593 | wrmsr(evntsel_msr, evntsel, 0); |
@@ -704,7 +760,8 @@ static int setup_intel_arch_watchdog(void) | |||
704 | 760 | ||
705 | /* setup the timer */ | 761 | /* setup the timer */ |
706 | wrmsr(evntsel_msr, evntsel, 0); | 762 | wrmsr(evntsel_msr, evntsel, 0); |
707 | write_watchdog_counter(perfctr_msr, "INTEL_ARCH_PERFCTR0"); | 763 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); |
764 | write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0"); | ||
708 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 765 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
709 | evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; | 766 | evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; |
710 | wrmsr(evntsel_msr, evntsel, 0); | 767 | wrmsr(evntsel_msr, evntsel, 0); |
@@ -762,7 +819,8 @@ void setup_apic_nmi_watchdog (void *unused) | |||
762 | if (nmi_watchdog == NMI_LOCAL_APIC) { | 819 | if (nmi_watchdog == NMI_LOCAL_APIC) { |
763 | switch (boot_cpu_data.x86_vendor) { | 820 | switch (boot_cpu_data.x86_vendor) { |
764 | case X86_VENDOR_AMD: | 821 | case X86_VENDOR_AMD: |
765 | if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15) | 822 | if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 && |
823 | boot_cpu_data.x86 != 16) | ||
766 | return; | 824 | return; |
767 | if (!setup_k7_watchdog()) | 825 | if (!setup_k7_watchdog()) |
768 | return; | 826 | return; |
@@ -916,9 +974,13 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) | |||
916 | cpu_clear(cpu, backtrace_mask); | 974 | cpu_clear(cpu, backtrace_mask); |
917 | } | 975 | } |
918 | 976 | ||
919 | sum = per_cpu(irq_stat, cpu).apic_timer_irqs; | 977 | /* |
978 | * Take the local apic timer and PIT/HPET into account. We don't | ||
979 | * know which one is active, when we have highres/dyntick on | ||
980 | */ | ||
981 | sum = per_cpu(irq_stat, cpu).apic_timer_irqs + kstat_irqs(0); | ||
920 | 982 | ||
921 | /* if the apic timer isn't firing, this cpu isn't doing much */ | 983 | /* if the none of the timers isn't firing, this cpu isn't doing much */ |
922 | if (!touched && last_irq_sums[cpu] == sum) { | 984 | if (!touched && last_irq_sums[cpu] == sum) { |
923 | /* | 985 | /* |
924 | * Ayiee, looks like this CPU is stuck ... | 986 | * Ayiee, looks like this CPU is stuck ... |
@@ -956,6 +1018,8 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) | |||
956 | dummy &= ~P4_CCCR_OVF; | 1018 | dummy &= ~P4_CCCR_OVF; |
957 | wrmsrl(wd->cccr_msr, dummy); | 1019 | wrmsrl(wd->cccr_msr, dummy); |
958 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 1020 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
1021 | /* start the cycle over again */ | ||
1022 | write_watchdog_counter(wd->perfctr_msr, NULL); | ||
959 | } | 1023 | } |
960 | else if (wd->perfctr_msr == MSR_P6_PERFCTR0 || | 1024 | else if (wd->perfctr_msr == MSR_P6_PERFCTR0 || |
961 | wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { | 1025 | wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { |
@@ -964,9 +1028,12 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) | |||
964 | * other P6 variant. | 1028 | * other P6 variant. |
965 | * ArchPerfom/Core Duo also needs this */ | 1029 | * ArchPerfom/Core Duo also needs this */ |
966 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 1030 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
1031 | /* P6/ARCH_PERFMON has 32 bit counter write */ | ||
1032 | write_watchdog_counter32(wd->perfctr_msr, NULL); | ||
1033 | } else { | ||
1034 | /* start the cycle over again */ | ||
1035 | write_watchdog_counter(wd->perfctr_msr, NULL); | ||
967 | } | 1036 | } |
968 | /* start the cycle over again */ | ||
969 | write_watchdog_counter(wd->perfctr_msr, NULL); | ||
970 | rc = 1; | 1037 | rc = 1; |
971 | } else if (nmi_watchdog == NMI_IO_APIC) { | 1038 | } else if (nmi_watchdog == NMI_IO_APIC) { |
972 | /* don't know how to accurately check for this. | 1039 | /* don't know how to accurately check for this. |
diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c index e55fd05da0f5..c156ecfa3872 100644 --- a/arch/i386/kernel/paravirt.c +++ b/arch/i386/kernel/paravirt.c | |||
@@ -92,7 +92,7 @@ static unsigned native_patch(u8 type, u16 clobbers, void *insns, unsigned len) | |||
92 | return insn_len; | 92 | return insn_len; |
93 | } | 93 | } |
94 | 94 | ||
95 | static fastcall unsigned long native_get_debugreg(int regno) | 95 | static unsigned long native_get_debugreg(int regno) |
96 | { | 96 | { |
97 | unsigned long val = 0; /* Damn you, gcc! */ | 97 | unsigned long val = 0; /* Damn you, gcc! */ |
98 | 98 | ||
@@ -115,7 +115,7 @@ static fastcall unsigned long native_get_debugreg(int regno) | |||
115 | return val; | 115 | return val; |
116 | } | 116 | } |
117 | 117 | ||
118 | static fastcall void native_set_debugreg(int regno, unsigned long value) | 118 | static void native_set_debugreg(int regno, unsigned long value) |
119 | { | 119 | { |
120 | switch (regno) { | 120 | switch (regno) { |
121 | case 0: | 121 | case 0: |
@@ -146,55 +146,55 @@ void init_IRQ(void) | |||
146 | paravirt_ops.init_IRQ(); | 146 | paravirt_ops.init_IRQ(); |
147 | } | 147 | } |
148 | 148 | ||
149 | static fastcall void native_clts(void) | 149 | static void native_clts(void) |
150 | { | 150 | { |
151 | asm volatile ("clts"); | 151 | asm volatile ("clts"); |
152 | } | 152 | } |
153 | 153 | ||
154 | static fastcall unsigned long native_read_cr0(void) | 154 | static unsigned long native_read_cr0(void) |
155 | { | 155 | { |
156 | unsigned long val; | 156 | unsigned long val; |
157 | asm volatile("movl %%cr0,%0\n\t" :"=r" (val)); | 157 | asm volatile("movl %%cr0,%0\n\t" :"=r" (val)); |
158 | return val; | 158 | return val; |
159 | } | 159 | } |
160 | 160 | ||
161 | static fastcall void native_write_cr0(unsigned long val) | 161 | static void native_write_cr0(unsigned long val) |
162 | { | 162 | { |
163 | asm volatile("movl %0,%%cr0": :"r" (val)); | 163 | asm volatile("movl %0,%%cr0": :"r" (val)); |
164 | } | 164 | } |
165 | 165 | ||
166 | static fastcall unsigned long native_read_cr2(void) | 166 | static unsigned long native_read_cr2(void) |
167 | { | 167 | { |
168 | unsigned long val; | 168 | unsigned long val; |
169 | asm volatile("movl %%cr2,%0\n\t" :"=r" (val)); | 169 | asm volatile("movl %%cr2,%0\n\t" :"=r" (val)); |
170 | return val; | 170 | return val; |
171 | } | 171 | } |
172 | 172 | ||
173 | static fastcall void native_write_cr2(unsigned long val) | 173 | static void native_write_cr2(unsigned long val) |
174 | { | 174 | { |
175 | asm volatile("movl %0,%%cr2": :"r" (val)); | 175 | asm volatile("movl %0,%%cr2": :"r" (val)); |
176 | } | 176 | } |
177 | 177 | ||
178 | static fastcall unsigned long native_read_cr3(void) | 178 | static unsigned long native_read_cr3(void) |
179 | { | 179 | { |
180 | unsigned long val; | 180 | unsigned long val; |
181 | asm volatile("movl %%cr3,%0\n\t" :"=r" (val)); | 181 | asm volatile("movl %%cr3,%0\n\t" :"=r" (val)); |
182 | return val; | 182 | return val; |
183 | } | 183 | } |
184 | 184 | ||
185 | static fastcall void native_write_cr3(unsigned long val) | 185 | static void native_write_cr3(unsigned long val) |
186 | { | 186 | { |
187 | asm volatile("movl %0,%%cr3": :"r" (val)); | 187 | asm volatile("movl %0,%%cr3": :"r" (val)); |
188 | } | 188 | } |
189 | 189 | ||
190 | static fastcall unsigned long native_read_cr4(void) | 190 | static unsigned long native_read_cr4(void) |
191 | { | 191 | { |
192 | unsigned long val; | 192 | unsigned long val; |
193 | asm volatile("movl %%cr4,%0\n\t" :"=r" (val)); | 193 | asm volatile("movl %%cr4,%0\n\t" :"=r" (val)); |
194 | return val; | 194 | return val; |
195 | } | 195 | } |
196 | 196 | ||
197 | static fastcall unsigned long native_read_cr4_safe(void) | 197 | static unsigned long native_read_cr4_safe(void) |
198 | { | 198 | { |
199 | unsigned long val; | 199 | unsigned long val; |
200 | /* This could fault if %cr4 does not exist */ | 200 | /* This could fault if %cr4 does not exist */ |
@@ -207,51 +207,51 @@ static fastcall unsigned long native_read_cr4_safe(void) | |||
207 | return val; | 207 | return val; |
208 | } | 208 | } |
209 | 209 | ||
210 | static fastcall void native_write_cr4(unsigned long val) | 210 | static void native_write_cr4(unsigned long val) |
211 | { | 211 | { |
212 | asm volatile("movl %0,%%cr4": :"r" (val)); | 212 | asm volatile("movl %0,%%cr4": :"r" (val)); |
213 | } | 213 | } |
214 | 214 | ||
215 | static fastcall unsigned long native_save_fl(void) | 215 | static unsigned long native_save_fl(void) |
216 | { | 216 | { |
217 | unsigned long f; | 217 | unsigned long f; |
218 | asm volatile("pushfl ; popl %0":"=g" (f): /* no input */); | 218 | asm volatile("pushfl ; popl %0":"=g" (f): /* no input */); |
219 | return f; | 219 | return f; |
220 | } | 220 | } |
221 | 221 | ||
222 | static fastcall void native_restore_fl(unsigned long f) | 222 | static void native_restore_fl(unsigned long f) |
223 | { | 223 | { |
224 | asm volatile("pushl %0 ; popfl": /* no output */ | 224 | asm volatile("pushl %0 ; popfl": /* no output */ |
225 | :"g" (f) | 225 | :"g" (f) |
226 | :"memory", "cc"); | 226 | :"memory", "cc"); |
227 | } | 227 | } |
228 | 228 | ||
229 | static fastcall void native_irq_disable(void) | 229 | static void native_irq_disable(void) |
230 | { | 230 | { |
231 | asm volatile("cli": : :"memory"); | 231 | asm volatile("cli": : :"memory"); |
232 | } | 232 | } |
233 | 233 | ||
234 | static fastcall void native_irq_enable(void) | 234 | static void native_irq_enable(void) |
235 | { | 235 | { |
236 | asm volatile("sti": : :"memory"); | 236 | asm volatile("sti": : :"memory"); |
237 | } | 237 | } |
238 | 238 | ||
239 | static fastcall void native_safe_halt(void) | 239 | static void native_safe_halt(void) |
240 | { | 240 | { |
241 | asm volatile("sti; hlt": : :"memory"); | 241 | asm volatile("sti; hlt": : :"memory"); |
242 | } | 242 | } |
243 | 243 | ||
244 | static fastcall void native_halt(void) | 244 | static void native_halt(void) |
245 | { | 245 | { |
246 | asm volatile("hlt": : :"memory"); | 246 | asm volatile("hlt": : :"memory"); |
247 | } | 247 | } |
248 | 248 | ||
249 | static fastcall void native_wbinvd(void) | 249 | static void native_wbinvd(void) |
250 | { | 250 | { |
251 | asm volatile("wbinvd": : :"memory"); | 251 | asm volatile("wbinvd": : :"memory"); |
252 | } | 252 | } |
253 | 253 | ||
254 | static fastcall unsigned long long native_read_msr(unsigned int msr, int *err) | 254 | static unsigned long long native_read_msr(unsigned int msr, int *err) |
255 | { | 255 | { |
256 | unsigned long long val; | 256 | unsigned long long val; |
257 | 257 | ||
@@ -270,7 +270,7 @@ static fastcall unsigned long long native_read_msr(unsigned int msr, int *err) | |||
270 | return val; | 270 | return val; |
271 | } | 271 | } |
272 | 272 | ||
273 | static fastcall int native_write_msr(unsigned int msr, unsigned long long val) | 273 | static int native_write_msr(unsigned int msr, unsigned long long val) |
274 | { | 274 | { |
275 | int err; | 275 | int err; |
276 | asm volatile("2: wrmsr ; xorl %0,%0\n" | 276 | asm volatile("2: wrmsr ; xorl %0,%0\n" |
@@ -288,53 +288,53 @@ static fastcall int native_write_msr(unsigned int msr, unsigned long long val) | |||
288 | return err; | 288 | return err; |
289 | } | 289 | } |
290 | 290 | ||
291 | static fastcall unsigned long long native_read_tsc(void) | 291 | static unsigned long long native_read_tsc(void) |
292 | { | 292 | { |
293 | unsigned long long val; | 293 | unsigned long long val; |
294 | asm volatile("rdtsc" : "=A" (val)); | 294 | asm volatile("rdtsc" : "=A" (val)); |
295 | return val; | 295 | return val; |
296 | } | 296 | } |
297 | 297 | ||
298 | static fastcall unsigned long long native_read_pmc(void) | 298 | static unsigned long long native_read_pmc(void) |
299 | { | 299 | { |
300 | unsigned long long val; | 300 | unsigned long long val; |
301 | asm volatile("rdpmc" : "=A" (val)); | 301 | asm volatile("rdpmc" : "=A" (val)); |
302 | return val; | 302 | return val; |
303 | } | 303 | } |
304 | 304 | ||
305 | static fastcall void native_load_tr_desc(void) | 305 | static void native_load_tr_desc(void) |
306 | { | 306 | { |
307 | asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); | 307 | asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); |
308 | } | 308 | } |
309 | 309 | ||
310 | static fastcall void native_load_gdt(const struct Xgt_desc_struct *dtr) | 310 | static void native_load_gdt(const struct Xgt_desc_struct *dtr) |
311 | { | 311 | { |
312 | asm volatile("lgdt %0"::"m" (*dtr)); | 312 | asm volatile("lgdt %0"::"m" (*dtr)); |
313 | } | 313 | } |
314 | 314 | ||
315 | static fastcall void native_load_idt(const struct Xgt_desc_struct *dtr) | 315 | static void native_load_idt(const struct Xgt_desc_struct *dtr) |
316 | { | 316 | { |
317 | asm volatile("lidt %0"::"m" (*dtr)); | 317 | asm volatile("lidt %0"::"m" (*dtr)); |
318 | } | 318 | } |
319 | 319 | ||
320 | static fastcall void native_store_gdt(struct Xgt_desc_struct *dtr) | 320 | static void native_store_gdt(struct Xgt_desc_struct *dtr) |
321 | { | 321 | { |
322 | asm ("sgdt %0":"=m" (*dtr)); | 322 | asm ("sgdt %0":"=m" (*dtr)); |
323 | } | 323 | } |
324 | 324 | ||
325 | static fastcall void native_store_idt(struct Xgt_desc_struct *dtr) | 325 | static void native_store_idt(struct Xgt_desc_struct *dtr) |
326 | { | 326 | { |
327 | asm ("sidt %0":"=m" (*dtr)); | 327 | asm ("sidt %0":"=m" (*dtr)); |
328 | } | 328 | } |
329 | 329 | ||
330 | static fastcall unsigned long native_store_tr(void) | 330 | static unsigned long native_store_tr(void) |
331 | { | 331 | { |
332 | unsigned long tr; | 332 | unsigned long tr; |
333 | asm ("str %0":"=r" (tr)); | 333 | asm ("str %0":"=r" (tr)); |
334 | return tr; | 334 | return tr; |
335 | } | 335 | } |
336 | 336 | ||
337 | static fastcall void native_load_tls(struct thread_struct *t, unsigned int cpu) | 337 | static void native_load_tls(struct thread_struct *t, unsigned int cpu) |
338 | { | 338 | { |
339 | #define C(i) get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i] | 339 | #define C(i) get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i] |
340 | C(0); C(1); C(2); | 340 | C(0); C(1); C(2); |
@@ -348,22 +348,22 @@ static inline void native_write_dt_entry(void *dt, int entry, u32 entry_low, u32 | |||
348 | lp[1] = entry_high; | 348 | lp[1] = entry_high; |
349 | } | 349 | } |
350 | 350 | ||
351 | static fastcall void native_write_ldt_entry(void *dt, int entrynum, u32 low, u32 high) | 351 | static void native_write_ldt_entry(void *dt, int entrynum, u32 low, u32 high) |
352 | { | 352 | { |
353 | native_write_dt_entry(dt, entrynum, low, high); | 353 | native_write_dt_entry(dt, entrynum, low, high); |
354 | } | 354 | } |
355 | 355 | ||
356 | static fastcall void native_write_gdt_entry(void *dt, int entrynum, u32 low, u32 high) | 356 | static void native_write_gdt_entry(void *dt, int entrynum, u32 low, u32 high) |
357 | { | 357 | { |
358 | native_write_dt_entry(dt, entrynum, low, high); | 358 | native_write_dt_entry(dt, entrynum, low, high); |
359 | } | 359 | } |
360 | 360 | ||
361 | static fastcall void native_write_idt_entry(void *dt, int entrynum, u32 low, u32 high) | 361 | static void native_write_idt_entry(void *dt, int entrynum, u32 low, u32 high) |
362 | { | 362 | { |
363 | native_write_dt_entry(dt, entrynum, low, high); | 363 | native_write_dt_entry(dt, entrynum, low, high); |
364 | } | 364 | } |
365 | 365 | ||
366 | static fastcall void native_load_esp0(struct tss_struct *tss, | 366 | static void native_load_esp0(struct tss_struct *tss, |
367 | struct thread_struct *thread) | 367 | struct thread_struct *thread) |
368 | { | 368 | { |
369 | tss->esp0 = thread->esp0; | 369 | tss->esp0 = thread->esp0; |
@@ -375,12 +375,12 @@ static fastcall void native_load_esp0(struct tss_struct *tss, | |||
375 | } | 375 | } |
376 | } | 376 | } |
377 | 377 | ||
378 | static fastcall void native_io_delay(void) | 378 | static void native_io_delay(void) |
379 | { | 379 | { |
380 | asm volatile("outb %al,$0x80"); | 380 | asm volatile("outb %al,$0x80"); |
381 | } | 381 | } |
382 | 382 | ||
383 | static fastcall void native_flush_tlb(void) | 383 | static void native_flush_tlb(void) |
384 | { | 384 | { |
385 | __native_flush_tlb(); | 385 | __native_flush_tlb(); |
386 | } | 386 | } |
@@ -389,49 +389,49 @@ static fastcall void native_flush_tlb(void) | |||
389 | * Global pages have to be flushed a bit differently. Not a real | 389 | * Global pages have to be flushed a bit differently. Not a real |
390 | * performance problem because this does not happen often. | 390 | * performance problem because this does not happen often. |
391 | */ | 391 | */ |
392 | static fastcall void native_flush_tlb_global(void) | 392 | static void native_flush_tlb_global(void) |
393 | { | 393 | { |
394 | __native_flush_tlb_global(); | 394 | __native_flush_tlb_global(); |
395 | } | 395 | } |
396 | 396 | ||
397 | static fastcall void native_flush_tlb_single(u32 addr) | 397 | static void native_flush_tlb_single(u32 addr) |
398 | { | 398 | { |
399 | __native_flush_tlb_single(addr); | 399 | __native_flush_tlb_single(addr); |
400 | } | 400 | } |
401 | 401 | ||
402 | #ifndef CONFIG_X86_PAE | 402 | #ifndef CONFIG_X86_PAE |
403 | static fastcall void native_set_pte(pte_t *ptep, pte_t pteval) | 403 | static void native_set_pte(pte_t *ptep, pte_t pteval) |
404 | { | 404 | { |
405 | *ptep = pteval; | 405 | *ptep = pteval; |
406 | } | 406 | } |
407 | 407 | ||
408 | static fastcall void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval) | 408 | static void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval) |
409 | { | 409 | { |
410 | *ptep = pteval; | 410 | *ptep = pteval; |
411 | } | 411 | } |
412 | 412 | ||
413 | static fastcall void native_set_pmd(pmd_t *pmdp, pmd_t pmdval) | 413 | static void native_set_pmd(pmd_t *pmdp, pmd_t pmdval) |
414 | { | 414 | { |
415 | *pmdp = pmdval; | 415 | *pmdp = pmdval; |
416 | } | 416 | } |
417 | 417 | ||
418 | #else /* CONFIG_X86_PAE */ | 418 | #else /* CONFIG_X86_PAE */ |
419 | 419 | ||
420 | static fastcall void native_set_pte(pte_t *ptep, pte_t pte) | 420 | static void native_set_pte(pte_t *ptep, pte_t pte) |
421 | { | 421 | { |
422 | ptep->pte_high = pte.pte_high; | 422 | ptep->pte_high = pte.pte_high; |
423 | smp_wmb(); | 423 | smp_wmb(); |
424 | ptep->pte_low = pte.pte_low; | 424 | ptep->pte_low = pte.pte_low; |
425 | } | 425 | } |
426 | 426 | ||
427 | static fastcall void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte) | 427 | static void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte) |
428 | { | 428 | { |
429 | ptep->pte_high = pte.pte_high; | 429 | ptep->pte_high = pte.pte_high; |
430 | smp_wmb(); | 430 | smp_wmb(); |
431 | ptep->pte_low = pte.pte_low; | 431 | ptep->pte_low = pte.pte_low; |
432 | } | 432 | } |
433 | 433 | ||
434 | static fastcall void native_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) | 434 | static void native_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) |
435 | { | 435 | { |
436 | ptep->pte_low = 0; | 436 | ptep->pte_low = 0; |
437 | smp_wmb(); | 437 | smp_wmb(); |
@@ -440,29 +440,29 @@ static fastcall void native_set_pte_present(struct mm_struct *mm, unsigned long | |||
440 | ptep->pte_low = pte.pte_low; | 440 | ptep->pte_low = pte.pte_low; |
441 | } | 441 | } |
442 | 442 | ||
443 | static fastcall void native_set_pte_atomic(pte_t *ptep, pte_t pteval) | 443 | static void native_set_pte_atomic(pte_t *ptep, pte_t pteval) |
444 | { | 444 | { |
445 | set_64bit((unsigned long long *)ptep,pte_val(pteval)); | 445 | set_64bit((unsigned long long *)ptep,pte_val(pteval)); |
446 | } | 446 | } |
447 | 447 | ||
448 | static fastcall void native_set_pmd(pmd_t *pmdp, pmd_t pmdval) | 448 | static void native_set_pmd(pmd_t *pmdp, pmd_t pmdval) |
449 | { | 449 | { |
450 | set_64bit((unsigned long long *)pmdp,pmd_val(pmdval)); | 450 | set_64bit((unsigned long long *)pmdp,pmd_val(pmdval)); |
451 | } | 451 | } |
452 | 452 | ||
453 | static fastcall void native_set_pud(pud_t *pudp, pud_t pudval) | 453 | static void native_set_pud(pud_t *pudp, pud_t pudval) |
454 | { | 454 | { |
455 | *pudp = pudval; | 455 | *pudp = pudval; |
456 | } | 456 | } |
457 | 457 | ||
458 | static fastcall void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | 458 | static void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
459 | { | 459 | { |
460 | ptep->pte_low = 0; | 460 | ptep->pte_low = 0; |
461 | smp_wmb(); | 461 | smp_wmb(); |
462 | ptep->pte_high = 0; | 462 | ptep->pte_high = 0; |
463 | } | 463 | } |
464 | 464 | ||
465 | static fastcall void native_pmd_clear(pmd_t *pmd) | 465 | static void native_pmd_clear(pmd_t *pmd) |
466 | { | 466 | { |
467 | u32 *tmp = (u32 *)pmd; | 467 | u32 *tmp = (u32 *)pmd; |
468 | *tmp = 0; | 468 | *tmp = 0; |
@@ -472,8 +472,8 @@ static fastcall void native_pmd_clear(pmd_t *pmd) | |||
472 | #endif /* CONFIG_X86_PAE */ | 472 | #endif /* CONFIG_X86_PAE */ |
473 | 473 | ||
474 | /* These are in entry.S */ | 474 | /* These are in entry.S */ |
475 | extern fastcall void native_iret(void); | 475 | extern void native_iret(void); |
476 | extern fastcall void native_irq_enable_sysexit(void); | 476 | extern void native_irq_enable_sysexit(void); |
477 | 477 | ||
478 | static int __init print_banner(void) | 478 | static int __init print_banner(void) |
479 | { | 479 | { |
@@ -482,9 +482,6 @@ static int __init print_banner(void) | |||
482 | } | 482 | } |
483 | core_initcall(print_banner); | 483 | core_initcall(print_banner); |
484 | 484 | ||
485 | /* We simply declare start_kernel to be the paravirt probe of last resort. */ | ||
486 | paravirt_probe(start_kernel); | ||
487 | |||
488 | struct paravirt_ops paravirt_ops = { | 485 | struct paravirt_ops paravirt_ops = { |
489 | .name = "bare hardware", | 486 | .name = "bare hardware", |
490 | .paravirt_enabled = 0, | 487 | .paravirt_enabled = 0, |
@@ -544,12 +541,21 @@ struct paravirt_ops paravirt_ops = { | |||
544 | .apic_write = native_apic_write, | 541 | .apic_write = native_apic_write, |
545 | .apic_write_atomic = native_apic_write_atomic, | 542 | .apic_write_atomic = native_apic_write_atomic, |
546 | .apic_read = native_apic_read, | 543 | .apic_read = native_apic_read, |
544 | .setup_boot_clock = setup_boot_APIC_clock, | ||
545 | .setup_secondary_clock = setup_secondary_APIC_clock, | ||
547 | #endif | 546 | #endif |
547 | .set_lazy_mode = (void *)native_nop, | ||
548 | 548 | ||
549 | .flush_tlb_user = native_flush_tlb, | 549 | .flush_tlb_user = native_flush_tlb, |
550 | .flush_tlb_kernel = native_flush_tlb_global, | 550 | .flush_tlb_kernel = native_flush_tlb_global, |
551 | .flush_tlb_single = native_flush_tlb_single, | 551 | .flush_tlb_single = native_flush_tlb_single, |
552 | 552 | ||
553 | .alloc_pt = (void *)native_nop, | ||
554 | .alloc_pd = (void *)native_nop, | ||
555 | .alloc_pd_clone = (void *)native_nop, | ||
556 | .release_pt = (void *)native_nop, | ||
557 | .release_pd = (void *)native_nop, | ||
558 | |||
553 | .set_pte = native_set_pte, | 559 | .set_pte = native_set_pte, |
554 | .set_pte_at = native_set_pte_at, | 560 | .set_pte_at = native_set_pte_at, |
555 | .set_pmd = native_set_pmd, | 561 | .set_pmd = native_set_pmd, |
@@ -565,6 +571,8 @@ struct paravirt_ops paravirt_ops = { | |||
565 | 571 | ||
566 | .irq_enable_sysexit = native_irq_enable_sysexit, | 572 | .irq_enable_sysexit = native_irq_enable_sysexit, |
567 | .iret = native_iret, | 573 | .iret = native_iret, |
574 | |||
575 | .startup_ipi_hook = (void *)native_nop, | ||
568 | }; | 576 | }; |
569 | 577 | ||
570 | /* | 578 | /* |
diff --git a/arch/i386/kernel/pcspeaker.c b/arch/i386/kernel/pcspeaker.c new file mode 100644 index 000000000000..bc1f2d3ea277 --- /dev/null +++ b/arch/i386/kernel/pcspeaker.c | |||
@@ -0,0 +1,20 @@ | |||
1 | #include <linux/platform_device.h> | ||
2 | #include <linux/errno.h> | ||
3 | #include <linux/init.h> | ||
4 | |||
5 | static __init int add_pcspkr(void) | ||
6 | { | ||
7 | struct platform_device *pd; | ||
8 | int ret; | ||
9 | |||
10 | pd = platform_device_alloc("pcspkr", -1); | ||
11 | if (!pd) | ||
12 | return -ENOMEM; | ||
13 | |||
14 | ret = platform_device_add(pd); | ||
15 | if (ret) | ||
16 | platform_device_put(pd); | ||
17 | |||
18 | return ret; | ||
19 | } | ||
20 | device_initcall(add_pcspkr); | ||
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index c641056233a6..bea304d48cdb 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include <linux/ptrace.h> | 38 | #include <linux/ptrace.h> |
39 | #include <linux/random.h> | 39 | #include <linux/random.h> |
40 | #include <linux/personality.h> | 40 | #include <linux/personality.h> |
41 | #include <linux/tick.h> | ||
41 | 42 | ||
42 | #include <asm/uaccess.h> | 43 | #include <asm/uaccess.h> |
43 | #include <asm/pgtable.h> | 44 | #include <asm/pgtable.h> |
@@ -48,6 +49,7 @@ | |||
48 | #include <asm/i387.h> | 49 | #include <asm/i387.h> |
49 | #include <asm/desc.h> | 50 | #include <asm/desc.h> |
50 | #include <asm/vm86.h> | 51 | #include <asm/vm86.h> |
52 | #include <asm/idle.h> | ||
51 | #ifdef CONFIG_MATH_EMULATION | 53 | #ifdef CONFIG_MATH_EMULATION |
52 | #include <asm/math_emu.h> | 54 | #include <asm/math_emu.h> |
53 | #endif | 55 | #endif |
@@ -80,6 +82,42 @@ void (*pm_idle)(void); | |||
80 | EXPORT_SYMBOL(pm_idle); | 82 | EXPORT_SYMBOL(pm_idle); |
81 | static DEFINE_PER_CPU(unsigned int, cpu_idle_state); | 83 | static DEFINE_PER_CPU(unsigned int, cpu_idle_state); |
82 | 84 | ||
85 | static ATOMIC_NOTIFIER_HEAD(idle_notifier); | ||
86 | |||
87 | void idle_notifier_register(struct notifier_block *n) | ||
88 | { | ||
89 | atomic_notifier_chain_register(&idle_notifier, n); | ||
90 | } | ||
91 | |||
92 | void idle_notifier_unregister(struct notifier_block *n) | ||
93 | { | ||
94 | atomic_notifier_chain_unregister(&idle_notifier, n); | ||
95 | } | ||
96 | |||
97 | static DEFINE_PER_CPU(volatile unsigned long, idle_state); | ||
98 | |||
99 | void enter_idle(void) | ||
100 | { | ||
101 | /* needs to be atomic w.r.t. interrupts, not against other CPUs */ | ||
102 | __set_bit(0, &__get_cpu_var(idle_state)); | ||
103 | atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); | ||
104 | } | ||
105 | |||
106 | static void __exit_idle(void) | ||
107 | { | ||
108 | /* needs to be atomic w.r.t. interrupts, not against other CPUs */ | ||
109 | if (__test_and_clear_bit(0, &__get_cpu_var(idle_state)) == 0) | ||
110 | return; | ||
111 | atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); | ||
112 | } | ||
113 | |||
114 | void exit_idle(void) | ||
115 | { | ||
116 | if (current->pid) | ||
117 | return; | ||
118 | __exit_idle(); | ||
119 | } | ||
120 | |||
83 | void disable_hlt(void) | 121 | void disable_hlt(void) |
84 | { | 122 | { |
85 | hlt_counter++; | 123 | hlt_counter++; |
@@ -130,6 +168,7 @@ EXPORT_SYMBOL(default_idle); | |||
130 | */ | 168 | */ |
131 | static void poll_idle (void) | 169 | static void poll_idle (void) |
132 | { | 170 | { |
171 | local_irq_enable(); | ||
133 | cpu_relax(); | 172 | cpu_relax(); |
134 | } | 173 | } |
135 | 174 | ||
@@ -173,6 +212,7 @@ void cpu_idle(void) | |||
173 | 212 | ||
174 | /* endless idle loop with no priority at all */ | 213 | /* endless idle loop with no priority at all */ |
175 | while (1) { | 214 | while (1) { |
215 | tick_nohz_stop_sched_tick(); | ||
176 | while (!need_resched()) { | 216 | while (!need_resched()) { |
177 | void (*idle)(void); | 217 | void (*idle)(void); |
178 | 218 | ||
@@ -189,8 +229,18 @@ void cpu_idle(void) | |||
189 | play_dead(); | 229 | play_dead(); |
190 | 230 | ||
191 | __get_cpu_var(irq_stat).idle_timestamp = jiffies; | 231 | __get_cpu_var(irq_stat).idle_timestamp = jiffies; |
232 | |||
233 | /* | ||
234 | * Idle routines should keep interrupts disabled | ||
235 | * from here on, until they go to idle. | ||
236 | * Otherwise, idle callbacks can misfire. | ||
237 | */ | ||
238 | local_irq_disable(); | ||
239 | enter_idle(); | ||
192 | idle(); | 240 | idle(); |
241 | __exit_idle(); | ||
193 | } | 242 | } |
243 | tick_nohz_restart_sched_tick(); | ||
194 | preempt_enable_no_resched(); | 244 | preempt_enable_no_resched(); |
195 | schedule(); | 245 | schedule(); |
196 | preempt_disable(); | 246 | preempt_disable(); |
@@ -243,7 +293,11 @@ void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) | |||
243 | __monitor((void *)¤t_thread_info()->flags, 0, 0); | 293 | __monitor((void *)¤t_thread_info()->flags, 0, 0); |
244 | smp_mb(); | 294 | smp_mb(); |
245 | if (!need_resched()) | 295 | if (!need_resched()) |
246 | __mwait(eax, ecx); | 296 | __sti_mwait(eax, ecx); |
297 | else | ||
298 | local_irq_enable(); | ||
299 | } else { | ||
300 | local_irq_enable(); | ||
247 | } | 301 | } |
248 | } | 302 | } |
249 | 303 | ||
@@ -308,8 +362,8 @@ void show_regs(struct pt_regs * regs) | |||
308 | regs->eax,regs->ebx,regs->ecx,regs->edx); | 362 | regs->eax,regs->ebx,regs->ecx,regs->edx); |
309 | printk("ESI: %08lx EDI: %08lx EBP: %08lx", | 363 | printk("ESI: %08lx EDI: %08lx EBP: %08lx", |
310 | regs->esi, regs->edi, regs->ebp); | 364 | regs->esi, regs->edi, regs->ebp); |
311 | printk(" DS: %04x ES: %04x GS: %04x\n", | 365 | printk(" DS: %04x ES: %04x FS: %04x\n", |
312 | 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xgs); | 366 | 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xfs); |
313 | 367 | ||
314 | cr0 = read_cr0(); | 368 | cr0 = read_cr0(); |
315 | cr2 = read_cr2(); | 369 | cr2 = read_cr2(); |
@@ -340,7 +394,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) | |||
340 | 394 | ||
341 | regs.xds = __USER_DS; | 395 | regs.xds = __USER_DS; |
342 | regs.xes = __USER_DS; | 396 | regs.xes = __USER_DS; |
343 | regs.xgs = __KERNEL_PDA; | 397 | regs.xfs = __KERNEL_PDA; |
344 | regs.orig_eax = -1; | 398 | regs.orig_eax = -1; |
345 | regs.eip = (unsigned long) kernel_thread_helper; | 399 | regs.eip = (unsigned long) kernel_thread_helper; |
346 | regs.xcs = __KERNEL_CS | get_kernel_rpl(); | 400 | regs.xcs = __KERNEL_CS | get_kernel_rpl(); |
@@ -425,7 +479,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, | |||
425 | 479 | ||
426 | p->thread.eip = (unsigned long) ret_from_fork; | 480 | p->thread.eip = (unsigned long) ret_from_fork; |
427 | 481 | ||
428 | savesegment(fs,p->thread.fs); | 482 | savesegment(gs,p->thread.gs); |
429 | 483 | ||
430 | tsk = current; | 484 | tsk = current; |
431 | if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { | 485 | if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { |
@@ -501,8 +555,8 @@ void dump_thread(struct pt_regs * regs, struct user * dump) | |||
501 | dump->regs.eax = regs->eax; | 555 | dump->regs.eax = regs->eax; |
502 | dump->regs.ds = regs->xds; | 556 | dump->regs.ds = regs->xds; |
503 | dump->regs.es = regs->xes; | 557 | dump->regs.es = regs->xes; |
504 | savesegment(fs,dump->regs.fs); | 558 | dump->regs.fs = regs->xfs; |
505 | dump->regs.gs = regs->xgs; | 559 | savesegment(gs,dump->regs.gs); |
506 | dump->regs.orig_eax = regs->orig_eax; | 560 | dump->regs.orig_eax = regs->orig_eax; |
507 | dump->regs.eip = regs->eip; | 561 | dump->regs.eip = regs->eip; |
508 | dump->regs.cs = regs->xcs; | 562 | dump->regs.cs = regs->xcs; |
@@ -653,7 +707,7 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas | |||
653 | load_esp0(tss, next); | 707 | load_esp0(tss, next); |
654 | 708 | ||
655 | /* | 709 | /* |
656 | * Save away %fs. No need to save %gs, as it was saved on the | 710 | * Save away %gs. No need to save %fs, as it was saved on the |
657 | * stack on entry. No need to save %es and %ds, as those are | 711 | * stack on entry. No need to save %es and %ds, as those are |
658 | * always kernel segments while inside the kernel. Doing this | 712 | * always kernel segments while inside the kernel. Doing this |
659 | * before setting the new TLS descriptors avoids the situation | 713 | * before setting the new TLS descriptors avoids the situation |
@@ -662,7 +716,7 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas | |||
662 | * used %fs or %gs (it does not today), or if the kernel is | 716 | * used %fs or %gs (it does not today), or if the kernel is |
663 | * running inside of a hypervisor layer. | 717 | * running inside of a hypervisor layer. |
664 | */ | 718 | */ |
665 | savesegment(fs, prev->fs); | 719 | savesegment(gs, prev->gs); |
666 | 720 | ||
667 | /* | 721 | /* |
668 | * Load the per-thread Thread-Local Storage descriptor. | 722 | * Load the per-thread Thread-Local Storage descriptor. |
@@ -670,14 +724,13 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas | |||
670 | load_TLS(next, cpu); | 724 | load_TLS(next, cpu); |
671 | 725 | ||
672 | /* | 726 | /* |
673 | * Restore %fs if needed. | 727 | * Restore IOPL if needed. In normal use, the flags restore |
674 | * | 728 | * in the switch assembly will handle this. But if the kernel |
675 | * Glibc normally makes %fs be zero. | 729 | * is running virtualized at a non-zero CPL, the popf will |
730 | * not restore flags, so it must be done in a separate step. | ||
676 | */ | 731 | */ |
677 | if (unlikely(prev->fs | next->fs)) | 732 | if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl)) |
678 | loadsegment(fs, next->fs); | 733 | set_iopl_mask(next->iopl); |
679 | |||
680 | write_pda(pcurrent, next_p); | ||
681 | 734 | ||
682 | /* | 735 | /* |
683 | * Now maybe handle debug registers and/or IO bitmaps | 736 | * Now maybe handle debug registers and/or IO bitmaps |
@@ -688,6 +741,15 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas | |||
688 | 741 | ||
689 | disable_tsc(prev_p, next_p); | 742 | disable_tsc(prev_p, next_p); |
690 | 743 | ||
744 | /* | ||
745 | * Leave lazy mode, flushing any hypercalls made here. | ||
746 | * This must be done before restoring TLS segments so | ||
747 | * the GDT and LDT are properly updated, and must be | ||
748 | * done before math_state_restore, so the TS bit is up | ||
749 | * to date. | ||
750 | */ | ||
751 | arch_leave_lazy_cpu_mode(); | ||
752 | |||
691 | /* If the task has used fpu the last 5 timeslices, just do a full | 753 | /* If the task has used fpu the last 5 timeslices, just do a full |
692 | * restore of the math state immediately to avoid the trap; the | 754 | * restore of the math state immediately to avoid the trap; the |
693 | * chances of needing FPU soon are obviously high now | 755 | * chances of needing FPU soon are obviously high now |
@@ -695,6 +757,14 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas | |||
695 | if (next_p->fpu_counter > 5) | 757 | if (next_p->fpu_counter > 5) |
696 | math_state_restore(); | 758 | math_state_restore(); |
697 | 759 | ||
760 | /* | ||
761 | * Restore %gs if needed (which is common) | ||
762 | */ | ||
763 | if (prev->gs | next->gs) | ||
764 | loadsegment(gs, next->gs); | ||
765 | |||
766 | write_pda(pcurrent, next_p); | ||
767 | |||
698 | return prev_p; | 768 | return prev_p; |
699 | } | 769 | } |
700 | 770 | ||
diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index af8aabe85800..4a8f8a259723 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c | |||
@@ -89,14 +89,14 @@ static int putreg(struct task_struct *child, | |||
89 | unsigned long regno, unsigned long value) | 89 | unsigned long regno, unsigned long value) |
90 | { | 90 | { |
91 | switch (regno >> 2) { | 91 | switch (regno >> 2) { |
92 | case FS: | 92 | case GS: |
93 | if (value && (value & 3) != 3) | 93 | if (value && (value & 3) != 3) |
94 | return -EIO; | 94 | return -EIO; |
95 | child->thread.fs = value; | 95 | child->thread.gs = value; |
96 | return 0; | 96 | return 0; |
97 | case DS: | 97 | case DS: |
98 | case ES: | 98 | case ES: |
99 | case GS: | 99 | case FS: |
100 | if (value && (value & 3) != 3) | 100 | if (value && (value & 3) != 3) |
101 | return -EIO; | 101 | return -EIO; |
102 | value &= 0xffff; | 102 | value &= 0xffff; |
@@ -112,7 +112,7 @@ static int putreg(struct task_struct *child, | |||
112 | value |= get_stack_long(child, EFL_OFFSET) & ~FLAG_MASK; | 112 | value |= get_stack_long(child, EFL_OFFSET) & ~FLAG_MASK; |
113 | break; | 113 | break; |
114 | } | 114 | } |
115 | if (regno > ES*4) | 115 | if (regno > FS*4) |
116 | regno -= 1*4; | 116 | regno -= 1*4; |
117 | put_stack_long(child, regno, value); | 117 | put_stack_long(child, regno, value); |
118 | return 0; | 118 | return 0; |
@@ -124,18 +124,18 @@ static unsigned long getreg(struct task_struct *child, | |||
124 | unsigned long retval = ~0UL; | 124 | unsigned long retval = ~0UL; |
125 | 125 | ||
126 | switch (regno >> 2) { | 126 | switch (regno >> 2) { |
127 | case FS: | 127 | case GS: |
128 | retval = child->thread.fs; | 128 | retval = child->thread.gs; |
129 | break; | 129 | break; |
130 | case DS: | 130 | case DS: |
131 | case ES: | 131 | case ES: |
132 | case GS: | 132 | case FS: |
133 | case SS: | 133 | case SS: |
134 | case CS: | 134 | case CS: |
135 | retval = 0xffff; | 135 | retval = 0xffff; |
136 | /* fall through */ | 136 | /* fall through */ |
137 | default: | 137 | default: |
138 | if (regno > ES*4) | 138 | if (regno > FS*4) |
139 | regno -= 1*4; | 139 | regno -= 1*4; |
140 | retval &= get_stack_long(child, regno); | 140 | retval &= get_stack_long(child, regno); |
141 | } | 141 | } |
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 4694ac980cd2..122623dcc6e1 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c | |||
@@ -33,7 +33,6 @@ | |||
33 | #include <linux/initrd.h> | 33 | #include <linux/initrd.h> |
34 | #include <linux/bootmem.h> | 34 | #include <linux/bootmem.h> |
35 | #include <linux/seq_file.h> | 35 | #include <linux/seq_file.h> |
36 | #include <linux/platform_device.h> | ||
37 | #include <linux/console.h> | 36 | #include <linux/console.h> |
38 | #include <linux/mca.h> | 37 | #include <linux/mca.h> |
39 | #include <linux/root_dev.h> | 38 | #include <linux/root_dev.h> |
@@ -60,6 +59,7 @@ | |||
60 | #include <asm/io_apic.h> | 59 | #include <asm/io_apic.h> |
61 | #include <asm/ist.h> | 60 | #include <asm/ist.h> |
62 | #include <asm/io.h> | 61 | #include <asm/io.h> |
62 | #include <asm/vmi.h> | ||
63 | #include <setup_arch.h> | 63 | #include <setup_arch.h> |
64 | #include <bios_ebda.h> | 64 | #include <bios_ebda.h> |
65 | 65 | ||
@@ -581,6 +581,14 @@ void __init setup_arch(char **cmdline_p) | |||
581 | 581 | ||
582 | max_low_pfn = setup_memory(); | 582 | max_low_pfn = setup_memory(); |
583 | 583 | ||
584 | #ifdef CONFIG_VMI | ||
585 | /* | ||
586 | * Must be after max_low_pfn is determined, and before kernel | ||
587 | * pagetables are setup. | ||
588 | */ | ||
589 | vmi_init(); | ||
590 | #endif | ||
591 | |||
584 | /* | 592 | /* |
585 | * NOTE: before this point _nobody_ is allowed to allocate | 593 | * NOTE: before this point _nobody_ is allowed to allocate |
586 | * any memory using the bootmem allocator. Although the | 594 | * any memory using the bootmem allocator. Although the |
@@ -651,28 +659,3 @@ void __init setup_arch(char **cmdline_p) | |||
651 | #endif | 659 | #endif |
652 | tsc_init(); | 660 | tsc_init(); |
653 | } | 661 | } |
654 | |||
655 | static __init int add_pcspkr(void) | ||
656 | { | ||
657 | struct platform_device *pd; | ||
658 | int ret; | ||
659 | |||
660 | pd = platform_device_alloc("pcspkr", -1); | ||
661 | if (!pd) | ||
662 | return -ENOMEM; | ||
663 | |||
664 | ret = platform_device_add(pd); | ||
665 | if (ret) | ||
666 | platform_device_put(pd); | ||
667 | |||
668 | return ret; | ||
669 | } | ||
670 | device_initcall(add_pcspkr); | ||
671 | |||
672 | /* | ||
673 | * Local Variables: | ||
674 | * mode:c | ||
675 | * c-file-style:"k&r" | ||
676 | * c-basic-offset:8 | ||
677 | * End: | ||
678 | */ | ||
diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c index 65d7620eaa09..4f99e870c986 100644 --- a/arch/i386/kernel/signal.c +++ b/arch/i386/kernel/signal.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/suspend.h> | 21 | #include <linux/suspend.h> |
22 | #include <linux/ptrace.h> | 22 | #include <linux/ptrace.h> |
23 | #include <linux/elf.h> | 23 | #include <linux/elf.h> |
24 | #include <linux/binfmts.h> | ||
24 | #include <asm/processor.h> | 25 | #include <asm/processor.h> |
25 | #include <asm/ucontext.h> | 26 | #include <asm/ucontext.h> |
26 | #include <asm/uaccess.h> | 27 | #include <asm/uaccess.h> |
@@ -128,8 +129,8 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax | |||
128 | X86_EFLAGS_TF | X86_EFLAGS_SF | X86_EFLAGS_ZF | \ | 129 | X86_EFLAGS_TF | X86_EFLAGS_SF | X86_EFLAGS_ZF | \ |
129 | X86_EFLAGS_AF | X86_EFLAGS_PF | X86_EFLAGS_CF) | 130 | X86_EFLAGS_AF | X86_EFLAGS_PF | X86_EFLAGS_CF) |
130 | 131 | ||
131 | COPY_SEG(gs); | 132 | GET_SEG(gs); |
132 | GET_SEG(fs); | 133 | COPY_SEG(fs); |
133 | COPY_SEG(es); | 134 | COPY_SEG(es); |
134 | COPY_SEG(ds); | 135 | COPY_SEG(ds); |
135 | COPY(edi); | 136 | COPY(edi); |
@@ -244,9 +245,9 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, | |||
244 | { | 245 | { |
245 | int tmp, err = 0; | 246 | int tmp, err = 0; |
246 | 247 | ||
247 | err |= __put_user(regs->xgs, (unsigned int __user *)&sc->gs); | 248 | err |= __put_user(regs->xfs, (unsigned int __user *)&sc->fs); |
248 | savesegment(fs, tmp); | 249 | savesegment(gs, tmp); |
249 | err |= __put_user(tmp, (unsigned int __user *)&sc->fs); | 250 | err |= __put_user(tmp, (unsigned int __user *)&sc->gs); |
250 | 251 | ||
251 | err |= __put_user(regs->xes, (unsigned int __user *)&sc->es); | 252 | err |= __put_user(regs->xes, (unsigned int __user *)&sc->es); |
252 | err |= __put_user(regs->xds, (unsigned int __user *)&sc->ds); | 253 | err |= __put_user(regs->xds, (unsigned int __user *)&sc->ds); |
@@ -349,7 +350,10 @@ static int setup_frame(int sig, struct k_sigaction *ka, | |||
349 | goto give_sigsegv; | 350 | goto give_sigsegv; |
350 | } | 351 | } |
351 | 352 | ||
352 | restorer = (void *)VDSO_SYM(&__kernel_sigreturn); | 353 | if (current->binfmt->hasvdso) |
354 | restorer = (void *)VDSO_SYM(&__kernel_sigreturn); | ||
355 | else | ||
356 | restorer = (void *)&frame->retcode; | ||
353 | if (ka->sa.sa_flags & SA_RESTORER) | 357 | if (ka->sa.sa_flags & SA_RESTORER) |
354 | restorer = ka->sa.sa_restorer; | 358 | restorer = ka->sa.sa_restorer; |
355 | 359 | ||
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index 5285aff8367f..9bd9637ae692 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c | |||
@@ -23,6 +23,7 @@ | |||
23 | 23 | ||
24 | #include <asm/mtrr.h> | 24 | #include <asm/mtrr.h> |
25 | #include <asm/tlbflush.h> | 25 | #include <asm/tlbflush.h> |
26 | #include <asm/idle.h> | ||
26 | #include <mach_apic.h> | 27 | #include <mach_apic.h> |
27 | 28 | ||
28 | /* | 29 | /* |
@@ -374,8 +375,7 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, | |||
374 | /* | 375 | /* |
375 | * i'm not happy about this global shared spinlock in the | 376 | * i'm not happy about this global shared spinlock in the |
376 | * MM hot path, but we'll see how contended it is. | 377 | * MM hot path, but we'll see how contended it is. |
377 | * Temporarily this turns IRQs off, so that lockups are | 378 | * AK: x86-64 has a faster method that could be ported. |
378 | * detected by the NMI watchdog. | ||
379 | */ | 379 | */ |
380 | spin_lock(&tlbstate_lock); | 380 | spin_lock(&tlbstate_lock); |
381 | 381 | ||
@@ -400,7 +400,7 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, | |||
400 | 400 | ||
401 | while (!cpus_empty(flush_cpumask)) | 401 | while (!cpus_empty(flush_cpumask)) |
402 | /* nothing. lockup detection does not belong here */ | 402 | /* nothing. lockup detection does not belong here */ |
403 | mb(); | 403 | cpu_relax(); |
404 | 404 | ||
405 | flush_mm = NULL; | 405 | flush_mm = NULL; |
406 | flush_va = 0; | 406 | flush_va = 0; |
@@ -624,6 +624,7 @@ fastcall void smp_call_function_interrupt(struct pt_regs *regs) | |||
624 | /* | 624 | /* |
625 | * At this point the info structure may be out of scope unless wait==1 | 625 | * At this point the info structure may be out of scope unless wait==1 |
626 | */ | 626 | */ |
627 | exit_idle(); | ||
627 | irq_enter(); | 628 | irq_enter(); |
628 | (*func)(info); | 629 | (*func)(info); |
629 | irq_exit(); | 630 | irq_exit(); |
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 8c6c8c52b95c..48bfcaa13ecc 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c | |||
@@ -63,6 +63,7 @@ | |||
63 | #include <mach_apic.h> | 63 | #include <mach_apic.h> |
64 | #include <mach_wakecpu.h> | 64 | #include <mach_wakecpu.h> |
65 | #include <smpboot_hooks.h> | 65 | #include <smpboot_hooks.h> |
66 | #include <asm/vmi.h> | ||
66 | 67 | ||
67 | /* Set if we find a B stepping CPU */ | 68 | /* Set if we find a B stepping CPU */ |
68 | static int __devinitdata smp_b_stepping; | 69 | static int __devinitdata smp_b_stepping; |
@@ -93,12 +94,6 @@ cpumask_t cpu_possible_map; | |||
93 | EXPORT_SYMBOL(cpu_possible_map); | 94 | EXPORT_SYMBOL(cpu_possible_map); |
94 | static cpumask_t smp_commenced_mask; | 95 | static cpumask_t smp_commenced_mask; |
95 | 96 | ||
96 | /* TSC's upper 32 bits can't be written in eariler CPU (before prescott), there | ||
97 | * is no way to resync one AP against BP. TBD: for prescott and above, we | ||
98 | * should use IA64's algorithm | ||
99 | */ | ||
100 | static int __devinitdata tsc_sync_disabled; | ||
101 | |||
102 | /* Per CPU bogomips and other parameters */ | 97 | /* Per CPU bogomips and other parameters */ |
103 | struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; | 98 | struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; |
104 | EXPORT_SYMBOL(cpu_data); | 99 | EXPORT_SYMBOL(cpu_data); |
@@ -215,151 +210,6 @@ valid_k7: | |||
215 | ; | 210 | ; |
216 | } | 211 | } |
217 | 212 | ||
218 | /* | ||
219 | * TSC synchronization. | ||
220 | * | ||
221 | * We first check whether all CPUs have their TSC's synchronized, | ||
222 | * then we print a warning if not, and always resync. | ||
223 | */ | ||
224 | |||
225 | static struct { | ||
226 | atomic_t start_flag; | ||
227 | atomic_t count_start; | ||
228 | atomic_t count_stop; | ||
229 | unsigned long long values[NR_CPUS]; | ||
230 | } tsc __cpuinitdata = { | ||
231 | .start_flag = ATOMIC_INIT(0), | ||
232 | .count_start = ATOMIC_INIT(0), | ||
233 | .count_stop = ATOMIC_INIT(0), | ||
234 | }; | ||
235 | |||
236 | #define NR_LOOPS 5 | ||
237 | |||
238 | static void __init synchronize_tsc_bp(void) | ||
239 | { | ||
240 | int i; | ||
241 | unsigned long long t0; | ||
242 | unsigned long long sum, avg; | ||
243 | long long delta; | ||
244 | unsigned int one_usec; | ||
245 | int buggy = 0; | ||
246 | |||
247 | printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus()); | ||
248 | |||
249 | /* convert from kcyc/sec to cyc/usec */ | ||
250 | one_usec = cpu_khz / 1000; | ||
251 | |||
252 | atomic_set(&tsc.start_flag, 1); | ||
253 | wmb(); | ||
254 | |||
255 | /* | ||
256 | * We loop a few times to get a primed instruction cache, | ||
257 | * then the last pass is more or less synchronized and | ||
258 | * the BP and APs set their cycle counters to zero all at | ||
259 | * once. This reduces the chance of having random offsets | ||
260 | * between the processors, and guarantees that the maximum | ||
261 | * delay between the cycle counters is never bigger than | ||
262 | * the latency of information-passing (cachelines) between | ||
263 | * two CPUs. | ||
264 | */ | ||
265 | for (i = 0; i < NR_LOOPS; i++) { | ||
266 | /* | ||
267 | * all APs synchronize but they loop on '== num_cpus' | ||
268 | */ | ||
269 | while (atomic_read(&tsc.count_start) != num_booting_cpus()-1) | ||
270 | cpu_relax(); | ||
271 | atomic_set(&tsc.count_stop, 0); | ||
272 | wmb(); | ||
273 | /* | ||
274 | * this lets the APs save their current TSC: | ||
275 | */ | ||
276 | atomic_inc(&tsc.count_start); | ||
277 | |||
278 | rdtscll(tsc.values[smp_processor_id()]); | ||
279 | /* | ||
280 | * We clear the TSC in the last loop: | ||
281 | */ | ||
282 | if (i == NR_LOOPS-1) | ||
283 | write_tsc(0, 0); | ||
284 | |||
285 | /* | ||
286 | * Wait for all APs to leave the synchronization point: | ||
287 | */ | ||
288 | while (atomic_read(&tsc.count_stop) != num_booting_cpus()-1) | ||
289 | cpu_relax(); | ||
290 | atomic_set(&tsc.count_start, 0); | ||
291 | wmb(); | ||
292 | atomic_inc(&tsc.count_stop); | ||
293 | } | ||
294 | |||
295 | sum = 0; | ||
296 | for (i = 0; i < NR_CPUS; i++) { | ||
297 | if (cpu_isset(i, cpu_callout_map)) { | ||
298 | t0 = tsc.values[i]; | ||
299 | sum += t0; | ||
300 | } | ||
301 | } | ||
302 | avg = sum; | ||
303 | do_div(avg, num_booting_cpus()); | ||
304 | |||
305 | for (i = 0; i < NR_CPUS; i++) { | ||
306 | if (!cpu_isset(i, cpu_callout_map)) | ||
307 | continue; | ||
308 | delta = tsc.values[i] - avg; | ||
309 | if (delta < 0) | ||
310 | delta = -delta; | ||
311 | /* | ||
312 | * We report bigger than 2 microseconds clock differences. | ||
313 | */ | ||
314 | if (delta > 2*one_usec) { | ||
315 | long long realdelta; | ||
316 | |||
317 | if (!buggy) { | ||
318 | buggy = 1; | ||
319 | printk("\n"); | ||
320 | } | ||
321 | realdelta = delta; | ||
322 | do_div(realdelta, one_usec); | ||
323 | if (tsc.values[i] < avg) | ||
324 | realdelta = -realdelta; | ||
325 | |||
326 | if (realdelta) | ||
327 | printk(KERN_INFO "CPU#%d had %Ld usecs TSC " | ||
328 | "skew, fixed it up.\n", i, realdelta); | ||
329 | } | ||
330 | } | ||
331 | if (!buggy) | ||
332 | printk("passed.\n"); | ||
333 | } | ||
334 | |||
335 | static void __cpuinit synchronize_tsc_ap(void) | ||
336 | { | ||
337 | int i; | ||
338 | |||
339 | /* | ||
340 | * Not every cpu is online at the time | ||
341 | * this gets called, so we first wait for the BP to | ||
342 | * finish SMP initialization: | ||
343 | */ | ||
344 | while (!atomic_read(&tsc.start_flag)) | ||
345 | cpu_relax(); | ||
346 | |||
347 | for (i = 0; i < NR_LOOPS; i++) { | ||
348 | atomic_inc(&tsc.count_start); | ||
349 | while (atomic_read(&tsc.count_start) != num_booting_cpus()) | ||
350 | cpu_relax(); | ||
351 | |||
352 | rdtscll(tsc.values[smp_processor_id()]); | ||
353 | if (i == NR_LOOPS-1) | ||
354 | write_tsc(0, 0); | ||
355 | |||
356 | atomic_inc(&tsc.count_stop); | ||
357 | while (atomic_read(&tsc.count_stop) != num_booting_cpus()) | ||
358 | cpu_relax(); | ||
359 | } | ||
360 | } | ||
361 | #undef NR_LOOPS | ||
362 | |||
363 | extern void calibrate_delay(void); | 213 | extern void calibrate_delay(void); |
364 | 214 | ||
365 | static atomic_t init_deasserted; | 215 | static atomic_t init_deasserted; |
@@ -437,20 +287,12 @@ static void __cpuinit smp_callin(void) | |||
437 | /* | 287 | /* |
438 | * Save our processor parameters | 288 | * Save our processor parameters |
439 | */ | 289 | */ |
440 | smp_store_cpu_info(cpuid); | 290 | smp_store_cpu_info(cpuid); |
441 | |||
442 | disable_APIC_timer(); | ||
443 | 291 | ||
444 | /* | 292 | /* |
445 | * Allow the master to continue. | 293 | * Allow the master to continue. |
446 | */ | 294 | */ |
447 | cpu_set(cpuid, cpu_callin_map); | 295 | cpu_set(cpuid, cpu_callin_map); |
448 | |||
449 | /* | ||
450 | * Synchronize the TSC with the BP | ||
451 | */ | ||
452 | if (cpu_has_tsc && cpu_khz && !tsc_sync_disabled) | ||
453 | synchronize_tsc_ap(); | ||
454 | } | 296 | } |
455 | 297 | ||
456 | static int cpucount; | 298 | static int cpucount; |
@@ -545,18 +387,25 @@ static void __cpuinit start_secondary(void *unused) | |||
545 | * booting is too fragile that we want to limit the | 387 | * booting is too fragile that we want to limit the |
546 | * things done here to the most necessary things. | 388 | * things done here to the most necessary things. |
547 | */ | 389 | */ |
390 | #ifdef CONFIG_VMI | ||
391 | vmi_bringup(); | ||
392 | #endif | ||
548 | secondary_cpu_init(); | 393 | secondary_cpu_init(); |
549 | preempt_disable(); | 394 | preempt_disable(); |
550 | smp_callin(); | 395 | smp_callin(); |
551 | while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) | 396 | while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) |
552 | rep_nop(); | 397 | rep_nop(); |
553 | setup_secondary_APIC_clock(); | 398 | /* |
399 | * Check TSC synchronization with the BP: | ||
400 | */ | ||
401 | check_tsc_sync_target(); | ||
402 | |||
403 | setup_secondary_clock(); | ||
554 | if (nmi_watchdog == NMI_IO_APIC) { | 404 | if (nmi_watchdog == NMI_IO_APIC) { |
555 | disable_8259A_irq(0); | 405 | disable_8259A_irq(0); |
556 | enable_NMI_through_LVT0(NULL); | 406 | enable_NMI_through_LVT0(NULL); |
557 | enable_8259A_irq(0); | 407 | enable_8259A_irq(0); |
558 | } | 408 | } |
559 | enable_APIC_timer(); | ||
560 | /* | 409 | /* |
561 | * low-memory mappings have been cleared, flush them from | 410 | * low-memory mappings have been cleared, flush them from |
562 | * the local TLBs too. | 411 | * the local TLBs too. |
@@ -619,7 +468,6 @@ extern struct { | |||
619 | unsigned short ss; | 468 | unsigned short ss; |
620 | } stack_start; | 469 | } stack_start; |
621 | extern struct i386_pda *start_pda; | 470 | extern struct i386_pda *start_pda; |
622 | extern struct Xgt_desc_struct cpu_gdt_descr; | ||
623 | 471 | ||
624 | #ifdef CONFIG_NUMA | 472 | #ifdef CONFIG_NUMA |
625 | 473 | ||
@@ -749,7 +597,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) | |||
749 | /* | 597 | /* |
750 | * Due to the Pentium erratum 3AP. | 598 | * Due to the Pentium erratum 3AP. |
751 | */ | 599 | */ |
752 | maxlvt = get_maxlvt(); | 600 | maxlvt = lapic_get_maxlvt(); |
753 | if (maxlvt > 3) { | 601 | if (maxlvt > 3) { |
754 | apic_read_around(APIC_SPIV); | 602 | apic_read_around(APIC_SPIV); |
755 | apic_write(APIC_ESR, 0); | 603 | apic_write(APIC_ESR, 0); |
@@ -835,11 +683,18 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) | |||
835 | num_starts = 0; | 683 | num_starts = 0; |
836 | 684 | ||
837 | /* | 685 | /* |
686 | * Paravirt / VMI wants a startup IPI hook here to set up the | ||
687 | * target processor state. | ||
688 | */ | ||
689 | startup_ipi_hook(phys_apicid, (unsigned long) start_secondary, | ||
690 | (unsigned long) stack_start.esp); | ||
691 | |||
692 | /* | ||
838 | * Run STARTUP IPI loop. | 693 | * Run STARTUP IPI loop. |
839 | */ | 694 | */ |
840 | Dprintk("#startup loops: %d.\n", num_starts); | 695 | Dprintk("#startup loops: %d.\n", num_starts); |
841 | 696 | ||
842 | maxlvt = get_maxlvt(); | 697 | maxlvt = lapic_get_maxlvt(); |
843 | 698 | ||
844 | for (j = 1; j <= num_starts; j++) { | 699 | for (j = 1; j <= num_starts; j++) { |
845 | Dprintk("Sending STARTUP #%d.\n",j); | 700 | Dprintk("Sending STARTUP #%d.\n",j); |
@@ -1115,8 +970,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu) | |||
1115 | info.cpu = cpu; | 970 | info.cpu = cpu; |
1116 | INIT_WORK(&info.task, do_warm_boot_cpu); | 971 | INIT_WORK(&info.task, do_warm_boot_cpu); |
1117 | 972 | ||
1118 | tsc_sync_disabled = 1; | ||
1119 | |||
1120 | /* init low mem mapping */ | 973 | /* init low mem mapping */ |
1121 | clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, | 974 | clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, |
1122 | min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS)); | 975 | min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS)); |
@@ -1124,7 +977,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu) | |||
1124 | schedule_work(&info.task); | 977 | schedule_work(&info.task); |
1125 | wait_for_completion(&done); | 978 | wait_for_completion(&done); |
1126 | 979 | ||
1127 | tsc_sync_disabled = 0; | ||
1128 | zap_low_mappings(); | 980 | zap_low_mappings(); |
1129 | ret = 0; | 981 | ret = 0; |
1130 | exit: | 982 | exit: |
@@ -1320,13 +1172,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus) | |||
1320 | 1172 | ||
1321 | smpboot_setup_io_apic(); | 1173 | smpboot_setup_io_apic(); |
1322 | 1174 | ||
1323 | setup_boot_APIC_clock(); | 1175 | setup_boot_clock(); |
1324 | |||
1325 | /* | ||
1326 | * Synchronize the TSC with the AP | ||
1327 | */ | ||
1328 | if (cpu_has_tsc && cpucount && cpu_khz) | ||
1329 | synchronize_tsc_bp(); | ||
1330 | } | 1176 | } |
1331 | 1177 | ||
1332 | /* These are wrappers to interface to the new boot process. Someone | 1178 | /* These are wrappers to interface to the new boot process. Someone |
@@ -1461,9 +1307,16 @@ int __cpuinit __cpu_up(unsigned int cpu) | |||
1461 | } | 1307 | } |
1462 | 1308 | ||
1463 | local_irq_enable(); | 1309 | local_irq_enable(); |
1310 | |||
1464 | per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; | 1311 | per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; |
1465 | /* Unleash the CPU! */ | 1312 | /* Unleash the CPU! */ |
1466 | cpu_set(cpu, smp_commenced_mask); | 1313 | cpu_set(cpu, smp_commenced_mask); |
1314 | |||
1315 | /* | ||
1316 | * Check TSC synchronization with the AP: | ||
1317 | */ | ||
1318 | check_tsc_sync_source(cpu); | ||
1319 | |||
1467 | while (!cpu_isset(cpu, cpu_online_map)) | 1320 | while (!cpu_isset(cpu, cpu_online_map)) |
1468 | cpu_relax(); | 1321 | cpu_relax(); |
1469 | 1322 | ||
diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c index bc882a2b1db6..13ca54a85a1c 100644 --- a/arch/i386/kernel/sysenter.c +++ b/arch/i386/kernel/sysenter.c | |||
@@ -78,7 +78,7 @@ int __init sysenter_setup(void) | |||
78 | syscall_pages[0] = virt_to_page(syscall_page); | 78 | syscall_pages[0] = virt_to_page(syscall_page); |
79 | 79 | ||
80 | #ifdef CONFIG_COMPAT_VDSO | 80 | #ifdef CONFIG_COMPAT_VDSO |
81 | __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY); | 81 | __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY_EXEC); |
82 | printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO)); | 82 | printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO)); |
83 | #endif | 83 | #endif |
84 | 84 | ||
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index c505b16c0990..a5350059557a 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c | |||
@@ -131,15 +131,13 @@ unsigned long profile_pc(struct pt_regs *regs) | |||
131 | unsigned long pc = instruction_pointer(regs); | 131 | unsigned long pc = instruction_pointer(regs); |
132 | 132 | ||
133 | #ifdef CONFIG_SMP | 133 | #ifdef CONFIG_SMP |
134 | if (!user_mode_vm(regs) && in_lock_functions(pc)) { | 134 | if (!v8086_mode(regs) && SEGMENT_IS_KERNEL_CODE(regs->xcs) && |
135 | in_lock_functions(pc)) { | ||
135 | #ifdef CONFIG_FRAME_POINTER | 136 | #ifdef CONFIG_FRAME_POINTER |
136 | return *(unsigned long *)(regs->ebp + 4); | 137 | return *(unsigned long *)(regs->ebp + 4); |
137 | #else | 138 | #else |
138 | unsigned long *sp; | 139 | unsigned long *sp = (unsigned long *)®s->esp; |
139 | if ((regs->xcs & 3) == 0) | 140 | |
140 | sp = (unsigned long *)®s->esp; | ||
141 | else | ||
142 | sp = (unsigned long *)regs->esp; | ||
143 | /* Return address is either directly at stack pointer | 141 | /* Return address is either directly at stack pointer |
144 | or above a saved eflags. Eflags has bits 22-31 zero, | 142 | or above a saved eflags. Eflags has bits 22-31 zero, |
145 | kernel addresses don't. */ | 143 | kernel addresses don't. */ |
@@ -161,15 +159,6 @@ EXPORT_SYMBOL(profile_pc); | |||
161 | */ | 159 | */ |
162 | irqreturn_t timer_interrupt(int irq, void *dev_id) | 160 | irqreturn_t timer_interrupt(int irq, void *dev_id) |
163 | { | 161 | { |
164 | /* | ||
165 | * Here we are in the timer irq handler. We just have irqs locally | ||
166 | * disabled but we don't know if the timer_bh is running on the other | ||
167 | * CPU. We need to avoid to SMP race with it. NOTE: we don' t need | ||
168 | * the irq version of write_lock because as just said we have irq | ||
169 | * locally disabled. -arca | ||
170 | */ | ||
171 | write_seqlock(&xtime_lock); | ||
172 | |||
173 | #ifdef CONFIG_X86_IO_APIC | 162 | #ifdef CONFIG_X86_IO_APIC |
174 | if (timer_ack) { | 163 | if (timer_ack) { |
175 | /* | 164 | /* |
@@ -188,7 +177,6 @@ irqreturn_t timer_interrupt(int irq, void *dev_id) | |||
188 | 177 | ||
189 | do_timer_interrupt_hook(); | 178 | do_timer_interrupt_hook(); |
190 | 179 | ||
191 | |||
192 | if (MCA_bus) { | 180 | if (MCA_bus) { |
193 | /* The PS/2 uses level-triggered interrupts. You can't | 181 | /* The PS/2 uses level-triggered interrupts. You can't |
194 | turn them off, nor would you want to (any attempt to | 182 | turn them off, nor would you want to (any attempt to |
@@ -203,18 +191,11 @@ irqreturn_t timer_interrupt(int irq, void *dev_id) | |||
203 | outb_p( irq_v|0x80, 0x61 ); /* reset the IRQ */ | 191 | outb_p( irq_v|0x80, 0x61 ); /* reset the IRQ */ |
204 | } | 192 | } |
205 | 193 | ||
206 | write_sequnlock(&xtime_lock); | ||
207 | |||
208 | #ifdef CONFIG_X86_LOCAL_APIC | ||
209 | if (using_apic_timer) | ||
210 | smp_send_timer_broadcast_ipi(); | ||
211 | #endif | ||
212 | |||
213 | return IRQ_HANDLED; | 194 | return IRQ_HANDLED; |
214 | } | 195 | } |
215 | 196 | ||
216 | /* not static: needed by APM */ | 197 | /* not static: needed by APM */ |
217 | unsigned long get_cmos_time(void) | 198 | unsigned long read_persistent_clock(void) |
218 | { | 199 | { |
219 | unsigned long retval; | 200 | unsigned long retval; |
220 | unsigned long flags; | 201 | unsigned long flags; |
@@ -227,11 +208,11 @@ unsigned long get_cmos_time(void) | |||
227 | 208 | ||
228 | return retval; | 209 | return retval; |
229 | } | 210 | } |
230 | EXPORT_SYMBOL(get_cmos_time); | ||
231 | 211 | ||
232 | static void sync_cmos_clock(unsigned long dummy); | 212 | static void sync_cmos_clock(unsigned long dummy); |
233 | 213 | ||
234 | static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0); | 214 | static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0); |
215 | int no_sync_cmos_clock; | ||
235 | 216 | ||
236 | static void sync_cmos_clock(unsigned long dummy) | 217 | static void sync_cmos_clock(unsigned long dummy) |
237 | { | 218 | { |
@@ -275,117 +256,20 @@ static void sync_cmos_clock(unsigned long dummy) | |||
275 | 256 | ||
276 | void notify_arch_cmos_timer(void) | 257 | void notify_arch_cmos_timer(void) |
277 | { | 258 | { |
278 | mod_timer(&sync_cmos_timer, jiffies + 1); | 259 | if (!no_sync_cmos_clock) |
279 | } | 260 | mod_timer(&sync_cmos_timer, jiffies + 1); |
280 | |||
281 | static long clock_cmos_diff; | ||
282 | static unsigned long sleep_start; | ||
283 | |||
284 | static int timer_suspend(struct sys_device *dev, pm_message_t state) | ||
285 | { | ||
286 | /* | ||
287 | * Estimate time zone so that set_time can update the clock | ||
288 | */ | ||
289 | unsigned long ctime = get_cmos_time(); | ||
290 | |||
291 | clock_cmos_diff = -ctime; | ||
292 | clock_cmos_diff += get_seconds(); | ||
293 | sleep_start = ctime; | ||
294 | return 0; | ||
295 | } | ||
296 | |||
297 | static int timer_resume(struct sys_device *dev) | ||
298 | { | ||
299 | unsigned long flags; | ||
300 | unsigned long sec; | ||
301 | unsigned long ctime = get_cmos_time(); | ||
302 | long sleep_length = (ctime - sleep_start) * HZ; | ||
303 | struct timespec ts; | ||
304 | |||
305 | if (sleep_length < 0) { | ||
306 | printk(KERN_WARNING "CMOS clock skew detected in timer resume!\n"); | ||
307 | /* The time after the resume must not be earlier than the time | ||
308 | * before the suspend or some nasty things will happen | ||
309 | */ | ||
310 | sleep_length = 0; | ||
311 | ctime = sleep_start; | ||
312 | } | ||
313 | #ifdef CONFIG_HPET_TIMER | ||
314 | if (is_hpet_enabled()) | ||
315 | hpet_reenable(); | ||
316 | #endif | ||
317 | setup_pit_timer(); | ||
318 | |||
319 | sec = ctime + clock_cmos_diff; | ||
320 | ts.tv_sec = sec; | ||
321 | ts.tv_nsec = 0; | ||
322 | do_settimeofday(&ts); | ||
323 | write_seqlock_irqsave(&xtime_lock, flags); | ||
324 | jiffies_64 += sleep_length; | ||
325 | write_sequnlock_irqrestore(&xtime_lock, flags); | ||
326 | touch_softlockup_watchdog(); | ||
327 | return 0; | ||
328 | } | ||
329 | |||
330 | static struct sysdev_class timer_sysclass = { | ||
331 | .resume = timer_resume, | ||
332 | .suspend = timer_suspend, | ||
333 | set_kset_name("timer"), | ||
334 | }; | ||
335 | |||
336 | |||
337 | /* XXX this driverfs stuff should probably go elsewhere later -john */ | ||
338 | static struct sys_device device_timer = { | ||
339 | .id = 0, | ||
340 | .cls = &timer_sysclass, | ||
341 | }; | ||
342 | |||
343 | static int time_init_device(void) | ||
344 | { | ||
345 | int error = sysdev_class_register(&timer_sysclass); | ||
346 | if (!error) | ||
347 | error = sysdev_register(&device_timer); | ||
348 | return error; | ||
349 | } | 261 | } |
350 | 262 | ||
351 | device_initcall(time_init_device); | ||
352 | |||
353 | #ifdef CONFIG_HPET_TIMER | ||
354 | extern void (*late_time_init)(void); | 263 | extern void (*late_time_init)(void); |
355 | /* Duplicate of time_init() below, with hpet_enable part added */ | 264 | /* Duplicate of time_init() below, with hpet_enable part added */ |
356 | static void __init hpet_time_init(void) | 265 | static void __init hpet_time_init(void) |
357 | { | 266 | { |
358 | struct timespec ts; | 267 | if (!hpet_enable()) |
359 | ts.tv_sec = get_cmos_time(); | 268 | setup_pit_timer(); |
360 | ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); | ||
361 | |||
362 | do_settimeofday(&ts); | ||
363 | |||
364 | if ((hpet_enable() >= 0) && hpet_use_timer) { | ||
365 | printk("Using HPET for base-timer\n"); | ||
366 | } | ||
367 | |||
368 | do_time_init(); | 269 | do_time_init(); |
369 | } | 270 | } |
370 | #endif | ||
371 | 271 | ||
372 | void __init time_init(void) | 272 | void __init time_init(void) |
373 | { | 273 | { |
374 | struct timespec ts; | 274 | late_time_init = hpet_time_init; |
375 | #ifdef CONFIG_HPET_TIMER | ||
376 | if (is_hpet_capable()) { | ||
377 | /* | ||
378 | * HPET initialization needs to do memory-mapped io. So, let | ||
379 | * us do a late initialization after mem_init(). | ||
380 | */ | ||
381 | late_time_init = hpet_time_init; | ||
382 | return; | ||
383 | } | ||
384 | #endif | ||
385 | ts.tv_sec = get_cmos_time(); | ||
386 | ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); | ||
387 | |||
388 | do_settimeofday(&ts); | ||
389 | |||
390 | do_time_init(); | ||
391 | } | 275 | } |
diff --git a/arch/i386/kernel/time_hpet.c b/arch/i386/kernel/time_hpet.c deleted file mode 100644 index 1e4702dfcd01..000000000000 --- a/arch/i386/kernel/time_hpet.c +++ /dev/null | |||
@@ -1,497 +0,0 @@ | |||
1 | /* | ||
2 | * linux/arch/i386/kernel/time_hpet.c | ||
3 | * This code largely copied from arch/x86_64/kernel/time.c | ||
4 | * See that file for credits. | ||
5 | * | ||
6 | * 2003-06-30 Venkatesh Pallipadi - Additional changes for HPET support | ||
7 | */ | ||
8 | |||
9 | #include <linux/errno.h> | ||
10 | #include <linux/kernel.h> | ||
11 | #include <linux/param.h> | ||
12 | #include <linux/string.h> | ||
13 | #include <linux/init.h> | ||
14 | #include <linux/smp.h> | ||
15 | |||
16 | #include <asm/timer.h> | ||
17 | #include <asm/fixmap.h> | ||
18 | #include <asm/apic.h> | ||
19 | |||
20 | #include <linux/timex.h> | ||
21 | |||
22 | #include <asm/hpet.h> | ||
23 | #include <linux/hpet.h> | ||
24 | |||
25 | static unsigned long hpet_period; /* fsecs / HPET clock */ | ||
26 | unsigned long hpet_tick; /* hpet clks count per tick */ | ||
27 | unsigned long hpet_address; /* hpet memory map physical address */ | ||
28 | int hpet_use_timer; | ||
29 | |||
30 | static int use_hpet; /* can be used for runtime check of hpet */ | ||
31 | static int boot_hpet_disable; /* boottime override for HPET timer */ | ||
32 | static void __iomem * hpet_virt_address; /* hpet kernel virtual address */ | ||
33 | |||
34 | #define FSEC_TO_USEC (1000000000UL) | ||
35 | |||
36 | int hpet_readl(unsigned long a) | ||
37 | { | ||
38 | return readl(hpet_virt_address + a); | ||
39 | } | ||
40 | |||
41 | static void hpet_writel(unsigned long d, unsigned long a) | ||
42 | { | ||
43 | writel(d, hpet_virt_address + a); | ||
44 | } | ||
45 | |||
46 | #ifdef CONFIG_X86_LOCAL_APIC | ||
47 | /* | ||
48 | * HPET counters dont wrap around on every tick. They just change the | ||
49 | * comparator value and continue. Next tick can be caught by checking | ||
50 | * for a change in the comparator value. Used in apic.c. | ||
51 | */ | ||
52 | static void __devinit wait_hpet_tick(void) | ||
53 | { | ||
54 | unsigned int start_cmp_val, end_cmp_val; | ||
55 | |||
56 | start_cmp_val = hpet_readl(HPET_T0_CMP); | ||
57 | do { | ||
58 | end_cmp_val = hpet_readl(HPET_T0_CMP); | ||
59 | } while (start_cmp_val == end_cmp_val); | ||
60 | } | ||
61 | #endif | ||
62 | |||
63 | static int hpet_timer_stop_set_go(unsigned long tick) | ||
64 | { | ||
65 | unsigned int cfg; | ||
66 | |||
67 | /* | ||
68 | * Stop the timers and reset the main counter. | ||
69 | */ | ||
70 | cfg = hpet_readl(HPET_CFG); | ||
71 | cfg &= ~HPET_CFG_ENABLE; | ||
72 | hpet_writel(cfg, HPET_CFG); | ||
73 | hpet_writel(0, HPET_COUNTER); | ||
74 | hpet_writel(0, HPET_COUNTER + 4); | ||
75 | |||
76 | if (hpet_use_timer) { | ||
77 | /* | ||
78 | * Set up timer 0, as periodic with first interrupt to happen at | ||
79 | * hpet_tick, and period also hpet_tick. | ||
80 | */ | ||
81 | cfg = hpet_readl(HPET_T0_CFG); | ||
82 | cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | | ||
83 | HPET_TN_SETVAL | HPET_TN_32BIT; | ||
84 | hpet_writel(cfg, HPET_T0_CFG); | ||
85 | |||
86 | /* | ||
87 | * The first write after writing TN_SETVAL to the config register sets | ||
88 | * the counter value, the second write sets the threshold. | ||
89 | */ | ||
90 | hpet_writel(tick, HPET_T0_CMP); | ||
91 | hpet_writel(tick, HPET_T0_CMP); | ||
92 | } | ||
93 | /* | ||
94 | * Go! | ||
95 | */ | ||
96 | cfg = hpet_readl(HPET_CFG); | ||
97 | if (hpet_use_timer) | ||
98 | cfg |= HPET_CFG_LEGACY; | ||
99 | cfg |= HPET_CFG_ENABLE; | ||
100 | hpet_writel(cfg, HPET_CFG); | ||
101 | |||
102 | return 0; | ||
103 | } | ||
104 | |||
105 | /* | ||
106 | * Check whether HPET was found by ACPI boot parse. If yes setup HPET | ||
107 | * counter 0 for kernel base timer. | ||
108 | */ | ||
109 | int __init hpet_enable(void) | ||
110 | { | ||
111 | unsigned int id; | ||
112 | unsigned long tick_fsec_low, tick_fsec_high; /* tick in femto sec */ | ||
113 | unsigned long hpet_tick_rem; | ||
114 | |||
115 | if (boot_hpet_disable) | ||
116 | return -1; | ||
117 | |||
118 | if (!hpet_address) { | ||
119 | return -1; | ||
120 | } | ||
121 | hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE); | ||
122 | /* | ||
123 | * Read the period, compute tick and quotient. | ||
124 | */ | ||
125 | id = hpet_readl(HPET_ID); | ||
126 | |||
127 | /* | ||
128 | * We are checking for value '1' or more in number field if | ||
129 | * CONFIG_HPET_EMULATE_RTC is set because we will need an | ||
130 | * additional timer for RTC emulation. | ||
131 | * However, we can do with one timer otherwise using the | ||
132 | * the single HPET timer for system time. | ||
133 | */ | ||
134 | #ifdef CONFIG_HPET_EMULATE_RTC | ||
135 | if (!(id & HPET_ID_NUMBER)) { | ||
136 | iounmap(hpet_virt_address); | ||
137 | hpet_virt_address = NULL; | ||
138 | return -1; | ||
139 | } | ||
140 | #endif | ||
141 | |||
142 | |||
143 | hpet_period = hpet_readl(HPET_PERIOD); | ||
144 | if ((hpet_period < HPET_MIN_PERIOD) || (hpet_period > HPET_MAX_PERIOD)) { | ||
145 | iounmap(hpet_virt_address); | ||
146 | hpet_virt_address = NULL; | ||
147 | return -1; | ||
148 | } | ||
149 | |||
150 | /* | ||
151 | * 64 bit math | ||
152 | * First changing tick into fsec | ||
153 | * Then 64 bit div to find number of hpet clk per tick | ||
154 | */ | ||
155 | ASM_MUL64_REG(tick_fsec_low, tick_fsec_high, | ||
156 | KERNEL_TICK_USEC, FSEC_TO_USEC); | ||
157 | ASM_DIV64_REG(hpet_tick, hpet_tick_rem, | ||
158 | hpet_period, tick_fsec_low, tick_fsec_high); | ||
159 | |||
160 | if (hpet_tick_rem > (hpet_period >> 1)) | ||
161 | hpet_tick++; /* rounding the result */ | ||
162 | |||
163 | hpet_use_timer = id & HPET_ID_LEGSUP; | ||
164 | |||
165 | if (hpet_timer_stop_set_go(hpet_tick)) { | ||
166 | iounmap(hpet_virt_address); | ||
167 | hpet_virt_address = NULL; | ||
168 | return -1; | ||
169 | } | ||
170 | |||
171 | use_hpet = 1; | ||
172 | |||
173 | #ifdef CONFIG_HPET | ||
174 | { | ||
175 | struct hpet_data hd; | ||
176 | unsigned int ntimer; | ||
177 | |||
178 | memset(&hd, 0, sizeof (hd)); | ||
179 | |||
180 | ntimer = hpet_readl(HPET_ID); | ||
181 | ntimer = (ntimer & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT; | ||
182 | ntimer++; | ||
183 | |||
184 | /* | ||
185 | * Register with driver. | ||
186 | * Timer0 and Timer1 is used by platform. | ||
187 | */ | ||
188 | hd.hd_phys_address = hpet_address; | ||
189 | hd.hd_address = hpet_virt_address; | ||
190 | hd.hd_nirqs = ntimer; | ||
191 | hd.hd_flags = HPET_DATA_PLATFORM; | ||
192 | hpet_reserve_timer(&hd, 0); | ||
193 | #ifdef CONFIG_HPET_EMULATE_RTC | ||
194 | hpet_reserve_timer(&hd, 1); | ||
195 | #endif | ||
196 | hd.hd_irq[0] = HPET_LEGACY_8254; | ||
197 | hd.hd_irq[1] = HPET_LEGACY_RTC; | ||
198 | if (ntimer > 2) { | ||
199 | struct hpet __iomem *hpet; | ||
200 | struct hpet_timer __iomem *timer; | ||
201 | int i; | ||
202 | |||
203 | hpet = hpet_virt_address; | ||
204 | |||
205 | for (i = 2, timer = &hpet->hpet_timers[2]; i < ntimer; | ||
206 | timer++, i++) | ||
207 | hd.hd_irq[i] = (timer->hpet_config & | ||
208 | Tn_INT_ROUTE_CNF_MASK) >> | ||
209 | Tn_INT_ROUTE_CNF_SHIFT; | ||
210 | |||
211 | } | ||
212 | |||
213 | hpet_alloc(&hd); | ||
214 | } | ||
215 | #endif | ||
216 | |||
217 | #ifdef CONFIG_X86_LOCAL_APIC | ||
218 | if (hpet_use_timer) | ||
219 | wait_timer_tick = wait_hpet_tick; | ||
220 | #endif | ||
221 | return 0; | ||
222 | } | ||
223 | |||
224 | int hpet_reenable(void) | ||
225 | { | ||
226 | return hpet_timer_stop_set_go(hpet_tick); | ||
227 | } | ||
228 | |||
229 | int is_hpet_enabled(void) | ||
230 | { | ||
231 | return use_hpet; | ||
232 | } | ||
233 | |||
234 | int is_hpet_capable(void) | ||
235 | { | ||
236 | if (!boot_hpet_disable && hpet_address) | ||
237 | return 1; | ||
238 | return 0; | ||
239 | } | ||
240 | |||
241 | static int __init hpet_setup(char* str) | ||
242 | { | ||
243 | if (str) { | ||
244 | if (!strncmp("disable", str, 7)) | ||
245 | boot_hpet_disable = 1; | ||
246 | } | ||
247 | return 1; | ||
248 | } | ||
249 | |||
250 | __setup("hpet=", hpet_setup); | ||
251 | |||
252 | #ifdef CONFIG_HPET_EMULATE_RTC | ||
253 | /* HPET in LegacyReplacement Mode eats up RTC interrupt line. When, HPET | ||
254 | * is enabled, we support RTC interrupt functionality in software. | ||
255 | * RTC has 3 kinds of interrupts: | ||
256 | * 1) Update Interrupt - generate an interrupt, every sec, when RTC clock | ||
257 | * is updated | ||
258 | * 2) Alarm Interrupt - generate an interrupt at a specific time of day | ||
259 | * 3) Periodic Interrupt - generate periodic interrupt, with frequencies | ||
260 | * 2Hz-8192Hz (2Hz-64Hz for non-root user) (all freqs in powers of 2) | ||
261 | * (1) and (2) above are implemented using polling at a frequency of | ||
262 | * 64 Hz. The exact frequency is a tradeoff between accuracy and interrupt | ||
263 | * overhead. (DEFAULT_RTC_INT_FREQ) | ||
264 | * For (3), we use interrupts at 64Hz or user specified periodic | ||
265 | * frequency, whichever is higher. | ||
266 | */ | ||
267 | #include <linux/mc146818rtc.h> | ||
268 | #include <linux/rtc.h> | ||
269 | |||
270 | #define DEFAULT_RTC_INT_FREQ 64 | ||
271 | #define RTC_NUM_INTS 1 | ||
272 | |||
273 | static unsigned long UIE_on; | ||
274 | static unsigned long prev_update_sec; | ||
275 | |||
276 | static unsigned long AIE_on; | ||
277 | static struct rtc_time alarm_time; | ||
278 | |||
279 | static unsigned long PIE_on; | ||
280 | static unsigned long PIE_freq = DEFAULT_RTC_INT_FREQ; | ||
281 | static unsigned long PIE_count; | ||
282 | |||
283 | static unsigned long hpet_rtc_int_freq; /* RTC interrupt frequency */ | ||
284 | static unsigned int hpet_t1_cmp; /* cached comparator register */ | ||
285 | |||
286 | /* | ||
287 | * Timer 1 for RTC, we do not use periodic interrupt feature, | ||
288 | * even if HPET supports periodic interrupts on Timer 1. | ||
289 | * The reason being, to set up a periodic interrupt in HPET, we need to | ||
290 | * stop the main counter. And if we do that everytime someone diables/enables | ||
291 | * RTC, we will have adverse effect on main kernel timer running on Timer 0. | ||
292 | * So, for the time being, simulate the periodic interrupt in software. | ||
293 | * | ||
294 | * hpet_rtc_timer_init() is called for the first time and during subsequent | ||
295 | * interuppts reinit happens through hpet_rtc_timer_reinit(). | ||
296 | */ | ||
297 | int hpet_rtc_timer_init(void) | ||
298 | { | ||
299 | unsigned int cfg, cnt; | ||
300 | unsigned long flags; | ||
301 | |||
302 | if (!is_hpet_enabled()) | ||
303 | return 0; | ||
304 | /* | ||
305 | * Set the counter 1 and enable the interrupts. | ||
306 | */ | ||
307 | if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ)) | ||
308 | hpet_rtc_int_freq = PIE_freq; | ||
309 | else | ||
310 | hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; | ||
311 | |||
312 | local_irq_save(flags); | ||
313 | |||
314 | cnt = hpet_readl(HPET_COUNTER); | ||
315 | cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq); | ||
316 | hpet_writel(cnt, HPET_T1_CMP); | ||
317 | hpet_t1_cmp = cnt; | ||
318 | |||
319 | cfg = hpet_readl(HPET_T1_CFG); | ||
320 | cfg &= ~HPET_TN_PERIODIC; | ||
321 | cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; | ||
322 | hpet_writel(cfg, HPET_T1_CFG); | ||
323 | |||
324 | local_irq_restore(flags); | ||
325 | |||
326 | return 1; | ||
327 | } | ||
328 | |||
329 | static void hpet_rtc_timer_reinit(void) | ||
330 | { | ||
331 | unsigned int cfg, cnt, ticks_per_int, lost_ints; | ||
332 | |||
333 | if (unlikely(!(PIE_on | AIE_on | UIE_on))) { | ||
334 | cfg = hpet_readl(HPET_T1_CFG); | ||
335 | cfg &= ~HPET_TN_ENABLE; | ||
336 | hpet_writel(cfg, HPET_T1_CFG); | ||
337 | return; | ||
338 | } | ||
339 | |||
340 | if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ)) | ||
341 | hpet_rtc_int_freq = PIE_freq; | ||
342 | else | ||
343 | hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; | ||
344 | |||
345 | /* It is more accurate to use the comparator value than current count.*/ | ||
346 | ticks_per_int = hpet_tick * HZ / hpet_rtc_int_freq; | ||
347 | hpet_t1_cmp += ticks_per_int; | ||
348 | hpet_writel(hpet_t1_cmp, HPET_T1_CMP); | ||
349 | |||
350 | /* | ||
351 | * If the interrupt handler was delayed too long, the write above tries | ||
352 | * to schedule the next interrupt in the past and the hardware would | ||
353 | * not interrupt until the counter had wrapped around. | ||
354 | * So we have to check that the comparator wasn't set to a past time. | ||
355 | */ | ||
356 | cnt = hpet_readl(HPET_COUNTER); | ||
357 | if (unlikely((int)(cnt - hpet_t1_cmp) > 0)) { | ||
358 | lost_ints = (cnt - hpet_t1_cmp) / ticks_per_int + 1; | ||
359 | /* Make sure that, even with the time needed to execute | ||
360 | * this code, the next scheduled interrupt has been moved | ||
361 | * back to the future: */ | ||
362 | lost_ints++; | ||
363 | |||
364 | hpet_t1_cmp += lost_ints * ticks_per_int; | ||
365 | hpet_writel(hpet_t1_cmp, HPET_T1_CMP); | ||
366 | |||
367 | if (PIE_on) | ||
368 | PIE_count += lost_ints; | ||
369 | |||
370 | printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n", | ||
371 | hpet_rtc_int_freq); | ||
372 | } | ||
373 | } | ||
374 | |||
375 | /* | ||
376 | * The functions below are called from rtc driver. | ||
377 | * Return 0 if HPET is not being used. | ||
378 | * Otherwise do the necessary changes and return 1. | ||
379 | */ | ||
380 | int hpet_mask_rtc_irq_bit(unsigned long bit_mask) | ||
381 | { | ||
382 | if (!is_hpet_enabled()) | ||
383 | return 0; | ||
384 | |||
385 | if (bit_mask & RTC_UIE) | ||
386 | UIE_on = 0; | ||
387 | if (bit_mask & RTC_PIE) | ||
388 | PIE_on = 0; | ||
389 | if (bit_mask & RTC_AIE) | ||
390 | AIE_on = 0; | ||
391 | |||
392 | return 1; | ||
393 | } | ||
394 | |||
395 | int hpet_set_rtc_irq_bit(unsigned long bit_mask) | ||
396 | { | ||
397 | int timer_init_reqd = 0; | ||
398 | |||
399 | if (!is_hpet_enabled()) | ||
400 | return 0; | ||
401 | |||
402 | if (!(PIE_on | AIE_on | UIE_on)) | ||
403 | timer_init_reqd = 1; | ||
404 | |||
405 | if (bit_mask & RTC_UIE) { | ||
406 | UIE_on = 1; | ||
407 | } | ||
408 | if (bit_mask & RTC_PIE) { | ||
409 | PIE_on = 1; | ||
410 | PIE_count = 0; | ||
411 | } | ||
412 | if (bit_mask & RTC_AIE) { | ||
413 | AIE_on = 1; | ||
414 | } | ||
415 | |||
416 | if (timer_init_reqd) | ||
417 | hpet_rtc_timer_init(); | ||
418 | |||
419 | return 1; | ||
420 | } | ||
421 | |||
422 | int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec) | ||
423 | { | ||
424 | if (!is_hpet_enabled()) | ||
425 | return 0; | ||
426 | |||
427 | alarm_time.tm_hour = hrs; | ||
428 | alarm_time.tm_min = min; | ||
429 | alarm_time.tm_sec = sec; | ||
430 | |||
431 | return 1; | ||
432 | } | ||
433 | |||
434 | int hpet_set_periodic_freq(unsigned long freq) | ||
435 | { | ||
436 | if (!is_hpet_enabled()) | ||
437 | return 0; | ||
438 | |||
439 | PIE_freq = freq; | ||
440 | PIE_count = 0; | ||
441 | |||
442 | return 1; | ||
443 | } | ||
444 | |||
445 | int hpet_rtc_dropped_irq(void) | ||
446 | { | ||
447 | if (!is_hpet_enabled()) | ||
448 | return 0; | ||
449 | |||
450 | return 1; | ||
451 | } | ||
452 | |||
453 | irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id) | ||
454 | { | ||
455 | struct rtc_time curr_time; | ||
456 | unsigned long rtc_int_flag = 0; | ||
457 | int call_rtc_interrupt = 0; | ||
458 | |||
459 | hpet_rtc_timer_reinit(); | ||
460 | |||
461 | if (UIE_on | AIE_on) { | ||
462 | rtc_get_rtc_time(&curr_time); | ||
463 | } | ||
464 | if (UIE_on) { | ||
465 | if (curr_time.tm_sec != prev_update_sec) { | ||
466 | /* Set update int info, call real rtc int routine */ | ||
467 | call_rtc_interrupt = 1; | ||
468 | rtc_int_flag = RTC_UF; | ||
469 | prev_update_sec = curr_time.tm_sec; | ||
470 | } | ||
471 | } | ||
472 | if (PIE_on) { | ||
473 | PIE_count++; | ||
474 | if (PIE_count >= hpet_rtc_int_freq/PIE_freq) { | ||
475 | /* Set periodic int info, call real rtc int routine */ | ||
476 | call_rtc_interrupt = 1; | ||
477 | rtc_int_flag |= RTC_PF; | ||
478 | PIE_count = 0; | ||
479 | } | ||
480 | } | ||
481 | if (AIE_on) { | ||
482 | if ((curr_time.tm_sec == alarm_time.tm_sec) && | ||
483 | (curr_time.tm_min == alarm_time.tm_min) && | ||
484 | (curr_time.tm_hour == alarm_time.tm_hour)) { | ||
485 | /* Set alarm int info, call real rtc int routine */ | ||
486 | call_rtc_interrupt = 1; | ||
487 | rtc_int_flag |= RTC_AF; | ||
488 | } | ||
489 | } | ||
490 | if (call_rtc_interrupt) { | ||
491 | rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8)); | ||
492 | rtc_interrupt(rtc_int_flag, dev_id); | ||
493 | } | ||
494 | return IRQ_HANDLED; | ||
495 | } | ||
496 | #endif | ||
497 | |||
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 0efad8aeb41a..af0d3f70a817 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c | |||
@@ -94,6 +94,7 @@ asmlinkage void spurious_interrupt_bug(void); | |||
94 | asmlinkage void machine_check(void); | 94 | asmlinkage void machine_check(void); |
95 | 95 | ||
96 | int kstack_depth_to_print = 24; | 96 | int kstack_depth_to_print = 24; |
97 | static unsigned int code_bytes = 64; | ||
97 | ATOMIC_NOTIFIER_HEAD(i386die_chain); | 98 | ATOMIC_NOTIFIER_HEAD(i386die_chain); |
98 | 99 | ||
99 | int register_die_notifier(struct notifier_block *nb) | 100 | int register_die_notifier(struct notifier_block *nb) |
@@ -291,10 +292,11 @@ void show_registers(struct pt_regs *regs) | |||
291 | int i; | 292 | int i; |
292 | int in_kernel = 1; | 293 | int in_kernel = 1; |
293 | unsigned long esp; | 294 | unsigned long esp; |
294 | unsigned short ss; | 295 | unsigned short ss, gs; |
295 | 296 | ||
296 | esp = (unsigned long) (®s->esp); | 297 | esp = (unsigned long) (®s->esp); |
297 | savesegment(ss, ss); | 298 | savesegment(ss, ss); |
299 | savesegment(gs, gs); | ||
298 | if (user_mode_vm(regs)) { | 300 | if (user_mode_vm(regs)) { |
299 | in_kernel = 0; | 301 | in_kernel = 0; |
300 | esp = regs->esp; | 302 | esp = regs->esp; |
@@ -313,8 +315,8 @@ void show_registers(struct pt_regs *regs) | |||
313 | regs->eax, regs->ebx, regs->ecx, regs->edx); | 315 | regs->eax, regs->ebx, regs->ecx, regs->edx); |
314 | printk(KERN_EMERG "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", | 316 | printk(KERN_EMERG "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", |
315 | regs->esi, regs->edi, regs->ebp, esp); | 317 | regs->esi, regs->edi, regs->ebp, esp); |
316 | printk(KERN_EMERG "ds: %04x es: %04x ss: %04x\n", | 318 | printk(KERN_EMERG "ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n", |
317 | regs->xds & 0xffff, regs->xes & 0xffff, ss); | 319 | regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss); |
318 | printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", | 320 | printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", |
319 | TASK_COMM_LEN, current->comm, current->pid, | 321 | TASK_COMM_LEN, current->comm, current->pid, |
320 | current_thread_info(), current, current->thread_info); | 322 | current_thread_info(), current, current->thread_info); |
@@ -324,7 +326,8 @@ void show_registers(struct pt_regs *regs) | |||
324 | */ | 326 | */ |
325 | if (in_kernel) { | 327 | if (in_kernel) { |
326 | u8 *eip; | 328 | u8 *eip; |
327 | int code_bytes = 64; | 329 | unsigned int code_prologue = code_bytes * 43 / 64; |
330 | unsigned int code_len = code_bytes; | ||
328 | unsigned char c; | 331 | unsigned char c; |
329 | 332 | ||
330 | printk("\n" KERN_EMERG "Stack: "); | 333 | printk("\n" KERN_EMERG "Stack: "); |
@@ -332,14 +335,14 @@ void show_registers(struct pt_regs *regs) | |||
332 | 335 | ||
333 | printk(KERN_EMERG "Code: "); | 336 | printk(KERN_EMERG "Code: "); |
334 | 337 | ||
335 | eip = (u8 *)regs->eip - 43; | 338 | eip = (u8 *)regs->eip - code_prologue; |
336 | if (eip < (u8 *)PAGE_OFFSET || | 339 | if (eip < (u8 *)PAGE_OFFSET || |
337 | probe_kernel_address(eip, c)) { | 340 | probe_kernel_address(eip, c)) { |
338 | /* try starting at EIP */ | 341 | /* try starting at EIP */ |
339 | eip = (u8 *)regs->eip; | 342 | eip = (u8 *)regs->eip; |
340 | code_bytes = 32; | 343 | code_len = code_len - code_prologue + 1; |
341 | } | 344 | } |
342 | for (i = 0; i < code_bytes; i++, eip++) { | 345 | for (i = 0; i < code_len; i++, eip++) { |
343 | if (eip < (u8 *)PAGE_OFFSET || | 346 | if (eip < (u8 *)PAGE_OFFSET || |
344 | probe_kernel_address(eip, c)) { | 347 | probe_kernel_address(eip, c)) { |
345 | printk(" Bad EIP value."); | 348 | printk(" Bad EIP value."); |
@@ -1191,3 +1194,13 @@ static int __init kstack_setup(char *s) | |||
1191 | return 1; | 1194 | return 1; |
1192 | } | 1195 | } |
1193 | __setup("kstack=", kstack_setup); | 1196 | __setup("kstack=", kstack_setup); |
1197 | |||
1198 | static int __init code_bytes_setup(char *s) | ||
1199 | { | ||
1200 | code_bytes = simple_strtoul(s, NULL, 0); | ||
1201 | if (code_bytes > 8192) | ||
1202 | code_bytes = 8192; | ||
1203 | |||
1204 | return 1; | ||
1205 | } | ||
1206 | __setup("code_bytes=", code_bytes_setup); | ||
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c index 2cfc7b09b925..3082a418635c 100644 --- a/arch/i386/kernel/tsc.c +++ b/arch/i386/kernel/tsc.c | |||
@@ -23,6 +23,7 @@ | |||
23 | * an extra value to store the TSC freq | 23 | * an extra value to store the TSC freq |
24 | */ | 24 | */ |
25 | unsigned int tsc_khz; | 25 | unsigned int tsc_khz; |
26 | unsigned long long (*custom_sched_clock)(void); | ||
26 | 27 | ||
27 | int tsc_disable; | 28 | int tsc_disable; |
28 | 29 | ||
@@ -59,12 +60,6 @@ static inline int check_tsc_unstable(void) | |||
59 | return tsc_unstable; | 60 | return tsc_unstable; |
60 | } | 61 | } |
61 | 62 | ||
62 | void mark_tsc_unstable(void) | ||
63 | { | ||
64 | tsc_unstable = 1; | ||
65 | } | ||
66 | EXPORT_SYMBOL_GPL(mark_tsc_unstable); | ||
67 | |||
68 | /* Accellerators for sched_clock() | 63 | /* Accellerators for sched_clock() |
69 | * convert from cycles(64bits) => nanoseconds (64bits) | 64 | * convert from cycles(64bits) => nanoseconds (64bits) |
70 | * basic equation: | 65 | * basic equation: |
@@ -107,14 +102,14 @@ unsigned long long sched_clock(void) | |||
107 | { | 102 | { |
108 | unsigned long long this_offset; | 103 | unsigned long long this_offset; |
109 | 104 | ||
105 | if (unlikely(custom_sched_clock)) | ||
106 | return (*custom_sched_clock)(); | ||
107 | |||
110 | /* | 108 | /* |
111 | * in the NUMA case we dont use the TSC as they are not | 109 | * Fall back to jiffies if there's no TSC available: |
112 | * synchronized across all CPUs. | ||
113 | */ | 110 | */ |
114 | #ifndef CONFIG_NUMA | 111 | if (unlikely(tsc_disable)) |
115 | if (!cpu_khz || check_tsc_unstable()) | 112 | /* No locking but a rare wrong value is not a big deal: */ |
116 | #endif | ||
117 | /* no locking but a rare wrong value is not a big deal */ | ||
118 | return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); | 113 | return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); |
119 | 114 | ||
120 | /* read the Time Stamp Counter: */ | 115 | /* read the Time Stamp Counter: */ |
@@ -194,13 +189,13 @@ EXPORT_SYMBOL(recalibrate_cpu_khz); | |||
194 | void __init tsc_init(void) | 189 | void __init tsc_init(void) |
195 | { | 190 | { |
196 | if (!cpu_has_tsc || tsc_disable) | 191 | if (!cpu_has_tsc || tsc_disable) |
197 | return; | 192 | goto out_no_tsc; |
198 | 193 | ||
199 | cpu_khz = calculate_cpu_khz(); | 194 | cpu_khz = calculate_cpu_khz(); |
200 | tsc_khz = cpu_khz; | 195 | tsc_khz = cpu_khz; |
201 | 196 | ||
202 | if (!cpu_khz) | 197 | if (!cpu_khz) |
203 | return; | 198 | goto out_no_tsc; |
204 | 199 | ||
205 | printk("Detected %lu.%03lu MHz processor.\n", | 200 | printk("Detected %lu.%03lu MHz processor.\n", |
206 | (unsigned long)cpu_khz / 1000, | 201 | (unsigned long)cpu_khz / 1000, |
@@ -208,37 +203,18 @@ void __init tsc_init(void) | |||
208 | 203 | ||
209 | set_cyc2ns_scale(cpu_khz); | 204 | set_cyc2ns_scale(cpu_khz); |
210 | use_tsc_delay(); | 205 | use_tsc_delay(); |
211 | } | 206 | return; |
212 | 207 | ||
213 | #ifdef CONFIG_CPU_FREQ | 208 | out_no_tsc: |
214 | 209 | /* | |
215 | static unsigned int cpufreq_delayed_issched = 0; | 210 | * Set the tsc_disable flag if there's no TSC support, this |
216 | static unsigned int cpufreq_init = 0; | 211 | * makes it a fast flag for the kernel to see whether it |
217 | static struct work_struct cpufreq_delayed_get_work; | 212 | * should be using the TSC. |
218 | 213 | */ | |
219 | static void handle_cpufreq_delayed_get(struct work_struct *work) | 214 | tsc_disable = 1; |
220 | { | ||
221 | unsigned int cpu; | ||
222 | |||
223 | for_each_online_cpu(cpu) | ||
224 | cpufreq_get(cpu); | ||
225 | |||
226 | cpufreq_delayed_issched = 0; | ||
227 | } | 215 | } |
228 | 216 | ||
229 | /* | 217 | #ifdef CONFIG_CPU_FREQ |
230 | * if we notice cpufreq oddness, schedule a call to cpufreq_get() as it tries | ||
231 | * to verify the CPU frequency the timing core thinks the CPU is running | ||
232 | * at is still correct. | ||
233 | */ | ||
234 | static inline void cpufreq_delayed_get(void) | ||
235 | { | ||
236 | if (cpufreq_init && !cpufreq_delayed_issched) { | ||
237 | cpufreq_delayed_issched = 1; | ||
238 | printk(KERN_DEBUG "Checking if CPU frequency changed.\n"); | ||
239 | schedule_work(&cpufreq_delayed_get_work); | ||
240 | } | ||
241 | } | ||
242 | 218 | ||
243 | /* | 219 | /* |
244 | * if the CPU frequency is scaled, TSC-based delays will need a different | 220 | * if the CPU frequency is scaled, TSC-based delays will need a different |
@@ -303,17 +279,9 @@ static struct notifier_block time_cpufreq_notifier_block = { | |||
303 | 279 | ||
304 | static int __init cpufreq_tsc(void) | 280 | static int __init cpufreq_tsc(void) |
305 | { | 281 | { |
306 | int ret; | 282 | return cpufreq_register_notifier(&time_cpufreq_notifier_block, |
307 | 283 | CPUFREQ_TRANSITION_NOTIFIER); | |
308 | INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get); | ||
309 | ret = cpufreq_register_notifier(&time_cpufreq_notifier_block, | ||
310 | CPUFREQ_TRANSITION_NOTIFIER); | ||
311 | if (!ret) | ||
312 | cpufreq_init = 1; | ||
313 | |||
314 | return ret; | ||
315 | } | 284 | } |
316 | |||
317 | core_initcall(cpufreq_tsc); | 285 | core_initcall(cpufreq_tsc); |
318 | 286 | ||
319 | #endif | 287 | #endif |
@@ -321,7 +289,6 @@ core_initcall(cpufreq_tsc); | |||
321 | /* clock source code */ | 289 | /* clock source code */ |
322 | 290 | ||
323 | static unsigned long current_tsc_khz = 0; | 291 | static unsigned long current_tsc_khz = 0; |
324 | static int tsc_update_callback(void); | ||
325 | 292 | ||
326 | static cycle_t read_tsc(void) | 293 | static cycle_t read_tsc(void) |
327 | { | 294 | { |
@@ -339,37 +306,28 @@ static struct clocksource clocksource_tsc = { | |||
339 | .mask = CLOCKSOURCE_MASK(64), | 306 | .mask = CLOCKSOURCE_MASK(64), |
340 | .mult = 0, /* to be set */ | 307 | .mult = 0, /* to be set */ |
341 | .shift = 22, | 308 | .shift = 22, |
342 | .update_callback = tsc_update_callback, | 309 | .flags = CLOCK_SOURCE_IS_CONTINUOUS | |
343 | .is_continuous = 1, | 310 | CLOCK_SOURCE_MUST_VERIFY, |
344 | }; | 311 | }; |
345 | 312 | ||
346 | static int tsc_update_callback(void) | 313 | void mark_tsc_unstable(void) |
347 | { | 314 | { |
348 | int change = 0; | 315 | if (!tsc_unstable) { |
349 | 316 | tsc_unstable = 1; | |
350 | /* check to see if we should switch to the safe clocksource: */ | 317 | /* Can be called before registration */ |
351 | if (clocksource_tsc.rating != 0 && check_tsc_unstable()) { | 318 | if (clocksource_tsc.mult) |
352 | clocksource_tsc.rating = 0; | 319 | clocksource_change_rating(&clocksource_tsc, 0); |
353 | clocksource_reselect(); | 320 | else |
354 | change = 1; | 321 | clocksource_tsc.rating = 0; |
355 | } | ||
356 | |||
357 | /* only update if tsc_khz has changed: */ | ||
358 | if (current_tsc_khz != tsc_khz) { | ||
359 | current_tsc_khz = tsc_khz; | ||
360 | clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, | ||
361 | clocksource_tsc.shift); | ||
362 | change = 1; | ||
363 | } | 322 | } |
364 | |||
365 | return change; | ||
366 | } | 323 | } |
324 | EXPORT_SYMBOL_GPL(mark_tsc_unstable); | ||
367 | 325 | ||
368 | static int __init dmi_mark_tsc_unstable(struct dmi_system_id *d) | 326 | static int __init dmi_mark_tsc_unstable(struct dmi_system_id *d) |
369 | { | 327 | { |
370 | printk(KERN_NOTICE "%s detected: marking TSC unstable.\n", | 328 | printk(KERN_NOTICE "%s detected: marking TSC unstable.\n", |
371 | d->ident); | 329 | d->ident); |
372 | mark_tsc_unstable(); | 330 | tsc_unstable = 1; |
373 | return 0; | 331 | return 0; |
374 | } | 332 | } |
375 | 333 | ||
@@ -386,65 +344,44 @@ static struct dmi_system_id __initdata bad_tsc_dmi_table[] = { | |||
386 | {} | 344 | {} |
387 | }; | 345 | }; |
388 | 346 | ||
389 | #define TSC_FREQ_CHECK_INTERVAL (10*MSEC_PER_SEC) /* 10sec in MS */ | ||
390 | static struct timer_list verify_tsc_freq_timer; | ||
391 | |||
392 | /* XXX - Probably should add locking */ | ||
393 | static void verify_tsc_freq(unsigned long unused) | ||
394 | { | ||
395 | static u64 last_tsc; | ||
396 | static unsigned long last_jiffies; | ||
397 | |||
398 | u64 now_tsc, interval_tsc; | ||
399 | unsigned long now_jiffies, interval_jiffies; | ||
400 | |||
401 | |||
402 | if (check_tsc_unstable()) | ||
403 | return; | ||
404 | |||
405 | rdtscll(now_tsc); | ||
406 | now_jiffies = jiffies; | ||
407 | |||
408 | if (!last_jiffies) { | ||
409 | goto out; | ||
410 | } | ||
411 | |||
412 | interval_jiffies = now_jiffies - last_jiffies; | ||
413 | interval_tsc = now_tsc - last_tsc; | ||
414 | interval_tsc *= HZ; | ||
415 | do_div(interval_tsc, cpu_khz*1000); | ||
416 | |||
417 | if (interval_tsc < (interval_jiffies * 3 / 4)) { | ||
418 | printk("TSC appears to be running slowly. " | ||
419 | "Marking it as unstable\n"); | ||
420 | mark_tsc_unstable(); | ||
421 | return; | ||
422 | } | ||
423 | |||
424 | out: | ||
425 | last_tsc = now_tsc; | ||
426 | last_jiffies = now_jiffies; | ||
427 | /* set us up to go off on the next interval: */ | ||
428 | mod_timer(&verify_tsc_freq_timer, | ||
429 | jiffies + msecs_to_jiffies(TSC_FREQ_CHECK_INTERVAL)); | ||
430 | } | ||
431 | |||
432 | /* | 347 | /* |
433 | * Make an educated guess if the TSC is trustworthy and synchronized | 348 | * Make an educated guess if the TSC is trustworthy and synchronized |
434 | * over all CPUs. | 349 | * over all CPUs. |
435 | */ | 350 | */ |
436 | static __init int unsynchronized_tsc(void) | 351 | __cpuinit int unsynchronized_tsc(void) |
437 | { | 352 | { |
353 | if (!cpu_has_tsc || tsc_unstable) | ||
354 | return 1; | ||
438 | /* | 355 | /* |
439 | * Intel systems are normally all synchronized. | 356 | * Intel systems are normally all synchronized. |
440 | * Exceptions must mark TSC as unstable: | 357 | * Exceptions must mark TSC as unstable: |
441 | */ | 358 | */ |
442 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) | 359 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { |
443 | return 0; | 360 | /* assume multi socket systems are not synchronized: */ |
361 | if (num_possible_cpus() > 1) | ||
362 | tsc_unstable = 1; | ||
363 | } | ||
364 | return tsc_unstable; | ||
365 | } | ||
366 | |||
367 | /* | ||
368 | * Geode_LX - the OLPC CPU has a possibly a very reliable TSC | ||
369 | */ | ||
370 | #ifdef CONFIG_MGEODE_LX | ||
371 | /* RTSC counts during suspend */ | ||
372 | #define RTSC_SUSP 0x100 | ||
373 | |||
374 | static void __init check_geode_tsc_reliable(void) | ||
375 | { | ||
376 | unsigned long val; | ||
444 | 377 | ||
445 | /* assume multi socket systems are not synchronized: */ | 378 | rdmsrl(MSR_GEODE_BUSCONT_CONF0, val); |
446 | return num_possible_cpus() > 1; | 379 | if ((val & RTSC_SUSP)) |
380 | clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; | ||
447 | } | 381 | } |
382 | #else | ||
383 | static inline void check_geode_tsc_reliable(void) { } | ||
384 | #endif | ||
448 | 385 | ||
449 | static int __init init_tsc_clocksource(void) | 386 | static int __init init_tsc_clocksource(void) |
450 | { | 387 | { |
@@ -453,20 +390,16 @@ static int __init init_tsc_clocksource(void) | |||
453 | /* check blacklist */ | 390 | /* check blacklist */ |
454 | dmi_check_system(bad_tsc_dmi_table); | 391 | dmi_check_system(bad_tsc_dmi_table); |
455 | 392 | ||
456 | if (unsynchronized_tsc()) /* mark unstable if unsynced */ | 393 | unsynchronized_tsc(); |
457 | mark_tsc_unstable(); | 394 | check_geode_tsc_reliable(); |
458 | current_tsc_khz = tsc_khz; | 395 | current_tsc_khz = tsc_khz; |
459 | clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, | 396 | clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, |
460 | clocksource_tsc.shift); | 397 | clocksource_tsc.shift); |
461 | /* lower the rating if we already know its unstable: */ | 398 | /* lower the rating if we already know its unstable: */ |
462 | if (check_tsc_unstable()) | 399 | if (check_tsc_unstable()) { |
463 | clocksource_tsc.rating = 0; | 400 | clocksource_tsc.rating = 0; |
464 | 401 | clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; | |
465 | init_timer(&verify_tsc_freq_timer); | 402 | } |
466 | verify_tsc_freq_timer.function = verify_tsc_freq; | ||
467 | verify_tsc_freq_timer.expires = | ||
468 | jiffies + msecs_to_jiffies(TSC_FREQ_CHECK_INTERVAL); | ||
469 | add_timer(&verify_tsc_freq_timer); | ||
470 | 403 | ||
471 | return clocksource_register(&clocksource_tsc); | 404 | return clocksource_register(&clocksource_tsc); |
472 | } | 405 | } |
diff --git a/arch/i386/kernel/tsc_sync.c b/arch/i386/kernel/tsc_sync.c new file mode 100644 index 000000000000..12424629af87 --- /dev/null +++ b/arch/i386/kernel/tsc_sync.c | |||
@@ -0,0 +1 @@ | |||
#include "../../x86_64/kernel/tsc_sync.c" | |||
diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c index be2f96e67f78..d1b8f2b7aea6 100644 --- a/arch/i386/kernel/vm86.c +++ b/arch/i386/kernel/vm86.c | |||
@@ -96,12 +96,12 @@ static int copy_vm86_regs_to_user(struct vm86_regs __user *user, | |||
96 | { | 96 | { |
97 | int ret = 0; | 97 | int ret = 0; |
98 | 98 | ||
99 | /* kernel_vm86_regs is missing xfs, so copy everything up to | 99 | /* kernel_vm86_regs is missing xgs, so copy everything up to |
100 | (but not including) xgs, and then rest after xgs. */ | 100 | (but not including) orig_eax, and then rest including orig_eax. */ |
101 | ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.xgs)); | 101 | ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.orig_eax)); |
102 | ret += copy_to_user(&user->__null_gs, ®s->pt.xgs, | 102 | ret += copy_to_user(&user->orig_eax, ®s->pt.orig_eax, |
103 | sizeof(struct kernel_vm86_regs) - | 103 | sizeof(struct kernel_vm86_regs) - |
104 | offsetof(struct kernel_vm86_regs, pt.xgs)); | 104 | offsetof(struct kernel_vm86_regs, pt.orig_eax)); |
105 | 105 | ||
106 | return ret; | 106 | return ret; |
107 | } | 107 | } |
@@ -113,12 +113,13 @@ static int copy_vm86_regs_from_user(struct kernel_vm86_regs *regs, | |||
113 | { | 113 | { |
114 | int ret = 0; | 114 | int ret = 0; |
115 | 115 | ||
116 | ret += copy_from_user(regs, user, offsetof(struct kernel_vm86_regs, pt.xgs)); | 116 | /* copy eax-xfs inclusive */ |
117 | ret += copy_from_user(®s->pt.xgs, &user->__null_gs, | 117 | ret += copy_from_user(regs, user, offsetof(struct kernel_vm86_regs, pt.orig_eax)); |
118 | /* copy orig_eax-__gsh+extra */ | ||
119 | ret += copy_from_user(®s->pt.orig_eax, &user->orig_eax, | ||
118 | sizeof(struct kernel_vm86_regs) - | 120 | sizeof(struct kernel_vm86_regs) - |
119 | offsetof(struct kernel_vm86_regs, pt.xgs) + | 121 | offsetof(struct kernel_vm86_regs, pt.orig_eax) + |
120 | extra); | 122 | extra); |
121 | |||
122 | return ret; | 123 | return ret; |
123 | } | 124 | } |
124 | 125 | ||
@@ -157,8 +158,8 @@ struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs) | |||
157 | 158 | ||
158 | ret = KVM86->regs32; | 159 | ret = KVM86->regs32; |
159 | 160 | ||
160 | loadsegment(fs, current->thread.saved_fs); | 161 | ret->xfs = current->thread.saved_fs; |
161 | ret->xgs = current->thread.saved_gs; | 162 | loadsegment(gs, current->thread.saved_gs); |
162 | 163 | ||
163 | return ret; | 164 | return ret; |
164 | } | 165 | } |
@@ -285,9 +286,9 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk | |||
285 | */ | 286 | */ |
286 | info->regs.pt.xds = 0; | 287 | info->regs.pt.xds = 0; |
287 | info->regs.pt.xes = 0; | 288 | info->regs.pt.xes = 0; |
288 | info->regs.pt.xgs = 0; | 289 | info->regs.pt.xfs = 0; |
289 | 290 | ||
290 | /* we are clearing fs later just before "jmp resume_userspace", | 291 | /* we are clearing gs later just before "jmp resume_userspace", |
291 | * because it is not saved/restored. | 292 | * because it is not saved/restored. |
292 | */ | 293 | */ |
293 | 294 | ||
@@ -321,8 +322,8 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk | |||
321 | */ | 322 | */ |
322 | info->regs32->eax = 0; | 323 | info->regs32->eax = 0; |
323 | tsk->thread.saved_esp0 = tsk->thread.esp0; | 324 | tsk->thread.saved_esp0 = tsk->thread.esp0; |
324 | savesegment(fs, tsk->thread.saved_fs); | 325 | tsk->thread.saved_fs = info->regs32->xfs; |
325 | tsk->thread.saved_gs = info->regs32->xgs; | 326 | savesegment(gs, tsk->thread.saved_gs); |
326 | 327 | ||
327 | tss = &per_cpu(init_tss, get_cpu()); | 328 | tss = &per_cpu(init_tss, get_cpu()); |
328 | tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0; | 329 | tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0; |
@@ -342,7 +343,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk | |||
342 | __asm__ __volatile__( | 343 | __asm__ __volatile__( |
343 | "movl %0,%%esp\n\t" | 344 | "movl %0,%%esp\n\t" |
344 | "movl %1,%%ebp\n\t" | 345 | "movl %1,%%ebp\n\t" |
345 | "mov %2, %%fs\n\t" | 346 | "mov %2, %%gs\n\t" |
346 | "jmp resume_userspace" | 347 | "jmp resume_userspace" |
347 | : /* no outputs */ | 348 | : /* no outputs */ |
348 | :"r" (&info->regs), "r" (task_thread_info(tsk)), "r" (0)); | 349 | :"r" (&info->regs), "r" (task_thread_info(tsk)), "r" (0)); |
diff --git a/arch/i386/kernel/vmi.c b/arch/i386/kernel/vmi.c new file mode 100644 index 000000000000..bb5a7abf949c --- /dev/null +++ b/arch/i386/kernel/vmi.c | |||
@@ -0,0 +1,949 @@ | |||
1 | /* | ||
2 | * VMI specific paravirt-ops implementation | ||
3 | * | ||
4 | * Copyright (C) 2005, VMware, Inc. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, but | ||
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
14 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
15 | * details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
20 | * | ||
21 | * Send feedback to zach@vmware.com | ||
22 | * | ||
23 | */ | ||
24 | |||
25 | #include <linux/module.h> | ||
26 | #include <linux/license.h> | ||
27 | #include <linux/cpu.h> | ||
28 | #include <linux/bootmem.h> | ||
29 | #include <linux/mm.h> | ||
30 | #include <asm/vmi.h> | ||
31 | #include <asm/io.h> | ||
32 | #include <asm/fixmap.h> | ||
33 | #include <asm/apicdef.h> | ||
34 | #include <asm/apic.h> | ||
35 | #include <asm/processor.h> | ||
36 | #include <asm/timer.h> | ||
37 | #include <asm/vmi_time.h> | ||
38 | |||
39 | /* Convenient for calling VMI functions indirectly in the ROM */ | ||
40 | typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void); | ||
41 | typedef u64 __attribute__((regparm(2))) (VROMLONGFUNC)(int); | ||
42 | |||
43 | #define call_vrom_func(rom,func) \ | ||
44 | (((VROMFUNC *)(rom->func))()) | ||
45 | |||
46 | #define call_vrom_long_func(rom,func,arg) \ | ||
47 | (((VROMLONGFUNC *)(rom->func)) (arg)) | ||
48 | |||
49 | static struct vrom_header *vmi_rom; | ||
50 | static int license_gplok; | ||
51 | static int disable_nodelay; | ||
52 | static int disable_pge; | ||
53 | static int disable_pse; | ||
54 | static int disable_sep; | ||
55 | static int disable_tsc; | ||
56 | static int disable_mtrr; | ||
57 | |||
58 | /* Cached VMI operations */ | ||
59 | struct { | ||
60 | void (*cpuid)(void /* non-c */); | ||
61 | void (*_set_ldt)(u32 selector); | ||
62 | void (*set_tr)(u32 selector); | ||
63 | void (*set_kernel_stack)(u32 selector, u32 esp0); | ||
64 | void (*allocate_page)(u32, u32, u32, u32, u32); | ||
65 | void (*release_page)(u32, u32); | ||
66 | void (*set_pte)(pte_t, pte_t *, unsigned); | ||
67 | void (*update_pte)(pte_t *, unsigned); | ||
68 | void (*set_linear_mapping)(int, u32, u32, u32); | ||
69 | void (*flush_tlb)(int); | ||
70 | void (*set_initial_ap_state)(int, int); | ||
71 | void (*halt)(void); | ||
72 | } vmi_ops; | ||
73 | |||
74 | /* XXX move this to alternative.h */ | ||
75 | extern struct paravirt_patch __start_parainstructions[], | ||
76 | __stop_parainstructions[]; | ||
77 | |||
78 | /* | ||
79 | * VMI patching routines. | ||
80 | */ | ||
81 | #define MNEM_CALL 0xe8 | ||
82 | #define MNEM_JMP 0xe9 | ||
83 | #define MNEM_RET 0xc3 | ||
84 | |||
85 | static char irq_save_disable_callout[] = { | ||
86 | MNEM_CALL, 0, 0, 0, 0, | ||
87 | MNEM_CALL, 0, 0, 0, 0, | ||
88 | MNEM_RET | ||
89 | }; | ||
90 | #define IRQ_PATCH_INT_MASK 0 | ||
91 | #define IRQ_PATCH_DISABLE 5 | ||
92 | |||
93 | static inline void patch_offset(unsigned char *eip, unsigned char *dest) | ||
94 | { | ||
95 | *(unsigned long *)(eip+1) = dest-eip-5; | ||
96 | } | ||
97 | |||
98 | static unsigned patch_internal(int call, unsigned len, void *insns) | ||
99 | { | ||
100 | u64 reloc; | ||
101 | struct vmi_relocation_info *const rel = (struct vmi_relocation_info *)&reloc; | ||
102 | reloc = call_vrom_long_func(vmi_rom, get_reloc, call); | ||
103 | switch(rel->type) { | ||
104 | case VMI_RELOCATION_CALL_REL: | ||
105 | BUG_ON(len < 5); | ||
106 | *(char *)insns = MNEM_CALL; | ||
107 | patch_offset(insns, rel->eip); | ||
108 | return 5; | ||
109 | |||
110 | case VMI_RELOCATION_JUMP_REL: | ||
111 | BUG_ON(len < 5); | ||
112 | *(char *)insns = MNEM_JMP; | ||
113 | patch_offset(insns, rel->eip); | ||
114 | return 5; | ||
115 | |||
116 | case VMI_RELOCATION_NOP: | ||
117 | /* obliterate the whole thing */ | ||
118 | return 0; | ||
119 | |||
120 | case VMI_RELOCATION_NONE: | ||
121 | /* leave native code in place */ | ||
122 | break; | ||
123 | |||
124 | default: | ||
125 | BUG(); | ||
126 | } | ||
127 | return len; | ||
128 | } | ||
129 | |||
130 | /* | ||
131 | * Apply patch if appropriate, return length of new instruction | ||
132 | * sequence. The callee does nop padding for us. | ||
133 | */ | ||
134 | static unsigned vmi_patch(u8 type, u16 clobbers, void *insns, unsigned len) | ||
135 | { | ||
136 | switch (type) { | ||
137 | case PARAVIRT_IRQ_DISABLE: | ||
138 | return patch_internal(VMI_CALL_DisableInterrupts, len, insns); | ||
139 | case PARAVIRT_IRQ_ENABLE: | ||
140 | return patch_internal(VMI_CALL_EnableInterrupts, len, insns); | ||
141 | case PARAVIRT_RESTORE_FLAGS: | ||
142 | return patch_internal(VMI_CALL_SetInterruptMask, len, insns); | ||
143 | case PARAVIRT_SAVE_FLAGS: | ||
144 | return patch_internal(VMI_CALL_GetInterruptMask, len, insns); | ||
145 | case PARAVIRT_SAVE_FLAGS_IRQ_DISABLE: | ||
146 | if (len >= 10) { | ||
147 | patch_internal(VMI_CALL_GetInterruptMask, len, insns); | ||
148 | patch_internal(VMI_CALL_DisableInterrupts, len-5, insns+5); | ||
149 | return 10; | ||
150 | } else { | ||
151 | /* | ||
152 | * You bastards didn't leave enough room to | ||
153 | * patch save_flags_irq_disable inline. Patch | ||
154 | * to a helper | ||
155 | */ | ||
156 | BUG_ON(len < 5); | ||
157 | *(char *)insns = MNEM_CALL; | ||
158 | patch_offset(insns, irq_save_disable_callout); | ||
159 | return 5; | ||
160 | } | ||
161 | case PARAVIRT_INTERRUPT_RETURN: | ||
162 | return patch_internal(VMI_CALL_IRET, len, insns); | ||
163 | case PARAVIRT_STI_SYSEXIT: | ||
164 | return patch_internal(VMI_CALL_SYSEXIT, len, insns); | ||
165 | default: | ||
166 | break; | ||
167 | } | ||
168 | return len; | ||
169 | } | ||
170 | |||
171 | /* CPUID has non-C semantics, and paravirt-ops API doesn't match hardware ISA */ | ||
172 | static void vmi_cpuid(unsigned int *eax, unsigned int *ebx, | ||
173 | unsigned int *ecx, unsigned int *edx) | ||
174 | { | ||
175 | int override = 0; | ||
176 | if (*eax == 1) | ||
177 | override = 1; | ||
178 | asm volatile ("call *%6" | ||
179 | : "=a" (*eax), | ||
180 | "=b" (*ebx), | ||
181 | "=c" (*ecx), | ||
182 | "=d" (*edx) | ||
183 | : "0" (*eax), "2" (*ecx), "r" (vmi_ops.cpuid)); | ||
184 | if (override) { | ||
185 | if (disable_pse) | ||
186 | *edx &= ~X86_FEATURE_PSE; | ||
187 | if (disable_pge) | ||
188 | *edx &= ~X86_FEATURE_PGE; | ||
189 | if (disable_sep) | ||
190 | *edx &= ~X86_FEATURE_SEP; | ||
191 | if (disable_tsc) | ||
192 | *edx &= ~X86_FEATURE_TSC; | ||
193 | if (disable_mtrr) | ||
194 | *edx &= ~X86_FEATURE_MTRR; | ||
195 | } | ||
196 | } | ||
197 | |||
198 | static inline void vmi_maybe_load_tls(struct desc_struct *gdt, int nr, struct desc_struct *new) | ||
199 | { | ||
200 | if (gdt[nr].a != new->a || gdt[nr].b != new->b) | ||
201 | write_gdt_entry(gdt, nr, new->a, new->b); | ||
202 | } | ||
203 | |||
204 | static void vmi_load_tls(struct thread_struct *t, unsigned int cpu) | ||
205 | { | ||
206 | struct desc_struct *gdt = get_cpu_gdt_table(cpu); | ||
207 | vmi_maybe_load_tls(gdt, GDT_ENTRY_TLS_MIN + 0, &t->tls_array[0]); | ||
208 | vmi_maybe_load_tls(gdt, GDT_ENTRY_TLS_MIN + 1, &t->tls_array[1]); | ||
209 | vmi_maybe_load_tls(gdt, GDT_ENTRY_TLS_MIN + 2, &t->tls_array[2]); | ||
210 | } | ||
211 | |||
212 | static void vmi_set_ldt(const void *addr, unsigned entries) | ||
213 | { | ||
214 | unsigned cpu = smp_processor_id(); | ||
215 | u32 low, high; | ||
216 | |||
217 | pack_descriptor(&low, &high, (unsigned long)addr, | ||
218 | entries * sizeof(struct desc_struct) - 1, | ||
219 | DESCTYPE_LDT, 0); | ||
220 | write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, low, high); | ||
221 | vmi_ops._set_ldt(entries ? GDT_ENTRY_LDT*sizeof(struct desc_struct) : 0); | ||
222 | } | ||
223 | |||
224 | static void vmi_set_tr(void) | ||
225 | { | ||
226 | vmi_ops.set_tr(GDT_ENTRY_TSS*sizeof(struct desc_struct)); | ||
227 | } | ||
228 | |||
229 | static void vmi_load_esp0(struct tss_struct *tss, | ||
230 | struct thread_struct *thread) | ||
231 | { | ||
232 | tss->esp0 = thread->esp0; | ||
233 | |||
234 | /* This can only happen when SEP is enabled, no need to test "SEP"arately */ | ||
235 | if (unlikely(tss->ss1 != thread->sysenter_cs)) { | ||
236 | tss->ss1 = thread->sysenter_cs; | ||
237 | wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); | ||
238 | } | ||
239 | vmi_ops.set_kernel_stack(__KERNEL_DS, tss->esp0); | ||
240 | } | ||
241 | |||
242 | static void vmi_flush_tlb_user(void) | ||
243 | { | ||
244 | vmi_ops.flush_tlb(VMI_FLUSH_TLB); | ||
245 | } | ||
246 | |||
247 | static void vmi_flush_tlb_kernel(void) | ||
248 | { | ||
249 | vmi_ops.flush_tlb(VMI_FLUSH_TLB | VMI_FLUSH_GLOBAL); | ||
250 | } | ||
251 | |||
252 | /* Stub to do nothing at all; used for delays and unimplemented calls */ | ||
253 | static void vmi_nop(void) | ||
254 | { | ||
255 | } | ||
256 | |||
257 | /* For NO_IDLE_HZ, we stop the clock when halting the kernel */ | ||
258 | #ifdef CONFIG_NO_IDLE_HZ | ||
259 | static fastcall void vmi_safe_halt(void) | ||
260 | { | ||
261 | int idle = vmi_stop_hz_timer(); | ||
262 | vmi_ops.halt(); | ||
263 | if (idle) { | ||
264 | local_irq_disable(); | ||
265 | vmi_account_time_restart_hz_timer(); | ||
266 | local_irq_enable(); | ||
267 | } | ||
268 | } | ||
269 | #endif | ||
270 | |||
271 | #ifdef CONFIG_DEBUG_PAGE_TYPE | ||
272 | |||
273 | #ifdef CONFIG_X86_PAE | ||
274 | #define MAX_BOOT_PTS (2048+4+1) | ||
275 | #else | ||
276 | #define MAX_BOOT_PTS (1024+1) | ||
277 | #endif | ||
278 | |||
279 | /* | ||
280 | * During boot, mem_map is not yet available in paging_init, so stash | ||
281 | * all the boot page allocations here. | ||
282 | */ | ||
283 | static struct { | ||
284 | u32 pfn; | ||
285 | int type; | ||
286 | } boot_page_allocations[MAX_BOOT_PTS]; | ||
287 | static int num_boot_page_allocations; | ||
288 | static int boot_allocations_applied; | ||
289 | |||
290 | void vmi_apply_boot_page_allocations(void) | ||
291 | { | ||
292 | int i; | ||
293 | BUG_ON(!mem_map); | ||
294 | for (i = 0; i < num_boot_page_allocations; i++) { | ||
295 | struct page *page = pfn_to_page(boot_page_allocations[i].pfn); | ||
296 | page->type = boot_page_allocations[i].type; | ||
297 | page->type = boot_page_allocations[i].type & | ||
298 | ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE); | ||
299 | } | ||
300 | boot_allocations_applied = 1; | ||
301 | } | ||
302 | |||
303 | static void record_page_type(u32 pfn, int type) | ||
304 | { | ||
305 | BUG_ON(num_boot_page_allocations >= MAX_BOOT_PTS); | ||
306 | boot_page_allocations[num_boot_page_allocations].pfn = pfn; | ||
307 | boot_page_allocations[num_boot_page_allocations].type = type; | ||
308 | num_boot_page_allocations++; | ||
309 | } | ||
310 | |||
311 | static void check_zeroed_page(u32 pfn, int type, struct page *page) | ||
312 | { | ||
313 | u32 *ptr; | ||
314 | int i; | ||
315 | int limit = PAGE_SIZE / sizeof(int); | ||
316 | |||
317 | if (page_address(page)) | ||
318 | ptr = (u32 *)page_address(page); | ||
319 | else | ||
320 | ptr = (u32 *)__va(pfn << PAGE_SHIFT); | ||
321 | /* | ||
322 | * When cloning the root in non-PAE mode, only the userspace | ||
323 | * pdes need to be zeroed. | ||
324 | */ | ||
325 | if (type & VMI_PAGE_CLONE) | ||
326 | limit = USER_PTRS_PER_PGD; | ||
327 | for (i = 0; i < limit; i++) | ||
328 | BUG_ON(ptr[i]); | ||
329 | } | ||
330 | |||
331 | /* | ||
332 | * We stash the page type into struct page so we can verify the page | ||
333 | * types are used properly. | ||
334 | */ | ||
335 | static void vmi_set_page_type(u32 pfn, int type) | ||
336 | { | ||
337 | /* PAE can have multiple roots per page - don't track */ | ||
338 | if (PTRS_PER_PMD > 1 && (type & VMI_PAGE_PDP)) | ||
339 | return; | ||
340 | |||
341 | if (boot_allocations_applied) { | ||
342 | struct page *page = pfn_to_page(pfn); | ||
343 | if (type != VMI_PAGE_NORMAL) | ||
344 | BUG_ON(page->type); | ||
345 | else | ||
346 | BUG_ON(page->type == VMI_PAGE_NORMAL); | ||
347 | page->type = type & ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE); | ||
348 | if (type & VMI_PAGE_ZEROED) | ||
349 | check_zeroed_page(pfn, type, page); | ||
350 | } else { | ||
351 | record_page_type(pfn, type); | ||
352 | } | ||
353 | } | ||
354 | |||
355 | static void vmi_check_page_type(u32 pfn, int type) | ||
356 | { | ||
357 | /* PAE can have multiple roots per page - skip checks */ | ||
358 | if (PTRS_PER_PMD > 1 && (type & VMI_PAGE_PDP)) | ||
359 | return; | ||
360 | |||
361 | type &= ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE); | ||
362 | if (boot_allocations_applied) { | ||
363 | struct page *page = pfn_to_page(pfn); | ||
364 | BUG_ON((page->type ^ type) & VMI_PAGE_PAE); | ||
365 | BUG_ON(type == VMI_PAGE_NORMAL && page->type); | ||
366 | BUG_ON((type & page->type) == 0); | ||
367 | } | ||
368 | } | ||
369 | #else | ||
370 | #define vmi_set_page_type(p,t) do { } while (0) | ||
371 | #define vmi_check_page_type(p,t) do { } while (0) | ||
372 | #endif | ||
373 | |||
374 | static void vmi_allocate_pt(u32 pfn) | ||
375 | { | ||
376 | vmi_set_page_type(pfn, VMI_PAGE_L1); | ||
377 | vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); | ||
378 | } | ||
379 | |||
380 | static void vmi_allocate_pd(u32 pfn) | ||
381 | { | ||
382 | /* | ||
383 | * This call comes in very early, before mem_map is setup. | ||
384 | * It is called only for swapper_pg_dir, which already has | ||
385 | * data on it. | ||
386 | */ | ||
387 | vmi_set_page_type(pfn, VMI_PAGE_L2); | ||
388 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0); | ||
389 | } | ||
390 | |||
391 | static void vmi_allocate_pd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count) | ||
392 | { | ||
393 | vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE); | ||
394 | vmi_check_page_type(clonepfn, VMI_PAGE_L2); | ||
395 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count); | ||
396 | } | ||
397 | |||
398 | static void vmi_release_pt(u32 pfn) | ||
399 | { | ||
400 | vmi_ops.release_page(pfn, VMI_PAGE_L1); | ||
401 | vmi_set_page_type(pfn, VMI_PAGE_NORMAL); | ||
402 | } | ||
403 | |||
404 | static void vmi_release_pd(u32 pfn) | ||
405 | { | ||
406 | vmi_ops.release_page(pfn, VMI_PAGE_L2); | ||
407 | vmi_set_page_type(pfn, VMI_PAGE_NORMAL); | ||
408 | } | ||
409 | |||
/*
 * Helper macros for MMU update flags.  We can defer updates until a flush
 * or page invalidation only if the update is to the current address space
 * (otherwise, there is no flush).  We must check against init_mm, since
 * this could be a kernel update, which usually passes init_mm, although
 * sometimes this check can be skipped if we know the particular function
 * is only called on user mode PTEs.  We could change the kernel to pass
 * current->active_mm here, but in particular, I was unsure if changing
 * mm/highmem.c to do this would still be correct on other architectures.
 */
/* True if @mm is the currently loaded address space (or init_mm, unless
 * @mustbeuser insists on a user mm). */
#define is_current_as(mm, mustbeuser) ((mm) == current->active_mm ||    \
                                       (!mustbeuser && (mm) == &init_mm))
/* Level flags plus, when current, the CURRENT_AS marker and VA bits. */
#define vmi_flags_addr(mm, addr, level, user)                           \
        ((level) | (is_current_as(mm, user) ?                           \
                (VMI_PAGE_CURRENT_AS | ((addr) & VMI_PAGE_VA_MASK)) : 0))
/* As above, but also allows the hypervisor to defer the update. */
#define vmi_flags_addr_defer(mm, addr, level, user)                     \
        ((level) | (is_current_as(mm, user) ?                           \
                (VMI_PAGE_DEFER | VMI_PAGE_CURRENT_AS | ((addr) & VMI_PAGE_VA_MASK)) : 0))
428 | |||
/* Notify the hypervisor of an in-place PTE modification at @addr in @mm. */
static void vmi_update_pte(struct mm_struct *mm, u32 addr, pte_t *ptep)
{
	vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
	vmi_ops.update_pte(ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
}
434 | |||
/* As vmi_update_pte, but the hypervisor may defer until the next flush. */
static void vmi_update_pte_defer(struct mm_struct *mm, u32 addr, pte_t *ptep)
{
	vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
	vmi_ops.update_pte(ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 0));
}
440 | |||
/* Install @pte at @ptep via the hypervisor, with no address-space hint. */
static void vmi_set_pte(pte_t *ptep, pte_t pte)
{
	/* XXX because of set_pmd_pte, this can be called on PT or PD layers */
	vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE | VMI_PAGE_PD);
	vmi_ops.set_pte(pte, ptep, VMI_PAGE_PT);
}
447 | |||
/* Install @pte at @ptep, passing the mm/addr so current-AS flags apply. */
static void vmi_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte)
{
	vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
	vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
}
453 | |||
/*
 * Write a PMD entry through the hypervisor.  The pmd value is repacked
 * into a pte_t because the VMI set_pte hook carries all levels; the page
 * type checked differs between PAE (real PMD) and non-PAE (the "PMD" is
 * really the top-level PGD).
 */
static void vmi_set_pmd(pmd_t *pmdp, pmd_t pmdval)
{
#ifdef CONFIG_X86_PAE
	const pte_t pte = { pmdval.pmd, pmdval.pmd >> 32 };
	vmi_check_page_type(__pa(pmdp) >> PAGE_SHIFT, VMI_PAGE_PMD);
#else
	const pte_t pte = { pmdval.pud.pgd.pgd };
	vmi_check_page_type(__pa(pmdp) >> PAGE_SHIFT, VMI_PAGE_PGD);
#endif
	vmi_ops.set_pte(pte, (pte_t *)pmdp, VMI_PAGE_PD);
}
465 | |||
#ifdef CONFIG_X86_PAE

/* Atomically store a 64-bit PTE, then notify the hypervisor of the change. */
static void vmi_set_pte_atomic(pte_t *ptep, pte_t pteval)
{
	/*
	 * XXX This is called from set_pmd_pte, but at both PT
	 * and PD layers so the VMI_PAGE_PT flag is wrong.  But
	 * it is only called for large page mapping changes,
	 * the Xen backend, doesn't support large pages, and the
	 * ESX backend doesn't depend on the flag.
	 */
	set_64bit((unsigned long long *)ptep,pte_val(pteval));
	vmi_ops.update_pte(ptep, VMI_PAGE_PT);
}
480 | |||
/* Set a (user) PTE that is known to be becoming present; deferrable. */
static void vmi_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
{
	vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
	vmi_ops.set_pte(pte, ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 1));
}
486 | |||
/* Write a PAE top-level (PUD/PGD) entry through the hypervisor. */
static void vmi_set_pud(pud_t *pudp, pud_t pudval)
{
	/* Um, eww -- repack the 64-bit pgd value as a pte_t pair. */
	const pte_t pte = { pudval.pgd.pgd, pudval.pgd.pgd >> 32 };
	vmi_check_page_type(__pa(pudp) >> PAGE_SHIFT, VMI_PAGE_PGD);
	vmi_ops.set_pte(pte, (pte_t *)pudp, VMI_PAGE_PDP);
}
494 | |||
/* Clear a PTE by writing a zero entry through the hypervisor. */
static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	const pte_t pte = { 0 };
	vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
	vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
}
501 | |||
/* Clear a PMD entry by writing a zero entry through the hypervisor. */
void vmi_pmd_clear(pmd_t *pmd)
{
	const pte_t pte = { 0 };
	vmi_check_page_type(__pa(pmd) >> PAGE_SHIFT, VMI_PAGE_PMD);
	vmi_ops.set_pte(pte, (pte_t *)pmd, VMI_PAGE_PD);
}
#endif
509 | |||
#ifdef CONFIG_SMP
/*
 * Bootstrap register state handed to the hypervisor for an AP.
 * NOTE(review): single static buffer — assumes AP bringup is serialized
 * (one AP started at a time); confirm against smpboot.c callers.
 */
struct vmi_ap_state ap;
extern void setup_pda(void);

/*
 * Replace the real-mode trampoline startup IPI: instead of sending an
 * INIT/SIPI sequence, hand the hypervisor a fully-formed protected-mode
 * register state for the AP identified by @phys_apicid.
 */
static void __init /* XXX cpu hotplug */
vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
		     unsigned long start_esp)
{
	/* Default everything to zero.  This is fine for most GPRs. */
	memset(&ap, 0, sizeof(struct vmi_ap_state));

	ap.gdtr_limit = GDT_SIZE - 1;
	ap.gdtr_base = (unsigned long) get_cpu_gdt_table(phys_apicid);

	ap.idtr_limit = IDT_ENTRIES * 8 - 1;
	ap.idtr_base = (unsigned long) idt_table;

	ap.ldtr = 0;

	ap.cs = __KERNEL_CS;
	ap.eip = (unsigned long) start_eip;
	ap.ss = __KERNEL_DS;
	ap.esp = (unsigned long) start_esp;

	ap.ds = __USER_DS;
	ap.es = __USER_DS;
	ap.fs = __KERNEL_PDA;
	ap.gs = 0;

	/* AP starts with interrupts disabled (IF clear). */
	ap.eflags = 0;

	setup_pda();

#ifdef CONFIG_X86_PAE
	/* efer should match BSP efer. */
	if (cpu_has_nx) {
		unsigned l, h;
		rdmsr(MSR_EFER, l, h);
		ap.efer = (unsigned long long) h << 32 | l;
	}
#endif

	ap.cr3 = __pa(swapper_pg_dir);
	/* Protected mode, paging, AM, WP, NE, MP. */
	ap.cr0 = 0x80050023;
	ap.cr4 = mmu_cr4_features;
	vmi_ops.set_initial_ap_state(__pa(&ap), phys_apicid);
}
#endif
559 | |||
/*
 * Validate a candidate VMI option ROM: signatures, API version, and the
 * PCI header identifying it as a VMware VMI device.  Also checks the
 * embedded license and taints the kernel for non-GPL ROMs.
 * Returns 1 if @rom is a usable VMI ROM, 0 otherwise.
 */
static inline int __init check_vmi_rom(struct vrom_header *rom)
{
	struct pci_header *pci;
	struct pnp_header *pnp;
	const char *manufacturer = "UNKNOWN";
	const char *product = "UNKNOWN";
	const char *license = "unspecified";

	if (rom->rom_signature != 0xaa55)
		return 0;
	if (rom->vrom_signature != VMI_SIGNATURE)
		return 0;
	/*
	 * Major version must match exactly; minor must be at least
	 * VMI_API_REV_MINOR.  NOTE(review): the +1 on both sides of the
	 * minor comparison looks like a signed/unsigned workaround —
	 * confirm against the VMI spec before simplifying.
	 */
	if (rom->api_version_maj != VMI_API_REV_MAJOR ||
	    rom->api_version_min+1 < VMI_API_REV_MINOR+1) {
		printk(KERN_WARNING "VMI: Found mismatched rom version %d.%d\n",
				rom->api_version_maj,
				rom->api_version_min);
		return 0;
	}

	/*
	 * Relying on the VMI_SIGNATURE field is not 100% safe, so check
	 * the PCI header and device type to make sure this is really a
	 * VMI device.
	 */
	if (!rom->pci_header_offs) {
		printk(KERN_WARNING "VMI: ROM does not contain PCI header.\n");
		return 0;
	}

	/* pci is guaranteed non-NULL below: we returned if the offset was 0. */
	pci = (struct pci_header *)((char *)rom+rom->pci_header_offs);
	if (pci->vendorID != PCI_VENDOR_ID_VMWARE ||
	    pci->deviceID != PCI_DEVICE_ID_VMWARE_VMI) {
		/* Allow it to run... anyways, but warn */
		printk(KERN_WARNING "VMI: ROM from unknown manufacturer\n");
	}

	if (rom->pnp_header_offs) {
		pnp = (struct pnp_header *)((char *)rom+rom->pnp_header_offs);
		if (pnp->manufacturer_offset)
			manufacturer = (const char *)rom+pnp->manufacturer_offset;
		if (pnp->product_offset)
			product = (const char *)rom+pnp->product_offset;
	}

	if (rom->license_offs)
		license = (char *)rom+rom->license_offs;

	printk(KERN_INFO "VMI: Found %s %s, API version %d.%d, ROM version %d.%d\n",
		manufacturer, product,
		rom->api_version_maj, rom->api_version_min,
		pci->rom_version_maj, pci->rom_version_min);

	license_gplok = license_is_gpl_compatible(license);
	if (!license_gplok) {
		printk(KERN_WARNING "VMI: ROM license '%s' taints kernel... "
		       "inlining disabled\n",
		       license);
		add_taint(TAINT_PROPRIETARY_MODULE);
	}
	return 1;
}
622 | |||
623 | /* | ||
624 | * Probe for the VMI option ROM | ||
625 | */ | ||
626 | static inline int __init probe_vmi_rom(void) | ||
627 | { | ||
628 | unsigned long base; | ||
629 | |||
630 | /* VMI ROM is in option ROM area, check signature */ | ||
631 | for (base = 0xC0000; base < 0xE0000; base += 2048) { | ||
632 | struct vrom_header *romstart; | ||
633 | romstart = (struct vrom_header *)isa_bus_to_virt(base); | ||
634 | if (check_vmi_rom(romstart)) { | ||
635 | vmi_rom = romstart; | ||
636 | return 1; | ||
637 | } | ||
638 | } | ||
639 | return 0; | ||
640 | } | ||
641 | |||
/*
 * VMI setup common to all processors: establish the hypervisor's linear
 * mapping of lowmem so that subsequent MMU ops can be translated.
 */
void vmi_bringup(void)
{
	/* We must establish the lowmem mapping for MMU ops to work */
	if (vmi_rom)
		vmi_ops.set_linear_mapping(0, __PAGE_OFFSET, max_low_pfn, 0);
}
651 | |||
652 | /* | ||
653 | * Return a pointer to the VMI function or a NOP stub | ||
654 | */ | ||
655 | static void *vmi_get_function(int vmicall) | ||
656 | { | ||
657 | u64 reloc; | ||
658 | const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc; | ||
659 | reloc = call_vrom_long_func(vmi_rom, get_reloc, vmicall); | ||
660 | BUG_ON(rel->type == VMI_RELOCATION_JUMP_REL); | ||
661 | if (rel->type == VMI_RELOCATION_CALL_REL) | ||
662 | return (void *)rel->eip; | ||
663 | else | ||
664 | return (void *)vmi_nop; | ||
665 | } | ||
666 | |||
/*
 * Helper macro for making the VMI paravirt-ops fill code readable.
 * For unimplemented operations, fall back to default.
 *
 * Expects 'reloc' (u64) and 'rel' (aliasing pointer into reloc) in the
 * enclosing scope; only CALL relocations are accepted here.
 */
#define para_fill(opname, vmicall)                              \
do {                                                            \
	reloc = call_vrom_long_func(vmi_rom, get_reloc,         \
				    VMI_CALL_##vmicall);        \
	if (rel->type != VMI_RELOCATION_NONE) {                 \
		BUG_ON(rel->type != VMI_RELOCATION_CALL_REL);   \
		paravirt_ops.opname = (void *)rel->eip;         \
	}                                                       \
} while (0)
680 | |||
/*
 * Activate the VMI interface and switch into paravirtualized mode.
 *
 * Initializes the ROM, then rewires paravirt_ops/vmi_ops entry by entry.
 * Called with interrupts disabled (see vmi_init).  Returns 1 on success,
 * 0 if the ROM failed to initialize.
 */
static inline int __init activate_vmi(void)
{
	short kernel_cs;
	u64 reloc;
	/* rel aliases reloc so relocation fields can be inspected in place. */
	const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc;

	if (call_vrom_func(vmi_rom, vmi_init) != 0) {
		printk(KERN_ERR "VMI ROM failed to initialize!");
		return 0;
	}
	savesegment(cs, kernel_cs);

	paravirt_ops.paravirt_enabled = 1;
	paravirt_ops.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK;

	paravirt_ops.patch = vmi_patch;
	paravirt_ops.name = "vmi";

	/*
	 * Many of these operations are ABI compatible with VMI.
	 * This means we can fill in the paravirt-ops with direct
	 * pointers into the VMI ROM.  If the calling convention for
	 * these operations changes, this code needs to be updated.
	 *
	 * Exceptions
	 *  CPUID paravirt-op uses pointers, not the native ISA
	 *  halt has no VMI equivalent; all VMI halts are "safe"
	 *  no MSR support yet - just trap and emulate.  VMI uses the
	 *    same ABI as the native ISA, but Linux wants exceptions
	 *    from bogus MSR read / write handled
	 *  rdpmc is not yet used in Linux
	 */

	/* CPUID is special, so very special */
	reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_CPUID);
	if (rel->type != VMI_RELOCATION_NONE) {
		BUG_ON(rel->type != VMI_RELOCATION_CALL_REL);
		vmi_ops.cpuid = (void *)rel->eip;
		paravirt_ops.cpuid = vmi_cpuid;
	}

	para_fill(clts, CLTS);
	para_fill(get_debugreg, GetDR);
	para_fill(set_debugreg, SetDR);
	para_fill(read_cr0, GetCR0);
	para_fill(read_cr2, GetCR2);
	para_fill(read_cr3, GetCR3);
	para_fill(read_cr4, GetCR4);
	para_fill(write_cr0, SetCR0);
	para_fill(write_cr2, SetCR2);
	para_fill(write_cr3, SetCR3);
	para_fill(write_cr4, SetCR4);
	para_fill(save_fl, GetInterruptMask);
	para_fill(restore_fl, SetInterruptMask);
	para_fill(irq_disable, DisableInterrupts);
	para_fill(irq_enable, EnableInterrupts);
	/* irq_save_disable !!! sheer pain */
	patch_offset(&irq_save_disable_callout[IRQ_PATCH_INT_MASK],
		     (char *)paravirt_ops.save_fl);
	patch_offset(&irq_save_disable_callout[IRQ_PATCH_DISABLE],
		     (char *)paravirt_ops.irq_disable);
#ifndef CONFIG_NO_IDLE_HZ
	para_fill(safe_halt, Halt);
#else
	/* With no-idle-HZ, halt goes through a wrapper so idle time can
	 * be managed around the hypercall. */
	vmi_ops.halt = vmi_get_function(VMI_CALL_Halt);
	paravirt_ops.safe_halt = vmi_safe_halt;
#endif
	para_fill(wbinvd, WBINVD);
	/* paravirt_ops.read_msr = vmi_rdmsr */
	/* paravirt_ops.write_msr = vmi_wrmsr */
	para_fill(read_tsc, RDTSC);
	/* paravirt_ops.rdpmc = vmi_rdpmc */

	/* TR interface doesn't pass TR value */
	reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_SetTR);
	if (rel->type != VMI_RELOCATION_NONE) {
		BUG_ON(rel->type != VMI_RELOCATION_CALL_REL);
		vmi_ops.set_tr = (void *)rel->eip;
		paravirt_ops.load_tr_desc = vmi_set_tr;
	}

	/* LDT is special, too */
	reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_SetLDT);
	if (rel->type != VMI_RELOCATION_NONE) {
		BUG_ON(rel->type != VMI_RELOCATION_CALL_REL);
		vmi_ops._set_ldt = (void *)rel->eip;
		paravirt_ops.set_ldt = vmi_set_ldt;
	}

	para_fill(load_gdt, SetGDT);
	para_fill(load_idt, SetIDT);
	para_fill(store_gdt, GetGDT);
	para_fill(store_idt, GetIDT);
	para_fill(store_tr, GetTR);
	paravirt_ops.load_tls = vmi_load_tls;
	para_fill(write_ldt_entry, WriteLDTEntry);
	para_fill(write_gdt_entry, WriteGDTEntry);
	para_fill(write_idt_entry, WriteIDTEntry);
	reloc = call_vrom_long_func(vmi_rom, get_reloc,
				    VMI_CALL_UpdateKernelStack);
	if (rel->type != VMI_RELOCATION_NONE) {
		BUG_ON(rel->type != VMI_RELOCATION_CALL_REL);
		vmi_ops.set_kernel_stack = (void *)rel->eip;
		paravirt_ops.load_esp0 = vmi_load_esp0;
	}

	para_fill(set_iopl_mask, SetIOPLMask);
	paravirt_ops.io_delay = (void *)vmi_nop;
	if (!disable_nodelay) {
		paravirt_ops.const_udelay = (void *)vmi_nop;
	}

	para_fill(set_lazy_mode, SetLazyMode);

	reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_FlushTLB);
	if (rel->type != VMI_RELOCATION_NONE) {
		vmi_ops.flush_tlb = (void *)rel->eip;
		paravirt_ops.flush_tlb_user = vmi_flush_tlb_user;
		paravirt_ops.flush_tlb_kernel = vmi_flush_tlb_kernel;
	}
	para_fill(flush_tlb_single, InvalPage);

	/*
	 * Until a standard flag format can be agreed on, we need to
	 * implement these as wrappers in Linux.  Get the VMI ROM
	 * function pointers for the two backend calls.
	 */
#ifdef CONFIG_X86_PAE
	vmi_ops.set_pte = vmi_get_function(VMI_CALL_SetPxELong);
	vmi_ops.update_pte = vmi_get_function(VMI_CALL_UpdatePxELong);
#else
	vmi_ops.set_pte = vmi_get_function(VMI_CALL_SetPxE);
	vmi_ops.update_pte = vmi_get_function(VMI_CALL_UpdatePxE);
#endif
	vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping);
	vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage);
	vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage);

	paravirt_ops.alloc_pt = vmi_allocate_pt;
	paravirt_ops.alloc_pd = vmi_allocate_pd;
	paravirt_ops.alloc_pd_clone = vmi_allocate_pd_clone;
	paravirt_ops.release_pt = vmi_release_pt;
	paravirt_ops.release_pd = vmi_release_pd;
	paravirt_ops.set_pte = vmi_set_pte;
	paravirt_ops.set_pte_at = vmi_set_pte_at;
	paravirt_ops.set_pmd = vmi_set_pmd;
	paravirt_ops.pte_update = vmi_update_pte;
	paravirt_ops.pte_update_defer = vmi_update_pte_defer;
#ifdef CONFIG_X86_PAE
	paravirt_ops.set_pte_atomic = vmi_set_pte_atomic;
	paravirt_ops.set_pte_present = vmi_set_pte_present;
	paravirt_ops.set_pud = vmi_set_pud;
	paravirt_ops.pte_clear = vmi_pte_clear;
	paravirt_ops.pmd_clear = vmi_pmd_clear;
#endif
	/*
	 * These MUST always be patched.  Don't support indirect jumps
	 * through these operations, as the VMI interface may use either
	 * a jump or a call to get to these operations, depending on
	 * the backend.  They are performance critical anyway, so requiring
	 * a patch is not a big problem.
	 */
	paravirt_ops.irq_enable_sysexit = (void *)0xfeedbab0;
	paravirt_ops.iret = (void *)0xbadbab0;

#ifdef CONFIG_SMP
	paravirt_ops.startup_ipi_hook = vmi_startup_ipi_hook;
	vmi_ops.set_initial_ap_state = vmi_get_function(VMI_CALL_SetInitialAPState);
#endif

#ifdef CONFIG_X86_LOCAL_APIC
	paravirt_ops.apic_read = vmi_get_function(VMI_CALL_APICRead);
	paravirt_ops.apic_write = vmi_get_function(VMI_CALL_APICWrite);
	paravirt_ops.apic_write_atomic = vmi_get_function(VMI_CALL_APICWrite);
#endif

	/*
	 * Check for VMI timer functionality by probing for a cycle frequency method
	 */
	reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_GetCycleFrequency);
	if (rel->type != VMI_RELOCATION_NONE) {
		vmi_timer_ops.get_cycle_frequency = (void *)rel->eip;
		vmi_timer_ops.get_cycle_counter =
			vmi_get_function(VMI_CALL_GetCycleCounter);
		vmi_timer_ops.get_wallclock =
			vmi_get_function(VMI_CALL_GetWallclockTime);
		vmi_timer_ops.wallclock_updated =
			vmi_get_function(VMI_CALL_WallclockUpdated);
		vmi_timer_ops.set_alarm = vmi_get_function(VMI_CALL_SetAlarm);
		vmi_timer_ops.cancel_alarm =
			vmi_get_function(VMI_CALL_CancelAlarm);
		paravirt_ops.time_init = vmi_time_init;
		paravirt_ops.get_wallclock = vmi_get_wallclock;
		paravirt_ops.set_wallclock = vmi_set_wallclock;
#ifdef CONFIG_X86_LOCAL_APIC
		paravirt_ops.setup_boot_clock = vmi_timer_setup_boot_alarm;
		paravirt_ops.setup_secondary_clock = vmi_timer_setup_secondary_alarm;
#endif
		custom_sched_clock = vmi_sched_clock;
	}

	/*
	 * Alternative instruction rewriting doesn't happen soon enough
	 * to convert VMI_IRET to a call instead of a jump; so we have
	 * to do this before IRQs get reenabled.  Fortunately, it is
	 * idempotent.
	 */
	apply_paravirt(__start_parainstructions, __stop_parainstructions);

	vmi_bringup();

	return 1;
}

#undef para_fill
899 | |||
/*
 * Locate and validate the VMI option ROM, then activate paravirt mode
 * with interrupts disabled.  No-op if no usable ROM is present.
 */
void __init vmi_init(void)
{
	unsigned long flags;

	if (!vmi_rom)
		probe_vmi_rom();
	else
		check_vmi_rom(vmi_rom);

	/* In case probing for or validating the ROM failed, bail out */
	if (!vmi_rom)
		return;

	/* Reserve the top of the address space that the ROM claims. */
	reserve_top_address(-vmi_rom->virtual_top);

	local_irq_save(flags);
	activate_vmi();
#ifdef CONFIG_SMP
	no_timer_check = 1;
#endif
	/*
	 * NOTE(review): only the IF bit is restored here; masking with
	 * X86_EFLAGS_IF is unusual — plain 'flags' would be conventional.
	 * Confirm this is deliberate before changing.
	 */
	local_irq_restore(flags & X86_EFLAGS_IF);
}
922 | |||
/*
 * Parse the "vmi=" early boot parameter.  Each recognized keyword sets
 * a VMI tuning flag and, for CPU features, clears the corresponding
 * capability bit so the kernel won't rely on it.  Unknown keywords are
 * silently ignored (still returns 0).
 */
static int __init parse_vmi(char *arg)
{
	if (!arg)
		return -EINVAL;

	if (!strcmp(arg, "disable_nodelay"))
		disable_nodelay = 1;
	else if (!strcmp(arg, "disable_pge")) {
		clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability);
		disable_pge = 1;
	} else if (!strcmp(arg, "disable_pse")) {
		clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
		disable_pse = 1;
	} else if (!strcmp(arg, "disable_sep")) {
		clear_bit(X86_FEATURE_SEP, boot_cpu_data.x86_capability);
		disable_sep = 1;
	} else if (!strcmp(arg, "disable_tsc")) {
		clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability);
		disable_tsc = 1;
	} else if (!strcmp(arg, "disable_mtrr")) {
		clear_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability);
		disable_mtrr = 1;
	}
	return 0;
}

early_param("vmi", parse_vmi);
diff --git a/arch/i386/kernel/vmitime.c b/arch/i386/kernel/vmitime.c new file mode 100644 index 000000000000..76d2adcae5a3 --- /dev/null +++ b/arch/i386/kernel/vmitime.c | |||
@@ -0,0 +1,499 @@ | |||
1 | /* | ||
2 | * VMI paravirtual timer support routines. | ||
3 | * | ||
4 | * Copyright (C) 2005, VMware, Inc. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, but | ||
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
14 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
15 | * details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
20 | * | ||
21 | * Send feedback to dhecht@vmware.com | ||
22 | * | ||
23 | */ | ||
24 | |||
25 | /* | ||
26 | * Portions of this code from arch/i386/kernel/timers/timer_tsc.c. | ||
27 | * Portions of the CONFIG_NO_IDLE_HZ code from arch/s390/kernel/time.c. | ||
28 | * See comments there for proper credits. | ||
29 | */ | ||
30 | |||
31 | #include <linux/spinlock.h> | ||
32 | #include <linux/init.h> | ||
33 | #include <linux/errno.h> | ||
34 | #include <linux/jiffies.h> | ||
35 | #include <linux/interrupt.h> | ||
36 | #include <linux/kernel_stat.h> | ||
37 | #include <linux/rcupdate.h> | ||
38 | #include <linux/clocksource.h> | ||
39 | |||
40 | #include <asm/timer.h> | ||
41 | #include <asm/io.h> | ||
42 | #include <asm/apic.h> | ||
43 | #include <asm/div64.h> | ||
44 | #include <asm/timer.h> | ||
45 | #include <asm/desc.h> | ||
46 | |||
47 | #include <asm/vmi.h> | ||
48 | #include <asm/vmi_time.h> | ||
49 | |||
50 | #include <mach_timer.h> | ||
51 | #include <io_ports.h> | ||
52 | |||
53 | #ifdef CONFIG_X86_LOCAL_APIC | ||
54 | #define VMI_ALARM_WIRING VMI_ALARM_WIRED_LVTT | ||
55 | #else | ||
56 | #define VMI_ALARM_WIRING VMI_ALARM_WIRED_IRQ0 | ||
57 | #endif | ||
58 | |||
/* Cached VMI operations (filled in by activate_vmi in vmi.c). */
struct vmi_timer_ops vmi_timer_ops;

#ifdef CONFIG_NO_IDLE_HZ

/* /proc/sys/kernel/hz_timer state. */
int sysctl_hz_timer;

/* Some stats (per-cpu counters for no-HZ idle behavior). */
static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_irqs);
static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_jiffies);
static DEFINE_PER_CPU(unsigned long, idle_start_jiffies);

#endif /* CONFIG_NO_IDLE_HZ */

/* Number of alarms per second.  By default this is CONFIG_VMI_ALARM_HZ. */
static int alarm_hz = CONFIG_VMI_ALARM_HZ;

/* Cache of the value get_cycle_frequency / HZ. */
static signed long long cycles_per_jiffy;

/* Cache of the value get_cycle_frequency / alarm_hz. */
static signed long long cycles_per_alarm;

/* The number of cycles accounted for by the 'jiffies'/'xtime' count.
 * Protected by xtime_lock. */
static unsigned long long real_cycles_accounted_system;

/* The number of cycles accounted for by update_process_times(), per cpu. */
static DEFINE_PER_CPU(unsigned long long, process_times_cycles_accounted_cpu);

/* The number of stolen cycles accounted, per cpu. */
static DEFINE_PER_CPU(unsigned long long, stolen_cycles_accounted_cpu);
92 | |||
/* Clock source: read the hypervisor's real (wall) cycle counter. */
static cycle_t read_real_cycles(void)
{
	return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL);
}
98 | |||
/* Read cycles during which this VCPU was actually given CPU time. */
static cycle_t read_available_cycles(void)
{
	return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE);
}
103 | |||
#if 0
/* Read cycles stolen by the hypervisor; currently unused, kept for
 * future steal-time accounting. */
static cycle_t read_stolen_cycles(void)
{
	return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_STOLEN);
}
#endif /* 0 */
110 | |||
/* VMI clocksource: rating 450 outranks TSC/HPET so it wins when present.
 * mult is computed from the ROM-reported frequency in vmi_time_init(). */
static struct clocksource clocksource_vmi = {
	.name			= "vmi-timer",
	.rating			= 450,
	.read			= read_real_cycles,
	.mask			= CLOCKSOURCE_MASK(64),
	.mult			= 0, /* to be set */
	.shift			= 22,
	.flags			= CLOCK_SOURCE_IS_CONTINUOUS,
};
120 | |||
121 | |||
122 | /* Timer interrupt handler. */ | ||
123 | static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id); | ||
124 | |||
125 | static struct irqaction vmi_timer_irq = { | ||
126 | vmi_timer_interrupt, | ||
127 | SA_INTERRUPT, | ||
128 | CPU_MASK_NONE, | ||
129 | "VMI-alarm", | ||
130 | NULL, | ||
131 | NULL | ||
132 | }; | ||
133 | |||
134 | /* Alarm rate */ | ||
135 | static int __init vmi_timer_alarm_rate_setup(char* str) | ||
136 | { | ||
137 | int alarm_rate; | ||
138 | if (get_option(&str, &alarm_rate) == 1 && alarm_rate > 0) { | ||
139 | alarm_hz = alarm_rate; | ||
140 | printk(KERN_WARNING "VMI timer alarm HZ set to %d\n", alarm_hz); | ||
141 | } | ||
142 | return 1; | ||
143 | } | ||
144 | __setup("vmi_timer_alarm_hz=", vmi_timer_alarm_rate_setup); | ||
145 | |||
146 | |||
/* Wallclock and scheduler-clock helpers */
/* Fetch the hypervisor wallclock (nanoseconds) into a timespec. */
static void vmi_get_wallclock_ts(struct timespec *ts)
{
	unsigned long long wallclock;
	wallclock = vmi_timer_ops.get_wallclock(); /* nsec units */
	ts->tv_nsec = do_div(wallclock, 1000000000);
	ts->tv_sec = wallclock;
}
155 | |||
/* Resynchronize the kernel's time-of-day with the hypervisor wallclock. */
static void update_xtime_from_wallclock(void)
{
	struct timespec ts;
	vmi_get_wallclock_ts(&ts);
	do_settimeofday(&ts);
}
162 | |||
/* paravirt get_wallclock hook: wallclock time in whole seconds. */
unsigned long vmi_get_wallclock(void)
{
	struct timespec ts;
	vmi_get_wallclock_ts(&ts);
	return ts.tv_sec;
}
169 | |||
/* paravirt set_wallclock hook: the guest cannot set the host wallclock,
 * so always report failure. */
int vmi_set_wallclock(unsigned long now)
{
	return -1;
}
174 | |||
/* sched_clock replacement: count only cycles the VCPU actually ran. */
unsigned long long vmi_sched_clock(void)
{
	return read_available_cycles();
}
179 | |||
/*
 * Boot-time VMI timer setup: wires IRQ0 (and the local APIC timer
 * vector) to the VMI alarm, seeds xtime from the hypervisor wallclock,
 * computes the cycle-rate caches, registers the clocksource, disables
 * the PIT, and arms the first periodic alarm.  Runs with IRQs off.
 */
void __init vmi_time_init(void)
{
	unsigned long long cycles_per_sec, cycles_per_msec;
	unsigned long flags;

	local_irq_save(flags);
	setup_irq(0, &vmi_timer_irq);
#ifdef CONFIG_X86_LOCAL_APIC
	set_intr_gate(LOCAL_TIMER_VECTOR, apic_vmi_timer_interrupt);
#endif

	/* The hypervisor wallclock is authoritative; don't sync from CMOS. */
	no_sync_cmos_clock = 1;

	vmi_get_wallclock_ts(&xtime);
	set_normalized_timespec(&wall_to_monotonic,
		-xtime.tv_sec, -xtime.tv_nsec);

	real_cycles_accounted_system = read_real_cycles();
	update_xtime_from_wallclock();
	per_cpu(process_times_cycles_accounted_cpu, 0) = read_available_cycles();

	cycles_per_sec = vmi_timer_ops.get_cycle_frequency();

	cycles_per_jiffy = cycles_per_sec;
	(void)do_div(cycles_per_jiffy, HZ);
	cycles_per_alarm = cycles_per_sec;
	(void)do_div(cycles_per_alarm, alarm_hz);
	cycles_per_msec = cycles_per_sec;
	(void)do_div(cycles_per_msec, 1000);
	cpu_khz = cycles_per_msec;

	printk(KERN_WARNING "VMI timer cycles/sec = %llu ; cycles/jiffy = %llu ;"
	       "cycles/alarm = %llu\n", cycles_per_sec, cycles_per_jiffy,
	       cycles_per_alarm);

	clocksource_vmi.mult = clocksource_khz2mult(cycles_per_msec,
						    clocksource_vmi.shift);
	if (clocksource_register(&clocksource_vmi))
		printk(KERN_WARNING "Error registering VMITIME clocksource.");

	/* Disable PIT. */
	outb_p(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */

	/* schedule the alarm. do this in phase with process_times_cycles_accounted_cpu
	 * reduce the latency calling update_process_times. */
	vmi_timer_ops.set_alarm(
		      VMI_ALARM_WIRED_IRQ0 | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
		      per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm,
		      cycles_per_alarm);

	local_irq_restore(flags);
}
232 | |||
#ifdef CONFIG_X86_LOCAL_APIC

/*
 * Once the local APIC is up on the boot CPU, move the alarm from the
 * IRQ0 wiring (set up in vmi_time_init) to the APIC LVTT wiring.
 */
void __init vmi_timer_setup_boot_alarm(void)
{
	local_irq_disable();

	/* Route the interrupt to the correct vector. */
	apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR);

	/* Cancel the IRQ0 wired alarm, and setup the LVTT alarm. */
	vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
	vmi_timer_ops.set_alarm(
		      VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
		      per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm,
		      cycles_per_alarm);
	local_irq_enable();
}
250 | |||
/* Initialize the time accounting variables for an AP on an SMP system.
 * Also, set the local alarm for the AP.
 * NOTE(review): marked __init but runs during AP bringup — a CPU-hotplug
 * bringup after init memory is freed would call discarded code; confirm. */
void __init vmi_timer_setup_secondary_alarm(void)
{
	int cpu = smp_processor_id();

	/* Route the interrupt to the correct vector. */
	apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR);

	per_cpu(process_times_cycles_accounted_cpu, cpu) = read_available_cycles();

	vmi_timer_ops.set_alarm(
		      VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
		      per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm,
		      cycles_per_alarm);
}

#endif
269 | |||
/* Update system wide (real) time accounting (e.g. jiffies, xtime).
 * Converts elapsed real cycles since the last call into whole jiffy
 * ticks, and resyncs xtime if the hypervisor says the wallclock moved.
 * Takes xtime_lock; safe to call from the alarm interrupt. */
static void vmi_account_real_cycles(unsigned long long cur_real_cycles)
{
	long long cycles_not_accounted;

	write_seqlock(&xtime_lock);

	cycles_not_accounted = cur_real_cycles - real_cycles_accounted_system;
	while (cycles_not_accounted >= cycles_per_jiffy) {
		/* systems wide jiffies and wallclock. */
		do_timer(1);

		cycles_not_accounted -= cycles_per_jiffy;
		real_cycles_accounted_system += cycles_per_jiffy;
	}

	if (vmi_timer_ops.wallclock_updated())
		update_xtime_from_wallclock();

	write_sequnlock(&xtime_lock);
}
291 | |||
292 | /* Update per-cpu process times. */ | ||
293 | static void vmi_account_process_times_cycles(struct pt_regs *regs, int cpu, | ||
294 | unsigned long long cur_process_times_cycles) | ||
295 | { | ||
296 | long long cycles_not_accounted; | ||
297 | cycles_not_accounted = cur_process_times_cycles - | ||
298 | per_cpu(process_times_cycles_accounted_cpu, cpu); | ||
299 | |||
300 | while (cycles_not_accounted >= cycles_per_jiffy) { | ||
301 | /* Account time to the current process. This includes | ||
302 | * calling into the scheduler to decrement the timeslice | ||
303 | * and possibly reschedule.*/ | ||
304 | update_process_times(user_mode(regs)); | ||
305 | /* XXX handle /proc/profile multiplier. */ | ||
306 | profile_tick(CPU_PROFILING); | ||
307 | |||
308 | cycles_not_accounted -= cycles_per_jiffy; | ||
309 | per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy; | ||
310 | } | ||
311 | } | ||
312 | |||
313 | #ifdef CONFIG_NO_IDLE_HZ | ||
314 | /* Update per-cpu idle times. Used when a no-hz halt is ended. */ | ||
315 | static void vmi_account_no_hz_idle_cycles(int cpu, | ||
316 | unsigned long long cur_process_times_cycles) | ||
317 | { | ||
318 | long long cycles_not_accounted; | ||
319 | unsigned long no_idle_hz_jiffies = 0; | ||
320 | |||
321 | cycles_not_accounted = cur_process_times_cycles - | ||
322 | per_cpu(process_times_cycles_accounted_cpu, cpu); | ||
323 | |||
324 | while (cycles_not_accounted >= cycles_per_jiffy) { | ||
325 | no_idle_hz_jiffies++; | ||
326 | cycles_not_accounted -= cycles_per_jiffy; | ||
327 | per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy; | ||
328 | } | ||
329 | /* Account time to the idle process. */ | ||
330 | account_steal_time(idle_task(cpu), jiffies_to_cputime(no_idle_hz_jiffies)); | ||
331 | } | ||
332 | #endif | ||
333 | |||
334 | /* Update per-cpu stolen time. */ | ||
335 | static void vmi_account_stolen_cycles(int cpu, | ||
336 | unsigned long long cur_real_cycles, | ||
337 | unsigned long long cur_avail_cycles) | ||
338 | { | ||
339 | long long stolen_cycles_not_accounted; | ||
340 | unsigned long stolen_jiffies = 0; | ||
341 | |||
342 | if (cur_real_cycles < cur_avail_cycles) | ||
343 | return; | ||
344 | |||
345 | stolen_cycles_not_accounted = cur_real_cycles - cur_avail_cycles - | ||
346 | per_cpu(stolen_cycles_accounted_cpu, cpu); | ||
347 | |||
348 | while (stolen_cycles_not_accounted >= cycles_per_jiffy) { | ||
349 | stolen_jiffies++; | ||
350 | stolen_cycles_not_accounted -= cycles_per_jiffy; | ||
351 | per_cpu(stolen_cycles_accounted_cpu, cpu) += cycles_per_jiffy; | ||
352 | } | ||
353 | /* HACK: pass NULL to force time onto cpustat->steal. */ | ||
354 | account_steal_time(NULL, jiffies_to_cputime(stolen_jiffies)); | ||
355 | } | ||
356 | |||
/* Body of either IRQ0 interrupt handler (UP no local-APIC) or
 * local-APIC LVTT interrupt handler (UP & local-APIC or SMP).
 * Snapshots both cycle counters once, then distributes the elapsed
 * time to the three accounting domains. */
static void vmi_local_timer_interrupt(int cpu)
{
	unsigned long long real_now, avail_now;

	real_now = read_real_cycles();
	avail_now = read_available_cycles();

	/* Update system wide (real) time state (xtime, jiffies). */
	vmi_account_real_cycles(real_now);
	/* Update per-cpu process times. */
	vmi_account_process_times_cycles(get_irq_regs(), cpu, avail_now);
	/* Update time stolen from this cpu by the hypervisor. */
	vmi_account_stolen_cycles(cpu, real_now, avail_now);
}
372 | |||
373 | #ifdef CONFIG_NO_IDLE_HZ | ||
374 | |||
/* Must be called only from idle loop, with interrupts disabled.
 * Tries to stop the periodic tick on this cpu and replace it with a
 * one-shot real-time alarm at the next pending timer event.
 * Returns 1 if the cpu may halt with the tick stopped, 0 otherwise. */
int vmi_stop_hz_timer(void)
{
	/* Note that cpu_set, cpu_clear are (SMP safe) atomic on x86. */

	unsigned long seq, next;
	unsigned long long real_cycles_expiry;
	int cpu = smp_processor_id();
	int idle;

	BUG_ON(!irqs_disabled());
	/* Administrator can force-disable nohz idle via sysctl. */
	if (sysctl_hz_timer != 0)
		return 0;

	/* Advertise this cpu as nohz-idle BEFORE rechecking for work;
	 * the barrier orders the mask update against the tests below. */
	cpu_set(cpu, nohz_cpu_mask);
	smp_mb();
	/* NOTE: comma expression -- `next` is only assigned when the
	 * first two tests are false; otherwise the `next = jiffies`
	 * below initializes it before use. */
	if (rcu_needs_cpu(cpu) || local_softirq_pending() ||
	    (next = next_timer_interrupt(), time_before_eq(next, jiffies))) {
		/* Pending work: back out of nohz idle, expire "now". */
		cpu_clear(cpu, nohz_cpu_mask);
		next = jiffies;
		idle = 0;
	} else
		idle = 1;

	/* Convert jiffies to the real cycle counter. */
	do {
		seq = read_seqbegin(&xtime_lock);
		real_cycles_expiry = real_cycles_accounted_system +
			(long)(next - jiffies) * cycles_per_jiffy;
	} while (read_seqretry(&xtime_lock, seq));

	/* This cpu is going idle. Disable the periodic alarm. */
	if (idle) {
		vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
		/* Remember when the tick stopped, for idle accounting. */
		per_cpu(idle_start_jiffies, cpu) = jiffies;
	}

	/* Set the real time alarm to expire at the next event. */
	vmi_timer_ops.set_alarm(
		      VMI_ALARM_WIRING | VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL,
		      real_cycles_expiry, 0);

	return idle;
}
419 | |||
/* Restart the periodic available-cycles alarm on a cpu leaving nohz
 * idle, and record how long the tick was stopped for /proc/vmi stats. */
static void vmi_reenable_hz_timer(int cpu)
{
	/* For /proc/vmi/info idle_hz stat. */
	per_cpu(vmi_idle_no_hz_jiffies, cpu) += jiffies - per_cpu(idle_start_jiffies, cpu);
	per_cpu(vmi_idle_no_hz_irqs, cpu)++;

	/* Don't bother explicitly cancelling the one-shot alarm -- at
	 * worse we will receive a spurious timer interrupt. */
	vmi_timer_ops.set_alarm(
		      VMI_ALARM_WIRING | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
		      per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm,
		      cycles_per_alarm);
	/* Indicate this cpu is no longer nohz idle. */
	cpu_clear(cpu, nohz_cpu_mask);
}
435 | |||
/* Called from interrupt handlers when (local) HZ timer is disabled.
 * Catches up all time accounting for the stopped-tick period, then
 * re-arms the periodic alarm.  Interrupts must be disabled. */
void vmi_account_time_restart_hz_timer(void)
{
	unsigned long long real_now, avail_now;
	int cpu = smp_processor_id();

	BUG_ON(!irqs_disabled());

	/* Account the time during which the HZ timer was disabled. */
	real_now = read_real_cycles();
	avail_now = read_available_cycles();

	/* Update system wide (real) time state (xtime, jiffies). */
	vmi_account_real_cycles(real_now);
	/* Update per-cpu idle times. */
	vmi_account_no_hz_idle_cycles(cpu, avail_now);
	/* Update time stolen from this cpu by the hypervisor. */
	vmi_account_stolen_cycles(cpu, real_now, avail_now);
	/* Reenable the hz timer. */
	vmi_reenable_hz_timer(cpu);
}
455 | |||
456 | #endif /* CONFIG_NO_IDLE_HZ */ | ||
457 | |||
458 | /* UP (and no local-APIC) VMI-timer alarm interrupt handler. | ||
459 | * Handler for IRQ0. Not used when SMP or X86_LOCAL_APIC after | ||
460 | * APIC setup and setup_boot_vmi_alarm() is called. */ | ||
461 | static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id) | ||
462 | { | ||
463 | vmi_local_timer_interrupt(smp_processor_id()); | ||
464 | return IRQ_HANDLED; | ||
465 | } | ||
466 | |||
467 | #ifdef CONFIG_X86_LOCAL_APIC | ||
468 | |||
469 | /* SMP VMI-timer alarm interrupt handler. Handler for LVTT vector. | ||
470 | * Also used in UP when CONFIG_X86_LOCAL_APIC. | ||
471 | * The wrapper code is from arch/i386/kernel/apic.c#smp_apic_timer_interrupt. */ | ||
472 | void smp_apic_vmi_timer_interrupt(struct pt_regs *regs) | ||
473 | { | ||
474 | struct pt_regs *old_regs = set_irq_regs(regs); | ||
475 | int cpu = smp_processor_id(); | ||
476 | |||
477 | /* | ||
478 | * the NMI deadlock-detector uses this. | ||
479 | */ | ||
480 | per_cpu(irq_stat,cpu).apic_timer_irqs++; | ||
481 | |||
482 | /* | ||
483 | * NOTE! We'd better ACK the irq immediately, | ||
484 | * because timer handling can be slow. | ||
485 | */ | ||
486 | ack_APIC_irq(); | ||
487 | |||
488 | /* | ||
489 | * update_process_times() expects us to have done irq_enter(). | ||
490 | * Besides, if we don't timer interrupts ignore the global | ||
491 | * interrupt lock, which is the WrongThing (tm) to do. | ||
492 | */ | ||
493 | irq_enter(); | ||
494 | vmi_local_timer_interrupt(cpu); | ||
495 | irq_exit(); | ||
496 | set_irq_regs(old_regs); | ||
497 | } | ||
498 | |||
499 | #endif /* CONFIG_X86_LOCAL_APIC */ | ||
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index 5038a73d554e..ca51610955df 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S | |||
@@ -37,9 +37,14 @@ SECTIONS | |||
37 | { | 37 | { |
38 | . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR; | 38 | . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR; |
39 | phys_startup_32 = startup_32 - LOAD_OFFSET; | 39 | phys_startup_32 = startup_32 - LOAD_OFFSET; |
40 | |||
41 | .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) { | ||
42 | _text = .; /* Text and read-only data */ | ||
43 | *(.text.head) | ||
44 | } :text = 0x9090 | ||
45 | |||
40 | /* read-only */ | 46 | /* read-only */ |
41 | .text : AT(ADDR(.text) - LOAD_OFFSET) { | 47 | .text : AT(ADDR(.text) - LOAD_OFFSET) { |
42 | _text = .; /* Text and read-only data */ | ||
43 | *(.text) | 48 | *(.text) |
44 | SCHED_TEXT | 49 | SCHED_TEXT |
45 | LOCK_TEXT | 50 | LOCK_TEXT |