author		Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 18:20:36 -0400
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 18:20:36 -0400
commit		1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree		0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/i386/kernel
Linux-2.6.12-rc2
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'arch/i386/kernel')
124 files changed, 43777 insertions, 0 deletions
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
new file mode 100644
index 000000000000..933787a46b4c
--- /dev/null
+++ b/arch/i386/kernel/Makefile
@@ -0,0 +1,71 @@
#
# Makefile for the linux kernel.
#

extra-y := head.o init_task.o vmlinux.lds

obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \
		ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
		pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \
		doublefault.o quirks.o

obj-y				+= cpu/
obj-y				+= timers/
obj-$(CONFIG_ACPI_BOOT)		+= acpi/
obj-$(CONFIG_X86_BIOS_REBOOT)	+= reboot.o
obj-$(CONFIG_MCA)		+= mca.o
obj-$(CONFIG_X86_MSR)		+= msr.o
obj-$(CONFIG_X86_CPUID)		+= cpuid.o
obj-$(CONFIG_MICROCODE)		+= microcode.o
obj-$(CONFIG_APM)		+= apm.o
obj-$(CONFIG_X86_SMP)		+= smp.o smpboot.o
obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline.o
obj-$(CONFIG_X86_MPPARSE)	+= mpparse.o
obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o nmi.o
obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
obj-$(CONFIG_X86_NUMAQ)		+= numaq.o
obj-$(CONFIG_X86_SUMMIT_NUMA)	+= summit.o
obj-$(CONFIG_KPROBES)		+= kprobes.o
obj-$(CONFIG_MODULES)		+= module.o
obj-y				+= sysenter.o vsyscall.o
obj-$(CONFIG_ACPI_SRAT)		+= srat.o
obj-$(CONFIG_HPET_TIMER)	+= time_hpet.o
obj-$(CONFIG_EFI)		+= efi.o efi_stub.o
obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o

EXTRA_AFLAGS := -traditional

obj-$(CONFIG_SCx200)		+= scx200.o

# vsyscall.o contains the vsyscall DSO images as __initdata.
# We must build both images before we can assemble it.
# Note: kbuild does not track this dependency due to usage of .incbin
$(obj)/vsyscall.o: $(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so
targets += $(foreach F,int80 sysenter,vsyscall-$F.o vsyscall-$F.so)
targets += vsyscall.lds

# The DSO images are built using a special linker script.
quiet_cmd_syscall = SYSCALL $@
      cmd_syscall = $(CC) -m elf_i386 -nostdlib $(SYSCFLAGS_$(@F)) \
		    -Wl,-T,$(filter-out FORCE,$^) -o $@

export CPPFLAGS_vsyscall.lds += -P -C -U$(ARCH)

vsyscall-flags = -shared -s -Wl,-soname=linux-gate.so.1
SYSCFLAGS_vsyscall-sysenter.so	= $(vsyscall-flags)
SYSCFLAGS_vsyscall-int80.so	= $(vsyscall-flags)

$(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so: \
$(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE
	$(call if_changed,syscall)

# We also create a special relocatable object that should mirror the symbol
# table and layout of the linked DSO. With ld -R we can then refer to
# these symbols in the kernel code rather than hand-coded addresses.
extra-y += vsyscall-syms.o
$(obj)/built-in.o: $(obj)/vsyscall-syms.o
$(obj)/built-in.o: ld_flags += -R $(obj)/vsyscall-syms.o

SYSCFLAGS_vsyscall-syms.o = -r
$(obj)/vsyscall-syms.o: $(src)/vsyscall.lds $(obj)/vsyscall-sysenter.o FORCE
	$(call if_changed,syscall)
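
The vsyscall rules above lean on .incbin: vsyscall.o embeds the two built DSO images as data. On the C side, such an embedded image is just a byte range between assembler-provided start/end labels. A minimal sketch of that view, with the symbol names assumed for illustration rather than taken from this tree:

	/* Hypothetical view from C of a .incbin'd image: the assembler wraps
	 * the binary in start/end labels, which C sees as byte arrays. */
	extern char vsyscall_int80_start[], vsyscall_int80_end[];

	static unsigned long vsyscall_int80_size(void)
	{
		/* the image's size is simply the distance between the labels */
		return (unsigned long)(vsyscall_int80_end - vsyscall_int80_start);
	}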
diff --git a/arch/i386/kernel/acpi/Makefile b/arch/i386/kernel/acpi/Makefile
new file mode 100644
index 000000000000..ee75cb286cfe
--- /dev/null
+++ b/arch/i386/kernel/acpi/Makefile
@@ -0,0 +1,4 @@
obj-$(CONFIG_ACPI_BOOT)		:= boot.o
obj-$(CONFIG_X86_IO_APIC)	+= earlyquirk.o
obj-$(CONFIG_ACPI_SLEEP)	+= sleep.o wakeup.o

diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
new file mode 100644
index 000000000000..9ba0b957d11f
--- /dev/null
+++ b/arch/i386/kernel/acpi/boot.c
@@ -0,0 +1,908 @@
/*
 * boot.c - Architecture-Specific Low-Level ACPI Boot Support
 *
 * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
 * Copyright (C) 2001 Jun Nakajima <jun.nakajima@intel.com>
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

#include <linux/init.h>
#include <linux/config.h>
#include <linux/acpi.h>
#include <linux/efi.h>
#include <linux/irq.h>
#include <linux/module.h>

#include <asm/pgtable.h>
#include <asm/io_apic.h>
#include <asm/apic.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/mpspec.h>

#ifdef CONFIG_X86_64

static inline void acpi_madt_oem_check(char *oem_id, char *oem_table_id) { }
extern void __init clustered_apic_check(void);
static inline int ioapic_setup_disabled(void) { return 0; }
#include <asm/proto.h>

#else /* X86 */

#ifdef CONFIG_X86_LOCAL_APIC
#include <mach_apic.h>
#include <mach_mpparse.h>
#endif /* CONFIG_X86_LOCAL_APIC */

#endif /* X86 */

#define BAD_MADT_ENTRY(entry, end) ( \
	(!entry) || (unsigned long)entry + sizeof(*entry) > end || \
	((acpi_table_entry_header *)entry)->length != sizeof(*entry))

#define PREFIX "ACPI: "

#ifdef CONFIG_ACPI_PCI
int acpi_noirq __initdata;		/* skip ACPI IRQ initialization */
int acpi_pci_disabled __initdata;	/* skip ACPI PCI scan and IRQ initialization */
#else
int acpi_noirq __initdata = 1;
int acpi_pci_disabled __initdata = 1;
#endif
int acpi_ht __initdata = 1;		/* enable HT */

int acpi_lapic;
int acpi_ioapic;
int acpi_strict;
EXPORT_SYMBOL(acpi_strict);

acpi_interrupt_flags acpi_sci_flags __initdata;
int acpi_sci_override_gsi __initdata;
int acpi_skip_timer_override __initdata;

#ifdef CONFIG_X86_LOCAL_APIC
static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
#endif

#ifndef __HAVE_ARCH_CMPXCHG
#warning ACPI uses CMPXCHG, i486 and later hardware
#endif

#define MAX_MADT_ENTRIES 256
u8 x86_acpiid_to_apicid[MAX_MADT_ENTRIES] =
	{ [0 ... MAX_MADT_ENTRIES-1] = 0xff };
EXPORT_SYMBOL(x86_acpiid_to_apicid);

/* --------------------------------------------------------------------------
                              Boot-time Configuration
   -------------------------------------------------------------------------- */

/*
 * The default interrupt routing model is PIC (8259). This gets
 * overridden if IOAPICs are enumerated (below).
 */
enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC;

#ifdef CONFIG_X86_64

/* rely on all ACPI tables being in the direct mapping */
char *__acpi_map_table(unsigned long phys_addr, unsigned long size)
{
	if (!phys_addr || !size)
		return NULL;

	if (phys_addr < (end_pfn_map << PAGE_SHIFT))
		return __va(phys_addr);

	return NULL;
}

#else

/*
 * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END,
 * to map the target physical address. The problem is that set_fixmap()
 * provides a single page, and it is possible that the page is not
 * sufficient.
 * By using this area, we can map up to MAX_IO_APICS pages temporarily,
 * i.e. until the next __va_range() call.
 *
 * Important Safety Note: The fixed I/O APIC page numbers are *subtracted*
 * from the fixed base. That's why we start at FIX_IO_APIC_BASE_END and
 * count idx down while incrementing the phys address.
 */
char *__acpi_map_table(unsigned long phys, unsigned long size)
{
	unsigned long base, offset, mapped_size;
	int idx;

	if (phys + size < 8*1024*1024)
		return __va(phys);

	offset = phys & (PAGE_SIZE - 1);
	mapped_size = PAGE_SIZE - offset;
	set_fixmap(FIX_ACPI_END, phys);
	base = fix_to_virt(FIX_ACPI_END);

	/*
	 * Most cases can be covered by the below.
	 */
	idx = FIX_ACPI_END;
	while (mapped_size < size) {
		if (--idx < FIX_ACPI_BEGIN)
			return NULL;	/* cannot handle this */
		phys += PAGE_SIZE;
		set_fixmap(idx, phys);
		mapped_size += PAGE_SIZE;
	}

	return ((unsigned char *) base + offset);
}
#endif

#ifdef CONFIG_PCI_MMCONFIG
static int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
{
	struct acpi_table_mcfg *mcfg;

	if (!phys_addr || !size)
		return -EINVAL;

	mcfg = (struct acpi_table_mcfg *) __acpi_map_table(phys_addr, size);
	if (!mcfg) {
		printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
		return -ENODEV;
	}

	if (mcfg->base_reserved) {
		printk(KERN_ERR PREFIX "MMCONFIG not in low 4GB of memory\n");
		return -ENODEV;
	}

	pci_mmcfg_base_addr = mcfg->base_address;

	return 0;
}
#else
#define acpi_parse_mcfg NULL
#endif /* !CONFIG_PCI_MMCONFIG */

#ifdef CONFIG_X86_LOCAL_APIC
static int __init
acpi_parse_madt (
	unsigned long phys_addr,
	unsigned long size)
{
	struct acpi_table_madt *madt = NULL;

	if (!phys_addr || !size)
		return -EINVAL;

	madt = (struct acpi_table_madt *) __acpi_map_table(phys_addr, size);
	if (!madt) {
		printk(KERN_WARNING PREFIX "Unable to map MADT\n");
		return -ENODEV;
	}

	if (madt->lapic_address) {
		acpi_lapic_addr = (u64) madt->lapic_address;

		printk(KERN_DEBUG PREFIX "Local APIC address 0x%08x\n",
			madt->lapic_address);
	}

	acpi_madt_oem_check(madt->header.oem_id, madt->header.oem_table_id);

	return 0;
}


static int __init
acpi_parse_lapic (
	acpi_table_entry_header *header, const unsigned long end)
{
	struct acpi_table_lapic *processor = NULL;

	processor = (struct acpi_table_lapic*) header;

	if (BAD_MADT_ENTRY(processor, end))
		return -EINVAL;

	acpi_table_print_madt_entry(header);

	/* no utility in registering a disabled processor */
	if (processor->flags.enabled == 0)
		return 0;

	x86_acpiid_to_apicid[processor->acpi_id] = processor->id;

	mp_register_lapic (
		processor->id,			/* APIC ID */
		processor->flags.enabled);	/* Enabled? */

	return 0;
}

static int __init
acpi_parse_lapic_addr_ovr (
	acpi_table_entry_header *header, const unsigned long end)
{
	struct acpi_table_lapic_addr_ovr *lapic_addr_ovr = NULL;

	lapic_addr_ovr = (struct acpi_table_lapic_addr_ovr*) header;

	if (BAD_MADT_ENTRY(lapic_addr_ovr, end))
		return -EINVAL;

	acpi_lapic_addr = lapic_addr_ovr->address;

	return 0;
}

static int __init
acpi_parse_lapic_nmi (
	acpi_table_entry_header *header, const unsigned long end)
{
	struct acpi_table_lapic_nmi *lapic_nmi = NULL;

	lapic_nmi = (struct acpi_table_lapic_nmi*) header;

	if (BAD_MADT_ENTRY(lapic_nmi, end))
		return -EINVAL;

	acpi_table_print_madt_entry(header);

	if (lapic_nmi->lint != 1)
		printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n");

	return 0;
}


#endif /*CONFIG_X86_LOCAL_APIC*/

#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER)

static int __init
acpi_parse_ioapic (
	acpi_table_entry_header *header, const unsigned long end)
{
	struct acpi_table_ioapic *ioapic = NULL;

	ioapic = (struct acpi_table_ioapic*) header;

	if (BAD_MADT_ENTRY(ioapic, end))
		return -EINVAL;

	acpi_table_print_madt_entry(header);

	mp_register_ioapic (
		ioapic->id,
		ioapic->address,
		ioapic->global_irq_base);

	return 0;
}

/*
 * Parse Interrupt Source Override for the ACPI SCI
 */
static void
acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger)
{
	if (trigger == 0)	/* compatible SCI trigger is level */
		trigger = 3;

	if (polarity == 0)	/* compatible SCI polarity is low */
		polarity = 3;

	/* Command-line over-ride via acpi_sci= */
	if (acpi_sci_flags.trigger)
		trigger = acpi_sci_flags.trigger;

	if (acpi_sci_flags.polarity)
		polarity = acpi_sci_flags.polarity;

	/*
	 * mp_config_acpi_legacy_irqs() already set up IRQs < 16.
	 * If GSI is < 16, this will update its flags,
	 * else it will create a new mp_irqs[] entry.
	 */
	mp_override_legacy_irq(gsi, polarity, trigger, gsi);

	/*
	 * stash over-ride to indicate we've been here
	 * and for later update of acpi_fadt
	 */
	acpi_sci_override_gsi = gsi;
	return;
}

static int __init
acpi_parse_int_src_ovr (
	acpi_table_entry_header *header, const unsigned long end)
{
	struct acpi_table_int_src_ovr *intsrc = NULL;

	intsrc = (struct acpi_table_int_src_ovr*) header;

	if (BAD_MADT_ENTRY(intsrc, end))
		return -EINVAL;

	acpi_table_print_madt_entry(header);

	if (intsrc->bus_irq == acpi_fadt.sci_int) {
		acpi_sci_ioapic_setup(intsrc->global_irq,
			intsrc->flags.polarity, intsrc->flags.trigger);
		return 0;
	}

	if (acpi_skip_timer_override &&
		intsrc->bus_irq == 0 && intsrc->global_irq == 2) {
		printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n");
		return 0;
	}

	mp_override_legacy_irq (
		intsrc->bus_irq,
		intsrc->flags.polarity,
		intsrc->flags.trigger,
		intsrc->global_irq);

	return 0;
}


static int __init
acpi_parse_nmi_src (
	acpi_table_entry_header *header, const unsigned long end)
{
	struct acpi_table_nmi_src *nmi_src = NULL;

	nmi_src = (struct acpi_table_nmi_src*) header;

	if (BAD_MADT_ENTRY(nmi_src, end))
		return -EINVAL;

	acpi_table_print_madt_entry(header);

	/* TBD: Support nmi_src entries? */

	return 0;
}

#endif /* CONFIG_X86_IO_APIC */

#ifdef CONFIG_ACPI_BUS

/*
 * acpi_pic_sci_set_trigger()
 *
 * use ELCR to set PIC-mode trigger type for SCI
 *
 * If a PIC-mode SCI is not recognized or gives spurious IRQ7's
 * it may require Edge Trigger -- use "acpi_sci=edge"
 *
 * Ports 0x4d0-4d1 are ELCR1 and ELCR2, the Edge/Level Control Registers
 * for the 8259 PIC. bit[n] = 1 means irq[n] is Level, otherwise Edge.
 * ELCR1 is IRQs 0-7 (IRQ 0, 1, 2 must be 0)
 * ELCR2 is IRQs 8-15 (IRQ 8, 13 must be 0)
 */

void __init
acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
{
	unsigned int mask = 1 << irq;
	unsigned int old, new;

	/* Real old ELCR mask */
	old = inb(0x4d0) | (inb(0x4d1) << 8);

	/*
	 * If we use ACPI to set PCI irq's, then we should clear ELCR
	 * since we will set it correctly as we enable the PCI irq
	 * routing.
	 */
	new = acpi_noirq ? old : 0;

	/*
	 * Update SCI information in the ELCR, it isn't in the PCI
	 * routing tables..
	 */
	switch (trigger) {
	case 1:	/* Edge - clear */
		new &= ~mask;
		break;
	case 3:	/* Level - set */
		new |= mask;
		break;
	}

	if (old == new)
		return;

	printk(PREFIX "setting ELCR to %04x (from %04x)\n", new, old);
	outb(new, 0x4d0);
	outb(new >> 8, 0x4d1);
}


#endif /* CONFIG_ACPI_BUS */

int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
{
#ifdef CONFIG_X86_IO_APIC
	if (use_pci_vector() && !platform_legacy_irq(gsi))
		*irq = IO_APIC_VECTOR(gsi);
	else
#endif
		*irq = gsi;
	return 0;
}

unsigned int acpi_register_gsi(u32 gsi, int edge_level, int active_high_low)
{
	unsigned int irq;
	unsigned int plat_gsi = gsi;

#ifdef CONFIG_PCI
	/*
	 * Make sure all (legacy) PCI IRQs are set as level-triggered.
	 */
	if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
		extern void eisa_set_level_irq(unsigned int irq);

		if (edge_level == ACPI_LEVEL_SENSITIVE)
			eisa_set_level_irq(gsi);
	}
#endif

#ifdef CONFIG_X86_IO_APIC
	if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) {
		plat_gsi = mp_register_gsi(gsi, edge_level, active_high_low);
	}
#endif
	acpi_gsi_to_irq(plat_gsi, &irq);
	return irq;
}
EXPORT_SYMBOL(acpi_register_gsi);

/*
 * ACPI based hotplug support for CPU
 */
#ifdef CONFIG_ACPI_HOTPLUG_CPU
int
acpi_map_lsapic(acpi_handle handle, int *pcpu)
{
	/* TBD */
	return -EINVAL;
}
EXPORT_SYMBOL(acpi_map_lsapic);


int
acpi_unmap_lsapic(int cpu)
{
	/* TBD */
	return -EINVAL;
}
EXPORT_SYMBOL(acpi_unmap_lsapic);
#endif /* CONFIG_ACPI_HOTPLUG_CPU */

static unsigned long __init
acpi_scan_rsdp (
	unsigned long start,
	unsigned long length)
{
	unsigned long offset = 0;
	unsigned long sig_len = sizeof("RSD PTR ") - 1;

	/*
	 * Scan all 16-byte boundaries of the physical memory region for the
	 * RSDP signature.
	 */
	for (offset = 0; offset < length; offset += 16) {
		if (strncmp((char *) (start + offset), "RSD PTR ", sig_len))
			continue;
		return (start + offset);
	}

	return 0;
}

static int __init acpi_parse_sbf(unsigned long phys_addr, unsigned long size)
{
	struct acpi_table_sbf *sb;

	if (!phys_addr || !size)
		return -EINVAL;

	sb = (struct acpi_table_sbf *) __acpi_map_table(phys_addr, size);
	if (!sb) {
		printk(KERN_WARNING PREFIX "Unable to map SBF\n");
		return -ENODEV;
	}

	sbf_port = sb->sbf_cmos;	/* Save CMOS port */

	return 0;
}


#ifdef CONFIG_HPET_TIMER

static int __init acpi_parse_hpet(unsigned long phys, unsigned long size)
{
	struct acpi_table_hpet *hpet_tbl;

	if (!phys || !size)
		return -EINVAL;

	hpet_tbl = (struct acpi_table_hpet *) __acpi_map_table(phys, size);
	if (!hpet_tbl) {
		printk(KERN_WARNING PREFIX "Unable to map HPET\n");
		return -ENODEV;
	}

	if (hpet_tbl->addr.space_id != ACPI_SPACE_MEM) {
		printk(KERN_WARNING PREFIX "HPET timers must be located in "
		       "memory.\n");
		return -1;
	}

#ifdef CONFIG_X86_64
	vxtime.hpet_address = hpet_tbl->addr.addrl |
		((long) hpet_tbl->addr.addrh << 32);

	printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
	       hpet_tbl->id, vxtime.hpet_address);
#else /* X86 */
	{
		extern unsigned long hpet_address;

		hpet_address = hpet_tbl->addr.addrl;
		printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
			hpet_tbl->id, hpet_address);
	}
#endif /* X86 */

	return 0;
}
#else
#define acpi_parse_hpet NULL
#endif

#ifdef CONFIG_X86_PM_TIMER
extern u32 pmtmr_ioport;
#endif

static int __init acpi_parse_fadt(unsigned long phys, unsigned long size)
{
	struct fadt_descriptor_rev2 *fadt = NULL;

	fadt = (struct fadt_descriptor_rev2*) __acpi_map_table(phys, size);
	if (!fadt) {
		printk(KERN_WARNING PREFIX "Unable to map FADT\n");
		return 0;
	}

#ifdef CONFIG_ACPI_INTERPRETER
	/* initialize sci_int early for INT_SRC_OVR MADT parsing */
	acpi_fadt.sci_int = fadt->sci_int;
#endif

#ifdef CONFIG_X86_PM_TIMER
	/* detect the location of the ACPI PM Timer */
	if (fadt->revision >= FADT2_REVISION_ID) {
		/* FADT rev. 2 */
		if (fadt->xpm_tmr_blk.address_space_id != ACPI_ADR_SPACE_SYSTEM_IO)
			return 0;

		pmtmr_ioport = fadt->xpm_tmr_blk.address;
	} else {
		/* FADT rev. 1 */
		pmtmr_ioport = fadt->V1_pm_tmr_blk;
	}
	if (pmtmr_ioport)
		printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", pmtmr_ioport);
#endif
	return 0;
}


unsigned long __init
acpi_find_rsdp (void)
{
	unsigned long rsdp_phys = 0;

	if (efi_enabled) {
		if (efi.acpi20)
			return __pa(efi.acpi20);
		else if (efi.acpi)
			return __pa(efi.acpi);
	}
	/*
	 * Scan memory looking for the RSDP signature. First search EBDA (low
	 * memory) paragraphs and then search upper memory (E0000-FFFFF).
	 */
	rsdp_phys = acpi_scan_rsdp (0, 0x400);
	if (!rsdp_phys)
		rsdp_phys = acpi_scan_rsdp (0xE0000, 0xFFFFF);

	return rsdp_phys;
}

#ifdef CONFIG_X86_LOCAL_APIC
/*
 * Parse LAPIC entries in MADT
 * returns 0 on success, < 0 on error
 */
static int __init
acpi_parse_madt_lapic_entries(void)
{
	int count;

	/*
	 * Note that the LAPIC address is obtained from the MADT (32-bit value)
	 * and (optionally) overridden by a LAPIC_ADDR_OVR entry (64-bit value).
	 */

	count = acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr, 0);
	if (count < 0) {
		printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n");
		return count;
	}

	mp_register_lapic_address(acpi_lapic_addr);

	count = acpi_table_parse_madt(ACPI_MADT_LAPIC, acpi_parse_lapic,
				       MAX_APICS);
	if (!count) {
		printk(KERN_ERR PREFIX "No LAPIC entries present\n");
		/* TBD: Cleanup to allow fallback to MPS */
		return -ENODEV;
	}
	else if (count < 0) {
		printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n");
		/* TBD: Cleanup to allow fallback to MPS */
		return count;
	}

	count = acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0);
	if (count < 0) {
		printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
		/* TBD: Cleanup to allow fallback to MPS */
		return count;
	}
	return 0;
}
#endif /* CONFIG_X86_LOCAL_APIC */

#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER)
/*
 * Parse IOAPIC related entries in MADT
 * returns 0 on success, < 0 on error
 */
static int __init
acpi_parse_madt_ioapic_entries(void)
{
	int count;

	/*
	 * ACPI interpreter is required to complete interrupt setup,
	 * so if it is off, don't enumerate the io-apics with ACPI.
	 * If MPS is present, it will handle them,
	 * otherwise the system will stay in PIC mode
	 */
	if (acpi_disabled || acpi_noirq) {
		return -ENODEV;
	}

	/*
	 * if "noapic" boot option, don't look for IO-APICs
	 */
	if (skip_ioapic_setup) {
		printk(KERN_INFO PREFIX "Skipping IOAPIC probe "
			"due to 'noapic' option.\n");
		return -ENODEV;
	}

	count = acpi_table_parse_madt(ACPI_MADT_IOAPIC, acpi_parse_ioapic, MAX_IO_APICS);
	if (!count) {
		printk(KERN_ERR PREFIX "No IOAPIC entries present\n");
		return -ENODEV;
	}
	else if (count < 0) {
		printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n");
		return count;
	}

	count = acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr, NR_IRQ_VECTORS);
	if (count < 0) {
		printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n");
		/* TBD: Cleanup to allow fallback to MPS */
		return count;
	}

	/*
	 * If BIOS did not supply an INT_SRC_OVR for the SCI
	 * pretend we got one so we can set the SCI flags.
	 */
	if (!acpi_sci_override_gsi)
		acpi_sci_ioapic_setup(acpi_fadt.sci_int, 0, 0);

	/* Fill in identity legacy mappings where no override */
	mp_config_acpi_legacy_irqs();

	count = acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src, NR_IRQ_VECTORS);
	if (count < 0) {
		printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
		/* TBD: Cleanup to allow fallback to MPS */
		return count;
	}

	return 0;
}
#else
static inline int acpi_parse_madt_ioapic_entries(void)
{
	return -1;
}
#endif /* !(CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER) */


static void __init
acpi_process_madt(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
	int count, error;

	count = acpi_table_parse(ACPI_APIC, acpi_parse_madt);
	if (count >= 1) {

		/*
		 * Parse MADT LAPIC entries
		 */
		error = acpi_parse_madt_lapic_entries();
		if (!error) {
			acpi_lapic = 1;

			/*
			 * Parse MADT IO-APIC entries
			 */
			error = acpi_parse_madt_ioapic_entries();
			if (!error) {
				acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
				acpi_irq_balance_set(NULL);
				acpi_ioapic = 1;

				smp_found_config = 1;
				clustered_apic_check();
			}
		}
		if (error == -EINVAL) {
			/*
			 * Dell Precision Workstation 410, 610 come here.
			 */
			printk(KERN_ERR PREFIX "Invalid BIOS MADT, disabling ACPI\n");
			disable_acpi();
		}
	}
#endif
	return;
}

/*
 * acpi_boot_table_init() and acpi_boot_init()
 *  called from setup_arch(), always.
 *	1. checksums all tables
 *	2. enumerates lapics
 *	3. enumerates io-apics
 *
 * acpi_table_init() is separate to allow reading SRAT without
 * other side effects.
 *
 * side effects of acpi_boot_init:
 *	acpi_lapic = 1 if LAPIC found
 *	acpi_ioapic = 1 if IOAPIC found
 *	if (acpi_lapic && acpi_ioapic) smp_found_config = 1;
 *	if acpi_blacklisted() acpi_disabled = 1;
 *	acpi_irq_model=...
 *	...
 *
 * return value: (currently ignored)
 *	0: success
 *	!0: failure
 */

int __init
acpi_boot_table_init(void)
{
	int error;

	/*
	 * If acpi_disabled, bail out
	 * One exception: acpi=ht continues far enough to enumerate LAPICs
	 */
	if (acpi_disabled && !acpi_ht)
		return 1;

	/*
	 * Initialize the ACPI boot-time table parser.
	 */
	error = acpi_table_init();
	if (error) {
		disable_acpi();
		return error;
	}

#ifdef __i386__
	check_acpi_pci();
#endif

	acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);

	/*
	 * blacklist may disable ACPI entirely
	 */
	error = acpi_blacklisted();
	if (error) {
		extern int acpi_force;

		if (acpi_force) {
			printk(KERN_WARNING PREFIX "acpi=force override\n");
		} else {
			printk(KERN_WARNING PREFIX "Disabling ACPI support\n");
			disable_acpi();
			return error;
		}
	}

	return 0;
}


int __init acpi_boot_init(void)
{
	/*
	 * If acpi_disabled, bail out
	 * One exception: acpi=ht continues far enough to enumerate LAPICs
	 */
	if (acpi_disabled && !acpi_ht)
		return 1;

	acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);

	/*
	 * set sci_int and PM timer address
	 */
	acpi_table_parse(ACPI_FADT, acpi_parse_fadt);

	/*
	 * Process the Multiple APIC Description Table (MADT), if present
	 */
	acpi_process_madt();

	acpi_table_parse(ACPI_HPET, acpi_parse_hpet);
	acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);

	return 0;
}

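The per-type handlers above (acpi_parse_lapic, acpi_parse_ioapic, and so on) are driven by acpi_table_parse_madt(), which walks the variable-length subtables packed after the MADT header and returns how many entries it matched. A minimal sketch of such a walk in C; entry_header and madt_handler are simplified stand-ins, not the kernel's real definitions:

	#include <stddef.h>
	#include <stdint.h>

	struct entry_header {		/* mirrors acpi_table_entry_header */
		uint8_t type;
		uint8_t length;		/* total size of this subtable */
	};

	typedef int (*madt_handler)(struct entry_header *h, unsigned long end);

	/* Walk subtables between (madt + header_len) and (madt + total_len),
	 * dispatching the handler registered for each entry type. */
	static int walk_madt(uint8_t *madt, uint32_t total_len,
			     size_t header_len, madt_handler handlers[256])
	{
		unsigned long end = (unsigned long)madt + total_len;
		uint8_t *p = madt + header_len;
		int count = 0;

		while ((unsigned long)p + sizeof(struct entry_header) <= end) {
			struct entry_header *h = (struct entry_header *)p;

			if (h->length < sizeof(struct entry_header) ||
			    (unsigned long)p + h->length > end)
				return -1;	/* malformed entry, cf. BAD_MADT_ENTRY */
			if (handlers[h->type] && handlers[h->type](h, end) < 0)
				return -1;
			count++;
			p += h->length;		/* entries are variable-length */
		}
		return count;	/* a count, as the "if (!count)" checks above expect */
	}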
diff --git a/arch/i386/kernel/acpi/earlyquirk.c b/arch/i386/kernel/acpi/earlyquirk.c
new file mode 100644
index 000000000000..726a5ca4b165
--- /dev/null
+++ b/arch/i386/kernel/acpi/earlyquirk.c
@@ -0,0 +1,51 @@
/*
 * Do early PCI probing for bug detection when the main PCI subsystem is
 * not up yet.
 */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/pci.h>
#include <asm/pci-direct.h>
#include <asm/acpi.h>

static int __init check_bridge(int vendor, int device)
{
	/* According to Nvidia all timer overrides are bogus. Just ignore
	   them all. */
	if (vendor == PCI_VENDOR_ID_NVIDIA) {
		acpi_skip_timer_override = 1;
	}
	return 0;
}

void __init check_acpi_pci(void)
{
	int num, slot, func;

	/* Assume the machine supports type 1. If not it will
	   always read ffffffff and should not have any side effect. */

	/* Poor man's PCI discovery */
	for (num = 0; num < 32; num++) {
		for (slot = 0; slot < 32; slot++) {
			for (func = 0; func < 8; func++) {
				u32 class;
				u32 vendor;
				class = read_pci_config(num, slot, func,
							PCI_CLASS_REVISION);
				if (class == 0xffffffff)
					break;

				if ((class >> 16) != PCI_CLASS_BRIDGE_PCI)
					continue;

				vendor = read_pci_config(num, slot, func,
							 PCI_VENDOR_ID);

				if (check_bridge(vendor & 0xffff, vendor >> 16))
					return;
			}

		}
	}
}
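check_acpi_pci() relies on read_pci_config() from <asm/pci-direct.h>, the early type-1 configuration-space accessor. A sketch of how a type-1 accessor is classically implemented on x86 (address port 0xCF8, data port 0xCFC); the outl/inl helpers are assumed here, and this illustrates the mechanism rather than reproducing the kernel's exact code:

	#include <stdint.h>

	/* Assumed port-I/O helpers (the kernel gets these from <asm/io.h>). */
	extern void outl(uint32_t value, uint16_t port);
	extern uint32_t inl(uint16_t port);

	#define PCI_CONF_ADDR 0xCF8	/* type-1 config address port */
	#define PCI_CONF_DATA 0xCFC	/* type-1 config data port */

	/* Read a 32-bit register from bus/slot/func at byte offset 'reg'.
	 * Bit 31 enables the config cycle; reg must be dword-aligned. */
	static uint32_t read_pci_config(uint8_t bus, uint8_t slot,
					uint8_t func, uint8_t reg)
	{
		uint32_t addr = 0x80000000u
			      | ((uint32_t)bus  << 16)
			      | ((uint32_t)slot << 11)
			      | ((uint32_t)func << 8)
			      | (reg & 0xFC);

		outl(addr, PCI_CONF_ADDR);
		return inl(PCI_CONF_DATA);
		/* A non-existent device reads back as 0xffffffff, which is
		 * exactly the termination test in check_acpi_pci()'s loop. */
	}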
diff --git a/arch/i386/kernel/acpi/sleep.c b/arch/i386/kernel/acpi/sleep.c
new file mode 100644
index 000000000000..28bb0514bb6e
--- /dev/null
+++ b/arch/i386/kernel/acpi/sleep.c
@@ -0,0 +1,93 @@
/*
 * sleep.c - x86-specific ACPI sleep support.
 *
 * Copyright (C) 2001-2003 Patrick Mochel
 * Copyright (C) 2001-2003 Pavel Machek <pavel@suse.cz>
 */

#include <linux/acpi.h>
#include <linux/bootmem.h>
#include <asm/smp.h>
#include <asm/tlbflush.h>

/* address in low memory of the wakeup routine. */
unsigned long acpi_wakeup_address = 0;
unsigned long acpi_video_flags;
extern char wakeup_start, wakeup_end;

extern void zap_low_mappings(void);

extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long));

static void init_low_mapping(pgd_t *pgd, int pgd_limit)
{
	int pgd_ofs = 0;

	while ((pgd_ofs < pgd_limit) && (pgd_ofs + USER_PTRS_PER_PGD < PTRS_PER_PGD)) {
		set_pgd(pgd, *(pgd + USER_PTRS_PER_PGD));
		pgd_ofs++, pgd++;
	}
	flush_tlb_all();
}

/**
 * acpi_save_state_mem - save kernel state
 *
 * Create an identity mapped page table and copy the wakeup routine to
 * low memory.
 */
int acpi_save_state_mem (void)
{
	if (!acpi_wakeup_address)
		return 1;
	init_low_mapping(swapper_pg_dir, USER_PTRS_PER_PGD);
	memcpy((void *) acpi_wakeup_address, &wakeup_start, &wakeup_end - &wakeup_start);
	acpi_copy_wakeup_routine(acpi_wakeup_address);

	return 0;
}

/*
 * acpi_restore_state - undo effects of acpi_save_state_mem
 */
void acpi_restore_state_mem (void)
{
	zap_low_mappings();
}

/**
 * acpi_reserve_bootmem - do _very_ early ACPI initialisation
 *
 * We allocate a page from the first 1MB of memory for the wakeup
 * routine for when we come back from a sleep state. The
 * runtime allocator allows specification of <16MB pages, but not
 * <1MB pages.
 */
void __init acpi_reserve_bootmem(void)
{
	if ((&wakeup_end - &wakeup_start) > PAGE_SIZE) {
		printk(KERN_ERR "ACPI: Wakeup code way too big, S3 disabled.\n");
		return;
	}

	acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE);
	if (!acpi_wakeup_address)
		printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n");
}

static int __init acpi_sleep_setup(char *str)
{
	while ((str != NULL) && (*str != '\0')) {
		if (strncmp(str, "s3_bios", 7) == 0)
			acpi_video_flags = 1;
		if (strncmp(str, "s3_mode", 7) == 0)
			acpi_video_flags |= 2;
		str = strchr(str, ',');
		if (str != NULL)
			str += strspn(str, ", \t");
	}
	return 1;
}


__setup("acpi_sleep=", acpi_sleep_setup);
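acpi_sleep_setup() above accepts a comma-separated option string, e.g. acpi_sleep=s3_bios,s3_mode, which leaves acpi_video_flags == 3. The same parsing logic restated as a small, self-contained test harness (hypothetical user-space code, not part of the kernel):

	#include <stdio.h>
	#include <string.h>

	static unsigned long parse_acpi_sleep(const char *str)
	{
		unsigned long flags = 0;

		/* Same walk as acpi_sleep_setup(): match a token, then hop
		 * past the next comma (plus any spaces/tabs). */
		while (str != NULL && *str != '\0') {
			if (strncmp(str, "s3_bios", 7) == 0)
				flags = 1;	/* note: assignment, as in the original */
			if (strncmp(str, "s3_mode", 7) == 0)
				flags |= 2;
			str = strchr(str, ',');
			if (str != NULL)
				str += strspn(str, ", \t");
		}
		return flags;
	}

	int main(void)
	{
		printf("%lu\n", parse_acpi_sleep("s3_bios,s3_mode")); /* prints 3 */
		return 0;
	}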
diff --git a/arch/i386/kernel/acpi/wakeup.S b/arch/i386/kernel/acpi/wakeup.S
new file mode 100644
index 000000000000..39d32484f6f5
--- /dev/null
+++ b/arch/i386/kernel/acpi/wakeup.S
@@ -0,0 +1,318 @@
.text
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/page.h>

#
# wakeup_code runs in real mode, and at unknown address (determined at run-time).
# Therefore it must only use relative jumps/calls.
#
# Do we need to deal with A20? It is okay: the ACPI specs say A20 must be enabled.
#
# If physical address of wakeup_code is 0x12345, BIOS should call us with
# cs = 0x1234, eip = 0x05
#

ALIGN
	.align	4096
ENTRY(wakeup_start)
wakeup_code:
	wakeup_code_start = .
	.code16

	movw	$0xb800, %ax
	movw	%ax, %fs
	movw	$0x0e00 + 'L', %fs:(0x10)

	cli
	cld

	# setup data segment
	movw	%cs, %ax
	movw	%ax, %ds		# Make ds:0 point to wakeup_start
	movw	%ax, %ss
	mov	$(wakeup_stack - wakeup_code), %sp	# Private stack is needed for ASUS board
	movw	$0x0e00 + 'S', %fs:(0x12)

	pushl	$0			# Kill any dangerous flags
	popfl

	movl	real_magic - wakeup_code, %eax
	cmpl	$0x12345678, %eax
	jne	bogus_real_magic

	testl	$1, video_flags - wakeup_code
	jz	1f
	lcall	$0xc000, $3
	movw	%cs, %ax
	movw	%ax, %ds		# BIOS might have played with that
	movw	%ax, %ss
1:

	testl	$2, video_flags - wakeup_code
	jz	1f
	mov	video_mode - wakeup_code, %ax
	call	mode_set
1:

	# set up page table
	movl	$swapper_pg_dir - __PAGE_OFFSET, %eax
	movl	%eax, %cr3

	testl	$1, real_efer_save_restore - wakeup_code
	jz	4f
	# restore efer setting
	movl	real_save_efer_edx - wakeup_code, %edx
	movl	real_save_efer_eax - wakeup_code, %eax
	mov	$0xc0000080, %ecx
	wrmsr
4:
	# make sure %cr4 is set correctly (features, etc)
	movl	real_save_cr4 - wakeup_code, %eax
	movl	%eax, %cr4
	movw	$0xb800, %ax
	movw	%ax, %fs
	movw	$0x0e00 + 'i', %fs:(0x12)

	# need a gdt
	lgdt	real_save_gdt - wakeup_code

	movl	real_save_cr0 - wakeup_code, %eax
	movl	%eax, %cr0
	jmp	1f
1:
	movw	$0x0e00 + 'n', %fs:(0x14)

	movl	real_magic - wakeup_code, %eax
	cmpl	$0x12345678, %eax
	jne	bogus_real_magic

	ljmpl	$__KERNEL_CS, $wakeup_pmode_return

real_save_gdt:	.word 0
		.long 0
real_save_cr0:	.long 0
real_save_cr3:	.long 0
real_save_cr4:	.long 0
real_magic:	.long 0
video_mode:	.long 0
video_flags:	.long 0
real_efer_save_restore:	.long 0
real_save_efer_edx:	.long 0
real_save_efer_eax:	.long 0

bogus_real_magic:
	movw	$0x0e00 + 'B', %fs:(0x12)
	jmp	bogus_real_magic

/* This code uses an extended set of video mode numbers. These include:
 * Aliases for standard modes
 *	NORMAL_VGA (-1)
 *	EXTENDED_VGA (-2)
 *	ASK_VGA (-3)
 * Video modes numbered by menu position -- NOT RECOMMENDED because of lack
 * of compatibility when extending the table. These are between 0x00 and 0xff.
 */
#define VIDEO_FIRST_MENU 0x0000

/* Standard BIOS video modes (BIOS number + 0x0100) */
#define VIDEO_FIRST_BIOS 0x0100

/* VESA BIOS video modes (VESA number + 0x0200) */
#define VIDEO_FIRST_VESA 0x0200

/* Video7 special modes (BIOS number + 0x0900) */
#define VIDEO_FIRST_V7 0x0900

# Setting of user mode (AX=mode ID) => CF=success
mode_set:
	movw	%ax, %bx
#if 0
	cmpb	$0xff, %ah
	jz	setalias

	testb	$VIDEO_RECALC>>8, %ah
	jnz	_setrec

	cmpb	$VIDEO_FIRST_RESOLUTION>>8, %ah
	jnc	setres

	cmpb	$VIDEO_FIRST_SPECIAL>>8, %ah
	jz	setspc

	cmpb	$VIDEO_FIRST_V7>>8, %ah
	jz	setv7
#endif

	cmpb	$VIDEO_FIRST_VESA>>8, %ah
	jnc	check_vesa
#if 0
	orb	%ah, %ah
	jz	setmenu
#endif

	decb	%ah
#	jz	setbios		Add bios modes later

setbad:	clc
	ret

check_vesa:
	subb	$VIDEO_FIRST_VESA>>8, %bh
	orw	$0x4000, %bx	# Use linear frame buffer
	movw	$0x4f02, %ax	# VESA BIOS mode set call
	int	$0x10
	cmpw	$0x004f, %ax	# AL=4f if implemented
	jnz	_setbad		# AH=0 if OK

	stc
	ret

_setbad: jmp setbad

	.code32
	ALIGN

.org	0x800
wakeup_stack_begin:	# Stack grows down

.org	0xff0		# Just below end of page
wakeup_stack:
ENTRY(wakeup_end)

.org	0x1000

wakeup_pmode_return:
	movw	$__KERNEL_DS, %ax
	movw	%ax, %ss
	movw	%ax, %ds
	movw	%ax, %es
	movw	%ax, %fs
	movw	%ax, %gs
	movw	$0x0e00 + 'u', 0xb8016

	# reload the gdt, as we need the full 32 bit address
	lgdt	saved_gdt
	lidt	saved_idt
	lldt	saved_ldt
	ljmp	$(__KERNEL_CS), $1f
1:
	movl	%cr3, %eax
	movl	%eax, %cr3
	wbinvd

	# and restore the stack ... but you need gdt for this to work
	movl	saved_context_esp, %esp

	movl	%cs:saved_magic, %eax
	cmpl	$0x12345678, %eax
	jne	bogus_magic

	# jump to place where we left off
	movl	saved_eip, %eax
	jmp	*%eax

bogus_magic:
	movw	$0x0e00 + 'B', 0xb8018
	jmp	bogus_magic


##
# acpi_copy_wakeup_routine
#
# Copy the above routine to low memory.
#
# Parameters:
# %eax:	place to copy wakeup routine to
#
# Returned address is location of code in low memory (past data and stack)
#
ENTRY(acpi_copy_wakeup_routine)

	sgdt	saved_gdt
	sidt	saved_idt
	sldt	saved_ldt
	str	saved_tss

	movl	nx_enabled, %edx
	movl	%edx, real_efer_save_restore - wakeup_start (%eax)
	testl	$1, real_efer_save_restore - wakeup_start (%eax)
	jz	2f
	# save efer setting
	pushl	%eax
	movl	%eax, %ebx
	mov	$0xc0000080, %ecx
	rdmsr
	movl	%edx, real_save_efer_edx - wakeup_start (%ebx)
	movl	%eax, real_save_efer_eax - wakeup_start (%ebx)
	popl	%eax
2:

	movl	%cr3, %edx
	movl	%edx, real_save_cr3 - wakeup_start (%eax)
	movl	%cr4, %edx
	movl	%edx, real_save_cr4 - wakeup_start (%eax)
	movl	%cr0, %edx
	movl	%edx, real_save_cr0 - wakeup_start (%eax)
	sgdt	real_save_gdt - wakeup_start (%eax)

	movl	saved_videomode, %edx
	movl	%edx, video_mode - wakeup_start (%eax)
	movl	acpi_video_flags, %edx
	movl	%edx, video_flags - wakeup_start (%eax)
	movl	$0x12345678, real_magic - wakeup_start (%eax)
	movl	$0x12345678, saved_magic
	ret

.data
ALIGN
ENTRY(saved_magic)	.long	0
ENTRY(saved_eip)	.long	0

save_registers:
	leal	4(%esp), %eax
	movl	%eax, saved_context_esp
	movl	%ebx, saved_context_ebx
	movl	%ebp, saved_context_ebp
	movl	%esi, saved_context_esi
	movl	%edi, saved_context_edi
	pushfl ; popl saved_context_eflags

	movl	$ret_point, saved_eip
	ret


restore_registers:
	movl	saved_context_ebp, %ebp
	movl	saved_context_ebx, %ebx
	movl	saved_context_esi, %esi
	movl	saved_context_edi, %edi
	pushl	saved_context_eflags ; popfl
	ret

ENTRY(do_suspend_lowlevel)
	call	save_processor_state
	call	save_registers
	pushl	$3
	call	acpi_enter_sleep_state
	addl	$4, %esp
	ret
	.p2align 4,,7
ret_point:
	call	restore_registers
	call	restore_processor_state
	ret

ENTRY(do_suspend_lowlevel_s4bios)
	call	save_processor_state
	call	save_registers
	call	acpi_enter_sleep_state_s4bios
	ret

	ALIGN
# saved registers
saved_gdt:	.long	0,0
saved_idt:	.long	0,0
saved_ldt:	.long	0
saved_tss:	.long	0

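acpi_copy_wakeup_routine() addresses each variable inside the copied blob as "symbol - wakeup_start" plus the copy's base held in %eax. The same offset-patching idea in C, a sketch only, with real_magic standing in for any of the blob's embedded variables:

	#include <stdint.h>
	#include <string.h>

	/* Assembler-provided markers, as declared in sleep.c / defined in
	 * wakeup.S above (array form used here for pointer arithmetic). */
	extern char wakeup_start[], wakeup_end[];
	extern char real_magic[];	/* a variable embedded in the blob */

	/* Copy the wakeup blob to 'dest' (a page below 1MB) and patch one of
	 * its embedded variables -- the C analogue of
	 *	movl $0x12345678, real_magic - wakeup_start(%eax)
	 */
	static void copy_and_patch_wakeup(char *dest)
	{
		size_t blob_size = (size_t)(wakeup_end - wakeup_start);
		size_t off = (size_t)(real_magic - wakeup_start);
		uint32_t magic = 0x12345678;

		memcpy(dest, wakeup_start, blob_size);		/* as acpi_save_state_mem() does */
		memcpy(dest + off, &magic, sizeof(magic));	/* patch inside the copy */
	}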
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
new file mode 100644
index 000000000000..35c1751ea0b0
--- /dev/null
+++ b/arch/i386/kernel/apic.c
@@ -0,0 +1,1278 @@
/*
 *	Local APIC handling, local APIC timers
 *
 *	(c) 1999, 2000 Ingo Molnar <mingo@redhat.com>
 *
 *	Fixes
 *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs;
 *					thanks to Eric Gilmore
 *					and Rolf G. Tews
 *					for testing these extensively.
 *	Maciej W. Rozycki	:	Various updates and fixes.
 *	Mikael Pettersson	:	Power Management for UP-APIC.
 *	Pavel Machek and
 *	Mikael Pettersson	:	PM converted to driver model.
 */

#include <linux/config.h>
#include <linux/init.h>

#include <linux/mm.h>
#include <linux/irq.h>
#include <linux/delay.h>
#include <linux/bootmem.h>
#include <linux/smp_lock.h>
#include <linux/interrupt.h>
#include <linux/mc146818rtc.h>
#include <linux/kernel_stat.h>
#include <linux/sysdev.h>

#include <asm/atomic.h>
#include <asm/smp.h>
#include <asm/mtrr.h>
#include <asm/mpspec.h>
#include <asm/desc.h>
#include <asm/arch_hooks.h>
#include <asm/hpet.h>

#include <mach_apic.h>

#include "io_ports.h"

/*
 * Debug level
 */
int apic_verbosity;


static void apic_pm_activate(void);

/*
 * 'What should we do if we get a hw irq event on an illegal vector?'
 * Each architecture has to answer this themselves.
 */
void ack_bad_irq(unsigned int irq)
{
	printk("unexpected IRQ trap at vector %02x\n", irq);
	/*
	 * Currently unexpected vectors happen only on SMP and APIC.
	 * We _must_ ack these because every local APIC has only N
	 * irq slots per priority level, and a 'hanging, unacked' IRQ
	 * holds up an irq slot - in excessive cases (when multiple
	 * unexpected vectors occur) that might lock up the APIC
	 * completely.
	 */
	ack_APIC_irq();
}

void __init apic_intr_init(void)
{
#ifdef CONFIG_SMP
	smp_intr_init();
#endif
	/* self generated IPI for local APIC timer */
	set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);

	/* IPI vectors for APIC spurious and error interrupts */
	set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
	set_intr_gate(ERROR_APIC_VECTOR, error_interrupt);

	/* thermal monitor LVT interrupt */
#ifdef CONFIG_X86_MCE_P4THERMAL
	set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
#endif
}

/* Using APIC to generate smp_local_timer_interrupt? */
int using_apic_timer = 0;

static DEFINE_PER_CPU(int, prof_multiplier) = 1;
static DEFINE_PER_CPU(int, prof_old_multiplier) = 1;
static DEFINE_PER_CPU(int, prof_counter) = 1;

static int enabled_via_apicbase;

void enable_NMI_through_LVT0 (void * dummy)
{
	unsigned int v, ver;

	ver = apic_read(APIC_LVR);
	ver = GET_APIC_VERSION(ver);
	v = APIC_DM_NMI;		/* unmask and set to NMI */
	if (!APIC_INTEGRATED(ver))	/* 82489DX */
		v |= APIC_LVT_LEVEL_TRIGGER;
	apic_write_around(APIC_LVT0, v);
}

int get_physical_broadcast(void)
{
	unsigned int lvr, version;
	lvr = apic_read(APIC_LVR);
	version = GET_APIC_VERSION(lvr);
	if (!APIC_INTEGRATED(version) || version >= 0x14)
		return 0xff;
	else
		return 0xf;
}

int get_maxlvt(void)
{
	unsigned int v, ver, maxlvt;

	v = apic_read(APIC_LVR);
	ver = GET_APIC_VERSION(v);
	/* 82489DXs do not report # of LVT entries. */
	maxlvt = APIC_INTEGRATED(ver) ? GET_APIC_MAXLVT(v) : 2;
	return maxlvt;
}

void clear_local_APIC(void)
{
	int maxlvt;
	unsigned long v;

	maxlvt = get_maxlvt();

	/*
	 * Masking an LVT entry on a P6 can trigger a local APIC error
	 * if the vector is zero. Mask LVTERR first to prevent this.
	 */
	if (maxlvt >= 3) {
		v = ERROR_APIC_VECTOR;	/* any non-zero vector will do */
		apic_write_around(APIC_LVTERR, v | APIC_LVT_MASKED);
	}
	/*
	 * Careful: we have to set masks only first to deassert
	 * any level-triggered sources.
	 */
	v = apic_read(APIC_LVTT);
	apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED);
	v = apic_read(APIC_LVT0);
	apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
	v = apic_read(APIC_LVT1);
	apic_write_around(APIC_LVT1, v | APIC_LVT_MASKED);
	if (maxlvt >= 4) {
		v = apic_read(APIC_LVTPC);
		apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED);
	}

	/* let's not touch this if we didn't frob it */
#ifdef CONFIG_X86_MCE_P4THERMAL
	if (maxlvt >= 5) {
		v = apic_read(APIC_LVTTHMR);
		apic_write_around(APIC_LVTTHMR, v | APIC_LVT_MASKED);
	}
#endif
	/*
	 * Clean APIC state for other OSs:
	 */
	apic_write_around(APIC_LVTT, APIC_LVT_MASKED);
	apic_write_around(APIC_LVT0, APIC_LVT_MASKED);
	apic_write_around(APIC_LVT1, APIC_LVT_MASKED);
	if (maxlvt >= 3)
		apic_write_around(APIC_LVTERR, APIC_LVT_MASKED);
	if (maxlvt >= 4)
		apic_write_around(APIC_LVTPC, APIC_LVT_MASKED);

#ifdef CONFIG_X86_MCE_P4THERMAL
	if (maxlvt >= 5)
		apic_write_around(APIC_LVTTHMR, APIC_LVT_MASKED);
#endif
	v = GET_APIC_VERSION(apic_read(APIC_LVR));
	if (APIC_INTEGRATED(v)) {	/* !82489DX */
		if (maxlvt > 3)		/* Due to Pentium errata 3AP and 11AP. */
			apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
	}
}
188 | |||
189 | void __init connect_bsp_APIC(void) | ||
190 | { | ||
191 | if (pic_mode) { | ||
192 | /* | ||
193 | * Do not trust the local APIC being empty at bootup. | ||
194 | */ | ||
195 | clear_local_APIC(); | ||
196 | /* | ||
197 | * PIC mode, enable APIC mode in the IMCR, i.e. | ||
198 | * connect BSP's local APIC to INT and NMI lines. | ||
199 | */ | ||
200 | apic_printk(APIC_VERBOSE, "leaving PIC mode, " | ||
201 | "enabling APIC mode.\n"); | ||
202 | outb(0x70, 0x22); | ||
203 | outb(0x01, 0x23); | ||
204 | } | ||
205 | enable_apic_mode(); | ||
206 | } | ||
207 | |||
208 | void disconnect_bsp_APIC(void) | ||
209 | { | ||
210 | if (pic_mode) { | ||
211 | /* | ||
212 | * Put the board back into PIC mode (has an effect | ||
213 | * only on certain older boards). Note that APIC | ||
214 | * interrupts, including IPIs, won't work beyond | ||
215 | * this point! The only exception are INIT IPIs. | ||
216 | */ | ||
217 | apic_printk(APIC_VERBOSE, "disabling APIC mode, " | ||
218 | "entering PIC mode.\n"); | ||
219 | outb(0x70, 0x22); | ||
220 | outb(0x00, 0x23); | ||
221 | } | ||
222 | } | ||
223 | |||
224 | void disable_local_APIC(void) | ||
225 | { | ||
226 | unsigned long value; | ||
227 | |||
228 | clear_local_APIC(); | ||
229 | |||
230 | /* | ||
231 | * Disable APIC (implies clearing of registers | ||
232 | * for 82489DX!). | ||
233 | */ | ||
234 | value = apic_read(APIC_SPIV); | ||
235 | value &= ~APIC_SPIV_APIC_ENABLED; | ||
236 | apic_write_around(APIC_SPIV, value); | ||
237 | |||
238 | if (enabled_via_apicbase) { | ||
239 | unsigned int l, h; | ||
240 | rdmsr(MSR_IA32_APICBASE, l, h); | ||
241 | l &= ~MSR_IA32_APICBASE_ENABLE; | ||
242 | wrmsr(MSR_IA32_APICBASE, l, h); | ||
243 | } | ||
244 | } | ||
245 | |||
246 | /* | ||
247 | * This is to verify that we're looking at a real local APIC. | ||
248 | * Check these against your board if the CPUs aren't getting | ||
249 | * started for no apparent reason. | ||
250 | */ | ||
251 | int __init verify_local_APIC(void) | ||
252 | { | ||
253 | unsigned int reg0, reg1; | ||
254 | |||
255 | /* | ||
256 | * The version register is read-only in a real APIC. | ||
257 | */ | ||
258 | reg0 = apic_read(APIC_LVR); | ||
259 | apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0); | ||
260 | apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK); | ||
261 | reg1 = apic_read(APIC_LVR); | ||
262 | apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1); | ||
263 | |||
264 | /* | ||
265 | * The two version reads above should print the same | ||
266 | * numbers. If the second one is different, then we | ||
267 | * poke at a non-APIC. | ||
268 | */ | ||
269 | if (reg1 != reg0) | ||
270 | return 0; | ||
271 | |||
272 | /* | ||
273 | * Check if the version looks reasonable. | ||
274 | */ | ||
275 | reg1 = GET_APIC_VERSION(reg0); | ||
276 | if (reg1 == 0x00 || reg1 == 0xff) | ||
277 | return 0; | ||
278 | reg1 = get_maxlvt(); | ||
279 | if (reg1 < 0x02 || reg1 == 0xff) | ||
280 | return 0; | ||
281 | |||
282 | /* | ||
283 | * The ID register is read/write in a real APIC. | ||
284 | */ | ||
285 | reg0 = apic_read(APIC_ID); | ||
286 | apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); | ||
287 | |||
288 | /* | ||
289 | * The next two are just to see if we have sane values. | ||
290 | * They're only really relevant if we're in Virtual Wire | ||
291 | * compatibility mode, but most boxes these days are. | ||
292 | */ | ||
293 | reg0 = apic_read(APIC_LVT0); | ||
294 | apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0); | ||
295 | reg1 = apic_read(APIC_LVT1); | ||
296 | apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1); | ||
297 | |||
298 | return 1; | ||
299 | } | ||
300 | |||
301 | void __init sync_Arb_IDs(void) | ||
302 | { | ||
303 | /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 */ | ||
304 | unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); | ||
305 | if (ver >= 0x14) /* P4 or higher */ | ||
306 | return; | ||
307 | /* | ||
308 | * Wait for idle. | ||
309 | */ | ||
310 | apic_wait_icr_idle(); | ||
311 | |||
312 | apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); | ||
313 | apic_write_around(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | ||
314 | | APIC_DM_INIT); | ||
315 | } | ||
316 | |||
317 | extern void __error_in_apic_c (void); | ||
318 | |||
319 | /* | ||
320 | * An initial setup of the virtual wire mode. | ||
321 | */ | ||
322 | void __init init_bsp_APIC(void) | ||
323 | { | ||
324 | unsigned long value, ver; | ||
325 | |||
326 | /* | ||
327 | * Don't do the setup now if we have an SMP BIOS, as the | ||
328 | * through-I/O-APIC virtual wire mode might be active. | ||
329 | */ | ||
330 | if (smp_found_config || !cpu_has_apic) | ||
331 | return; | ||
332 | |||
333 | value = apic_read(APIC_LVR); | ||
334 | ver = GET_APIC_VERSION(value); | ||
335 | |||
336 | /* | ||
337 | * Do not trust the local APIC being empty at bootup. | ||
338 | */ | ||
339 | clear_local_APIC(); | ||
340 | |||
341 | /* | ||
342 | * Enable APIC. | ||
343 | */ | ||
344 | value = apic_read(APIC_SPIV); | ||
345 | value &= ~APIC_VECTOR_MASK; | ||
346 | value |= APIC_SPIV_APIC_ENABLED; | ||
347 | |||
348 | /* This bit is reserved on P4/Xeon and should be cleared */ | ||
349 | if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 15)) | ||
350 | value &= ~APIC_SPIV_FOCUS_DISABLED; | ||
351 | else | ||
352 | value |= APIC_SPIV_FOCUS_DISABLED; | ||
353 | value |= SPURIOUS_APIC_VECTOR; | ||
354 | apic_write_around(APIC_SPIV, value); | ||
355 | |||
356 | /* | ||
357 | * Set up the virtual wire mode. | ||
358 | */ | ||
359 | apic_write_around(APIC_LVT0, APIC_DM_EXTINT); | ||
360 | value = APIC_DM_NMI; | ||
361 | if (!APIC_INTEGRATED(ver)) /* 82489DX */ | ||
362 | value |= APIC_LVT_LEVEL_TRIGGER; | ||
363 | apic_write_around(APIC_LVT1, value); | ||
364 | } | ||
365 | |||
366 | void __init setup_local_APIC (void) | ||
367 | { | ||
368 | unsigned long oldvalue, value, ver, maxlvt; | ||
369 | |||
370 | /* Pound the ESR really hard over the head with a big hammer - mbligh */ | ||
371 | if (esr_disable) { | ||
372 | apic_write(APIC_ESR, 0); | ||
373 | apic_write(APIC_ESR, 0); | ||
374 | apic_write(APIC_ESR, 0); | ||
375 | apic_write(APIC_ESR, 0); | ||
376 | } | ||
377 | |||
378 | value = apic_read(APIC_LVR); | ||
379 | ver = GET_APIC_VERSION(value); | ||
380 | |||
381 | if ((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f) | ||
382 | __error_in_apic_c(); | ||
383 | |||
384 | /* | ||
385 | * Double-check whether this APIC is really registered. | ||
386 | */ | ||
387 | if (!apic_id_registered()) | ||
388 | BUG(); | ||
389 | |||
390 | /* | ||
391 | * Intel recommends to set DFR, LDR and TPR before enabling | ||
392 | * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel | ||
393 | * document number 292116). So here it goes... | ||
394 | */ | ||
395 | init_apic_ldr(); | ||
396 | |||
397 | /* | ||
398 | * Set Task Priority to 'accept all'. We never change this | ||
399 | * later on. | ||
400 | */ | ||
401 | value = apic_read(APIC_TASKPRI); | ||
402 | value &= ~APIC_TPRI_MASK; | ||
403 | apic_write_around(APIC_TASKPRI, value); | ||
404 | |||
405 | /* | ||
406 | * Now that we are all set up, enable the APIC | ||
407 | */ | ||
408 | value = apic_read(APIC_SPIV); | ||
409 | value &= ~APIC_VECTOR_MASK; | ||
410 | /* | ||
411 | * Enable APIC | ||
412 | */ | ||
413 | value |= APIC_SPIV_APIC_ENABLED; | ||
414 | |||
415 | /* | ||
416 | * Some unknown Intel IO/APIC (or APIC) errata is biting us with | ||
417 | * certain networking cards. If high frequency interrupts are | ||
418 | * happening on a particular IOAPIC pin, plus the IOAPIC routing | ||
419 | * entry is masked/unmasked at a high rate as well then sooner or | ||
420 | * later IOAPIC line gets 'stuck', no more interrupts are received | ||
421 | * from the device. If focus CPU is disabled then the hang goes | ||
422 | * away, oh well :-( | ||
423 | * | ||
424 | * [ This bug can be reproduced easily with a level-triggered | ||
425 | * PCI Ne2000 networking cards and PII/PIII processors, dual | ||
426 | * BX chipset. ] | ||
427 | */ | ||
428 | /* | ||
429 | * Actually disabling the focus CPU check just makes the hang less | ||
430 | * frequent, as it makes the interrupt distribution model more | ||
431 | * like LRU than MRU (the short-term load is more even across CPUs). | ||
432 | * See also the comment in end_level_ioapic_irq(). --macro | ||
433 | */ | ||
434 | #if 1 | ||
435 | /* Enable focus processor (bit==0) */ | ||
436 | value &= ~APIC_SPIV_FOCUS_DISABLED; | ||
437 | #else | ||
438 | /* Disable focus processor (bit==1) */ | ||
439 | value |= APIC_SPIV_FOCUS_DISABLED; | ||
440 | #endif | ||
441 | /* | ||
442 | * Set spurious IRQ vector | ||
443 | */ | ||
444 | value |= SPURIOUS_APIC_VECTOR; | ||
445 | apic_write_around(APIC_SPIV, value); | ||
446 | |||
447 | /* | ||
448 | * Set up LVT0, LVT1: | ||
449 | * | ||
450 | * set up through-local-APIC on the BP's LINT0. This is not | ||
451 | * strictly necessary in pure symmetric-IO mode, but sometimes | ||
452 | * we delegate interrupts to the 8259A. | ||
453 | */ | ||
454 | /* | ||
455 | * TODO: set up through-local-APIC from through-I/O-APIC? --macro | ||
456 | */ | ||
457 | value = apic_read(APIC_LVT0) & APIC_LVT_MASKED; | ||
458 | if (!smp_processor_id() && (pic_mode || !value)) { | ||
459 | value = APIC_DM_EXTINT; | ||
460 | apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", | ||
461 | smp_processor_id()); | ||
462 | } else { | ||
463 | value = APIC_DM_EXTINT | APIC_LVT_MASKED; | ||
464 | apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", | ||
465 | smp_processor_id()); | ||
466 | } | ||
467 | apic_write_around(APIC_LVT0, value); | ||
468 | |||
469 | /* | ||
470 | * only the BP should see the LINT1 NMI signal, obviously. | ||
471 | */ | ||
472 | if (!smp_processor_id()) | ||
473 | value = APIC_DM_NMI; | ||
474 | else | ||
475 | value = APIC_DM_NMI | APIC_LVT_MASKED; | ||
476 | if (!APIC_INTEGRATED(ver)) /* 82489DX */ | ||
477 | value |= APIC_LVT_LEVEL_TRIGGER; | ||
478 | apic_write_around(APIC_LVT1, value); | ||
479 | |||
480 | if (APIC_INTEGRATED(ver) && !esr_disable) { /* !82489DX */ | ||
481 | maxlvt = get_maxlvt(); | ||
482 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ | ||
483 | apic_write(APIC_ESR, 0); | ||
484 | oldvalue = apic_read(APIC_ESR); | ||
485 | |||
486 | value = ERROR_APIC_VECTOR; /* enables sending errors */ | ||
487 | apic_write_around(APIC_LVTERR, value); | ||
488 | /* | ||
489 | * spec says clear errors after enabling vector. | ||
490 | */ | ||
491 | if (maxlvt > 3) | ||
492 | apic_write(APIC_ESR, 0); | ||
493 | value = apic_read(APIC_ESR); | ||
494 | if (value != oldvalue) | ||
495 | apic_printk(APIC_VERBOSE, "ESR value before enabling " | ||
496 | "vector: 0x%08lx after: 0x%08lx\n", | ||
497 | oldvalue, value); | ||
498 | } else { | ||
499 | if (esr_disable) | ||
500 | /* | ||
501 | * Something untraceable is creating bad interrupts on | ||
502 | * secondary quads ... for the moment, just leave the | ||
503 | * ESR disabled - we can't do anything useful with the | ||
504 | * errors anyway - mbligh | ||
505 | */ | ||
506 | printk("Leaving ESR disabled.\n"); | ||
507 | else | ||
508 | printk("No ESR for 82489DX.\n"); | ||
509 | } | ||
510 | |||
511 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
512 | setup_apic_nmi_watchdog(); | ||
513 | apic_pm_activate(); | ||
514 | } | ||
515 | |||
516 | /* | ||
517 | * If Linux enabled the LAPIC against the BIOS default, | ||
518 | * disable it before re-entering the BIOS on shutdown. | ||
519 | * Otherwise the BIOS may get confused and not power off. | ||
520 | */ | ||
521 | void lapic_shutdown(void) | ||
522 | { | ||
523 | if (!cpu_has_apic || !enabled_via_apicbase) | ||
524 | return; | ||
525 | |||
526 | local_irq_disable(); | ||
527 | disable_local_APIC(); | ||
528 | local_irq_enable(); | ||
529 | } | ||
530 | |||
531 | #ifdef CONFIG_PM | ||
532 | |||
533 | static struct { | ||
534 | int active; | ||
535 | /* r/w apic fields */ | ||
536 | unsigned int apic_id; | ||
537 | unsigned int apic_taskpri; | ||
538 | unsigned int apic_ldr; | ||
539 | unsigned int apic_dfr; | ||
540 | unsigned int apic_spiv; | ||
541 | unsigned int apic_lvtt; | ||
542 | unsigned int apic_lvtpc; | ||
543 | unsigned int apic_lvt0; | ||
544 | unsigned int apic_lvt1; | ||
545 | unsigned int apic_lvterr; | ||
546 | unsigned int apic_tmict; | ||
547 | unsigned int apic_tdcr; | ||
548 | unsigned int apic_thmr; | ||
549 | } apic_pm_state; | ||
550 | |||
551 | static int lapic_suspend(struct sys_device *dev, u32 state) | ||
552 | { | ||
553 | unsigned long flags; | ||
554 | |||
555 | if (!apic_pm_state.active) | ||
556 | return 0; | ||
557 | |||
558 | apic_pm_state.apic_id = apic_read(APIC_ID); | ||
559 | apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); | ||
560 | apic_pm_state.apic_ldr = apic_read(APIC_LDR); | ||
561 | apic_pm_state.apic_dfr = apic_read(APIC_DFR); | ||
562 | apic_pm_state.apic_spiv = apic_read(APIC_SPIV); | ||
563 | apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); | ||
564 | apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); | ||
565 | apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); | ||
566 | apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); | ||
567 | apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); | ||
568 | apic_pm_state.apic_tmict = apic_read(APIC_TMICT); | ||
569 | apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); | ||
570 | apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); | ||
571 | |||
572 | local_irq_save(flags); | ||
573 | disable_local_APIC(); | ||
574 | local_irq_restore(flags); | ||
575 | return 0; | ||
576 | } | ||
577 | |||
578 | static int lapic_resume(struct sys_device *dev) | ||
579 | { | ||
580 | unsigned int l, h; | ||
581 | unsigned long flags; | ||
582 | |||
583 | if (!apic_pm_state.active) | ||
584 | return 0; | ||
585 | |||
586 | local_irq_save(flags); | ||
587 | |||
588 | /* | ||
589 | * Make sure the APICBASE points to the right address | ||
590 | * | ||
591 | * FIXME! This will be wrong if we ever support suspend on | ||
592 | * SMP! We'll need to do this as part of the CPU restore! | ||
593 | */ | ||
594 | rdmsr(MSR_IA32_APICBASE, l, h); | ||
595 | l &= ~MSR_IA32_APICBASE_BASE; | ||
596 | l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; | ||
597 | wrmsr(MSR_IA32_APICBASE, l, h); | ||
598 | |||
599 | apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); | ||
600 | apic_write(APIC_ID, apic_pm_state.apic_id); | ||
601 | apic_write(APIC_DFR, apic_pm_state.apic_dfr); | ||
602 | apic_write(APIC_LDR, apic_pm_state.apic_ldr); | ||
603 | apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri); | ||
604 | apic_write(APIC_SPIV, apic_pm_state.apic_spiv); | ||
605 | apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); | ||
606 | apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); | ||
607 | apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); | ||
608 | apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); | ||
609 | apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); | ||
610 | apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); | ||
611 | apic_write(APIC_TMICT, apic_pm_state.apic_tmict); | ||
612 | apic_write(APIC_ESR, 0); | ||
613 | apic_read(APIC_ESR); | ||
614 | apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); | ||
615 | apic_write(APIC_ESR, 0); | ||
616 | apic_read(APIC_ESR); | ||
617 | local_irq_restore(flags); | ||
618 | return 0; | ||
619 | } | ||
620 | |||
621 | /* | ||
622 | * This device has no shutdown method - fully functioning local APICs | ||
623 | * are needed on every CPU up until machine_halt/restart/poweroff. | ||
624 | */ | ||
625 | |||
626 | static struct sysdev_class lapic_sysclass = { | ||
627 | set_kset_name("lapic"), | ||
628 | .resume = lapic_resume, | ||
629 | .suspend = lapic_suspend, | ||
630 | }; | ||
631 | |||
632 | static struct sys_device device_lapic = { | ||
633 | .id = 0, | ||
634 | .cls = &lapic_sysclass, | ||
635 | }; | ||
636 | |||
637 | static void __init apic_pm_activate(void) | ||
638 | { | ||
639 | apic_pm_state.active = 1; | ||
640 | } | ||
641 | |||
642 | static int __init init_lapic_sysfs(void) | ||
643 | { | ||
644 | int error; | ||
645 | |||
646 | if (!cpu_has_apic) | ||
647 | return 0; | ||
648 | /* XXX: remove suspend/resume procs if !apic_pm_state.active? */ | ||
649 | |||
650 | error = sysdev_class_register(&lapic_sysclass); | ||
651 | if (!error) | ||
652 | error = sysdev_register(&device_lapic); | ||
653 | return error; | ||
654 | } | ||
655 | device_initcall(init_lapic_sysfs); | ||
656 | |||
657 | #else /* CONFIG_PM */ | ||
658 | |||
659 | static void apic_pm_activate(void) { } | ||
660 | |||
661 | #endif /* CONFIG_PM */ | ||
662 | |||
663 | /* | ||
664 | * Detect and enable local APICs on non-SMP boards. | ||
665 | * Original code written by Keir Fraser. | ||
666 | */ | ||
667 | |||
668 | /* | ||
669 | * Knob to control our willingness to enable the local APIC. | ||
670 | */ | ||
671 | int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */ | ||
672 | |||
673 | static int __init lapic_disable(char *str) | ||
674 | { | ||
675 | enable_local_apic = -1; | ||
676 | clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); | ||
677 | return 0; | ||
678 | } | ||
679 | __setup("nolapic", lapic_disable); | ||
680 | |||
681 | static int __init lapic_enable(char *str) | ||
682 | { | ||
683 | enable_local_apic = 1; | ||
684 | return 0; | ||
685 | } | ||
686 | __setup("lapic", lapic_enable); | ||
687 | |||
688 | static int __init apic_set_verbosity(char *str) | ||
689 | { | ||
690 | if (strcmp("debug", str) == 0) | ||
691 | apic_verbosity = APIC_DEBUG; | ||
692 | else if (strcmp("verbose", str) == 0) | ||
693 | apic_verbosity = APIC_VERBOSE; | ||
694 | else | ||
695 | printk(KERN_WARNING "APIC Verbosity level %s not recognised," | ||
696 | " use apic=verbose or apic=debug\n", str); | ||
697 | |||
698 | return 0; | ||
699 | } | ||
700 | |||
701 | __setup("apic=", apic_set_verbosity); | ||
702 | |||
703 | static int __init detect_init_APIC (void) | ||
704 | { | ||
705 | u32 h, l, features; | ||
706 | extern void get_cpu_vendor(struct cpuinfo_x86*); | ||
707 | |||
708 | /* Disabled by kernel option? */ | ||
709 | if (enable_local_apic < 0) | ||
710 | return -1; | ||
711 | |||
712 | /* Workaround for us being called before identify_cpu(). */ | ||
713 | get_cpu_vendor(&boot_cpu_data); | ||
714 | |||
715 | switch (boot_cpu_data.x86_vendor) { | ||
716 | case X86_VENDOR_AMD: | ||
717 | if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) || | ||
718 | (boot_cpu_data.x86 == 15)) | ||
719 | break; | ||
720 | goto no_apic; | ||
721 | case X86_VENDOR_INTEL: | ||
722 | if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 || | ||
723 | (boot_cpu_data.x86 == 5 && cpu_has_apic)) | ||
724 | break; | ||
725 | goto no_apic; | ||
726 | default: | ||
727 | goto no_apic; | ||
728 | } | ||
729 | |||
730 | if (!cpu_has_apic) { | ||
731 | /* | ||
732 | * Override the BIOS and try to enable the local | ||
733 | * APIC only if "lapic" was specified. | ||
734 | */ | ||
735 | if (enable_local_apic <= 0) { | ||
736 | printk("Local APIC disabled by BIOS -- " | ||
737 | "you can enable it with \"lapic\"\n"); | ||
738 | return -1; | ||
739 | } | ||
740 | /* | ||
741 | * Some BIOSes disable the local APIC in the | ||
742 | * APIC_BASE MSR. This can only be done in | ||
743 | * software for Intel P6 or later and AMD K7 | ||
744 | * (Model > 1) or later. | ||
745 | */ | ||
746 | rdmsr(MSR_IA32_APICBASE, l, h); | ||
747 | if (!(l & MSR_IA32_APICBASE_ENABLE)) { | ||
748 | printk("Local APIC disabled by BIOS -- reenabling.\n"); | ||
749 | l &= ~MSR_IA32_APICBASE_BASE; | ||
750 | l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; | ||
751 | wrmsr(MSR_IA32_APICBASE, l, h); | ||
752 | enabled_via_apicbase = 1; | ||
753 | } | ||
754 | } | ||
755 | /* | ||
756 | * The APIC feature bit should now be enabled | ||
757 | * in `cpuid' | ||
758 | */ | ||
759 | features = cpuid_edx(1); | ||
760 | if (!(features & (1 << X86_FEATURE_APIC))) { | ||
761 | printk("Could not enable APIC!\n"); | ||
762 | return -1; | ||
763 | } | ||
764 | set_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); | ||
765 | mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; | ||
766 | |||
767 | /* The BIOS may have set up the APIC at some other address */ | ||
768 | rdmsr(MSR_IA32_APICBASE, l, h); | ||
769 | if (l & MSR_IA32_APICBASE_ENABLE) | ||
770 | mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; | ||
771 | |||
772 | if (nmi_watchdog != NMI_NONE) | ||
773 | nmi_watchdog = NMI_LOCAL_APIC; | ||
774 | |||
775 | printk("Found and enabled local APIC!\n"); | ||
776 | |||
777 | apic_pm_activate(); | ||
778 | |||
779 | return 0; | ||
780 | |||
781 | no_apic: | ||
782 | printk("No local APIC present or hardware disabled\n"); | ||
783 | return -1; | ||
784 | } | ||
785 | |||
786 | void __init init_apic_mappings(void) | ||
787 | { | ||
788 | unsigned long apic_phys; | ||
789 | |||
790 | /* | ||
791 | * If no local APIC can be found then set up a fake all | ||
792 | * zeroes page to simulate the local APIC and another | ||
793 | * one for the IO-APIC. | ||
794 | */ | ||
795 | if (!smp_found_config && detect_init_APIC()) { | ||
796 | apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); | ||
797 | apic_phys = __pa(apic_phys); | ||
798 | } else | ||
799 | apic_phys = mp_lapic_addr; | ||
800 | |||
801 | set_fixmap_nocache(FIX_APIC_BASE, apic_phys); | ||
802 | printk(KERN_DEBUG "mapped APIC to %08lx (%08lx)\n", APIC_BASE, | ||
803 | apic_phys); | ||
804 | |||
805 | /* | ||
806 | * Fetch the APIC ID of the BSP in case we have a | ||
807 | * default configuration (or the MP table is broken). | ||
808 | */ | ||
809 | if (boot_cpu_physical_apicid == -1U) | ||
810 | boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); | ||
811 | |||
812 | #ifdef CONFIG_X86_IO_APIC | ||
813 | { | ||
814 | unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; | ||
815 | int i; | ||
816 | |||
817 | for (i = 0; i < nr_ioapics; i++) { | ||
818 | if (smp_found_config) { | ||
819 | ioapic_phys = mp_ioapics[i].mpc_apicaddr; | ||
820 | if (!ioapic_phys) { | ||
821 | printk(KERN_ERR | ||
822 | "WARNING: bogus zero IO-APIC " | ||
823 | "address found in MPTABLE, " | ||
824 | "disabling IO/APIC support!\n"); | ||
825 | smp_found_config = 0; | ||
826 | skip_ioapic_setup = 1; | ||
827 | goto fake_ioapic_page; | ||
828 | } | ||
829 | } else { | ||
830 | fake_ioapic_page: | ||
831 | ioapic_phys = (unsigned long) | ||
832 | alloc_bootmem_pages(PAGE_SIZE); | ||
833 | ioapic_phys = __pa(ioapic_phys); | ||
834 | } | ||
835 | set_fixmap_nocache(idx, ioapic_phys); | ||
836 | printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n", | ||
837 | __fix_to_virt(idx), ioapic_phys); | ||
838 | idx++; | ||
839 | } | ||
840 | } | ||
841 | #endif | ||
842 | } | ||
843 | |||
844 | /* | ||
845 | * This part sets up the APIC 32 bit clock in LVTT1, with HZ interrupts | ||
846 | * per second. We assume that the caller has already set up the local | ||
847 | * APIC. | ||
848 | * | ||
849 | * The APIC timer is not exactly in sync with the external timer chip; it | ||
850 | * closely follows bus clocks. | ||
851 | */ | ||
852 | |||
853 | /* | ||
854 | * The timer chip is already set up at HZ interrupts per second here, | ||
855 | * but we do not accept timer interrupts yet. We only allow the BP | ||
856 | * to calibrate. | ||
857 | */ | ||
858 | static unsigned int __init get_8254_timer_count(void) | ||
859 | { | ||
860 | extern spinlock_t i8253_lock; | ||
861 | unsigned long flags; | ||
862 | |||
863 | unsigned int count; | ||
864 | |||
865 | spin_lock_irqsave(&i8253_lock, flags); | ||
866 | |||
867 | outb_p(0x00, PIT_MODE); | ||
868 | count = inb_p(PIT_CH0); | ||
869 | count |= inb_p(PIT_CH0) << 8; | ||
870 | |||
871 | spin_unlock_irqrestore(&i8253_lock, flags); | ||
872 | |||
873 | return count; | ||
874 | } | ||
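/*
 * A note on the sequence above: writing 0x00 to the 8254 mode/command
 * port is the counter-latch command for channel 0, so the two
 * back-to-back reads from PIT_CH0 return a consistent snapshot of the
 * 16-bit count, low byte first, then high byte.
 */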
875 | |||
876 | /* next tick in 8254 can be caught by catching timer wraparound */ | ||
877 | static void __init wait_8254_wraparound(void) | ||
878 | { | ||
879 | unsigned int curr_count, prev_count; | ||
880 | |||
881 | curr_count = get_8254_timer_count(); | ||
882 | do { | ||
883 | prev_count = curr_count; | ||
884 | curr_count = get_8254_timer_count(); | ||
885 | |||
886 | /* workaround for broken Mercury/Neptune */ | ||
887 | if (prev_count >= curr_count + 0x100) | ||
888 | curr_count = get_8254_timer_count(); | ||
889 | |||
890 | } while (prev_count >= curr_count); | ||
891 | } | ||
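/*
 * The loop above works because the 8254 counts down: within a tick
 * every read is smaller than the previous one, so prev_count <
 * curr_count can only happen right after the counter reloads, i.e. at
 * a tick boundary. The extra read behind the 0x100 test discards a
 * count that dropped by a whole high byte within one loop iteration,
 * presumably a glitched latch on the broken Mercury/Neptune chipsets
 * mentioned in the comment.
 */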
892 | |||
893 | /* | ||
894 | * Default initialization for 8254 timers. If we use other timers like HPET, | ||
895 | * we override this later | ||
896 | */ | ||
897 | void (*wait_timer_tick)(void) __initdata = wait_8254_wraparound; | ||
898 | |||
899 | /* | ||
900 | * This function sets up the local APIC timer, with a timeout of | ||
901 | * 'clocks' APIC bus clock. During calibration we actually call | ||
902 | * this function twice on the boot CPU, once with a bogus timeout | ||
903 | * value, and a second time for real. The other (non-calibrating) CPUs | ||
904 | * call this function only once, with the real, calibrated value. | ||
905 | * | ||
906 | * We do reads before writes even if unnecessary, to get around the | ||
907 | * P5 APIC double write bug. | ||
908 | */ | ||
909 | |||
910 | #define APIC_DIVISOR 16 | ||
911 | |||
912 | static void __setup_APIC_LVTT(unsigned int clocks) | ||
913 | { | ||
914 | unsigned int lvtt_value, tmp_value, ver; | ||
915 | |||
916 | ver = GET_APIC_VERSION(apic_read(APIC_LVR)); | ||
917 | lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; | ||
918 | if (!APIC_INTEGRATED(ver)) | ||
919 | lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); | ||
920 | apic_write_around(APIC_LVTT, lvtt_value); | ||
921 | |||
922 | /* | ||
923 | * Divide PICLK by 16 | ||
924 | */ | ||
925 | tmp_value = apic_read(APIC_TDCR); | ||
926 | apic_write_around(APIC_TDCR, (tmp_value | ||
927 | & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | ||
928 | | APIC_TDR_DIV_16); | ||
929 | |||
930 | apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); | ||
931 | } | ||
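/*
 * Rough numbers, for illustration only: with the divider at 16 the
 * timer decrements once per 16 bus clocks, so loading TMICT with
 * clocks/16 makes the counter underflow after roughly 'clocks' bus
 * clocks. On a hypothetical 100 MHz bus with HZ=100 that means
 * clocks ~= 1,000,000 per tick and TMICT ~= 62,500.
 */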
932 | |||
933 | static void __init setup_APIC_timer(unsigned int clocks) | ||
934 | { | ||
935 | unsigned long flags; | ||
936 | |||
937 | local_irq_save(flags); | ||
938 | |||
939 | /* | ||
940 | * Wait for IRQ0's slice: | ||
941 | */ | ||
942 | wait_timer_tick(); | ||
943 | |||
944 | __setup_APIC_LVTT(clocks); | ||
945 | |||
946 | local_irq_restore(flags); | ||
947 | } | ||
948 | |||
949 | /* | ||
950 | * In this function we calibrate APIC bus clocks to the external | ||
951 | * timer. Unfortunately we cannot use jiffies and the timer irq | ||
952 | * to calibrate, since some later bootup code depends on getting | ||
953 | * the first irq. Ugh. | ||
954 | * | ||
955 | * We want to do the calibration only once, since we | ||
956 | * want to have local timer irqs synchronous. CPUs connected | ||
957 | * by the same APIC bus have the very same bus frequency. | ||
958 | * And we want to have irqs off anyway - no accidental | ||
959 | * APIC irq that way. | ||
960 | */ | ||
961 | |||
962 | static int __init calibrate_APIC_clock(void) | ||
963 | { | ||
964 | unsigned long long t1 = 0, t2 = 0; | ||
965 | long tt1, tt2; | ||
966 | long result; | ||
967 | int i; | ||
968 | const int LOOPS = HZ/10; | ||
969 | |||
970 | apic_printk(APIC_VERBOSE, "calibrating APIC timer ...\n"); | ||
971 | |||
972 | /* | ||
973 | * Put whatever arbitrary (but long enough) timeout | ||
974 | * value into the APIC clock, we just want to get the | ||
975 | * counter running for calibration. | ||
976 | */ | ||
977 | __setup_APIC_LVTT(1000000000); | ||
978 | |||
979 | /* | ||
980 | * The timer chip counts down to zero. Let's wait | ||
981 | * for a wraparound to start exact measurement: | ||
982 | * (the current tick might already be half done) | ||
983 | */ | ||
984 | |||
985 | wait_timer_tick(); | ||
986 | |||
987 | /* | ||
988 | * We wrapped around just now. Let's start: | ||
989 | */ | ||
990 | if (cpu_has_tsc) | ||
991 | rdtscll(t1); | ||
992 | tt1 = apic_read(APIC_TMCCT); | ||
993 | |||
994 | /* | ||
995 | * Let's wait LOOPS wraparounds: | ||
996 | */ | ||
997 | for (i = 0; i < LOOPS; i++) | ||
998 | wait_timer_tick(); | ||
999 | |||
1000 | tt2 = apic_read(APIC_TMCCT); | ||
1001 | if (cpu_has_tsc) | ||
1002 | rdtscll(t2); | ||
1003 | |||
1004 | /* | ||
1005 | * The APIC bus clock counter is 32 bits only; it | ||
1006 | * might have overflowed, but note that we use signed | ||
1007 | * longs, thus no extra care needed. | ||
1008 | * | ||
1009 | * underflowed, to be exact, as the timer counts down ;) | ||
1010 | */ | ||
1011 | |||
1012 | result = (tt1-tt2)*APIC_DIVISOR/LOOPS; | ||
1013 | |||
1014 | if (cpu_has_tsc) | ||
1015 | apic_printk(APIC_VERBOSE, "..... CPU clock speed is " | ||
1016 | "%ld.%04ld MHz.\n", | ||
1017 | ((long)(t2-t1)/LOOPS)/(1000000/HZ), | ||
1018 | ((long)(t2-t1)/LOOPS)%(1000000/HZ)); | ||
1019 | |||
1020 | apic_printk(APIC_VERBOSE, "..... host bus clock speed is " | ||
1021 | "%ld.%04ld MHz.\n", | ||
1022 | result/(1000000/HZ), | ||
1023 | result%(1000000/HZ)); | ||
1024 | |||
1025 | return result; | ||
1026 | } | ||
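/*
 * A worked example of the arithmetic above, using the hypothetical
 * 100 MHz bus from before and HZ=100: the loop waits LOOPS = HZ/10 =
 * 10 ticks; TMCCT counts in divide-by-16 units and drops by about
 * 62,500 per tick, so tt1-tt2 ~= 625,000 and
 *
 *	result = 625,000 * 16 / 10 = 1,000,000
 *
 * bus clocks per tick, which the printk above reports as
 * "100.0000 MHz" since result/(1000000/HZ) = 1,000,000/10,000 = 100.
 */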
1027 | |||
1028 | static unsigned int calibration_result; | ||
1029 | |||
1030 | void __init setup_boot_APIC_clock(void) | ||
1031 | { | ||
1032 | apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"); | ||
1033 | using_apic_timer = 1; | ||
1034 | |||
1035 | local_irq_disable(); | ||
1036 | |||
1037 | calibration_result = calibrate_APIC_clock(); | ||
1038 | /* | ||
1039 | * Now set up the timer for real. | ||
1040 | */ | ||
1041 | setup_APIC_timer(calibration_result); | ||
1042 | |||
1043 | local_irq_enable(); | ||
1044 | } | ||
1045 | |||
1046 | void __init setup_secondary_APIC_clock(void) | ||
1047 | { | ||
1048 | setup_APIC_timer(calibration_result); | ||
1049 | } | ||
1050 | |||
1051 | void __init disable_APIC_timer(void) | ||
1052 | { | ||
1053 | if (using_apic_timer) { | ||
1054 | unsigned long v; | ||
1055 | |||
1056 | v = apic_read(APIC_LVTT); | ||
1057 | apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED); | ||
1058 | } | ||
1059 | } | ||
1060 | |||
1061 | void enable_APIC_timer(void) | ||
1062 | { | ||
1063 | if (using_apic_timer) { | ||
1064 | unsigned long v; | ||
1065 | |||
1066 | v = apic_read(APIC_LVTT); | ||
1067 | apic_write_around(APIC_LVTT, v & ~APIC_LVT_MASKED); | ||
1068 | } | ||
1069 | } | ||
1070 | |||
1071 | /* | ||
1072 | * the frequency of the profiling timer can be changed | ||
1073 | * by writing a multiplier value into /proc/profile. | ||
1074 | */ | ||
1075 | int setup_profiling_timer(unsigned int multiplier) | ||
1076 | { | ||
1077 | int i; | ||
1078 | |||
1079 | /* | ||
1080 | * Sanity check. [at least 500 APIC cycles should be | ||
1081 | * between APIC interrupts as a rule of thumb, to avoid | ||
1082 | * irqs flooding us] | ||
1083 | */ | ||
1084 | if ( (!multiplier) || (calibration_result/multiplier < 500)) | ||
1085 | return -EINVAL; | ||
1086 | |||
1087 | /* | ||
1088 | * Set the new multiplier for each CPU. CPUs don't start using the | ||
1089 | * new values until the next timer interrupt in which they do process | ||
1090 | * accounting. At that time they also adjust their APIC timers | ||
1091 | * accordingly. | ||
1092 | */ | ||
1093 | for (i = 0; i < NR_CPUS; ++i) | ||
1094 | per_cpu(prof_multiplier, i) = multiplier; | ||
1095 | |||
1096 | return 0; | ||
1097 | } | ||
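/*
 * Continuing the hypothetical calibration numbers: with
 * calibration_result = 1,000,000 the sanity check above accepts
 * multipliers up to 2000 (1,000,000/2000 = 500, which is not below
 * the 500-cycle floor), while 2001 gives 499 and returns -EINVAL.
 */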
1098 | |||
1099 | #undef APIC_DIVISOR | ||
1100 | |||
1101 | /* | ||
1102 | * Local timer interrupt handler. It does both profiling and | ||
1103 | * process statistics/rescheduling. | ||
1104 | * | ||
1105 | * We do profiling in every local tick; statistics/rescheduling | ||
1106 | * happen only every 'profiling multiplier' ticks. The default | ||
1107 | * multiplier is 1 and it can be changed by writing the new multiplier | ||
1108 | * value into /proc/profile. | ||
1109 | */ | ||
1110 | |||
1111 | inline void smp_local_timer_interrupt(struct pt_regs * regs) | ||
1112 | { | ||
1113 | int cpu = smp_processor_id(); | ||
1114 | |||
1115 | profile_tick(CPU_PROFILING, regs); | ||
1116 | if (--per_cpu(prof_counter, cpu) <= 0) { | ||
1117 | /* | ||
1118 | * The multiplier may have changed since the last time we got | ||
1119 | * to this point as a result of the user writing to | ||
1120 | * /proc/profile. In this case we need to adjust the APIC | ||
1121 | * timer accordingly. | ||
1122 | * | ||
1123 | * Interrupts are already masked off at this point. | ||
1124 | */ | ||
1125 | per_cpu(prof_counter, cpu) = per_cpu(prof_multiplier, cpu); | ||
1126 | if (per_cpu(prof_counter, cpu) != | ||
1127 | per_cpu(prof_old_multiplier, cpu)) { | ||
1128 | __setup_APIC_LVTT( | ||
1129 | calibration_result/ | ||
1130 | per_cpu(prof_counter, cpu)); | ||
1131 | per_cpu(prof_old_multiplier, cpu) = | ||
1132 | per_cpu(prof_counter, cpu); | ||
1133 | } | ||
1134 | |||
1135 | #ifdef CONFIG_SMP | ||
1136 | update_process_times(user_mode(regs)); | ||
1137 | #endif | ||
1138 | } | ||
1139 | |||
1140 | /* | ||
1141 | * We take the 'long' return path, and there every subsystem | ||
1142 | * grabs the appropriate locks (kernel lock / irq lock). | ||
1143 | * | ||
1144 | * We might want to decouple profiling from the 'long path', | ||
1145 | * and do the profiling totally in assembly. | ||
1146 | * | ||
1147 | * Currently this isn't too much of an issue (performance wise), | ||
1148 | * we can take more than 100K local irqs per second on a 100 MHz P5. | ||
1149 | */ | ||
1150 | } | ||
1151 | |||
1152 | /* | ||
1153 | * Local APIC timer interrupt. This is the most natural way for doing | ||
1154 | * local interrupts, but local timer interrupts can be emulated by | ||
1155 | * broadcast interrupts too. [in case the hw doesn't support APIC timers] | ||
1156 | * | ||
1157 | * [ if a single-CPU system runs an SMP kernel then we call the local | ||
1158 | * interrupt as well. Thus we cannot inline the local irq ... ] | ||
1159 | */ | ||
1160 | |||
1161 | fastcall void smp_apic_timer_interrupt(struct pt_regs *regs) | ||
1162 | { | ||
1163 | int cpu = smp_processor_id(); | ||
1164 | |||
1165 | /* | ||
1166 | * the NMI deadlock-detector uses this. | ||
1167 | */ | ||
1168 | per_cpu(irq_stat, cpu).apic_timer_irqs++; | ||
1169 | |||
1170 | /* | ||
1171 | * NOTE! We'd better ACK the irq immediately, | ||
1172 | * because timer handling can be slow. | ||
1173 | */ | ||
1174 | ack_APIC_irq(); | ||
1175 | /* | ||
1176 | * update_process_times() expects us to have done irq_enter(). | ||
1177 | * Besides, if we don't, timer interrupts ignore the global | ||
1178 | * interrupt lock, which is the WrongThing (tm) to do. | ||
1179 | */ | ||
1180 | irq_enter(); | ||
1181 | smp_local_timer_interrupt(regs); | ||
1182 | irq_exit(); | ||
1183 | } | ||
1184 | |||
1185 | /* | ||
1186 | * This interrupt should _never_ happen with our APIC/SMP architecture | ||
1187 | */ | ||
1188 | fastcall void smp_spurious_interrupt(struct pt_regs *regs) | ||
1189 | { | ||
1190 | unsigned long v; | ||
1191 | |||
1192 | irq_enter(); | ||
1193 | /* | ||
1194 | * Check if this really is a spurious interrupt and ACK it | ||
1195 | * if it is a vectored one. Just in case... | ||
1196 | * Spurious interrupts should not be ACKed. | ||
1197 | */ | ||
1198 | v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1)); | ||
1199 | if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) | ||
1200 | ack_APIC_irq(); | ||
1201 | |||
1202 | /* see sw-dev-man vol 3, chapter 7.4.13.5 */ | ||
1203 | printk(KERN_INFO "spurious APIC interrupt on CPU#%d, should never happen.\n", | ||
1204 | smp_processor_id()); | ||
1205 | irq_exit(); | ||
1206 | } | ||
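/*
 * The ISR lookup above is worth unpacking. The in-service bits live
 * in a bank of eight 32-bit registers spaced 0x10 apart, so vector v
 * sits in register v >> 5 at bit position v & 0x1f, and the byte
 * offset of that register is (v >> 5) * 0x10 == (v & ~0x1f) >> 1.
 * E.g. for a spurious vector of 0xff (the low nibble must be 0xf, as
 * setup_local_APIC() checks above) this reads APIC_ISR + 0x70 and
 * tests bit 31. A hypothetical helper spelling it out:
 */
static inline unsigned long apic_vector_in_service(unsigned int v)
{
	return apic_read(APIC_ISR + ((v & ~0x1f) >> 1))
		& (1ul << (v & 0x1f));
}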
1207 | |||
1208 | /* | ||
1209 | * This interrupt should never happen with our APIC/SMP architecture | ||
1210 | */ | ||
1211 | |||
1212 | fastcall void smp_error_interrupt(struct pt_regs *regs) | ||
1213 | { | ||
1214 | unsigned long v, v1; | ||
1215 | |||
1216 | irq_enter(); | ||
1217 | /* First tickle the hardware, only then report what went on. -- REW */ | ||
1218 | v = apic_read(APIC_ESR); | ||
1219 | apic_write(APIC_ESR, 0); | ||
1220 | v1 = apic_read(APIC_ESR); | ||
1221 | ack_APIC_irq(); | ||
1222 | atomic_inc(&irq_err_count); | ||
1223 | |||
1224 | /* Here is what the APIC error bits mean: | ||
1225 | 0: Send CS error | ||
1226 | 1: Receive CS error | ||
1227 | 2: Send accept error | ||
1228 | 3: Receive accept error | ||
1229 | 4: Reserved | ||
1230 | 5: Send illegal vector | ||
1231 | 6: Received illegal vector | ||
1232 | 7: Illegal register address | ||
1233 | */ | ||
1234 | printk(KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n", | ||
1235 | smp_processor_id(), v, v1); | ||
1236 | irq_exit(); | ||
1237 | } | ||
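/*
 * For illustration, a hypothetical decoder for the ESR bits listed in
 * the comment above (nothing in this file uses it):
 */
static const char * const apic_esr_msg[8] = {
	"Send CS error", "Receive CS error",
	"Send accept error", "Receive accept error",
	"Reserved", "Send illegal vector",
	"Received illegal vector", "Illegal register address",
};

static void apic_decode_esr(unsigned long esr)
{
	int i;

	for (i = 0; i < 8; i++)
		if (esr & (1ul << i))
			printk(KERN_DEBUG "  %s\n", apic_esr_msg[i]);
}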
1238 | |||
1239 | /* | ||
1240 | * This initializes the IO-APIC and APIC hardware if this is | ||
1241 | * a UP kernel. | ||
1242 | */ | ||
1243 | int __init APIC_init_uniprocessor (void) | ||
1244 | { | ||
1245 | if (enable_local_apic < 0) | ||
1246 | clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); | ||
1247 | |||
1248 | if (!smp_found_config && !cpu_has_apic) | ||
1249 | return -1; | ||
1250 | |||
1251 | /* | ||
1252 | * Complain if the BIOS pretends there is one. | ||
1253 | */ | ||
1254 | if (!cpu_has_apic && APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { | ||
1255 | printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", | ||
1256 | boot_cpu_physical_apicid); | ||
1257 | return -1; | ||
1258 | } | ||
1259 | |||
1260 | verify_local_APIC(); | ||
1261 | |||
1262 | connect_bsp_APIC(); | ||
1263 | |||
1264 | phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); | ||
1265 | |||
1266 | setup_local_APIC(); | ||
1267 | |||
1268 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
1269 | check_nmi_watchdog(); | ||
1270 | #ifdef CONFIG_X86_IO_APIC | ||
1271 | if (smp_found_config) | ||
1272 | if (!skip_ioapic_setup && nr_ioapics) | ||
1273 | setup_IO_APIC(); | ||
1274 | #endif | ||
1275 | setup_boot_APIC_clock(); | ||
1276 | |||
1277 | return 0; | ||
1278 | } | ||
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c new file mode 100644 index 000000000000..45641a872550 --- /dev/null +++ b/arch/i386/kernel/apm.c | |||
@@ -0,0 +1,2428 @@ | |||
1 | /* -*- linux-c -*- | ||
2 | * APM BIOS driver for Linux | ||
3 | * Copyright 1994-2001 Stephen Rothwell (sfr@canb.auug.org.au) | ||
4 | * | ||
5 | * Initial development of this driver was funded by NEC Australia P/L | ||
6 | * and NEC Corporation | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms of the GNU General Public License as published by the | ||
10 | * Free Software Foundation; either version 2, or (at your option) any | ||
11 | * later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, but | ||
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
16 | * General Public License for more details. | ||
17 | * | ||
18 | * October 1995, Rik Faith (faith@cs.unc.edu): | ||
19 | * Minor enhancements and updates (to the patch set) for 1.3.x | ||
20 | * Documentation | ||
21 | * January 1996, Rik Faith (faith@cs.unc.edu): | ||
22 | * Make /proc/apm easy to format (bump driver version) | ||
23 | * March 1996, Rik Faith (faith@cs.unc.edu): | ||
24 | * Prohibit APM BIOS calls unless apm_enabled. | ||
25 | * (Thanks to Ulrich Windl <Ulrich.Windl@rz.uni-regensburg.de>) | ||
26 | * April 1996, Stephen Rothwell (sfr@canb.auug.org.au) | ||
27 | * Version 1.0 and 1.1 | ||
28 | * May 1996, Version 1.2 | ||
29 | * Feb 1998, Version 1.3 | ||
30 | * Feb 1998, Version 1.4 | ||
31 | * Aug 1998, Version 1.5 | ||
32 | * Sep 1998, Version 1.6 | ||
33 | * Nov 1998, Version 1.7 | ||
34 | * Jan 1999, Version 1.8 | ||
35 | * Jan 1999, Version 1.9 | ||
36 | * Oct 1999, Version 1.10 | ||
37 | * Nov 1999, Version 1.11 | ||
38 | * Jan 2000, Version 1.12 | ||
39 | * Feb 2000, Version 1.13 | ||
40 | * Nov 2000, Version 1.14 | ||
41 | * Oct 2001, Version 1.15 | ||
42 | * Jan 2002, Version 1.16 | ||
43 | * Oct 2002, Version 1.16ac | ||
44 | * | ||
45 | * History: | ||
46 | * 0.6b: first version in official kernel, Linux 1.3.46 | ||
47 | * 0.7: changed /proc/apm format, Linux 1.3.58 | ||
48 | * 0.8: fixed gcc 2.7.[12] compilation problems, Linux 1.3.59 | ||
49 | * 0.9: only call bios if bios is present, Linux 1.3.72 | ||
50 | * 1.0: use fixed device number, consolidate /proc/apm into this file, | ||
51 | * Linux 1.3.85 | ||
52 | * 1.1: support user-space standby and suspend, power off after system | ||
53 | * halted, Linux 1.3.98 | ||
54 | * 1.2: When resetting RTC after resume, take care so that the time | ||
55 | * is only incorrect by 30-60 ms (vs. 1 s previously) (Gabor J. Toth | ||
56 | * <jtoth@princeton.edu>); improve interaction between | ||
57 | * screen-blanking and gpm (Stephen Rothwell); Linux 1.99.4 | ||
58 | * 1.2a: Simple change to stop mysterious bug reports with SMP; also added | ||
59 | * levels to the printk calls. APM is not defined for SMP machines. | ||
60 | * The new replacement for it is, but Linux doesn't yet support this. | ||
61 | * Alan Cox Linux 2.1.55 | ||
62 | * 1.3: Set up a valid data descriptor 0x40 for buggy BIOS's | ||
63 | * 1.4: Upgraded to support APM 1.2. Integrated ThinkPad suspend patch by | ||
64 | * Dean Gaudet <dgaudet@arctic.org>. | ||
65 | * C. Scott Ananian <cananian@alumni.princeton.edu> Linux 2.1.87 | ||
66 | * 1.5: Fix segment register reloading (in case of bad segments saved | ||
67 | * across BIOS call). | ||
68 | * Stephen Rothwell | ||
69 | * 1.6: Cope with compiler/assembler differences. | ||
70 | * Only try to turn off the first display device. | ||
71 | * Fix OOPS at power off with no APM BIOS by Jan Echternach | ||
72 | * <echter@informatik.uni-rostock.de> | ||
73 | * Stephen Rothwell | ||
74 | * 1.7: Modify driver's cached copy of the disabled/disengaged flags | ||
75 | * to reflect current state of APM BIOS. | ||
76 | * Chris Rankin <rankinc@bellsouth.net> | ||
77 | * Reset interrupt 0 timer to 100Hz after suspend | ||
78 | * Chad Miller <cmiller@surfsouth.com> | ||
79 | * Add CONFIG_APM_IGNORE_SUSPEND_BOUNCE | ||
80 | * Richard Gooch <rgooch@atnf.csiro.au> | ||
81 | * Allow boot time disabling of APM | ||
82 | * Make boot messages far less verbose by default | ||
83 | * Make asm safer | ||
84 | * Stephen Rothwell | ||
85 | * 1.8: Add CONFIG_APM_RTC_IS_GMT | ||
86 | * Richard Gooch <rgooch@atnf.csiro.au> | ||
87 | * change APM_NOINTS to CONFIG_APM_ALLOW_INTS | ||
88 | * remove dependency on CONFIG_PROC_FS | ||
89 | * Stephen Rothwell | ||
90 | * 1.9: Fix small typo. <laslo@wodip.opole.pl> | ||
91 | * Try to cope with BIOS's that need to have all display | ||
92 | * devices blanked and not just the first one. | ||
93 | * Ross Paterson <ross@soi.city.ac.uk> | ||
94 | * Fix segment limit setting; it has always been wrong, as | ||
95 | * the segments needed to have byte granularity. | ||
96 | * Mark a few things __init. | ||
97 | * Add hack to allow power off of SMP systems by popular request. | ||
98 | * Use CONFIG_SMP instead of __SMP__ | ||
99 | * Ignore BOUNCES for three seconds. | ||
100 | * Stephen Rothwell | ||
101 | * 1.10: Fix for Thinkpad return code. | ||
102 | * Merge 2.2 and 2.3 drivers. | ||
103 | * Remove APM dependencies in arch/i386/kernel/process.c | ||
104 | * Remove APM dependencies in drivers/char/sysrq.c | ||
105 | * Reset time across standby. | ||
106 | * Allow more initialisation on SMP. | ||
107 | * Remove CONFIG_APM_POWER_OFF and make it boot time | ||
108 | * configurable (default on). | ||
109 | * Make debug only a boot time parameter (remove APM_DEBUG). | ||
110 | * Try to blank all devices on any error. | ||
111 | * 1.11: Remove APM dependencies in drivers/char/console.c | ||
112 | * Check nr_running to detect if we are idle (from | ||
113 | * Borislav Deianov <borislav@lix.polytechnique.fr>) | ||
114 | * Fix for bioses that don't zero the top part of the | ||
115 | * entrypoint offset (Mario Sitta <sitta@al.unipmn.it>) | ||
116 | * (reported by Panos Katsaloulis <teras@writeme.com>). | ||
117 | * Real mode power off patch (Walter Hofmann | ||
118 | * <Walter.Hofmann@physik.stud.uni-erlangen.de>). | ||
119 | * 1.12: Remove CONFIG_SMP as the compiler will optimize | ||
120 | * the code away anyway (smp_num_cpus == 1 in UP) | ||
121 | * noted by Artur Skawina <skawina@geocities.com>. | ||
122 | * Make power off under SMP work again. | ||
123 | * Fix thinko with initial engaging of BIOS. | ||
124 | * Make sure power off only happens on CPU 0 | ||
125 | * (Paul "Rusty" Russell <rusty@rustcorp.com.au>). | ||
126 | * Do error notification to user mode if BIOS calls fail. | ||
127 | * Move entrypoint offset fix to ...boot/setup.S | ||
128 | * where it belongs (Cosmos <gis88564@cis.nctu.edu.tw>). | ||
129 | * Remove smp-power-off. SMP users must now specify | ||
130 | * "apm=power-off" on the kernel command line. Suggested | ||
131 | * by Jim Avera <jima@hal.com>, modified by Alan Cox | ||
132 | * <alan@lxorguk.ukuu.org.uk>. | ||
133 | * Register the /proc/apm entry even on SMP so that | ||
134 | * scripts that check for it before doing power off | ||
135 | * work (Jim Avera <jima@hal.com>). | ||
136 | * 1.13: Changes for new pm_ interfaces (Andy Henroid | ||
137 | * <andy_henroid@yahoo.com>). | ||
138 | * Modularize the code. | ||
139 | * Fix the Thinkpad (again) :-( (CONFIG_APM_IGNORE_MULTIPLE_SUSPENDS | ||
140 | * is now the way life works). | ||
141 | * Fix thinko in suspend() (wrong return). | ||
142 | * Notify drivers on critical suspend. | ||
143 | * Make kapmd absorb more idle time (Pavel Machek <pavel@suse.cz> | ||
144 | * modified by sfr). | ||
145 | * Disable interrupts while we are suspended (Andy Henroid | ||
146 | * <andy_henroid@yahoo.com> fixed by sfr). | ||
147 | * Make power off work on SMP again (Tony Hoyle | ||
148 | * <tmh@magenta-logic.com> and <zlatko@iskon.hr>) modified by sfr. | ||
149 | * Remove CONFIG_APM_SUSPEND_BOUNCE. The bounce ignore | ||
150 | * interval is now configurable. | ||
151 | * 1.14: Make connection version persist across module unload/load. | ||
152 | * Enable and engage power management earlier. | ||
153 | * Disengage power management on module unload. | ||
154 | * Changed to use the sysrq-register hack for registering the | ||
155 | * power off function called by magic sysrq based upon discussions | ||
156 | * in irc://irc.openprojects.net/#kernelnewbies | ||
157 | * (Crutcher Dunnavant <crutcher+kernel@datastacks.com>). | ||
158 | * Make CONFIG_APM_REAL_MODE_POWER_OFF run time configurable. | ||
159 | * (Arjan van de Ven <arjanv@redhat.com>) modified by sfr. | ||
160 | * Work around byte swap bug in one of the Vaio's BIOS's | ||
161 | * (Marc Boucher <marc@mbsi.ca>). | ||
162 | * Exposed the disable flag to dmi so that we can handle known | ||
163 | * broken APM (Alan Cox <alan@redhat.com>). | ||
164 | * 1.14ac: If the BIOS says "I slowed the CPU down" then don't spin | ||
165 | * calling it - instead idle. (Alan Cox <alan@redhat.com>) | ||
166 | * If an APM idle fails log it and idle sensibly | ||
167 | * 1.15: Don't queue events to clients who open the device O_WRONLY. | ||
168 | * Don't expect replies from clients who open the device O_RDONLY. | ||
169 | * (Idea from Thomas Hood) | ||
170 | * Minor waitqueue cleanups. (John Fremlin <chief@bandits.org>) | ||
171 | * 1.16: Fix idle calling. (Andreas Steinmetz <ast@domdv.de> et al.) | ||
172 | * Notify listeners of standby or suspend events before notifying | ||
173 | * drivers. Return EBUSY to ioctl() if suspend is rejected. | ||
174 | * (Russell King <rmk@arm.linux.org.uk> and Thomas Hood) | ||
175 | * Ignore first resume after we generate our own resume event | ||
176 | * after a suspend (Thomas Hood) | ||
177 | * Daemonize now gets rid of our controlling terminal (sfr). | ||
178 | * CONFIG_APM_CPU_IDLE now just affects the default value of | ||
179 | * idle_threshold (sfr). | ||
180 | * Change name of kernel apm daemon (as it no longer idles) (sfr). | ||
181 | * 1.16ac: Fix up SMP support somewhat. You can now force SMP on and we | ||
182 | * make _all_ APM calls on the CPU#0. Fix unsafe sign bug. | ||
183 | * TODO: determine if its "boot CPU" or "CPU0" we want to lock to. | ||
184 | * | ||
185 | * APM 1.1 Reference: | ||
186 | * | ||
187 | * Intel Corporation, Microsoft Corporation. Advanced Power Management | ||
188 | * (APM) BIOS Interface Specification, Revision 1.1, September 1993. | ||
189 | * Intel Order Number 241704-001. Microsoft Part Number 781-110-X01. | ||
190 | * | ||
191 | * [This document is available free from Intel by calling 800.628.8686 (fax | ||
192 | * 916.356.6100) or 800.548.4725; or via anonymous ftp from | ||
193 | * ftp://ftp.intel.com/pub/IAL/software_specs/apmv11.doc. It is also | ||
194 | * available from Microsoft by calling 206.882.8080.] | ||
195 | * | ||
196 | * APM 1.2 Reference: | ||
197 | * Intel Corporation, Microsoft Corporation. Advanced Power Management | ||
198 | * (APM) BIOS Interface Specification, Revision 1.2, February 1996. | ||
199 | * | ||
200 | * [This document is available from Microsoft at: | ||
201 | * http://www.microsoft.com/hwdev/busbios/amp_12.htm] | ||
202 | */ | ||
203 | |||
204 | #include <linux/config.h> | ||
205 | #include <linux/module.h> | ||
206 | |||
207 | #include <linux/poll.h> | ||
208 | #include <linux/types.h> | ||
209 | #include <linux/stddef.h> | ||
210 | #include <linux/timer.h> | ||
211 | #include <linux/fcntl.h> | ||
212 | #include <linux/slab.h> | ||
213 | #include <linux/stat.h> | ||
214 | #include <linux/proc_fs.h> | ||
215 | #include <linux/miscdevice.h> | ||
216 | #include <linux/apm_bios.h> | ||
217 | #include <linux/init.h> | ||
218 | #include <linux/time.h> | ||
219 | #include <linux/sched.h> | ||
220 | #include <linux/pm.h> | ||
221 | #include <linux/device.h> | ||
222 | #include <linux/kernel.h> | ||
223 | #include <linux/smp.h> | ||
224 | #include <linux/smp_lock.h> | ||
225 | #include <linux/dmi.h> | ||
226 | #include <linux/suspend.h> | ||
227 | |||
228 | #include <asm/system.h> | ||
229 | #include <asm/uaccess.h> | ||
230 | #include <asm/desc.h> | ||
231 | |||
232 | #include "io_ports.h" | ||
233 | |||
234 | extern spinlock_t i8253_lock; | ||
235 | extern unsigned long get_cmos_time(void); | ||
236 | extern void machine_real_restart(unsigned char *, int); | ||
237 | |||
238 | #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) | ||
239 | extern int (*console_blank_hook)(int); | ||
240 | #endif | ||
241 | |||
242 | /* | ||
243 | * The apm_bios device is one of the misc char devices. | ||
244 | * This is its minor number. | ||
245 | */ | ||
246 | #define APM_MINOR_DEV 134 | ||
247 | |||
248 | /* | ||
249 | * See Documentation/Config.help for the configuration options. | ||
250 | * | ||
251 | * Various options can be changed at boot time as follows: | ||
252 | * (We allow underscores for compatibility with the modules code) | ||
253 | * apm=on/off enable/disable APM | ||
254 | * [no-]allow[-_]ints allow interrupts during BIOS calls | ||
255 | * [no-]broken[-_]psr BIOS has a broken GetPowerStatus call | ||
256 | * [no-]realmode[-_]power[-_]off switch to real mode before | ||
257 | * powering off | ||
258 | * [no-]debug log some debugging messages | ||
259 | * [no-]power[-_]off power off on shutdown | ||
260 | * [no-]smp Use apm even on an SMP box | ||
261 | * bounce[-_]interval=<n> number of ticks to ignore suspend | ||
262 | * bounces | ||
263 | * idle[-_]threshold=<n> System idle percentage above which to | ||
264 | * make APM BIOS idle calls. Set it to | ||
265 | * 100 to disable. | ||
266 | * idle[-_]period=<n> Period (in 1/100s of a second) over | ||
267 | * which the idle percentage is | ||
268 | * calculated. | ||
269 | */ | ||
270 | |||
271 | /* KNOWN PROBLEM MACHINES: | ||
272 | * | ||
273 | * U: TI 4000M TravelMate: BIOS is *NOT* APM compliant | ||
274 | * [Confirmed by TI representative] | ||
275 | * ?: ACER 486DX4/75: uses dseg 0040, in violation of APM specification | ||
276 | * [Confirmed by BIOS disassembly] | ||
277 | * [This may work now ...] | ||
278 | * P: Toshiba 1950S: battery life information only gets updated after resume | ||
279 | * P: Midwest Micro Soundbook Elite DX2/66 monochrome: screen blanking | ||
280 | * broken in BIOS [Reported by Garst R. Reese <reese@isn.net>] | ||
281 | * ?: AcerNote-950: oops on reading /proc/apm - workaround is a WIP | ||
282 | * Neale Banks <neale@lowendale.com.au> December 2000 | ||
283 | * | ||
284 | * Legend: U = unusable with APM patches | ||
285 | * P = partially usable with APM patches | ||
286 | */ | ||
287 | |||
288 | /* | ||
289 | * Define as 1 to make the driver always call the APM BIOS busy | ||
290 | * routine even if the clock was not reported as slowed by the | ||
291 | * idle routine. Otherwise, define as 0. | ||
292 | */ | ||
293 | #define ALWAYS_CALL_BUSY 1 | ||
294 | |||
295 | /* | ||
296 | * Define to make the APM BIOS calls zero all data segment registers (so | ||
297 | * that an incorrect BIOS implementation will cause a kernel panic if it | ||
298 | * tries to write to arbitrary memory). | ||
299 | */ | ||
300 | #define APM_ZERO_SEGS | ||
301 | |||
302 | #include "apm.h" | ||
303 | |||
304 | /* | ||
305 | * Define to make all _set_limit calls use 64k limits. The APM 1.1 BIOS is | ||
306 | * supposed to provide limit information that it recognizes. Many machines | ||
307 | * do this correctly, but many others do not restrict themselves to their | ||
308 | * claimed limit. When this happens, they will cause a segmentation | ||
309 | * violation in the kernel at boot time. Most BIOS's, however, will | ||
310 | * respect a 64k limit, so we use that. If you want to be pedantic and | ||
311 | * hold your BIOS to its claims, then undefine this. | ||
312 | */ | ||
313 | #define APM_RELAX_SEGMENTS | ||
314 | |||
315 | /* | ||
316 | * Define to re-initialize the interrupt 0 timer to 100 Hz after a suspend. | ||
317 | * This patched by Chad Miller <cmiller@surfsouth.com>, original code by | ||
318 | * David Chen <chen@ctpa04.mit.edu> | ||
319 | */ | ||
320 | #undef INIT_TIMER_AFTER_SUSPEND | ||
321 | |||
322 | #ifdef INIT_TIMER_AFTER_SUSPEND | ||
323 | #include <linux/timex.h> | ||
324 | #include <asm/io.h> | ||
325 | #include <linux/delay.h> | ||
326 | #endif | ||
327 | |||
328 | /* | ||
329 | * Need to poll the APM BIOS every second | ||
330 | */ | ||
331 | #define APM_CHECK_TIMEOUT (HZ) | ||
332 | |||
333 | /* | ||
334 | * Ignore suspend events for this amount of time after a resume | ||
335 | */ | ||
336 | #define DEFAULT_BOUNCE_INTERVAL (3 * HZ) | ||
337 | |||
338 | /* | ||
339 | * Maximum number of events stored | ||
340 | */ | ||
341 | #define APM_MAX_EVENTS 20 | ||
342 | |||
343 | /* | ||
344 | * The per-file APM data | ||
345 | */ | ||
346 | struct apm_user { | ||
347 | int magic; | ||
348 | struct apm_user * next; | ||
349 | int suser: 1; | ||
350 | int writer: 1; | ||
351 | int reader: 1; | ||
352 | int suspend_wait: 1; | ||
353 | int suspend_result; | ||
354 | int suspends_pending; | ||
355 | int standbys_pending; | ||
356 | int suspends_read; | ||
357 | int standbys_read; | ||
358 | int event_head; | ||
359 | int event_tail; | ||
360 | apm_event_t events[APM_MAX_EVENTS]; | ||
361 | }; | ||
362 | |||
363 | /* | ||
364 | * The magic number in apm_user | ||
365 | */ | ||
366 | #define APM_BIOS_MAGIC 0x4101 | ||
367 | |||
368 | /* | ||
369 | * idle percentage above which bios idle calls are done | ||
370 | */ | ||
371 | #ifdef CONFIG_APM_CPU_IDLE | ||
372 | #define DEFAULT_IDLE_THRESHOLD 95 | ||
373 | #else | ||
374 | #define DEFAULT_IDLE_THRESHOLD 100 | ||
375 | #endif | ||
376 | #define DEFAULT_IDLE_PERIOD (100 / 3) | ||
377 | |||
378 | /* | ||
379 | * Local variables | ||
380 | */ | ||
381 | static struct { | ||
382 | unsigned long offset; | ||
383 | unsigned short segment; | ||
384 | } apm_bios_entry; | ||
385 | static int clock_slowed; | ||
386 | static int idle_threshold = DEFAULT_IDLE_THRESHOLD; | ||
387 | static int idle_period = DEFAULT_IDLE_PERIOD; | ||
388 | static int set_pm_idle; | ||
389 | static int suspends_pending; | ||
390 | static int standbys_pending; | ||
391 | static int ignore_sys_suspend; | ||
392 | static int ignore_normal_resume; | ||
393 | static int bounce_interval = DEFAULT_BOUNCE_INTERVAL; | ||
394 | |||
395 | #ifdef CONFIG_APM_RTC_IS_GMT | ||
396 | # define clock_cmos_diff 0 | ||
397 | # define got_clock_diff 1 | ||
398 | #else | ||
399 | static long clock_cmos_diff; | ||
400 | static int got_clock_diff; | ||
401 | #endif | ||
402 | static int debug; | ||
403 | static int smp; | ||
404 | static int apm_disabled = -1; | ||
405 | #ifdef CONFIG_SMP | ||
406 | static int power_off; | ||
407 | #else | ||
408 | static int power_off = 1; | ||
409 | #endif | ||
410 | #ifdef CONFIG_APM_REAL_MODE_POWER_OFF | ||
411 | static int realmode_power_off = 1; | ||
412 | #else | ||
413 | static int realmode_power_off; | ||
414 | #endif | ||
415 | static int exit_kapmd; | ||
416 | static int kapmd_running; | ||
417 | #ifdef CONFIG_APM_ALLOW_INTS | ||
418 | static int allow_ints = 1; | ||
419 | #else | ||
420 | static int allow_ints; | ||
421 | #endif | ||
422 | static int broken_psr; | ||
423 | |||
424 | static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue); | ||
425 | static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); | ||
426 | static struct apm_user * user_list; | ||
427 | static DEFINE_SPINLOCK(user_list_lock); | ||
428 | static struct desc_struct bad_bios_desc = { 0, 0x00409200 }; | ||
429 | |||
430 | static char driver_version[] = "1.16ac"; /* no spaces */ | ||
431 | |||
432 | /* | ||
433 | * APM event names taken from the APM 1.2 specification. These are | ||
434 | * the message codes that the BIOS uses to tell us about events | ||
435 | */ | ||
436 | static char * apm_event_name[] = { | ||
437 | "system standby", | ||
438 | "system suspend", | ||
439 | "normal resume", | ||
440 | "critical resume", | ||
441 | "low battery", | ||
442 | "power status change", | ||
443 | "update time", | ||
444 | "critical suspend", | ||
445 | "user standby", | ||
446 | "user suspend", | ||
447 | "system standby resume", | ||
448 | "capabilities change" | ||
449 | }; | ||
450 | #define NR_APM_EVENT_NAME \ | ||
451 | (sizeof(apm_event_name) / sizeof(apm_event_name[0])) | ||
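| |||
| /* | ||
| * APM event codes start at 0x0001 (system standby), so a valid event | ||
| * indexes apm_event_name[event - 1]; see check_events() below. | ||
| */ | ||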
452 | |||
453 | typedef struct lookup_t { | ||
454 | int key; | ||
455 | char * msg; | ||
456 | } lookup_t; | ||
457 | |||
458 | /* | ||
459 | * The BIOS returns a set of standard error codes in AX when the | ||
460 | * carry flag is set. | ||
461 | */ | ||
462 | |||
463 | static const lookup_t error_table[] = { | ||
464 | /* N/A { APM_SUCCESS, "Operation succeeded" }, */ | ||
465 | { APM_DISABLED, "Power management disabled" }, | ||
466 | { APM_CONNECTED, "Real mode interface already connected" }, | ||
467 | { APM_NOT_CONNECTED, "Interface not connected" }, | ||
468 | { APM_16_CONNECTED, "16 bit interface already connected" }, | ||
469 | /* N/A { APM_16_UNSUPPORTED, "16 bit interface not supported" }, */ | ||
470 | { APM_32_CONNECTED, "32 bit interface already connected" }, | ||
471 | { APM_32_UNSUPPORTED, "32 bit interface not supported" }, | ||
472 | { APM_BAD_DEVICE, "Unrecognized device ID" }, | ||
473 | { APM_BAD_PARAM, "Parameter out of range" }, | ||
474 | { APM_NOT_ENGAGED, "Interface not engaged" }, | ||
475 | { APM_BAD_FUNCTION, "Function not supported" }, | ||
476 | { APM_RESUME_DISABLED, "Resume timer disabled" }, | ||
477 | { APM_BAD_STATE, "Unable to enter requested state" }, | ||
478 | /* N/A { APM_NO_EVENTS, "No events pending" }, */ | ||
479 | { APM_NO_ERROR, "BIOS did not set a return code" }, | ||
480 | { APM_NOT_PRESENT, "No APM present" } | ||
481 | }; | ||
482 | #define ERROR_COUNT (sizeof(error_table)/sizeof(lookup_t)) | ||
483 | |||
484 | /** | ||
485 | * apm_error - display an APM error | ||
486 | * @str: information string | ||
487 | * @err: APM BIOS return code | ||
488 | * | ||
489 | * Write a meaningful log entry to the kernel log in the event of | ||
490 | * an APM error. | ||
491 | */ | ||
492 | |||
493 | static void apm_error(char *str, int err) | ||
494 | { | ||
495 | int i; | ||
496 | |||
497 | for (i = 0; i < ERROR_COUNT; i++) | ||
498 | if (error_table[i].key == err) break; | ||
499 | if (i < ERROR_COUNT) | ||
500 | printk(KERN_NOTICE "apm: %s: %s\n", str, error_table[i].msg); | ||
501 | else | ||
502 | printk(KERN_NOTICE "apm: %s: unknown error code %#2.2x\n", | ||
503 | str, err); | ||
504 | } | ||
505 | |||
506 | /* | ||
507 | * Lock APM functionality to physical CPU 0 | ||
508 | */ | ||
509 | |||
510 | #ifdef CONFIG_SMP | ||
511 | |||
512 | static cpumask_t apm_save_cpus(void) | ||
513 | { | ||
514 | cpumask_t x = current->cpus_allowed; | ||
515 | /* Some bioses don't like being called from CPU != 0 */ | ||
516 | set_cpus_allowed(current, cpumask_of_cpu(0)); | ||
517 | BUG_ON(smp_processor_id() != 0); | ||
518 | return x; | ||
519 | } | ||
520 | |||
521 | static inline void apm_restore_cpus(cpumask_t mask) | ||
522 | { | ||
523 | set_cpus_allowed(current, mask); | ||
524 | } | ||
525 | |||
526 | #else | ||
527 | |||
528 | /* | ||
529 | * No CPU lockdown needed on a uniprocessor | ||
530 | */ | ||
531 | |||
532 | #define apm_save_cpus() (current->cpus_allowed) | ||
533 | #define apm_restore_cpus(x) (void)(x) | ||
534 | |||
535 | #endif | ||
536 | |||
537 | /* | ||
538 | * These are the actual BIOS calls. Depending on APM_ZERO_SEGS and | ||
539 | * apm_info.allow_ints, we are being really paranoid here! Not only | ||
540 | * are interrupts disabled, but all the segment registers (except SS) | ||
541 | * are saved and zeroed; this means that if the BIOS tries to reference | ||
542 | * any data without explicitly loading the segment registers, the kernel | ||
543 | * will fault immediately rather than cause unforeseen damage | ||
544 | * elsewhere in the kernel later. And it will be very obvious! :-) Doing | ||
545 | * this depends on CS referring to the same physical memory as DS so that | ||
546 | * DS can be zeroed before the call. Unfortunately, we can't do anything | ||
547 | * about the stack segment/pointer. Also, we tell the compiler that | ||
548 | * everything could change. | ||
549 | * | ||
550 | * Also, we KNOW that for the non-error case of apm_bios_call, there | ||
551 | * is no useful data returned in the low order 8 bits of eax. | ||
552 | */ | ||
553 | #define APM_DO_CLI \ | ||
554 | if (apm_info.allow_ints) \ | ||
555 | local_irq_enable(); \ | ||
556 | else \ | ||
557 | local_irq_disable(); | ||
558 | |||
559 | #ifdef APM_ZERO_SEGS | ||
560 | # define APM_DECL_SEGS \ | ||
561 | unsigned int saved_fs; unsigned int saved_gs; | ||
562 | # define APM_DO_SAVE_SEGS \ | ||
563 | savesegment(fs, saved_fs); savesegment(gs, saved_gs) | ||
564 | # define APM_DO_RESTORE_SEGS \ | ||
565 | loadsegment(fs, saved_fs); loadsegment(gs, saved_gs) | ||
566 | #else | ||
567 | # define APM_DECL_SEGS | ||
568 | # define APM_DO_SAVE_SEGS | ||
569 | # define APM_DO_RESTORE_SEGS | ||
570 | #endif | ||
571 | |||
572 | /** | ||
573 | * apm_bios_call - Make an APM BIOS 32bit call | ||
574 | * @func: APM function to execute | ||
575 | * @ebx_in: EBX register for call entry | ||
576 | * @ecx_in: ECX register for call entry | ||
577 | * @eax: EAX register return | ||
578 | * @ebx: EBX register return | ||
579 | * @ecx: ECX register return | ||
580 | * @edx: EDX register return | ||
581 | * @esi: ESI register return | ||
582 | * | ||
583 | * Make an APM call using the 32bit protected mode interface. The | ||
584 | * caller is responsible for knowing if APM BIOS is configured and | ||
585 | * enabled. This call can disable interrupts for a long period of | ||
586 | * time on some laptops. The return value is in AH and the carry | ||
587 | * flag is loaded into AL. If there is an error, then the error | ||
588 | * code is returned in AH (bits 8-15 of eax) and this function | ||
589 | * returns non-zero. | ||
590 | */ | ||
591 | |||
592 | static u8 apm_bios_call(u32 func, u32 ebx_in, u32 ecx_in, | ||
593 | u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, u32 *esi) | ||
594 | { | ||
595 | APM_DECL_SEGS | ||
596 | unsigned long flags; | ||
597 | cpumask_t cpus; | ||
598 | int cpu; | ||
599 | struct desc_struct save_desc_40; | ||
600 | |||
601 | cpus = apm_save_cpus(); | ||
602 | |||
603 | cpu = get_cpu(); | ||
604 | save_desc_40 = per_cpu(cpu_gdt_table, cpu)[0x40 / 8]; | ||
605 | per_cpu(cpu_gdt_table, cpu)[0x40 / 8] = bad_bios_desc; | ||
606 | |||
607 | local_save_flags(flags); | ||
608 | APM_DO_CLI; | ||
609 | APM_DO_SAVE_SEGS; | ||
610 | apm_bios_call_asm(func, ebx_in, ecx_in, eax, ebx, ecx, edx, esi); | ||
611 | APM_DO_RESTORE_SEGS; | ||
612 | local_irq_restore(flags); | ||
613 | per_cpu(cpu_gdt_table, cpu)[0x40 / 8] = save_desc_40; | ||
614 | put_cpu(); | ||
615 | apm_restore_cpus(cpus); | ||
616 | |||
617 | return *eax & 0xff; | ||
618 | } | ||
619 | |||
620 | /** | ||
621 | * apm_bios_call_simple - make a simple APM BIOS 32bit call | ||
622 | * @func: APM function to invoke | ||
623 | * @ebx_in: EBX register value for BIOS call | ||
624 | * @ecx_in: ECX register value for BIOS call | ||
625 | * @eax: EAX register on return from the BIOS call | ||
626 | * | ||
627 | * Make a BIOS call that only returns one value, or just status. | ||
628 | * If there is an error, then the error code is returned in AH | ||
629 | * (bits 8-15 of eax) and this function returns non-zero. This is | ||
630 | * used for simpler BIOS operations. This call may hold interrupts | ||
631 | * off for a long time on some laptops. | ||
632 | */ | ||
633 | |||
634 | static u8 apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax) | ||
635 | { | ||
636 | u8 error; | ||
637 | APM_DECL_SEGS | ||
638 | unsigned long flags; | ||
639 | cpumask_t cpus; | ||
640 | int cpu; | ||
641 | struct desc_struct save_desc_40; | ||
642 | |||
643 | |||
644 | cpus = apm_save_cpus(); | ||
645 | |||
646 | cpu = get_cpu(); | ||
647 | save_desc_40 = per_cpu(cpu_gdt_table, cpu)[0x40 / 8]; | ||
648 | per_cpu(cpu_gdt_table, cpu)[0x40 / 8] = bad_bios_desc; | ||
649 | |||
650 | local_save_flags(flags); | ||
651 | APM_DO_CLI; | ||
652 | APM_DO_SAVE_SEGS; | ||
653 | error = apm_bios_call_simple_asm(func, ebx_in, ecx_in, eax); | ||
654 | APM_DO_RESTORE_SEGS; | ||
655 | local_irq_restore(flags); | ||
656 | per_cpu(cpu_gdt_table, cpu)[0x40 / 8] = save_desc_40; | ||
657 | put_cpu(); | ||
658 | apm_restore_cpus(cpus); | ||
659 | return error; | ||
660 | } | ||
661 | |||
662 | /** | ||
663 | * apm_driver_version - APM driver version | ||
664 | * @val: loaded with the APM version on return | ||
665 | * | ||
666 | * Retrieve the APM version supported by the BIOS. This is only | ||
667 | * supported for APM 1.1 or higher. An error indicates APM 1.0 is | ||
668 | * probably present. | ||
669 | * | ||
670 | * On entry val should point to a value indicating the APM driver | ||
671 | * version with the high byte being the major and the low byte the | ||
672 | * minor number, both in BCD. | ||
673 | * | ||
674 | * On return it will hold the BIOS revision supported in the | ||
675 | * same format. | ||
676 | */ | ||
677 | |||
678 | static int apm_driver_version(u_short *val) | ||
679 | { | ||
680 | u32 eax; | ||
681 | |||
682 | if (apm_bios_call_simple(APM_FUNC_VERSION, 0, *val, &eax)) | ||
683 | return (eax >> 8) & 0xff; | ||
684 | *val = eax; | ||
685 | return APM_SUCCESS; | ||
686 | } | ||
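| |||
| /* | ||
| * Both version numbers are BCD: e.g. *val = 0x0102 requests an APM | ||
| * 1.2 connection, and a BIOS supporting only 1.1 replies with 0x0101 | ||
| * in the same layout. apm() below relies on this when it clamps | ||
| * connection_version to 0x0102 before making this call. | ||
| */ | ||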
687 | |||
688 | /** | ||
689 | * apm_get_event - get an APM event from the BIOS | ||
690 | * @event: pointer to the event | ||
691 | * @info: point to the event information | ||
692 | * | ||
693 | * The APM BIOS provides a polled interface for event | ||
694 | * reporting. The BIOS expects to be polled at least every second | ||
695 | * when events are pending. When a message is found the caller should | ||
696 | * poll until no more messages are present. However, this causes | ||
697 | * problems on some laptops where a suspend event notification is | ||
698 | * not cleared until it is acknowledged. | ||
699 | * | ||
700 | * Additional information is returned in the info pointer, provided | ||
701 | * that APM 1.2 is in use. If no messages are pending the value 0x80 | ||
702 | * is returned (No power management events pending). | ||
703 | */ | ||
704 | |||
705 | static int apm_get_event(apm_event_t *event, apm_eventinfo_t *info) | ||
706 | { | ||
707 | u32 eax; | ||
708 | u32 ebx; | ||
709 | u32 ecx; | ||
710 | u32 dummy; | ||
711 | |||
712 | if (apm_bios_call(APM_FUNC_GET_EVENT, 0, 0, &eax, &ebx, &ecx, | ||
713 | &dummy, &dummy)) | ||
714 | return (eax >> 8) & 0xff; | ||
715 | *event = ebx; | ||
716 | if (apm_info.connection_version < 0x0102) | ||
717 | *info = ~0; /* indicate info not valid */ | ||
718 | else | ||
719 | *info = ecx; | ||
720 | return APM_SUCCESS; | ||
721 | } | ||
722 | |||
723 | /** | ||
724 | * set_power_state - set the power management state | ||
725 | * @what: which items to transition | ||
726 | * @state: state to transition to | ||
727 | * | ||
728 | * Request an APM change of state for one or more system devices. The | ||
729 | * processor state must be transitioned last of all. what holds the | ||
730 | * class of device in the upper byte and the device number (0xFF for | ||
731 | * all) for the object to be transitioned. | ||
732 | * | ||
733 | * The state holds the state to transition to, which may in fact | ||
734 | * be an acceptance of a BIOS requested state change. | ||
735 | */ | ||
736 | |||
737 | static int set_power_state(u_short what, u_short state) | ||
738 | { | ||
739 | u32 eax; | ||
740 | |||
741 | if (apm_bios_call_simple(APM_FUNC_SET_STATE, what, state, &eax)) | ||
742 | return (eax >> 8) & 0xff; | ||
743 | return APM_SUCCESS; | ||
744 | } | ||
745 | |||
746 | /** | ||
747 | * set_system_power_state - set system wide power state | ||
748 | * @state: which state to enter | ||
749 | * | ||
750 | * Transition the entire system into a new APM power state. | ||
751 | */ | ||
752 | |||
753 | static int set_system_power_state(u_short state) | ||
754 | { | ||
755 | return set_power_state(APM_DEVICE_ALL, state); | ||
756 | } | ||
757 | |||
758 | /** | ||
759 | * apm_do_idle - perform power saving | ||
760 | * | ||
761 | * This function notifies the BIOS that the processor is (in the view | ||
762 | * of the OS) idle. It returns -1 in the event that the BIOS refuses | ||
763 | * to handle the idle request. On a success the function returns 1 | ||
764 | * if the BIOS did clock slowing or 0 otherwise. | ||
765 | */ | ||
766 | |||
767 | static int apm_do_idle(void) | ||
768 | { | ||
769 | u32 eax; | ||
770 | |||
771 | if (apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax)) { | ||
772 | static unsigned long t; | ||
773 | |||
774 | /* This always fails on some SMP boards running UP kernels. | ||
775 | * Only report the failure the first 5 times. | ||
776 | */ | ||
777 | if (++t <= 5) | ||
778 | { | ||
779 | printk(KERN_DEBUG "apm_do_idle failed (%d)\n", | ||
780 | (eax >> 8) & 0xff); | ||
781 | /* do not reset t here: it counts the failures */ | ||
782 | } | ||
783 | return -1; | ||
784 | } | ||
785 | clock_slowed = (apm_info.bios.flags & APM_IDLE_SLOWS_CLOCK) != 0; | ||
786 | return clock_slowed; | ||
787 | } | ||
788 | |||
789 | /** | ||
790 | * apm_do_busy - inform the BIOS the CPU is busy | ||
791 | * | ||
792 | * Request that the BIOS brings the CPU back to full performance. | ||
793 | */ | ||
794 | |||
795 | static void apm_do_busy(void) | ||
796 | { | ||
797 | u32 dummy; | ||
798 | |||
799 | if (clock_slowed || ALWAYS_CALL_BUSY) { | ||
800 | (void) apm_bios_call_simple(APM_FUNC_BUSY, 0, 0, &dummy); | ||
801 | clock_slowed = 0; | ||
802 | } | ||
803 | } | ||
804 | |||
805 | /* | ||
806 | * If no process has really been interested in | ||
807 | * the CPU for some time, we want to call BIOS | ||
808 | * power management - we probably want | ||
809 | * to conserve power. | ||
810 | */ | ||
811 | #define IDLE_CALC_LIMIT (HZ * 100) | ||
812 | #define IDLE_LEAKY_MAX 16 | ||
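| |||
| /* | ||
| * apm_cpu_idle() below re-estimates idleness every idle_period | ||
| * jiffies (default 100/3 = 33, integer division): it scales the | ||
| * stime delta to a percentage of the elapsed jiffies and uses BIOS | ||
| * idle calls only while that exceeds idle_threshold. The leaky | ||
| * bucket then decides whether each pass also runs the default idle | ||
| * routine: an apm_do_idle() that returns within the same jiffy | ||
| * leaks one unit, one that spans a tick refills the bucket to | ||
| * IDLE_LEAKY_MAX, and once the bucket is dry the loop falls through | ||
| * to default_idle() as well. | ||
| */ | ||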
813 | |||
814 | static void (*original_pm_idle)(void); | ||
815 | |||
816 | extern void default_idle(void); | ||
817 | |||
818 | /** | ||
819 | * apm_cpu_idle - cpu idling for APM capable Linux | ||
820 | * | ||
821 | * This is the idling function the kernel executes when APM is available. It | ||
822 | * tries to do BIOS power management based on the average system idle time. | ||
823 | * Furthermore it calls the system default idle routine. | ||
824 | */ | ||
825 | |||
826 | static void apm_cpu_idle(void) | ||
827 | { | ||
828 | static int use_apm_idle; /* = 0 */ | ||
829 | static unsigned int last_jiffies; /* = 0 */ | ||
830 | static unsigned int last_stime; /* = 0 */ | ||
831 | |||
832 | int apm_idle_done = 0; | ||
833 | unsigned int jiffies_since_last_check = jiffies - last_jiffies; | ||
834 | unsigned int bucket; | ||
835 | |||
836 | recalc: | ||
837 | if (jiffies_since_last_check > IDLE_CALC_LIMIT) { | ||
838 | use_apm_idle = 0; | ||
839 | last_jiffies = jiffies; | ||
840 | last_stime = current->stime; | ||
841 | } else if (jiffies_since_last_check > idle_period) { | ||
842 | unsigned int idle_percentage; | ||
843 | |||
844 | idle_percentage = current->stime - last_stime; | ||
845 | idle_percentage *= 100; | ||
846 | idle_percentage /= jiffies_since_last_check; | ||
847 | use_apm_idle = (idle_percentage > idle_threshold); | ||
848 | if (apm_info.forbid_idle) | ||
849 | use_apm_idle = 0; | ||
850 | last_jiffies = jiffies; | ||
851 | last_stime = current->stime; | ||
852 | } | ||
853 | |||
854 | bucket = IDLE_LEAKY_MAX; | ||
855 | |||
856 | while (!need_resched()) { | ||
857 | if (use_apm_idle) { | ||
858 | unsigned int t; | ||
859 | |||
860 | t = jiffies; | ||
861 | switch (apm_do_idle()) { | ||
862 | case 0: apm_idle_done = 1; | ||
863 | if (t != jiffies) { | ||
864 | if (bucket) { | ||
865 | bucket = IDLE_LEAKY_MAX; | ||
866 | continue; | ||
867 | } | ||
868 | } else if (bucket) { | ||
869 | bucket--; | ||
870 | continue; | ||
871 | } | ||
872 | break; | ||
873 | case 1: apm_idle_done = 1; | ||
874 | break; | ||
875 | default: /* BIOS refused */ | ||
876 | break; | ||
877 | } | ||
878 | } | ||
879 | if (original_pm_idle) | ||
880 | original_pm_idle(); | ||
881 | else | ||
882 | default_idle(); | ||
883 | jiffies_since_last_check = jiffies - last_jiffies; | ||
884 | if (jiffies_since_last_check > idle_period) | ||
885 | goto recalc; | ||
886 | } | ||
887 | |||
888 | if (apm_idle_done) | ||
889 | apm_do_busy(); | ||
890 | } | ||
891 | |||
892 | /** | ||
893 | * apm_power_off - ask the BIOS to power off | ||
894 | * | ||
895 | * Handle the power off sequence. This is the one piece of code we | ||
896 | * will execute even on SMP machines. In order to deal with BIOS | ||
897 | * bugs we support real mode APM BIOS power off calls. We also make | ||
898 | * the SMP call on CPU0 as some systems will only honour this call | ||
899 | * on their first cpu. | ||
900 | */ | ||
901 | |||
902 | static void apm_power_off(void) | ||
903 | { | ||
904 | unsigned char po_bios_call[] = { | ||
905 | 0xb8, 0x00, 0x10, /* movw $0x1000,ax */ | ||
906 | 0x8e, 0xd0, /* movw ax,ss */ | ||
907 | 0xbc, 0x00, 0xf0, /* movw $0xf000,sp */ | ||
908 | 0xb8, 0x07, 0x53, /* movw $0x5307,ax */ | ||
909 | 0xbb, 0x01, 0x00, /* movw $0x0001,bx */ | ||
910 | 0xb9, 0x03, 0x00, /* movw $0x0003,cx */ | ||
911 | 0xcd, 0x15 /* int $0x15 */ | ||
912 | }; | ||
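| |||
| /* | ||
| * The stub above runs in real mode via machine_real_restart(): it | ||
| * points SS:SP at a scratch stack (0x1000:0xf000) and issues | ||
| * INT 0x15 with AX=0x5307 (APM Set Power State), BX=0x0001 (all | ||
| * devices) and CX=0x0003 (off) - the real-mode twin of the | ||
| * protected-mode set_system_power_state(APM_STATE_OFF) call below. | ||
| */ | ||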
913 | |||
914 | /* | ||
915 | * This may be called on an SMP machine. | ||
916 | */ | ||
917 | #ifdef CONFIG_SMP | ||
918 | /* Some bioses don't like being called from CPU != 0 */ | ||
919 | set_cpus_allowed(current, cpumask_of_cpu(0)); | ||
920 | BUG_ON(smp_processor_id() != 0); | ||
921 | #endif | ||
922 | if (apm_info.realmode_power_off) | ||
923 | { | ||
924 | (void)apm_save_cpus(); | ||
925 | machine_real_restart(po_bios_call, sizeof(po_bios_call)); | ||
926 | } | ||
927 | else | ||
928 | (void) set_system_power_state(APM_STATE_OFF); | ||
929 | } | ||
930 | |||
931 | #ifdef CONFIG_APM_DO_ENABLE | ||
932 | |||
933 | /** | ||
934 | * apm_enable_power_management - enable BIOS APM power management | ||
935 | * @enable: enable yes/no | ||
936 | * | ||
937 | * Enable or disable the APM BIOS power services. | ||
938 | */ | ||
939 | |||
940 | static int apm_enable_power_management(int enable) | ||
941 | { | ||
942 | u32 eax; | ||
943 | |||
944 | if ((enable == 0) && (apm_info.bios.flags & APM_BIOS_DISENGAGED)) | ||
945 | return APM_NOT_ENGAGED; | ||
946 | if (apm_bios_call_simple(APM_FUNC_ENABLE_PM, APM_DEVICE_BALL, | ||
947 | enable, &eax)) | ||
948 | return (eax >> 8) & 0xff; | ||
949 | if (enable) | ||
950 | apm_info.bios.flags &= ~APM_BIOS_DISABLED; | ||
951 | else | ||
952 | apm_info.bios.flags |= APM_BIOS_DISABLED; | ||
953 | return APM_SUCCESS; | ||
954 | } | ||
955 | #endif | ||
956 | |||
957 | /** | ||
958 | * apm_get_power_status - get current power state | ||
959 | * @status: returned status | ||
960 | * @bat: battery info | ||
961 | * @life: estimated life | ||
962 | * | ||
963 | * Obtain the current power status from the APM BIOS. We return a | ||
964 | * status which gives the rough battery status, and current power | ||
965 | * source. The bat value returned gives an estimate of remaining | ||
966 | * life as a percentage and a status value for the battery. The | ||
967 | * estimated life, if reported, is a lifetime in seconds/minutes at | ||
968 | * the current power consumption. | ||
969 | */ | ||
970 | |||
971 | static int apm_get_power_status(u_short *status, u_short *bat, u_short *life) | ||
972 | { | ||
973 | u32 eax; | ||
974 | u32 ebx; | ||
975 | u32 ecx; | ||
976 | u32 edx; | ||
977 | u32 dummy; | ||
978 | |||
979 | if (apm_info.get_power_status_broken) | ||
980 | return APM_32_UNSUPPORTED; | ||
981 | if (apm_bios_call(APM_FUNC_GET_STATUS, APM_DEVICE_ALL, 0, | ||
982 | &eax, &ebx, &ecx, &edx, &dummy)) | ||
983 | return (eax >> 8) & 0xff; | ||
984 | *status = ebx; | ||
985 | *bat = ecx; | ||
986 | if (apm_info.get_power_status_swabinminutes) { | ||
987 | *life = swab16((u16)edx); | ||
988 | *life |= 0x8000; | ||
989 | } else | ||
990 | *life = edx; | ||
991 | return APM_SUCCESS; | ||
992 | } | ||
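| |||
| /* | ||
| * The swab path matches BIOSes flagged by swab_apm_power_in_minutes() | ||
| * below, which report remaining life in minutes with the bytes | ||
| * swapped: a raw DX of 0x2301 becomes swab16() -> 0x0123 = 291, and | ||
| * bit 15 is forced on because that is how conforming BIOSes flag | ||
| * "minutes rather than seconds" (see the units decode in | ||
| * apm_get_info()). | ||
| */ | ||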
993 | |||
994 | #if 0 | ||
995 | static int apm_get_battery_status(u_short which, u_short *status, | ||
996 | u_short *bat, u_short *life, u_short *nbat) | ||
997 | { | ||
998 | u32 eax; | ||
999 | u32 ebx; | ||
1000 | u32 ecx; | ||
1001 | u32 edx; | ||
1002 | u32 esi; | ||
1003 | |||
1004 | if (apm_info.connection_version < 0x0102) { | ||
1005 | /* pretend we only have one battery. */ | ||
1006 | if (which != 1) | ||
1007 | return APM_BAD_DEVICE; | ||
1008 | *nbat = 1; | ||
1009 | return apm_get_power_status(status, bat, life); | ||
1010 | } | ||
1011 | |||
1012 | if (apm_bios_call(APM_FUNC_GET_STATUS, (0x8000 | (which)), 0, &eax, | ||
1013 | &ebx, &ecx, &edx, &esi)) | ||
1014 | return (eax >> 8) & 0xff; | ||
1015 | *status = ebx; | ||
1016 | *bat = ecx; | ||
1017 | *life = edx; | ||
1018 | *nbat = esi; | ||
1019 | return APM_SUCCESS; | ||
1020 | } | ||
1021 | #endif | ||
1022 | |||
1023 | /** | ||
1024 | * apm_engage_power_management - enable PM on a device | ||
1025 | * @device: identity of device | ||
1026 | * @enable: on/off | ||
1027 | * | ||
1028 | * Activate or deactivate power management on either a specific device | ||
1029 | * or the entire system (%APM_DEVICE_ALL). | ||
1030 | */ | ||
1031 | |||
1032 | static int apm_engage_power_management(u_short device, int enable) | ||
1033 | { | ||
1034 | u32 eax; | ||
1035 | |||
1036 | if ((enable == 0) && (device == APM_DEVICE_ALL) | ||
1037 | && (apm_info.bios.flags & APM_BIOS_DISABLED)) | ||
1038 | return APM_DISABLED; | ||
1039 | if (apm_bios_call_simple(APM_FUNC_ENGAGE_PM, device, enable, &eax)) | ||
1040 | return (eax >> 8) & 0xff; | ||
1041 | if (device == APM_DEVICE_ALL) { | ||
1042 | if (enable) | ||
1043 | apm_info.bios.flags &= ~APM_BIOS_DISENGAGED; | ||
1044 | else | ||
1045 | apm_info.bios.flags |= APM_BIOS_DISENGAGED; | ||
1046 | } | ||
1047 | return APM_SUCCESS; | ||
1048 | } | ||
1049 | |||
1050 | #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) | ||
1051 | |||
1052 | /** | ||
1053 | * apm_console_blank - blank the display | ||
1054 | * @blank: on/off | ||
1055 | * | ||
1056 | * Attempt to blank the console, first by blanking just video device | ||
1057 | * zero; if that fails (some BIOSes don't support it), blank all video | ||
1058 | * devices. Typically the BIOS will do laptop backlight and | ||
1059 | * monitor powerdown for us. | ||
1060 | */ | ||
1061 | |||
1062 | static int apm_console_blank(int blank) | ||
1063 | { | ||
1064 | int error; | ||
1065 | u_short state; | ||
1066 | |||
1067 | state = blank ? APM_STATE_STANDBY : APM_STATE_READY; | ||
1068 | /* Blank the first display device */ | ||
1069 | error = set_power_state(0x100, state); | ||
1070 | if ((error != APM_SUCCESS) && (error != APM_NO_ERROR)) { | ||
1071 | /* try to blank them all instead */ | ||
1072 | error = set_power_state(0x1ff, state); | ||
1073 | if ((error != APM_SUCCESS) && (error != APM_NO_ERROR)) | ||
1074 | /* try to blank device one instead */ | ||
1075 | error = set_power_state(0x101, state); | ||
1076 | } | ||
1077 | if ((error == APM_SUCCESS) || (error == APM_NO_ERROR)) | ||
1078 | return 1; | ||
1079 | if (error == APM_NOT_ENGAGED) { | ||
1080 | static int tried; | ||
1081 | int eng_error; | ||
1082 | if (tried++ == 0) { | ||
1083 | eng_error = apm_engage_power_management(APM_DEVICE_ALL, 1); | ||
1084 | if (eng_error) { | ||
1085 | apm_error("set display", error); | ||
1086 | apm_error("engage interface", eng_error); | ||
1087 | return 0; | ||
1088 | } else | ||
1089 | return apm_console_blank(blank); | ||
1090 | } | ||
1091 | } | ||
1092 | apm_error("set display", error); | ||
1093 | return 0; | ||
1094 | } | ||
1095 | #endif | ||
1096 | |||
1097 | static int queue_empty(struct apm_user *as) | ||
1098 | { | ||
1099 | return as->event_head == as->event_tail; | ||
1100 | } | ||
1101 | |||
1102 | static apm_event_t get_queued_event(struct apm_user *as) | ||
1103 | { | ||
1104 | as->event_tail = (as->event_tail + 1) % APM_MAX_EVENTS; | ||
1105 | return as->events[as->event_tail]; | ||
1106 | } | ||
1107 | |||
1108 | static void queue_event(apm_event_t event, struct apm_user *sender) | ||
1109 | { | ||
1110 | struct apm_user * as; | ||
1111 | |||
1112 | spin_lock(&user_list_lock); | ||
1113 | if (user_list == NULL) | ||
1114 | goto out; | ||
1115 | for (as = user_list; as != NULL; as = as->next) { | ||
1116 | if ((as == sender) || (!as->reader)) | ||
1117 | continue; | ||
1118 | as->event_head = (as->event_head + 1) % APM_MAX_EVENTS; | ||
1119 | if (as->event_head == as->event_tail) { | ||
1120 | static int notified; | ||
1121 | |||
1122 | if (notified++ == 0) | ||
1123 | printk(KERN_ERR "apm: an event queue overflowed\n"); | ||
1124 | as->event_tail = (as->event_tail + 1) % APM_MAX_EVENTS; | ||
1125 | } | ||
1126 | as->events[as->event_head] = event; | ||
1127 | if ((!as->suser) || (!as->writer)) | ||
1128 | continue; | ||
1129 | switch (event) { | ||
1130 | case APM_SYS_SUSPEND: | ||
1131 | case APM_USER_SUSPEND: | ||
1132 | as->suspends_pending++; | ||
1133 | suspends_pending++; | ||
1134 | break; | ||
1135 | |||
1136 | case APM_SYS_STANDBY: | ||
1137 | case APM_USER_STANDBY: | ||
1138 | as->standbys_pending++; | ||
1139 | standbys_pending++; | ||
1140 | break; | ||
1141 | } | ||
1142 | } | ||
1143 | wake_up_interruptible(&apm_waitqueue); | ||
1144 | out: | ||
1145 | spin_unlock(&user_list_lock); | ||
1146 | } | ||
1147 | |||
1148 | static void set_time(void) | ||
1149 | { | ||
1150 | if (got_clock_diff) { /* Must know time zone in order to set clock */ | ||
1151 | xtime.tv_sec = get_cmos_time() + clock_cmos_diff; | ||
1152 | xtime.tv_nsec = 0; | ||
1153 | } | ||
1154 | } | ||
1155 | |||
1156 | static void get_time_diff(void) | ||
1157 | { | ||
1158 | #ifndef CONFIG_APM_RTC_IS_GMT | ||
1159 | /* | ||
1160 | * Estimate time zone so that set_time can update the clock | ||
1161 | */ | ||
1162 | clock_cmos_diff = -get_cmos_time(); | ||
1163 | clock_cmos_diff += get_seconds(); | ||
1164 | got_clock_diff = 1; | ||
1165 | #endif | ||
1166 | } | ||
1167 | |||
1168 | static void reinit_timer(void) | ||
1169 | { | ||
1170 | #ifdef INIT_TIMER_AFTER_SUSPEND | ||
1171 | unsigned long flags; | ||
1172 | extern spinlock_t i8253_lock; | ||
1173 | |||
1174 | spin_lock_irqsave(&i8253_lock, flags); | ||
1175 | /* set the clock to 100 Hz */ | ||
1176 | outb_p(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */ | ||
1177 | udelay(10); | ||
1178 | outb_p(LATCH & 0xff, PIT_CH0); /* LSB */ | ||
1179 | udelay(10); | ||
1180 | outb(LATCH >> 8, PIT_CH0); /* MSB */ | ||
1181 | udelay(10); | ||
1182 | spin_unlock_irqrestore(&i8253_lock, flags); | ||
1183 | #endif | ||
1184 | } | ||
1185 | |||
1186 | static int suspend(int vetoable) | ||
1187 | { | ||
1188 | int err; | ||
1189 | struct apm_user *as; | ||
1190 | |||
1191 | if (pm_send_all(PM_SUSPEND, (void *)3)) { | ||
1192 | /* Vetoed */ | ||
1193 | if (vetoable) { | ||
1194 | if (apm_info.connection_version > 0x100) | ||
1195 | set_system_power_state(APM_STATE_REJECT); | ||
1196 | err = -EBUSY; | ||
1197 | ignore_sys_suspend = 0; | ||
1198 | printk(KERN_WARNING "apm: suspend was vetoed.\n"); | ||
1199 | goto out; | ||
1200 | } | ||
1201 | printk(KERN_CRIT "apm: suspend was vetoed, but suspending anyway.\n"); | ||
1202 | } | ||
1203 | |||
1204 | device_suspend(PMSG_SUSPEND); | ||
1205 | local_irq_disable(); | ||
1206 | device_power_down(PMSG_SUSPEND); | ||
1207 | |||
1208 | /* serialize with the timer interrupt */ | ||
1209 | write_seqlock(&xtime_lock); | ||
1210 | |||
1211 | /* protect against access to timer chip registers */ | ||
1212 | spin_lock(&i8253_lock); | ||
1213 | |||
1214 | get_time_diff(); | ||
1215 | /* | ||
1216 | * Irq spinlock must be dropped around set_system_power_state. | ||
1217 | * We'll undo any timer changes due to interrupts below. | ||
1218 | */ | ||
1219 | spin_unlock(&i8253_lock); | ||
1220 | write_sequnlock(&xtime_lock); | ||
1221 | local_irq_enable(); | ||
1222 | |||
1223 | save_processor_state(); | ||
1224 | err = set_system_power_state(APM_STATE_SUSPEND); | ||
1225 | restore_processor_state(); | ||
1226 | |||
1227 | local_irq_disable(); | ||
1228 | write_seqlock(&xtime_lock); | ||
1229 | spin_lock(&i8253_lock); | ||
1230 | reinit_timer(); | ||
1231 | set_time(); | ||
1232 | ignore_normal_resume = 1; | ||
1233 | |||
1234 | spin_unlock(&i8253_lock); | ||
1235 | write_sequnlock(&xtime_lock); | ||
1236 | |||
1237 | if (err == APM_NO_ERROR) | ||
1238 | err = APM_SUCCESS; | ||
1239 | if (err != APM_SUCCESS) | ||
1240 | apm_error("suspend", err); | ||
1241 | err = (err == APM_SUCCESS) ? 0 : -EIO; | ||
1242 | device_power_up(); | ||
1243 | local_irq_enable(); | ||
1244 | device_resume(); | ||
1245 | pm_send_all(PM_RESUME, (void *)0); | ||
1246 | queue_event(APM_NORMAL_RESUME, NULL); | ||
1247 | out: | ||
1248 | spin_lock(&user_list_lock); | ||
1249 | for (as = user_list; as != NULL; as = as->next) { | ||
1250 | as->suspend_wait = 0; | ||
1251 | as->suspend_result = err; | ||
1252 | } | ||
1253 | spin_unlock(&user_list_lock); | ||
1254 | wake_up_interruptible(&apm_suspend_waitqueue); | ||
1255 | return err; | ||
1256 | } | ||
1257 | |||
1258 | static void standby(void) | ||
1259 | { | ||
1260 | int err; | ||
1261 | |||
1262 | local_irq_disable(); | ||
1263 | device_power_down(PMSG_SUSPEND); | ||
1264 | /* serialize with the timer interrupt */ | ||
1265 | write_seqlock(&xtime_lock); | ||
1266 | /* If needed, notify drivers here */ | ||
1267 | get_time_diff(); | ||
1268 | write_sequnlock(&xtime_lock); | ||
1269 | local_irq_enable(); | ||
1270 | |||
1271 | err = set_system_power_state(APM_STATE_STANDBY); | ||
1272 | if ((err != APM_SUCCESS) && (err != APM_NO_ERROR)) | ||
1273 | apm_error("standby", err); | ||
1274 | |||
1275 | local_irq_disable(); | ||
1276 | device_power_up(); | ||
1277 | local_irq_enable(); | ||
1278 | } | ||
1279 | |||
1280 | static apm_event_t get_event(void) | ||
1281 | { | ||
1282 | int error; | ||
1283 | apm_event_t event; | ||
1284 | apm_eventinfo_t info; | ||
1285 | |||
1286 | static int notified; | ||
1287 | |||
1288 | /* we don't use the eventinfo */ | ||
1289 | error = apm_get_event(&event, &info); | ||
1290 | if (error == APM_SUCCESS) | ||
1291 | return event; | ||
1292 | |||
1293 | if ((error != APM_NO_EVENTS) && (notified++ == 0)) | ||
1294 | apm_error("get_event", error); | ||
1295 | |||
1296 | return 0; | ||
1297 | } | ||
1298 | |||
1299 | static void check_events(void) | ||
1300 | { | ||
1301 | apm_event_t event; | ||
1302 | static unsigned long last_resume; | ||
1303 | static int ignore_bounce; | ||
1304 | |||
1305 | while ((event = get_event()) != 0) { | ||
1306 | if (debug) { | ||
1307 | if (event <= NR_APM_EVENT_NAME) | ||
1308 | printk(KERN_DEBUG "apm: received %s notify\n", | ||
1309 | apm_event_name[event - 1]); | ||
1310 | else | ||
1311 | printk(KERN_DEBUG "apm: received unknown " | ||
1312 | "event 0x%02x\n", event); | ||
1313 | } | ||
1314 | if (ignore_bounce | ||
1315 | && ((jiffies - last_resume) > bounce_interval)) | ||
1316 | ignore_bounce = 0; | ||
1317 | |||
1318 | switch (event) { | ||
1319 | case APM_SYS_STANDBY: | ||
1320 | case APM_USER_STANDBY: | ||
1321 | queue_event(event, NULL); | ||
1322 | if (standbys_pending <= 0) | ||
1323 | standby(); | ||
1324 | break; | ||
1325 | |||
1326 | case APM_USER_SUSPEND: | ||
1327 | #ifdef CONFIG_APM_IGNORE_USER_SUSPEND | ||
1328 | if (apm_info.connection_version > 0x100) | ||
1329 | set_system_power_state(APM_STATE_REJECT); | ||
1330 | break; | ||
1331 | #endif | ||
1332 | case APM_SYS_SUSPEND: | ||
1333 | if (ignore_bounce) { | ||
1334 | if (apm_info.connection_version > 0x100) | ||
1335 | set_system_power_state(APM_STATE_REJECT); | ||
1336 | break; | ||
1337 | } | ||
1338 | /* | ||
1339 | * If we are already processing a SUSPEND, | ||
1340 | * then further SUSPEND events from the BIOS | ||
1341 | * will be ignored. We also return here to | ||
1342 | * cope with the fact that the Thinkpads keep | ||
1343 | * sending a SUSPEND event until something else | ||
1344 | * happens! | ||
1345 | */ | ||
1346 | if (ignore_sys_suspend) | ||
1347 | return; | ||
1348 | ignore_sys_suspend = 1; | ||
1349 | queue_event(event, NULL); | ||
1350 | if (suspends_pending <= 0) | ||
1351 | (void) suspend(1); | ||
1352 | break; | ||
1353 | |||
1354 | case APM_NORMAL_RESUME: | ||
1355 | case APM_CRITICAL_RESUME: | ||
1356 | case APM_STANDBY_RESUME: | ||
1357 | ignore_sys_suspend = 0; | ||
1358 | last_resume = jiffies; | ||
1359 | ignore_bounce = 1; | ||
1360 | if ((event != APM_NORMAL_RESUME) | ||
1361 | || (ignore_normal_resume == 0)) { | ||
1362 | write_seqlock_irq(&xtime_lock); | ||
1363 | set_time(); | ||
1364 | write_sequnlock_irq(&xtime_lock); | ||
1365 | device_resume(); | ||
1366 | pm_send_all(PM_RESUME, (void *)0); | ||
1367 | queue_event(event, NULL); | ||
1368 | } | ||
1369 | ignore_normal_resume = 0; | ||
1370 | break; | ||
1371 | |||
1372 | case APM_CAPABILITY_CHANGE: | ||
1373 | case APM_LOW_BATTERY: | ||
1374 | case APM_POWER_STATUS_CHANGE: | ||
1375 | queue_event(event, NULL); | ||
1376 | /* If needed, notify drivers here */ | ||
1377 | break; | ||
1378 | |||
1379 | case APM_UPDATE_TIME: | ||
1380 | write_seqlock_irq(&xtime_lock); | ||
1381 | set_time(); | ||
1382 | write_sequnlock_irq(&xtime_lock); | ||
1383 | break; | ||
1384 | |||
1385 | case APM_CRITICAL_SUSPEND: | ||
1386 | /* | ||
1387 | * We are not allowed to reject a critical suspend. | ||
1388 | */ | ||
1389 | (void) suspend(0); | ||
1390 | break; | ||
1391 | } | ||
1392 | } | ||
1393 | } | ||
1394 | |||
1395 | static void apm_event_handler(void) | ||
1396 | { | ||
1397 | static int pending_count = 4; | ||
1398 | int err; | ||
1399 | |||
1400 | if ((standbys_pending > 0) || (suspends_pending > 0)) { | ||
1401 | if ((apm_info.connection_version > 0x100) && | ||
1402 | (pending_count-- <= 0)) { | ||
1403 | pending_count = 4; | ||
1404 | if (debug) | ||
1405 | printk(KERN_DEBUG "apm: setting state busy\n"); | ||
1406 | err = set_system_power_state(APM_STATE_BUSY); | ||
1407 | if (err) | ||
1408 | apm_error("busy", err); | ||
1409 | } | ||
1410 | } else | ||
1411 | pending_count = 4; | ||
1412 | check_events(); | ||
1413 | } | ||
1414 | |||
1415 | /* | ||
1416 | * This is the APM thread main loop. | ||
1417 | */ | ||
1418 | |||
1419 | static void apm_mainloop(void) | ||
1420 | { | ||
1421 | DECLARE_WAITQUEUE(wait, current); | ||
1422 | |||
1423 | add_wait_queue(&apm_waitqueue, &wait); | ||
1424 | set_current_state(TASK_INTERRUPTIBLE); | ||
1425 | for (;;) { | ||
1426 | schedule_timeout(APM_CHECK_TIMEOUT); | ||
1427 | if (exit_kapmd) | ||
1428 | break; | ||
1429 | /* | ||
1430 | * Ok, check all events, check for idle (and mark us sleeping | ||
1431 | * so as not to count towards the load average).. | ||
1432 | */ | ||
1433 | set_current_state(TASK_INTERRUPTIBLE); | ||
1434 | apm_event_handler(); | ||
1435 | } | ||
1436 | remove_wait_queue(&apm_waitqueue, &wait); | ||
1437 | } | ||
1438 | |||
1439 | static int check_apm_user(struct apm_user *as, const char *func) | ||
1440 | { | ||
1441 | if ((as == NULL) || (as->magic != APM_BIOS_MAGIC)) { | ||
1442 | printk(KERN_ERR "apm: %s passed bad filp\n", func); | ||
1443 | return 1; | ||
1444 | } | ||
1445 | return 0; | ||
1446 | } | ||
1447 | |||
1448 | static ssize_t do_read(struct file *fp, char __user *buf, size_t count, loff_t *ppos) | ||
1449 | { | ||
1450 | struct apm_user * as; | ||
1451 | int i; | ||
1452 | apm_event_t event; | ||
1453 | |||
1454 | as = fp->private_data; | ||
1455 | if (check_apm_user(as, "read")) | ||
1456 | return -EIO; | ||
1457 | if ((int)count < sizeof(apm_event_t)) | ||
1458 | return -EINVAL; | ||
1459 | if ((queue_empty(as)) && (fp->f_flags & O_NONBLOCK)) | ||
1460 | return -EAGAIN; | ||
1461 | wait_event_interruptible(apm_waitqueue, !queue_empty(as)); | ||
1462 | i = count; | ||
1463 | while ((i >= sizeof(event)) && !queue_empty(as)) { | ||
1464 | event = get_queued_event(as); | ||
1465 | if (copy_to_user(buf, &event, sizeof(event))) { | ||
1466 | if (i < count) | ||
1467 | break; | ||
1468 | return -EFAULT; | ||
1469 | } | ||
1470 | switch (event) { | ||
1471 | case APM_SYS_SUSPEND: | ||
1472 | case APM_USER_SUSPEND: | ||
1473 | as->suspends_read++; | ||
1474 | break; | ||
1475 | |||
1476 | case APM_SYS_STANDBY: | ||
1477 | case APM_USER_STANDBY: | ||
1478 | as->standbys_read++; | ||
1479 | break; | ||
1480 | } | ||
1481 | buf += sizeof(event); | ||
1482 | i -= sizeof(event); | ||
1483 | } | ||
1484 | if (i < count) | ||
1485 | return count - i; | ||
1486 | if (signal_pending(current)) | ||
1487 | return -ERESTARTSYS; | ||
1488 | return 0; | ||
1489 | } | ||
1490 | |||
1491 | static unsigned int do_poll(struct file *fp, poll_table * wait) | ||
1492 | { | ||
1493 | struct apm_user * as; | ||
1494 | |||
1495 | as = fp->private_data; | ||
1496 | if (check_apm_user(as, "poll")) | ||
1497 | return 0; | ||
1498 | poll_wait(fp, &apm_waitqueue, wait); | ||
1499 | if (!queue_empty(as)) | ||
1500 | return POLLIN | POLLRDNORM; | ||
1501 | return 0; | ||
1502 | } | ||
1503 | |||
1504 | static int do_ioctl(struct inode * inode, struct file *filp, | ||
1505 | u_int cmd, u_long arg) | ||
1506 | { | ||
1507 | struct apm_user * as; | ||
1508 | |||
1509 | as = filp->private_data; | ||
1510 | if (check_apm_user(as, "ioctl")) | ||
1511 | return -EIO; | ||
1512 | if ((!as->suser) || (!as->writer)) | ||
1513 | return -EPERM; | ||
1514 | switch (cmd) { | ||
1515 | case APM_IOC_STANDBY: | ||
1516 | if (as->standbys_read > 0) { | ||
1517 | as->standbys_read--; | ||
1518 | as->standbys_pending--; | ||
1519 | standbys_pending--; | ||
1520 | } else | ||
1521 | queue_event(APM_USER_STANDBY, as); | ||
1522 | if (standbys_pending <= 0) | ||
1523 | standby(); | ||
1524 | break; | ||
1525 | case APM_IOC_SUSPEND: | ||
1526 | if (as->suspends_read > 0) { | ||
1527 | as->suspends_read--; | ||
1528 | as->suspends_pending--; | ||
1529 | suspends_pending--; | ||
1530 | } else | ||
1531 | queue_event(APM_USER_SUSPEND, as); | ||
1532 | if (suspends_pending <= 0) { | ||
1533 | return suspend(1); | ||
1534 | } else { | ||
1535 | as->suspend_wait = 1; | ||
1536 | wait_event_interruptible(apm_suspend_waitqueue, | ||
1537 | as->suspend_wait == 0); | ||
1538 | return as->suspend_result; | ||
1539 | } | ||
1540 | break; | ||
1541 | default: | ||
1542 | return -EINVAL; | ||
1543 | } | ||
1544 | return 0; | ||
1545 | } | ||
1546 | |||
1547 | static int do_release(struct inode * inode, struct file * filp) | ||
1548 | { | ||
1549 | struct apm_user * as; | ||
1550 | |||
1551 | as = filp->private_data; | ||
1552 | if (check_apm_user(as, "release")) | ||
1553 | return 0; | ||
1554 | filp->private_data = NULL; | ||
1555 | if (as->standbys_pending > 0) { | ||
1556 | standbys_pending -= as->standbys_pending; | ||
1557 | if (standbys_pending <= 0) | ||
1558 | standby(); | ||
1559 | } | ||
1560 | if (as->suspends_pending > 0) { | ||
1561 | suspends_pending -= as->suspends_pending; | ||
1562 | if (suspends_pending <= 0) | ||
1563 | (void) suspend(1); | ||
1564 | } | ||
1565 | spin_lock(&user_list_lock); | ||
1566 | if (user_list == as) | ||
1567 | user_list = as->next; | ||
1568 | else { | ||
1569 | struct apm_user * as1; | ||
1570 | |||
1571 | for (as1 = user_list; | ||
1572 | (as1 != NULL) && (as1->next != as); | ||
1573 | as1 = as1->next) | ||
1574 | ; | ||
1575 | if (as1 == NULL) | ||
1576 | printk(KERN_ERR "apm: filp not in user list\n"); | ||
1577 | else | ||
1578 | as1->next = as->next; | ||
1579 | } | ||
1580 | spin_unlock(&user_list_lock); | ||
1581 | kfree(as); | ||
1582 | return 0; | ||
1583 | } | ||
1584 | |||
1585 | static int do_open(struct inode * inode, struct file * filp) | ||
1586 | { | ||
1587 | struct apm_user * as; | ||
1588 | |||
1589 | as = kmalloc(sizeof(*as), GFP_KERNEL); | ||
1590 | if (as == NULL) { | ||
1591 | printk(KERN_ERR "apm: cannot allocate struct of size %zu bytes\n", | ||
1592 | sizeof(*as)); | ||
1593 | return -ENOMEM; | ||
1594 | } | ||
1595 | as->magic = APM_BIOS_MAGIC; | ||
1596 | as->event_tail = as->event_head = 0; | ||
1597 | as->suspends_pending = as->standbys_pending = 0; | ||
1598 | as->suspends_read = as->standbys_read = 0; | ||
1599 | /* | ||
1600 | * XXX - this is a tiny bit broken, when we consider BSD | ||
1601 | * process accounting. If the device is opened by root, we | ||
1602 | * instantly flag that we used superuser privs. Who knows, | ||
1603 | * we might close the device immediately without doing a | ||
1604 | * privileged operation -- cevans | ||
1605 | */ | ||
1606 | as->suser = capable(CAP_SYS_ADMIN); | ||
1607 | as->writer = (filp->f_mode & FMODE_WRITE) == FMODE_WRITE; | ||
1608 | as->reader = (filp->f_mode & FMODE_READ) == FMODE_READ; | ||
1609 | spin_lock(&user_list_lock); | ||
1610 | as->next = user_list; | ||
1611 | user_list = as; | ||
1612 | spin_unlock(&user_list_lock); | ||
1613 | filp->private_data = as; | ||
1614 | return 0; | ||
1615 | } | ||
1616 | |||
1617 | static int apm_get_info(char *buf, char **start, off_t fpos, int length) | ||
1618 | { | ||
1619 | char * p; | ||
1620 | unsigned short bx; | ||
1621 | unsigned short cx; | ||
1622 | unsigned short dx; | ||
1623 | int error; | ||
1624 | unsigned short ac_line_status = 0xff; | ||
1625 | unsigned short battery_status = 0xff; | ||
1626 | unsigned short battery_flag = 0xff; | ||
1627 | int percentage = -1; | ||
1628 | int time_units = -1; | ||
1629 | char *units = "?"; | ||
1630 | |||
1631 | p = buf; | ||
1632 | |||
1633 | if ((num_online_cpus() == 1) && | ||
1634 | !(error = apm_get_power_status(&bx, &cx, &dx))) { | ||
1635 | ac_line_status = (bx >> 8) & 0xff; | ||
1636 | battery_status = bx & 0xff; | ||
1637 | if ((cx & 0xff) != 0xff) | ||
1638 | percentage = cx & 0xff; | ||
1639 | |||
1640 | if (apm_info.connection_version > 0x100) { | ||
1641 | battery_flag = (cx >> 8) & 0xff; | ||
1642 | if (dx != 0xffff) { | ||
1643 | units = (dx & 0x8000) ? "min" : "sec"; | ||
1644 | time_units = dx & 0x7fff; | ||
1645 | } | ||
1646 | } | ||
1647 | } | ||
1648 | /* Arguments, with symbols from linux/apm_bios.h. Information is | ||
1649 | from the Get Power Status (0x0a) call unless otherwise noted. | ||
1650 | |||
1651 | 0) Linux driver version (this will change if format changes) | ||
1652 | 1) APM BIOS Version. Usually 1.0, 1.1 or 1.2. | ||
1653 | 2) APM flags from APM Installation Check (0x00): | ||
1654 | bit 0: APM_16_BIT_SUPPORT | ||
1655 | bit 1: APM_32_BIT_SUPPORT | ||
1656 | bit 2: APM_IDLE_SLOWS_CLOCK | ||
1657 | bit 3: APM_BIOS_DISABLED | ||
1658 | bit 4: APM_BIOS_DISENGAGED | ||
1659 | 3) AC line status | ||
1660 | 0x00: Off-line | ||
1661 | 0x01: On-line | ||
1662 | 0x02: On backup power (BIOS >= 1.1 only) | ||
1663 | 0xff: Unknown | ||
1664 | 4) Battery status | ||
1665 | 0x00: High | ||
1666 | 0x01: Low | ||
1667 | 0x02: Critical | ||
1668 | 0x03: Charging | ||
1669 | 0x04: Selected battery not present (BIOS >= 1.2 only) | ||
1670 | 0xff: Unknown | ||
1671 | 5) Battery flag | ||
1672 | bit 0: High | ||
1673 | bit 1: Low | ||
1674 | bit 2: Critical | ||
1675 | bit 3: Charging | ||
1676 | bit 7: No system battery | ||
1677 | 0xff: Unknown | ||
1678 | 6) Remaining battery life (percentage of charge): | ||
1679 | 0-100: valid | ||
1680 | -1: Unknown | ||
1681 | 7) Remaining battery life (time units): | ||
1682 | Number of remaining minutes or seconds | ||
1683 | -1: Unknown | ||
1684 | 8) min = minutes; sec = seconds */ | ||
1685 | |||
1686 | p += sprintf(p, "%s %d.%d 0x%02x 0x%02x 0x%02x 0x%02x %d%% %d %s\n", | ||
1687 | driver_version, | ||
1688 | (apm_info.bios.version >> 8) & 0xff, | ||
1689 | apm_info.bios.version & 0xff, | ||
1690 | apm_info.bios.flags, | ||
1691 | ac_line_status, | ||
1692 | battery_status, | ||
1693 | battery_flag, | ||
1694 | percentage, | ||
1695 | time_units, | ||
1696 | units); | ||
1697 | |||
1698 | return p - buf; | ||
1699 | } | ||
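| |||
| /* | ||
| * For example, the sprintf() above yields this /proc/apm line for a | ||
| * 1.2 BIOS (16/32 bit support, flags 0x03) on AC power with a | ||
| * battery charging at 95% and no time estimate: | ||
| * | ||
| * 1.16ac 1.2 0x03 0x01 0x03 0x08 95% -1 ? | ||
| */ | ||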
1700 | |||
1701 | static int apm(void *unused) | ||
1702 | { | ||
1703 | unsigned short bx; | ||
1704 | unsigned short cx; | ||
1705 | unsigned short dx; | ||
1706 | int error; | ||
1707 | char * power_stat; | ||
1708 | char * bat_stat; | ||
1709 | |||
1710 | kapmd_running = 1; | ||
1711 | |||
1712 | daemonize("kapmd"); | ||
1713 | |||
1714 | current->flags |= PF_NOFREEZE; | ||
1715 | |||
1716 | #ifdef CONFIG_SMP | ||
1717 | /* 2002/08/01 - WT | ||
1718 | * This is to avoid random crashes at boot time during initialization | ||
1719 | * on SMP systems in case of "apm=power-off" mode. Seen on ASUS A7M266D. | ||
1720 | * Some bioses don't like being called from CPU != 0. | ||
1721 | * Method suggested by Ingo Molnar. | ||
1722 | */ | ||
1723 | set_cpus_allowed(current, cpumask_of_cpu(0)); | ||
1724 | BUG_ON(smp_processor_id() != 0); | ||
1725 | #endif | ||
1726 | |||
1727 | if (apm_info.connection_version == 0) { | ||
1728 | apm_info.connection_version = apm_info.bios.version; | ||
1729 | if (apm_info.connection_version > 0x100) { | ||
1730 | /* | ||
1731 | * We only support BIOSes up to version 1.2 | ||
1732 | */ | ||
1733 | if (apm_info.connection_version > 0x0102) | ||
1734 | apm_info.connection_version = 0x0102; | ||
1735 | error = apm_driver_version(&apm_info.connection_version); | ||
1736 | if (error != APM_SUCCESS) { | ||
1737 | apm_error("driver version", error); | ||
1738 | /* Fall back to an APM 1.0 connection. */ | ||
1739 | apm_info.connection_version = 0x100; | ||
1740 | } | ||
1741 | } | ||
1742 | } | ||
1743 | |||
1744 | if (debug) | ||
1745 | printk(KERN_INFO "apm: Connection version %d.%d\n", | ||
1746 | (apm_info.connection_version >> 8) & 0xff, | ||
1747 | apm_info.connection_version & 0xff); | ||
1748 | |||
1749 | #ifdef CONFIG_APM_DO_ENABLE | ||
1750 | if (apm_info.bios.flags & APM_BIOS_DISABLED) { | ||
1751 | /* | ||
1752 | * This call causes my NEC UltraLite Versa 33/C to hang if it | ||
1753 | * is booted with PM disabled but not in the docking station. | ||
1754 | * Unfortunate ... | ||
1755 | */ | ||
1756 | error = apm_enable_power_management(1); | ||
1757 | if (error) { | ||
1758 | apm_error("enable power management", error); | ||
1759 | return -1; | ||
1760 | } | ||
1761 | } | ||
1762 | #endif | ||
1763 | |||
1764 | if ((apm_info.bios.flags & APM_BIOS_DISENGAGED) | ||
1765 | && (apm_info.connection_version > 0x0100)) { | ||
1766 | error = apm_engage_power_management(APM_DEVICE_ALL, 1); | ||
1767 | if (error) { | ||
1768 | apm_error("engage power management", error); | ||
1769 | return -1; | ||
1770 | } | ||
1771 | } | ||
1772 | |||
1773 | if (debug && (num_online_cpus() == 1 || smp)) { | ||
1774 | error = apm_get_power_status(&bx, &cx, &dx); | ||
1775 | if (error) | ||
1776 | printk(KERN_INFO "apm: power status not available\n"); | ||
1777 | else { | ||
1778 | switch ((bx >> 8) & 0xff) { | ||
1779 | case 0: power_stat = "off line"; break; | ||
1780 | case 1: power_stat = "on line"; break; | ||
1781 | case 2: power_stat = "on backup power"; break; | ||
1782 | default: power_stat = "unknown"; break; | ||
1783 | } | ||
1784 | switch (bx & 0xff) { | ||
1785 | case 0: bat_stat = "high"; break; | ||
1786 | case 1: bat_stat = "low"; break; | ||
1787 | case 2: bat_stat = "critical"; break; | ||
1788 | case 3: bat_stat = "charging"; break; | ||
1789 | default: bat_stat = "unknown"; break; | ||
1790 | } | ||
1791 | printk(KERN_INFO | ||
1792 | "apm: AC %s, battery status %s, battery life ", | ||
1793 | power_stat, bat_stat); | ||
1794 | if ((cx & 0xff) == 0xff) | ||
1795 | printk("unknown\n"); | ||
1796 | else | ||
1797 | printk("%d%%\n", cx & 0xff); | ||
1798 | if (apm_info.connection_version > 0x100) { | ||
1799 | printk(KERN_INFO | ||
1800 | "apm: battery flag 0x%02x, battery life ", | ||
1801 | (cx >> 8) & 0xff); | ||
1802 | if (dx == 0xffff) | ||
1803 | printk("unknown\n"); | ||
1804 | else | ||
1805 | printk("%d %s\n", dx & 0x7fff, | ||
1806 | (dx & 0x8000) ? | ||
1807 | "minutes" : "seconds"); | ||
1808 | } | ||
1809 | } | ||
1810 | } | ||
1811 | |||
1812 | /* Install our power off handler.. */ | ||
1813 | if (power_off) | ||
1814 | pm_power_off = apm_power_off; | ||
1815 | |||
1816 | if (num_online_cpus() == 1 || smp) { | ||
1817 | #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) | ||
1818 | console_blank_hook = apm_console_blank; | ||
1819 | #endif | ||
1820 | apm_mainloop(); | ||
1821 | #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) | ||
1822 | console_blank_hook = NULL; | ||
1823 | #endif | ||
1824 | } | ||
1825 | kapmd_running = 0; | ||
1826 | |||
1827 | return 0; | ||
1828 | } | ||
1829 | |||
1830 | #ifndef MODULE | ||
1831 | static int __init apm_setup(char *str) | ||
1832 | { | ||
1833 | int invert; | ||
1834 | |||
1835 | while ((str != NULL) && (*str != '\0')) { | ||
1836 | if (strncmp(str, "off", 3) == 0) | ||
1837 | apm_disabled = 1; | ||
1838 | if (strncmp(str, "on", 2) == 0) | ||
1839 | apm_disabled = 0; | ||
1840 | if ((strncmp(str, "bounce-interval=", 16) == 0) || | ||
1841 | (strncmp(str, "bounce_interval=", 16) == 0)) | ||
1842 | bounce_interval = simple_strtol(str + 16, NULL, 0); | ||
1843 | if ((strncmp(str, "idle-threshold=", 15) == 0) || | ||
1844 | (strncmp(str, "idle_threshold=", 15) == 0)) | ||
1845 | idle_threshold = simple_strtol(str + 15, NULL, 0); | ||
1846 | if ((strncmp(str, "idle-period=", 12) == 0) || | ||
1847 | (strncmp(str, "idle_period=", 12) == 0)) | ||
1848 | idle_period = simple_strtol(str + 12, NULL, 0); | ||
1849 | invert = (strncmp(str, "no-", 3) == 0) || | ||
1850 | (strncmp(str, "no_", 3) == 0); | ||
1851 | if (invert) | ||
1852 | str += 3; | ||
1853 | if (strncmp(str, "debug", 5) == 0) | ||
1854 | debug = !invert; | ||
1855 | if ((strncmp(str, "power-off", 9) == 0) || | ||
1856 | (strncmp(str, "power_off", 9) == 0)) | ||
1857 | power_off = !invert; | ||
1858 | if (strncmp(str, "smp", 3) == 0) | ||
1859 | { | ||
1860 | smp = !invert; | ||
1861 | idle_threshold = 100; | ||
1862 | } | ||
1863 | if ((strncmp(str, "allow-ints", 10) == 0) || | ||
1864 | (strncmp(str, "allow_ints", 10) == 0)) | ||
1865 | apm_info.allow_ints = !invert; | ||
1866 | if ((strncmp(str, "broken-psr", 10) == 0) || | ||
1867 | (strncmp(str, "broken_psr", 10) == 0)) | ||
1868 | apm_info.get_power_status_broken = !invert; | ||
1869 | if ((strncmp(str, "realmode-power-off", 18) == 0) || | ||
1870 | (strncmp(str, "realmode_power_off", 18) == 0)) | ||
1871 | apm_info.realmode_power_off = !invert; | ||
1872 | str = strchr(str, ','); | ||
1873 | if (str != NULL) | ||
1874 | str += strspn(str, ", \t"); | ||
1875 | } | ||
1876 | return 1; | ||
1877 | } | ||
1878 | |||
1879 | __setup("apm=", apm_setup); | ||
1880 | #endif | ||
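| |||
| /* | ||
| * Example of the boot parameter the parser above accepts: | ||
| * | ||
| * apm=on,debug,idle-threshold=90,idle-period=200,power-off | ||
| * | ||
| * A "no-" (or "no_") prefix inverts the flag options, e.g. | ||
| * apm=no-debug. | ||
| */ | ||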
1881 | |||
1882 | static struct file_operations apm_bios_fops = { | ||
1883 | .owner = THIS_MODULE, | ||
1884 | .read = do_read, | ||
1885 | .poll = do_poll, | ||
1886 | .ioctl = do_ioctl, | ||
1887 | .open = do_open, | ||
1888 | .release = do_release, | ||
1889 | }; | ||
1890 | |||
1891 | static struct miscdevice apm_device = { | ||
1892 | APM_MINOR_DEV, | ||
1893 | "apm_bios", | ||
1894 | &apm_bios_fops | ||
1895 | }; | ||
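| |||
| /* | ||
| * Userspace talks to this driver through the misc device above, | ||
| * /dev/apm_bios. A rough sketch of the loop apmd(8) implements: | ||
| * | ||
| * int fd = open("/dev/apm_bios", O_RDWR); | ||
| * apm_event_t ev; | ||
| * while (read(fd, &ev, sizeof(ev)) == sizeof(ev)) | ||
| * if (ev == APM_USER_SUSPEND || ev == APM_SYS_SUSPEND) | ||
| * ioctl(fd, APM_IOC_SUSPEND); | ||
| * | ||
| * Reading a suspend event bumps suspends_read (see do_read()); the | ||
| * ioctl then decrements suspends_pending, and suspend() runs once | ||
| * every acknowledging reader has answered. | ||
| */ | ||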
1896 | |||
1897 | |||
1898 | /* Simple "print if true" callback */ | ||
1899 | static int __init print_if_true(struct dmi_system_id *d) | ||
1900 | { | ||
1901 | printk("%s\n", d->ident); | ||
1902 | return 0; | ||
1903 | } | ||
1904 | |||
1905 | /* | ||
1906 | * Some BIOSes enable the PS/2 mouse (touchpad) at resume, even if it was | ||
1907 | * disabled before the suspend. Linux used to get terribly confused by that. | ||
1908 | */ | ||
1909 | static int __init broken_ps2_resume(struct dmi_system_id *d) | ||
1910 | { | ||
1911 | printk(KERN_INFO "%s machine detected. Mousepad Resume Bug workaround hopefully not needed.\n", d->ident); | ||
1912 | return 0; | ||
1913 | } | ||
1914 | |||
1915 | /* Some bioses have a broken protected mode poweroff and need to use realmode */ | ||
1916 | static int __init set_realmode_power_off(struct dmi_system_id *d) | ||
1917 | { | ||
1918 | if (apm_info.realmode_power_off == 0) { | ||
1919 | apm_info.realmode_power_off = 1; | ||
1920 | printk(KERN_INFO "%s bios detected. Using realmode poweroff only.\n", d->ident); | ||
1921 | } | ||
1922 | return 0; | ||
1923 | } | ||
1924 | |||
1925 | /* Some laptops require interrupts to be enabled during APM calls */ | ||
1926 | static int __init set_apm_ints(struct dmi_system_id *d) | ||
1927 | { | ||
1928 | if (apm_info.allow_ints == 0) { | ||
1929 | apm_info.allow_ints = 1; | ||
1930 | printk(KERN_INFO "%s machine detected. Enabling interrupts during APM calls.\n", d->ident); | ||
1931 | } | ||
1932 | return 0; | ||
1933 | } | ||
1934 | |||
1935 | /* Some APM bioses corrupt memory or just plain do not work */ | ||
1936 | static int __init apm_is_horked(struct dmi_system_id *d) | ||
1937 | { | ||
1938 | if (apm_info.disabled == 0) { | ||
1939 | apm_info.disabled = 1; | ||
1940 | printk(KERN_INFO "%s machine detected. Disabling APM.\n", d->ident); | ||
1941 | } | ||
1942 | return 0; | ||
1943 | } | ||
1944 | |||
1945 | static int __init apm_is_horked_d850md(struct dmi_system_id *d) | ||
1946 | { | ||
1947 | if (apm_info.disabled == 0) { | ||
1948 | apm_info.disabled = 1; | ||
1949 | printk(KERN_INFO "%s machine detected. Disabling APM.\n", d->ident); | ||
1950 | printk(KERN_INFO "This bug is fixed in bios P15 which is available for \n"); | ||
1951 | printk(KERN_INFO "download from support.intel.com \n"); | ||
1952 | } | ||
1953 | return 0; | ||
1954 | } | ||
1955 | |||
1956 | /* Some APM bioses hang on APM idle calls */ | ||
1957 | static int __init apm_likes_to_melt(struct dmi_system_id *d) | ||
1958 | { | ||
1959 | if (apm_info.forbid_idle == 0) { | ||
1960 | apm_info.forbid_idle = 1; | ||
1961 | printk(KERN_INFO "%s machine detected. Disabling APM idle calls.\n", d->ident); | ||
1962 | } | ||
1963 | return 0; | ||
1964 | } | ||
1965 | |||
1966 | /* | ||
1967 | * Check for clue-free BIOS implementations that use | ||
1968 | * the following QA technique: | ||
1969 | * | ||
1970 | * [ Write BIOS Code ]<------ | ||
1971 | * | ^ | ||
1972 | * < Does it Compile >----N-- | ||
1973 | * |Y ^ | ||
1974 | * < Does it Boot Win98 >-N-- | ||
1975 | * |Y | ||
1976 | * [Ship It] | ||
1977 | * | ||
1978 | * Phoenix A04 08/24/2000 is known bad (Dell Inspiron 5000e) | ||
1979 | * Phoenix A07 09/29/2000 is known good (Dell Inspiron 5000) | ||
1980 | */ | ||
1981 | static int __init broken_apm_power(struct dmi_system_id *d) | ||
1982 | { | ||
1983 | apm_info.get_power_status_broken = 1; | ||
1984 | printk(KERN_WARNING "BIOS strings suggest APM bugs, disabling power status reporting.\n"); | ||
1985 | return 0; | ||
1986 | } | ||
1987 | |||
1988 | /* | ||
1989 | * This BIOS swaps the APM minute reporting bytes over (many Sony laptops | ||
1990 | * have this problem). | ||
1991 | */ | ||
1992 | static int __init swab_apm_power_in_minutes(struct dmi_system_id *d) | ||
1993 | { | ||
1994 | apm_info.get_power_status_swabinminutes = 1; | ||
1995 | printk(KERN_WARNING "BIOS strings suggest APM reports battery life in minutes and wrong byte order.\n"); | ||
1996 | return 0; | ||
1997 | } | ||
1998 | |||
1999 | static struct dmi_system_id __initdata apm_dmi_table[] = { | ||
2000 | { | ||
2001 | print_if_true, | ||
2002 | KERN_WARNING "IBM T23 - BIOS 1.03b+ and controller firmware 1.02+ may be needed for Linux APM.", | ||
2003 | { DMI_MATCH(DMI_SYS_VENDOR, "IBM"), | ||
2004 | DMI_MATCH(DMI_BIOS_VERSION, "1AET38WW (1.01b)"), }, | ||
2005 | }, | ||
2006 | { /* Handle problems with APM on the C600 */ | ||
2007 | broken_ps2_resume, "Dell Latitude C600", | ||
2008 | { DMI_MATCH(DMI_SYS_VENDOR, "Dell"), | ||
2009 | DMI_MATCH(DMI_PRODUCT_NAME, "Latitude C600"), }, | ||
2010 | }, | ||
2011 | { /* Allow interrupts during suspend on Dell Latitude laptops */ | ||
2012 | set_apm_ints, "Dell Latitude", | ||
2013 | { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), | ||
2014 | DMI_MATCH(DMI_PRODUCT_NAME, "Latitude C510"), } | ||
2015 | }, | ||
2016 | { /* APM crashes */ | ||
2017 | apm_is_horked, "Dell Inspiron 2500", | ||
2018 | { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), | ||
2019 | DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 2500"), | ||
2020 | DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"), | ||
2021 | DMI_MATCH(DMI_BIOS_VERSION,"A11"), }, | ||
2022 | }, | ||
2023 | { /* Allow interrupts during suspend on Dell Inspiron laptops */ | ||
2024 | set_apm_ints, "Dell Inspiron", | ||
2025 | { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), | ||
2026 | DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 4000"), }, | ||
2027 | }, | ||
2028 | { /* Handle problems with APM on Inspiron 5000e */ | ||
2029 | broken_apm_power, "Dell Inspiron 5000e", | ||
2030 | { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), | ||
2031 | DMI_MATCH(DMI_BIOS_VERSION, "A04"), | ||
2032 | DMI_MATCH(DMI_BIOS_DATE, "08/24/2000"), }, | ||
2033 | }, | ||
2034 | { /* Handle problems with APM on Inspiron 2500 */ | ||
2035 | broken_apm_power, "Dell Inspiron 2500", | ||
2036 | { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), | ||
2037 | DMI_MATCH(DMI_BIOS_VERSION, "A12"), | ||
2038 | DMI_MATCH(DMI_BIOS_DATE, "02/04/2002"), }, | ||
2039 | }, | ||
2040 | { /* APM crashes */ | ||
2041 | apm_is_horked, "Dell Dimension 4100", | ||
2042 | { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), | ||
2043 | DMI_MATCH(DMI_PRODUCT_NAME, "XPS-Z"), | ||
2044 | DMI_MATCH(DMI_BIOS_VENDOR,"Intel Corp."), | ||
2045 | DMI_MATCH(DMI_BIOS_VERSION,"A11"), }, | ||
2046 | }, | ||
2047 | { /* Allow interrupts during suspend on Compaq Laptops*/ | ||
2048 | set_apm_ints, "Compaq 12XL125", | ||
2049 | { DMI_MATCH(DMI_SYS_VENDOR, "Compaq"), | ||
2050 | DMI_MATCH(DMI_PRODUCT_NAME, "Compaq PC"), | ||
2051 | DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), | ||
2052 | DMI_MATCH(DMI_BIOS_VERSION,"4.06"), }, | ||
2053 | }, | ||
2054 | { /* Allow interrupts during APM or the clock goes slow */ | ||
2055 | set_apm_ints, "ASUSTeK", | ||
2056 | { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."), | ||
2057 | DMI_MATCH(DMI_PRODUCT_NAME, "L8400K series Notebook PC"), }, | ||
2058 | }, | ||
2059 | { /* APM blows on shutdown */ | ||
2060 | apm_is_horked, "ABIT KX7-333[R]", | ||
2061 | { DMI_MATCH(DMI_BOARD_VENDOR, "ABIT"), | ||
2062 | DMI_MATCH(DMI_BOARD_NAME, "VT8367-8233A (KX7-333[R])"), }, | ||
2063 | }, | ||
2064 | { /* APM crashes */ | ||
2065 | apm_is_horked, "Trigem Delhi3", | ||
2066 | { DMI_MATCH(DMI_SYS_VENDOR, "TriGem Computer, Inc"), | ||
2067 | DMI_MATCH(DMI_PRODUCT_NAME, "Delhi3"), }, | ||
2068 | }, | ||
2069 | { /* APM crashes */ | ||
2070 | apm_is_horked, "Fujitsu-Siemens", | ||
2071 | { DMI_MATCH(DMI_BIOS_VENDOR, "hoenix/FUJITSU SIEMENS"), | ||
2072 | DMI_MATCH(DMI_BIOS_VERSION, "Version1.01"), }, | ||
2073 | }, | ||
2074 | { /* APM crashes */ | ||
2075 | apm_is_horked_d850md, "Intel D850MD", | ||
2076 | { DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."), | ||
2077 | DMI_MATCH(DMI_BIOS_VERSION, "MV85010A.86A.0016.P07.0201251536"), }, | ||
2078 | }, | ||
2079 | { /* APM crashes */ | ||
2080 | apm_is_horked, "Intel D810EMO", | ||
2081 | { DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."), | ||
2082 | DMI_MATCH(DMI_BIOS_VERSION, "MO81010A.86A.0008.P04.0004170800"), }, | ||
2083 | }, | ||
2084 | { /* APM crashes */ | ||
2085 | apm_is_horked, "Dell XPS-Z", | ||
2086 | { DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."), | ||
2087 | DMI_MATCH(DMI_BIOS_VERSION, "A11"), | ||
2088 | DMI_MATCH(DMI_PRODUCT_NAME, "XPS-Z"), }, | ||
2089 | }, | ||
2090 | { /* APM crashes */ | ||
2091 | apm_is_horked, "Sharp PC-PJ/AX", | ||
2092 | { DMI_MATCH(DMI_SYS_VENDOR, "SHARP"), | ||
2093 | DMI_MATCH(DMI_PRODUCT_NAME, "PC-PJ/AX"), | ||
2094 | DMI_MATCH(DMI_BIOS_VENDOR,"SystemSoft"), | ||
2095 | DMI_MATCH(DMI_BIOS_VERSION,"Version R2.08"), }, | ||
2096 | }, | ||
2097 | { /* APM crashes */ | ||
2098 | apm_is_horked, "Dell Inspiron 2500", | ||
2099 | { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), | ||
2100 | DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 2500"), | ||
2101 | DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"), | ||
2102 | DMI_MATCH(DMI_BIOS_VERSION,"A11"), }, | ||
2103 | }, | ||
2104 | { /* APM idle hangs */ | ||
2105 | apm_likes_to_melt, "Jabil AMD", | ||
2106 | { DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), | ||
2107 | DMI_MATCH(DMI_BIOS_VERSION, "0AASNP06"), }, | ||
2108 | }, | ||
2109 | { /* APM idle hangs */ | ||
2110 | apm_likes_to_melt, "AMI Bios", | ||
2111 | { DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), | ||
2112 | DMI_MATCH(DMI_BIOS_VERSION, "0AASNP05"), }, | ||
2113 | }, | ||
2114 | { /* Handle problems with APM on Sony Vaio PCG-N505X(DE) */ | ||
2115 | swab_apm_power_in_minutes, "Sony VAIO", | ||
2116 | { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), | ||
2117 | DMI_MATCH(DMI_BIOS_VERSION, "R0206H"), | ||
2118 | DMI_MATCH(DMI_BIOS_DATE, "08/23/99"), }, | ||
2119 | }, | ||
2120 | { /* Handle problems with APM on Sony Vaio PCG-N505VX */ | ||
2121 | swab_apm_power_in_minutes, "Sony VAIO", | ||
2122 | { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), | ||
2123 | DMI_MATCH(DMI_BIOS_VERSION, "W2K06H0"), | ||
2124 | DMI_MATCH(DMI_BIOS_DATE, "02/03/00"), }, | ||
2125 | }, | ||
2126 | { /* Handle problems with APM on Sony Vaio PCG-XG29 */ | ||
2127 | swab_apm_power_in_minutes, "Sony VAIO", | ||
2128 | { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), | ||
2129 | DMI_MATCH(DMI_BIOS_VERSION, "R0117A0"), | ||
2130 | DMI_MATCH(DMI_BIOS_DATE, "04/25/00"), }, | ||
2131 | }, | ||
2132 | { /* Handle problems with APM on Sony Vaio PCG-Z600NE */ | ||
2133 | swab_apm_power_in_minutes, "Sony VAIO", | ||
2134 | { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), | ||
2135 | DMI_MATCH(DMI_BIOS_VERSION, "R0121Z1"), | ||
2136 | DMI_MATCH(DMI_BIOS_DATE, "05/11/00"), }, | ||
2137 | }, | ||
2138 | { /* Handle problems with APM on Sony Vaio PCG-Z600NE */ | ||
2139 | swab_apm_power_in_minutes, "Sony VAIO", | ||
2140 | { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), | ||
2141 | DMI_MATCH(DMI_BIOS_VERSION, "WME01Z1"), | ||
2142 | DMI_MATCH(DMI_BIOS_DATE, "08/11/00"), }, | ||
2143 | }, | ||
2144 | { /* Handle problems with APM on Sony Vaio PCG-Z600LEK(DE) */ | ||
2145 | swab_apm_power_in_minutes, "Sony VAIO", | ||
2146 | { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), | ||
2147 | DMI_MATCH(DMI_BIOS_VERSION, "R0206Z3"), | ||
2148 | DMI_MATCH(DMI_BIOS_DATE, "12/25/00"), }, | ||
2149 | }, | ||
2150 | { /* Handle problems with APM on Sony Vaio PCG-Z505LS */ | ||
2151 | swab_apm_power_in_minutes, "Sony VAIO", | ||
2152 | { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), | ||
2153 | DMI_MATCH(DMI_BIOS_VERSION, "R0203D0"), | ||
2154 | DMI_MATCH(DMI_BIOS_DATE, "05/12/00"), }, | ||
2155 | }, | ||
2156 | { /* Handle problems with APM on Sony Vaio PCG-Z505LS */ | ||
2157 | swab_apm_power_in_minutes, "Sony VAIO", | ||
2158 | { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), | ||
2159 | DMI_MATCH(DMI_BIOS_VERSION, "R0203Z3"), | ||
2160 | DMI_MATCH(DMI_BIOS_DATE, "08/25/00"), }, | ||
2161 | }, | ||
2162 | { /* Handle problems with APM on Sony Vaio PCG-Z505LS (with updated BIOS) */ | ||
2163 | swab_apm_power_in_minutes, "Sony VAIO", | ||
2164 | { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), | ||
2165 | DMI_MATCH(DMI_BIOS_VERSION, "R0209Z3"), | ||
2166 | DMI_MATCH(DMI_BIOS_DATE, "05/12/01"), }, | ||
2167 | }, | ||
2168 | { /* Handle problems with APM on Sony Vaio PCG-F104K */ | ||
2169 | swab_apm_power_in_minutes, "Sony VAIO", | ||
2170 | { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), | ||
2171 | DMI_MATCH(DMI_BIOS_VERSION, "R0204K2"), | ||
2172 | DMI_MATCH(DMI_BIOS_DATE, "08/28/00"), }, | ||
2173 | }, | ||
2174 | |||
2175 | { /* Handle problems with APM on Sony Vaio PCG-C1VN/C1VE */ | ||
2176 | swab_apm_power_in_minutes, "Sony VAIO", | ||
2177 | { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), | ||
2178 | DMI_MATCH(DMI_BIOS_VERSION, "R0208P1"), | ||
2179 | DMI_MATCH(DMI_BIOS_DATE, "11/09/00"), }, | ||
2180 | }, | ||
2181 | { /* Handle problems with APM on Sony Vaio PCG-C1VE */ | ||
2182 | swab_apm_power_in_minutes, "Sony VAIO", | ||
2183 | { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), | ||
2184 | DMI_MATCH(DMI_BIOS_VERSION, "R0204P1"), | ||
2185 | DMI_MATCH(DMI_BIOS_DATE, "09/12/00"), }, | ||
2186 | }, | ||
2187 | { /* Handle problems with APM on Sony Vaio PCG-C1VE */ | ||
2188 | swab_apm_power_in_minutes, "Sony VAIO", | ||
2189 | { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), | ||
2190 | DMI_MATCH(DMI_BIOS_VERSION, "WXPO1Z3"), | ||
2191 | DMI_MATCH(DMI_BIOS_DATE, "10/26/01"), }, | ||
2192 | }, | ||
2193 | { /* broken PM poweroff bios */ | ||
2194 | set_realmode_power_off, "Award Software v4.60 PGMA", | ||
2195 | { DMI_MATCH(DMI_BIOS_VENDOR, "Award Software International, Inc."), | ||
2196 | DMI_MATCH(DMI_BIOS_VERSION, "4.60 PGMA"), | ||
2197 | DMI_MATCH(DMI_BIOS_DATE, "134526184"), }, | ||
2198 | }, | ||
2199 | |||
2200 | /* Generic per vendor APM settings */ | ||
2201 | |||
2202 | { /* Allow interrupts during suspend on IBM laptops */ | ||
2203 | set_apm_ints, "IBM", | ||
2204 | { DMI_MATCH(DMI_SYS_VENDOR, "IBM"), }, | ||
2205 | }, | ||
2206 | |||
2207 | { } | ||
2208 | }; | ||
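Entries in this table are walked by dmi_check_system() at the top of apm_init() below: an entry fires only when every populated DMI_MATCH slot occurs as a substring of the corresponding firmware-provided string, and the empty { } entry terminates the walk. A minimal user-space analogue of that matching idea, using a simplified struct rather than the kernel's real dmi_system_id layout:

#include <stdio.h>
#include <string.h>

struct quirk {
	int (*callback)(const struct quirk *q);
	const char *ident;
	const char *vendor;	/* NULL means "don't care" */
	const char *product;
};

static int apm_is_horked(const struct quirk *q)
{
	printf("%s: disabling APM\n", q->ident);
	return 0;
}

static const struct quirk table[] = {
	{ apm_is_horked, "Dell Inspiron 2500",
	  "Dell Computer Corporation", "Inspiron 2500" },
	{ NULL, NULL, NULL, NULL }	/* terminator, like { } above */
};

static void check_system(const char *vendor, const char *product)
{
	const struct quirk *q;

	for (q = table; q->callback; q++) {
		if (q->vendor && !strstr(vendor, q->vendor))
			continue;
		if (q->product && !strstr(product, q->product))
			continue;
		q->callback(q);	/* every populated slot matched */
	}
}

int main(void)
{
	check_system("Dell Computer Corporation", "Inspiron 2500");
	return 0;
}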
2209 | |||
2210 | /* | ||
2211 | * Just start the APM thread. We do NOT want to do APM BIOS | ||
2212 | * calls from anything but the APM thread, if for no other reason | ||
2213 | * than the fact that we don't trust the APM BIOS. This way, | ||
2214 | * most common APM BIOS problems that lead to protection errors | ||
2215 | * etc. will at least be somewhat contained... | ||
2216 | * | ||
2217 | * In short, if something bad happens, at least we have a choice | ||
2218 | * of just killing the APM thread. | ||
2219 | */ | ||
2220 | static int __init apm_init(void) | ||
2221 | { | ||
2222 | struct proc_dir_entry *apm_proc; | ||
2223 | int ret; | ||
2224 | int i; | ||
2225 | |||
2226 | dmi_check_system(apm_dmi_table); | ||
2227 | |||
2228 | if (apm_info.bios.version == 0) { | ||
2229 | printk(KERN_INFO "apm: BIOS not found.\n"); | ||
2230 | return -ENODEV; | ||
2231 | } | ||
2232 | printk(KERN_INFO | ||
2233 | "apm: BIOS version %d.%d Flags 0x%02x (Driver version %s)\n", | ||
2234 | ((apm_info.bios.version >> 8) & 0xff), | ||
2235 | (apm_info.bios.version & 0xff), | ||
2236 | apm_info.bios.flags, | ||
2237 | driver_version); | ||
2238 | if ((apm_info.bios.flags & APM_32_BIT_SUPPORT) == 0) { | ||
2239 | printk(KERN_INFO "apm: no 32 bit BIOS support\n"); | ||
2240 | return -ENODEV; | ||
2241 | } | ||
2242 | |||
2243 | if (allow_ints) | ||
2244 | apm_info.allow_ints = 1; | ||
2245 | if (broken_psr) | ||
2246 | apm_info.get_power_status_broken = 1; | ||
2247 | if (realmode_power_off) | ||
2248 | apm_info.realmode_power_off = 1; | ||
2249 | /* User can override, but default is to trust DMI */ | ||
2250 | if (apm_disabled != -1) | ||
2251 | apm_info.disabled = apm_disabled; | ||
2252 | |||
2253 | /* | ||
2254 | * Fix for the Compaq Contura 3/25c which reports BIOS version 0.1 | ||
2255 | * but is reportedly a 1.0 BIOS. | ||
2256 | */ | ||
2257 | if (apm_info.bios.version == 0x001) | ||
2258 | apm_info.bios.version = 0x100; | ||
2259 | |||
2260 | /* BIOS < 1.2 doesn't set cseg_16_len */ | ||
2261 | if (apm_info.bios.version < 0x102) | ||
2262 | apm_info.bios.cseg_16_len = 0; /* 64k */ | ||
2263 | |||
2264 | if (debug) { | ||
2265 | printk(KERN_INFO "apm: entry %x:%lx cseg16 %x dseg %x", | ||
2266 | apm_info.bios.cseg, apm_info.bios.offset, | ||
2267 | apm_info.bios.cseg_16, apm_info.bios.dseg); | ||
2268 | if (apm_info.bios.version > 0x100) | ||
2269 | printk(" cseg len %x, dseg len %x", | ||
2270 | apm_info.bios.cseg_len, | ||
2271 | apm_info.bios.dseg_len); | ||
2272 | if (apm_info.bios.version > 0x101) | ||
2273 | printk(" cseg16 len %x", apm_info.bios.cseg_16_len); | ||
2274 | printk("\n"); | ||
2275 | } | ||
2276 | |||
2277 | if (apm_info.disabled) { | ||
2278 | printk(KERN_NOTICE "apm: disabled on user request.\n"); | ||
2279 | return -ENODEV; | ||
2280 | } | ||
2281 | if ((num_online_cpus() > 1) && !power_off && !smp) { | ||
2282 | printk(KERN_NOTICE "apm: disabled - APM is not SMP safe.\n"); | ||
2283 | apm_info.disabled = 1; | ||
2284 | return -ENODEV; | ||
2285 | } | ||
2286 | if (PM_IS_ACTIVE()) { | ||
2287 | printk(KERN_NOTICE "apm: overridden by ACPI.\n"); | ||
2288 | apm_info.disabled = 1; | ||
2289 | return -ENODEV; | ||
2290 | } | ||
2291 | pm_active = 1; | ||
2292 | |||
2293 | /* | ||
2294 | * Set up a segment that references the real mode segment 0x40 | ||
2295 | * that extends up to the end of page zero (that we have reserved). | ||
2296 | * This is for buggy BIOS's that refer to (real mode) segment 0x40 | ||
2297 | * even though they are called in protected mode. | ||
2298 | */ | ||
2299 | set_base(bad_bios_desc, __va((unsigned long)0x40 << 4)); | ||
2300 | _set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4)); | ||
2301 | |||
2302 | apm_bios_entry.offset = apm_info.bios.offset; | ||
2303 | apm_bios_entry.segment = APM_CS; | ||
2304 | |||
2305 | for (i = 0; i < NR_CPUS; i++) { | ||
2306 | set_base(per_cpu(cpu_gdt_table, i)[APM_CS >> 3], | ||
2307 | __va((unsigned long)apm_info.bios.cseg << 4)); | ||
2308 | set_base(per_cpu(cpu_gdt_table, i)[APM_CS_16 >> 3], | ||
2309 | __va((unsigned long)apm_info.bios.cseg_16 << 4)); | ||
2310 | set_base(per_cpu(cpu_gdt_table, i)[APM_DS >> 3], | ||
2311 | __va((unsigned long)apm_info.bios.dseg << 4)); | ||
2312 | #ifndef APM_RELAX_SEGMENTS | ||
2313 | if (apm_info.bios.version == 0x100) { | ||
2314 | #endif | ||
2315 | /* For ASUS motherboard, Award BIOS rev 110 (and others?) */ | ||
2316 | _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_CS >> 3], 64 * 1024 - 1); | ||
2317 | /* For some unknown machine. */ | ||
2318 | _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_CS_16 >> 3], 64 * 1024 - 1); | ||
2319 | /* For the DEC Hinote Ultra CT475 (and others?) */ | ||
2320 | _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_DS >> 3], 64 * 1024 - 1); | ||
2321 | #ifndef APM_RELAX_SEGMENTS | ||
2322 | } else { | ||
2323 | _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_CS >> 3], | ||
2324 | (apm_info.bios.cseg_len - 1) & 0xffff); | ||
2325 | _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_CS_16 >> 3], | ||
2326 | (apm_info.bios.cseg_16_len - 1) & 0xffff); | ||
2327 | _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_DS >> 3], | ||
2328 | (apm_info.bios.dseg_len - 1) & 0xffff); | ||
2329 | /* workaround for broken BIOSes */ | ||
2330 | if (apm_info.bios.cseg_len <= apm_info.bios.offset) | ||
2331 | _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_CS >> 3], 64 * 1024 -1); | ||
2332 | if (apm_info.bios.dseg_len <= 0x40) { /* 0x40 * 4kB == 64kB */ | ||
2333 | /* for the BIOS that assumes granularity = 1 */ | ||
2334 | per_cpu(cpu_gdt_table, i)[APM_DS >> 3].b |= 0x800000; | ||
2335 | printk(KERN_NOTICE "apm: we set the granularity of dseg.\n"); | ||
2336 | } | ||
2337 | } | ||
2338 | #endif | ||
2339 | } | ||
2340 | |||
2341 | apm_proc = create_proc_info_entry("apm", 0, NULL, apm_get_info); | ||
2342 | if (apm_proc) | ||
2343 | apm_proc->owner = THIS_MODULE; | ||
2344 | |||
2345 | ret = kernel_thread(apm, NULL, CLONE_KERNEL | SIGCHLD); | ||
2346 | if (ret < 0) { | ||
2347 | printk(KERN_ERR "apm: disabled - Unable to start kernel thread.\n"); | ||
2348 | return -ENOMEM; | ||
2349 | } | ||
2350 | |||
2351 | if (num_online_cpus() > 1 && !smp) { | ||
2352 | printk(KERN_NOTICE | ||
2353 | "apm: disabled - APM is not SMP safe (power off active).\n"); | ||
2354 | return 0; | ||
2355 | } | ||
2356 | |||
2357 | misc_register(&apm_device); | ||
2358 | |||
2359 | if (HZ != 100) | ||
2360 | idle_period = (idle_period * HZ) / 100; | ||
2361 | if (idle_threshold < 100) { | ||
2362 | original_pm_idle = pm_idle; | ||
2363 | pm_idle = apm_cpu_idle; | ||
2364 | set_pm_idle = 1; | ||
2365 | } | ||
2366 | |||
2367 | return 0; | ||
2368 | } | ||
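As the printk and the Contura quirk above imply, the BIOS version word packs the major number in its high byte and the minor in its low byte, so rewriting 0x001 as 0x100 turns a bogus "0.1" into "1.0". A quick standalone check of the decode:

#include <stdio.h>

int main(void)
{
	unsigned short version = 0x102;	/* APM 1.2 */

	printf("APM BIOS %d.%d\n",
	       (version >> 8) & 0xff,	/* major: 1 */
	       version & 0xff);		/* minor: 2 */
	return 0;
}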
2369 | |||
2370 | static void __exit apm_exit(void) | ||
2371 | { | ||
2372 | int error; | ||
2373 | |||
2374 | if (set_pm_idle) { | ||
2375 | pm_idle = original_pm_idle; | ||
2376 | /* | ||
2377 | * We are about to unload the current idle thread pm callback | ||
2378 | * (pm_idle). Wait for all processors to update cached/local | ||
2379 | * copies of pm_idle before proceeding. | ||
2380 | */ | ||
2381 | cpu_idle_wait(); | ||
2382 | } | ||
2383 | if (((apm_info.bios.flags & APM_BIOS_DISENGAGED) == 0) | ||
2384 | && (apm_info.connection_version > 0x0100)) { | ||
2385 | error = apm_engage_power_management(APM_DEVICE_ALL, 0); | ||
2386 | if (error) | ||
2387 | apm_error("disengage power management", error); | ||
2388 | } | ||
2389 | misc_deregister(&apm_device); | ||
2390 | remove_proc_entry("apm", NULL); | ||
2391 | if (power_off) | ||
2392 | pm_power_off = NULL; | ||
2393 | exit_kapmd = 1; | ||
2394 | while (kapmd_running) | ||
2395 | schedule(); | ||
2396 | pm_active = 0; | ||
2397 | } | ||
2398 | |||
2399 | module_init(apm_init); | ||
2400 | module_exit(apm_exit); | ||
2401 | |||
2402 | MODULE_AUTHOR("Stephen Rothwell"); | ||
2403 | MODULE_DESCRIPTION("Advanced Power Management"); | ||
2404 | MODULE_LICENSE("GPL"); | ||
2405 | module_param(debug, bool, 0644); | ||
2406 | MODULE_PARM_DESC(debug, "Enable debug mode"); | ||
2407 | module_param(power_off, bool, 0444); | ||
2408 | MODULE_PARM_DESC(power_off, "Enable power off"); | ||
2409 | module_param(bounce_interval, int, 0444); | ||
2410 | MODULE_PARM_DESC(bounce_interval, | ||
2411 | "Set the number of ticks to ignore suspend bounces"); | ||
2412 | module_param(allow_ints, bool, 0444); | ||
2413 | MODULE_PARM_DESC(allow_ints, "Allow interrupts during BIOS calls"); | ||
2414 | module_param(broken_psr, bool, 0444); | ||
2415 | MODULE_PARM_DESC(broken_psr, "BIOS has a broken GetPowerStatus call"); | ||
2416 | module_param(realmode_power_off, bool, 0444); | ||
2417 | MODULE_PARM_DESC(realmode_power_off, | ||
2418 | "Switch to real mode before powering off"); | ||
2419 | module_param(idle_threshold, int, 0444); | ||
2420 | MODULE_PARM_DESC(idle_threshold, | ||
2421 | "System idle percentage above which to make APM BIOS idle calls"); | ||
2422 | module_param(idle_period, int, 0444); | ||
2423 | MODULE_PARM_DESC(idle_period, | ||
2424 | "Period (in sec/100) over which to caculate the idle percentage"); | ||
2425 | module_param(smp, bool, 0444); | ||
2426 | MODULE_PARM_DESC(smp, | ||
2427 | "Set this to enable APM use on an SMP platform. Use with caution on older systems"); | ||
2428 | MODULE_ALIAS_MISCDEV(APM_MINOR_DEV); | ||
diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c new file mode 100644 index 000000000000..36d66e2077d0 --- /dev/null +++ b/arch/i386/kernel/asm-offsets.c | |||
@@ -0,0 +1,72 @@ | |||
1 | /* | ||
2 | * Generate definitions needed by assembly language modules. | ||
3 | * This code generates raw asm output which is post-processed | ||
4 | * to extract and format the required data. | ||
5 | */ | ||
6 | |||
7 | #include <linux/sched.h> | ||
8 | #include <linux/signal.h> | ||
9 | #include <linux/personality.h> | ||
10 | #include <linux/suspend.h> | ||
11 | #include <asm/ucontext.h> | ||
12 | #include "sigframe.h" | ||
13 | #include <asm/fixmap.h> | ||
14 | #include <asm/processor.h> | ||
15 | #include <asm/thread_info.h> | ||
16 | |||
17 | #define DEFINE(sym, val) \ | ||
18 | asm volatile("\n->" #sym " %0 " #val : : "i" (val)) | ||
19 | |||
20 | #define BLANK() asm volatile("\n->" : : ) | ||
21 | |||
22 | #define OFFSET(sym, str, mem) \ | ||
23 | DEFINE(sym, offsetof(struct str, mem)); | ||
24 | |||
25 | void foo(void) | ||
26 | { | ||
27 | OFFSET(SIGCONTEXT_eax, sigcontext, eax); | ||
28 | OFFSET(SIGCONTEXT_ebx, sigcontext, ebx); | ||
29 | OFFSET(SIGCONTEXT_ecx, sigcontext, ecx); | ||
30 | OFFSET(SIGCONTEXT_edx, sigcontext, edx); | ||
31 | OFFSET(SIGCONTEXT_esi, sigcontext, esi); | ||
32 | OFFSET(SIGCONTEXT_edi, sigcontext, edi); | ||
33 | OFFSET(SIGCONTEXT_ebp, sigcontext, ebp); | ||
34 | OFFSET(SIGCONTEXT_esp, sigcontext, esp); | ||
35 | OFFSET(SIGCONTEXT_eip, sigcontext, eip); | ||
36 | BLANK(); | ||
37 | |||
38 | OFFSET(CPUINFO_x86, cpuinfo_x86, x86); | ||
39 | OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor); | ||
40 | OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model); | ||
41 | OFFSET(CPUINFO_x86_mask, cpuinfo_x86, x86_mask); | ||
42 | OFFSET(CPUINFO_hard_math, cpuinfo_x86, hard_math); | ||
43 | OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level); | ||
44 | OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability); | ||
45 | OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id); | ||
46 | BLANK(); | ||
47 | |||
48 | OFFSET(TI_task, thread_info, task); | ||
49 | OFFSET(TI_exec_domain, thread_info, exec_domain); | ||
50 | OFFSET(TI_flags, thread_info, flags); | ||
51 | OFFSET(TI_status, thread_info, status); | ||
52 | OFFSET(TI_cpu, thread_info, cpu); | ||
53 | OFFSET(TI_preempt_count, thread_info, preempt_count); | ||
54 | OFFSET(TI_addr_limit, thread_info, addr_limit); | ||
55 | OFFSET(TI_restart_block, thread_info, restart_block); | ||
56 | BLANK(); | ||
57 | |||
58 | OFFSET(EXEC_DOMAIN_handler, exec_domain, handler); | ||
59 | OFFSET(RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext); | ||
60 | BLANK(); | ||
61 | |||
62 | OFFSET(pbe_address, pbe, address); | ||
63 | OFFSET(pbe_orig_address, pbe, orig_address); | ||
64 | OFFSET(pbe_next, pbe, next); | ||
65 | |||
66 | /* Offset from the sysenter stack to tss.esp0 */ | ||
67 | DEFINE(TSS_sysenter_esp0, offsetof(struct tss_struct, esp0) - | ||
68 | sizeof(struct tss_struct)); | ||
69 | |||
70 | DEFINE(PAGE_SIZE_asm, PAGE_SIZE); | ||
71 | DEFINE(VSYSCALL_BASE, __fix_to_virt(FIX_VSYSCALL)); | ||
72 | } | ||
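The DEFINE() macro works because the "i" constraint forces the compiler to substitute the constant into the asm template: compiling this file with -S yields assembly text containing "->SYM value" markers, which the build then scrapes into a header of #define lines (the exact post-processing script lives elsewhere in kbuild, not here). A standalone demonstration of the same trick on a made-up struct:

#include <stddef.h>

struct demo {
	int a;
	int b;
};

#define DEFINE(sym, val) \
	asm volatile("\n->" #sym " %0 " #val : : "i" (val))

void foo(void)
{
	/* "gcc -S demo.c" emits a line like "->DEMO_b $4 ..." in demo.s,
	 * which a sed pass can turn into "#define DEMO_b 4". */
	DEFINE(DEMO_b, offsetof(struct demo, b));
}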
diff --git a/arch/i386/kernel/bootflag.c b/arch/i386/kernel/bootflag.c new file mode 100644 index 000000000000..4c30ed01f4e1 --- /dev/null +++ b/arch/i386/kernel/bootflag.c | |||
@@ -0,0 +1,99 @@ | |||
1 | /* | ||
2 | * Implement 'Simple Boot Flag Specification 2.0' | ||
3 | */ | ||
4 | |||
5 | |||
6 | #include <linux/config.h> | ||
7 | #include <linux/types.h> | ||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/init.h> | ||
10 | #include <linux/string.h> | ||
11 | #include <linux/slab.h> | ||
12 | #include <linux/spinlock.h> | ||
13 | #include <linux/acpi.h> | ||
14 | #include <asm/io.h> | ||
15 | |||
16 | #include <linux/mc146818rtc.h> | ||
17 | |||
18 | |||
19 | #define SBF_RESERVED (0x78) | ||
20 | #define SBF_PNPOS (1<<0) | ||
21 | #define SBF_BOOTING (1<<1) | ||
22 | #define SBF_DIAG (1<<2) | ||
23 | #define SBF_PARITY (1<<7) | ||
24 | |||
25 | |||
26 | int sbf_port __initdata = -1; /* set via acpi_boot_init() */ | ||
27 | |||
28 | |||
29 | static int __init parity(u8 v) | ||
30 | { | ||
31 | int x = 0; | ||
32 | int i; | ||
33 | |||
34 | for(i=0;i<8;i++) | ||
35 | { | ||
36 | x^=(v&1); | ||
37 | v>>=1; | ||
38 | } | ||
39 | return x; | ||
40 | } | ||
41 | |||
42 | static void __init sbf_write(u8 v) | ||
43 | { | ||
44 | unsigned long flags; | ||
45 | if(sbf_port != -1) | ||
46 | { | ||
47 | v &= ~SBF_PARITY; | ||
48 | if(!parity(v)) | ||
49 | v|=SBF_PARITY; | ||
50 | |||
51 | printk(KERN_INFO "Simple Boot Flag at 0x%x set to 0x%x\n", sbf_port, v); | ||
52 | |||
53 | spin_lock_irqsave(&rtc_lock, flags); | ||
54 | CMOS_WRITE(v, sbf_port); | ||
55 | spin_unlock_irqrestore(&rtc_lock, flags); | ||
56 | } | ||
57 | } | ||
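sbf_write() thus enforces odd parity over the whole byte: it clears SBF_PARITY, then sets it again only when the remaining bits contain an even number of ones. A worked standalone example of that rule:

#include <stdio.h>

static int parity(unsigned char v)	/* XOR of all eight bits */
{
	int x = 0, i;

	for (i = 0; i < 8; i++) {
		x ^= v & 1;
		v >>= 1;
	}
	return x;
}

int main(void)
{
	unsigned char v = 0x03;	/* two bits set: even parity */

	if (!parity(v))
		v |= 0x80;	/* SBF_PARITY bit makes it odd */
	printf("0x%02x, parity=%d\n", v, parity(v));	/* 0x83, parity=1 */
	return 0;
}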
58 | |||
59 | static u8 __init sbf_read(void) | ||
60 | { | ||
61 | u8 v; | ||
62 | unsigned long flags; | ||
63 | if(sbf_port == -1) | ||
64 | return 0; | ||
65 | spin_lock_irqsave(&rtc_lock, flags); | ||
66 | v = CMOS_READ(sbf_port); | ||
67 | spin_unlock_irqrestore(&rtc_lock, flags); | ||
68 | return v; | ||
69 | } | ||
70 | |||
71 | static int __init sbf_value_valid(u8 v) | ||
72 | { | ||
73 | if(v&SBF_RESERVED) /* Reserved bits */ | ||
74 | return 0; | ||
75 | if(!parity(v)) | ||
76 | return 0; | ||
77 | return 1; | ||
78 | } | ||
79 | |||
80 | static int __init sbf_init(void) | ||
81 | { | ||
82 | u8 v; | ||
83 | if(sbf_port == -1) | ||
84 | return 0; | ||
85 | v = sbf_read(); | ||
86 | if(!sbf_value_valid(v)) | ||
87 | printk(KERN_WARNING "Simple Boot Flag value 0x%x read from CMOS RAM was invalid\n",v); | ||
88 | |||
89 | v &= ~SBF_RESERVED; | ||
90 | v &= ~SBF_BOOTING; | ||
91 | v &= ~SBF_DIAG; | ||
92 | #if defined(CONFIG_ISAPNP) | ||
93 | v |= SBF_PNPOS; | ||
94 | #endif | ||
95 | sbf_write(v); | ||
96 | return 0; | ||
97 | } | ||
98 | |||
99 | module_init(sbf_init); | ||
diff --git a/arch/i386/kernel/cpu/Makefile b/arch/i386/kernel/cpu/Makefile new file mode 100644 index 000000000000..010aecfffbc1 --- /dev/null +++ b/arch/i386/kernel/cpu/Makefile | |||
@@ -0,0 +1,19 @@ | |||
1 | # | ||
2 | # Makefile for x86-compatible CPU details and quirks | ||
3 | # | ||
4 | |||
5 | obj-y := common.o proc.o | ||
6 | |||
7 | obj-y += amd.o | ||
8 | obj-y += cyrix.o | ||
9 | obj-y += centaur.o | ||
10 | obj-y += transmeta.o | ||
11 | obj-y += intel.o intel_cacheinfo.o | ||
12 | obj-y += rise.o | ||
13 | obj-y += nexgen.o | ||
14 | obj-y += umc.o | ||
15 | |||
16 | obj-$(CONFIG_X86_MCE) += mcheck/ | ||
17 | |||
18 | obj-$(CONFIG_MTRR) += mtrr/ | ||
19 | obj-$(CONFIG_CPU_FREQ) += cpufreq/ | ||
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c new file mode 100644 index 000000000000..ae94585d0445 --- /dev/null +++ b/arch/i386/kernel/cpu/amd.c | |||
@@ -0,0 +1,249 @@ | |||
1 | #include <linux/init.h> | ||
2 | #include <linux/bitops.h> | ||
3 | #include <linux/mm.h> | ||
4 | #include <asm/io.h> | ||
5 | #include <asm/processor.h> | ||
6 | |||
7 | #include "cpu.h" | ||
8 | |||
9 | /* | ||
10 | * B step AMD K6 before B 9730xxxx have hardware bugs that can cause | ||
11 | * misexecution of code under Linux. Owners of such processors should | ||
12 | * contact AMD for precise details and a CPU swap. | ||
13 | * | ||
14 | * See http://www.multimania.com/poulot/k6bug.html | ||
15 | * http://www.amd.com/K6/k6docs/revgd.html | ||
16 | * | ||
17 | * The following test is erm.. interesting. AMD neglected to up | ||
18 | * the chip setting when fixing the bug but they also tweaked some | ||
19 | * performance at the same time.. | ||
20 | */ | ||
21 | |||
22 | extern void vide(void); | ||
23 | __asm__(".align 4\nvide: ret"); | ||
24 | |||
25 | static void __init init_amd(struct cpuinfo_x86 *c) | ||
26 | { | ||
27 | u32 l, h; | ||
28 | int mbytes = num_physpages >> (20-PAGE_SHIFT); | ||
29 | int r; | ||
30 | |||
31 | /* | ||
32 | * FIXME: We should handle the K5 here. Set up the write | ||
33 | * range and also turn on MSR 83 bits 4 and 31 (write alloc, | ||
34 | * no bus pipeline) | ||
35 | */ | ||
36 | |||
37 | /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; | ||
38 | 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ | ||
39 | clear_bit(0*32+31, c->x86_capability); | ||
40 | |||
41 | r = get_model_name(c); | ||
42 | |||
43 | switch(c->x86) | ||
44 | { | ||
45 | case 4: | ||
46 | /* | ||
47 | * General Systems BIOSen alias the cpu frequency registers | ||
48 | * of the Elan at 0x000df000. Unfortunately, one of the Linux | ||
49 | * drivers subsequently pokes it, and changes the CPU speed. | ||
50 | * Workaround : Remove the unneeded alias. | ||
51 | */ | ||
52 | #define CBAR (0xfffc) /* Configuration Base Address (32-bit) */ | ||
53 | #define CBAR_ENB (0x80000000) | ||
54 | #define CBAR_KEY (0X000000CB) | ||
55 | if (c->x86_model==9 || c->x86_model == 10) { | ||
56 | if (inl (CBAR) & CBAR_ENB) | ||
57 | outl (0 | CBAR_KEY, CBAR); | ||
58 | } | ||
59 | break; | ||
60 | case 5: | ||
61 | if( c->x86_model < 6 ) | ||
62 | { | ||
63 | /* Based on AMD doc 20734R - June 2000 */ | ||
64 | if ( c->x86_model == 0 ) { | ||
65 | clear_bit(X86_FEATURE_APIC, c->x86_capability); | ||
66 | set_bit(X86_FEATURE_PGE, c->x86_capability); | ||
67 | } | ||
68 | break; | ||
69 | } | ||
70 | |||
71 | if ( c->x86_model == 6 && c->x86_mask == 1 ) { | ||
72 | const int K6_BUG_LOOP = 1000000; | ||
73 | int n; | ||
74 | void (*f_vide)(void); | ||
75 | unsigned long d, d2; | ||
76 | |||
77 | printk(KERN_INFO "AMD K6 stepping B detected - "); | ||
78 | |||
79 | /* | ||
80 | * It looks like AMD fixed the 2.6.2 bug and improved indirect | ||
81 | * calls at the same time. | ||
82 | */ | ||
83 | |||
84 | n = K6_BUG_LOOP; | ||
85 | f_vide = vide; | ||
86 | rdtscl(d); | ||
87 | while (n--) | ||
88 | f_vide(); | ||
89 | rdtscl(d2); | ||
90 | d = d2-d; | ||
91 | |||
92 | /* Knock these two lines out if it debugs out ok */ | ||
93 | printk(KERN_INFO "AMD K6 stepping B detected - "); | ||
94 | /* -- cut here -- */ | ||
95 | if (d > 20*K6_BUG_LOOP) | ||
96 | printk("system stability may be impaired when more than 32 MB are used.\n"); | ||
97 | else | ||
98 | printk("probably OK (after B9730xxxx).\n"); | ||
99 | printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); | ||
100 | } | ||
101 | |||
102 | /* K6 with old style WHCR */ | ||
103 | if (c->x86_model < 8 || | ||
104 | (c->x86_model== 8 && c->x86_mask < 8)) { | ||
105 | /* We can only write allocate on the low 508Mb */ | ||
106 | if(mbytes>508) | ||
107 | mbytes=508; | ||
108 | |||
109 | rdmsr(MSR_K6_WHCR, l, h); | ||
110 | if ((l&0x0000FFFF)==0) { | ||
111 | unsigned long flags; | ||
112 | l=(1<<0)|((mbytes/4)<<1); | ||
113 | local_irq_save(flags); | ||
114 | wbinvd(); | ||
115 | wrmsr(MSR_K6_WHCR, l, h); | ||
116 | local_irq_restore(flags); | ||
117 | printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", | ||
118 | mbytes); | ||
119 | } | ||
120 | break; | ||
121 | } | ||
122 | |||
123 | if ((c->x86_model == 8 && c->x86_mask >7) || | ||
124 | c->x86_model == 9 || c->x86_model == 13) { | ||
125 | /* The more serious chips .. */ | ||
126 | |||
127 | if(mbytes>4092) | ||
128 | mbytes=4092; | ||
129 | |||
130 | rdmsr(MSR_K6_WHCR, l, h); | ||
131 | if ((l&0xFFFF0000)==0) { | ||
132 | unsigned long flags; | ||
133 | l=((mbytes>>2)<<22)|(1<<16); | ||
134 | local_irq_save(flags); | ||
135 | wbinvd(); | ||
136 | wrmsr(MSR_K6_WHCR, l, h); | ||
137 | local_irq_restore(flags); | ||
138 | printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", | ||
139 | mbytes); | ||
140 | } | ||
141 | |||
142 | /* Set MTRR capability flag if appropriate */ | ||
143 | if (c->x86_model == 13 || c->x86_model == 9 || | ||
144 | (c->x86_model == 8 && c->x86_mask >= 8)) | ||
145 | set_bit(X86_FEATURE_K6_MTRR, c->x86_capability); | ||
146 | break; | ||
147 | } | ||
148 | break; | ||
149 | |||
150 | case 6: /* An Athlon/Duron */ | ||
151 | |||
152 | /* Bit 15 of the Athlon-specific MSR 15 needs to be 0 | ||
153 | * to enable SSE on Palomino/Morgan/Barton CPUs. | ||
154 | * If the BIOS didn't enable it already, enable it here. | ||
155 | */ | ||
156 | if (c->x86_model >= 6 && c->x86_model <= 10) { | ||
157 | if (!cpu_has(c, X86_FEATURE_XMM)) { | ||
158 | printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); | ||
159 | rdmsr(MSR_K7_HWCR, l, h); | ||
160 | l &= ~0x00008000; | ||
161 | wrmsr(MSR_K7_HWCR, l, h); | ||
162 | set_bit(X86_FEATURE_XMM, c->x86_capability); | ||
163 | } | ||
164 | } | ||
165 | |||
166 | /* It's been determined by AMD that Athlons since model 8 stepping 1 | ||
167 | * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx, | ||
168 | * as per AMD technical note 27212 0.2. | ||
169 | */ | ||
170 | if ((c->x86_model == 8 && c->x86_mask>=1) || (c->x86_model > 8)) { | ||
171 | rdmsr(MSR_K7_CLK_CTL, l, h); | ||
172 | if ((l & 0xfff00000) != 0x20000000) { | ||
173 | printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, | ||
174 | ((l & 0x000fffff)|0x20000000)); | ||
175 | wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); | ||
176 | } | ||
177 | } | ||
178 | break; | ||
179 | } | ||
180 | |||
181 | switch (c->x86) { | ||
182 | case 15: | ||
183 | set_bit(X86_FEATURE_K8, c->x86_capability); | ||
184 | break; | ||
185 | case 6: | ||
186 | set_bit(X86_FEATURE_K7, c->x86_capability); | ||
187 | break; | ||
188 | } | ||
189 | |||
190 | display_cacheinfo(c); | ||
191 | detect_ht(c); | ||
192 | |||
193 | #ifdef CONFIG_X86_HT | ||
194 | /* AMD dual core looks like HT but isn't really. Hide it from the | ||
195 | scheduler. This works around problems with the domain scheduler. | ||
196 | Also probably gives slightly better scheduling and disables | ||
197 | SMT nice which is harmful on dual core. | ||
198 | TBD tune the domain scheduler for dual core. */ | ||
199 | if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) | ||
200 | smp_num_siblings = 1; | ||
201 | #endif | ||
202 | |||
203 | if (cpuid_eax(0x80000000) >= 0x80000008) { | ||
204 | c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; | ||
205 | if (c->x86_num_cores & (c->x86_num_cores - 1)) | ||
206 | c->x86_num_cores = 1; | ||
207 | } | ||
208 | } | ||
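For reference, the two WHCR encodings used above, evaluated for a hypothetical 128MB machine; the field layouts here are read off the code itself rather than AMD documentation:

#include <stdio.h>

int main(void)
{
	unsigned int mbytes = 128;	/* hypothetical RAM size */

	/* old-style K6: enable bit 0, size/4 starting at bit 1 */
	unsigned int old_whcr = (1u << 0) | ((mbytes / 4) << 1);
	/* new-style K6: size/4 starting at bit 22, enable bit 16 */
	unsigned int new_whcr = ((mbytes >> 2) << 22) | (1u << 16);

	printf("old WHCR low word: 0x%08x\n", old_whcr);	/* 0x00000041 */
	printf("new WHCR low word: 0x%08x\n", new_whcr);	/* 0x08010000 */
	return 0;
}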
209 | |||
210 | static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) | ||
211 | { | ||
212 | /* AMD errata T13 (order #21922) */ | ||
213 | if ((c->x86 == 6)) { | ||
214 | if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */ | ||
215 | size = 64; | ||
216 | if (c->x86_model == 4 && | ||
217 | (c->x86_mask==0 || c->x86_mask==1)) /* Tbird rev A1/A2 */ | ||
218 | size = 256; | ||
219 | } | ||
220 | return size; | ||
221 | } | ||
222 | |||
223 | static struct cpu_dev amd_cpu_dev __initdata = { | ||
224 | .c_vendor = "AMD", | ||
225 | .c_ident = { "AuthenticAMD" }, | ||
226 | .c_models = { | ||
227 | { .vendor = X86_VENDOR_AMD, .family = 4, .model_names = | ||
228 | { | ||
229 | [3] = "486 DX/2", | ||
230 | [7] = "486 DX/2-WB", | ||
231 | [8] = "486 DX/4", | ||
232 | [9] = "486 DX/4-WB", | ||
233 | [14] = "Am5x86-WT", | ||
234 | [15] = "Am5x86-WB" | ||
235 | } | ||
236 | }, | ||
237 | }, | ||
238 | .c_init = init_amd, | ||
239 | .c_identify = generic_identify, | ||
240 | .c_size_cache = amd_size_cache, | ||
241 | }; | ||
242 | |||
243 | int __init amd_init_cpu(void) | ||
244 | { | ||
245 | cpu_devs[X86_VENDOR_AMD] = &amd_cpu_dev; | ||
246 | return 0; | ||
247 | } | ||
248 | |||
249 | //early_arch_initcall(amd_init_cpu); | ||
diff --git a/arch/i386/kernel/cpu/centaur.c b/arch/i386/kernel/cpu/centaur.c new file mode 100644 index 000000000000..394814e57672 --- /dev/null +++ b/arch/i386/kernel/cpu/centaur.c | |||
@@ -0,0 +1,476 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | #include <linux/init.h> | ||
3 | #include <linux/bitops.h> | ||
4 | #include <asm/processor.h> | ||
5 | #include <asm/msr.h> | ||
6 | #include <asm/e820.h> | ||
7 | #include "cpu.h" | ||
8 | |||
9 | #ifdef CONFIG_X86_OOSTORE | ||
10 | |||
11 | static u32 __init power2(u32 x) | ||
12 | { | ||
13 | u32 s=1; | ||
14 | while(s<=x) | ||
15 | s<<=1; | ||
16 | return s>>=1; | ||
17 | } | ||
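power2() returns the largest power of two that does not exceed its argument, which matters below because MCR region sizes must be powers of two. A quick standalone check:

#include <stdio.h>

static unsigned int power2(unsigned int x)
{
	unsigned int s = 1;

	while (s <= x)
		s <<= 1;
	return s >> 1;
}

int main(void)
{
	printf("%u %u %u\n", power2(5), power2(64), power2(100));
	/* prints "4 64 64" */
	return 0;
}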
18 | |||
19 | |||
20 | /* | ||
21 | * Set up an actual MCR | ||
22 | */ | ||
23 | |||
24 | static void __init centaur_mcr_insert(int reg, u32 base, u32 size, int key) | ||
25 | { | ||
26 | u32 lo, hi; | ||
27 | |||
28 | hi = base & ~0xFFF; | ||
29 | lo = ~(size-1); /* Size is a power of 2 so this makes a mask */ | ||
30 | lo &= ~0xFFF; /* Remove the ctrl value bits */ | ||
31 | lo |= key; /* Attribute we wish to set */ | ||
32 | wrmsr(reg+MSR_IDT_MCR0, lo, hi); | ||
33 | mtrr_centaur_report_mcr(reg, lo, hi); /* Tell the mtrr driver */ | ||
34 | } | ||
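Because size is a power of two, ~(size-1) yields a contiguous high-order address mask; clearing the low 12 bits then leaves room for the attribute key. A worked example with hypothetical values:

#include <stdio.h>

int main(void)
{
	unsigned int base = 0x00100000;	/* hypothetical 1MB-aligned region */
	unsigned int size = 0x00100000;	/* 1MB: must be a power of two */
	unsigned int key  = 31;		/* attribute bits, as in the callers below */

	unsigned int hi = base & ~0xfffu;
	unsigned int lo = (~(size - 1) & ~0xfffu) | key;

	printf("MCR hi=0x%08x lo=0x%08x\n", hi, lo);
	/* prints "MCR hi=0x00100000 lo=0xfff0001f" */
	return 0;
}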
35 | |||
36 | /* | ||
37 | * Figure what we can cover with MCR's | ||
38 | * | ||
39 | * Shortcut: We know you can't put 4Gig of RAM on a winchip | ||
40 | */ | ||
41 | |||
42 | static u32 __init ramtop(void) /* 16388 */ | ||
43 | { | ||
44 | int i; | ||
45 | u32 top = 0; | ||
46 | u32 clip = 0xFFFFFFFFUL; | ||
47 | |||
48 | for (i = 0; i < e820.nr_map; i++) { | ||
49 | unsigned long start, end; | ||
50 | |||
51 | if (e820.map[i].addr > 0xFFFFFFFFUL) | ||
52 | continue; | ||
53 | /* | ||
54 | * Don't MCR over reserved space. Ignore the ISA hole | ||
55 | * - we frob around that catastrophe already | ||
56 | */ | ||
57 | |||
58 | if (e820.map[i].type == E820_RESERVED) | ||
59 | { | ||
60 | if(e820.map[i].addr >= 0x100000UL && e820.map[i].addr < clip) | ||
61 | clip = e820.map[i].addr; | ||
62 | continue; | ||
63 | } | ||
64 | start = e820.map[i].addr; | ||
65 | end = e820.map[i].addr + e820.map[i].size; | ||
66 | if (start >= end) | ||
67 | continue; | ||
68 | if (end > top) | ||
69 | top = end; | ||
70 | } | ||
71 | /* Everything below 'top' should be RAM except for the ISA hole. | ||
72 | Because of the limited MCRs we want to map NV/ACPI into our | ||
73 | MCR range for gunk in RAM. | ||
74 | | ||
75 | Clip might cause us to MCR insufficient RAM, but that is an | ||
76 | acceptable failure mode and should only bite obscure boxes with | ||
77 | a VESA hole at 15Mb. | ||
78 | | ||
79 | The second case where clip kicks in is when the EBDA is marked | ||
80 | as reserved. Again we fail safe with reasonable results. | ||
81 | */ | ||
82 | |||
83 | if(top>clip) | ||
84 | top=clip; | ||
85 | |||
86 | return top; | ||
87 | } | ||
88 | |||
89 | /* | ||
90 | * Compute a set of MCR's to give maximum coverage | ||
91 | */ | ||
92 | |||
93 | static int __init centaur_mcr_compute(int nr, int key) | ||
94 | { | ||
95 | u32 mem = ramtop(); | ||
96 | u32 root = power2(mem); | ||
97 | u32 base = root; | ||
98 | u32 top = root; | ||
99 | u32 floor = 0; | ||
100 | int ct = 0; | ||
101 | |||
102 | while(ct<nr) | ||
103 | { | ||
104 | u32 fspace = 0; | ||
105 | |||
106 | /* | ||
107 | * Find the largest block we will fill going upwards | ||
108 | */ | ||
109 | |||
110 | u32 high = power2(mem-top); | ||
111 | |||
112 | /* | ||
113 | * Find the largest block we will fill going downwards | ||
114 | */ | ||
115 | |||
116 | u32 low = base/2; | ||
117 | |||
118 | /* | ||
119 | * Don't fill below 1Mb going downwards as there | ||
120 | * is an ISA hole in the way. | ||
121 | */ | ||
122 | |||
123 | if(base <= 1024*1024) | ||
124 | low = 0; | ||
125 | |||
126 | /* | ||
127 | * See how much space we could cover by filling below | ||
128 | * the ISA hole | ||
129 | */ | ||
130 | |||
131 | if(floor == 0) | ||
132 | fspace = 512*1024; | ||
133 | else if(floor ==512*1024) | ||
134 | fspace = 128*1024; | ||
135 | |||
136 | /* And forget ROM space */ | ||
137 | |||
138 | /* | ||
139 | * Now install the largest coverage we get | ||
140 | */ | ||
141 | |||
142 | if(fspace > high && fspace > low) | ||
143 | { | ||
144 | centaur_mcr_insert(ct, floor, fspace, key); | ||
145 | floor += fspace; | ||
146 | } | ||
147 | else if(high > low) | ||
148 | { | ||
149 | centaur_mcr_insert(ct, top, high, key); | ||
150 | top += high; | ||
151 | } | ||
152 | else if(low > 0) | ||
153 | { | ||
154 | base -= low; | ||
155 | centaur_mcr_insert(ct, base, low, key); | ||
156 | } | ||
157 | else break; | ||
158 | ct++; | ||
159 | } | ||
160 | /* | ||
161 | * We loaded ct values. We now need to set the mask. The caller | ||
162 | * must do this bit. | ||
163 | */ | ||
164 | |||
165 | return ct; | ||
166 | } | ||
167 | |||
168 | static void __init centaur_create_optimal_mcr(void) | ||
169 | { | ||
170 | int i; | ||
171 | /* | ||
172 | * Allocate up to 6 mcrs to mark as much of ram as possible | ||
173 | * as write combining and weak write ordered. | ||
174 | * | ||
175 | * To experiment with: Linux never uses stack operations for | ||
176 | * mmio spaces so we could globally enable stack operation wc | ||
177 | * | ||
178 | * Load the registers with type 31 - full write combining, all | ||
179 | * writes weakly ordered. | ||
180 | */ | ||
181 | int used = centaur_mcr_compute(6, 31); | ||
182 | |||
183 | /* | ||
184 | * Wipe unused MCRs | ||
185 | */ | ||
186 | |||
187 | for(i=used;i<8;i++) | ||
188 | wrmsr(MSR_IDT_MCR0+i, 0, 0); | ||
189 | } | ||
190 | |||
191 | static void __init winchip2_create_optimal_mcr(void) | ||
192 | { | ||
193 | u32 lo, hi; | ||
194 | int i; | ||
195 | |||
196 | /* | ||
197 | * Allocate up to 6 mcrs to mark as much of ram as possible | ||
198 | * as write combining, weak store ordered. | ||
199 | * | ||
200 | * Load the registers with type 25 | ||
201 | * 8 - weak write ordering | ||
202 | * 16 - weak read ordering | ||
203 | * 1 - write combining | ||
204 | */ | ||
205 | |||
206 | int used = centaur_mcr_compute(6, 25); | ||
207 | |||
208 | /* | ||
209 | * Mark the registers we are using. | ||
210 | */ | ||
211 | |||
212 | rdmsr(MSR_IDT_MCR_CTRL, lo, hi); | ||
213 | for(i=0;i<used;i++) | ||
214 | lo|=1<<(9+i); | ||
215 | wrmsr(MSR_IDT_MCR_CTRL, lo, hi); | ||
216 | |||
217 | /* | ||
218 | * Wipe unused MCRs | ||
219 | */ | ||
220 | |||
221 | for(i=used;i<8;i++) | ||
222 | wrmsr(MSR_IDT_MCR0+i, 0, 0); | ||
223 | } | ||
224 | |||
225 | /* | ||
226 | * Handle the MCR key on the Winchip 2. | ||
227 | */ | ||
228 | |||
229 | static void __init winchip2_unprotect_mcr(void) | ||
230 | { | ||
231 | u32 lo, hi; | ||
232 | u32 key; | ||
233 | |||
234 | rdmsr(MSR_IDT_MCR_CTRL, lo, hi); | ||
235 | lo&=~0x1C0; /* blank bits 8-6 */ | ||
236 | key = (lo>>17) & 7; | ||
237 | lo |= key<<6; /* replace with unlock key */ | ||
238 | wrmsr(MSR_IDT_MCR_CTRL, lo, hi); | ||
239 | } | ||
240 | |||
241 | static void __init winchip2_protect_mcr(void) | ||
242 | { | ||
243 | u32 lo, hi; | ||
244 | |||
245 | rdmsr(MSR_IDT_MCR_CTRL, lo, hi); | ||
246 | lo&=~0x1C0; /* blank bits 8-6 */ | ||
247 | wrmsr(MSR_IDT_MCR_CTRL, lo, hi); | ||
248 | } | ||
249 | #endif /* CONFIG_X86_OOSTORE */ | ||
250 | |||
251 | #define ACE_PRESENT (1 << 6) | ||
252 | #define ACE_ENABLED (1 << 7) | ||
253 | #define ACE_FCR (1 << 28) /* MSR_VIA_FCR */ | ||
254 | |||
255 | #define RNG_PRESENT (1 << 2) | ||
256 | #define RNG_ENABLED (1 << 3) | ||
257 | #define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */ | ||
258 | |||
259 | static void __init init_c3(struct cpuinfo_x86 *c) | ||
260 | { | ||
261 | u32 lo, hi; | ||
262 | |||
263 | /* Test for Centaur Extended Feature Flags presence */ | ||
264 | if (cpuid_eax(0xC0000000) >= 0xC0000001) { | ||
265 | u32 tmp = cpuid_edx(0xC0000001); | ||
266 | |||
267 | /* enable ACE unit, if present and disabled */ | ||
268 | if ((tmp & (ACE_PRESENT | ACE_ENABLED)) == ACE_PRESENT) { | ||
269 | rdmsr (MSR_VIA_FCR, lo, hi); | ||
270 | lo |= ACE_FCR; /* enable ACE unit */ | ||
271 | wrmsr (MSR_VIA_FCR, lo, hi); | ||
272 | printk(KERN_INFO "CPU: Enabled ACE h/w crypto\n"); | ||
273 | } | ||
274 | |||
275 | /* enable RNG unit, if present and disabled */ | ||
276 | if ((tmp & (RNG_PRESENT | RNG_ENABLED)) == RNG_PRESENT) { | ||
277 | rdmsr (MSR_VIA_RNG, lo, hi); | ||
278 | lo |= RNG_ENABLE; /* enable RNG unit */ | ||
279 | wrmsr (MSR_VIA_RNG, lo, hi); | ||
280 | printk(KERN_INFO "CPU: Enabled h/w RNG\n"); | ||
281 | } | ||
282 | |||
283 | /* store Centaur Extended Feature Flags as | ||
284 | * word 5 of the CPU capability bit array | ||
285 | */ | ||
286 | c->x86_capability[5] = cpuid_edx(0xC0000001); | ||
287 | } | ||
288 | |||
289 | /* Cyrix III family needs CX8 & PGE explicitly enabled. */ | ||
290 | if (c->x86_model >=6 && c->x86_model <= 9) { | ||
291 | rdmsr (MSR_VIA_FCR, lo, hi); | ||
292 | lo |= (1<<1 | 1<<7); | ||
293 | wrmsr (MSR_VIA_FCR, lo, hi); | ||
294 | set_bit(X86_FEATURE_CX8, c->x86_capability); | ||
295 | } | ||
296 | |||
297 | /* Before Nehemiah, the C3's had 3dNOW! */ | ||
298 | if (c->x86_model >=6 && c->x86_model <9) | ||
299 | set_bit(X86_FEATURE_3DNOW, c->x86_capability); | ||
300 | |||
301 | get_model_name(c); | ||
302 | display_cacheinfo(c); | ||
303 | } | ||
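The two FCR checks above share an idiom: mask PRESENT and ENABLED together and compare against PRESENT alone, which holds only when the unit exists but is still disabled. A small standalone demonstration:

#include <stdio.h>

#define ACE_PRESENT	(1 << 6)
#define ACE_ENABLED	(1 << 7)

int main(void)
{
	unsigned int tmp = ACE_PRESENT;	/* present, not yet enabled */

	if ((tmp & (ACE_PRESENT | ACE_ENABLED)) == ACE_PRESENT)
		printf("would enable ACE now\n");

	tmp |= ACE_ENABLED;		/* once enabled... */
	if ((tmp & (ACE_PRESENT | ACE_ENABLED)) != ACE_PRESENT)
		printf("...the test no longer fires\n");
	return 0;
}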
304 | |||
305 | static void __init init_centaur(struct cpuinfo_x86 *c) | ||
306 | { | ||
307 | enum { | ||
308 | ECX8=1<<1, | ||
309 | EIERRINT=1<<2, | ||
310 | DPM=1<<3, | ||
311 | DMCE=1<<4, | ||
312 | DSTPCLK=1<<5, | ||
313 | ELINEAR=1<<6, | ||
314 | DSMC=1<<7, | ||
315 | DTLOCK=1<<8, | ||
316 | EDCTLB=1<<8, | ||
317 | EMMX=1<<9, | ||
318 | DPDC=1<<11, | ||
319 | EBRPRED=1<<12, | ||
320 | DIC=1<<13, | ||
321 | DDC=1<<14, | ||
322 | DNA=1<<15, | ||
323 | ERETSTK=1<<16, | ||
324 | E2MMX=1<<19, | ||
325 | EAMD3D=1<<20, | ||
326 | }; | ||
327 | |||
328 | char *name; | ||
329 | u32 fcr_set=0; | ||
330 | u32 fcr_clr=0; | ||
331 | u32 lo,hi,newlo; | ||
332 | u32 aa,bb,cc,dd; | ||
333 | |||
334 | /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; | ||
335 | 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ | ||
336 | clear_bit(0*32+31, c->x86_capability); | ||
337 | |||
338 | switch (c->x86) { | ||
339 | |||
340 | case 5: | ||
341 | switch(c->x86_model) { | ||
342 | case 4: | ||
343 | name="C6"; | ||
344 | fcr_set=ECX8|DSMC|EDCTLB|EMMX|ERETSTK; | ||
345 | fcr_clr=DPDC; | ||
346 | printk(KERN_NOTICE "Disabling bugged TSC.\n"); | ||
347 | clear_bit(X86_FEATURE_TSC, c->x86_capability); | ||
348 | #ifdef CONFIG_X86_OOSTORE | ||
349 | centaur_create_optimal_mcr(); | ||
350 | /* Enable | ||
351 | write combining on non-stack, non-string | ||
352 | write combining on string, all types | ||
353 | weak write ordering | ||
354 | |||
355 | The C6 original lacks weak read order | ||
356 | |||
357 | Note 0x120 is write only on Winchip 1 */ | ||
358 | |||
359 | wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0); | ||
360 | #endif | ||
361 | break; | ||
362 | case 8: | ||
363 | switch(c->x86_mask) { | ||
364 | default: | ||
365 | name="2"; | ||
366 | break; | ||
367 | case 7 ... 9: | ||
368 | name="2A"; | ||
369 | break; | ||
370 | case 10 ... 15: | ||
371 | name="2B"; | ||
372 | break; | ||
373 | } | ||
374 | fcr_set=ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D; | ||
375 | fcr_clr=DPDC; | ||
376 | #ifdef CONFIG_X86_OOSTORE | ||
377 | winchip2_unprotect_mcr(); | ||
378 | winchip2_create_optimal_mcr(); | ||
379 | rdmsr(MSR_IDT_MCR_CTRL, lo, hi); | ||
380 | /* Enable | ||
381 | write combining on non-stack, non-string | ||
382 | write combining on string, all types | ||
383 | weak write ordering | ||
384 | */ | ||
385 | lo|=31; | ||
386 | wrmsr(MSR_IDT_MCR_CTRL, lo, hi); | ||
387 | winchip2_protect_mcr(); | ||
388 | #endif | ||
389 | break; | ||
390 | case 9: | ||
391 | name="3"; | ||
392 | fcr_set=ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D; | ||
393 | fcr_clr=DPDC; | ||
394 | #ifdef CONFIG_X86_OOSTORE | ||
395 | winchip2_unprotect_mcr(); | ||
396 | winchip2_create_optimal_mcr(); | ||
397 | rdmsr(MSR_IDT_MCR_CTRL, lo, hi); | ||
398 | /* Enable | ||
399 | write combining on non-stack, non-string | ||
400 | write combining on string, all types | ||
401 | weak write ordering | ||
402 | */ | ||
403 | lo|=31; | ||
404 | wrmsr(MSR_IDT_MCR_CTRL, lo, hi); | ||
405 | winchip2_protect_mcr(); | ||
406 | #endif | ||
407 | break; | ||
408 | case 10: | ||
409 | name="4"; | ||
410 | /* no info on the WC4 yet */ | ||
411 | break; | ||
412 | default: | ||
413 | name="??"; | ||
414 | } | ||
415 | |||
416 | rdmsr(MSR_IDT_FCR1, lo, hi); | ||
417 | newlo=(lo|fcr_set) & (~fcr_clr); | ||
418 | |||
419 | if (newlo!=lo) { | ||
420 | printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n", lo, newlo ); | ||
421 | wrmsr(MSR_IDT_FCR1, newlo, hi ); | ||
422 | } else { | ||
423 | printk(KERN_INFO "Centaur FCR is 0x%X\n",lo); | ||
424 | } | ||
425 | /* Emulate MTRRs using Centaur's MCR. */ | ||
426 | set_bit(X86_FEATURE_CENTAUR_MCR, c->x86_capability); | ||
427 | /* Report CX8 */ | ||
428 | set_bit(X86_FEATURE_CX8, c->x86_capability); | ||
429 | /* Set 3DNow! on Winchip 2 and above. */ | ||
430 | if (c->x86_model >=8) | ||
431 | set_bit(X86_FEATURE_3DNOW, c->x86_capability); | ||
432 | /* See if we can find out some more. */ | ||
433 | if ( cpuid_eax(0x80000000) >= 0x80000005 ) { | ||
434 | /* Yes, we can. */ | ||
435 | cpuid(0x80000005,&aa,&bb,&cc,&dd); | ||
436 | /* Add L1 data and code cache sizes. */ | ||
437 | c->x86_cache_size = (cc>>24)+(dd>>24); | ||
438 | } | ||
439 | sprintf( c->x86_model_id, "WinChip %s", name ); | ||
440 | break; | ||
441 | |||
442 | case 6: | ||
443 | init_c3(c); | ||
444 | break; | ||
445 | } | ||
446 | } | ||
447 | |||
448 | static unsigned int centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size) | ||
449 | { | ||
450 | /* VIA C3 CPUs (670-68F) need further shifting. */ | ||
451 | if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8))) | ||
452 | size >>= 8; | ||
453 | |||
454 | /* VIA also screwed up Nehemiah stepping 1, and made | ||
455 | it return '65KB' instead of '64KB'. | ||
456 | Note: it seems this may only affect engineering samples. */ | ||
457 | if ((c->x86==6) && (c->x86_model==9) && (c->x86_mask==1) && (size==65)) | ||
458 | size -= 1; | ||
459 | |||
460 | return size; | ||
461 | } | ||
462 | |||
463 | static struct cpu_dev centaur_cpu_dev __initdata = { | ||
464 | .c_vendor = "Centaur", | ||
465 | .c_ident = { "CentaurHauls" }, | ||
466 | .c_init = init_centaur, | ||
467 | .c_size_cache = centaur_size_cache, | ||
468 | }; | ||
469 | |||
470 | int __init centaur_init_cpu(void) | ||
471 | { | ||
472 | cpu_devs[X86_VENDOR_CENTAUR] = ¢aur_cpu_dev; | ||
473 | return 0; | ||
474 | } | ||
475 | |||
476 | //early_arch_initcall(centaur_init_cpu); | ||
diff --git a/arch/i386/kernel/cpu/changelog b/arch/i386/kernel/cpu/changelog new file mode 100644 index 000000000000..cef76b80a710 --- /dev/null +++ b/arch/i386/kernel/cpu/changelog | |||
@@ -0,0 +1,63 @@ | |||
1 | /* | ||
2 | * Enhanced CPU type detection by Mike Jagdis, Patrick St. Jean | ||
3 | * and Martin Mares, November 1997. | ||
4 | * | ||
5 | * Force Cyrix 6x86(MX) and M II processors to report MTRR capability | ||
6 | * and Cyrix "coma bug" recognition by | ||
7 | * Zoltán Böszörményi <zboszor@mail.externet.hu> February 1999. | ||
8 | * | ||
9 | * Force Centaur C6 processors to report MTRR capability. | ||
10 | * Bart Hartgers <bart@etpmod.phys.tue.nl>, May 1999. | ||
11 | * | ||
12 | * Intel Mobile Pentium II detection fix. Sean Gilley, June 1999. | ||
13 | * | ||
14 | * IDT Winchip tweaks, misc clean ups. | ||
15 | * Dave Jones <davej@suse.de>, August 1999 | ||
16 | * | ||
17 | * Better detection of Centaur/IDT WinChip models. | ||
18 | * Bart Hartgers <bart@etpmod.phys.tue.nl>, August 1999. | ||
19 | * | ||
20 | * Cleaned up cache-detection code | ||
21 | * Dave Jones <davej@suse.de>, October 1999 | ||
22 | * | ||
23 | * Added proper L2 cache detection for Coppermine | ||
24 | * Dragan Stancevic <visitor@valinux.com>, October 1999 | ||
25 | * | ||
26 | * Added the original array for capability flags but forgot to credit | ||
27 | * myself :) (~1998) Fixed/cleaned up some cpu_model_info and other stuff | ||
28 | * Jauder Ho <jauderho@carumba.com>, January 2000 | ||
29 | * | ||
30 | * Detection for Celeron coppermine, identify_cpu() overhauled, | ||
31 | * and a few other clean ups. | ||
32 | * Dave Jones <davej@suse.de>, April 2000 | ||
33 | * | ||
34 | * Pentium III FXSR, SSE support | ||
35 | * General FPU state handling cleanups | ||
36 | * Gareth Hughes <gareth@valinux.com>, May 2000 | ||
37 | * | ||
38 | * Added proper Cascades CPU and L2 cache detection for Cascades | ||
39 | * and 8-way type cache happy bunch from Intel:^) | ||
40 | * Dragan Stancevic <visitor@valinux.com>, May 2000 | ||
41 | * | ||
42 | * Forward port AMD Duron errata T13 from 2.2.17pre | ||
43 | * Dave Jones <davej@suse.de>, August 2000 | ||
44 | * | ||
45 | * Forward port lots of fixes/improvements from 2.2.18pre | ||
46 | * Cyrix III, Pentium IV support. | ||
47 | * Dave Jones <davej@suse.de>, October 2000 | ||
48 | * | ||
49 | * Massive cleanup of CPU detection and bug handling; | ||
50 | * Transmeta CPU detection, | ||
51 | * H. Peter Anvin <hpa@zytor.com>, November 2000 | ||
52 | * | ||
53 | * VIA C3 Support. | ||
54 | * Dave Jones <davej@suse.de>, March 2001 | ||
55 | * | ||
56 | * AMD Athlon/Duron/Thunderbird bluesmoke support. | ||
57 | * Dave Jones <davej@suse.de>, April 2001. | ||
58 | * | ||
59 | * CacheSize bug workaround updates for AMD, Intel & VIA Cyrix. | ||
60 | * Dave Jones <davej@suse.de>, September, October 2001. | ||
61 | * | ||
62 | */ | ||
63 | |||
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c new file mode 100644 index 000000000000..ebd5d8247faa --- /dev/null +++ b/arch/i386/kernel/cpu/common.c | |||
@@ -0,0 +1,634 @@ | |||
1 | #include <linux/init.h> | ||
2 | #include <linux/string.h> | ||
3 | #include <linux/delay.h> | ||
4 | #include <linux/smp.h> | ||
5 | #include <linux/module.h> | ||
6 | #include <linux/percpu.h> | ||
7 | #include <asm/semaphore.h> | ||
8 | #include <asm/processor.h> | ||
9 | #include <asm/i387.h> | ||
10 | #include <asm/msr.h> | ||
11 | #include <asm/io.h> | ||
12 | #include <asm/mmu_context.h> | ||
13 | #ifdef CONFIG_X86_LOCAL_APIC | ||
14 | #include <asm/mpspec.h> | ||
15 | #include <asm/apic.h> | ||
16 | #include <mach_apic.h> | ||
17 | #endif | ||
18 | |||
19 | #include "cpu.h" | ||
20 | |||
21 | DEFINE_PER_CPU(struct desc_struct, cpu_gdt_table[GDT_ENTRIES]); | ||
22 | EXPORT_PER_CPU_SYMBOL(cpu_gdt_table); | ||
23 | |||
24 | DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); | ||
25 | EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack); | ||
26 | |||
27 | static int cachesize_override __initdata = -1; | ||
28 | static int disable_x86_fxsr __initdata = 0; | ||
29 | static int disable_x86_serial_nr __initdata = 1; | ||
30 | |||
31 | struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {}; | ||
32 | |||
33 | extern void mcheck_init(struct cpuinfo_x86 *c); | ||
34 | |||
35 | extern int disable_pse; | ||
36 | |||
37 | static void default_init(struct cpuinfo_x86 * c) | ||
38 | { | ||
39 | /* Not much we can do here... */ | ||
40 | /* Check if it at least has cpuid */ | ||
41 | if (c->cpuid_level == -1) { | ||
42 | /* No cpuid. It must be an ancient CPU */ | ||
43 | if (c->x86 == 4) | ||
44 | strcpy(c->x86_model_id, "486"); | ||
45 | else if (c->x86 == 3) | ||
46 | strcpy(c->x86_model_id, "386"); | ||
47 | } | ||
48 | } | ||
49 | |||
50 | static struct cpu_dev default_cpu = { | ||
51 | .c_init = default_init, | ||
52 | }; | ||
53 | static struct cpu_dev * this_cpu = &default_cpu; | ||
54 | |||
55 | static int __init cachesize_setup(char *str) | ||
56 | { | ||
57 | get_option (&str, &cachesize_override); | ||
58 | return 1; | ||
59 | } | ||
60 | __setup("cachesize=", cachesize_setup); | ||
61 | |||
62 | int __init get_model_name(struct cpuinfo_x86 *c) | ||
63 | { | ||
64 | unsigned int *v; | ||
65 | char *p, *q; | ||
66 | |||
67 | if (cpuid_eax(0x80000000) < 0x80000004) | ||
68 | return 0; | ||
69 | |||
70 | v = (unsigned int *) c->x86_model_id; | ||
71 | cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); | ||
72 | cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); | ||
73 | cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); | ||
74 | c->x86_model_id[48] = 0; | ||
75 | |||
76 | /* Intel chips right-justify this string for some dumb reason; | ||
77 | undo that brain damage */ | ||
78 | p = q = &c->x86_model_id[0]; | ||
79 | while ( *p == ' ' ) | ||
80 | p++; | ||
81 | if ( p != q ) { | ||
82 | while ( *p ) | ||
83 | *q++ = *p++; | ||
84 | while ( q <= &c->x86_model_id[48] ) | ||
85 | *q++ = '\0'; /* Zero-pad the rest */ | ||
86 | } | ||
87 | |||
88 | return 1; | ||
89 | } | ||
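The same left-justification, run standalone on an Intel-style right-padded string (the sample string is made up):

#include <stdio.h>

int main(void)
{
	char id[49] = "              Intel(R) Pentium(R) 4 CPU";
	char *p = id, *q = id;

	while (*p == ' ')
		p++;
	if (p != q) {
		while (*p)
			*q++ = *p++;
		while (q <= &id[48])
			*q++ = '\0';	/* zero-pad the tail */
	}
	printf("\"%s\"\n", id);	/* "Intel(R) Pentium(R) 4 CPU" */
	return 0;
}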
90 | |||
91 | |||
92 | void __init display_cacheinfo(struct cpuinfo_x86 *c) | ||
93 | { | ||
94 | unsigned int n, dummy, ecx, edx, l2size; | ||
95 | |||
96 | n = cpuid_eax(0x80000000); | ||
97 | |||
98 | if (n >= 0x80000005) { | ||
99 | cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); | ||
100 | printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", | ||
101 | edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); | ||
102 | c->x86_cache_size=(ecx>>24)+(edx>>24); | ||
103 | } | ||
104 | |||
105 | if (n < 0x80000006) /* Some chips just have a large L1. */ | ||
106 | return; | ||
107 | |||
108 | ecx = cpuid_ecx(0x80000006); | ||
109 | l2size = ecx >> 16; | ||
110 | |||
111 | /* do processor-specific cache resizing */ | ||
112 | if (this_cpu->c_size_cache) | ||
113 | l2size = this_cpu->c_size_cache(c,l2size); | ||
114 | |||
115 | /* Allow user to override all this if necessary. */ | ||
116 | if (cachesize_override != -1) | ||
117 | l2size = cachesize_override; | ||
118 | |||
119 | if ( l2size == 0 ) | ||
120 | return; /* Again, no L2 cache is possible */ | ||
121 | |||
122 | c->x86_cache_size = l2size; | ||
123 | |||
124 | printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", | ||
125 | l2size, ecx & 0xFF); | ||
126 | } | ||
127 | |||
128 | /* Naming convention should be: <Name> [(<Codename>)] */ | ||
129 | /* This table is only used if init_<vendor>() below doesn't set it; */ | ||
130 | /* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */ | ||
131 | |||
132 | /* Look up CPU names by table lookup. */ | ||
133 | static char __init *table_lookup_model(struct cpuinfo_x86 *c) | ||
134 | { | ||
135 | struct cpu_model_info *info; | ||
136 | |||
137 | if ( c->x86_model >= 16 ) | ||
138 | return NULL; /* Range check */ | ||
139 | |||
140 | if (!this_cpu) | ||
141 | return NULL; | ||
142 | |||
143 | info = this_cpu->c_models; | ||
144 | |||
145 | while (info && info->family) { | ||
146 | if (info->family == c->x86) | ||
147 | return info->model_names[c->x86_model]; | ||
148 | info++; | ||
149 | } | ||
150 | return NULL; /* Not found */ | ||
151 | } | ||
152 | |||
153 | |||
154 | void __init get_cpu_vendor(struct cpuinfo_x86 *c, int early) | ||
155 | { | ||
156 | char *v = c->x86_vendor_id; | ||
157 | int i; | ||
158 | |||
159 | for (i = 0; i < X86_VENDOR_NUM; i++) { | ||
160 | if (cpu_devs[i]) { | ||
161 | if (!strcmp(v,cpu_devs[i]->c_ident[0]) || | ||
162 | (cpu_devs[i]->c_ident[1] && | ||
163 | !strcmp(v,cpu_devs[i]->c_ident[1]))) { | ||
164 | c->x86_vendor = i; | ||
165 | if (!early) | ||
166 | this_cpu = cpu_devs[i]; | ||
167 | break; | ||
168 | } | ||
169 | } | ||
170 | } | ||
171 | } | ||
172 | |||
173 | |||
174 | static int __init x86_fxsr_setup(char * s) | ||
175 | { | ||
176 | disable_x86_fxsr = 1; | ||
177 | return 1; | ||
178 | } | ||
179 | __setup("nofxsr", x86_fxsr_setup); | ||
180 | |||
181 | |||
182 | /* Standard macro to see if a specific flag is changeable */ | ||
183 | static inline int flag_is_changeable_p(u32 flag) | ||
184 | { | ||
185 | u32 f1, f2; | ||
186 | |||
187 | asm("pushfl\n\t" | ||
188 | "pushfl\n\t" | ||
189 | "popl %0\n\t" | ||
190 | "movl %0,%1\n\t" | ||
191 | "xorl %2,%0\n\t" | ||
192 | "pushl %0\n\t" | ||
193 | "popfl\n\t" | ||
194 | "pushfl\n\t" | ||
195 | "popl %0\n\t" | ||
196 | "popfl\n\t" | ||
197 | : "=&r" (f1), "=&r" (f2) | ||
198 | : "ir" (flag)); | ||
199 | |||
200 | return ((f1^f2) & flag) != 0; | ||
201 | } | ||
202 | |||
203 | |||
204 | /* Probe for the CPUID instruction */ | ||
205 | static int __init have_cpuid_p(void) | ||
206 | { | ||
207 | return flag_is_changeable_p(X86_EFLAGS_ID); | ||
208 | } | ||
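The same EFLAGS-toggling probe can be demonstrated from user space. A sketch assuming a 32-bit x86 build (e.g. gcc -m32): X86_EFLAGS_ID is bit 21 (0x00200000), and the CPUID instruction exists exactly when that bit can be flipped.

#include <stdio.h>

/* Try to toggle a flag bit in EFLAGS and report whether the change
 * sticks -- the same sequence flag_is_changeable_p() uses. */
static int flag_sticks(unsigned int flag)
{
	unsigned int f1, f2;

	asm("pushfl\n\t"
	    "pushfl\n\t"
	    "popl %0\n\t"
	    "movl %0,%1\n\t"
	    "xorl %2,%0\n\t"
	    "pushl %0\n\t"
	    "popfl\n\t"
	    "pushfl\n\t"
	    "popl %0\n\t"
	    "popfl\n\t"
	    : "=&r" (f1), "=&r" (f2)
	    : "ir" (flag));

	return ((f1 ^ f2) & flag) != 0;
}

int main(void)
{
	printf("CPUID available: %s\n",
	       flag_sticks(0x00200000) ? "yes" : "no");
	return 0;
}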
209 | |||
210 | /* Do minimum CPU detection early. | ||
211 | Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment. | ||
212 | The others are not touched to avoid unwanted side effects. */ | ||
213 | static void __init early_cpu_detect(void) | ||
214 | { | ||
215 | struct cpuinfo_x86 *c = &boot_cpu_data; | ||
216 | |||
217 | c->x86_cache_alignment = 32; | ||
218 | |||
219 | if (!have_cpuid_p()) | ||
220 | return; | ||
221 | |||
222 | /* Get vendor name */ | ||
223 | cpuid(0x00000000, &c->cpuid_level, | ||
224 | (int *)&c->x86_vendor_id[0], | ||
225 | (int *)&c->x86_vendor_id[8], | ||
226 | (int *)&c->x86_vendor_id[4]); | ||
227 | |||
228 | get_cpu_vendor(c, 1); | ||
229 | |||
230 | c->x86 = 4; | ||
231 | if (c->cpuid_level >= 0x00000001) { | ||
232 | u32 junk, tfms, cap0, misc; | ||
233 | cpuid(0x00000001, &tfms, &misc, &junk, &cap0); | ||
234 | c->x86 = (tfms >> 8) & 15; | ||
235 | c->x86_model = (tfms >> 4) & 15; | ||
236 | if (c->x86 == 0xf) { | ||
237 | c->x86 += (tfms >> 20) & 0xff; | ||
238 | c->x86_model += ((tfms >> 16) & 0xF) << 4; | ||
239 | } | ||
240 | c->x86_mask = tfms & 15; | ||
241 | if (cap0 & (1<<19)) | ||
242 | c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8; | ||
243 | } | ||
244 | |||
245 | early_intel_workaround(c); | ||
246 | } | ||
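To make the extended family/model arithmetic concrete, here is a stand-alone sketch that decodes a hypothetical Pentium 4 signature of 0x00000f29 with the same shifts and masks, yielding family 15, model 2, stepping 9:

#include <stdio.h>

int main(void)
{
	unsigned int tfms = 0x00000f29;	/* hypothetical CPUID.1:EAX value */
	unsigned int family = (tfms >> 8) & 15;
	unsigned int model = (tfms >> 4) & 15;
	unsigned int stepping = tfms & 15;

	if (family == 0xf) {
		family += (tfms >> 20) & 0xff;		/* extended family */
		model += ((tfms >> 16) & 0xf) << 4;	/* extended model */
	}
	printf("family %u, model %u, stepping %u\n", family, model, stepping);
	return 0;
}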
247 | |||
248 | void __init generic_identify(struct cpuinfo_x86 * c) | ||
249 | { | ||
250 | u32 tfms, xlvl; | ||
251 | int junk; | ||
252 | |||
253 | if (have_cpuid_p()) { | ||
254 | /* Get vendor name */ | ||
255 | cpuid(0x00000000, &c->cpuid_level, | ||
256 | (int *)&c->x86_vendor_id[0], | ||
257 | (int *)&c->x86_vendor_id[8], | ||
258 | (int *)&c->x86_vendor_id[4]); | ||
259 | |||
260 | get_cpu_vendor(c, 0); | ||
261 | /* Initialize the standard set of capabilities */ | ||
262 | /* Note that the vendor-specific code below might override */ | ||
263 | |||
264 | /* Intel-defined flags: level 0x00000001 */ | ||
265 | if ( c->cpuid_level >= 0x00000001 ) { | ||
266 | u32 capability, excap; | ||
267 | cpuid(0x00000001, &tfms, &junk, &excap, &capability); | ||
268 | c->x86_capability[0] = capability; | ||
269 | c->x86_capability[4] = excap; | ||
270 | c->x86 = (tfms >> 8) & 15; | ||
271 | c->x86_model = (tfms >> 4) & 15; | ||
272 | if (c->x86 == 0xf) { | ||
273 | c->x86 += (tfms >> 20) & 0xff; | ||
274 | c->x86_model += ((tfms >> 16) & 0xF) << 4; | ||
275 | } | ||
276 | c->x86_mask = tfms & 15; | ||
277 | } else { | ||
278 | /* Have CPUID level 0 only - unheard of */ | ||
279 | c->x86 = 4; | ||
280 | } | ||
281 | |||
282 | /* AMD-defined flags: level 0x80000001 */ | ||
283 | xlvl = cpuid_eax(0x80000000); | ||
284 | if ( (xlvl & 0xffff0000) == 0x80000000 ) { | ||
285 | if ( xlvl >= 0x80000001 ) { | ||
286 | c->x86_capability[1] = cpuid_edx(0x80000001); | ||
287 | c->x86_capability[6] = cpuid_ecx(0x80000001); | ||
288 | } | ||
289 | if ( xlvl >= 0x80000004 ) | ||
290 | get_model_name(c); /* Default name */ | ||
291 | } | ||
292 | } | ||
293 | } | ||
294 | |||
295 | static void __init squash_the_stupid_serial_number(struct cpuinfo_x86 *c) | ||
296 | { | ||
297 | if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) { | ||
298 | /* Disable processor serial number */ | ||
299 | unsigned long lo,hi; | ||
300 | rdmsr(MSR_IA32_BBL_CR_CTL,lo,hi); | ||
301 | lo |= 0x200000; | ||
302 | wrmsr(MSR_IA32_BBL_CR_CTL,lo,hi); | ||
303 | printk(KERN_NOTICE "CPU serial number disabled.\n"); | ||
304 | clear_bit(X86_FEATURE_PN, c->x86_capability); | ||
305 | |||
306 | /* Disabling the serial number may affect the cpuid level */ | ||
307 | c->cpuid_level = cpuid_eax(0); | ||
308 | } | ||
309 | } | ||
310 | |||
311 | static int __init x86_serial_nr_setup(char *s) | ||
312 | { | ||
313 | disable_x86_serial_nr = 0; | ||
314 | return 1; | ||
315 | } | ||
316 | __setup("serialnumber", x86_serial_nr_setup); | ||
317 | |||
318 | |||
319 | |||
320 | /* | ||
321 | * This does the hard work of actually picking apart the CPU stuff... | ||
322 | */ | ||
323 | void __init identify_cpu(struct cpuinfo_x86 *c) | ||
324 | { | ||
325 | int i; | ||
326 | |||
327 | c->loops_per_jiffy = loops_per_jiffy; | ||
328 | c->x86_cache_size = -1; | ||
329 | c->x86_vendor = X86_VENDOR_UNKNOWN; | ||
330 | c->cpuid_level = -1; /* CPUID not detected */ | ||
331 | c->x86_model = c->x86_mask = 0; /* So far unknown... */ | ||
332 | c->x86_vendor_id[0] = '\0'; /* Unset */ | ||
333 | c->x86_model_id[0] = '\0'; /* Unset */ | ||
334 | c->x86_num_cores = 1; | ||
335 | memset(&c->x86_capability, 0, sizeof c->x86_capability); | ||
336 | |||
337 | if (!have_cpuid_p()) { | ||
338 | /* First of all, decide if this is a 486 or higher */ | ||
339 | /* It's a 486 if we can modify the AC flag */ | ||
340 | if ( flag_is_changeable_p(X86_EFLAGS_AC) ) | ||
341 | c->x86 = 4; | ||
342 | else | ||
343 | c->x86 = 3; | ||
344 | } | ||
345 | |||
346 | generic_identify(c); | ||
347 | |||
348 | printk(KERN_DEBUG "CPU: After generic identify, caps:"); | ||
349 | for (i = 0; i < NCAPINTS; i++) | ||
350 | printk(" %08lx", c->x86_capability[i]); | ||
351 | printk("\n"); | ||
352 | |||
353 | if (this_cpu->c_identify) { | ||
354 | this_cpu->c_identify(c); | ||
355 | |||
356 | printk(KERN_DEBUG "CPU: After vendor identify, caps:"); | ||
357 | for (i = 0; i < NCAPINTS; i++) | ||
358 | printk(" %08lx", c->x86_capability[i]); | ||
359 | printk("\n"); | ||
360 | } | ||
361 | |||
362 | /* | ||
363 | * Vendor-specific initialization. In this section we | ||
364 | * canonicalize the feature flags: if a CPU has features | ||
365 | * that CPUID doesn't report, if CPUID claims incorrect | ||
366 | * flags, or if there are other bugs, we handle them | ||
367 | * here. | ||
368 | * | ||
369 | * At the end of this section, c->x86_capability better | ||
370 | * indicate the features this CPU genuinely supports! | ||
371 | */ | ||
372 | if (this_cpu->c_init) | ||
373 | this_cpu->c_init(c); | ||
374 | |||
375 | /* Disable the PN if appropriate */ | ||
376 | squash_the_stupid_serial_number(c); | ||
377 | |||
378 | /* | ||
379 | * The vendor-specific functions might have changed features. Now | ||
380 | * we do "generic changes." | ||
381 | */ | ||
382 | |||
383 | /* TSC disabled? */ | ||
384 | if ( tsc_disable ) | ||
385 | clear_bit(X86_FEATURE_TSC, c->x86_capability); | ||
386 | |||
387 | /* FXSR disabled? */ | ||
388 | if (disable_x86_fxsr) { | ||
389 | clear_bit(X86_FEATURE_FXSR, c->x86_capability); | ||
390 | clear_bit(X86_FEATURE_XMM, c->x86_capability); | ||
391 | } | ||
392 | |||
393 | if (disable_pse) | ||
394 | clear_bit(X86_FEATURE_PSE, c->x86_capability); | ||
395 | |||
396 | /* If the model name is still unset, do table lookup. */ | ||
397 | if ( !c->x86_model_id[0] ) { | ||
398 | char *p; | ||
399 | p = table_lookup_model(c); | ||
400 | if ( p ) | ||
401 | strcpy(c->x86_model_id, p); | ||
402 | else | ||
403 | /* Last resort... */ | ||
404 | sprintf(c->x86_model_id, "%02x/%02x", | ||
405 | c->x86_vendor, c->x86_model); | ||
406 | } | ||
407 | |||
408 | /* Now the feature flags better reflect actual CPU features! */ | ||
409 | |||
410 | printk(KERN_DEBUG "CPU: After all inits, caps:"); | ||
411 | for (i = 0; i < NCAPINTS; i++) | ||
412 | printk(" %08lx", c->x86_capability[i]); | ||
413 | printk("\n"); | ||
414 | |||
415 | /* | ||
416 | * On SMP, boot_cpu_data holds the common feature set between | ||
417 | * all CPUs; so make sure that we indicate which features are | ||
418 | * common between the CPUs. The first time this routine gets | ||
419 | * executed, c == &boot_cpu_data. | ||
420 | */ | ||
421 | if ( c != &boot_cpu_data ) { | ||
422 | /* AND the already accumulated flags with these */ | ||
423 | for ( i = 0 ; i < NCAPINTS ; i++ ) | ||
424 | boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; | ||
425 | } | ||
426 | |||
427 | /* Init Machine Check Exception if available. */ | ||
428 | #ifdef CONFIG_X86_MCE | ||
429 | mcheck_init(c); | ||
430 | #endif | ||
431 | } | ||
432 | |||
433 | #ifdef CONFIG_X86_HT | ||
434 | void __init detect_ht(struct cpuinfo_x86 *c) | ||
435 | { | ||
436 | u32 eax, ebx, ecx, edx; | ||
437 | int index_lsb, index_msb, tmp; | ||
438 | int cpu = smp_processor_id(); | ||
439 | |||
440 | if (!cpu_has(c, X86_FEATURE_HT)) | ||
441 | return; | ||
442 | |||
443 | cpuid(1, &eax, &ebx, &ecx, &edx); | ||
444 | smp_num_siblings = (ebx & 0xff0000) >> 16; | ||
445 | |||
446 | if (smp_num_siblings == 1) { | ||
447 | printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); | ||
448 | } else if (smp_num_siblings > 1 ) { | ||
449 | index_lsb = 0; | ||
450 | index_msb = 31; | ||
451 | |||
452 | if (smp_num_siblings > NR_CPUS) { | ||
453 | printk(KERN_WARNING "CPU: Unsupported number of siblings %d\n", smp_num_siblings); | ||
454 | smp_num_siblings = 1; | ||
455 | return; | ||
456 | } | ||
457 | tmp = smp_num_siblings; | ||
458 | while ((tmp & 1) == 0) { | ||
459 | tmp >>= 1; | ||
460 | index_lsb++; | ||
461 | } | ||
462 | tmp = smp_num_siblings; | ||
463 | while ((tmp & 0x80000000 ) == 0) { | ||
464 | tmp <<= 1; | ||
465 | index_msb--; | ||
466 | } | ||
467 | if (index_lsb != index_msb ) | ||
468 | index_msb++; | ||
469 | phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); | ||
470 | |||
471 | printk(KERN_INFO "CPU: Physical Processor ID: %d\n", | ||
472 | phys_proc_id[cpu]); | ||
473 | } | ||
474 | } | ||
475 | #endif | ||
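The two while loops compute the positions of the lowest and highest set bits of smp_num_siblings, and the final increment rounds a non-power-of-two count up, so index_msb ends up as ceil(log2(siblings)) -- the number of APIC-ID bits that phys_pkg_id() shifts away. A stand-alone sketch of that rounding:

#include <stdio.h>

/* Return ceil(log2(siblings)), computed the way detect_ht() does.
 * siblings must be >= 1 or the first loop never terminates. */
static int apicid_bits(unsigned int siblings)
{
	int index_lsb = 0, index_msb = 31;
	unsigned int tmp;

	tmp = siblings;
	while ((tmp & 1) == 0) {		/* lowest set bit */
		tmp >>= 1;
		index_lsb++;
	}
	tmp = siblings;
	while ((tmp & 0x80000000) == 0) {	/* highest set bit */
		tmp <<= 1;
		index_msb--;
	}
	if (index_lsb != index_msb)		/* not a power of two: round up */
		index_msb++;
	return index_msb;
}

int main(void)
{
	unsigned int n;

	for (n = 1; n <= 4; n++)
		printf("%u sibling(s) -> %d APIC-ID bit(s)\n", n, apicid_bits(n));
	return 0;
}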
476 | |||
477 | void __init print_cpu_info(struct cpuinfo_x86 *c) | ||
478 | { | ||
479 | char *vendor = NULL; | ||
480 | |||
481 | if (c->x86_vendor < X86_VENDOR_NUM) | ||
482 | vendor = this_cpu->c_vendor; | ||
483 | else if (c->cpuid_level >= 0) | ||
484 | vendor = c->x86_vendor_id; | ||
485 | |||
486 | if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) | ||
487 | printk("%s ", vendor); | ||
488 | |||
489 | if (!c->x86_model_id[0]) | ||
490 | printk("%d86", c->x86); | ||
491 | else | ||
492 | printk("%s", c->x86_model_id); | ||
493 | |||
494 | if (c->x86_mask || c->cpuid_level >= 0) | ||
495 | printk(" stepping %02x\n", c->x86_mask); | ||
496 | else | ||
497 | printk("\n"); | ||
498 | } | ||
499 | |||
500 | cpumask_t cpu_initialized __initdata = CPU_MASK_NONE; | ||
501 | |||
502 | /* This is hacky. :) | ||
503 | * We're emulating future behavior. | ||
504 | * In the future, the cpu-specific init functions will be called implicitly | ||
505 | * via the magic of initcalls. | ||
506 | * They will insert themselves into the cpu_devs structure. | ||
507 | * Then, when cpu_init() is called, we can just iterate over that array. | ||
508 | */ | ||
509 | |||
510 | extern int intel_cpu_init(void); | ||
511 | extern int cyrix_init_cpu(void); | ||
512 | extern int nsc_init_cpu(void); | ||
513 | extern int amd_init_cpu(void); | ||
514 | extern int centaur_init_cpu(void); | ||
515 | extern int transmeta_init_cpu(void); | ||
516 | extern int rise_init_cpu(void); | ||
517 | extern int nexgen_init_cpu(void); | ||
518 | extern int umc_init_cpu(void); | ||
519 | |||
520 | void __init early_cpu_init(void) | ||
521 | { | ||
522 | intel_cpu_init(); | ||
523 | cyrix_init_cpu(); | ||
524 | nsc_init_cpu(); | ||
525 | amd_init_cpu(); | ||
526 | centaur_init_cpu(); | ||
527 | transmeta_init_cpu(); | ||
528 | rise_init_cpu(); | ||
529 | nexgen_init_cpu(); | ||
530 | umc_init_cpu(); | ||
531 | early_cpu_detect(); | ||
532 | |||
533 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
534 | /* pse is not compatible with on-the-fly unmapping, | ||
535 | * disable it even if the cpus claim to support it. | ||
536 | */ | ||
537 | clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); | ||
538 | disable_pse = 1; | ||
539 | #endif | ||
540 | } | ||
541 | /* | ||
542 | * cpu_init() initializes state that is per-CPU. Some data is already | ||
543 | * initialized (naturally) in the bootstrap process, such as the GDT | ||
544 | * and IDT. We reload them nevertheless; this function acts as a | ||
545 | * 'CPU state barrier': nothing should get across. | ||
546 | */ | ||
547 | void __init cpu_init (void) | ||
548 | { | ||
549 | int cpu = smp_processor_id(); | ||
550 | struct tss_struct * t = &per_cpu(init_tss, cpu); | ||
551 | struct thread_struct *thread = &current->thread; | ||
552 | __u32 stk16_off = (__u32)&per_cpu(cpu_16bit_stack, cpu); | ||
553 | |||
554 | if (cpu_test_and_set(cpu, cpu_initialized)) { | ||
555 | printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); | ||
556 | for (;;) local_irq_enable(); | ||
557 | } | ||
558 | printk(KERN_INFO "Initializing CPU#%d\n", cpu); | ||
559 | |||
560 | if (cpu_has_vme || cpu_has_tsc || cpu_has_de) | ||
561 | clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); | ||
562 | if (tsc_disable && cpu_has_tsc) { | ||
563 | printk(KERN_NOTICE "Disabling TSC...\n"); | ||
564 | /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/ | ||
565 | clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability); | ||
566 | set_in_cr4(X86_CR4_TSD); | ||
567 | } | ||
568 | |||
569 | /* | ||
570 | * Initialize the per-CPU GDT with the boot GDT, | ||
571 | * and set up the GDT descriptor: | ||
572 | */ | ||
573 | memcpy(&per_cpu(cpu_gdt_table, cpu), cpu_gdt_table, | ||
574 | GDT_SIZE); | ||
575 | |||
576 | /* Set up GDT entry for 16bit stack */ | ||
577 | *(__u64 *)&(per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_ESPFIX_SS]) |= | ||
578 | ((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) | | ||
579 | ((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) | | ||
580 | (CPU_16BIT_STACK_SIZE - 1); | ||
581 | |||
582 | cpu_gdt_descr[cpu].size = GDT_SIZE - 1; | ||
583 | cpu_gdt_descr[cpu].address = | ||
584 | (unsigned long)&per_cpu(cpu_gdt_table, cpu); | ||
585 | |||
586 | /* | ||
587 | * Set up the per-thread TLS descriptor cache: | ||
588 | */ | ||
589 | memcpy(thread->tls_array, &per_cpu(cpu_gdt_table, cpu), | ||
590 | GDT_ENTRY_TLS_ENTRIES * 8); | ||
591 | |||
592 | __asm__ __volatile__("lgdt %0" : : "m" (cpu_gdt_descr[cpu])); | ||
593 | __asm__ __volatile__("lidt %0" : : "m" (idt_descr)); | ||
594 | |||
595 | /* | ||
596 | * Delete NT | ||
597 | */ | ||
598 | __asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl"); | ||
599 | |||
600 | /* | ||
601 | * Set up and load the per-CPU TSS and LDT | ||
602 | */ | ||
603 | atomic_inc(&init_mm.mm_count); | ||
604 | current->active_mm = &init_mm; | ||
605 | if (current->mm) | ||
606 | BUG(); | ||
607 | enter_lazy_tlb(&init_mm, current); | ||
608 | |||
609 | load_esp0(t, thread); | ||
610 | set_tss_desc(cpu,t); | ||
611 | load_TR_desc(); | ||
612 | load_LDT(&init_mm.context); | ||
613 | |||
614 | /* Set up doublefault TSS pointer in the GDT */ | ||
615 | __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); | ||
616 | |||
617 | /* Clear %fs and %gs. */ | ||
618 | asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs"); | ||
619 | |||
620 | /* Clear all 6 debug registers: */ | ||
621 | |||
622 | #define CD(register) __asm__("movl %0,%%db" #register ::"r"(0) ); | ||
623 | |||
624 | CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7); | ||
625 | |||
626 | #undef CD | ||
627 | |||
628 | /* | ||
629 | * Force FPU initialization: | ||
630 | */ | ||
631 | current_thread_info()->status = 0; | ||
632 | clear_used_math(); | ||
633 | mxcsr_feature_mask_init(); | ||
634 | } | ||
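The ESPFIX descriptor surgery in cpu_init() above is worth unpacking: an x86 segment descriptor stores its 32-bit base scattered across descriptor bits 16..39 (base bits 0..23) and 56..63 (base bits 24..31), so the per-CPU 16-bit stack address has to be split with exactly those shifts and masks. A stand-alone sketch with a hypothetical base address:

#include <stdio.h>

int main(void)
{
	unsigned long long desc = 0;		/* descriptor, base/limit zero */
	unsigned int base = 0xc1234567;		/* hypothetical stack address */
	unsigned int limit = 0x1000 - 1;	/* e.g. a 4K 16-bit stack */

	desc |= (((unsigned long long)base << 16) & 0x000000ffffff0000ULL) | /* base 23..0  */
		(((unsigned long long)base << 32) & 0xff00000000000000ULL) | /* base 31..24 */
		limit;						              /* limit 15..0 */

	printf("descriptor: %016llx\n", desc);	/* prints c100002345670fff */
	return 0;
}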
diff --git a/arch/i386/kernel/cpu/cpu.h b/arch/i386/kernel/cpu/cpu.h new file mode 100644 index 000000000000..5a1d4f163e84 --- /dev/null +++ b/arch/i386/kernel/cpu/cpu.h | |||
@@ -0,0 +1,30 @@ | |||
1 | |||
2 | struct cpu_model_info { | ||
3 | int vendor; | ||
4 | int family; | ||
5 | char *model_names[16]; | ||
6 | }; | ||
7 | |||
8 | /* attempt to consolidate cpu attributes */ | ||
9 | struct cpu_dev { | ||
10 | char * c_vendor; | ||
11 | |||
12 | /* some have two possibilities for cpuid string */ | ||
13 | char * c_ident[2]; | ||
14 | |||
15 | struct cpu_model_info c_models[4]; | ||
16 | |||
17 | void (*c_init)(struct cpuinfo_x86 * c); | ||
18 | void (*c_identify)(struct cpuinfo_x86 * c); | ||
19 | unsigned int (*c_size_cache)(struct cpuinfo_x86 * c, unsigned int size); | ||
20 | }; | ||
21 | |||
22 | extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM]; | ||
23 | |||
24 | extern int get_model_name(struct cpuinfo_x86 *c); | ||
25 | extern void display_cacheinfo(struct cpuinfo_x86 *c); | ||
26 | |||
27 | extern void generic_identify(struct cpuinfo_x86 * c); | ||
28 | |||
29 | extern void early_intel_workaround(struct cpuinfo_x86 *c); | ||
30 | |||
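To illustrate the interface, a hypothetical vendor driver sketched against this header (a kernel-style fragment, not a standalone program; the example_* names are invented, and X86_VENDOR_UNKNOWN merely stands in for a real vendor slot in cpu_devs[]):

static void __init example_init(struct cpuinfo_x86 *c)
{
	/* vendor-specific feature-flag fixups would go here */
}

static struct cpu_dev example_cpu_dev __initdata = {
	.c_vendor = "Example",
	.c_ident  = { "ExampleCPU!!" },	/* 12-byte CPUID vendor string */
	.c_init   = example_init,
};

int __init example_init_cpu(void)
{
	cpu_devs[X86_VENDOR_UNKNOWN] = &example_cpu_dev;
	return 0;
}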
diff --git a/arch/i386/kernel/cpu/cpufreq/Kconfig b/arch/i386/kernel/cpu/cpufreq/Kconfig new file mode 100644 index 000000000000..f25ffd74235c --- /dev/null +++ b/arch/i386/kernel/cpu/cpufreq/Kconfig | |||
@@ -0,0 +1,231 @@ | |||
1 | # | ||
2 | # CPU Frequency scaling | ||
3 | # | ||
4 | |||
5 | menu "CPU Frequency scaling" | ||
6 | |||
7 | source "drivers/cpufreq/Kconfig" | ||
8 | |||
9 | if CPU_FREQ | ||
10 | |||
11 | comment "CPUFreq processor drivers" | ||
12 | |||
13 | config X86_ACPI_CPUFREQ | ||
14 | tristate "ACPI Processor P-States driver" | ||
15 | select CPU_FREQ_TABLE | ||
16 | depends on ACPI_PROCESSOR | ||
17 | help | ||
18 | This adds a CPUFreq driver which utilizes the ACPI | ||
19 | Processor Performance States. | ||
20 | |||
21 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
22 | |||
23 | If in doubt, say N. | ||
24 | |||
25 | config ELAN_CPUFREQ | ||
26 | tristate "AMD Elan" | ||
27 | select CPU_FREQ_TABLE | ||
28 | depends on X86_ELAN | ||
29 | ---help--- | ||
30 | This adds the CPUFreq driver for AMD Elan SC400 and SC410 | ||
31 | processors. | ||
32 | |||
33 | You need to specify the processor maximum speed as boot | ||
34 | parameter: elanfreq=maxspeed (in kHz) or as module | ||
35 | parameter "max_freq". | ||
36 | |||
37 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
38 | |||
39 | If in doubt, say N. | ||
40 | |||
41 | config X86_POWERNOW_K6 | ||
42 | tristate "AMD Mobile K6-2/K6-3 PowerNow!" | ||
43 | select CPU_FREQ_TABLE | ||
44 | help | ||
45 | This adds the CPUFreq driver for mobile AMD K6-2+ and mobile | ||
46 | AMD K6-3+ processors. | ||
47 | |||
48 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
49 | |||
50 | If in doubt, say N. | ||
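For instance (the 266000 value is only an example), the two forms described above would look like:

    elanfreq=266000                       on the kernel command line (kHz)
    modprobe elanfreq max_freq=266000     when built as a module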
51 | |||
52 | config X86_POWERNOW_K7 | ||
53 | tristate "AMD Mobile Athlon/Duron PowerNow!" | ||
54 | select CPU_FREQ_TABLE | ||
55 | help | ||
56 | This adds the CPUFreq driver for mobile AMD K7 mobile processors. | ||
57 | |||
58 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
59 | |||
60 | If in doubt, say N. | ||
61 | |||
62 | config X86_POWERNOW_K7_ACPI | ||
63 | bool | ||
64 | depends on X86_POWERNOW_K7 && ACPI_PROCESSOR | ||
65 | depends on !(X86_POWERNOW_K7 = y && ACPI_PROCESSOR = m) | ||
66 | default y | ||
67 | |||
68 | config X86_POWERNOW_K8 | ||
69 | tristate "AMD Opteron/Athlon64 PowerNow!" | ||
70 | select CPU_FREQ_TABLE | ||
71 | depends on EXPERIMENTAL | ||
72 | help | ||
73 | This adds the CPUFreq driver for AMD Opteron/Athlon64 processors. | ||
74 | |||
75 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
76 | |||
77 | If in doubt, say N. | ||
78 | |||
79 | config X86_POWERNOW_K8_ACPI | ||
80 | bool | ||
81 | depends on X86_POWERNOW_K8 && ACPI_PROCESSOR | ||
82 | depends on !(X86_POWERNOW_K8 = y && ACPI_PROCESSOR = m) | ||
83 | default y | ||
84 | |||
85 | config X86_GX_SUSPMOD | ||
86 | tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation" | ||
87 | help | ||
88 | This adds the CPUFreq driver for NatSemi Geode processors which | ||
89 | support suspend modulation. | ||
90 | |||
91 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
92 | |||
93 | If in doubt, say N. | ||
94 | |||
95 | config X86_SPEEDSTEP_CENTRINO | ||
96 | tristate "Intel Enhanced SpeedStep" | ||
97 | select CPU_FREQ_TABLE | ||
98 | select X86_SPEEDSTEP_CENTRINO_TABLE if (!X86_SPEEDSTEP_CENTRINO_ACPI) | ||
99 | help | ||
100 | This adds the CPUFreq driver for Enhanced SpeedStep enabled | ||
101 | mobile CPUs. This means Intel Pentium M (Centrino) CPUs. However, | ||
102 | you also need to say Y to "Use ACPI tables to decode..." below | ||
103 | [which might imply enabling ACPI] if you want to use this driver | ||
104 | on non-Banias CPUs. | ||
105 | |||
106 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
107 | |||
108 | If in doubt, say N. | ||
109 | |||
110 | config X86_SPEEDSTEP_CENTRINO_ACPI | ||
111 | bool "Use ACPI tables to decode valid frequency/voltage pairs" | ||
112 | depends on X86_SPEEDSTEP_CENTRINO && ACPI_PROCESSOR | ||
113 | depends on !(X86_SPEEDSTEP_CENTRINO = y && ACPI_PROCESSOR = m) | ||
114 | default y | ||
115 | help | ||
116 | Use primarily the information provided in the BIOS ACPI tables | ||
117 | to determine valid CPU frequency and voltage pairings. It is | ||
118 | required for the driver to work on non-Banias CPUs. | ||
119 | |||
120 | If in doubt, say Y. | ||
121 | |||
122 | config X86_SPEEDSTEP_CENTRINO_TABLE | ||
123 | bool "Built-in tables for Banias CPUs" | ||
124 | depends on X86_SPEEDSTEP_CENTRINO | ||
125 | default y | ||
126 | help | ||
127 | Use built-in tables for Banias CPUs if ACPI encoding | ||
128 | is not available. | ||
129 | |||
130 | If in doubt, say N. | ||
131 | |||
132 | config X86_SPEEDSTEP_ICH | ||
133 | tristate "Intel Speedstep on ICH-M chipsets (ioport interface)" | ||
134 | select CPU_FREQ_TABLE | ||
135 | help | ||
136 | This adds the CPUFreq driver for certain mobile Intel Pentium III | ||
137 | (Coppermine), all mobile Intel Pentium III-M (Tualatin) and all | ||
138 | mobile Intel Pentium 4 P4-M on systems which have an Intel ICH2, | ||
139 | ICH3 or ICH4 southbridge. | ||
140 | |||
141 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
142 | |||
143 | If in doubt, say N. | ||
144 | |||
145 | config X86_SPEEDSTEP_SMI | ||
146 | tristate "Intel SpeedStep on 440BX/ZX/MX chipsets (SMI interface)" | ||
147 | select CPU_FREQ_TABLE | ||
148 | depends on EXPERIMENTAL | ||
149 | help | ||
150 | This adds the CPUFreq driver for certain mobile Intel Pentium III | ||
151 | (Coppermine) and all mobile Intel Pentium III-M (Tualatin) processors | ||
152 | on systems which have an Intel 440BX/ZX/MX southbridge. | ||
153 | |||
154 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
155 | |||
156 | If in doubt, say N. | ||
157 | |||
158 | config X86_P4_CLOCKMOD | ||
159 | tristate "Intel Pentium 4 clock modulation" | ||
160 | select CPU_FREQ_TABLE | ||
161 | help | ||
162 | This adds the CPUFreq driver for Intel Pentium 4 / XEON | ||
163 | processors. | ||
164 | |||
165 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
166 | |||
167 | If in doubt, say N. | ||
168 | |||
169 | config X86_CPUFREQ_NFORCE2 | ||
170 | tristate "nVidia nForce2 FSB changing" | ||
171 | depends on EXPERIMENTAL | ||
172 | help | ||
173 | This adds the CPUFreq driver for FSB changing on nVidia nForce2 | ||
174 | platforms. | ||
175 | |||
176 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
177 | |||
178 | If in doubt, say N. | ||
179 | |||
180 | config X86_LONGRUN | ||
181 | tristate "Transmeta LongRun" | ||
182 | help | ||
183 | This adds the CPUFreq driver for Transmeta Crusoe and Efficeon processors | ||
184 | which support LongRun. | ||
185 | |||
186 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
187 | |||
188 | If in doubt, say N. | ||
189 | |||
190 | config X86_LONGHAUL | ||
191 | tristate "VIA Cyrix III Longhaul" | ||
192 | select CPU_FREQ_TABLE | ||
193 | help | ||
194 | This adds the CPUFreq driver for VIA Samuel/CyrixIII, | ||
195 | VIA Cyrix Samuel/C3, VIA Cyrix Ezra and VIA Cyrix Ezra-T | ||
196 | processors. | ||
197 | |||
198 | For details, take a look at <file:Documentation/cpu-freq/>. | ||
199 | |||
200 | If in doubt, say N. | ||
201 | |||
202 | comment "shared options" | ||
203 | |||
204 | config X86_ACPI_CPUFREQ_PROC_INTF | ||
205 | bool "/proc/acpi/processor/../performance interface (deprecated)" | ||
206 | depends on PROC_FS | ||
207 | depends on X86_ACPI_CPUFREQ || X86_SPEEDSTEP_CENTRINO_ACPI || X86_POWERNOW_K7_ACPI || X86_POWERNOW_K8_ACPI | ||
208 | help | ||
209 | This enables the deprecated /proc/acpi/processor/../performance | ||
210 | interface. While it is helpful for debugging, the generic, | ||
211 | cross-architecture cpufreq interfaces should be used. | ||
212 | |||
213 | If in doubt, say N. | ||
214 | |||
215 | config X86_SPEEDSTEP_LIB | ||
216 | tristate | ||
217 | default X86_SPEEDSTEP_ICH || X86_SPEEDSTEP_SMI || X86_P4_CLOCKMOD | ||
218 | |||
219 | config X86_SPEEDSTEP_RELAXED_CAP_CHECK | ||
220 | bool "Relaxed speedstep capability checks" | ||
221 | depends on (X86_SPEEDSTEP_SMI || X86_SPEEDSTEP_ICH) | ||
222 | help | ||
223 | Skip some of the capability checks that would normally be done on | ||
224 | a speedstep capable system. Some ancient or unusual systems are | ||
225 | speedstep capable but don't always advertise it. This option lets | ||
226 | the probing code bypass some of those checks if the parameter | ||
227 | "relaxed_check=1" is passed to the module. | ||
228 | |||
229 | endif # CPU_FREQ | ||
230 | |||
231 | endmenu | ||
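As a sketch, a .config fragment enabling the core framework plus the ACPI P-states driver from this menu (the y/m choices are illustrative) might read:

CONFIG_CPU_FREQ=y
CONFIG_CPU_FREQ_TABLE=y
CONFIG_X86_ACPI_CPUFREQ=m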
diff --git a/arch/i386/kernel/cpu/cpufreq/Makefile b/arch/i386/kernel/cpu/cpufreq/Makefile new file mode 100644 index 000000000000..a922e97aeedd --- /dev/null +++ b/arch/i386/kernel/cpu/cpufreq/Makefile | |||
@@ -0,0 +1,14 @@ | |||
1 | obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o | ||
2 | obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o | ||
3 | obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o | ||
4 | obj-$(CONFIG_X86_LONGHAUL) += longhaul.o | ||
5 | obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o | ||
6 | obj-$(CONFIG_X86_LONGRUN) += longrun.o | ||
7 | obj-$(CONFIG_X86_GX_SUSPMOD) += gx-suspmod.o | ||
8 | obj-$(CONFIG_X86_SPEEDSTEP_ICH) += speedstep-ich.o | ||
9 | obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o | ||
10 | obj-$(CONFIG_X86_SPEEDSTEP_LIB) += speedstep-lib.o | ||
11 | obj-$(CONFIG_X86_SPEEDSTEP_SMI) += speedstep-smi.o | ||
12 | obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o | ||
13 | obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o | ||
14 | obj-$(CONFIG_X86_CPUFREQ_NFORCE2) += cpufreq-nforce2.o | ||
diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c new file mode 100644 index 000000000000..963e17aa205d --- /dev/null +++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c | |||
@@ -0,0 +1,537 @@ | |||
1 | /* | ||
2 | * acpi-cpufreq.c - ACPI Processor P-States Driver ($Revision: 1.3 $) | ||
3 | * | ||
4 | * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com> | ||
5 | * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> | ||
6 | * Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de> | ||
7 | * | ||
8 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or (at | ||
13 | * your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but | ||
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License along | ||
21 | * with this program; if not, write to the Free Software Foundation, Inc., | ||
22 | * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. | ||
23 | * | ||
24 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
25 | */ | ||
26 | |||
27 | #include <linux/config.h> | ||
28 | #include <linux/kernel.h> | ||
29 | #include <linux/module.h> | ||
30 | #include <linux/init.h> | ||
31 | #include <linux/cpufreq.h> | ||
32 | #include <linux/proc_fs.h> | ||
33 | #include <linux/seq_file.h> | ||
34 | #include <asm/io.h> | ||
35 | #include <asm/delay.h> | ||
36 | #include <asm/uaccess.h> | ||
37 | |||
38 | #include <linux/acpi.h> | ||
39 | #include <acpi/processor.h> | ||
40 | |||
41 | #include "speedstep-est-common.h" | ||
42 | |||
43 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg) | ||
44 | |||
45 | MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski"); | ||
46 | MODULE_DESCRIPTION("ACPI Processor P-States Driver"); | ||
47 | MODULE_LICENSE("GPL"); | ||
48 | |||
49 | |||
50 | struct cpufreq_acpi_io { | ||
51 | struct acpi_processor_performance acpi_data; | ||
52 | struct cpufreq_frequency_table *freq_table; | ||
53 | unsigned int resume; | ||
54 | }; | ||
55 | |||
56 | static struct cpufreq_acpi_io *acpi_io_data[NR_CPUS]; | ||
57 | |||
58 | static struct cpufreq_driver acpi_cpufreq_driver; | ||
59 | |||
60 | static int | ||
61 | acpi_processor_write_port( | ||
62 | u16 port, | ||
63 | u8 bit_width, | ||
64 | u32 value) | ||
65 | { | ||
66 | if (bit_width <= 8) { | ||
67 | outb(value, port); | ||
68 | } else if (bit_width <= 16) { | ||
69 | outw(value, port); | ||
70 | } else if (bit_width <= 32) { | ||
71 | outl(value, port); | ||
72 | } else { | ||
73 | return -ENODEV; | ||
74 | } | ||
75 | return 0; | ||
76 | } | ||
77 | |||
78 | static int | ||
79 | acpi_processor_read_port( | ||
80 | u16 port, | ||
81 | u8 bit_width, | ||
82 | u32 *ret) | ||
83 | { | ||
84 | *ret = 0; | ||
85 | if (bit_width <= 8) { | ||
86 | *ret = inb(port); | ||
87 | } else if (bit_width <= 16) { | ||
88 | *ret = inw(port); | ||
89 | } else if (bit_width <= 32) { | ||
90 | *ret = inl(port); | ||
91 | } else { | ||
92 | return -ENODEV; | ||
93 | } | ||
94 | return 0; | ||
95 | } | ||
96 | |||
97 | static int | ||
98 | acpi_processor_set_performance ( | ||
99 | struct cpufreq_acpi_io *data, | ||
100 | unsigned int cpu, | ||
101 | int state) | ||
102 | { | ||
103 | u16 port = 0; | ||
104 | u8 bit_width = 0; | ||
105 | int ret = 0; | ||
106 | u32 value = 0; | ||
107 | int i = 0; | ||
108 | struct cpufreq_freqs cpufreq_freqs; | ||
109 | cpumask_t saved_mask; | ||
110 | int retval; | ||
111 | |||
112 | dprintk("acpi_processor_set_performance\n"); | ||
113 | |||
114 | /* | ||
115 | * TBD: Use something other than set_cpus_allowed. | ||
116 | * As set_cpus_allowed is a bit racy, | ||
117 | * with any other set_cpus_allowed for this process. | ||
118 | */ | ||
119 | saved_mask = current->cpus_allowed; | ||
120 | set_cpus_allowed(current, cpumask_of_cpu(cpu)); | ||
121 | if (smp_processor_id() != cpu) { | ||
122 | retval = -EAGAIN; goto migrate_end; /* restore saved_mask first */ | ||
123 | } | ||
124 | |||
125 | if (state == data->acpi_data.state) { | ||
126 | if (unlikely(data->resume)) { | ||
127 | dprintk("Called after resume, resetting to P%d\n", state); | ||
128 | data->resume = 0; | ||
129 | } else { | ||
130 | dprintk("Already at target state (P%d)\n", state); | ||
131 | retval = 0; | ||
132 | goto migrate_end; | ||
133 | } | ||
134 | } | ||
135 | |||
136 | dprintk("Transitioning from P%d to P%d\n", | ||
137 | data->acpi_data.state, state); | ||
138 | |||
139 | /* cpufreq frequency struct */ | ||
140 | cpufreq_freqs.cpu = cpu; | ||
141 | cpufreq_freqs.old = data->freq_table[data->acpi_data.state].frequency; | ||
142 | cpufreq_freqs.new = data->freq_table[state].frequency; | ||
143 | |||
144 | /* notify cpufreq */ | ||
145 | cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_PRECHANGE); | ||
146 | |||
147 | /* | ||
148 | * First we write the target state's 'control' value to the | ||
149 | * control_register. | ||
150 | */ | ||
151 | |||
152 | port = data->acpi_data.control_register.address; | ||
153 | bit_width = data->acpi_data.control_register.bit_width; | ||
154 | value = (u32) data->acpi_data.states[state].control; | ||
155 | |||
156 | dprintk("Writing 0x%08x to port 0x%04x\n", value, port); | ||
157 | |||
158 | ret = acpi_processor_write_port(port, bit_width, value); | ||
159 | if (ret) { | ||
160 | dprintk("Invalid port width 0x%04x\n", bit_width); | ||
161 | retval = ret; | ||
162 | goto migrate_end; | ||
163 | } | ||
164 | |||
165 | /* | ||
166 | * Then we read the 'status_register' and compare the value with the | ||
167 | * target state's 'status' to make sure the transition was successful. | ||
168 | * Note that we'll poll for up to 1ms (100 cycles of 10us) before | ||
169 | * giving up. | ||
170 | */ | ||
171 | |||
172 | port = data->acpi_data.status_register.address; | ||
173 | bit_width = data->acpi_data.status_register.bit_width; | ||
174 | |||
175 | dprintk("Looking for 0x%08x from port 0x%04x\n", | ||
176 | (u32) data->acpi_data.states[state].status, port); | ||
177 | |||
178 | for (i=0; i<100; i++) { | ||
179 | ret = acpi_processor_read_port(port, bit_width, &value); | ||
180 | if (ret) { | ||
181 | dprintk("Invalid port width 0x%04x\n", bit_width); | ||
182 | retval = ret; | ||
183 | goto migrate_end; | ||
184 | } | ||
185 | if (value == (u32) data->acpi_data.states[state].status) | ||
186 | break; | ||
187 | udelay(10); | ||
188 | } | ||
189 | |||
190 | /* notify cpufreq */ | ||
191 | cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE); | ||
192 | |||
193 | if (value != (u32) data->acpi_data.states[state].status) { | ||
194 | unsigned int tmp = cpufreq_freqs.new; | ||
195 | cpufreq_freqs.new = cpufreq_freqs.old; | ||
196 | cpufreq_freqs.old = tmp; | ||
197 | cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_PRECHANGE); | ||
198 | cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE); | ||
199 | printk(KERN_WARNING "acpi-cpufreq: Transition failed\n"); | ||
200 | retval = -ENODEV; | ||
201 | goto migrate_end; | ||
202 | } | ||
203 | |||
204 | dprintk("Transition successful after %d microseconds\n", i * 10); | ||
205 | |||
206 | data->acpi_data.state = state; | ||
207 | |||
208 | retval = 0; | ||
209 | migrate_end: | ||
210 | set_cpus_allowed(current, saved_mask); | ||
211 | return (retval); | ||
212 | } | ||
213 | |||
214 | |||
215 | static int | ||
216 | acpi_cpufreq_target ( | ||
217 | struct cpufreq_policy *policy, | ||
218 | unsigned int target_freq, | ||
219 | unsigned int relation) | ||
220 | { | ||
221 | struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu]; | ||
222 | unsigned int next_state = 0; | ||
223 | unsigned int result = 0; | ||
224 | |||
225 | dprintk("acpi_cpufreq_target\n"); | ||
226 | |||
227 | result = cpufreq_frequency_table_target(policy, | ||
228 | data->freq_table, | ||
229 | target_freq, | ||
230 | relation, | ||
231 | &next_state); | ||
232 | if (result) | ||
233 | return (result); | ||
234 | |||
235 | result = acpi_processor_set_performance (data, policy->cpu, next_state); | ||
236 | |||
237 | return (result); | ||
238 | } | ||
239 | |||
240 | |||
241 | static int | ||
242 | acpi_cpufreq_verify ( | ||
243 | struct cpufreq_policy *policy) | ||
244 | { | ||
245 | unsigned int result = 0; | ||
246 | struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu]; | ||
247 | |||
248 | dprintk("acpi_cpufreq_verify\n"); | ||
249 | |||
250 | result = cpufreq_frequency_table_verify(policy, | ||
251 | data->freq_table); | ||
252 | |||
253 | return (result); | ||
254 | } | ||
255 | |||
256 | |||
257 | static unsigned long | ||
258 | acpi_cpufreq_guess_freq ( | ||
259 | struct cpufreq_acpi_io *data, | ||
260 | unsigned int cpu) | ||
261 | { | ||
262 | if (cpu_khz) { | ||
263 | /* search the closest match to cpu_khz */ | ||
264 | unsigned int i; | ||
265 | unsigned long freq; | ||
266 | unsigned long freqn = data->acpi_data.states[0].core_frequency * 1000; | ||
267 | |||
268 | for (i=0; i < (data->acpi_data.state_count - 1); i++) { | ||
269 | freq = freqn; | ||
270 | freqn = data->acpi_data.states[i+1].core_frequency * 1000; | ||
271 | if ((2 * cpu_khz) > (freqn + freq)) { | ||
272 | data->acpi_data.state = i; | ||
273 | return (freq); | ||
274 | } | ||
275 | } | ||
276 | data->acpi_data.state = data->acpi_data.state_count - 1; | ||
277 | return (freqn); | ||
278 | } else { | ||
279 | /* assume CPU is at P0... */ | ||
280 | data->acpi_data.state = 0; | ||
281 | } | ||
282 | return data->acpi_data.states[0].core_frequency * 1000; | ||
283 | } | ||
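The closest-match test reads tersely: 2 * cpu_khz > (freqn + freq) is just "cpu_khz lies above the midpoint of two adjacent states", so the first state whose midpoint is crossed wins. A stand-alone sketch with a hypothetical descending P-state table:

#include <stdio.h>

int main(void)
{
	unsigned long khz[] = { 2800000, 2400000, 2000000 };	/* P0..P2 */
	unsigned long cpu_khz = 2300000;	/* measured speed, example */
	unsigned int i;

	for (i = 0; i < 2; i++) {		/* state_count - 1 pairs */
		if (2 * cpu_khz > khz[i] + khz[i + 1]) {
			printf("closest state: P%u (%lu kHz)\n", i, khz[i]);
			return 0;
		}
	}
	printf("closest state: P2 (%lu kHz)\n", khz[2]);
	return 0;
}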
284 | |||
285 | |||
286 | /* | ||
287 | * acpi_processor_cpu_init_pdc_est - let BIOS know about the SMP capabilities | ||
288 | * of this driver | ||
289 | * @perf: processor-specific acpi_io_data struct | ||
290 | * @cpu: CPU being initialized | ||
291 | * | ||
292 | * To avoid issues with legacy OSes, some BIOSes need to be informed of | ||
293 | * the SMP capabilities of the OS P-state driver. Here we set the bits in | ||
294 | * _PDC accordingly, for Enhanced SpeedStep. The actual call to _PDC is | ||
295 | * done in drivers/acpi/processor.c | ||
296 | */ | ||
297 | static void | ||
298 | acpi_processor_cpu_init_pdc_est( | ||
299 | struct acpi_processor_performance *perf, | ||
300 | unsigned int cpu, | ||
301 | struct acpi_object_list *obj_list | ||
302 | ) | ||
303 | { | ||
304 | union acpi_object *obj; | ||
305 | u32 *buf; | ||
306 | struct cpuinfo_x86 *c = cpu_data + cpu; | ||
307 | dprintk("acpi_processor_cpu_init_pdc_est\n"); | ||
308 | |||
309 | if (!cpu_has(c, X86_FEATURE_EST)) | ||
310 | return; | ||
311 | |||
312 | /* Initialize pdc. It will be used later. */ | ||
313 | if (!obj_list) | ||
314 | return; | ||
315 | |||
316 | if (!(obj_list->count && obj_list->pointer)) | ||
317 | return; | ||
318 | |||
319 | obj = obj_list->pointer; | ||
320 | if ((obj->buffer.length == 12) && obj->buffer.pointer) { | ||
321 | buf = (u32 *)obj->buffer.pointer; | ||
322 | buf[0] = ACPI_PDC_REVISION_ID; | ||
323 | buf[1] = 1; | ||
324 | buf[2] = ACPI_PDC_EST_CAPABILITY_SMP; | ||
325 | perf->pdc = obj_list; | ||
326 | } | ||
327 | return; | ||
328 | } | ||
329 | |||
330 | |||
331 | /* CPU specific PDC initialization */ | ||
332 | static void | ||
333 | acpi_processor_cpu_init_pdc( | ||
334 | struct acpi_processor_performance *perf, | ||
335 | unsigned int cpu, | ||
336 | struct acpi_object_list *obj_list | ||
337 | ) | ||
338 | { | ||
339 | struct cpuinfo_x86 *c = cpu_data + cpu; | ||
340 | dprintk("acpi_processor_cpu_init_pdc\n"); | ||
341 | perf->pdc = NULL; | ||
342 | if (cpu_has(c, X86_FEATURE_EST)) | ||
343 | acpi_processor_cpu_init_pdc_est(perf, cpu, obj_list); | ||
344 | return; | ||
345 | } | ||
346 | |||
347 | |||
348 | static int | ||
349 | acpi_cpufreq_cpu_init ( | ||
350 | struct cpufreq_policy *policy) | ||
351 | { | ||
352 | unsigned int i; | ||
353 | unsigned int cpu = policy->cpu; | ||
354 | struct cpufreq_acpi_io *data; | ||
355 | unsigned int result = 0; | ||
356 | |||
357 | union acpi_object arg0 = {ACPI_TYPE_BUFFER}; | ||
358 | u32 arg0_buf[3]; | ||
359 | struct acpi_object_list arg_list = {1, &arg0}; | ||
360 | |||
361 | dprintk("acpi_cpufreq_cpu_init\n"); | ||
362 | /* setup arg_list for _PDC settings */ | ||
363 | arg0.buffer.length = 12; | ||
364 | arg0.buffer.pointer = (u8 *) arg0_buf; | ||
365 | |||
366 | data = kmalloc(sizeof(struct cpufreq_acpi_io), GFP_KERNEL); | ||
367 | if (!data) | ||
368 | return (-ENOMEM); | ||
369 | memset(data, 0, sizeof(struct cpufreq_acpi_io)); | ||
370 | |||
371 | acpi_io_data[cpu] = data; | ||
372 | |||
373 | acpi_processor_cpu_init_pdc(&data->acpi_data, cpu, &arg_list); | ||
374 | result = acpi_processor_register_performance(&data->acpi_data, cpu); | ||
375 | data->acpi_data.pdc = NULL; | ||
376 | |||
377 | if (result) | ||
378 | goto err_free; | ||
379 | |||
380 | if (is_const_loops_cpu(cpu)) { | ||
381 | acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS; | ||
382 | } | ||
383 | |||
384 | /* capability check */ | ||
385 | if (data->acpi_data.state_count <= 1) { | ||
386 | dprintk("No P-States\n"); | ||
387 | result = -ENODEV; | ||
388 | goto err_unreg; | ||
389 | } | ||
390 | if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_SYSTEM_IO) || | ||
391 | (data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_SYSTEM_IO)) { | ||
392 | dprintk("Unsupported address space [%d, %d]\n", | ||
393 | (u32) (data->acpi_data.control_register.space_id), | ||
394 | (u32) (data->acpi_data.status_register.space_id)); | ||
395 | result = -ENODEV; | ||
396 | goto err_unreg; | ||
397 | } | ||
398 | |||
399 | /* alloc freq_table */ | ||
400 | data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) * (data->acpi_data.state_count + 1), GFP_KERNEL); | ||
401 | if (!data->freq_table) { | ||
402 | result = -ENOMEM; | ||
403 | goto err_unreg; | ||
404 | } | ||
405 | |||
406 | /* detect transition latency */ | ||
407 | policy->cpuinfo.transition_latency = 0; | ||
408 | for (i=0; i<data->acpi_data.state_count; i++) { | ||
409 | if ((data->acpi_data.states[i].transition_latency * 1000) > policy->cpuinfo.transition_latency) | ||
410 | policy->cpuinfo.transition_latency = data->acpi_data.states[i].transition_latency * 1000; | ||
411 | } | ||
412 | policy->governor = CPUFREQ_DEFAULT_GOVERNOR; | ||
413 | |||
414 | /* The current speed is unknown and not detectable by ACPI... */ | ||
415 | policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu); | ||
416 | |||
417 | /* table init */ | ||
418 | for (i=0; i<=data->acpi_data.state_count; i++) | ||
419 | { | ||
420 | data->freq_table[i].index = i; | ||
421 | if (i<data->acpi_data.state_count) | ||
422 | data->freq_table[i].frequency = data->acpi_data.states[i].core_frequency * 1000; | ||
423 | else | ||
424 | data->freq_table[i].frequency = CPUFREQ_TABLE_END; | ||
425 | } | ||
426 | |||
427 | result = cpufreq_frequency_table_cpuinfo(policy, data->freq_table); | ||
428 | if (result) { | ||
429 | goto err_freqfree; | ||
430 | } | ||
431 | |||
432 | /* notify BIOS that we exist */ | ||
433 | acpi_processor_notify_smm(THIS_MODULE); | ||
434 | |||
435 | printk(KERN_INFO "acpi-cpufreq: CPU%u - ACPI performance management activated.\n", | ||
436 | cpu); | ||
437 | for (i = 0; i < data->acpi_data.state_count; i++) | ||
438 | dprintk(" %cP%d: %d MHz, %d mW, %d uS\n", | ||
439 | (i == data->acpi_data.state?'*':' '), i, | ||
440 | (u32) data->acpi_data.states[i].core_frequency, | ||
441 | (u32) data->acpi_data.states[i].power, | ||
442 | (u32) data->acpi_data.states[i].transition_latency); | ||
443 | |||
444 | cpufreq_frequency_table_get_attr(data->freq_table, policy->cpu); | ||
445 | return (result); | ||
446 | |||
447 | err_freqfree: | ||
448 | kfree(data->freq_table); | ||
449 | err_unreg: | ||
450 | acpi_processor_unregister_performance(&data->acpi_data, cpu); | ||
451 | err_free: | ||
452 | kfree(data); | ||
453 | acpi_io_data[cpu] = NULL; | ||
454 | |||
455 | return (result); | ||
456 | } | ||
457 | |||
458 | |||
459 | static int | ||
460 | acpi_cpufreq_cpu_exit ( | ||
461 | struct cpufreq_policy *policy) | ||
462 | { | ||
463 | struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu]; | ||
464 | |||
465 | |||
466 | dprintk("acpi_cpufreq_cpu_exit\n"); | ||
467 | |||
468 | if (data) { | ||
469 | cpufreq_frequency_table_put_attr(policy->cpu); | ||
470 | acpi_io_data[policy->cpu] = NULL; | ||
471 | acpi_processor_unregister_performance(&data->acpi_data, policy->cpu); | ||
472 | kfree(data); | ||
473 | } | ||
474 | |||
475 | return (0); | ||
476 | } | ||
477 | |||
478 | static int | ||
479 | acpi_cpufreq_resume ( | ||
480 | struct cpufreq_policy *policy) | ||
481 | { | ||
482 | struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu]; | ||
483 | |||
484 | |||
485 | dprintk("acpi_cpufreq_resume\n"); | ||
486 | |||
487 | data->resume = 1; | ||
488 | |||
489 | return (0); | ||
490 | } | ||
491 | |||
492 | |||
493 | static struct freq_attr* acpi_cpufreq_attr[] = { | ||
494 | &cpufreq_freq_attr_scaling_available_freqs, | ||
495 | NULL, | ||
496 | }; | ||
497 | |||
498 | static struct cpufreq_driver acpi_cpufreq_driver = { | ||
499 | .verify = acpi_cpufreq_verify, | ||
500 | .target = acpi_cpufreq_target, | ||
501 | .init = acpi_cpufreq_cpu_init, | ||
502 | .exit = acpi_cpufreq_cpu_exit, | ||
503 | .resume = acpi_cpufreq_resume, | ||
504 | .name = "acpi-cpufreq", | ||
505 | .owner = THIS_MODULE, | ||
506 | .attr = acpi_cpufreq_attr, | ||
507 | }; | ||
508 | |||
509 | |||
510 | static int __init | ||
511 | acpi_cpufreq_init (void) | ||
512 | { | ||
513 | int result = 0; | ||
514 | |||
515 | dprintk("acpi_cpufreq_init\n"); | ||
516 | |||
517 | result = cpufreq_register_driver(&acpi_cpufreq_driver); | ||
518 | |||
519 | return (result); | ||
520 | } | ||
521 | |||
522 | |||
523 | static void __exit | ||
524 | acpi_cpufreq_exit (void) | ||
525 | { | ||
526 | dprintk("acpi_cpufreq_exit\n"); | ||
527 | |||
528 | cpufreq_unregister_driver(&acpi_cpufreq_driver); | ||
529 | |||
530 | return; | ||
531 | } | ||
532 | |||
533 | |||
534 | late_initcall(acpi_cpufreq_init); | ||
535 | module_exit(acpi_cpufreq_exit); | ||
536 | |||
537 | MODULE_ALIAS("acpi"); | ||
diff --git a/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c new file mode 100644 index 000000000000..04a405345203 --- /dev/null +++ b/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c | |||
@@ -0,0 +1,457 @@ | |||
1 | /* | ||
2 | * (C) 2004 Sebastian Witt <se.witt@gmx.net> | ||
3 | * | ||
4 | * Licensed under the terms of the GNU GPL License version 2. | ||
5 | * Based upon reverse engineered information | ||
6 | * | ||
7 | * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* | ||
8 | */ | ||
9 | |||
10 | #include <linux/kernel.h> | ||
11 | #include <linux/module.h> | ||
12 | #include <linux/moduleparam.h> | ||
13 | #include <linux/init.h> | ||
14 | #include <linux/cpufreq.h> | ||
15 | #include <linux/pci.h> | ||
16 | #include <linux/delay.h> | ||
17 | |||
18 | #define NFORCE2_XTAL 25 | ||
19 | #define NFORCE2_BOOTFSB 0x48 | ||
20 | #define NFORCE2_PLLENABLE 0xa8 | ||
21 | #define NFORCE2_PLLREG 0xa4 | ||
22 | #define NFORCE2_PLLADR 0xa0 | ||
23 | #define NFORCE2_PLL(mul, div) (0x100000 | (mul << 8) | div) | ||
24 | |||
25 | #define NFORCE2_MIN_FSB 50 | ||
26 | #define NFORCE2_SAFE_DISTANCE 50 | ||
27 | |||
28 | /* Delay in ms between FSB changes */ | ||
29 | //#define NFORCE2_DELAY 10 | ||
30 | |||
31 | /* nforce2_chipset: | ||
32 | * FSB is changed using the chipset | ||
33 | */ | ||
34 | static struct pci_dev *nforce2_chipset_dev; | ||
35 | |||
36 | /* fid: | ||
37 | * multiplier * 10 | ||
38 | */ | ||
39 | static int fid = 0; | ||
40 | |||
41 | /* min_fsb, max_fsb: | ||
42 | * minimum and maximum FSB (= FSB at boot time) | ||
43 | */ | ||
44 | static int min_fsb = 0; | ||
45 | static int max_fsb = 0; | ||
46 | |||
47 | MODULE_AUTHOR("Sebastian Witt <se.witt@gmx.net>"); | ||
48 | MODULE_DESCRIPTION("nForce2 FSB changing cpufreq driver"); | ||
49 | MODULE_LICENSE("GPL"); | ||
50 | |||
51 | module_param(fid, int, 0444); | ||
52 | module_param(min_fsb, int, 0444); | ||
53 | |||
54 | MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)"); | ||
55 | MODULE_PARM_DESC(min_fsb, | ||
56 | "Minimum FSB to use, if not defined: current FSB - 50"); | ||
57 | |||
58 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "cpufreq-nforce2", msg) | ||
59 | |||
60 | /* | ||
61 | * nforce2_calc_fsb - calculate FSB | ||
62 | * @pll: PLL value | ||
63 | * | ||
64 | * Calculates FSB from PLL value | ||
65 | */ | ||
66 | static int nforce2_calc_fsb(int pll) | ||
67 | { | ||
68 | unsigned char mul, div; | ||
69 | |||
70 | mul = (pll >> 8) & 0xff; | ||
71 | div = pll & 0xff; | ||
72 | |||
73 | if (div > 0) | ||
74 | return NFORCE2_XTAL * mul / div; | ||
75 | |||
76 | return 0; | ||
77 | } | ||
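As a worked example (values hypothetical): with the 25 MHz crystal, mul = 40 and div = 5 give an FSB of 25 * 40 / 5 = 200 MHz, and the corresponding packed word is NFORCE2_PLL(40, 5) = 0x100000 | (40 << 8) | 5 = 0x102805.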
78 | |||
79 | /* | ||
80 | * nforce2_calc_pll - calculate PLL value | ||
81 | * @fsb: FSB | ||
82 | * | ||
83 | * Calculate PLL value for given FSB | ||
84 | */ | ||
85 | static int nforce2_calc_pll(unsigned int fsb) | ||
86 | { | ||
87 | unsigned char xmul, xdiv; | ||
88 | unsigned char mul = 0, div = 0; | ||
89 | int tried = 0; | ||
90 | |||
91 | /* Try to calculate multiplier and divider up to 4 times */ | ||
92 | while (((mul == 0) || (div == 0)) && (tried <= 3)) { | ||
93 | for (xdiv = 1; xdiv <= 0x80; xdiv++) | ||
94 | for (xmul = 1; xmul <= 0xfe; xmul++) | ||
95 | if (nforce2_calc_fsb(NFORCE2_PLL(xmul, xdiv)) == | ||
96 | fsb + tried) { | ||
97 | mul = xmul; | ||
98 | div = xdiv; | ||
99 | } | ||
100 | tried++; | ||
101 | } | ||
102 | |||
103 | if ((mul == 0) || (div == 0)) | ||
104 | return -1; | ||
105 | |||
106 | return NFORCE2_PLL(mul, div); | ||
107 | } | ||
108 | |||
109 | /* | ||
110 | * nforce2_write_pll - write PLL value to chipset | ||
111 | * @pll: PLL value | ||
112 | * | ||
113 | * Writes new FSB PLL value to chipset | ||
114 | */ | ||
115 | static void nforce2_write_pll(int pll) | ||
116 | { | ||
117 | int temp; | ||
118 | |||
119 | /* Set the pll addr. to 0x00 */ | ||
120 | temp = 0x00; | ||
121 | pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLADR, temp); | ||
122 | |||
123 | /* Now write the value in all 64 registers */ | ||
124 | for (temp = 0; temp <= 0x3f; temp++) { | ||
125 | pci_write_config_dword(nforce2_chipset_dev, | ||
126 | NFORCE2_PLLREG, pll); | ||
127 | } | ||
128 | |||
129 | return; | ||
130 | } | ||
131 | |||
132 | /* | ||
133 | * nforce2_fsb_read - Read FSB | ||
134 | * | ||
135 | * Read FSB from chipset | ||
136 | * If bootfsb != 0, return FSB at boot-time | ||
137 | */ | ||
138 | static unsigned int nforce2_fsb_read(int bootfsb) | ||
139 | { | ||
140 | struct pci_dev *nforce2_sub5; | ||
141 | u32 fsb, temp = 0; | ||
142 | |||
143 | |||
144 | /* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */ | ||
145 | nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, | ||
146 | 0x01EF, | ||
147 | PCI_ANY_ID, | ||
148 | PCI_ANY_ID, | ||
149 | NULL); | ||
150 | |||
151 | if (!nforce2_sub5) | ||
152 | return 0; | ||
153 | |||
154 | pci_read_config_dword(nforce2_sub5, NFORCE2_BOOTFSB, &fsb); | ||
155 | fsb /= 1000000; | ||
156 | |||
157 | /* Check if PLL register is already set */ | ||
158 | pci_read_config_byte(nforce2_chipset_dev, | ||
159 | NFORCE2_PLLENABLE, (u8 *)&temp); | ||
160 | |||
161 | if (bootfsb || !temp) | ||
162 | return fsb; | ||
163 | |||
164 | /* Use PLL register FSB value */ | ||
165 | pci_read_config_dword(nforce2_chipset_dev, | ||
166 | NFORCE2_PLLREG, &temp); | ||
167 | fsb = nforce2_calc_fsb(temp); | ||
168 | |||
169 | return fsb; | ||
170 | } | ||
171 | |||
172 | /* | ||
173 | * nforce2_set_fsb - set new FSB | ||
174 | * @fsb: New FSB | ||
175 | * | ||
176 | * Sets new FSB | ||
177 | */ | ||
178 | static int nforce2_set_fsb(unsigned int fsb) | ||
179 | { | ||
180 | u32 pll, temp = 0; | ||
181 | unsigned int tfsb; | ||
182 | int diff; | ||
183 | |||
184 | if ((fsb > max_fsb) || (fsb < NFORCE2_MIN_FSB)) { | ||
185 | printk(KERN_ERR "cpufreq: FSB %d is out of range!\n", fsb); | ||
186 | return -EINVAL; | ||
187 | } | ||
188 | |||
189 | tfsb = nforce2_fsb_read(0); | ||
190 | if (!tfsb) { | ||
191 | printk(KERN_ERR "cpufreq: Error while reading the FSB\n"); | ||
192 | return -EINVAL; | ||
193 | } | ||
194 | |||
195 | /* First write? Then set actual value */ | ||
196 | pci_read_config_byte(nforce2_chipset_dev, | ||
197 | NFORCE2_PLLENABLE, (u8 *)&temp); | ||
198 | if (!temp) { | ||
199 | pll = nforce2_calc_pll(tfsb); | ||
200 | |||
201 | if ((int) pll < 0) /* pll is u32; catch calc_pll's -1 error value */ | ||
202 | return -EINVAL; | ||
203 | |||
204 | nforce2_write_pll(pll); | ||
205 | } | ||
206 | |||
207 | /* Enable write access */ | ||
208 | temp = 0x01; | ||
209 | pci_write_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8)temp); | ||
210 | |||
211 | diff = tfsb - fsb; | ||
212 | |||
213 | if (!diff) | ||
214 | return 0; | ||
215 | |||
216 | while ((tfsb != fsb) && (tfsb <= max_fsb) && (tfsb >= min_fsb)) { | ||
217 | if (diff < 0) | ||
218 | tfsb++; | ||
219 | else | ||
220 | tfsb--; | ||
221 | |||
222 | /* Calculate the PLL reg. value */ | ||
223 | if ((pll = nforce2_calc_pll(tfsb)) == -1) | ||
224 | return -EINVAL; | ||
225 | |||
226 | nforce2_write_pll(pll); | ||
227 | #ifdef NFORCE2_DELAY | ||
228 | mdelay(NFORCE2_DELAY); | ||
229 | #endif | ||
230 | } | ||
231 | |||
232 | temp = 0x40; | ||
233 | pci_write_config_byte(nforce2_chipset_dev, NFORCE2_PLLADR, (u8)temp); | ||
234 | |||
235 | return 0; | ||
236 | } | ||
237 | |||
238 | /** | ||
239 | * nforce2_get - get the CPU frequency | ||
240 | * @cpu: CPU number | ||
241 | * | ||
242 | * Returns the CPU frequency | ||
243 | */ | ||
244 | static unsigned int nforce2_get(unsigned int cpu) | ||
245 | { | ||
246 | if (cpu) | ||
247 | return 0; | ||
248 | return nforce2_fsb_read(0) * fid * 100; | ||
249 | } | ||
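Note the fixed-point convention here: fid is the multiplier times 10, so the frequency in kHz is fsb(MHz) * (fid / 10) * 1000 = fsb * fid * 100. For a hypothetical 11.5x part (fid = 115) on a 133 MHz FSB this yields 133 * 115 * 100 = 1,529,500 kHz.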
250 | |||
251 | /** | ||
252 | * nforce2_target - set a new CPUFreq policy | ||
253 | * @policy: new policy | ||
254 | * @target_freq: the target frequency | ||
255 | * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) | ||
256 | * | ||
257 | * Sets a new CPUFreq policy. | ||
258 | */ | ||
259 | static int nforce2_target(struct cpufreq_policy *policy, | ||
260 | unsigned int target_freq, unsigned int relation) | ||
261 | { | ||
262 | // unsigned long flags; | ||
263 | struct cpufreq_freqs freqs; | ||
264 | unsigned int target_fsb; | ||
265 | |||
266 | if ((target_freq > policy->max) || (target_freq < policy->min)) | ||
267 | return -EINVAL; | ||
268 | |||
269 | target_fsb = target_freq / (fid * 100); | ||
270 | |||
271 | freqs.old = nforce2_get(policy->cpu); | ||
272 | freqs.new = target_fsb * fid * 100; | ||
273 | freqs.cpu = 0; /* Only one CPU on nForce2 platforms */ | ||
274 | |||
275 | if (freqs.old == freqs.new) | ||
276 | return 0; | ||
277 | |||
278 | dprintk(KERN_INFO "cpufreq: Old CPU frequency %d kHz, new %d kHz\n", | ||
279 | freqs.old, freqs.new); | ||
280 | |||
281 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
282 | |||
283 | /* Disable IRQs */ | ||
284 | //local_irq_save(flags); | ||
285 | |||
286 | if (nforce2_set_fsb(target_fsb) < 0) | ||
287 | printk(KERN_ERR "cpufreq: Changing FSB to %d failed\n", | ||
288 | target_fsb); | ||
289 | else | ||
290 | dprintk(KERN_INFO "cpufreq: Changed FSB successfully to %d\n", | ||
291 | target_fsb); | ||
292 | |||
293 | /* Enable IRQs */ | ||
294 | //local_irq_restore(flags); | ||
295 | |||
296 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
297 | |||
298 | return 0; | ||
299 | } | ||
300 | |||
301 | /** | ||
302 | * nforce2_verify - verifies a new CPUFreq policy | ||
303 | * @policy: new policy | ||
304 | */ | ||
305 | static int nforce2_verify(struct cpufreq_policy *policy) | ||
306 | { | ||
307 | unsigned int fsb_pol_max; | ||
308 | |||
309 | fsb_pol_max = policy->max / (fid * 100); | ||
310 | |||
311 | if (policy->min < (fsb_pol_max * fid * 100)) | ||
312 | policy->max = (fsb_pol_max + 1) * fid * 100; | ||
313 | |||
314 | cpufreq_verify_within_limits(policy, | ||
315 | policy->cpuinfo.min_freq, | ||
316 | policy->cpuinfo.max_freq); | ||
317 | return 0; | ||
318 | } | ||
319 | |||
320 | static int nforce2_cpu_init(struct cpufreq_policy *policy) | ||
321 | { | ||
322 | unsigned int fsb; | ||
323 | unsigned int rfid; | ||
324 | |||
325 | /* capability check */ | ||
326 | if (policy->cpu != 0) | ||
327 | return -ENODEV; | ||
328 | |||
329 | /* Get current FSB */ | ||
330 | fsb = nforce2_fsb_read(0); | ||
331 | |||
332 | if (!fsb) | ||
333 | return -EIO; | ||
334 | |||
335 | /* FIX: Get FID from CPU */ | ||
336 | if (!fid) { | ||
337 | if (!cpu_khz) { | ||
338 | printk(KERN_WARNING | ||
339 | "cpufreq: cpu_khz not set, can't calculate multiplier!\n"); | ||
340 | return -ENODEV; | ||
341 | } | ||
342 | |||
343 | fid = cpu_khz / (fsb * 100); | ||
344 | rfid = fid % 5; | ||
345 | |||
346 | if (rfid) { | ||
347 | if (rfid > 2) | ||
348 | fid += 5 - rfid; | ||
349 | else | ||
350 | fid -= rfid; | ||
351 | } | ||
352 | } | ||
353 | |||
354 | printk(KERN_INFO "cpufreq: FSB currently at %i MHz, FID %d.%d\n", fsb, | ||
355 | fid / 10, fid % 10); | ||
356 | |||
357 | /* Set maximum FSB to FSB at boot time */ | ||
358 | max_fsb = nforce2_fsb_read(1); | ||
359 | |||
360 | if (!max_fsb) | ||
361 | return -EIO; | ||
362 | |||
363 | if (!min_fsb) | ||
364 | min_fsb = max_fsb - NFORCE2_SAFE_DISTANCE; | ||
365 | |||
366 | if (min_fsb < NFORCE2_MIN_FSB) | ||
367 | min_fsb = NFORCE2_MIN_FSB; | ||
368 | |||
369 | /* cpuinfo and default policy values */ | ||
370 | policy->cpuinfo.min_freq = min_fsb * fid * 100; | ||
371 | policy->cpuinfo.max_freq = max_fsb * fid * 100; | ||
372 | policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; | ||
373 | policy->cur = nforce2_get(policy->cpu); | ||
374 | policy->min = policy->cpuinfo.min_freq; | ||
375 | policy->max = policy->cpuinfo.max_freq; | ||
376 | policy->governor = CPUFREQ_DEFAULT_GOVERNOR; | ||
377 | |||
378 | return 0; | ||
379 | } | ||
380 | |||
381 | static int nforce2_cpu_exit(struct cpufreq_policy *policy) | ||
382 | { | ||
383 | return 0; | ||
384 | } | ||
385 | |||
386 | static struct cpufreq_driver nforce2_driver = { | ||
387 | .name = "nforce2", | ||
388 | .verify = nforce2_verify, | ||
389 | .target = nforce2_target, | ||
390 | .get = nforce2_get, | ||
391 | .init = nforce2_cpu_init, | ||
392 | .exit = nforce2_cpu_exit, | ||
393 | .owner = THIS_MODULE, | ||
394 | }; | ||
395 | |||
396 | /** | ||
397 | * nforce2_detect_chipset - detect the Southbridge which contains FSB PLL logic | ||
398 | * | ||
399 | * Detects the nForce2 A2 and C1 steppings | ||
400 | * | ||
401 | */ | ||
402 | static int nforce2_detect_chipset(void) | ||
403 | { | ||
404 | u8 revision; | ||
405 | |||
406 | nforce2_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, | ||
407 | PCI_DEVICE_ID_NVIDIA_NFORCE2, | ||
408 | PCI_ANY_ID, | ||
409 | PCI_ANY_ID, | ||
410 | NULL); | ||
411 | |||
412 | if (nforce2_chipset_dev == NULL) | ||
413 | return -ENODEV; | ||
414 | |||
415 | pci_read_config_byte(nforce2_chipset_dev, PCI_REVISION_ID, &revision); | ||
416 | |||
417 | printk(KERN_INFO "cpufreq: Detected nForce2 chipset revision %X\n", | ||
418 | revision); | ||
419 | printk(KERN_INFO | ||
420 | "cpufreq: FSB changing may be unstable and can lead to crashes and data loss.\n"); | ||
421 | |||
422 | return 0; | ||
423 | } | ||
424 | |||
425 | /** | ||
426 | * nforce2_init - initializes the nForce2 CPUFreq driver | ||
427 | * | ||
428 | * Initializes the nForce2 FSB support. Returns -ENODEV on unsupported | ||
429 | * devices, -EINVAL on problems during initialization, and zero on | ||
430 | * success. | ||
431 | */ | ||
432 | static int __init nforce2_init(void) | ||
433 | { | ||
434 | /* TODO: do we need to detect the processor? */ | ||
435 | |||
436 | /* detect chipset */ | ||
437 | if (nforce2_detect_chipset()) { | ||
438 | printk(KERN_ERR "cpufreq: No nForce2 chipset.\n"); | ||
439 | return -ENODEV; | ||
440 | } | ||
441 | |||
442 | return cpufreq_register_driver(&nforce2_driver); | ||
443 | } | ||
444 | |||
445 | /** | ||
446 | * nforce2_exit - unregisters cpufreq module | ||
447 | * | ||
448 | * Unregisters nForce2 FSB change support. | ||
449 | */ | ||
450 | static void __exit nforce2_exit(void) | ||
451 | { | ||
452 | cpufreq_unregister_driver(&nforce2_driver); | ||
453 | } | ||
454 | |||
455 | module_init(nforce2_init); | ||
456 | module_exit(nforce2_exit); | ||
457 | |||
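The whole nforce2 driver above rests on the relation freq_khz = fsb * fid * 100, with fid being the CPU multiplier scaled by 10 and rounded onto a 0.5x grid at init time. A standalone sketch of that math, using made-up sample values rather than probed hardware:

#include <stdio.h>

/* Round the x10 multiplier onto the 0.5x grid, mirroring the init path. */
static unsigned int round_fid(unsigned int fid)
{
	unsigned int rfid = fid % 5;

	if (rfid) {
		if (rfid > 2)
			fid += 5 - rfid;
		else
			fid -= rfid;
	}
	return fid;
}

int main(void)
{
	/* sample values: ~2.19 GHz part on a 200 MHz FSB */
	unsigned int cpu_khz = 2193000, fsb = 200;
	unsigned int fid = round_fid(cpu_khz / (fsb * 100));	/* 109 -> 110 */

	printf("FID %u.%u -> %u kHz at FSB %u MHz\n",
	       fid / 10, fid % 10, fsb * fid * 100, fsb);
	return 0;
}
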
diff --git a/arch/i386/kernel/cpu/cpufreq/elanfreq.c b/arch/i386/kernel/cpu/cpufreq/elanfreq.c new file mode 100644 index 000000000000..3f7caa4ae6d6 --- /dev/null +++ b/arch/i386/kernel/cpu/cpufreq/elanfreq.c | |||
@@ -0,0 +1,312 @@ | |||
1 | /* | ||
2 | * elanfreq: cpufreq driver for the AMD ELAN family | ||
3 | * | ||
4 | * (c) Copyright 2002 Robert Schwebel <r.schwebel@pengutronix.de> | ||
5 | * | ||
6 | * Parts of this code are (c) Sven Geggus <sven@geggus.net> | ||
7 | * | ||
8 | * All Rights Reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public License | ||
12 | * as published by the Free Software Foundation; either version | ||
13 | * 2 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * 2002-02-13: - initial revision for 2.4.18-pre9 by Robert Schwebel | ||
16 | * | ||
17 | */ | ||
18 | |||
19 | #include <linux/kernel.h> | ||
20 | #include <linux/module.h> | ||
21 | #include <linux/init.h> | ||
22 | |||
23 | #include <linux/slab.h> | ||
24 | #include <linux/delay.h> | ||
25 | #include <linux/cpufreq.h> | ||
26 | |||
27 | #include <asm/msr.h> | ||
28 | #include <asm/timex.h> | ||
29 | #include <asm/io.h> | ||
30 | |||
31 | #define REG_CSCIR 0x22 /* Chip Setup and Control Index Register */ | ||
32 | #define REG_CSCDR 0x23 /* Chip Setup and Control Data Register */ | ||
33 | |||
34 | /* Module parameter */ | ||
35 | static int max_freq; | ||
36 | |||
37 | struct s_elan_multiplier { | ||
38 | int clock; /* frequency in kHz */ | ||
39 | int val40h; /* PMU Force Mode register */ | ||
40 | int val80h; /* CPU Clock Speed Register */ | ||
41 | }; | ||
42 | |||
43 | /* | ||
44 | * It is important that the frequencies | ||
45 | * are listed in ascending order here! | ||
46 | */ | ||
47 | struct s_elan_multiplier elan_multiplier[] = { | ||
48 | {1000, 0x02, 0x18}, | ||
49 | {2000, 0x02, 0x10}, | ||
50 | {4000, 0x02, 0x08}, | ||
51 | {8000, 0x00, 0x00}, | ||
52 | {16000, 0x00, 0x02}, | ||
53 | {33000, 0x00, 0x04}, | ||
54 | {66000, 0x01, 0x04}, | ||
55 | {99000, 0x01, 0x05} | ||
56 | }; | ||
57 | |||
58 | static struct cpufreq_frequency_table elanfreq_table[] = { | ||
59 | {0, 1000}, | ||
60 | {1, 2000}, | ||
61 | {2, 4000}, | ||
62 | {3, 8000}, | ||
63 | {4, 16000}, | ||
64 | {5, 33000}, | ||
65 | {6, 66000}, | ||
66 | {7, 99000}, | ||
67 | {0, CPUFREQ_TABLE_END}, | ||
68 | }; | ||
69 | |||
70 | |||
71 | /** | ||
72 | * elanfreq_get_cpu_frequency: determine current cpu speed | ||
73 | * | ||
74 | * Determines the frequency at which the CPU of the Elan SoC | ||
75 | * currently runs. Frequencies from 1 to 33 MHz are generated | ||
76 | * the normal way; 66 and 99 MHz are called "Hyperspeed Mode" | ||
77 | * and have the rest of the chip running at 33 MHz. | ||
78 | */ | ||
79 | |||
80 | static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu) | ||
81 | { | ||
82 | u8 clockspeed_reg; /* Clock Speed Register */ | ||
83 | |||
84 | local_irq_disable(); | ||
85 | outb_p(0x80,REG_CSCIR); | ||
86 | clockspeed_reg = inb_p(REG_CSCDR); | ||
87 | local_irq_enable(); | ||
88 | |||
89 | if ((clockspeed_reg & 0xE0) == 0xE0) { return 0; } | ||
90 | |||
91 | /* Are we in CPU clock multiplied mode (66/99 MHz)? */ | ||
92 | if ((clockspeed_reg & 0xE0) == 0xC0) { | ||
93 | if ((clockspeed_reg & 0x01) == 0) { | ||
94 | return 66000; | ||
95 | } else { | ||
96 | return 99000; | ||
97 | } | ||
98 | } | ||
99 | |||
100 | /* 33 MHz is not 32 MHz... */ | ||
101 | if ((clockspeed_reg & 0xE0)==0xA0) | ||
102 | return 33000; | ||
103 | |||
104 | return ((1<<((clockspeed_reg & 0xE0) >> 5)) * 1000); | ||
105 | } | ||
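
For illustration, the same decode can be exercised against assumed register values (nothing is read from port 0x23 here); this is a standalone sketch, not driver code:

#include <stdio.h>

/* Decode assumed CSC clock-speed register values the way
 * elanfreq_get_cpu_frequency() does: bits 7:5 select the speed,
 * 0xC0 marks hyperspeed mode where bit 0 picks 66 vs. 99 MHz. */
static unsigned int decode_khz(unsigned char reg)
{
	if ((reg & 0xE0) == 0xE0)
		return 0;				/* invalid */
	if ((reg & 0xE0) == 0xC0)			/* hyperspeed */
		return (reg & 0x01) ? 99000 : 66000;
	if ((reg & 0xE0) == 0xA0)
		return 33000;				/* 33, not 32 MHz */
	return (1 << ((reg & 0xE0) >> 5)) * 1000;
}

int main(void)
{
	unsigned char samples[] = { 0x00, 0x60, 0xA0, 0xC0, 0xC1 };
	unsigned int i;

	for (i = 0; i < sizeof(samples); i++)
		printf("reg 0x%02X -> %u kHz\n",
		       samples[i], decode_khz(samples[i]));
	return 0;
}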
106 | |||
107 | |||
108 | /** | ||
109 | * elanfreq_set_cpu_state: Change the CPU core frequency | ||
110 | * @state: index into the elan_multiplier table | ||
111 | * | ||
112 | * This function sets the CPU core frequency to the entry of | ||
113 | * the elan_multiplier table selected by @state. Note that the | ||
114 | * target frequency has to be checked by elanfreq_verify() | ||
115 | * for correctness! | ||
116 | * | ||
117 | * There is no return value. | ||
118 | */ | ||
119 | |||
120 | static void elanfreq_set_cpu_state(unsigned int state) | ||
121 | { | ||
122 | struct cpufreq_freqs freqs; | ||
123 | |||
124 | freqs.old = elanfreq_get_cpu_frequency(0); | ||
125 | freqs.new = elan_multiplier[state].clock; | ||
126 | freqs.cpu = 0; /* elanfreq.c is UP only driver */ | ||
127 | |||
128 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
129 | |||
130 | printk(KERN_INFO "elanfreq: attempting to set frequency to %i kHz\n",elan_multiplier[state].clock); | ||
131 | |||
132 | |||
133 | /* | ||
134 | * Access to the Elan's internal registers is indexed via | ||
135 | * 0x22: Chip Setup & Control Register Index Register (CSCI) | ||
136 | * 0x23: Chip Setup & Control Register Data Register (CSCD) | ||
137 | * | ||
138 | */ | ||
139 | |||
140 | /* | ||
141 | * 0x40 is the Power Management Unit's Force Mode Register. | ||
142 | * Bit 6 enables Hyperspeed Mode (66/100 MHz core frequency) | ||
143 | */ | ||
144 | |||
145 | local_irq_disable(); | ||
146 | outb_p(0x40,REG_CSCIR); /* Disable hyperspeed mode */ | ||
147 | outb_p(0x00,REG_CSCDR); | ||
148 | local_irq_enable(); /* wait till internal pipelines and */ | ||
149 | udelay(1000); /* buffers have cleaned up */ | ||
150 | |||
151 | local_irq_disable(); | ||
152 | |||
153 | /* now, set the CPU clock speed register (0x80) */ | ||
154 | outb_p(0x80,REG_CSCIR); | ||
155 | outb_p(elan_multiplier[state].val80h,REG_CSCDR); | ||
156 | |||
157 | /* now, the hyperspeed bit in PMU Force Mode Register (0x40) */ | ||
158 | outb_p(0x40,REG_CSCIR); | ||
159 | outb_p(elan_multiplier[state].val40h,REG_CSCDR); | ||
160 | udelay(10000); | ||
161 | local_irq_enable(); | ||
162 | |||
163 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
164 | } | ||
165 | |||
166 | |||
167 | /** | ||
168 | * elanfreq_verify: test if frequency range is valid | ||
169 | * @policy: the policy to validate | ||
170 | * | ||
171 | * This function checks if a given frequency range in kHz is valid | ||
172 | * for the hardware supported by the driver. | ||
173 | */ | ||
174 | |||
175 | static int elanfreq_verify (struct cpufreq_policy *policy) | ||
176 | { | ||
177 | return cpufreq_frequency_table_verify(policy, &elanfreq_table[0]); | ||
178 | } | ||
179 | |||
180 | static int elanfreq_target (struct cpufreq_policy *policy, | ||
181 | unsigned int target_freq, | ||
182 | unsigned int relation) | ||
183 | { | ||
184 | unsigned int newstate = 0; | ||
185 | |||
186 | if (cpufreq_frequency_table_target(policy, &elanfreq_table[0], target_freq, relation, &newstate)) | ||
187 | return -EINVAL; | ||
188 | |||
189 | elanfreq_set_cpu_state(newstate); | ||
190 | |||
191 | return 0; | ||
192 | } | ||
193 | |||
194 | |||
195 | /* | ||
196 | * Module init and exit code | ||
197 | */ | ||
198 | |||
199 | static int elanfreq_cpu_init(struct cpufreq_policy *policy) | ||
200 | { | ||
201 | struct cpuinfo_x86 *c = cpu_data; | ||
202 | unsigned int i; | ||
203 | int result; | ||
204 | |||
205 | /* capability check */ | ||
206 | if ((c->x86_vendor != X86_VENDOR_AMD) || | ||
207 | (c->x86 != 4) || (c->x86_model!=10)) | ||
208 | return -ENODEV; | ||
209 | |||
210 | /* max freq */ | ||
211 | if (!max_freq) | ||
212 | max_freq = elanfreq_get_cpu_frequency(0); | ||
213 | |||
214 | /* table init */ | ||
215 | for (i=0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) { | ||
216 | if (elanfreq_table[i].frequency > max_freq) | ||
217 | elanfreq_table[i].frequency = CPUFREQ_ENTRY_INVALID; | ||
218 | } | ||
219 | |||
220 | /* cpuinfo and default policy values */ | ||
221 | policy->governor = CPUFREQ_DEFAULT_GOVERNOR; | ||
222 | policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; | ||
223 | policy->cur = elanfreq_get_cpu_frequency(0); | ||
224 | |||
225 | result = cpufreq_frequency_table_cpuinfo(policy, elanfreq_table); | ||
226 | if (result) | ||
227 | return (result); | ||
228 | |||
229 | cpufreq_frequency_table_get_attr(elanfreq_table, policy->cpu); | ||
230 | |||
231 | return 0; | ||
232 | } | ||
233 | |||
234 | |||
235 | static int elanfreq_cpu_exit(struct cpufreq_policy *policy) | ||
236 | { | ||
237 | cpufreq_frequency_table_put_attr(policy->cpu); | ||
238 | return 0; | ||
239 | } | ||
240 | |||
241 | |||
242 | #ifndef MODULE | ||
243 | /** | ||
244 | * elanfreq_setup - elanfreq command line parameter parsing | ||
245 | * | ||
246 | * elanfreq command line parameter. Use: | ||
247 | * elanfreq=66000 | ||
248 | * to set the maximum CPU frequency to 66 MHz. Note that in | ||
249 | * case you do not give this boot parameter, the maximum | ||
250 | * frequency will fall back to _current_ CPU frequency which | ||
251 | * might be lower. If you build this as a module, use the | ||
252 | * max_freq module parameter instead. | ||
253 | */ | ||
254 | static int __init elanfreq_setup(char *str) | ||
255 | { | ||
256 | max_freq = simple_strtoul(str, &str, 0); | ||
257 | printk(KERN_WARNING "You're using the deprecated elanfreq command line option. Use elanfreq.max_freq instead, please!\n"); | ||
258 | return 1; | ||
259 | } | ||
260 | __setup("elanfreq=", elanfreq_setup); | ||
261 | #endif | ||
262 | |||
263 | |||
264 | static struct freq_attr* elanfreq_attr[] = { | ||
265 | &cpufreq_freq_attr_scaling_available_freqs, | ||
266 | NULL, | ||
267 | }; | ||
268 | |||
269 | |||
270 | static struct cpufreq_driver elanfreq_driver = { | ||
271 | .get = elanfreq_get_cpu_frequency, | ||
272 | .verify = elanfreq_verify, | ||
273 | .target = elanfreq_target, | ||
274 | .init = elanfreq_cpu_init, | ||
275 | .exit = elanfreq_cpu_exit, | ||
276 | .name = "elanfreq", | ||
277 | .owner = THIS_MODULE, | ||
278 | .attr = elanfreq_attr, | ||
279 | }; | ||
280 | |||
281 | |||
282 | static int __init elanfreq_init(void) | ||
283 | { | ||
284 | struct cpuinfo_x86 *c = cpu_data; | ||
285 | |||
286 | /* Test if we have the right hardware */ | ||
287 | if ((c->x86_vendor != X86_VENDOR_AMD) || | ||
288 | (c->x86 != 4) || (c->x86_model!=10)) | ||
289 | { | ||
290 | printk(KERN_INFO "elanfreq: error: no Elan processor found!\n"); | ||
291 | return -ENODEV; | ||
292 | } | ||
293 | |||
294 | return cpufreq_register_driver(&elanfreq_driver); | ||
295 | } | ||
296 | |||
297 | |||
298 | static void __exit elanfreq_exit(void) | ||
299 | { | ||
300 | cpufreq_unregister_driver(&elanfreq_driver); | ||
301 | } | ||
302 | |||
303 | |||
304 | module_param (max_freq, int, 0444); | ||
305 | |||
306 | MODULE_LICENSE("GPL"); | ||
307 | MODULE_AUTHOR("Robert Schwebel <r.schwebel@pengutronix.de>, Sven Geggus <sven@geggus.net>"); | ||
308 | MODULE_DESCRIPTION("cpufreq driver for AMD's Elan CPUs"); | ||
309 | |||
310 | module_init(elanfreq_init); | ||
311 | module_exit(elanfreq_exit); | ||
312 | |||
diff --git a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c new file mode 100644 index 000000000000..1a49adb1f4a6 --- /dev/null +++ b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c | |||
@@ -0,0 +1,502 @@ | |||
1 | /* | ||
2 | * Cyrix MediaGX and NatSemi Geode Suspend Modulation | ||
3 | * (C) 2002 Zwane Mwaikambo <zwane@commfireservices.com> | ||
4 | * (C) 2002 Hiroshi Miura <miura@da-cha.org> | ||
5 | * All Rights Reserved | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * version 2 as published by the Free Software Foundation | ||
10 | * | ||
11 | * The author(s) of this software shall not be held liable for damages | ||
12 | * of any nature resulting due to the use of this software. This | ||
13 | * software is provided AS-IS with no warranties. | ||
14 | * | ||
15 | * Theoretical note: | ||
16 | * | ||
17 | * (see Geode(tm) CS5530 manual (rev.4.1) page.56) | ||
18 | * | ||
19 | * CPU frequency control on the NatSemi Geode GX1/GXLV processor and | ||
20 | * CS55x0 is based on Suspend Modulation. | ||
21 | * | ||
22 | * Suspend Modulation works by asserting and de-asserting the SUSP# pin | ||
23 | * to the CPU (GX1/GXLV) for configurable durations. When SUSP# is | ||
24 | * asserted, the CPU enters an idle state: the GX1 stops its core | ||
25 | * clock, so power consumption is reduced. | ||
26 | * | ||
27 | * Suspend Modulation's OFF/ON durations are configurable | ||
28 | * with the 'Suspend Modulation OFF Count Register' | ||
29 | * and the 'Suspend Modulation ON Count Register'. | ||
30 | * These registers are 8-bit counters that represent the number of | ||
31 | * 32us intervals for which the SUSP# pin is asserted (ON) or | ||
32 | * de-asserted (OFF) to the processor. | ||
33 | * | ||
34 | * These counters define a ratio which is the effective frequency | ||
35 | * of operation of the system. | ||
36 | * | ||
37 | * OFF Count | ||
38 | * F_eff = Fgx * ---------------------- | ||
39 | * OFF Count + ON Count | ||
40 | * | ||
41 | * 0 <= On Count, Off Count <= 255 | ||
42 | * | ||
43 | * From these limits, we can get register values | ||
44 | * | ||
45 | * off_duration + on_duration <= MAX_DURATION | ||
46 | * on_duration = off_duration * (stock_freq - freq) / freq | ||
47 | * | ||
48 | * off_duration = (freq * DURATION) / stock_freq | ||
49 | * on_duration = DURATION - off_duration | ||
50 | * | ||
51 | * | ||
52 | *--------------------------------------------------------------------------- | ||
53 | * | ||
54 | * ChangeLog: | ||
55 | * Dec. 12, 2003 Hiroshi Miura <miura@da-cha.org> | ||
56 | * - fix on/off register mistake | ||
57 | * - fix cpu_khz calc when it stops cpu modulation. | ||
58 | * | ||
59 | * Dec. 11, 2002 Hiroshi Miura <miura@da-cha.org> | ||
60 | * - rewrite for Cyrix MediaGX Cx5510/5520 and | ||
61 | * NatSemi Geode Cs5530(A). | ||
62 | * | ||
63 | * Jul. ??, 2002 Zwane Mwaikambo <zwane@commfireservices.com> | ||
64 | * - cs5530_mod patch for 2.4.19-rc1. | ||
65 | * | ||
66 | *--------------------------------------------------------------------------- | ||
67 | * | ||
68 | * Todo | ||
69 | * Test on machines with 5510, 5530, 5530A | ||
70 | */ | ||
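
As a worked example of the duty-cycle formulas above, the following user-space sketch computes the counters for an assumed 200 MHz part throttled to 50 MHz (values chosen purely for the example):

#include <stdio.h>

int main(void)
{
	unsigned int stock_freq = 200000;	/* assumed 200 MHz part, kHz */
	unsigned int freq = 50000;		/* requested speed, kHz */
	unsigned int duration = 255;		/* max_duration default */

	unsigned int off = (freq * duration) / stock_freq;	/* 63 */
	unsigned int on = duration - off;			/* 192 */
	unsigned int f_eff = (stock_freq * off) / (off + on);

	printf("OFF=%u ON=%u -> F_eff=%u kHz over a %u us cycle\n",
	       off, on, f_eff, (off + on) * 32);	/* ~49411 kHz, 8160 us */
	return 0;
}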
71 | |||
72 | /************************************************************************ | ||
73 | * Suspend Modulation - Definitions * | ||
74 | ************************************************************************/ | ||
75 | |||
76 | #include <linux/kernel.h> | ||
77 | #include <linux/module.h> | ||
78 | #include <linux/init.h> | ||
79 | #include <linux/smp.h> | ||
80 | #include <linux/cpufreq.h> | ||
81 | #include <linux/pci.h> | ||
82 | #include <asm/processor.h> | ||
83 | #include <asm/errno.h> | ||
84 | |||
85 | /* PCI config registers, all at F0 */ | ||
86 | #define PCI_PMER1 0x80 /* power management enable register 1 */ | ||
87 | #define PCI_PMER2 0x81 /* power management enable register 2 */ | ||
88 | #define PCI_PMER3 0x82 /* power management enable register 3 */ | ||
89 | #define PCI_IRQTC 0x8c /* irq speedup timer counter register:typical 2 to 4ms */ | ||
90 | #define PCI_VIDTC 0x8d /* video speedup timer counter register: typical 50 to 100ms */ | ||
91 | #define PCI_MODOFF 0x94 /* suspend modulation OFF counter register, 1 = 32us */ | ||
92 | #define PCI_MODON 0x95 /* suspend modulation ON counter register */ | ||
93 | #define PCI_SUSCFG 0x96 /* suspend configuration register */ | ||
94 | |||
95 | /* PMER1 bits */ | ||
96 | #define GPM (1<<0) /* global power management */ | ||
97 | #define GIT (1<<1) /* globally enable PM device idle timers */ | ||
98 | #define GTR (1<<2) /* globally enable IO traps */ | ||
99 | #define IRQ_SPDUP (1<<3) /* disable clock throttle during interrupt handling */ | ||
100 | #define VID_SPDUP (1<<4) /* disable clock throttle during vga video handling */ | ||
101 | |||
102 | /* SUSCFG bits */ | ||
103 | #define SUSMOD (1<<0) /* enable/disable suspend modulation */ | ||
104 | /* the bits below are supported only by the cs5530 (after rev.1.2) and cs5530A */ | ||
105 | #define SMISPDUP (1<<1) /* select how SMI re-enable suspend modulation: */ | ||
106 | /* IRQTC timer or read SMI speedup disable reg.(F1BAR[08-09h]) */ | ||
107 | #define SUSCFG (1<<2) /* enable powering down a GXLV processor. "Special 3Volt Suspend" mode */ | ||
108 | /* the bits below are supported only by the cs5530A */ | ||
109 | #define PWRSVE_ISA (1<<3) /* stop ISA clock */ | ||
110 | #define PWRSVE (1<<4) /* active idle */ | ||
111 | |||
112 | struct gxfreq_params { | ||
113 | u8 on_duration; | ||
114 | u8 off_duration; | ||
115 | u8 pci_suscfg; | ||
116 | u8 pci_pmer1; | ||
117 | u8 pci_pmer2; | ||
118 | u8 pci_rev; | ||
119 | struct pci_dev *cs55x0; | ||
120 | }; | ||
121 | |||
122 | static struct gxfreq_params *gx_params; | ||
123 | static int stock_freq; | ||
124 | |||
125 | /* PCI bus clock, in kHz - defaults to 30000 (30 MHz) if cpu_khz is not available */ | ||
126 | static int pci_busclk; | ||
127 | module_param (pci_busclk, int, 0444); | ||
128 | |||
129 | /* maximum duration for which the cpu may be suspended | ||
130 | * (32us * MAX_DURATION). If no parameter is given, this defaults | ||
131 | * to 255. | ||
132 | * Note that this leads to a maximum of 8 ms(!) where the CPU clock | ||
133 | * is suspended -- processing power is just 0.39% of what it used to be, | ||
134 | * though. 781.25 kHz(!) for a 200 MHz processor -- wow. */ | ||
135 | static int max_duration = 255; | ||
136 | module_param (max_duration, int, 0444); | ||
137 | |||
138 | /* For the default policy, we want at least some processing power | ||
139 | * - let's say 5%. (min = maxfreq / POLICY_MIN_DIV) | ||
140 | */ | ||
141 | #define POLICY_MIN_DIV 20 | ||
142 | |||
143 | |||
144 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "gx-suspmod", msg) | ||
145 | |||
146 | /** | ||
147 | * we can detect a core multiplier from dir0_lsb | ||
148 | * from GX1 datasheet p.56, | ||
149 | * MULT[3:0]: | ||
150 | * 0000 = SYSCLK multiplied by 4 (test only) | ||
151 | * 0001 = SYSCLK multiplied by 10 | ||
152 | * 0010 = SYSCLK multiplied by 4 | ||
153 | * 0011 = SYSCLK multiplied by 6 | ||
154 | * 0100 = SYSCLK multiplied by 9 | ||
155 | * 0101 = SYSCLK multiplied by 5 | ||
156 | * 0110 = SYSCLK multiplied by 7 | ||
157 | * 0111 = SYSCLK multiplied by 8 | ||
158 | * of 33.3MHz | ||
159 | **/ | ||
160 | static int gx_freq_mult[16] = { | ||
161 | 4, 10, 4, 6, 9, 5, 7, 8, | ||
162 | 0, 0, 0, 0, 0, 0, 0, 0 | ||
163 | }; | ||
164 | |||
165 | |||
166 | /**************************************************************** | ||
167 | * Low Level chipset interface * | ||
168 | ****************************************************************/ | ||
169 | static struct pci_device_id gx_chipset_tbl[] __initdata = { | ||
170 | { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, PCI_ANY_ID, PCI_ANY_ID }, | ||
171 | { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, PCI_ANY_ID, PCI_ANY_ID }, | ||
172 | { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510, PCI_ANY_ID, PCI_ANY_ID }, | ||
173 | { 0, }, | ||
174 | }; | ||
175 | |||
176 | /** | ||
177 | * gx_detect_chipset: | ||
178 | * | ||
179 | **/ | ||
180 | static __init struct pci_dev *gx_detect_chipset(void) | ||
181 | { | ||
182 | struct pci_dev *gx_pci = NULL; | ||
183 | |||
184 | /* check if CPU is a MediaGX or a Geode. */ | ||
185 | if ((current_cpu_data.x86_vendor != X86_VENDOR_NSC) && | ||
186 | (current_cpu_data.x86_vendor != X86_VENDOR_CYRIX)) { | ||
187 | dprintk("error: no MediaGX/Geode processor found!\n"); | ||
188 | return NULL; | ||
189 | } | ||
190 | |||
191 | /* detect which companion chip is used */ | ||
192 | while ((gx_pci = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, gx_pci)) != NULL) { | ||
193 | if ((pci_match_device (gx_chipset_tbl, gx_pci)) != NULL) { | ||
194 | return gx_pci; | ||
195 | } | ||
196 | } | ||
197 | |||
198 | dprintk("error: no supported chipset found!\n"); | ||
199 | return NULL; | ||
200 | } | ||
201 | |||
202 | /** | ||
203 | * gx_get_cpuspeed: | ||
204 | * | ||
205 | * Finds out at which effective frequency the Cyrix MediaGX/NatSemi Geode CPU runs. | ||
206 | */ | ||
207 | static unsigned int gx_get_cpuspeed(unsigned int cpu) | ||
208 | { | ||
209 | if ((gx_params->pci_suscfg & SUSMOD) == 0) | ||
210 | return stock_freq; | ||
211 | |||
212 | return (stock_freq * gx_params->off_duration) | ||
213 | / (gx_params->on_duration + gx_params->off_duration); | ||
214 | } | ||
215 | |||
216 | /** | ||
217 | * gx_validate_speed: | ||
218 | * find the closest supported speed to khz and compute its on/off durations | ||
219 | * | ||
220 | **/ | ||
221 | |||
222 | static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration, u8 *off_duration) | ||
223 | { | ||
224 | unsigned int i; | ||
225 | u8 tmp_on, tmp_off; | ||
226 | int old_tmp_freq = stock_freq; | ||
227 | int tmp_freq; | ||
228 | |||
229 | *off_duration=1; | ||
230 | *on_duration=0; | ||
231 | |||
232 | for (i=max_duration; i>0; i--) { | ||
233 | tmp_off = ((khz * i) / stock_freq) & 0xff; | ||
234 | tmp_on = i - tmp_off; | ||
235 | tmp_freq = (stock_freq * tmp_off) / i; | ||
236 | /* if this relation is closer to khz, use this. If it's equal, | ||
237 | * prefer it, too - lower latency */ | ||
238 | if (abs(tmp_freq - khz) <= abs(old_tmp_freq - khz)) { | ||
239 | *on_duration = tmp_on; | ||
240 | *off_duration = tmp_off; | ||
241 | old_tmp_freq = tmp_freq; | ||
242 | } | ||
243 | } | ||
244 | |||
245 | return old_tmp_freq; | ||
246 | } | ||
247 | |||
248 | |||
249 | /** | ||
250 | * gx_set_cpuspeed: | ||
251 | * set cpu speed in khz. | ||
252 | **/ | ||
253 | |||
254 | static void gx_set_cpuspeed(unsigned int khz) | ||
255 | { | ||
256 | u8 suscfg, pmer1; | ||
257 | unsigned int new_khz; | ||
258 | unsigned long flags; | ||
259 | struct cpufreq_freqs freqs; | ||
260 | |||
261 | |||
262 | freqs.cpu = 0; | ||
263 | freqs.old = gx_get_cpuspeed(0); | ||
264 | |||
265 | new_khz = gx_validate_speed(khz, &gx_params->on_duration, &gx_params->off_duration); | ||
266 | |||
267 | freqs.new = new_khz; | ||
268 | |||
269 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
270 | local_irq_save(flags); | ||
271 | |||
272 | if (new_khz != stock_freq) { /* if new khz == 100% of CPU speed, it is special case */ | ||
273 | switch (gx_params->cs55x0->device) { | ||
274 | case PCI_DEVICE_ID_CYRIX_5530_LEGACY: | ||
275 | pmer1 = gx_params->pci_pmer1 | IRQ_SPDUP | VID_SPDUP; | ||
276 | /* FIXME: need to test other values -- Zwane,Miura */ | ||
277 | pci_write_config_byte(gx_params->cs55x0, PCI_IRQTC, 4); /* typical 2 to 4ms */ | ||
278 | pci_write_config_byte(gx_params->cs55x0, PCI_VIDTC, 100);/* typical 50 to 100ms */ | ||
279 | pci_write_config_byte(gx_params->cs55x0, PCI_PMER1, pmer1); | ||
280 | |||
281 | if (gx_params->pci_rev < 0x10) { /* CS5530(rev 1.2, 1.3) */ | ||
282 | suscfg = gx_params->pci_suscfg | SUSMOD; | ||
283 | } else { /* CS5530A,B.. */ | ||
284 | suscfg = gx_params->pci_suscfg | SUSMOD | PWRSVE; | ||
285 | } | ||
286 | break; | ||
287 | case PCI_DEVICE_ID_CYRIX_5520: | ||
288 | case PCI_DEVICE_ID_CYRIX_5510: | ||
289 | suscfg = gx_params->pci_suscfg | SUSMOD; | ||
290 | break; | ||
291 | default: | ||
292 | local_irq_restore(flags); | ||
293 | dprintk("fatal: try to set unknown chipset.\n"); | ||
294 | return; | ||
295 | } | ||
296 | } else { | ||
297 | suscfg = gx_params->pci_suscfg & ~(SUSMOD); | ||
298 | gx_params->off_duration = 0; | ||
299 | gx_params->on_duration = 0; | ||
300 | dprintk("suspend modulation disabled: cpu runs 100 percent speed.\n"); | ||
301 | } | ||
302 | |||
303 | pci_write_config_byte(gx_params->cs55x0, PCI_MODOFF, gx_params->off_duration); | ||
304 | pci_write_config_byte(gx_params->cs55x0, PCI_MODON, gx_params->on_duration); | ||
305 | |||
306 | pci_write_config_byte(gx_params->cs55x0, PCI_SUSCFG, suscfg); | ||
307 | pci_read_config_byte(gx_params->cs55x0, PCI_SUSCFG, &suscfg); | ||
308 | |||
309 | local_irq_restore(flags); | ||
310 | |||
311 | gx_params->pci_suscfg = suscfg; | ||
312 | |||
313 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
314 | |||
315 | dprintk("suspend modulation w/ duration of ON:%d us, OFF:%d us\n", | ||
316 | gx_params->on_duration * 32, gx_params->off_duration * 32); | ||
317 | dprintk("suspend modulation w/ clock speed: %d kHz.\n", freqs.new); | ||
318 | } | ||
319 | |||
320 | /**************************************************************** | ||
321 | * High level functions * | ||
322 | ****************************************************************/ | ||
323 | |||
324 | /* | ||
325 | * cpufreq_gx_verify: test if frequency range is valid | ||
326 | * | ||
327 | * This function checks if a given frequency range in kHz is valid | ||
328 | * for the hardware supported by the driver. | ||
329 | */ | ||
330 | |||
331 | static int cpufreq_gx_verify(struct cpufreq_policy *policy) | ||
332 | { | ||
333 | unsigned int tmp_freq = 0; | ||
334 | u8 tmp1, tmp2; | ||
335 | |||
336 | if (!stock_freq || !policy) | ||
337 | return -EINVAL; | ||
338 | |||
339 | policy->cpu = 0; | ||
340 | cpufreq_verify_within_limits(policy, (stock_freq / max_duration), stock_freq); | ||
341 | |||
342 | /* it needs to be assured that at least one supported frequency is | ||
343 | * within policy->min and policy->max. If it is not, policy->max | ||
344 | * needs to be increased until one frequency is supported. | ||
345 | * policy->min may not be decreased, though. This way we guarantee a | ||
346 | * specific processing capacity. | ||
347 | */ | ||
348 | tmp_freq = gx_validate_speed(policy->min, &tmp1, &tmp2); | ||
349 | if (tmp_freq < policy->min) | ||
350 | tmp_freq += stock_freq / max_duration; | ||
351 | policy->min = tmp_freq; | ||
352 | if (policy->min > policy->max) | ||
353 | policy->max = tmp_freq; | ||
354 | tmp_freq = gx_validate_speed(policy->max, &tmp1, &tmp2); | ||
355 | if (tmp_freq > policy->max) | ||
356 | tmp_freq -= stock_freq / max_duration; | ||
357 | policy->max = tmp_freq; | ||
358 | if (policy->max < policy->min) | ||
359 | policy->max = policy->min; | ||
360 | cpufreq_verify_within_limits(policy, (stock_freq / max_duration), stock_freq); | ||
361 | |||
362 | return 0; | ||
363 | } | ||
364 | |||
365 | /* | ||
366 | * cpufreq_gx_target: | ||
367 | * | ||
368 | */ | ||
369 | static int cpufreq_gx_target(struct cpufreq_policy *policy, | ||
370 | unsigned int target_freq, | ||
371 | unsigned int relation) | ||
372 | { | ||
373 | u8 tmp1, tmp2; | ||
374 | unsigned int tmp_freq; | ||
375 | |||
376 | if (!stock_freq || !policy) | ||
377 | return -EINVAL; | ||
378 | |||
379 | policy->cpu = 0; | ||
380 | |||
381 | tmp_freq = gx_validate_speed(target_freq, &tmp1, &tmp2); | ||
382 | while (tmp_freq < policy->min) { | ||
383 | tmp_freq += stock_freq / max_duration; | ||
384 | tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2); | ||
385 | } | ||
386 | while (tmp_freq > policy->max) { | ||
387 | tmp_freq -= stock_freq / max_duration; | ||
388 | tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2); | ||
389 | } | ||
390 | |||
391 | gx_set_cpuspeed(tmp_freq); | ||
392 | |||
393 | return 0; | ||
394 | } | ||
395 | |||
396 | static int cpufreq_gx_cpu_init(struct cpufreq_policy *policy) | ||
397 | { | ||
398 | unsigned int maxfreq, curfreq; | ||
399 | |||
400 | if (!policy || policy->cpu != 0) | ||
401 | return -ENODEV; | ||
402 | |||
403 | /* determine maximum frequency */ | ||
404 | if (pci_busclk) { | ||
405 | maxfreq = pci_busclk * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f]; | ||
406 | } else if (cpu_khz) { | ||
407 | maxfreq = cpu_khz; | ||
408 | } else { | ||
409 | maxfreq = 30000 * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f]; | ||
410 | } | ||
411 | stock_freq = maxfreq; | ||
412 | curfreq = gx_get_cpuspeed(0); | ||
413 | |||
414 | dprintk("cpu max frequency is %d.\n", maxfreq); | ||
415 | dprintk("cpu current frequency is %dkHz.\n",curfreq); | ||
416 | |||
417 | /* setup basic struct for cpufreq API */ | ||
418 | policy->cpu = 0; | ||
419 | |||
420 | if (max_duration < POLICY_MIN_DIV) | ||
421 | policy->min = maxfreq / max_duration; | ||
422 | else | ||
423 | policy->min = maxfreq / POLICY_MIN_DIV; | ||
424 | policy->max = maxfreq; | ||
425 | policy->cur = curfreq; | ||
426 | policy->governor = CPUFREQ_DEFAULT_GOVERNOR; | ||
427 | policy->cpuinfo.min_freq = maxfreq / max_duration; | ||
428 | policy->cpuinfo.max_freq = maxfreq; | ||
429 | policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; | ||
430 | |||
431 | return 0; | ||
432 | } | ||
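
A sketch of the maxfreq fallback used above when cpu_khz is unavailable: the DIR register nibble indexes the multiplier table and scales an assumed bus clock (sample values, not probed hardware):

#include <stdio.h>

static int gx_freq_mult[16] = {
	4, 10, 4, 6, 9, 5, 7, 8,
	0, 0, 0, 0, 0, 0, 0, 0
};

int main(void)
{
	/* assumed: DIR nibble 0x3 selects 6x, 33.3 MHz-class bus clock */
	unsigned int dir_nibble = 0x3, busclk_khz = 33300;

	printf("maxfreq = %u kHz\n",
	       busclk_khz * gx_freq_mult[dir_nibble]);	/* 199800 kHz */
	return 0;
}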
433 | |||
434 | /* | ||
435 | * cpufreq_gx_init: | ||
436 | * MediaGX/Geode GX initialize cpufreq driver | ||
437 | */ | ||
438 | static struct cpufreq_driver gx_suspmod_driver = { | ||
439 | .get = gx_get_cpuspeed, | ||
440 | .verify = cpufreq_gx_verify, | ||
441 | .target = cpufreq_gx_target, | ||
442 | .init = cpufreq_gx_cpu_init, | ||
443 | .name = "gx-suspmod", | ||
444 | .owner = THIS_MODULE, | ||
445 | }; | ||
446 | |||
447 | static int __init cpufreq_gx_init(void) | ||
448 | { | ||
449 | int ret; | ||
450 | struct gxfreq_params *params; | ||
451 | struct pci_dev *gx_pci; | ||
452 | u32 class_rev; | ||
453 | |||
454 | /* Test if we have the right hardware */ | ||
455 | if ((gx_pci = gx_detect_chipset()) == NULL) | ||
456 | return -ENODEV; | ||
457 | |||
458 | /* check whether module parameters are sane */ | ||
459 | if (max_duration > 0xff) | ||
460 | max_duration = 0xff; | ||
461 | |||
462 | dprintk("geode suspend modulation available.\n"); | ||
463 | |||
464 | params = kmalloc(sizeof(struct gxfreq_params), GFP_KERNEL); | ||
465 | if (params == NULL) | ||
466 | return -ENOMEM; | ||
467 | memset(params, 0, sizeof(struct gxfreq_params)); | ||
468 | |||
469 | params->cs55x0 = gx_pci; | ||
470 | gx_params = params; | ||
471 | |||
472 | /* keep cs55x0 configurations */ | ||
473 | pci_read_config_byte(params->cs55x0, PCI_SUSCFG, &(params->pci_suscfg)); | ||
474 | pci_read_config_byte(params->cs55x0, PCI_PMER1, &(params->pci_pmer1)); | ||
475 | pci_read_config_byte(params->cs55x0, PCI_PMER2, &(params->pci_pmer2)); | ||
476 | pci_read_config_byte(params->cs55x0, PCI_MODON, &(params->on_duration)); | ||
477 | pci_read_config_byte(params->cs55x0, PCI_MODOFF, &(params->off_duration)); | ||
478 | pci_read_config_dword(params->cs55x0, PCI_CLASS_REVISION, &class_rev); | ||
479 | params->pci_rev = class_rev & 0xff; | ||
480 | |||
481 | if ((ret = cpufreq_register_driver(&gx_suspmod_driver))) { | ||
482 | kfree(params); | ||
483 | return ret; /* register error! */ | ||
484 | } | ||
485 | |||
486 | return 0; | ||
487 | } | ||
488 | |||
489 | static void __exit cpufreq_gx_exit(void) | ||
490 | { | ||
491 | cpufreq_unregister_driver(&gx_suspmod_driver); | ||
492 | pci_dev_put(gx_params->cs55x0); | ||
493 | kfree(gx_params); | ||
494 | } | ||
495 | |||
496 | MODULE_AUTHOR ("Hiroshi Miura <miura@da-cha.org>"); | ||
497 | MODULE_DESCRIPTION ("Cpufreq driver for Cyrix MediaGX and NatSemi Geode"); | ||
498 | MODULE_LICENSE ("GPL"); | ||
499 | |||
500 | module_init(cpufreq_gx_init); | ||
501 | module_exit(cpufreq_gx_exit); | ||
502 | |||
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c new file mode 100644 index 000000000000..ab0f9f5aac11 --- /dev/null +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c | |||
@@ -0,0 +1,658 @@ | |||
1 | /* | ||
2 | * (C) 2001-2004 Dave Jones. <davej@codemonkey.org.uk> | ||
3 | * (C) 2002 Padraig Brady. <padraig@antefacto.com> | ||
4 | * | ||
5 | * Licensed under the terms of the GNU GPL License version 2. | ||
6 | * Based upon datasheets & sample CPUs kindly provided by VIA. | ||
7 | * | ||
8 | * VIA currently has 3 different versions of Longhaul. | ||
9 | * Version 1 (Longhaul) uses the BCR2 MSR at 0x1147. | ||
10 | * It is present only in Samuel 1 (C5A), Samuel 2 (C5B) stepping 0. | ||
11 | * Version 2 of longhaul is the same as v1, but adds voltage scaling. | ||
12 | * Present in Samuel 2 (steppings 1-7 only) (C5B), and Ezra (C5C). | ||
13 | * Voltage scaling support has currently been disabled in this driver | ||
14 | * until we have code that gets it right. | ||
15 | * Version 3 of longhaul got renamed to Powersaver and redesigned | ||
16 | * to use the POWERSAVER MSR at 0x110a. | ||
17 | * It is present in Ezra-T (C5M), Nehemiah (C5X) and above. | ||
18 | * It's pretty much the same feature-wise as longhaul v2, though | ||
19 | * there is provision for scaling the FSB too, but this doesn't work | ||
20 | * too well in practice so we don't even try to use it. | ||
21 | * | ||
22 | * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* | ||
23 | */ | ||
24 | |||
25 | #include <linux/kernel.h> | ||
26 | #include <linux/module.h> | ||
27 | #include <linux/moduleparam.h> | ||
28 | #include <linux/init.h> | ||
29 | #include <linux/cpufreq.h> | ||
30 | #include <linux/slab.h> | ||
31 | #include <linux/string.h> | ||
32 | |||
33 | #include <asm/msr.h> | ||
34 | #include <asm/timex.h> | ||
35 | #include <asm/io.h> | ||
36 | |||
37 | #include "longhaul.h" | ||
38 | |||
39 | #define PFX "longhaul: " | ||
40 | |||
41 | #define TYPE_LONGHAUL_V1 1 | ||
42 | #define TYPE_LONGHAUL_V2 2 | ||
43 | #define TYPE_POWERSAVER 3 | ||
44 | |||
45 | #define CPU_SAMUEL 1 | ||
46 | #define CPU_SAMUEL2 2 | ||
47 | #define CPU_EZRA 3 | ||
48 | #define CPU_EZRA_T 4 | ||
49 | #define CPU_NEHEMIAH 5 | ||
50 | |||
51 | static int cpu_model; | ||
52 | static unsigned int numscales=16, numvscales; | ||
53 | static unsigned int fsb; | ||
54 | static int minvid, maxvid; | ||
55 | static unsigned int minmult, maxmult; | ||
56 | static int can_scale_voltage; | ||
57 | static int vrmrev; | ||
58 | |||
59 | /* Module parameters */ | ||
60 | static int dont_scale_voltage; | ||
61 | |||
62 | |||
63 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg) | ||
64 | |||
65 | |||
66 | #define __hlt() __asm__ __volatile__("hlt": : :"memory") | ||
67 | |||
68 | /* Clock ratios multiplied by 10 */ | ||
69 | static int clock_ratio[32]; | ||
70 | static int eblcr_table[32]; | ||
71 | static int voltage_table[32]; | ||
72 | static unsigned int highest_speed, lowest_speed; /* kHz */ | ||
73 | static int longhaul_version; | ||
74 | static struct cpufreq_frequency_table *longhaul_table; | ||
75 | |||
76 | #ifdef CONFIG_CPU_FREQ_DEBUG | ||
77 | static char speedbuffer[8]; | ||
78 | |||
79 | static char *print_speed(int speed) | ||
80 | { | ||
81 | if (speed > 1000) { | ||
82 | if (speed%1000 == 0) | ||
83 | sprintf (speedbuffer, "%dGHz", speed/1000); | ||
84 | else | ||
85 | sprintf (speedbuffer, "%d.%dGHz", speed/1000, (speed%1000)/100); | ||
86 | } else | ||
87 | sprintf (speedbuffer, "%dMHz", speed); | ||
88 | |||
89 | return speedbuffer; | ||
90 | } | ||
91 | #endif | ||
92 | |||
93 | |||
94 | static unsigned int calc_speed(int mult) | ||
95 | { | ||
96 | int khz; | ||
97 | khz = (mult/10)*fsb; | ||
98 | if (mult%10) | ||
99 | khz += fsb/2; | ||
100 | khz *= 1000; | ||
101 | return khz; | ||
102 | } | ||
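
For a quick sanity check of the x10 fixed-point multiplier encoding used by calc_speed(), a standalone sketch (7.5x on a 133 MHz bus is an assumed example, not a detected part):

#include <stdio.h>

static unsigned int calc_speed(int mult, int fsb)
{
	int khz = (mult / 10) * fsb;	/* whole multiplier steps */

	if (mult % 10)
		khz += fsb / 2;		/* a .5x step adds half an FSB */
	return khz * 1000;
}

int main(void)
{
	/* assumed 7.5x multiplier on a 133 MHz bus */
	printf("%u kHz\n", calc_speed(75, 133));	/* 997500 */
	return 0;
}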
103 | |||
104 | |||
105 | static int longhaul_get_cpu_mult(void) | ||
106 | { | ||
107 | unsigned long invalue=0,lo, hi; | ||
108 | |||
109 | rdmsr (MSR_IA32_EBL_CR_POWERON, lo, hi); | ||
110 | invalue = (lo & (1<<22|1<<23|1<<24|1<<25)) >>22; | ||
111 | if (longhaul_version==TYPE_LONGHAUL_V2 || longhaul_version==TYPE_POWERSAVER) { | ||
112 | if (lo & (1<<27)) | ||
113 | invalue+=16; | ||
114 | } | ||
115 | return eblcr_table[invalue]; | ||
116 | } | ||
117 | |||
118 | |||
119 | static void do_powersaver(union msr_longhaul *longhaul, | ||
120 | unsigned int clock_ratio_index) | ||
121 | { | ||
122 | int version; | ||
123 | |||
124 | switch (cpu_model) { | ||
125 | case CPU_EZRA_T: | ||
126 | version = 3; | ||
127 | break; | ||
128 | case CPU_NEHEMIAH: | ||
129 | version = 0xf; | ||
130 | break; | ||
131 | default: | ||
132 | return; | ||
133 | } | ||
134 | |||
135 | rdmsrl(MSR_VIA_LONGHAUL, longhaul->val); | ||
136 | longhaul->bits.SoftBusRatio = clock_ratio_index & 0xf; | ||
137 | longhaul->bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4; | ||
138 | longhaul->bits.EnableSoftBusRatio = 1; | ||
139 | longhaul->bits.RevisionKey = 0; | ||
140 | local_irq_disable(); | ||
141 | wrmsrl(MSR_VIA_LONGHAUL, longhaul->val); | ||
142 | local_irq_enable(); | ||
143 | __hlt(); | ||
144 | |||
145 | rdmsrl(MSR_VIA_LONGHAUL, longhaul->val); | ||
146 | longhaul->bits.EnableSoftBusRatio = 0; | ||
147 | longhaul->bits.RevisionKey = version; | ||
148 | local_irq_disable(); | ||
149 | wrmsrl(MSR_VIA_LONGHAUL, longhaul->val); | ||
150 | local_irq_enable(); | ||
151 | } | ||
152 | |||
153 | /** | ||
154 | * longhaul_set_cpu_frequency() | ||
155 | * @clock_ratio_index : bitpattern of the new multiplier. | ||
156 | * | ||
157 | * Sets a new clock ratio. | ||
158 | */ | ||
159 | |||
160 | static void longhaul_setstate(unsigned int clock_ratio_index) | ||
161 | { | ||
162 | int speed, mult; | ||
163 | struct cpufreq_freqs freqs; | ||
164 | union msr_longhaul longhaul; | ||
165 | union msr_bcr2 bcr2; | ||
166 | static unsigned int old_ratio=-1; | ||
167 | |||
168 | if (old_ratio == clock_ratio_index) | ||
169 | return; | ||
170 | old_ratio = clock_ratio_index; | ||
171 | |||
172 | mult = clock_ratio[clock_ratio_index]; | ||
173 | if (mult == -1) | ||
174 | return; | ||
175 | |||
176 | speed = calc_speed(mult); | ||
177 | if ((speed > highest_speed) || (speed < lowest_speed)) | ||
178 | return; | ||
179 | |||
180 | freqs.old = calc_speed(longhaul_get_cpu_mult()); | ||
181 | freqs.new = speed; | ||
182 | freqs.cpu = 0; /* longhaul.c is UP only driver */ | ||
183 | |||
184 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
185 | |||
186 | dprintk ("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n", | ||
187 | fsb, mult/10, mult%10, print_speed(speed/1000)); | ||
188 | |||
189 | switch (longhaul_version) { | ||
190 | |||
191 | /* | ||
192 | * Longhaul v1. (Samuel[C5A] and Samuel2 stepping 0[C5B]) | ||
193 | * Software controlled multipliers only. | ||
194 | * | ||
195 | * *NB* Until we get voltage scaling working v1 & v2 are the same code. | ||
196 | * Longhaul v2 appears in Samuel2 Steppings 1->7 [C5b] and Ezra [C5C] | ||
197 | */ | ||
198 | case TYPE_LONGHAUL_V1: | ||
199 | case TYPE_LONGHAUL_V2: | ||
200 | rdmsrl (MSR_VIA_BCR2, bcr2.val); | ||
201 | /* Enable software clock multiplier */ | ||
202 | bcr2.bits.ESOFTBF = 1; | ||
203 | bcr2.bits.CLOCKMUL = clock_ratio_index; | ||
204 | local_irq_disable(); | ||
205 | wrmsrl (MSR_VIA_BCR2, bcr2.val); | ||
206 | local_irq_enable(); | ||
207 | |||
208 | __hlt(); | ||
209 | |||
210 | /* Disable software clock multiplier */ | ||
211 | rdmsrl (MSR_VIA_BCR2, bcr2.val); | ||
212 | bcr2.bits.ESOFTBF = 0; | ||
213 | local_irq_disable(); | ||
214 | wrmsrl (MSR_VIA_BCR2, bcr2.val); | ||
215 | local_irq_enable(); | ||
216 | break; | ||
217 | |||
218 | /* | ||
219 | * Longhaul v3 (aka Powersaver). (Ezra-T [C5M] & Nehemiah [C5N]) | ||
220 | * We can scale voltage with this too, but that's currently | ||
221 | * disabled until we come up with a decent 'match freq to voltage' | ||
222 | * algorithm. | ||
223 | * When we add voltage scaling, we will also need to do the | ||
224 | * voltage/freq setting in order depending on the direction | ||
225 | * of scaling (like we do in powernow-k7.c) | ||
226 | * Nehemiah can do FSB scaling too, but this has never been proven | ||
227 | * to work in practice. | ||
228 | */ | ||
229 | case TYPE_POWERSAVER: | ||
230 | do_powersaver(&longhaul, clock_ratio_index); | ||
231 | break; | ||
232 | } | ||
233 | |||
234 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
235 | } | ||
236 | |||
237 | /* | ||
238 | * Centaur decided to make life a little more tricky. | ||
239 | * Only longhaul v1 is allowed to read EBLCR BSEL[0:1]. | ||
240 | * Samuel2 and above have to try and guess what the FSB is. | ||
241 | * We do this by assuming we booted at maximum multiplier, and interpolate | ||
242 | * between that value multiplied by possible FSBs and cpu_mhz which | ||
243 | * was calculated at boot time. Really ugly, but no other way to do this. | ||
244 | */ | ||
245 | |||
246 | #define ROUNDING 0xf | ||
247 | |||
248 | static int _guess(int guess) | ||
249 | { | ||
250 | int target; | ||
251 | |||
252 | target = ((maxmult/10)*guess); | ||
253 | if (maxmult%10 != 0) | ||
254 | target += (guess/2); | ||
255 | target += ROUNDING/2; | ||
256 | target &= ~ROUNDING; | ||
257 | return target; | ||
258 | } | ||
259 | |||
260 | |||
261 | static int guess_fsb(void) | ||
262 | { | ||
263 | int speed = (cpu_khz/1000); | ||
264 | int i; | ||
265 | int speeds[3] = { 66, 100, 133 }; | ||
266 | |||
267 | speed += ROUNDING/2; | ||
268 | speed &= ~ROUNDING; | ||
269 | |||
270 | for (i=0; i<3; i++) { | ||
271 | if (_guess(speeds[i]) == speed) | ||
272 | return speeds[i]; | ||
273 | } | ||
274 | return 0; | ||
275 | } | ||
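
A user-space sketch of the interpolation above, with an assumed 7.5x part clocked near 997 MHz; both the candidate (maxmult x FSB) and the measured speed are snapped onto the same 16 MHz grid before comparison:

#include <stdio.h>

#define ROUNDING 0xf

/* Snap maxmult * fsb onto the 16 MHz grid, as _guess() does. */
static int guess(int maxmult, int fsb)
{
	int target = (maxmult / 10) * fsb;

	if (maxmult % 10)
		target += fsb / 2;
	target += ROUNDING / 2;
	return target & ~ROUNDING;
}

int main(void)
{
	int maxmult = 75, cpu_khz = 997500;	/* assumed 7.5x, ~997 MHz */
	int speeds[3] = { 66, 100, 133 };
	int speed = (cpu_khz / 1000 + ROUNDING / 2) & ~ROUNDING;
	int i;

	for (i = 0; i < 3; i++)
		if (guess(maxmult, speeds[i]) == speed)
			printf("guessed FSB: %d MHz\n", speeds[i]);	/* 133 */
	return 0;
}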
276 | |||
277 | |||
278 | static int __init longhaul_get_ranges(void) | ||
279 | { | ||
280 | unsigned long invalue; | ||
281 | unsigned int multipliers[32]= { | ||
282 | 50,30,40,100,55,35,45,95,90,70,80,60,120,75,85,65, | ||
283 | -1,110,120,-1,135,115,125,105,130,150,160,140,-1,155,-1,145 }; | ||
284 | unsigned int j, k = 0; | ||
285 | union msr_longhaul longhaul; | ||
286 | unsigned long lo, hi; | ||
287 | unsigned int eblcr_fsb_table_v1[] = { 66, 133, 100, -1 }; | ||
288 | unsigned int eblcr_fsb_table_v2[] = { 133, 100, -1, 66 }; | ||
289 | |||
290 | switch (longhaul_version) { | ||
291 | case TYPE_LONGHAUL_V1: | ||
292 | case TYPE_LONGHAUL_V2: | ||
293 | /* Ugh, Longhaul v1 didn't have the min/max MSRs. | ||
294 | Assume min=3.0x & max = whatever we booted at. */ | ||
295 | minmult = 30; | ||
296 | maxmult = longhaul_get_cpu_mult(); | ||
297 | rdmsr (MSR_IA32_EBL_CR_POWERON, lo, hi); | ||
298 | invalue = (lo & (1<<18|1<<19)) >>18; | ||
299 | if (cpu_model==CPU_SAMUEL || cpu_model==CPU_SAMUEL2) | ||
300 | fsb = eblcr_fsb_table_v1[invalue]; | ||
301 | else | ||
302 | fsb = guess_fsb(); | ||
303 | break; | ||
304 | |||
305 | case TYPE_POWERSAVER: | ||
306 | /* Ezra-T */ | ||
307 | if (cpu_model==CPU_EZRA_T) { | ||
308 | rdmsrl (MSR_VIA_LONGHAUL, longhaul.val); | ||
309 | invalue = longhaul.bits.MaxMHzBR; | ||
310 | if (longhaul.bits.MaxMHzBR4) | ||
311 | invalue += 16; | ||
312 | maxmult=multipliers[invalue]; | ||
313 | |||
314 | invalue = longhaul.bits.MinMHzBR; | ||
315 | if (longhaul.bits.MinMHzBR4 == 1) | ||
316 | minmult = 30; | ||
317 | else | ||
318 | minmult = multipliers[invalue]; | ||
319 | fsb = eblcr_fsb_table_v2[longhaul.bits.MaxMHzFSB]; | ||
320 | break; | ||
321 | } | ||
322 | |||
323 | /* Nehemiah */ | ||
324 | if (cpu_model==CPU_NEHEMIAH) { | ||
325 | rdmsrl (MSR_VIA_LONGHAUL, longhaul.val); | ||
326 | |||
327 | /* | ||
328 | * TODO: This code works, but raises a lot of questions. | ||
329 | * - Some Nehemiah's seem to have broken Min/MaxMHzBR's. | ||
330 | * We get around this by using a hardcoded multiplier of 4.0x | ||
331 | * for the minimum speed, and the speed we booted up at for the max. | ||
332 | * This is done in longhaul_get_cpu_mult() by reading the EBLCR register. | ||
333 | * - According to some VIA documentation EBLCR is only | ||
334 | * in pre-Nehemiah C3s. How this still works is a mystery. | ||
335 | * We're possibly using something undocumented and unsupported, | ||
336 | * but it works, so we don't grumble. | ||
337 | */ | ||
338 | minmult=40; | ||
339 | maxmult=longhaul_get_cpu_mult(); | ||
340 | |||
341 | /* Starting with the 1.2GHz parts, there's a 200MHz bus. */ | ||
342 | if ((cpu_khz/1000) > 1200) | ||
343 | fsb = 200; | ||
344 | else | ||
345 | fsb = eblcr_fsb_table_v2[longhaul.bits.MaxMHzFSB]; | ||
346 | break; | ||
347 | } | ||
348 | } | ||
349 | |||
350 | dprintk ("MinMult:%d.%dx MaxMult:%d.%dx\n", | ||
351 | minmult/10, minmult%10, maxmult/10, maxmult%10); | ||
352 | |||
353 | if (fsb == -1) { | ||
354 | printk (KERN_INFO PFX "Invalid (reserved) FSB!\n"); | ||
355 | return -EINVAL; | ||
356 | } | ||
357 | |||
358 | highest_speed = calc_speed(maxmult); | ||
359 | lowest_speed = calc_speed(minmult); | ||
360 | dprintk ("FSB:%dMHz Lowest speed: %s Highest speed:%s\n", fsb, | ||
361 | print_speed(lowest_speed/1000), | ||
362 | print_speed(highest_speed/1000)); | ||
363 | |||
364 | if (lowest_speed == highest_speed) { | ||
365 | printk (KERN_INFO PFX "highest speed equals lowest, aborting.\n"); | ||
366 | return -EINVAL; | ||
367 | } | ||
368 | if (lowest_speed > highest_speed) { | ||
369 | printk (KERN_INFO PFX "nonsense! lowest speed (%d) > highest (%d)!\n", | ||
370 | lowest_speed, highest_speed); | ||
371 | return -EINVAL; | ||
372 | } | ||
373 | |||
374 | longhaul_table = kmalloc((numscales + 1) * sizeof(struct cpufreq_frequency_table), GFP_KERNEL); | ||
375 | if(!longhaul_table) | ||
376 | return -ENOMEM; | ||
377 | |||
378 | for (j=0; j < numscales; j++) { | ||
379 | unsigned int ratio; | ||
380 | ratio = clock_ratio[j]; | ||
381 | if (ratio == -1) | ||
382 | continue; | ||
383 | if (ratio > maxmult || ratio < minmult) | ||
384 | continue; | ||
385 | longhaul_table[k].frequency = calc_speed(ratio); | ||
386 | longhaul_table[k].index = j; | ||
387 | k++; | ||
388 | } | ||
389 | |||
390 | longhaul_table[k].frequency = CPUFREQ_TABLE_END; | ||
391 | if (!k) { | ||
392 | kfree (longhaul_table); | ||
393 | return -EINVAL; | ||
394 | } | ||
395 | |||
396 | return 0; | ||
397 | } | ||
398 | |||
399 | |||
400 | static void __init longhaul_setup_voltagescaling(void) | ||
401 | { | ||
402 | union msr_longhaul longhaul; | ||
403 | |||
404 | rdmsrl (MSR_VIA_LONGHAUL, longhaul.val); | ||
405 | |||
406 | if (!(longhaul.bits.RevisionID & 1)) | ||
407 | return; | ||
408 | |||
409 | minvid = longhaul.bits.MinimumVID; | ||
410 | maxvid = longhaul.bits.MaximumVID; | ||
411 | vrmrev = longhaul.bits.VRMRev; | ||
412 | |||
413 | if (minvid == 0 || maxvid == 0) { | ||
414 | printk (KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. " | ||
415 | "Voltage scaling disabled.\n", | ||
416 | minvid/1000, minvid%1000, maxvid/1000, maxvid%1000); | ||
417 | return; | ||
418 | } | ||
419 | |||
420 | if (minvid == maxvid) { | ||
421 | printk (KERN_INFO PFX "Claims to support voltage scaling but min & max are " | ||
422 | "both %d.%03d. Voltage scaling disabled\n", | ||
423 | maxvid/1000, maxvid%1000); | ||
424 | return; | ||
425 | } | ||
426 | |||
427 | if (vrmrev==0) { | ||
428 | dprintk ("VRM 8.5 \n"); | ||
429 | memcpy (voltage_table, vrm85scales, sizeof(voltage_table)); | ||
430 | numvscales = (voltage_table[maxvid]-voltage_table[minvid])/25; | ||
431 | } else { | ||
432 | dprintk ("Mobile VRM \n"); | ||
433 | memcpy (voltage_table, mobilevrmscales, sizeof(voltage_table)); | ||
434 | numvscales = (voltage_table[maxvid]-voltage_table[minvid])/5; | ||
435 | } | ||
436 | |||
437 | /* Current voltage isn't readable at first, so we need to | ||
438 | set it to a known value. The spec says to use maxvid */ | ||
439 | longhaul.bits.RevisionKey = longhaul.bits.RevisionID; /* FIXME: This is bad. */ | ||
440 | longhaul.bits.EnableSoftVID = 1; | ||
441 | longhaul.bits.SoftVID = maxvid; | ||
442 | wrmsrl (MSR_VIA_LONGHAUL, longhaul.val); | ||
443 | |||
444 | minvid = voltage_table[minvid]; | ||
445 | maxvid = voltage_table[maxvid]; | ||
446 | |||
447 | dprintk ("Min VID=%d.%03d Max VID=%d.%03d, %d possible voltage scales\n", | ||
448 | maxvid/1000, maxvid%1000, minvid/1000, minvid%1000, numvscales); | ||
449 | |||
450 | can_scale_voltage = 1; | ||
451 | } | ||
452 | |||
453 | |||
454 | static int longhaul_verify(struct cpufreq_policy *policy) | ||
455 | { | ||
456 | return cpufreq_frequency_table_verify(policy, longhaul_table); | ||
457 | } | ||
458 | |||
459 | |||
460 | static int longhaul_target(struct cpufreq_policy *policy, | ||
461 | unsigned int target_freq, unsigned int relation) | ||
462 | { | ||
463 | unsigned int table_index = 0; | ||
464 | unsigned int new_clock_ratio = 0; | ||
465 | |||
466 | if (cpufreq_frequency_table_target(policy, longhaul_table, target_freq, relation, &table_index)) | ||
467 | return -EINVAL; | ||
468 | |||
469 | new_clock_ratio = longhaul_table[table_index].index & 0xFF; | ||
470 | |||
471 | longhaul_setstate(new_clock_ratio); | ||
472 | |||
473 | return 0; | ||
474 | } | ||
475 | |||
476 | |||
477 | static unsigned int longhaul_get(unsigned int cpu) | ||
478 | { | ||
479 | if (cpu) | ||
480 | return 0; | ||
481 | return calc_speed(longhaul_get_cpu_mult()); | ||
482 | } | ||
483 | |||
484 | |||
485 | static int __init longhaul_cpu_init(struct cpufreq_policy *policy) | ||
486 | { | ||
487 | struct cpuinfo_x86 *c = cpu_data; | ||
488 | char *cpuname=NULL; | ||
489 | int ret; | ||
490 | |||
491 | switch (c->x86_model) { | ||
492 | case 6: | ||
493 | cpu_model = CPU_SAMUEL; | ||
494 | cpuname = "C3 'Samuel' [C5A]"; | ||
495 | longhaul_version = TYPE_LONGHAUL_V1; | ||
496 | memcpy (clock_ratio, samuel1_clock_ratio, sizeof(samuel1_clock_ratio)); | ||
497 | memcpy (eblcr_table, samuel1_eblcr, sizeof(samuel1_eblcr)); | ||
498 | break; | ||
499 | |||
500 | case 7: | ||
501 | longhaul_version = TYPE_LONGHAUL_V1; | ||
502 | switch (c->x86_mask) { | ||
503 | case 0: | ||
504 | cpu_model = CPU_SAMUEL2; | ||
505 | cpuname = "C3 'Samuel 2' [C5B]"; | ||
506 | /* Note, this is not a typo, early Samuel2's had Samuel1 ratios. */ | ||
507 | memcpy (clock_ratio, samuel1_clock_ratio, sizeof(samuel1_clock_ratio)); | ||
508 | memcpy (eblcr_table, samuel2_eblcr, sizeof(samuel2_eblcr)); | ||
509 | break; | ||
510 | case 1 ... 15: | ||
511 | if (c->x86_mask < 8) { | ||
512 | cpu_model = CPU_SAMUEL2; | ||
513 | cpuname = "C3 'Samuel 2' [C5B]"; | ||
514 | } else { | ||
515 | cpu_model = CPU_EZRA; | ||
516 | cpuname = "C3 'Ezra' [C5C]"; | ||
517 | } | ||
518 | memcpy (clock_ratio, ezra_clock_ratio, sizeof(ezra_clock_ratio)); | ||
519 | memcpy (eblcr_table, ezra_eblcr, sizeof(ezra_eblcr)); | ||
520 | break; | ||
521 | } | ||
522 | break; | ||
523 | |||
524 | case 8: | ||
525 | cpu_model = CPU_EZRA_T; | ||
526 | cpuname = "C3 'Ezra-T' [C5M]"; | ||
527 | longhaul_version = TYPE_POWERSAVER; | ||
528 | numscales=32; | ||
529 | memcpy (clock_ratio, ezrat_clock_ratio, sizeof(ezrat_clock_ratio)); | ||
530 | memcpy (eblcr_table, ezrat_eblcr, sizeof(ezrat_eblcr)); | ||
531 | break; | ||
532 | |||
533 | case 9: | ||
534 | cpu_model = CPU_NEHEMIAH; | ||
535 | longhaul_version = TYPE_POWERSAVER; | ||
536 | numscales=32; | ||
537 | switch (c->x86_mask) { | ||
538 | case 0 ... 1: | ||
539 | cpuname = "C3 'Nehemiah A' [C5N]"; | ||
540 | memcpy (clock_ratio, nehemiah_a_clock_ratio, sizeof(nehemiah_a_clock_ratio)); | ||
541 | memcpy (eblcr_table, nehemiah_a_eblcr, sizeof(nehemiah_a_eblcr)); | ||
542 | break; | ||
543 | case 2 ... 4: | ||
544 | cpuname = "C3 'Nehemiah B' [C5N]"; | ||
545 | memcpy (clock_ratio, nehemiah_b_clock_ratio, sizeof(nehemiah_b_clock_ratio)); | ||
546 | memcpy (eblcr_table, nehemiah_b_eblcr, sizeof(nehemiah_b_eblcr)); | ||
547 | break; | ||
548 | case 5 ... 15: | ||
549 | cpuname = "C3 'Nehemiah C' [C5N]"; | ||
550 | memcpy (clock_ratio, nehemiah_c_clock_ratio, sizeof(nehemiah_c_clock_ratio)); | ||
551 | memcpy (eblcr_table, nehemiah_c_eblcr, sizeof(nehemiah_c_eblcr)); | ||
552 | break; | ||
553 | } | ||
554 | break; | ||
555 | |||
556 | default: | ||
557 | cpuname = "Unknown"; | ||
558 | break; | ||
559 | } | ||
560 | |||
561 | printk (KERN_INFO PFX "VIA %s CPU detected. ", cpuname); | ||
562 | switch (longhaul_version) { | ||
563 | case TYPE_LONGHAUL_V1: | ||
564 | case TYPE_LONGHAUL_V2: | ||
565 | printk ("Longhaul v%d supported.\n", longhaul_version); | ||
566 | break; | ||
567 | case TYPE_POWERSAVER: | ||
568 | printk ("Powersaver supported.\n"); | ||
569 | break; | ||
570 | } | ||
571 | |||
572 | ret = longhaul_get_ranges(); | ||
573 | if (ret != 0) | ||
574 | return ret; | ||
575 | |||
576 | if ((longhaul_version==TYPE_LONGHAUL_V2 || longhaul_version==TYPE_POWERSAVER) && | ||
577 | (dont_scale_voltage==0)) | ||
578 | longhaul_setup_voltagescaling(); | ||
579 | |||
580 | policy->governor = CPUFREQ_DEFAULT_GOVERNOR; | ||
581 | policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; | ||
582 | policy->cur = calc_speed(longhaul_get_cpu_mult()); | ||
583 | |||
584 | ret = cpufreq_frequency_table_cpuinfo(policy, longhaul_table); | ||
585 | if (ret) | ||
586 | return ret; | ||
587 | |||
588 | cpufreq_frequency_table_get_attr(longhaul_table, policy->cpu); | ||
589 | |||
590 | return 0; | ||
591 | } | ||
592 | |||
593 | static int __devexit longhaul_cpu_exit(struct cpufreq_policy *policy) | ||
594 | { | ||
595 | cpufreq_frequency_table_put_attr(policy->cpu); | ||
596 | return 0; | ||
597 | } | ||
598 | |||
599 | static struct freq_attr* longhaul_attr[] = { | ||
600 | &cpufreq_freq_attr_scaling_available_freqs, | ||
601 | NULL, | ||
602 | }; | ||
603 | |||
604 | static struct cpufreq_driver longhaul_driver = { | ||
605 | .verify = longhaul_verify, | ||
606 | .target = longhaul_target, | ||
607 | .get = longhaul_get, | ||
608 | .init = longhaul_cpu_init, | ||
609 | .exit = __devexit_p(longhaul_cpu_exit), | ||
610 | .name = "longhaul", | ||
611 | .owner = THIS_MODULE, | ||
612 | .attr = longhaul_attr, | ||
613 | }; | ||
614 | |||
615 | |||
616 | static int __init longhaul_init(void) | ||
617 | { | ||
618 | struct cpuinfo_x86 *c = cpu_data; | ||
619 | |||
620 | if (c->x86_vendor != X86_VENDOR_CENTAUR || c->x86 != 6) | ||
621 | return -ENODEV; | ||
622 | |||
623 | switch (c->x86_model) { | ||
624 | case 6 ... 9: | ||
625 | return cpufreq_register_driver(&longhaul_driver); | ||
626 | default: | ||
627 | printk (KERN_INFO PFX "Unknown VIA CPU. Contact davej@codemonkey.org.uk\n"); | ||
628 | } | ||
629 | |||
630 | return -ENODEV; | ||
631 | } | ||
632 | |||
633 | |||
634 | static void __exit longhaul_exit(void) | ||
635 | { | ||
636 | int i=0; | ||
637 | |||
638 | for (i=0; i < numscales; i++) { | ||
639 | if (clock_ratio[i] == maxmult) { | ||
640 | longhaul_setstate(i); | ||
641 | break; | ||
642 | } | ||
643 | } | ||
644 | |||
645 | cpufreq_unregister_driver(&longhaul_driver); | ||
646 | kfree(longhaul_table); | ||
647 | } | ||
648 | |||
649 | module_param (dont_scale_voltage, int, 0644); | ||
650 | MODULE_PARM_DESC(dont_scale_voltage, "Don't scale voltage of processor"); | ||
651 | |||
652 | MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>"); | ||
653 | MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors."); | ||
654 | MODULE_LICENSE ("GPL"); | ||
655 | |||
656 | module_init(longhaul_init); | ||
657 | module_exit(longhaul_exit); | ||
658 | |||
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.h b/arch/i386/kernel/cpu/cpufreq/longhaul.h new file mode 100644 index 000000000000..2a495c162ec7 --- /dev/null +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.h | |||
@@ -0,0 +1,466 @@ | |||
1 | /* | ||
2 | * longhaul.h | ||
3 | * (C) 2003 Dave Jones. | ||
4 | * | ||
5 | * Licensed under the terms of the GNU GPL License version 2. | ||
6 | * | ||
7 | * VIA-specific information | ||
8 | */ | ||
9 | |||
10 | union msr_bcr2 { | ||
11 | struct { | ||
12 | unsigned Reserved:19, // 18:0 | ||
13 | ESOFTBF:1, // 19 | ||
14 | Reserved2:3, // 22:20 | ||
15 | CLOCKMUL:4, // 26:23 | ||
16 | Reserved3:5; // 31:27 | ||
17 | } bits; | ||
18 | unsigned long val; | ||
19 | }; | ||
20 | |||
21 | union msr_longhaul { | ||
22 | struct { | ||
23 | unsigned RevisionID:4, // 3:0 | ||
24 | RevisionKey:4, // 7:4 | ||
25 | EnableSoftBusRatio:1, // 8 | ||
26 | EnableSoftVID:1, // 9 | ||
27 | EnableSoftBSEL:1, // 10 | ||
28 | Reserved:3, // 13:11 | ||
29 | SoftBusRatio4:1, // 14 | ||
30 | VRMRev:1, // 15 | ||
31 | SoftBusRatio:4, // 19:16 | ||
32 | SoftVID:5, // 24:20 | ||
33 | Reserved2:3, // 27:25 | ||
34 | SoftBSEL:2, // 29:28 | ||
35 | Reserved3:2, // 31:30 | ||
36 | MaxMHzBR:4, // 35:32 | ||
37 | MaximumVID:5, // 40:36 | ||
38 | MaxMHzFSB:2, // 42:41 | ||
39 | MaxMHzBR4:1, // 43 | ||
40 | Reserved4:4, // 47:44 | ||
41 | MinMHzBR:4, // 51:48 | ||
42 | MinimumVID:5, // 56:52 | ||
43 | MinMHzFSB:2, // 58:57 | ||
44 | MinMHzBR4:1, // 59 | ||
45 | Reserved5:4; // 63:60 | ||
46 | } bits; | ||
47 | unsigned long long val; | ||
48 | }; | ||
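
The 4-bit ratio fields above are each paired with a single extension bit (SoftBusRatio4, MaxMHzBR4, MinMHzBR4), giving a 5-bit value that can index the 32-entry ratio tables below. A minimal sketch of the combination; the helper name is hypothetical, not part of the driver:

    /* Illustrative only: combine a 4-bit ratio field with its extension
     * bit to form an index into a 32-entry table (0..31). */
    static inline unsigned int ratio_index(unsigned int ratio4, unsigned int bit4)
    {
            return ratio4 | (bit4 << 4);
    }
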
49 | |||
50 | /* | ||
51 | * Clock ratio tables. Div/Mod by 10 to get ratio. | ||
52 | * The eblcr ones specify the ratio read from the CPU. | ||
53 | * The clock_ratio ones specify what to write to the CPU. | ||
54 | */ | ||
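
Each entry in these tables stores the multiplier times ten, so integer division and modulo by 10 recover the whole and fractional parts; reserved slots hold -1 and must be skipped. A minimal illustrative decoder (the helper name is not part of the driver):

    /* Illustrative only: decode a table entry such as 65 into "6.5x".
     * Reserved entries are -1 and must be skipped by the caller. */
    static inline void decode_ratio(int entry, int *whole, int *tenths)
    {
            *whole = entry / 10;    /* 65 -> 6 */
            *tenths = entry % 10;   /* 65 -> 5 */
    }
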
55 | |||
56 | /* | ||
57 | * VIA C3 Samuel 1 & Samuel 2 (stepping 0) | ||
58 | */ | ||
59 | static int __initdata samuel1_clock_ratio[16] = { | ||
60 | -1, /* 0000 -> RESERVED */ | ||
61 | 30, /* 0001 -> 3.0x */ | ||
62 | 40, /* 0010 -> 4.0x */ | ||
63 | -1, /* 0011 -> RESERVED */ | ||
64 | -1, /* 0100 -> RESERVED */ | ||
65 | 35, /* 0101 -> 3.5x */ | ||
66 | 45, /* 0110 -> 4.5x */ | ||
67 | 55, /* 0111 -> 5.5x */ | ||
68 | 60, /* 1000 -> 6.0x */ | ||
69 | 70, /* 1001 -> 7.0x */ | ||
70 | 80, /* 1010 -> 8.0x */ | ||
71 | 50, /* 1011 -> 5.0x */ | ||
72 | 65, /* 1100 -> 6.5x */ | ||
73 | 75, /* 1101 -> 7.5x */ | ||
74 | -1, /* 1110 -> RESERVED */ | ||
75 | -1, /* 1111 -> RESERVED */ | ||
76 | }; | ||
77 | |||
78 | static int __initdata samuel1_eblcr[16] = { | ||
79 | 50, /* 0000 -> RESERVED */ | ||
80 | 30, /* 0001 -> 3.0x */ | ||
81 | 40, /* 0010 -> 4.0x */ | ||
82 | -1, /* 0011 -> RESERVED */ | ||
83 | 55, /* 0100 -> 5.5x */ | ||
84 | 35, /* 0101 -> 3.5x */ | ||
85 | 45, /* 0110 -> 4.5x */ | ||
86 | -1, /* 0111 -> RESERVED */ | ||
87 | -1, /* 1000 -> RESERVED */ | ||
88 | 70, /* 1001 -> 7.0x */ | ||
89 | 80, /* 1010 -> 8.0x */ | ||
90 | 60, /* 1011 -> 6.0x */ | ||
91 | -1, /* 1100 -> RESERVED */ | ||
92 | 75, /* 1101 -> 7.5x */ | ||
93 | -1, /* 1110 -> RESERVED */ | ||
94 | 65, /* 1111 -> 6.5x */ | ||
95 | }; | ||
96 | |||
97 | /* | ||
98 | * VIA C3 Samuel2 Stepping 1->15 | ||
99 | */ | ||
100 | static int __initdata samuel2_eblcr[16] = { | ||
101 | 50, /* 0000 -> 5.0x */ | ||
102 | 30, /* 0001 -> 3.0x */ | ||
103 | 40, /* 0010 -> 4.0x */ | ||
104 | 100, /* 0011 -> 10.0x */ | ||
105 | 55, /* 0100 -> 5.5x */ | ||
106 | 35, /* 0101 -> 3.5x */ | ||
107 | 45, /* 0110 -> 4.5x */ | ||
108 | 110, /* 0111 -> 11.0x */ | ||
109 | 90, /* 1000 -> 9.0x */ | ||
110 | 70, /* 1001 -> 7.0x */ | ||
111 | 80, /* 1010 -> 8.0x */ | ||
112 | 60, /* 1011 -> 6.0x */ | ||
113 | 120, /* 1100 -> 12.0x */ | ||
114 | 75, /* 1101 -> 7.5x */ | ||
115 | 130, /* 1110 -> 13.0x */ | ||
116 | 65, /* 1111 -> 6.5x */ | ||
117 | }; | ||
118 | |||
119 | /* | ||
120 | * VIA C3 Ezra | ||
121 | */ | ||
122 | static int __initdata ezra_clock_ratio[16] = { | ||
123 | 100, /* 0000 -> 10.0x */ | ||
124 | 30, /* 0001 -> 3.0x */ | ||
125 | 40, /* 0010 -> 4.0x */ | ||
126 | 90, /* 0011 -> 9.0x */ | ||
127 | 95, /* 0100 -> 9.5x */ | ||
128 | 35, /* 0101 -> 3.5x */ | ||
129 | 45, /* 0110 -> 4.5x */ | ||
130 | 55, /* 0111 -> 5.5x */ | ||
131 | 60, /* 1000 -> 6.0x */ | ||
132 | 70, /* 1001 -> 7.0x */ | ||
133 | 80, /* 1010 -> 8.0x */ | ||
134 | 50, /* 1011 -> 5.0x */ | ||
135 | 65, /* 1100 -> 6.5x */ | ||
136 | 75, /* 1101 -> 7.5x */ | ||
137 | 85, /* 1110 -> 8.5x */ | ||
138 | 120, /* 1111 -> 12.0x */ | ||
139 | }; | ||
140 | |||
141 | static int __initdata ezra_eblcr[16] = { | ||
142 | 50, /* 0000 -> 5.0x */ | ||
143 | 30, /* 0001 -> 3.0x */ | ||
144 | 40, /* 0010 -> 4.0x */ | ||
145 | 100, /* 0011 -> 10.0x */ | ||
146 | 55, /* 0100 -> 5.5x */ | ||
147 | 35, /* 0101 -> 3.5x */ | ||
148 | 45, /* 0110 -> 4.5x */ | ||
149 | 95, /* 0111 -> 9.5x */ | ||
150 | 90, /* 1000 -> 9.0x */ | ||
151 | 70, /* 1001 -> 7.0x */ | ||
152 | 80, /* 1010 -> 8.0x */ | ||
153 | 60, /* 1011 -> 6.0x */ | ||
154 | 120, /* 1100 -> 12.0x */ | ||
155 | 75, /* 1101 -> 7.5x */ | ||
156 | 85, /* 1110 -> 8.5x */ | ||
157 | 65, /* 1111 -> 6.5x */ | ||
158 | }; | ||
159 | |||
160 | /* | ||
161 | * VIA C3 (Ezra-T) [C5M]. | ||
162 | */ | ||
163 | static int __initdata ezrat_clock_ratio[32] = { | ||
164 | 100, /* 0000 -> 10.0x */ | ||
165 | 30, /* 0001 -> 3.0x */ | ||
166 | 40, /* 0010 -> 4.0x */ | ||
167 | 90, /* 0011 -> 9.0x */ | ||
168 | 95, /* 0100 -> 9.5x */ | ||
169 | 35, /* 0101 -> 3.5x */ | ||
170 | 45, /* 0110 -> 4.5x */ | ||
171 | 55, /* 0111 -> 5.5x */ | ||
172 | 60, /* 1000 -> 6.0x */ | ||
173 | 70, /* 1001 -> 7.0x */ | ||
174 | 80, /* 1010 -> 8.0x */ | ||
175 | 50, /* 1011 -> 5.0x */ | ||
176 | 65, /* 1100 -> 6.5x */ | ||
177 | 75, /* 1101 -> 7.5x */ | ||
178 | 85, /* 1110 -> 8.5x */ | ||
179 | 120, /* 1111 -> 12.0x */ | ||
180 | |||
181 | -1, /* 0000 -> RESERVED (10.0x) */ | ||
182 | 110, /* 0001 -> 11.0x */ | ||
183 | 120, /* 0010 -> 12.0x */ | ||
184 | -1, /* 0011 -> RESERVED (9.0x)*/ | ||
185 | 105, /* 0100 -> 10.5x */ | ||
186 | 115, /* 0101 -> 11.5x */ | ||
187 | 125, /* 0110 -> 12.5x */ | ||
188 | 135, /* 0111 -> 13.5x */ | ||
189 | 140, /* 1000 -> 14.0x */ | ||
190 | 150, /* 1001 -> 15.0x */ | ||
191 | 160, /* 1010 -> 16.0x */ | ||
192 | 130, /* 1011 -> 13.0x */ | ||
193 | 145, /* 1100 -> 14.5x */ | ||
194 | 155, /* 1101 -> 15.5x */ | ||
195 | -1, /* 1110 -> RESERVED (13.0x) */ | ||
196 | -1, /* 1111 -> RESERVED (12.0x) */ | ||
197 | }; | ||
198 | |||
199 | static int __initdata ezrat_eblcr[32] = { | ||
200 | 50, /* 0000 -> 5.0x */ | ||
201 | 30, /* 0001 -> 3.0x */ | ||
202 | 40, /* 0010 -> 4.0x */ | ||
203 | 100, /* 0011 -> 10.0x */ | ||
204 | 55, /* 0100 -> 5.5x */ | ||
205 | 35, /* 0101 -> 3.5x */ | ||
206 | 45, /* 0110 -> 4.5x */ | ||
207 | 95, /* 0111 -> 9.5x */ | ||
208 | 90, /* 1000 -> 9.0x */ | ||
209 | 70, /* 1001 -> 7.0x */ | ||
210 | 80, /* 1010 -> 8.0x */ | ||
211 | 60, /* 1011 -> 6.0x */ | ||
212 | 120, /* 1100 -> 12.0x */ | ||
213 | 75, /* 1101 -> 7.5x */ | ||
214 | 85, /* 1110 -> 8.5x */ | ||
215 | 65, /* 1111 -> 6.5x */ | ||
216 | |||
217 | -1, /* 0000 -> RESERVED (9.0x) */ | ||
218 | 110, /* 0001 -> 11.0x */ | ||
219 | 120, /* 0010 -> 12.0x */ | ||
220 | -1, /* 0011 -> RESERVED (10.0x)*/ | ||
221 | 135, /* 0100 -> 13.5x */ | ||
222 | 115, /* 0101 -> 11.5x */ | ||
223 | 125, /* 0110 -> 12.5x */ | ||
224 | 105, /* 0111 -> 10.5x */ | ||
225 | 130, /* 1000 -> 13.0x */ | ||
226 | 150, /* 1001 -> 15.0x */ | ||
227 | 160, /* 1010 -> 16.0x */ | ||
228 | 140, /* 1011 -> 14.0x */ | ||
229 | -1, /* 1100 -> RESERVED (12.0x) */ | ||
230 | 155, /* 1101 -> 15.5x */ | ||
231 | -1, /* 1110 -> RESERVED (13.0x) */ | ||
232 | 145, /* 1111 -> 14.5x */ | ||
233 | }; | ||
234 | |||
235 | /* | ||
236 | * VIA C3 Nehemiah | ||
237 | */ | ||
238 | static int __initdata nehemiah_a_clock_ratio[32] = { | ||
239 | 100, /* 0000 -> 10.0x */ | ||
240 | 160, /* 0001 -> 16.0x */ | ||
241 | -1, /* 0010 -> RESERVED */ | ||
242 | 90, /* 0011 -> 9.0x */ | ||
243 | 95, /* 0100 -> 9.5x */ | ||
244 | -1, /* 0101 -> RESERVED */ | ||
245 | -1, /* 0110 -> RESERVED */ | ||
246 | 55, /* 0111 -> 5.5x */ | ||
247 | 60, /* 1000 -> 6.0x */ | ||
248 | 70, /* 1001 -> 7.0x */ | ||
249 | 80, /* 1010 -> 8.0x */ | ||
250 | 50, /* 1011 -> 5.0x */ | ||
251 | 65, /* 1100 -> 6.5x */ | ||
252 | 75, /* 1101 -> 7.5x */ | ||
253 | 85, /* 1110 -> 8.5x */ | ||
254 | 120, /* 1111 -> 12.0x */ | ||
255 | 100, /* 0000 -> 10.0x */ | ||
256 | -1, /* 0001 -> RESERVED */ | ||
257 | 120, /* 0010 -> 12.0x */ | ||
258 | 90, /* 0011 -> 9.0x */ | ||
259 | 105, /* 0100 -> 10.5x */ | ||
260 | 115, /* 0101 -> 11.5x */ | ||
261 | 125, /* 0110 -> 12.5x */ | ||
262 | 135, /* 0111 -> 13.5x */ | ||
263 | 140, /* 1000 -> 14.0x */ | ||
264 | 150, /* 1001 -> 15.0x */ | ||
265 | 160, /* 1010 -> 16.0x */ | ||
266 | 130, /* 1011 -> 13.0x */ | ||
267 | 145, /* 1100 -> 14.5x */ | ||
268 | 155, /* 1101 -> 15.5x */ | ||
269 | -1, /* 1110 -> RESERVED (13.0x) */ | ||
270 | 120, /* 1111 -> 12.0x */ | ||
271 | }; | ||
272 | |||
273 | static int __initdata nehemiah_b_clock_ratio[32] = { | ||
274 | 100, /* 0000 -> 10.0x */ | ||
275 | 160, /* 0001 -> 16.0x */ | ||
276 | -1, /* 0010 -> RESERVED */ | ||
277 | 90, /* 0011 -> 9.0x */ | ||
278 | 95, /* 0100 -> 9.5x */ | ||
279 | -1, /* 0101 -> RESERVED */ | ||
280 | -1, /* 0110 -> RESERVED */ | ||
281 | 55, /* 0111 -> 5.5x */ | ||
282 | 60, /* 1000 -> 6.0x */ | ||
283 | 70, /* 1001 -> 7.0x */ | ||
284 | 80, /* 1010 -> 8.0x */ | ||
285 | 50, /* 1011 -> 5.0x */ | ||
286 | 65, /* 1100 -> 6.5x */ | ||
287 | 75, /* 1101 -> 7.5x */ | ||
288 | 85, /* 1110 -> 8.5x */ | ||
289 | 120, /* 1111 -> 12.0x */ | ||
290 | 100, /* 0000 -> 10.0x */ | ||
291 | 110, /* 0001 -> 11.0x */ | ||
292 | 120, /* 0010 -> 12.0x */ | ||
293 | 90, /* 0011 -> 9.0x */ | ||
294 | 105, /* 0100 -> 10.5x */ | ||
295 | 115, /* 0101 -> 11.5x */ | ||
296 | 125, /* 0110 -> 12.5x */ | ||
297 | 135, /* 0111 -> 13.5x */ | ||
298 | 140, /* 1000 -> 14.0x */ | ||
299 | 150, /* 1001 -> 15.0x */ | ||
300 | 160, /* 1010 -> 16.0x */ | ||
301 | 130, /* 1011 -> 13.0x */ | ||
302 | 145, /* 1100 -> 14.5x */ | ||
303 | 155, /* 1101 -> 15.5x */ | ||
304 | -1, /* 1110 -> RESERVED (13.0x) */ | ||
305 | 120, /* 1111 -> 12.0x */ | ||
306 | }; | ||
307 | |||
308 | static int __initdata nehemiah_c_clock_ratio[32] = { | ||
309 | 100, /* 0000 -> 10.0x */ | ||
310 | 160, /* 0001 -> 16.0x */ | ||
311 | 40, /* 0010 -> RESERVED */ | ||
312 | 90, /* 0011 -> 9.0x */ | ||
313 | 95, /* 0100 -> 9.5x */ | ||
314 | -1, /* 0101 -> RESERVED */ | ||
315 | 45, /* 0110 -> RESERVED */ | ||
316 | 55, /* 0111 -> 5.5x */ | ||
317 | 60, /* 1000 -> 6.0x */ | ||
318 | 70, /* 1001 -> 7.0x */ | ||
319 | 80, /* 1010 -> 8.0x */ | ||
320 | 50, /* 1011 -> 5.0x */ | ||
321 | 65, /* 1100 -> 6.5x */ | ||
322 | 75, /* 1101 -> 7.5x */ | ||
323 | 85, /* 1110 -> 8.5x */ | ||
324 | 120, /* 1111 -> 12.0x */ | ||
325 | 100, /* 0000 -> 10.0x */ | ||
326 | 110, /* 0001 -> 11.0x */ | ||
327 | 120, /* 0010 -> 12.0x */ | ||
328 | 90, /* 0011 -> 9.0x */ | ||
329 | 105, /* 0100 -> 10.5x */ | ||
330 | 115, /* 0101 -> 11.5x */ | ||
331 | 125, /* 0110 -> 12.5x */ | ||
332 | 135, /* 0111 -> 13.5x */ | ||
333 | 140, /* 1000 -> 14.0x */ | ||
334 | 150, /* 1001 -> 15.0x */ | ||
335 | 160, /* 1010 -> 16.0x */ | ||
336 | 130, /* 1011 -> 13.0x */ | ||
337 | 145, /* 1100 -> 14.5x */ | ||
338 | 155, /* 1101 -> 15.5x */ | ||
339 | -1, /* 1110 -> RESERVED (13.0x) */ | ||
340 | 120, /* 1111 -> 12.0x */ | ||
341 | }; | ||
342 | |||
343 | static int __initdata nehemiah_a_eblcr[32] = { | ||
344 | 50, /* 0000 -> 5.0x */ | ||
345 | 160, /* 0001 -> 16.0x */ | ||
346 | -1, /* 0010 -> RESERVED */ | ||
347 | 100, /* 0011 -> 10.0x */ | ||
348 | 55, /* 0100 -> 5.5x */ | ||
349 | -1, /* 0101 -> RESERVED */ | ||
350 | -1, /* 0110 -> RESERVED */ | ||
351 | 95, /* 0111 -> 9.5x */ | ||
352 | 90, /* 1000 -> 9.0x */ | ||
353 | 70, /* 1001 -> 7.0x */ | ||
354 | 80, /* 1010 -> 8.0x */ | ||
355 | 60, /* 1011 -> 6.0x */ | ||
356 | 120, /* 1100 -> 12.0x */ | ||
357 | 75, /* 1101 -> 7.5x */ | ||
358 | 85, /* 1110 -> 8.5x */ | ||
359 | 65, /* 1111 -> 6.5x */ | ||
360 | 90, /* 0000 -> 9.0x */ | ||
361 | -1, /* 0001 -> RESERVED */ | ||
362 | 120, /* 0010 -> 12.0x */ | ||
363 | 100, /* 0011 -> 10.0x */ | ||
364 | 135, /* 0100 -> 13.5x */ | ||
365 | 115, /* 0101 -> 11.5x */ | ||
366 | 125, /* 0110 -> 12.5x */ | ||
367 | 105, /* 0111 -> 10.5x */ | ||
368 | 130, /* 1000 -> 13.0x */ | ||
369 | 150, /* 1001 -> 15.0x */ | ||
370 | 160, /* 1010 -> 16.0x */ | ||
371 | 140, /* 1011 -> 14.0x */ | ||
372 | 120, /* 1100 -> 12.0x */ | ||
373 | 155, /* 1101 -> 15.5x */ | ||
374 | -1, /* 1110 -> RESERVED (13.0x) */ | ||
375 | 145 /* 1111 -> 14.5x */ | ||
376 | /* end of table */ | ||
377 | }; | ||
378 | static int __initdata nehemiah_b_eblcr[32] = { | ||
379 | 50, /* 0000 -> 5.0x */ | ||
380 | 160, /* 0001 -> 16.0x */ | ||
381 | -1, /* 0010 -> RESERVED */ | ||
382 | 100, /* 0011 -> 10.0x */ | ||
383 | 55, /* 0100 -> 5.5x */ | ||
384 | -1, /* 0101 -> RESERVED */ | ||
385 | -1, /* 0110 -> RESERVED */ | ||
386 | 95, /* 0111 -> 9.5x */ | ||
387 | 90, /* 1000 -> 9.0x */ | ||
388 | 70, /* 1001 -> 7.0x */ | ||
389 | 80, /* 1010 -> 8.0x */ | ||
390 | 60, /* 1011 -> 6.0x */ | ||
391 | 120, /* 1100 -> 12.0x */ | ||
392 | 75, /* 1101 -> 7.5x */ | ||
393 | 85, /* 1110 -> 8.5x */ | ||
394 | 65, /* 1111 -> 6.5x */ | ||
395 | 90, /* 0000 -> 9.0x */ | ||
396 | 110, /* 0001 -> 11.0x */ | ||
397 | 120, /* 0010 -> 12.0x */ | ||
398 | 100, /* 0011 -> 10.0x */ | ||
399 | 135, /* 0100 -> 13.5x */ | ||
400 | 115, /* 0101 -> 11.5x */ | ||
401 | 125, /* 0110 -> 12.5x */ | ||
402 | 105, /* 0111 -> 10.5x */ | ||
403 | 130, /* 1000 -> 13.0x */ | ||
404 | 150, /* 1001 -> 15.0x */ | ||
405 | 160, /* 1010 -> 16.0x */ | ||
406 | 140, /* 1011 -> 14.0x */ | ||
407 | 120, /* 1100 -> 12.0x */ | ||
408 | 155, /* 1101 -> 15.5x */ | ||
409 | -1, /* 1110 -> RESERVED (13.0x) */ | ||
410 | 145 /* 1111 -> 14.5x */ | ||
411 | /* end of table */ | ||
412 | }; | ||
413 | static int __initdata nehemiah_c_eblcr[32] = { | ||
414 | 50, /* 0000 -> 5.0x */ | ||
415 | 160, /* 0001 -> 16.0x */ | ||
416 | 40, /* 0010 -> RESERVED */ | ||
417 | 100, /* 0011 -> 10.0x */ | ||
418 | 55, /* 0100 -> 5.5x */ | ||
419 | -1, /* 0101 -> RESERVED */ | ||
420 | 45, /* 0110 -> RESERVED */ | ||
421 | 95, /* 0111 -> 9.5x */ | ||
422 | 90, /* 1000 -> 9.0x */ | ||
423 | 70, /* 1001 -> 7.0x */ | ||
424 | 80, /* 1010 -> 8.0x */ | ||
425 | 60, /* 1011 -> 6.0x */ | ||
426 | 120, /* 1100 -> 12.0x */ | ||
427 | 75, /* 1101 -> 7.5x */ | ||
428 | 85, /* 1110 -> 8.5x */ | ||
429 | 65, /* 1111 -> 6.5x */ | ||
430 | 90, /* 0000 -> 9.0x */ | ||
431 | 110, /* 0001 -> 11.0x */ | ||
432 | 120, /* 0010 -> 12.0x */ | ||
433 | 100, /* 0011 -> 10.0x */ | ||
434 | 135, /* 0100 -> 13.5x */ | ||
435 | 115, /* 0101 -> 11.5x */ | ||
436 | 125, /* 0110 -> 12.5x */ | ||
437 | 105, /* 0111 -> 10.5x */ | ||
438 | 130, /* 1000 -> 13.0x */ | ||
439 | 150, /* 1001 -> 15.0x */ | ||
440 | 160, /* 1010 -> 16.0x */ | ||
441 | 140, /* 1011 -> 14.0x */ | ||
442 | 120, /* 1100 -> 12.0x */ | ||
443 | 155, /* 1101 -> 15.5x */ | ||
444 | -1, /* 1110 -> RESERVED (13.0x) */ | ||
445 | 145 /* 1111 -> 14.5x */ | ||
446 | /* end of table */ | ||
447 | }; | ||
448 | |||
449 | /* | ||
450 | * Voltage scales. Div/Mod by 1000 to get actual voltage. | ||
451 | * Which scale to use depends on the VRM type in use. | ||
452 | */ | ||
453 | static int __initdata vrm85scales[32] = { | ||
454 | 1250, 1200, 1150, 1100, 1050, 1800, 1750, 1700, | ||
455 | 1650, 1600, 1550, 1500, 1450, 1400, 1350, 1300, | ||
456 | 1275, 1225, 1175, 1125, 1075, 1825, 1775, 1725, | ||
457 | 1675, 1625, 1575, 1525, 1475, 1425, 1375, 1325, | ||
458 | }; | ||
459 | |||
460 | static int __initdata mobilevrmscales[32] = { | ||
461 | 2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650, | ||
462 | 1600, 1550, 1500, 1450, 1400, 1350, 1300, -1, | ||
463 | 1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100, | ||
464 | 1075, 1050, 1025, 1000, 975, 950, 925, -1, | ||
465 | }; | ||
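
The VRMRev bit in the longhaul MSR above reports the VRM type. Assuming revision 0 means a VRM 8.5 part and a nonzero revision means a mobile VRM (an assumption taken from the table names, not stated in this header), the selection might look like this sketch:

    /* Illustrative only; vid_scale() is hypothetical, not driver code.
     * Entries divide/mod by 1000, e.g. 1250 -> 1.250 V. */
    static int *vid_scale(unsigned int vrm_rev)
    {
            return vrm_rev ? mobilevrmscales : vrm85scales;
    }
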
466 | |||
diff --git a/arch/i386/kernel/cpu/cpufreq/longrun.c b/arch/i386/kernel/cpu/cpufreq/longrun.c new file mode 100644 index 000000000000..e3868de4dc2e --- /dev/null +++ b/arch/i386/kernel/cpu/cpufreq/longrun.c | |||
@@ -0,0 +1,326 @@ | |||
1 | /* | ||
2 | * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de> | ||
3 | * | ||
4 | * Licensed under the terms of the GNU GPL License version 2. | ||
5 | * | ||
6 | * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* | ||
7 | */ | ||
8 | |||
9 | #include <linux/kernel.h> | ||
10 | #include <linux/module.h> | ||
11 | #include <linux/init.h> | ||
12 | #include <linux/slab.h> | ||
13 | #include <linux/cpufreq.h> | ||
14 | |||
15 | #include <asm/msr.h> | ||
16 | #include <asm/processor.h> | ||
17 | #include <asm/timex.h> | ||
18 | |||
19 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longrun", msg) | ||
20 | |||
21 | static struct cpufreq_driver longrun_driver; | ||
22 | |||
23 | /** | ||
24 | * longrun_{low,high}_freq is needed for the conversion of cpufreq kHz | ||
25 | * values into per cent values. In TMTA microcode, the following is valid: | ||
26 | * performance_pctg = (current_freq - low_freq)/(high_freq - low_freq) | ||
27 | */ | ||
28 | static unsigned int longrun_low_freq, longrun_high_freq; | ||
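
Applied in reverse, the same rule maps a frequency onto a percentage of the low..high span, which is what longrun_set_policy() below writes into the MSR. A minimal sketch of that conversion (the helper name is illustrative):

    /* Illustrative only: kHz -> percent under the TMTA rule above. */
    static unsigned int longrun_khz_to_pctg(unsigned int khz)
    {
            if (longrun_high_freq <= longrun_low_freq)
                    return 100;     /* degenerate LongRun table */
            return (khz - longrun_low_freq) /
                    ((longrun_high_freq - longrun_low_freq) / 100);
    }
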
29 | |||
30 | |||
31 | /** | ||
32 | * longrun_get_policy - get the current LongRun policy | ||
33 | * @policy: struct cpufreq_policy where current policy is written into | ||
34 | * | ||
35 | * Reads the current LongRun policy by access to MSR_TMTA_LONGRUN_FLAGS | ||
36 | * and MSR_TMTA_LONGRUN_CTRL | ||
37 | */ | ||
38 | static void __init longrun_get_policy(struct cpufreq_policy *policy) | ||
39 | { | ||
40 | u32 msr_lo, msr_hi; | ||
41 | |||
42 | rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi); | ||
43 | dprintk("longrun flags are %x - %x\n", msr_lo, msr_hi); | ||
44 | if (msr_lo & 0x01) | ||
45 | policy->policy = CPUFREQ_POLICY_PERFORMANCE; | ||
46 | else | ||
47 | policy->policy = CPUFREQ_POLICY_POWERSAVE; | ||
48 | |||
49 | rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); | ||
50 | dprintk("longrun ctrl is %x - %x\n", msr_lo, msr_hi); | ||
51 | msr_lo &= 0x0000007F; | ||
52 | msr_hi &= 0x0000007F; | ||
53 | |||
54 | if ( longrun_high_freq <= longrun_low_freq ) { | ||
55 | /* Assume degenerate Longrun table */ | ||
56 | policy->min = policy->max = longrun_high_freq; | ||
57 | } else { | ||
58 | policy->min = longrun_low_freq + msr_lo * | ||
59 | ((longrun_high_freq - longrun_low_freq) / 100); | ||
60 | policy->max = longrun_low_freq + msr_hi * | ||
61 | ((longrun_high_freq - longrun_low_freq) / 100); | ||
62 | } | ||
63 | policy->cpu = 0; | ||
64 | } | ||
65 | |||
66 | |||
67 | /** | ||
68 | * longrun_set_policy - sets a new CPUFreq policy | ||
69 | * @policy: new policy | ||
70 | * | ||
71 | * Sets a new CPUFreq policy on LongRun-capable processors. This function | ||
72 | * has to be called with cpufreq_driver locked. | ||
73 | */ | ||
74 | static int longrun_set_policy(struct cpufreq_policy *policy) | ||
75 | { | ||
76 | u32 msr_lo, msr_hi; | ||
77 | u32 pctg_lo, pctg_hi; | ||
78 | |||
79 | if (!policy) | ||
80 | return -EINVAL; | ||
81 | |||
82 | if ( longrun_high_freq <= longrun_low_freq ) { | ||
83 | /* Assume degenerate Longrun table */ | ||
84 | pctg_lo = pctg_hi = 100; | ||
85 | } else { | ||
86 | pctg_lo = (policy->min - longrun_low_freq) / | ||
87 | ((longrun_high_freq - longrun_low_freq) / 100); | ||
88 | pctg_hi = (policy->max - longrun_low_freq) / | ||
89 | ((longrun_high_freq - longrun_low_freq) / 100); | ||
90 | } | ||
91 | |||
92 | if (pctg_hi > 100) | ||
93 | pctg_hi = 100; | ||
94 | if (pctg_lo > pctg_hi) | ||
95 | pctg_lo = pctg_hi; | ||
96 | |||
97 | /* performance or economy mode */ | ||
98 | rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi); | ||
99 | msr_lo &= 0xFFFFFFFE; | ||
100 | switch (policy->policy) { | ||
101 | case CPUFREQ_POLICY_PERFORMANCE: | ||
102 | msr_lo |= 0x00000001; | ||
103 | break; | ||
104 | case CPUFREQ_POLICY_POWERSAVE: | ||
105 | break; | ||
106 | } | ||
107 | wrmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi); | ||
108 | |||
109 | /* lower and upper boundary */ | ||
110 | rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); | ||
111 | msr_lo &= 0xFFFFFF80; | ||
112 | msr_hi &= 0xFFFFFF80; | ||
113 | msr_lo |= pctg_lo; | ||
114 | msr_hi |= pctg_hi; | ||
115 | wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); | ||
116 | |||
117 | return 0; | ||
118 | } | ||
119 | |||
120 | |||
121 | /** | ||
122 | * longrun_verify_policy - verifies a new CPUFreq policy | ||
123 | * @policy: the policy to verify | ||
124 | * | ||
125 | * Validates a new CPUFreq policy. This function has to be called with | ||
126 | * cpufreq_driver locked. | ||
127 | */ | ||
128 | static int longrun_verify_policy(struct cpufreq_policy *policy) | ||
129 | { | ||
130 | if (!policy) | ||
131 | return -EINVAL; | ||
132 | |||
133 | policy->cpu = 0; | ||
134 | cpufreq_verify_within_limits(policy, | ||
135 | policy->cpuinfo.min_freq, | ||
136 | policy->cpuinfo.max_freq); | ||
137 | |||
138 | if ((policy->policy != CPUFREQ_POLICY_POWERSAVE) && | ||
139 | (policy->policy != CPUFREQ_POLICY_PERFORMANCE)) | ||
140 | return -EINVAL; | ||
141 | |||
142 | return 0; | ||
143 | } | ||
144 | |||
145 | static unsigned int longrun_get(unsigned int cpu) | ||
146 | { | ||
147 | u32 eax, ebx, ecx, edx; | ||
148 | |||
149 | if (cpu) | ||
150 | return 0; | ||
151 | |||
152 | cpuid(0x80860007, &eax, &ebx, &ecx, &edx); | ||
153 | dprintk("cpuid eax is %u\n", eax); | ||
154 | |||
155 | return (eax * 1000); | ||
156 | } | ||
157 | |||
158 | /** | ||
159 | * longrun_determine_freqs - determines the lowest and highest possible core frequency | ||
160 | * @low_freq: an int to put the lowest frequency into | ||
161 | * @high_freq: an int to put the highest frequency into | ||
162 | * | ||
163 | * Determines the lowest and highest possible core frequencies on this CPU. | ||
164 | * This is necessary to calculate the performance percentage according to | ||
165 | * TMTA rules: | ||
166 | * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq) | ||
167 | */ | ||
168 | static unsigned int __init longrun_determine_freqs(unsigned int *low_freq, | ||
169 | unsigned int *high_freq) | ||
170 | { | ||
171 | u32 msr_lo, msr_hi; | ||
172 | u32 save_lo, save_hi; | ||
173 | u32 eax, ebx, ecx, edx; | ||
174 | u32 try_hi; | ||
175 | struct cpuinfo_x86 *c = cpu_data; | ||
176 | |||
177 | if (!low_freq || !high_freq) | ||
178 | return -EINVAL; | ||
179 | |||
180 | if (cpu_has(c, X86_FEATURE_LRTI)) { | ||
181 | /* if the LongRun Table Interface is present, the | ||
182 | * detection is a bit easier: | ||
183 | * For minimum frequency, read out the maximum | ||
184 | * level (msr_hi), write that into "currently | ||
185 | * selected level", and read out the frequency. | ||
186 | * For maximum frequency, read out level zero. | ||
187 | */ | ||
188 | /* minimum */ | ||
189 | rdmsr(MSR_TMTA_LRTI_READOUT, msr_lo, msr_hi); | ||
190 | wrmsr(MSR_TMTA_LRTI_READOUT, msr_hi, msr_hi); | ||
191 | rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi); | ||
192 | *low_freq = msr_lo * 1000; /* to kHz */ | ||
193 | |||
194 | /* maximum */ | ||
195 | wrmsr(MSR_TMTA_LRTI_READOUT, 0, msr_hi); | ||
196 | rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi); | ||
197 | *high_freq = msr_lo * 1000; /* to kHz */ | ||
198 | |||
199 | dprintk("longrun table interface told %u - %u kHz\n", *low_freq, *high_freq); | ||
200 | |||
201 | if (*low_freq > *high_freq) | ||
202 | *low_freq = *high_freq; | ||
203 | return 0; | ||
204 | } | ||
205 | |||
206 | /* set the upper border to the value determined during TSC init */ | ||
207 | *high_freq = (cpu_khz / 1000); | ||
208 | *high_freq = *high_freq * 1000; | ||
209 | dprintk("high frequency is %u kHz\n", *high_freq); | ||
210 | |||
211 | /* get current borders */ | ||
212 | rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); | ||
213 | save_lo = msr_lo & 0x0000007F; | ||
214 | save_hi = msr_hi & 0x0000007F; | ||
215 | |||
216 | /* if current perf_pctg is larger than 90%, we need to decrease the | ||
217 | * upper limit to make the calculation more accurate. | ||
218 | */ | ||
219 | cpuid(0x80860007, &eax, &ebx, &ecx, &edx); | ||
220 | /* try decreasing in 10% steps; some processors react only | ||
221 | * at certain threshold values */ | ||
222 | for (try_hi = 80; try_hi > 0 && ecx > 90; try_hi -=10) { | ||
223 | /* set to 0 to try_hi perf_pctg */ | ||
224 | msr_lo &= 0xFFFFFF80; | ||
225 | msr_hi &= 0xFFFFFF80; | ||
226 | msr_lo |= 0; | ||
227 | msr_hi |= try_hi; | ||
228 | wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); | ||
229 | |||
230 | /* read out current core MHz and current perf_pctg */ | ||
231 | cpuid(0x80860007, &eax, &ebx, &ecx, &edx); | ||
232 | |||
233 | /* restore values */ | ||
234 | wrmsr(MSR_TMTA_LONGRUN_CTRL, save_lo, save_hi); | ||
235 | } | ||
236 | dprintk("percentage is %u %%, freq is %u MHz\n", ecx, eax); | ||
237 | |||
238 | /* performance_pctg = (current_freq - low_freq)/(high_freq - low_freq) | ||
239 | * equals | ||
240 | * low_freq * ( 1 - perf_pctg) = (cur_freq - high_freq * perf_pctg) | ||
241 | * | ||
242 | * high_freq * perf_pctg is stored temporarily in "ebx". | ||
243 | */ | ||
244 | ebx = (((cpu_khz / 1000) * ecx) / 100); /* to MHz */ | ||
245 | |||
246 | if ((ecx > 95) || (ecx == 0) || (eax < ebx)) | ||
247 | return -EIO; | ||
248 | |||
249 | edx = (eax - ebx) / (100 - ecx); | ||
250 | *low_freq = edx * 1000; /* back to kHz */ | ||
251 | |||
252 | dprintk("low frequency is %u kHz\n", *low_freq); | ||
253 | |||
254 | if (*low_freq > *high_freq) | ||
255 | *low_freq = *high_freq; | ||
256 | |||
257 | return 0; | ||
258 | } | ||
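
The rearranged formula in the comment above can be sanity-checked in isolation; the helper below is a verification sketch only (frequencies in MHz, percentage as an integer), not driver code:

    /* Illustrative only: low = (cur - high * p) / (1 - p), p in percent.
     * E.g. cur = 800 MHz, high = 1000 MHz, p = 50 gives low = 600 MHz. */
    static unsigned int tmta_low_mhz(unsigned int cur, unsigned int high,
                                     unsigned int pctg)
    {
            if (pctg == 0 || pctg >= 100)
                    return 0;       /* degenerate, as the driver also rejects */
            return (100 * cur - high * pctg) / (100 - pctg);
    }
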
259 | |||
260 | |||
261 | static int __init longrun_cpu_init(struct cpufreq_policy *policy) | ||
262 | { | ||
263 | int result = 0; | ||
264 | |||
265 | /* capability check */ | ||
266 | if (policy->cpu != 0) | ||
267 | return -ENODEV; | ||
268 | |||
269 | /* detect low and high frequency */ | ||
270 | result = longrun_determine_freqs(&longrun_low_freq, &longrun_high_freq); | ||
271 | if (result) | ||
272 | return result; | ||
273 | |||
274 | /* cpuinfo and default policy values */ | ||
275 | policy->cpuinfo.min_freq = longrun_low_freq; | ||
276 | policy->cpuinfo.max_freq = longrun_high_freq; | ||
277 | policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; | ||
278 | longrun_get_policy(policy); | ||
279 | |||
280 | return 0; | ||
281 | } | ||
282 | |||
283 | |||
284 | static struct cpufreq_driver longrun_driver = { | ||
285 | .flags = CPUFREQ_CONST_LOOPS, | ||
286 | .verify = longrun_verify_policy, | ||
287 | .setpolicy = longrun_set_policy, | ||
288 | .get = longrun_get, | ||
289 | .init = longrun_cpu_init, | ||
290 | .name = "longrun", | ||
291 | .owner = THIS_MODULE, | ||
292 | }; | ||
293 | |||
294 | |||
295 | /** | ||
296 | * longrun_init - initializes the Transmeta Crusoe LongRun CPUFreq driver | ||
297 | * | ||
298 | * Initializes the LongRun support. | ||
299 | */ | ||
300 | static int __init longrun_init(void) | ||
301 | { | ||
302 | struct cpuinfo_x86 *c = cpu_data; | ||
303 | |||
304 | if (c->x86_vendor != X86_VENDOR_TRANSMETA || | ||
305 | !cpu_has(c, X86_FEATURE_LONGRUN)) | ||
306 | return -ENODEV; | ||
307 | |||
308 | return cpufreq_register_driver(&longrun_driver); | ||
309 | } | ||
310 | |||
311 | |||
312 | /** | ||
313 | * longrun_exit - unregisters LongRun support | ||
314 | */ | ||
315 | static void __exit longrun_exit(void) | ||
316 | { | ||
317 | cpufreq_unregister_driver(&longrun_driver); | ||
318 | } | ||
319 | |||
320 | |||
321 | MODULE_AUTHOR ("Dominik Brodowski <linux@brodo.de>"); | ||
322 | MODULE_DESCRIPTION ("LongRun driver for Transmeta Crusoe and Efficeon processors."); | ||
323 | MODULE_LICENSE ("GPL"); | ||
324 | |||
325 | module_init(longrun_init); | ||
326 | module_exit(longrun_exit); | ||
diff --git a/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c b/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c new file mode 100644 index 000000000000..aa622d52c6e5 --- /dev/null +++ b/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c | |||
@@ -0,0 +1,337 @@ | |||
1 | /* | ||
2 | * Pentium 4/Xeon CPU on demand clock modulation/speed scaling | ||
3 | * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de> | ||
4 | * (C) 2002 Zwane Mwaikambo <zwane@commfireservices.com> | ||
5 | * (C) 2002 Arjan van de Ven <arjanv@redhat.com> | ||
6 | * (C) 2002 Tora T. Engstad | ||
7 | * All Rights Reserved | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or | ||
10 | * modify it under the terms of the GNU General Public License | ||
11 | * as published by the Free Software Foundation; either version | ||
12 | * 2 of the License, or (at your option) any later version. | ||
13 | * | ||
14 | * The author(s) of this software shall not be held liable for damages | ||
15 | * of any nature resulting due to the use of this software. This | ||
16 | * software is provided AS-IS with no warranties. | ||
17 | * | ||
18 | * Date Errata Description | ||
19 | * 20020525 N44, O17 12.5% or 25% DC causes lockup | ||
20 | * | ||
21 | */ | ||
22 | |||
23 | #include <linux/config.h> | ||
24 | #include <linux/kernel.h> | ||
25 | #include <linux/module.h> | ||
26 | #include <linux/init.h> | ||
27 | #include <linux/smp.h> | ||
28 | #include <linux/cpufreq.h> | ||
29 | #include <linux/slab.h> | ||
30 | #include <linux/cpumask.h> | ||
31 | |||
32 | #include <asm/processor.h> | ||
33 | #include <asm/msr.h> | ||
34 | #include <asm/timex.h> | ||
35 | |||
36 | #include "speedstep-lib.h" | ||
37 | |||
38 | #define PFX "p4-clockmod: " | ||
39 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "p4-clockmod", msg) | ||
40 | |||
41 | /* | ||
42 | * Duty Cycle (3 bits). Note: DC_DISABLE is not specified in the | ||
43 | * Intel docs; it is used here simply to mean "modulation disabled". | ||
44 | */ | ||
45 | enum { | ||
46 | DC_RESV, DC_DFLT, DC_25PT, DC_38PT, DC_50PT, | ||
47 | DC_64PT, DC_75PT, DC_88PT, DC_DISABLE | ||
48 | }; | ||
49 | |||
50 | #define DC_ENTRIES 8 | ||
51 | |||
52 | |||
53 | static int has_N44_O17_errata[NR_CPUS]; | ||
54 | static unsigned int stock_freq; | ||
55 | static struct cpufreq_driver p4clockmod_driver; | ||
56 | static unsigned int cpufreq_p4_get(unsigned int cpu); | ||
57 | |||
58 | static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate) | ||
59 | { | ||
60 | u32 l, h; | ||
61 | |||
62 | if (!cpu_online(cpu) || (newstate > DC_DISABLE) || (newstate == DC_RESV)) | ||
63 | return -EINVAL; | ||
64 | |||
65 | rdmsr(MSR_IA32_THERM_STATUS, l, h); | ||
66 | |||
67 | if (l & 0x01) | ||
68 | dprintk("CPU#%d currently thermal throttled\n", cpu); | ||
69 | |||
70 | if (has_N44_O17_errata[cpu] && (newstate == DC_25PT || newstate == DC_DFLT)) | ||
71 | newstate = DC_38PT; | ||
72 | |||
73 | rdmsr(MSR_IA32_THERM_CONTROL, l, h); | ||
74 | if (newstate == DC_DISABLE) { | ||
75 | dprintk("CPU#%d disabling modulation\n", cpu); | ||
76 | wrmsr(MSR_IA32_THERM_CONTROL, l & ~(1<<4), h); | ||
77 | } else { | ||
78 | dprintk("CPU#%d setting duty cycle to %d%%\n", | ||
79 | cpu, ((125 * newstate) / 10)); | ||
80 | /* bits 63 - 5 : reserved | ||
81 | * bit 4 : enable/disable | ||
82 | * bits 3-1 : duty cycle | ||
83 | * bit 0 : reserved | ||
84 | */ | ||
85 | l = (l & ~14); | ||
86 | l = l | (1<<4) | ((newstate & 0x7)<<1); | ||
87 | wrmsr(MSR_IA32_THERM_CONTROL, l, h); | ||
88 | } | ||
89 | |||
90 | return 0; | ||
91 | } | ||
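
Each duty-cycle step is one eighth (12.5%) of the stock frequency, which is why cpufreq_p4_target() below computes freqs.new as stock_freq * index / 8. A small illustrative helper (the name is hypothetical):

    /* Illustrative only: effective frequency for duty-cycle state 1..7;
     * DC_DISABLE (8) yields the full stock frequency. */
    static unsigned int p4_effective_khz(unsigned int stock_khz,
                                         unsigned int state)
    {
            return stock_khz * state / 8;   /* e.g. state 4 -> 50% */
    }
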
92 | |||
93 | |||
94 | static struct cpufreq_frequency_table p4clockmod_table[] = { | ||
95 | {DC_RESV, CPUFREQ_ENTRY_INVALID}, | ||
96 | {DC_DFLT, 0}, | ||
97 | {DC_25PT, 0}, | ||
98 | {DC_38PT, 0}, | ||
99 | {DC_50PT, 0}, | ||
100 | {DC_64PT, 0}, | ||
101 | {DC_75PT, 0}, | ||
102 | {DC_88PT, 0}, | ||
103 | {DC_DISABLE, 0}, | ||
104 | {DC_RESV, CPUFREQ_TABLE_END}, | ||
105 | }; | ||
106 | |||
107 | |||
108 | static int cpufreq_p4_target(struct cpufreq_policy *policy, | ||
109 | unsigned int target_freq, | ||
110 | unsigned int relation) | ||
111 | { | ||
112 | unsigned int newstate = DC_RESV; | ||
113 | struct cpufreq_freqs freqs; | ||
114 | cpumask_t cpus_allowed; | ||
115 | int i; | ||
116 | |||
117 | if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0], target_freq, relation, &newstate)) | ||
118 | return -EINVAL; | ||
119 | |||
120 | freqs.old = cpufreq_p4_get(policy->cpu); | ||
121 | freqs.new = stock_freq * p4clockmod_table[newstate].index / 8; | ||
122 | |||
123 | if (freqs.new == freqs.old) | ||
124 | return 0; | ||
125 | |||
126 | /* notifiers */ | ||
127 | for_each_cpu_mask(i, policy->cpus) { | ||
128 | freqs.cpu = i; | ||
129 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
130 | } | ||
131 | |||
132 | /* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software | ||
133 | * Developer's Manual, Volume 3 | ||
134 | */ | ||
135 | cpus_allowed = current->cpus_allowed; | ||
136 | |||
137 | for_each_cpu_mask(i, policy->cpus) { | ||
138 | cpumask_t this_cpu = cpumask_of_cpu(i); | ||
139 | |||
140 | set_cpus_allowed(current, this_cpu); | ||
141 | BUG_ON(smp_processor_id() != i); | ||
142 | |||
143 | cpufreq_p4_setdc(i, p4clockmod_table[newstate].index); | ||
144 | } | ||
145 | set_cpus_allowed(current, cpus_allowed); | ||
146 | |||
147 | /* notifiers */ | ||
148 | for_each_cpu_mask(i, policy->cpus) { | ||
149 | freqs.cpu = i; | ||
150 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
151 | } | ||
152 | |||
153 | return 0; | ||
154 | } | ||
155 | |||
156 | |||
157 | static int cpufreq_p4_verify(struct cpufreq_policy *policy) | ||
158 | { | ||
159 | return cpufreq_frequency_table_verify(policy, &p4clockmod_table[0]); | ||
160 | } | ||
161 | |||
162 | |||
163 | static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) | ||
164 | { | ||
165 | if ((c->x86 == 0x06) && (c->x86_model == 0x09)) { | ||
166 | /* Pentium M (Banias) */ | ||
167 | printk(KERN_WARNING PFX "Warning: Pentium M detected. " | ||
168 | "The speedstep_centrino module offers voltage scaling" | ||
169 | " in addition of frequency scaling. You should use " | ||
170 | "that instead of p4-clockmod, if possible.\n"); | ||
171 | return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PM); | ||
172 | } | ||
173 | |||
174 | if ((c->x86 == 0x06) && (c->x86_model == 0x0D)) { | ||
175 | /* Pentium M (Dothan) */ | ||
176 | printk(KERN_WARNING PFX "Warning: Pentium M detected. " | ||
177 | "The speedstep_centrino module offers voltage scaling" | ||
178 | " in addition of frequency scaling. You should use " | ||
179 | "that instead of p4-clockmod, if possible.\n"); | ||
180 | /* the TSC runs at constant frequency on these CPUs, regardless | ||
181 | * of whether throttling is active. */ | ||
182 | p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; | ||
183 | return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PM); | ||
184 | } | ||
185 | |||
186 | if (c->x86 != 0xF) { | ||
187 | printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to <linux@brodo.de>\n"); | ||
188 | return 0; | ||
189 | } | ||
190 | |||
191 | /* on P-4s, the TSC runs at constant frequency, regardless of | ||
192 | * whether throttling is active. */ | ||
193 | p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; | ||
194 | |||
195 | if (speedstep_detect_processor() == SPEEDSTEP_PROCESSOR_P4M) { | ||
196 | printk(KERN_WARNING PFX "Warning: Pentium 4-M detected. " | ||
197 | "The speedstep-ich or acpi cpufreq modules offer " | ||
198 | "voltage scaling in addition of frequency scaling. " | ||
199 | "You should use either one instead of p4-clockmod, " | ||
200 | "if possible.\n"); | ||
201 | return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4M); | ||
202 | } | ||
203 | |||
204 | return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4D); | ||
205 | } | ||
206 | |||
207 | |||
208 | |||
209 | static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) | ||
210 | { | ||
211 | struct cpuinfo_x86 *c = &cpu_data[policy->cpu]; | ||
212 | int cpuid = 0; | ||
213 | unsigned int i; | ||
214 | |||
215 | #ifdef CONFIG_SMP | ||
216 | policy->cpus = cpu_sibling_map[policy->cpu]; | ||
217 | #endif | ||
218 | |||
219 | /* Errata workaround */ | ||
220 | cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_mask; | ||
221 | switch (cpuid) { | ||
222 | case 0x0f07: | ||
223 | case 0x0f0a: | ||
224 | case 0x0f11: | ||
225 | case 0x0f12: | ||
226 | has_N44_O17_errata[policy->cpu] = 1; | ||
227 | dprintk("has errata -- disabling low frequencies\n"); | ||
228 | } | ||
229 | |||
230 | /* get max frequency */ | ||
231 | stock_freq = cpufreq_p4_get_frequency(c); | ||
232 | if (!stock_freq) | ||
233 | return -EINVAL; | ||
234 | |||
235 | /* table init */ | ||
236 | for (i=1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) { | ||
237 | if ((i<2) && (has_N44_O17_errata[policy->cpu])) | ||
238 | p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID; | ||
239 | else | ||
240 | p4clockmod_table[i].frequency = (stock_freq * i)/8; | ||
241 | } | ||
242 | cpufreq_frequency_table_get_attr(p4clockmod_table, policy->cpu); | ||
243 | |||
244 | /* cpuinfo and default policy values */ | ||
245 | policy->governor = CPUFREQ_DEFAULT_GOVERNOR; | ||
246 | policy->cpuinfo.transition_latency = 1000000; /* assumed */ | ||
247 | policy->cur = stock_freq; | ||
248 | |||
249 | return cpufreq_frequency_table_cpuinfo(policy, &p4clockmod_table[0]); | ||
250 | } | ||
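
The errata check above packs family, model, and stepping into one value: 0x0f07, for example, is family 0xF, model 0, stepping 7 (one of the N44/O17 parts). A hedged sketch of the decoding (the helper is hypothetical):

    /* Illustrative only: unpack (family << 8) | (model << 4) | stepping. */
    static inline void unpack_sig(unsigned int sig, unsigned int *fam,
                                  unsigned int *mod, unsigned int *step)
    {
            *fam = sig >> 8;            /* 0x0f07 -> 0xF */
            *mod = (sig >> 4) & 0xF;    /* 0x0f07 -> 0   */
            *step = sig & 0xF;          /* 0x0f07 -> 7   */
    }
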
251 | |||
252 | |||
253 | static int cpufreq_p4_cpu_exit(struct cpufreq_policy *policy) | ||
254 | { | ||
255 | cpufreq_frequency_table_put_attr(policy->cpu); | ||
256 | return 0; | ||
257 | } | ||
258 | |||
259 | static unsigned int cpufreq_p4_get(unsigned int cpu) | ||
260 | { | ||
261 | cpumask_t cpus_allowed; | ||
262 | u32 l, h; | ||
263 | |||
264 | cpus_allowed = current->cpus_allowed; | ||
265 | |||
266 | set_cpus_allowed(current, cpumask_of_cpu(cpu)); | ||
267 | BUG_ON(smp_processor_id() != cpu); | ||
268 | |||
269 | rdmsr(MSR_IA32_THERM_CONTROL, l, h); | ||
270 | |||
271 | set_cpus_allowed(current, cpus_allowed); | ||
272 | |||
273 | if (l & 0x10) { | ||
274 | l = l >> 1; | ||
275 | l &= 0x7; | ||
276 | } else | ||
277 | l = DC_DISABLE; | ||
278 | |||
279 | if (l != DC_DISABLE) | ||
280 | return (stock_freq * l / 8); | ||
281 | |||
282 | return stock_freq; | ||
283 | } | ||
284 | |||
285 | static struct freq_attr* p4clockmod_attr[] = { | ||
286 | &cpufreq_freq_attr_scaling_available_freqs, | ||
287 | NULL, | ||
288 | }; | ||
289 | |||
290 | static struct cpufreq_driver p4clockmod_driver = { | ||
291 | .verify = cpufreq_p4_verify, | ||
292 | .target = cpufreq_p4_target, | ||
293 | .init = cpufreq_p4_cpu_init, | ||
294 | .exit = cpufreq_p4_cpu_exit, | ||
295 | .get = cpufreq_p4_get, | ||
296 | .name = "p4-clockmod", | ||
297 | .owner = THIS_MODULE, | ||
298 | .attr = p4clockmod_attr, | ||
299 | }; | ||
300 | |||
301 | |||
302 | static int __init cpufreq_p4_init(void) | ||
303 | { | ||
304 | struct cpuinfo_x86 *c = cpu_data; | ||
305 | int ret; | ||
306 | |||
307 | /* | ||
308 | * THERM_CONTROL is architectural for IA32 now, so | ||
309 | * we can rely on the capability checks | ||
310 | */ | ||
311 | if (c->x86_vendor != X86_VENDOR_INTEL) | ||
312 | return -ENODEV; | ||
313 | |||
314 | if (!test_bit(X86_FEATURE_ACPI, c->x86_capability) || | ||
315 | !test_bit(X86_FEATURE_ACC, c->x86_capability)) | ||
316 | return -ENODEV; | ||
317 | |||
318 | ret = cpufreq_register_driver(&p4clockmod_driver); | ||
319 | if (!ret) | ||
320 | printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock Modulation available\n"); | ||
321 | |||
322 | return (ret); | ||
323 | } | ||
324 | |||
325 | |||
326 | static void __exit cpufreq_p4_exit(void) | ||
327 | { | ||
328 | cpufreq_unregister_driver(&p4clockmod_driver); | ||
329 | } | ||
330 | |||
331 | |||
332 | MODULE_AUTHOR ("Zwane Mwaikambo <zwane@commfireservices.com>"); | ||
333 | MODULE_DESCRIPTION ("cpufreq driver for Pentium(TM) 4/Xeon(TM)"); | ||
334 | MODULE_LICENSE ("GPL"); | ||
335 | |||
336 | late_initcall(cpufreq_p4_init); | ||
337 | module_exit(cpufreq_p4_exit); | ||
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k6.c b/arch/i386/kernel/cpu/cpufreq/powernow-k6.c new file mode 100644 index 000000000000..222f8cfe3c57 --- /dev/null +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k6.c | |||
@@ -0,0 +1,256 @@ | |||
1 | /* | ||
2 | * This file was based upon code in Powertweak Linux (http://powertweak.sf.net) | ||
3 | * (C) 2000-2003 Dave Jones, Arjan van de Ven, Janne Pänkälä, Dominik Brodowski. | ||
4 | * | ||
5 | * Licensed under the terms of the GNU GPL License version 2. | ||
6 | * | ||
7 | * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* | ||
8 | */ | ||
9 | |||
10 | #include <linux/kernel.h> | ||
11 | #include <linux/module.h> | ||
12 | #include <linux/init.h> | ||
13 | #include <linux/cpufreq.h> | ||
14 | #include <linux/ioport.h> | ||
15 | #include <linux/slab.h> | ||
16 | |||
17 | #include <asm/msr.h> | ||
18 | #include <asm/timex.h> | ||
19 | #include <asm/io.h> | ||
20 | |||
21 | |||
22 | #define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long | ||
23 | as it is unused */ | ||
24 | |||
25 | static unsigned int busfreq; /* FSB, in 10 kHz */ | ||
26 | static unsigned int max_multiplier; | ||
27 | |||
28 | |||
29 | /* Clock ratio multiplied by 10 - see table 27 in AMD#23446 */ | ||
30 | static struct cpufreq_frequency_table clock_ratio[] = { | ||
31 | {45, /* 000 -> 4.5x */ 0}, | ||
32 | {50, /* 001 -> 5.0x */ 0}, | ||
33 | {40, /* 010 -> 4.0x */ 0}, | ||
34 | {55, /* 011 -> 5.5x */ 0}, | ||
35 | {20, /* 100 -> 2.0x */ 0}, | ||
36 | {30, /* 101 -> 3.0x */ 0}, | ||
37 | {60, /* 110 -> 6.0x */ 0}, | ||
38 | {35, /* 111 -> 3.5x */ 0}, | ||
39 | {0, CPUFREQ_TABLE_END} | ||
40 | }; | ||
41 | |||
42 | |||
43 | /** | ||
44 | * powernow_k6_get_cpu_multiplier - returns the current FSB multiplier | ||
45 | * | ||
46 | * Returns the current setting of the frequency multiplier. Core clock | ||
47 | * speed is frequency of the Front-Side Bus multiplied with this value. | ||
48 | */ | ||
49 | static int powernow_k6_get_cpu_multiplier(void) | ||
50 | { | ||
51 | u64 invalue = 0; | ||
52 | u32 msrval; | ||
53 | |||
54 | msrval = POWERNOW_IOPORT + 0x1; | ||
55 | wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ | ||
56 | invalue=inl(POWERNOW_IOPORT + 0x8); | ||
57 | msrval = POWERNOW_IOPORT + 0x0; | ||
58 | wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */ | ||
59 | |||
60 | return clock_ratio[(invalue >> 5)&7].index; | ||
61 | } | ||
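
busfreq is deliberately kept in 10 kHz units: multiplying it by a clock_ratio index (the multiplier times ten) yields kHz directly, as powernow_k6_cpu_init() below does when filling the table. An illustrative check (the helper is hypothetical):

    /* Illustrative only: a 100 MHz FSB (busfreq = 10000) at 4.5x
     * (index 45) gives 10000 * 45 = 450000 kHz. */
    static unsigned int k6_khz(unsigned int busfreq_10khz, unsigned int index)
    {
            return busfreq_10khz * index;
    }
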
62 | |||
63 | |||
64 | /** | ||
65 | * powernow_k6_set_state - set the PowerNow! multiplier | ||
66 | * @best_i: clock_ratio[best_i] is the target multiplier | ||
67 | * | ||
68 | * Tries to change the PowerNow! multiplier | ||
69 | */ | ||
70 | static void powernow_k6_set_state (unsigned int best_i) | ||
71 | { | ||
72 | unsigned long outvalue=0, invalue=0; | ||
73 | unsigned long msrval; | ||
74 | struct cpufreq_freqs freqs; | ||
75 | |||
76 | if (clock_ratio[best_i].index > max_multiplier) { | ||
77 | printk(KERN_ERR "cpufreq: invalid target frequency\n"); | ||
78 | return; | ||
79 | } | ||
80 | |||
81 | freqs.old = busfreq * powernow_k6_get_cpu_multiplier(); | ||
82 | freqs.new = busfreq * clock_ratio[best_i].index; | ||
83 | freqs.cpu = 0; /* powernow-k6.c is UP only driver */ | ||
84 | |||
85 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
86 | |||
87 | /* we now need to transform best_i to the BVC format, see AMD#23446 */ | ||
88 | |||
89 | outvalue = (1<<12) | (1<<10) | (1<<9) | (best_i<<5); | ||
90 | |||
91 | msrval = POWERNOW_IOPORT + 0x1; | ||
92 | wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ | ||
93 | invalue=inl(POWERNOW_IOPORT + 0x8); | ||
94 | invalue = invalue & 0xf; | ||
95 | outvalue = outvalue | invalue; | ||
96 | outl(outvalue ,(POWERNOW_IOPORT + 0x8)); | ||
97 | msrval = POWERNOW_IOPORT + 0x0; | ||
98 | wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */ | ||
99 | |||
100 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
101 | |||
102 | return; | ||
103 | } | ||
104 | |||
105 | |||
106 | /** | ||
107 | * powernow_k6_verify - verifies a new CPUfreq policy | ||
108 | * @policy: new policy | ||
109 | * | ||
110 | * Policy must be within lowest and highest possible CPU Frequency, | ||
111 | * and at least one possible state must be within min and max. | ||
112 | */ | ||
113 | static int powernow_k6_verify(struct cpufreq_policy *policy) | ||
114 | { | ||
115 | return cpufreq_frequency_table_verify(policy, &clock_ratio[0]); | ||
116 | } | ||
117 | |||
118 | |||
119 | /** | ||
120 | * powernow_k6_target - sets a new CPUFreq policy | ||
121 | * @policy: new policy | ||
122 | * @target_freq: the target frequency | ||
123 | * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) | ||
124 | * | ||
125 | * sets a new CPUFreq policy | ||
126 | */ | ||
127 | static int powernow_k6_target (struct cpufreq_policy *policy, | ||
128 | unsigned int target_freq, | ||
129 | unsigned int relation) | ||
130 | { | ||
131 | unsigned int newstate = 0; | ||
132 | |||
133 | if (cpufreq_frequency_table_target(policy, &clock_ratio[0], target_freq, relation, &newstate)) | ||
134 | return -EINVAL; | ||
135 | |||
136 | powernow_k6_set_state(newstate); | ||
137 | |||
138 | return 0; | ||
139 | } | ||
140 | |||
141 | |||
142 | static int powernow_k6_cpu_init(struct cpufreq_policy *policy) | ||
143 | { | ||
144 | unsigned int i; | ||
145 | int result; | ||
146 | |||
147 | if (policy->cpu != 0) | ||
148 | return -ENODEV; | ||
149 | |||
150 | /* get frequencies */ | ||
151 | max_multiplier = powernow_k6_get_cpu_multiplier(); | ||
152 | busfreq = cpu_khz / max_multiplier; | ||
153 | |||
154 | /* table init */ | ||
155 | for (i=0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) { | ||
156 | if (clock_ratio[i].index > max_multiplier) | ||
157 | clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID; | ||
158 | else | ||
159 | clock_ratio[i].frequency = busfreq * clock_ratio[i].index; | ||
160 | } | ||
161 | |||
162 | /* cpuinfo and default policy values */ | ||
163 | policy->governor = CPUFREQ_DEFAULT_GOVERNOR; | ||
164 | policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; | ||
165 | policy->cur = busfreq * max_multiplier; | ||
166 | |||
167 | result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio); | ||
168 | if (result) | ||
169 | return (result); | ||
170 | |||
171 | cpufreq_frequency_table_get_attr(clock_ratio, policy->cpu); | ||
172 | |||
173 | return 0; | ||
174 | } | ||
175 | |||
176 | |||
177 | static int powernow_k6_cpu_exit(struct cpufreq_policy *policy) | ||
178 | { | ||
179 | unsigned int i; | ||
180 | for (i = 0; i < 8; i++) { | ||
181 | if (clock_ratio[i].index == max_multiplier) | ||
182 | powernow_k6_set_state(i); | ||
183 | } | ||
184 | cpufreq_frequency_table_put_attr(policy->cpu); | ||
185 | return 0; | ||
186 | } | ||
187 | |||
188 | static unsigned int powernow_k6_get(unsigned int cpu) | ||
189 | { | ||
190 | return busfreq * powernow_k6_get_cpu_multiplier(); | ||
191 | } | ||
192 | |||
193 | static struct freq_attr* powernow_k6_attr[] = { | ||
194 | &cpufreq_freq_attr_scaling_available_freqs, | ||
195 | NULL, | ||
196 | }; | ||
197 | |||
198 | static struct cpufreq_driver powernow_k6_driver = { | ||
199 | .verify = powernow_k6_verify, | ||
200 | .target = powernow_k6_target, | ||
201 | .init = powernow_k6_cpu_init, | ||
202 | .exit = powernow_k6_cpu_exit, | ||
203 | .get = powernow_k6_get, | ||
204 | .name = "powernow-k6", | ||
205 | .owner = THIS_MODULE, | ||
206 | .attr = powernow_k6_attr, | ||
207 | }; | ||
208 | |||
209 | |||
210 | /** | ||
211 | * powernow_k6_init - initializes the k6 PowerNow! CPUFreq driver | ||
212 | * | ||
213 | * Initializes the K6 PowerNow! support. Returns -ENODEV on unsupported | ||
214 | * devices, -EIO or -EINVAL on problems during initialization, and zero | ||
215 | * on success. | ||
216 | */ | ||
217 | static int __init powernow_k6_init(void) | ||
218 | { | ||
219 | struct cpuinfo_x86 *c = cpu_data; | ||
220 | |||
221 | if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 5) || | ||
222 | ((c->x86_model != 12) && (c->x86_model != 13))) | ||
223 | return -ENODEV; | ||
224 | |||
225 | if (!request_region(POWERNOW_IOPORT, 16, "PowerNow!")) { | ||
226 | printk("cpufreq: PowerNow IOPORT region already used.\n"); | ||
227 | return -EIO; | ||
228 | } | ||
229 | |||
230 | if (cpufreq_register_driver(&powernow_k6_driver)) { | ||
231 | release_region (POWERNOW_IOPORT, 16); | ||
232 | return -EINVAL; | ||
233 | } | ||
234 | |||
235 | return 0; | ||
236 | } | ||
237 | |||
238 | |||
239 | /** | ||
240 | * powernow_k6_exit - unregisters AMD K6-2+/3+ PowerNow! support | ||
241 | * | ||
242 | * Unregisters AMD K6-2+ / K6-3+ PowerNow! support. | ||
243 | */ | ||
244 | static void __exit powernow_k6_exit(void) | ||
245 | { | ||
246 | cpufreq_unregister_driver(&powernow_k6_driver); | ||
247 | release_region (POWERNOW_IOPORT, 16); | ||
248 | } | ||
249 | |||
250 | |||
251 | MODULE_AUTHOR ("Arjan van de Ven <arjanv@redhat.com>, Dave Jones <davej@codemonkey.org.uk>, Dominik Brodowski <linux@brodo.de>"); | ||
252 | MODULE_DESCRIPTION ("PowerNow! driver for AMD K6-2+ / K6-3+ processors."); | ||
253 | MODULE_LICENSE ("GPL"); | ||
254 | |||
255 | module_init(powernow_k6_init); | ||
256 | module_exit(powernow_k6_exit); | ||
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c new file mode 100644 index 000000000000..913f652623d9 --- /dev/null +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c | |||
@@ -0,0 +1,690 @@ | |||
1 | /* | ||
2 | * AMD K7 Powernow driver. | ||
3 | * (C) 2003 Dave Jones <davej@codemonkey.org.uk> on behalf of SuSE Labs. | ||
4 | * (C) 2003-2004 Dave Jones <davej@redhat.com> | ||
5 | * | ||
6 | * Licensed under the terms of the GNU GPL License version 2. | ||
7 | * Based upon datasheets & sample CPUs kindly provided by AMD. | ||
8 | * | ||
9 | * Errata 5: Processor may fail to execute a FID/VID change in presence of interrupt. | ||
10 | * - We cli/sti on stepping A0 CPUs around the FID/VID transition. | ||
11 | * Errata 15: Processors with half frequency multipliers may hang upon wakeup from disconnect. | ||
12 | * - We disable half multipliers if ACPI is used on A0 stepping CPUs. | ||
13 | */ | ||
14 | |||
15 | #include <linux/config.h> | ||
16 | #include <linux/kernel.h> | ||
17 | #include <linux/module.h> | ||
18 | #include <linux/moduleparam.h> | ||
19 | #include <linux/init.h> | ||
20 | #include <linux/cpufreq.h> | ||
21 | #include <linux/slab.h> | ||
22 | #include <linux/string.h> | ||
23 | #include <linux/dmi.h> | ||
24 | |||
25 | #include <asm/msr.h> | ||
26 | #include <asm/timex.h> | ||
27 | #include <asm/io.h> | ||
28 | #include <asm/system.h> | ||
29 | |||
30 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI | ||
31 | #include <linux/acpi.h> | ||
32 | #include <acpi/processor.h> | ||
33 | #endif | ||
34 | |||
35 | #include "powernow-k7.h" | ||
36 | |||
37 | #define PFX "powernow: " | ||
38 | |||
39 | |||
40 | struct psb_s { | ||
41 | u8 signature[10]; | ||
42 | u8 tableversion; | ||
43 | u8 flags; | ||
44 | u16 settlingtime; | ||
45 | u8 reserved1; | ||
46 | u8 numpst; | ||
47 | }; | ||
48 | |||
49 | struct pst_s { | ||
50 | u32 cpuid; | ||
51 | u8 fsbspeed; | ||
52 | u8 maxfid; | ||
53 | u8 startvid; | ||
54 | u8 numpstates; | ||
55 | }; | ||
56 | |||
57 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI | ||
58 | union powernow_acpi_control_t { | ||
59 | struct { | ||
60 | unsigned long fid:5, | ||
61 | vid:5, | ||
62 | sgtc:20, | ||
63 | res1:2; | ||
64 | } bits; | ||
65 | unsigned long val; | ||
66 | }; | ||
67 | #endif | ||
68 | |||
69 | #ifdef CONFIG_CPU_FREQ_DEBUG | ||
70 | /* divide by 1000 to get VCore voltage in V. */ | ||
71 | static int mobile_vid_table[32] = { | ||
72 | 2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650, | ||
73 | 1600, 1550, 1500, 1450, 1400, 1350, 1300, 0, | ||
74 | 1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100, | ||
75 | 1075, 1050, 1025, 1000, 975, 950, 925, 0, | ||
76 | }; | ||
77 | #endif | ||
78 | |||
79 | /* divide by 10 to get FID. */ | ||
80 | static int fid_codes[32] = { | ||
81 | 110, 115, 120, 125, 50, 55, 60, 65, | ||
82 | 70, 75, 80, 85, 90, 95, 100, 105, | ||
83 | 30, 190, 40, 200, 130, 135, 140, 210, | ||
84 | 150, 225, 160, 165, 170, 180, -1, -1, | ||
85 | }; | ||
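
get_ranges() below turns a FID into a core speed by scaling the front-side bus (in kHz) by the decoded multiplier; reserved FIDs hold -1 and must be skipped. A sketch of the computation (the helper is hypothetical):

    /* Illustrative only: mirrors (fsb * fid_codes[fid]) / 10 in
     * get_ranges(); callers must skip reserved (-1) entries. */
    static unsigned int k7_speed_khz(unsigned int fsb_khz, unsigned int fid)
    {
            return fsb_khz * fid_codes[fid] / 10;
    }
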
86 | |||
87 | /* This parameter forces ACPI to be used for configuration instead | ||
88 | * of the legacy BIOS (PSB/PST) method. | ||
89 | */ | ||
90 | |||
91 | static int acpi_force; | ||
92 | |||
93 | static struct cpufreq_frequency_table *powernow_table; | ||
94 | |||
95 | static unsigned int can_scale_bus; | ||
96 | static unsigned int can_scale_vid; | ||
97 | static unsigned int minimum_speed=-1; | ||
98 | static unsigned int maximum_speed; | ||
99 | static unsigned int number_scales; | ||
100 | static unsigned int fsb; | ||
101 | static unsigned int latency; | ||
102 | static char have_a0; | ||
103 | |||
104 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k7", msg) | ||
105 | |||
106 | static int check_fsb(unsigned int fsbspeed) | ||
107 | { | ||
108 | int delta; | ||
109 | unsigned int f = fsb / 1000; | ||
110 | |||
111 | delta = (fsbspeed > f) ? fsbspeed - f : f - fsbspeed; | ||
112 | return (delta < 5); | ||
113 | } | ||
114 | |||
115 | static int check_powernow(void) | ||
116 | { | ||
117 | struct cpuinfo_x86 *c = cpu_data; | ||
118 | unsigned int maxei, eax, ebx, ecx, edx; | ||
119 | |||
120 | if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 !=6)) { | ||
121 | #ifdef MODULE | ||
122 | printk (KERN_INFO PFX "This module only works with AMD K7 CPUs\n"); | ||
123 | #endif | ||
124 | return 0; | ||
125 | } | ||
126 | |||
127 | /* Get maximum capabilities */ | ||
128 | maxei = cpuid_eax (0x80000000); | ||
129 | if (maxei < 0x80000007) { /* Any powernow info ? */ | ||
130 | #ifdef MODULE | ||
131 | printk (KERN_INFO PFX "No powernow capabilities detected\n"); | ||
132 | #endif | ||
133 | return 0; | ||
134 | } | ||
135 | |||
136 | if ((c->x86_model == 6) && (c->x86_mask == 0)) { | ||
137 | printk (KERN_INFO PFX "K7 660[A0] core detected, enabling errata workarounds\n"); | ||
138 | have_a0 = 1; | ||
139 | } | ||
140 | |||
141 | cpuid(0x80000007, &eax, &ebx, &ecx, &edx); | ||
142 | |||
143 | /* Check we can actually do something before we say anything.*/ | ||
144 | if (!(edx & (1 << 1 | 1 << 2))) | ||
145 | return 0; | ||
146 | |||
147 | printk (KERN_INFO PFX "PowerNOW! Technology present. Can scale: "); | ||
148 | |||
149 | if (edx & 1 << 1) { | ||
150 | printk ("frequency"); | ||
151 | can_scale_bus=1; | ||
152 | } | ||
153 | |||
154 | if ((edx & (1 << 1 | 1 << 2)) == 0x6) | ||
155 | printk (" and "); | ||
156 | |||
157 | if (edx & 1 << 2) { | ||
158 | printk ("voltage"); | ||
159 | can_scale_vid=1; | ||
160 | } | ||
161 | |||
162 | printk (".\n"); | ||
163 | return 1; | ||
164 | } | ||
165 | |||
166 | |||
167 | static int get_ranges (unsigned char *pst) | ||
168 | { | ||
169 | unsigned int j; | ||
170 | unsigned int speed; | ||
171 | u8 fid, vid; | ||
172 | |||
173 | powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) * (number_scales + 1)), GFP_KERNEL); | ||
174 | if (!powernow_table) | ||
175 | return -ENOMEM; | ||
176 | memset(powernow_table, 0, (sizeof(struct cpufreq_frequency_table) * (number_scales + 1))); | ||
177 | |||
178 | for (j=0 ; j < number_scales; j++) { | ||
179 | fid = *pst++; | ||
180 | |||
181 | powernow_table[j].frequency = (fsb * fid_codes[fid]) / 10; | ||
182 | powernow_table[j].index = fid; /* lower 8 bits */ | ||
183 | |||
184 | speed = powernow_table[j].frequency; | ||
185 | |||
186 | if ((fid_codes[fid] % 10)==5) { | ||
187 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI | ||
188 | if (have_a0 == 1) | ||
189 | powernow_table[j].frequency = CPUFREQ_ENTRY_INVALID; | ||
190 | #endif | ||
191 | } | ||
192 | |||
193 | if (speed < minimum_speed) | ||
194 | minimum_speed = speed; | ||
195 | if (speed > maximum_speed) | ||
196 | maximum_speed = speed; | ||
197 | |||
198 | vid = *pst++; | ||
199 | powernow_table[j].index |= (vid << 8); /* upper 8 bits */ | ||
200 | |||
201 | dprintk (" FID: 0x%x (%d.%dx [%dMHz]) " | ||
202 | "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, | ||
203 | fid_codes[fid] % 10, speed/1000, vid, | ||
204 | mobile_vid_table[vid]/1000, | ||
205 | mobile_vid_table[vid]%1000); | ||
206 | } | ||
207 | powernow_table[number_scales].frequency = CPUFREQ_TABLE_END; | ||
208 | powernow_table[number_scales].index = 0; | ||
209 | |||
210 | return 0; | ||
211 | } | ||
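
Each table index packs the FID into its low byte and the VID into its high byte, and change_speed() below unpacks them the same way. A sketch of the convention (the macro names are hypothetical):

    /* Illustrative only: the index packing used by get_ranges() and
     * change_speed(). */
    #define PN_K7_INDEX(fid, vid)   (((vid) << 8) | (fid))
    #define PN_K7_FID(index)        ((index) & 0xFF)
    #define PN_K7_VID(index)        (((index) >> 8) & 0xFF)
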
212 | |||
213 | |||
214 | static void change_FID(int fid) | ||
215 | { | ||
216 | union msr_fidvidctl fidvidctl; | ||
217 | |||
218 | rdmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); | ||
219 | if (fidvidctl.bits.FID != fid) { | ||
220 | fidvidctl.bits.SGTC = latency; | ||
221 | fidvidctl.bits.FID = fid; | ||
222 | fidvidctl.bits.VIDC = 0; | ||
223 | fidvidctl.bits.FIDC = 1; | ||
224 | wrmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); | ||
225 | } | ||
226 | } | ||
227 | |||
228 | |||
229 | static void change_VID(int vid) | ||
230 | { | ||
231 | union msr_fidvidctl fidvidctl; | ||
232 | |||
233 | rdmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); | ||
234 | if (fidvidctl.bits.VID != vid) { | ||
235 | fidvidctl.bits.SGTC = latency; | ||
236 | fidvidctl.bits.VID = vid; | ||
237 | fidvidctl.bits.FIDC = 0; | ||
238 | fidvidctl.bits.VIDC = 1; | ||
239 | wrmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); | ||
240 | } | ||
241 | } | ||
242 | |||
243 | |||
244 | static void change_speed (unsigned int index) | ||
245 | { | ||
246 | u8 fid, vid; | ||
247 | struct cpufreq_freqs freqs; | ||
248 | union msr_fidvidstatus fidvidstatus; | ||
249 | int cfid; | ||
250 | |||
251 | /* The fid is the lower 8 bits of the index we stored into | ||
252 | * the cpufreq frequency table in powernow_decode_bios; | ||
253 | * the vid is the upper 8 bits. | ||
254 | */ | ||
255 | |||
256 | fid = powernow_table[index].index & 0xFF; | ||
257 | vid = (powernow_table[index].index & 0xFF00) >> 8; | ||
258 | |||
259 | freqs.cpu = 0; | ||
260 | |||
261 | rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val); | ||
262 | cfid = fidvidstatus.bits.CFID; | ||
263 | freqs.old = fsb * fid_codes[cfid] / 10; | ||
264 | |||
265 | freqs.new = powernow_table[index].frequency; | ||
266 | |||
267 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
268 | |||
269 | /* Now do the magic poking into the MSRs. */ | ||
270 | |||
271 | if (have_a0 == 1) /* A0 errata 5 */ | ||
272 | local_irq_disable(); | ||
273 | |||
274 | if (freqs.old > freqs.new) { | ||
275 | /* Going down, so change FID first */ | ||
276 | change_FID(fid); | ||
277 | change_VID(vid); | ||
278 | } else { | ||
279 | /* Going up, so change VID first */ | ||
280 | change_VID(vid); | ||
281 | change_FID(fid); | ||
282 | } | ||
283 | |||
284 | |||
285 | if (have_a0 == 1) | ||
286 | local_irq_enable(); | ||
287 | |||
288 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
289 | } | ||
290 | |||
291 | |||
292 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI | ||
293 | |||
294 | static struct acpi_processor_performance *acpi_processor_perf; | ||
295 | |||
296 | static int powernow_acpi_init(void) | ||
297 | { | ||
298 | int i; | ||
299 | int retval = 0; | ||
300 | union powernow_acpi_control_t pc; | ||
301 | |||
302 | if (acpi_processor_perf != NULL && powernow_table != NULL) { | ||
303 | retval = -EINVAL; | ||
304 | goto err0; | ||
305 | } | ||
306 | |||
307 | acpi_processor_perf = kmalloc(sizeof(struct acpi_processor_performance), | ||
308 | GFP_KERNEL); | ||
309 | |||
310 | if (!acpi_processor_perf) { | ||
311 | retval = -ENOMEM; | ||
312 | goto err0; | ||
313 | } | ||
314 | |||
315 | memset(acpi_processor_perf, 0, sizeof(struct acpi_processor_performance)); | ||
316 | |||
317 | if (acpi_processor_register_performance(acpi_processor_perf, 0)) { | ||
318 | retval = -EIO; | ||
319 | goto err1; | ||
320 | } | ||
321 | |||
322 | if (acpi_processor_perf->control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) { | ||
323 | retval = -ENODEV; | ||
324 | goto err2; | ||
325 | } | ||
326 | |||
327 | if (acpi_processor_perf->status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) { | ||
328 | retval = -ENODEV; | ||
329 | goto err2; | ||
330 | } | ||
331 | |||
332 | number_scales = acpi_processor_perf->state_count; | ||
333 | |||
334 | if (number_scales < 2) { | ||
335 | retval = -ENODEV; | ||
336 | goto err2; | ||
337 | } | ||
338 | |||
339 | powernow_table = kmalloc((number_scales + 1) * (sizeof(struct cpufreq_frequency_table)), GFP_KERNEL); | ||
340 | if (!powernow_table) { | ||
341 | retval = -ENOMEM; | ||
342 | goto err2; | ||
343 | } | ||
344 | |||
345 | memset(powernow_table, 0, ((number_scales + 1) * sizeof(struct cpufreq_frequency_table))); | ||
346 | |||
347 | pc.val = (unsigned long) acpi_processor_perf->states[0].control; | ||
348 | for (i = 0; i < number_scales; i++) { | ||
349 | u8 fid, vid; | ||
350 | unsigned int speed; | ||
351 | |||
352 | pc.val = (unsigned long) acpi_processor_perf->states[i].control; | ||
353 | dprintk ("acpi: P%d: %d MHz %d mW %d uS control %08x SGTC %d\n", | ||
354 | i, | ||
355 | (u32) acpi_processor_perf->states[i].core_frequency, | ||
356 | (u32) acpi_processor_perf->states[i].power, | ||
357 | (u32) acpi_processor_perf->states[i].transition_latency, | ||
358 | (u32) acpi_processor_perf->states[i].control, | ||
359 | pc.bits.sgtc); | ||
360 | |||
361 | vid = pc.bits.vid; | ||
362 | fid = pc.bits.fid; | ||
363 | |||
364 | powernow_table[i].frequency = fsb * fid_codes[fid] / 10; | ||
365 | powernow_table[i].index = fid; /* lower 8 bits */ | ||
366 | powernow_table[i].index |= (vid << 8); /* upper 8 bits */ | ||
367 | |||
368 | speed = powernow_table[i].frequency; | ||
369 | |||
370 | if ((fid_codes[fid] % 10)==5) { | ||
371 | if (have_a0 == 1) | ||
372 | powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; | ||
373 | } | ||
374 | |||
375 | dprintk (" FID: 0x%x (%d.%dx [%dMHz]) " | ||
376 | "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, | ||
377 | fid_codes[fid] % 10, speed/1000, vid, | ||
378 | mobile_vid_table[vid]/1000, | ||
379 | mobile_vid_table[vid]%1000); | ||
380 | |||
381 | if (latency < pc.bits.sgtc) | ||
382 | latency = pc.bits.sgtc; | ||
383 | |||
384 | if (speed < minimum_speed) | ||
385 | minimum_speed = speed; | ||
386 | if (speed > maximum_speed) | ||
387 | maximum_speed = speed; | ||
388 | } | ||
389 | |||
390 | powernow_table[i].frequency = CPUFREQ_TABLE_END; | ||
391 | powernow_table[i].index = 0; | ||
392 | |||
393 | /* notify BIOS that we exist */ | ||
394 | acpi_processor_notify_smm(THIS_MODULE); | ||
395 | |||
396 | return 0; | ||
397 | |||
398 | err2: | ||
399 | acpi_processor_unregister_performance(acpi_processor_perf, 0); | ||
400 | err1: | ||
401 | kfree(acpi_processor_perf); | ||
402 | err0: | ||
403 | printk(KERN_WARNING PFX "ACPI perflib cannot be used on this platform\n"); | ||
404 | acpi_processor_perf = NULL; | ||
405 | return retval; | ||
406 | } | ||
407 | #else | ||
408 | static int powernow_acpi_init(void) | ||
409 | { | ||
410 | printk(KERN_INFO PFX "no support for ACPI processor found." | ||
411 | " Please recompile your kernel with ACPI processor support\n"); | ||
412 | return -EINVAL; | ||
413 | } | ||
414 | #endif | ||
415 | |||
416 | static int powernow_decode_bios (int maxfid, int startvid) | ||
417 | { | ||
418 | struct psb_s *psb; | ||
419 | struct pst_s *pst; | ||
420 | unsigned int i, j; | ||
421 | unsigned char *p; | ||
422 | unsigned int etuple; | ||
423 | unsigned int ret; | ||
424 | |||
425 | etuple = cpuid_eax(0x80000001); | ||
426 | |||
427 | for (i=0xC0000; i < 0xffff0 ; i+=16) { | ||
428 | |||
429 | p = phys_to_virt(i); | ||
430 | |||
431 | if (memcmp(p, "AMDK7PNOW!", 10) == 0){ | ||
432 | dprintk ("Found PSB header at %p\n", p); | ||
433 | psb = (struct psb_s *) p; | ||
434 | dprintk ("Table version: 0x%x\n", psb->tableversion); | ||
435 | if (psb->tableversion != 0x12) { | ||
436 | printk (KERN_INFO PFX "Sorry, only v1.2 tables supported right now\n"); | ||
437 | return -ENODEV; | ||
438 | } | ||
439 | |||
440 | dprintk ("Flags: 0x%x\n", psb->flags); | ||
441 | if ((psb->flags & 1)==0) { | ||
442 | dprintk ("Mobile voltage regulator\n"); | ||
443 | } else { | ||
444 | dprintk ("Desktop voltage regulator\n"); | ||
445 | } | ||
446 | |||
447 | latency = psb->settlingtime; | ||
448 | if (latency < 100) { | ||
449 | printk (KERN_INFO PFX "BIOS set settling time to %d microseconds. " | ||
450 | "Should be at least 100. Correcting.\n", latency); | ||
451 | latency = 100; | ||
452 | } | ||
453 | dprintk ("Settling Time: %d microseconds.\n", psb->settlingtime); | ||
454 | dprintk ("Has %d PST tables. (Only dumping ones relevant to this CPU).\n", psb->numpst); | ||
455 | |||
456 | p += sizeof (struct psb_s); | ||
457 | |||
458 | pst = (struct pst_s *) p; | ||
459 | |||
460 | for (i = 0; i < psb->numpst; i++) { | ||
461 | pst = (struct pst_s *) p; | ||
462 | number_scales = pst->numpstates; | ||
463 | |||
464 | if ((etuple == pst->cpuid) && check_fsb(pst->fsbspeed) && | ||
465 | (maxfid==pst->maxfid) && (startvid==pst->startvid)) | ||
466 | { | ||
467 | dprintk ("PST:%d (@%p)\n", i, pst); | ||
468 | dprintk (" cpuid: 0x%x fsb: %d maxFID: 0x%x startvid: 0x%x\n", | ||
469 | pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid); | ||
470 | |||
471 | ret = get_ranges ((char *) pst + sizeof (struct pst_s)); | ||
472 | return ret; | ||
473 | |||
474 | } else { | ||
475 | p = (char *) pst + sizeof (struct pst_s); | ||
476 | for (j=0 ; j < number_scales; j++) | ||
477 | p+=2; | ||
478 | } | ||
479 | } | ||
480 | printk (KERN_INFO PFX "No PST tables match this cpuid (0x%x)\n", etuple); | ||
481 | printk (KERN_INFO PFX "This is indicative of a broken BIOS.\n"); | ||
482 | |||
483 | return -EINVAL; | ||
484 | } | ||
485 | p++; | ||
486 | } | ||
487 | |||
488 | return -ENODEV; | ||
489 | } | ||
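/*
 * Sketch of the BIOS image layout that powernow_decode_bios() walks,
 * reconstructed from the parsing code above (v1.2 tables):
 *
 *	0xC0000..0xFFFF0, scanned on 16-byte boundaries:
 *	  struct psb_s    - "AMDK7PNOW!" signature, table version,
 *	                    flags, settling time, number of PST tables
 *	  struct pst_s #0 - cpuid, fsbspeed, maxfid, startvid,
 *	                    followed by numpstates (u8 fid, u8 vid) pairs
 *	  struct pst_s #1 - ... and so on, numpst times
 */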
490 | |||
491 | |||
492 | static int powernow_target (struct cpufreq_policy *policy, | ||
493 | unsigned int target_freq, | ||
494 | unsigned int relation) | ||
495 | { | ||
496 | unsigned int newstate; | ||
497 | |||
498 | if (cpufreq_frequency_table_target(policy, powernow_table, target_freq, relation, &newstate)) | ||
499 | return -EINVAL; | ||
500 | |||
501 | change_speed(newstate); | ||
502 | |||
503 | return 0; | ||
504 | } | ||
505 | |||
506 | |||
507 | static int powernow_verify (struct cpufreq_policy *policy) | ||
508 | { | ||
509 | return cpufreq_frequency_table_verify(policy, powernow_table); | ||
510 | } | ||
511 | |||
512 | /* | ||
513 | * We rely on the bus frequency being a multiple of | ||
514 | * 100000/3 kHz, and compute sgtc according to this | ||
515 | * multiple. | ||
516 | * That way we match more closely how AMD intends this | ||
517 | * to work, and we get the same kind of behaviour already | ||
518 | * tested under the "well-known" other OS. | ||
519 | */ | ||
520 | static int __init fixup_sgtc(void) | ||
521 | { | ||
522 | unsigned int sgtc; | ||
523 | unsigned int m; | ||
524 | |||
525 | m = fsb / 3333; | ||
526 | if ((m % 10) >= 5) | ||
527 | m += 5; | ||
528 | |||
529 | m /= 10; | ||
530 | |||
531 | sgtc = 100 * m * latency; | ||
532 | sgtc = sgtc / 3; | ||
533 | if (sgtc > 0xfffff) { | ||
534 | printk(KERN_WARNING PFX "SGTC too large %d\n", sgtc); | ||
535 | sgtc = 0xfffff; | ||
536 | } | ||
537 | return sgtc; | ||
538 | } | ||
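/*
 * Worked example for fixup_sgtc(), assuming an FSB of 133333 kHz and
 * the minimum settling time of 100 us:
 *	m    = 133333 / 3333 = 40;  (40 % 10) < 5, so m stays 40;  m /= 10 -> 4
 *	sgtc = 100 * 4 * 100 / 3 = 13333 bus clocks
 * which is comfortably below the 0xfffff cap enforced above.
 */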
539 | |||
540 | static unsigned int powernow_get(unsigned int cpu) | ||
541 | { | ||
542 | union msr_fidvidstatus fidvidstatus; | ||
543 | unsigned int cfid; | ||
544 | |||
545 | if (cpu) | ||
546 | return 0; | ||
547 | rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val); | ||
548 | cfid = fidvidstatus.bits.CFID; | ||
549 | |||
550 | return (fsb * fid_codes[cfid] / 10); | ||
551 | } | ||
552 | |||
553 | |||
554 | static int __init acer_cpufreq_pst(struct dmi_system_id *d) | ||
555 | { | ||
556 | printk(KERN_WARNING "%s laptop with broken PST tables in BIOS detected.\n", d->ident); | ||
557 | printk(KERN_WARNING "You need to downgrade to 3A21 (09/09/2002), or try a newer BIOS than 3A71 (01/20/2003)\n"); | ||
558 | printk(KERN_WARNING "cpufreq scaling has been disabled as a result of this.\n"); | ||
559 | return 0; | ||
560 | } | ||
561 | |||
562 | /* | ||
563 | * Some Athlon laptops have really broken PST tables. | ||
564 | * A BIOS update is all that can save them. | ||
565 | * Mention this, and disable cpufreq. | ||
566 | */ | ||
567 | static struct dmi_system_id __initdata powernow_dmi_table[] = { | ||
568 | { | ||
569 | .callback = acer_cpufreq_pst, | ||
570 | .ident = "Acer Aspire", | ||
571 | .matches = { | ||
572 | DMI_MATCH(DMI_SYS_VENDOR, "Insyde Software"), | ||
573 | DMI_MATCH(DMI_BIOS_VERSION, "3A71"), | ||
574 | }, | ||
575 | }, | ||
576 | { } | ||
577 | }; | ||
578 | |||
579 | static int __init powernow_cpu_init (struct cpufreq_policy *policy) | ||
580 | { | ||
581 | union msr_fidvidstatus fidvidstatus; | ||
582 | int result; | ||
583 | |||
584 | if (policy->cpu != 0) | ||
585 | return -ENODEV; | ||
586 | |||
587 | rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val); | ||
588 | |||
589 | /* A K7 with powernow technology is set to max frequency by BIOS */ | ||
590 | fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.MFID]; | ||
591 | if (!fsb) { | ||
592 | printk(KERN_WARNING PFX "cannot determine bus frequency\n"); | ||
593 | return -EINVAL; | ||
594 | } | ||
595 | dprintk("FSB: %3d.%03d MHz\n", fsb/1000, fsb%1000); | ||
596 | |||
597 | if (dmi_check_system(powernow_dmi_table) || acpi_force) { | ||
598 | printk (KERN_INFO PFX "PSB/PST known to be broken. Trying ACPI instead\n"); | ||
599 | result = powernow_acpi_init(); | ||
600 | } else { | ||
601 | result = powernow_decode_bios(fidvidstatus.bits.MFID, fidvidstatus.bits.SVID); | ||
602 | if (result) { | ||
603 | printk (KERN_INFO PFX "Trying ACPI perflib\n"); | ||
604 | maximum_speed = 0; | ||
605 | minimum_speed = -1; | ||
606 | latency = 0; | ||
607 | result = powernow_acpi_init(); | ||
608 | if (result) { | ||
609 | printk (KERN_INFO PFX "ACPI and legacy methods failed\n"); | ||
610 | printk (KERN_INFO PFX "See http://www.codemonkey.org.uk/projects/cpufreq/powernow-k7.shtml\n"); | ||
611 | } | ||
612 | } else { | ||
613 | /* SGTC use the bus clock as timer */ | ||
614 | latency = fixup_sgtc(); | ||
615 | printk(KERN_INFO PFX "SGTC: %d\n", latency); | ||
616 | } | ||
617 | } | ||
618 | |||
619 | if (result) | ||
620 | return result; | ||
621 | |||
622 | printk (KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n", | ||
623 | minimum_speed/1000, maximum_speed/1000); | ||
624 | |||
625 | policy->governor = CPUFREQ_DEFAULT_GOVERNOR; | ||
626 | |||
627 | policy->cpuinfo.transition_latency = cpufreq_scale(2000000UL, fsb, latency); | ||
628 | |||
629 | policy->cur = powernow_get(0); | ||
630 | |||
631 | cpufreq_frequency_table_get_attr(powernow_table, policy->cpu); | ||
632 | |||
633 | return cpufreq_frequency_table_cpuinfo(policy, powernow_table); | ||
634 | } | ||
635 | |||
636 | static int powernow_cpu_exit (struct cpufreq_policy *policy) { | ||
637 | cpufreq_frequency_table_put_attr(policy->cpu); | ||
638 | |||
639 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI | ||
640 | if (acpi_processor_perf) { | ||
641 | acpi_processor_unregister_performance(acpi_processor_perf, 0); | ||
642 | kfree(acpi_processor_perf); | ||
643 | } | ||
644 | #endif | ||
645 | |||
646 | /* kfree(NULL) is safe, so no NULL check is needed */ | ||
647 | kfree(powernow_table); | ||
648 | |||
649 | return 0; | ||
650 | } | ||
651 | |||
652 | static struct freq_attr* powernow_table_attr[] = { | ||
653 | &cpufreq_freq_attr_scaling_available_freqs, | ||
654 | NULL, | ||
655 | }; | ||
656 | |||
657 | static struct cpufreq_driver powernow_driver = { | ||
658 | .verify = powernow_verify, | ||
659 | .target = powernow_target, | ||
660 | .get = powernow_get, | ||
661 | .init = powernow_cpu_init, | ||
662 | .exit = powernow_cpu_exit, | ||
663 | .name = "powernow-k7", | ||
664 | .owner = THIS_MODULE, | ||
665 | .attr = powernow_table_attr, | ||
666 | }; | ||
667 | |||
668 | static int __init powernow_init (void) | ||
669 | { | ||
670 | if (check_powernow() == 0) | ||
671 | return -ENODEV; | ||
672 | return cpufreq_register_driver(&powernow_driver); | ||
673 | } | ||
674 | |||
675 | |||
676 | static void __exit powernow_exit (void) | ||
677 | { | ||
678 | cpufreq_unregister_driver(&powernow_driver); | ||
679 | } | ||
680 | |||
681 | module_param(acpi_force, int, 0444); | ||
682 | MODULE_PARM_DESC(acpi_force, "Force ACPI to be used."); | ||
683 | |||
684 | MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>"); | ||
685 | MODULE_DESCRIPTION ("Powernow driver for AMD K7 processors."); | ||
686 | MODULE_LICENSE ("GPL"); | ||
687 | |||
688 | late_initcall(powernow_init); | ||
689 | module_exit(powernow_exit); | ||
690 | |||
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k7.h b/arch/i386/kernel/cpu/cpufreq/powernow-k7.h new file mode 100644 index 000000000000..f8a63b3664e3 --- /dev/null +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k7.h | |||
@@ -0,0 +1,44 @@ | |||
1 | /* | ||
2 | * $Id: powernow-k7.h,v 1.2 2003/02/10 18:26:01 davej Exp $ | ||
3 | * (C) 2003 Dave Jones. | ||
4 | * | ||
5 | * Licensed under the terms of the GNU GPL License version 2. | ||
6 | * | ||
7 | * AMD-specific information | ||
8 | * | ||
9 | */ | ||
10 | |||
11 | union msr_fidvidctl { | ||
12 | struct { | ||
13 | unsigned FID:5, // 4:0 | ||
14 | reserved1:3, // 7:5 | ||
15 | VID:5, // 12:8 | ||
16 | reserved2:3, // 15:13 | ||
17 | FIDC:1, // 16 | ||
18 | VIDC:1, // 17 | ||
19 | reserved3:2, // 19:18 | ||
20 | FIDCHGRATIO:1, // 20 | ||
21 | reserved4:11, // 31:21 | ||
22 | SGTC:20, // 51:32 | ||
23 | reserved5:12; // 63:52 | ||
24 | } bits; | ||
25 | unsigned long long val; | ||
26 | }; | ||
27 | |||
28 | union msr_fidvidstatus { | ||
29 | struct { | ||
30 | unsigned CFID:5, // 4:0 | ||
31 | reserved1:3, // 7:5 | ||
32 | SFID:5, // 12:8 | ||
33 | reserved2:3, // 15:13 | ||
34 | MFID:5, // 20:16 | ||
35 | reserved3:11, // 31:21 | ||
36 | CVID:5, // 36:32 | ||
37 | reserved4:3, // 39:37 | ||
38 | SVID:5, // 44:40 | ||
39 | reserved5:3, // 47:45 | ||
40 | MVID:5, // 52:48 | ||
41 | reserved6:11; // 63:53 | ||
42 | } bits; | ||
43 | unsigned long long val; | ||
44 | }; | ||
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c new file mode 100644 index 000000000000..a65ff7e32e5d --- /dev/null +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c | |||
@@ -0,0 +1,1135 @@ | |||
1 | /* | ||
2 | * (c) 2003, 2004 Advanced Micro Devices, Inc. | ||
3 | * Your use of this code is subject to the terms and conditions of the | ||
4 | * GNU general public license version 2. See "COPYING" or | ||
5 | * http://www.gnu.org/licenses/gpl.html | ||
6 | * | ||
7 | * Support : paul.devriendt@amd.com | ||
8 | * | ||
9 | * Based on the powernow-k7.c module written by Dave Jones. | ||
10 | * (C) 2003 Dave Jones <davej@codemonkey.org.uk> on behalf of SuSE Labs | ||
11 | * (C) 2004 Dominik Brodowski <linux@brodo.de> | ||
12 | * (C) 2004 Pavel Machek <pavel@suse.cz> | ||
13 | * Licensed under the terms of the GNU GPL License version 2. | ||
14 | * Based upon datasheets & sample CPUs kindly provided by AMD. | ||
15 | * | ||
16 | * Valuable input gratefully received from Dave Jones, Pavel Machek, | ||
17 | * Dominik Brodowski, and others. | ||
18 | * Processor information obtained from Chapter 9 (Power and Thermal Management) | ||
19 | * of the "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD | ||
20 | * Opteron Processors" available for download from www.amd.com | ||
21 | * | ||
22 | * Tables for specific CPUs can be inferred from | ||
23 | * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/30430.pdf | ||
24 | */ | ||
25 | |||
26 | #include <linux/kernel.h> | ||
27 | #include <linux/smp.h> | ||
28 | #include <linux/module.h> | ||
29 | #include <linux/init.h> | ||
30 | #include <linux/cpufreq.h> | ||
31 | #include <linux/slab.h> | ||
32 | #include <linux/string.h> | ||
33 | |||
34 | #include <asm/msr.h> | ||
35 | #include <asm/io.h> | ||
36 | #include <asm/delay.h> | ||
37 | |||
38 | #ifdef CONFIG_X86_POWERNOW_K8_ACPI | ||
39 | #include <linux/acpi.h> | ||
40 | #include <acpi/processor.h> | ||
41 | #endif | ||
42 | |||
43 | #define PFX "powernow-k8: " | ||
44 | #define BFX PFX "BIOS error: " | ||
45 | #define VERSION "version 1.00.09e" | ||
46 | #include "powernow-k8.h" | ||
47 | |||
48 | /* serialize freq changes */ | ||
49 | static DECLARE_MUTEX(fidvid_sem); | ||
50 | |||
51 | static struct powernow_k8_data *powernow_data[NR_CPUS]; | ||
52 | |||
53 | /* Return a frequency in MHz, given an input fid */ | ||
54 | static u32 find_freq_from_fid(u32 fid) | ||
55 | { | ||
56 | return 800 + (fid * 100); | ||
57 | } | ||
58 | |||
59 | /* Return a frequency in KHz, given an input fid */ | ||
60 | static u32 find_khz_freq_from_fid(u32 fid) | ||
61 | { | ||
62 | return 1000 * find_freq_from_fid(fid); | ||
63 | } | ||
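/*
 * Worked example of the fid-to-frequency mapping above: fid 0x2 gives
 * 800 + (2 * 100) = 1000 MHz, i.e. find_khz_freq_from_fid(0x2) = 1000000.
 */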
64 | |||
65 | /* Return a voltage in millivolts, given an input vid */ | ||
66 | static u32 find_millivolts_from_vid(struct powernow_k8_data *data, u32 vid) | ||
67 | { | ||
68 | return 1550-vid*25; | ||
69 | } | ||
70 | |||
71 | /* Return the vco fid for an input fid | ||
72 | * | ||
73 | * Each "low" fid has corresponding "high" fid, and you can get to "low" fids | ||
74 | * only from corresponding high fids. This returns "high" fid corresponding to | ||
75 | * "low" one. | ||
76 | */ | ||
77 | static u32 convert_fid_to_vco_fid(u32 fid) | ||
78 | { | ||
79 | if (fid < HI_FID_TABLE_BOTTOM) { | ||
80 | return 8 + (2 * fid); | ||
81 | } else { | ||
82 | return fid; | ||
83 | } | ||
84 | } | ||
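/*
 * Example (assuming HI_FID_TABLE_BOTTOM is above these values): the
 * "low" fid 0x2 maps to the vco fid 8 + (2 * 2) = 0xc, while any fid
 * already at or above HI_FID_TABLE_BOTTOM is returned unchanged.
 */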
85 | |||
86 | /* | ||
87 | * Return 1 if the pending bit is set. Unless we just instructed the processor | ||
88 | * to transition to a new state, seeing this bit set is really bad news. | ||
89 | */ | ||
90 | static int pending_bit_stuck(void) | ||
91 | { | ||
92 | u32 lo, hi; | ||
93 | |||
94 | rdmsr(MSR_FIDVID_STATUS, lo, hi); | ||
95 | return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0; | ||
96 | } | ||
97 | |||
98 | /* | ||
99 | * Update the global current fid / vid values from the status msr. | ||
100 | * Returns 1 on error. | ||
101 | */ | ||
102 | static int query_current_values_with_pending_wait(struct powernow_k8_data *data) | ||
103 | { | ||
104 | u32 lo, hi; | ||
105 | u32 i = 0; | ||
106 | |||
107 | lo = MSR_S_LO_CHANGE_PENDING; | ||
108 | while (lo & MSR_S_LO_CHANGE_PENDING) { | ||
109 | if (i++ > 0x1000000) { | ||
110 | printk(KERN_ERR PFX "detected change pending stuck\n"); | ||
111 | return 1; | ||
112 | } | ||
113 | rdmsr(MSR_FIDVID_STATUS, lo, hi); | ||
114 | } | ||
115 | |||
116 | data->currvid = hi & MSR_S_HI_CURRENT_VID; | ||
117 | data->currfid = lo & MSR_S_LO_CURRENT_FID; | ||
118 | |||
119 | return 0; | ||
120 | } | ||
121 | |||
122 | /* the isochronous relief time */ | ||
123 | static void count_off_irt(struct powernow_k8_data *data) | ||
124 | { | ||
125 | udelay((1 << data->irt) * 10); | ||
126 | return; | ||
127 | } | ||
128 | |||
129 | /* the voltage stabilization time */ | ||
130 | static void count_off_vst(struct powernow_k8_data *data) | ||
131 | { | ||
132 | udelay(data->vstable * VST_UNITS_20US); | ||
133 | return; | ||
134 | } | ||
135 | |||
136 | /* need to init the control msr to a safe value (for each cpu) */ | ||
137 | static void fidvid_msr_init(void) | ||
138 | { | ||
139 | u32 lo, hi; | ||
140 | u8 fid, vid; | ||
141 | |||
142 | rdmsr(MSR_FIDVID_STATUS, lo, hi); | ||
143 | vid = hi & MSR_S_HI_CURRENT_VID; | ||
144 | fid = lo & MSR_S_LO_CURRENT_FID; | ||
145 | lo = fid | (vid << MSR_C_LO_VID_SHIFT); | ||
146 | hi = MSR_C_HI_STP_GNT_BENIGN; | ||
147 | dprintk("cpu%d, init lo 0x%x, hi 0x%x\n", smp_processor_id(), lo, hi); | ||
148 | wrmsr(MSR_FIDVID_CTL, lo, hi); | ||
149 | } | ||
150 | |||
151 | |||
152 | /* write the new fid value along with the other control fields to the msr */ | ||
153 | static int write_new_fid(struct powernow_k8_data *data, u32 fid) | ||
154 | { | ||
155 | u32 lo; | ||
156 | u32 savevid = data->currvid; | ||
157 | |||
158 | if ((fid & INVALID_FID_MASK) || (data->currvid & INVALID_VID_MASK)) { | ||
159 | printk(KERN_ERR PFX "internal error - overflow on fid write\n"); | ||
160 | return 1; | ||
161 | } | ||
162 | |||
163 | lo = fid | (data->currvid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID; | ||
164 | |||
165 | dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n", | ||
166 | fid, lo, data->plllock * PLL_LOCK_CONVERSION); | ||
167 | |||
168 | wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION); | ||
169 | |||
170 | if (query_current_values_with_pending_wait(data)) | ||
171 | return 1; | ||
172 | |||
173 | count_off_irt(data); | ||
174 | |||
175 | if (savevid != data->currvid) { | ||
176 | printk(KERN_ERR PFX "vid change on fid trans, old 0x%x, new 0x%x\n", | ||
177 | savevid, data->currvid); | ||
178 | return 1; | ||
179 | } | ||
180 | |||
181 | if (fid != data->currfid) { | ||
182 | printk(KERN_ERR PFX "fid trans failed, fid 0x%x, curr 0x%x\n", fid, | ||
183 | data->currfid); | ||
184 | return 1; | ||
185 | } | ||
186 | |||
187 | return 0; | ||
188 | } | ||
189 | |||
190 | /* Write a new vid to the hardware */ | ||
191 | static int write_new_vid(struct powernow_k8_data *data, u32 vid) | ||
192 | { | ||
193 | u32 lo; | ||
194 | u32 savefid = data->currfid; | ||
195 | |||
196 | if ((data->currfid & INVALID_FID_MASK) || (vid & INVALID_VID_MASK)) { | ||
197 | printk(KERN_ERR PFX "internal error - overflow on vid write\n"); | ||
198 | return 1; | ||
199 | } | ||
200 | |||
201 | lo = data->currfid | (vid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID; | ||
202 | |||
203 | dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n", | ||
204 | vid, lo, STOP_GRANT_5NS); | ||
205 | |||
206 | wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS); | ||
207 | |||
208 | if (query_current_values_with_pending_wait(data)) | ||
209 | return 1; | ||
210 | |||
211 | if (savefid != data->currfid) { | ||
212 | printk(KERN_ERR PFX "fid changed on vid trans, old 0x%x new 0x%x\n", | ||
213 | savefid, data->currfid); | ||
214 | return 1; | ||
215 | } | ||
216 | |||
217 | if (vid != data->currvid) { | ||
218 | printk(KERN_ERR PFX "vid trans failed, vid 0x%x, curr 0x%x\n", vid, | ||
219 | data->currvid); | ||
220 | return 1; | ||
221 | } | ||
222 | |||
223 | return 0; | ||
224 | } | ||
225 | |||
226 | /* | ||
227 | * Step the vid down towards reqvid, by at most "step" vid codes at a time. | ||
228 | * Decreasing vid codes represent increasing voltages: | ||
229 | * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of 0x1f is off. | ||
230 | */ | ||
231 | static int decrease_vid_code_by_step(struct powernow_k8_data *data, u32 reqvid, u32 step) | ||
232 | { | ||
233 | if ((data->currvid - reqvid) > step) | ||
234 | reqvid = data->currvid - step; | ||
235 | |||
236 | if (write_new_vid(data, reqvid)) | ||
237 | return 1; | ||
238 | |||
239 | count_off_vst(data); | ||
240 | |||
241 | return 0; | ||
242 | } | ||
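/*
 * Worked example of the vid scale described above: vid 0x00 is 1.550 V
 * and each vid step subtracts 25 mV, so vid 0x1e is
 * 1550 - (30 * 25) = 800 mV -- matching find_millivolts_from_vid().
 */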
243 | |||
244 | /* Change the fid and vid, by the 3 phases. */ | ||
245 | static int transition_fid_vid(struct powernow_k8_data *data, u32 reqfid, u32 reqvid) | ||
246 | { | ||
247 | if (core_voltage_pre_transition(data, reqvid)) | ||
248 | return 1; | ||
249 | |||
250 | if (core_frequency_transition(data, reqfid)) | ||
251 | return 1; | ||
252 | |||
253 | if (core_voltage_post_transition(data, reqvid)) | ||
254 | return 1; | ||
255 | |||
256 | if (query_current_values_with_pending_wait(data)) | ||
257 | return 1; | ||
258 | |||
259 | if ((reqfid != data->currfid) || (reqvid != data->currvid)) { | ||
260 | printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, curr 0x%x 0x%x\n", | ||
261 | smp_processor_id(), | ||
262 | reqfid, reqvid, data->currfid, data->currvid); | ||
263 | return 1; | ||
264 | } | ||
265 | |||
266 | dprintk("transitioned (cpu%d): new fid 0x%x, vid 0x%x\n", | ||
267 | smp_processor_id(), data->currfid, data->currvid); | ||
268 | |||
269 | return 0; | ||
270 | } | ||
271 | |||
272 | /* Phase 1 - core voltage transition ... setup voltage */ | ||
273 | static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid) | ||
274 | { | ||
275 | u32 rvosteps = data->rvo; | ||
276 | u32 savefid = data->currfid; | ||
277 | |||
278 | dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, reqvid 0x%x, rvo 0x%x\n", | ||
279 | smp_processor_id(), | ||
280 | data->currfid, data->currvid, reqvid, data->rvo); | ||
281 | |||
282 | while (data->currvid > reqvid) { | ||
283 | dprintk("ph1: curr 0x%x, req vid 0x%x\n", | ||
284 | data->currvid, reqvid); | ||
285 | if (decrease_vid_code_by_step(data, reqvid, data->vidmvs)) | ||
286 | return 1; | ||
287 | } | ||
288 | |||
289 | while ((rvosteps > 0) && ((data->rvo + data->currvid) > reqvid)) { | ||
290 | if (data->currvid == 0) { | ||
291 | rvosteps = 0; | ||
292 | } else { | ||
293 | dprintk("ph1: changing vid for rvo, req 0x%x\n", | ||
294 | data->currvid - 1); | ||
295 | if (decrease_vid_code_by_step(data, data->currvid - 1, 1)) | ||
296 | return 1; | ||
297 | rvosteps--; | ||
298 | } | ||
299 | } | ||
300 | |||
301 | if (query_current_values_with_pending_wait(data)) | ||
302 | return 1; | ||
303 | |||
304 | if (savefid != data->currfid) { | ||
305 | printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n", data->currfid); | ||
306 | return 1; | ||
307 | } | ||
308 | |||
309 | dprintk("ph1 complete, currfid 0x%x, currvid 0x%x\n", | ||
310 | data->currfid, data->currvid); | ||
311 | |||
312 | return 0; | ||
313 | } | ||
314 | |||
315 | /* Phase 2 - core frequency transition */ | ||
316 | static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) | ||
317 | { | ||
318 | u32 vcoreqfid, vcocurrfid, vcofiddiff, savevid = data->currvid; | ||
319 | |||
320 | if ((reqfid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) { | ||
321 | printk(KERN_ERR PFX "ph2: illegal lo-lo transition 0x%x 0x%x\n", | ||
322 | reqfid, data->currfid); | ||
323 | return 1; | ||
324 | } | ||
325 | |||
326 | if (data->currfid == reqfid) { | ||
327 | printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n", data->currfid); | ||
328 | return 0; | ||
329 | } | ||
330 | |||
331 | dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, reqfid 0x%x\n", | ||
332 | smp_processor_id(), | ||
333 | data->currfid, data->currvid, reqfid); | ||
334 | |||
335 | vcoreqfid = convert_fid_to_vco_fid(reqfid); | ||
336 | vcocurrfid = convert_fid_to_vco_fid(data->currfid); | ||
337 | vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid | ||
338 | : vcoreqfid - vcocurrfid; | ||
339 | |||
340 | while (vcofiddiff > 2) { | ||
341 | if (reqfid > data->currfid) { | ||
342 | if (data->currfid > LO_FID_TABLE_TOP) { | ||
343 | if (write_new_fid(data, data->currfid + 2)) { | ||
344 | return 1; | ||
345 | } | ||
346 | } else { | ||
347 | if (write_new_fid | ||
348 | (data, 2 + convert_fid_to_vco_fid(data->currfid))) { | ||
349 | return 1; | ||
350 | } | ||
351 | } | ||
352 | } else { | ||
353 | if (write_new_fid(data, data->currfid - 2)) | ||
354 | return 1; | ||
355 | } | ||
356 | |||
357 | vcocurrfid = convert_fid_to_vco_fid(data->currfid); | ||
358 | vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid | ||
359 | : vcoreqfid - vcocurrfid; | ||
360 | } | ||
361 | |||
362 | if (write_new_fid(data, reqfid)) | ||
363 | return 1; | ||
364 | |||
365 | if (query_current_values_with_pending_wait(data)) | ||
366 | return 1; | ||
367 | |||
368 | if (data->currfid != reqfid) { | ||
369 | printk(KERN_ERR PFX | ||
370 | "ph2: mismatch, failed fid transition, curr 0x%x, req 0x%x\n", | ||
371 | data->currfid, reqfid); | ||
372 | return 1; | ||
373 | } | ||
374 | |||
375 | if (savevid != data->currvid) { | ||
376 | printk(KERN_ERR PFX "ph2: vid changed, save 0x%x, curr 0x%x\n", | ||
377 | savevid, data->currvid); | ||
378 | return 1; | ||
379 | } | ||
380 | |||
381 | dprintk("ph2 complete, currfid 0x%x, currvid 0x%x\n", | ||
382 | data->currfid, data->currvid); | ||
383 | |||
384 | return 0; | ||
385 | } | ||
386 | |||
387 | /* Phase 3 - core voltage transition flow ... jump to the final vid. */ | ||
388 | static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid) | ||
389 | { | ||
390 | u32 savefid = data->currfid; | ||
391 | u32 savereqvid = reqvid; | ||
392 | |||
393 | dprintk("ph3 (cpu%d): starting, currfid 0x%x, currvid 0x%x\n", | ||
394 | smp_processor_id(), | ||
395 | data->currfid, data->currvid); | ||
396 | |||
397 | if (reqvid != data->currvid) { | ||
398 | if (write_new_vid(data, reqvid)) | ||
399 | return 1; | ||
400 | |||
401 | if (savefid != data->currfid) { | ||
402 | printk(KERN_ERR PFX | ||
403 | "ph3: bad fid change, save 0x%x, curr 0x%x\n", | ||
404 | savefid, data->currfid); | ||
405 | return 1; | ||
406 | } | ||
407 | |||
408 | if (data->currvid != reqvid) { | ||
409 | printk(KERN_ERR PFX | ||
410 | "ph3: failed vid transition\n, req 0x%x, curr 0x%x", | ||
411 | reqvid, data->currvid); | ||
412 | return 1; | ||
413 | } | ||
414 | } | ||
415 | |||
416 | if (query_current_values_with_pending_wait(data)) | ||
417 | return 1; | ||
418 | |||
419 | if (savereqvid != data->currvid) { | ||
420 | dprintk("ph3 failed, currvid 0x%x\n", data->currvid); | ||
421 | return 1; | ||
422 | } | ||
423 | |||
424 | if (savefid != data->currfid) { | ||
425 | dprintk("ph3 failed, currfid changed 0x%x\n", | ||
426 | data->currfid); | ||
427 | return 1; | ||
428 | } | ||
429 | |||
430 | dprintk("ph3 complete, currfid 0x%x, currvid 0x%x\n", | ||
431 | data->currfid, data->currvid); | ||
432 | |||
433 | return 0; | ||
434 | } | ||
435 | |||
436 | static int check_supported_cpu(unsigned int cpu) | ||
437 | { | ||
438 | cpumask_t oldmask = CPU_MASK_ALL; | ||
439 | u32 eax, ebx, ecx, edx; | ||
440 | unsigned int rc = 0; | ||
441 | |||
442 | oldmask = current->cpus_allowed; | ||
443 | set_cpus_allowed(current, cpumask_of_cpu(cpu)); | ||
444 | schedule(); | ||
445 | |||
446 | if (smp_processor_id() != cpu) { | ||
447 | printk(KERN_ERR "limiting to cpu %u failed\n", cpu); | ||
448 | goto out; | ||
449 | } | ||
450 | |||
451 | if (current_cpu_data.x86_vendor != X86_VENDOR_AMD) | ||
452 | goto out; | ||
453 | |||
454 | eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); | ||
455 | if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) || | ||
456 | ((eax & CPUID_XFAM) != CPUID_XFAM_K8) || | ||
457 | ((eax & CPUID_XMOD) > CPUID_XMOD_REV_E)) { | ||
458 | printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax); | ||
459 | goto out; | ||
460 | } | ||
461 | |||
462 | eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES); | ||
463 | if (eax < CPUID_FREQ_VOLT_CAPABILITIES) { | ||
464 | printk(KERN_INFO PFX | ||
465 | "No frequency change capabilities detected\n"); | ||
466 | goto out; | ||
467 | } | ||
468 | |||
469 | cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); | ||
470 | if ((edx & P_STATE_TRANSITION_CAPABLE) != P_STATE_TRANSITION_CAPABLE) { | ||
471 | printk(KERN_INFO PFX "Power state transitions not supported\n"); | ||
472 | goto out; | ||
473 | } | ||
474 | |||
475 | rc = 1; | ||
476 | |||
477 | out: | ||
478 | set_cpus_allowed(current, oldmask); | ||
479 | schedule(); | ||
480 | return rc; | ||
481 | |||
482 | } | ||
483 | |||
484 | static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid) | ||
485 | { | ||
486 | unsigned int j; | ||
487 | u8 lastfid = 0xff; | ||
488 | |||
489 | for (j = 0; j < data->numps; j++) { | ||
490 | if (pst[j].vid > LEAST_VID) { | ||
491 | printk(KERN_ERR PFX "vid %d invalid: 0x%x\n", j, pst[j].vid); | ||
492 | return -EINVAL; | ||
493 | } | ||
494 | if (pst[j].vid < data->rvo) { /* vid + rvo >= 0 */ | ||
495 | printk(KERN_ERR BFX "0 vid exceeded with pstate %d\n", j); | ||
496 | return -ENODEV; | ||
497 | } | ||
498 | if (pst[j].vid < maxvid + data->rvo) { /* vid + rvo >= maxvid */ | ||
499 | printk(KERN_ERR BFX "maxvid exceeded with pstate %d\n", j); | ||
500 | return -ENODEV; | ||
501 | } | ||
502 | if ((pst[j].fid > MAX_FID) | ||
503 | || (pst[j].fid & 1) | ||
504 | || (j && (pst[j].fid < HI_FID_TABLE_BOTTOM))) { | ||
505 | /* Only first fid is allowed to be in "low" range */ | ||
506 | printk(KERN_ERR PFX "two low fids - %d : 0x%x\n", j, pst[j].fid); | ||
507 | return -EINVAL; | ||
508 | } | ||
509 | if (pst[j].fid < lastfid) | ||
510 | lastfid = pst[j].fid; | ||
511 | } | ||
512 | if (lastfid & 1) { | ||
513 | printk(KERN_ERR PFX "lastfid invalid\n"); | ||
514 | return -EINVAL; | ||
515 | } | ||
516 | if (lastfid > LO_FID_TABLE_TOP) | ||
517 | printk(KERN_INFO PFX "first fid not from lo freq table\n"); | ||
518 | |||
519 | return 0; | ||
520 | } | ||
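/*
 * Example of the two rvo checks above (illustrative numbers): with
 * rvo = 2 and maxvid = 0x0a, a pstate vid of 0x0b fails the second
 * test because 0x0b < 0x0a + 2, i.e. the ramp voltage offset could
 * drive the vid below maxvid and request a voltage above the
 * processor's rated maximum.
 */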
521 | |||
522 | static void print_basics(struct powernow_k8_data *data) | ||
523 | { | ||
524 | int j; | ||
525 | for (j = 0; j < data->numps; j++) { | ||
526 | if (data->powernow_table[j].frequency != CPUFREQ_ENTRY_INVALID) | ||
527 | printk(KERN_INFO PFX " %d : fid 0x%x (%d MHz), vid 0x%x (%d mV)\n", j, | ||
528 | data->powernow_table[j].index & 0xff, | ||
529 | data->powernow_table[j].frequency/1000, | ||
530 | data->powernow_table[j].index >> 8, | ||
531 | find_millivolts_from_vid(data, data->powernow_table[j].index >> 8)); | ||
532 | } | ||
533 | if (data->batps) | ||
534 | printk(KERN_INFO PFX "Only %d pstates on battery\n", data->batps); | ||
535 | } | ||
536 | |||
537 | static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid) | ||
538 | { | ||
539 | struct cpufreq_frequency_table *powernow_table; | ||
540 | unsigned int j; | ||
541 | |||
542 | if (data->batps) { /* use ACPI support to get full speed on mains power */ | ||
543 | printk(KERN_WARNING PFX "Only %d pstates usable (use ACPI driver for full range)\n", data->batps); | ||
544 | data->numps = data->batps; | ||
545 | } | ||
546 | |||
547 | for ( j=1; j<data->numps; j++ ) { | ||
548 | if (pst[j-1].fid >= pst[j].fid) { | ||
549 | printk(KERN_ERR PFX "PST out of sequence\n"); | ||
550 | return -EINVAL; | ||
551 | } | ||
552 | } | ||
553 | |||
554 | if (data->numps < 2) { | ||
555 | printk(KERN_ERR PFX "no p states to transition\n"); | ||
556 | return -ENODEV; | ||
557 | } | ||
558 | |||
559 | if (check_pst_table(data, pst, maxvid)) | ||
560 | return -EINVAL; | ||
561 | |||
562 | powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) | ||
563 | * (data->numps + 1)), GFP_KERNEL); | ||
564 | if (!powernow_table) { | ||
565 | printk(KERN_ERR PFX "powernow_table memory alloc failure\n"); | ||
566 | return -ENOMEM; | ||
567 | } | ||
568 | |||
569 | for (j = 0; j < data->numps; j++) { | ||
570 | powernow_table[j].index = pst[j].fid; /* lower 8 bits */ | ||
571 | powernow_table[j].index |= (pst[j].vid << 8); /* upper 8 bits */ | ||
572 | powernow_table[j].frequency = find_khz_freq_from_fid(pst[j].fid); | ||
573 | } | ||
574 | powernow_table[data->numps].frequency = CPUFREQ_TABLE_END; | ||
575 | powernow_table[data->numps].index = 0; | ||
576 | |||
577 | if (query_current_values_with_pending_wait(data)) { | ||
578 | kfree(powernow_table); | ||
579 | return -EIO; | ||
580 | } | ||
581 | |||
582 | dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid); | ||
583 | data->powernow_table = powernow_table; | ||
584 | print_basics(data); | ||
585 | |||
586 | for (j = 0; j < data->numps; j++) | ||
587 | if ((pst[j].fid==data->currfid) && (pst[j].vid==data->currvid)) | ||
588 | return 0; | ||
589 | |||
590 | dprintk("currfid/vid do not match PST, ignoring\n"); | ||
591 | return 0; | ||
592 | } | ||
593 | |||
594 | /* Find and validate the PSB/PST table in BIOS. */ | ||
595 | static int find_psb_table(struct powernow_k8_data *data) | ||
596 | { | ||
597 | struct psb_s *psb; | ||
598 | unsigned int i; | ||
599 | u32 mvs; | ||
600 | u8 maxvid; | ||
601 | u32 cpst = 0; | ||
602 | u32 thiscpuid; | ||
603 | |||
604 | for (i = 0xc0000; i < 0xffff0; i += 0x10) { | ||
605 | /* Scan BIOS looking for the signature. */ | ||
606 | /* It cannot be at 0xffff0 - it is too big. */ | ||
607 | |||
608 | psb = phys_to_virt(i); | ||
609 | if (memcmp(psb, PSB_ID_STRING, PSB_ID_STRING_LEN) != 0) | ||
610 | continue; | ||
611 | |||
612 | dprintk("found PSB header at 0x%p\n", psb); | ||
613 | |||
614 | dprintk("table vers: 0x%x\n", psb->tableversion); | ||
615 | if (psb->tableversion != PSB_VERSION_1_4) { | ||
616 | printk(KERN_INFO BFX "PSB table is not v1.4\n"); | ||
617 | return -ENODEV; | ||
618 | } | ||
619 | |||
620 | dprintk("flags: 0x%x\n", psb->flags1); | ||
621 | if (psb->flags1) { | ||
622 | printk(KERN_ERR BFX "unknown flags\n"); | ||
623 | return -ENODEV; | ||
624 | } | ||
625 | |||
626 | data->vstable = psb->vstable; | ||
627 | dprintk("voltage stabilization time: %d(*20us)\n", data->vstable); | ||
628 | |||
629 | dprintk("flags2: 0x%x\n", psb->flags2); | ||
630 | data->rvo = psb->flags2 & 3; | ||
631 | data->irt = ((psb->flags2) >> 2) & 3; | ||
632 | mvs = ((psb->flags2) >> 4) & 3; | ||
633 | data->vidmvs = 1 << mvs; | ||
634 | data->batps = ((psb->flags2) >> 6) & 3; | ||
635 | |||
636 | dprintk("ramp voltage offset: %d\n", data->rvo); | ||
637 | dprintk("isochronous relief time: %d\n", data->irt); | ||
638 | dprintk("maximum voltage step: %d - 0x%x\n", mvs, data->vidmvs); | ||
639 | |||
640 | dprintk("numpst: 0x%x\n", psb->num_tables); | ||
641 | cpst = psb->num_tables; | ||
642 | if ((psb->cpuid == 0x00000fc0) || (psb->cpuid == 0x00000fe0)) { | ||
643 | thiscpuid = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); | ||
644 | if ((thiscpuid == 0x00000fc0) || (thiscpuid == 0x00000fe0)) { | ||
645 | cpst = 1; | ||
646 | } | ||
647 | } | ||
648 | if (cpst != 1) { | ||
649 | printk(KERN_ERR BFX "numpst must be 1\n"); | ||
650 | return -ENODEV; | ||
651 | } | ||
652 | |||
653 | data->plllock = psb->plllocktime; | ||
654 | dprintk("plllocktime: 0x%x (units 1us)\n", psb->plllocktime); | ||
655 | dprintk("maxfid: 0x%x\n", psb->maxfid); | ||
656 | dprintk("maxvid: 0x%x\n", psb->maxvid); | ||
657 | maxvid = psb->maxvid; | ||
658 | |||
659 | data->numps = psb->numps; | ||
660 | dprintk("numpstates: 0x%x\n", data->numps); | ||
661 | return fill_powernow_table(data, (struct pst_s *)(psb+1), maxvid); | ||
662 | } | ||
663 | /* | ||
664 | * If you see this message, complain to BIOS manufacturer. If | ||
665 | * he tells you "we do not support Linux" or some similar | ||
666 | * nonsense, remember that Windows 2000 uses the same legacy | ||
667 | * mechanism that the old Linux PSB driver uses. Tell them it | ||
668 | * is broken with Windows 2000. | ||
669 | * | ||
670 | * The reference to the AMD documentation is chapter 9 in the | ||
671 | * BIOS and Kernel Developer's Guide, which is available on | ||
672 | * www.amd.com | ||
673 | */ | ||
674 | printk(KERN_ERR PFX "BIOS error - no PSB\n"); | ||
675 | return -ENODEV; | ||
676 | } | ||
677 | |||
678 | #ifdef CONFIG_X86_POWERNOW_K8_ACPI | ||
679 | static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) | ||
680 | { | ||
681 | if (!data->acpi_data.state_count) | ||
682 | return; | ||
683 | |||
684 | data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK; | ||
685 | data->rvo = (data->acpi_data.states[index].control >> RVO_SHIFT) & RVO_MASK; | ||
686 | data->plllock = (data->acpi_data.states[index].control >> PLL_L_SHIFT) & PLL_L_MASK; | ||
687 | data->vidmvs = 1 << ((data->acpi_data.states[index].control >> MVS_SHIFT) & MVS_MASK); | ||
688 | data->vstable = (data->acpi_data.states[index].control >> VST_SHIFT) & VST_MASK; | ||
689 | } | ||
690 | |||
691 | static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) | ||
692 | { | ||
693 | int i; | ||
694 | int cntlofreq = 0; | ||
695 | struct cpufreq_frequency_table *powernow_table; | ||
696 | |||
697 | if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { | ||
698 | dprintk("register performance failed\n"); | ||
699 | return -EIO; | ||
700 | } | ||
701 | |||
702 | /* verify the data contained in the ACPI structures */ | ||
703 | if (data->acpi_data.state_count <= 1) { | ||
704 | dprintk("No ACPI P-States\n"); | ||
705 | goto err_out; | ||
706 | } | ||
707 | |||
708 | if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) || | ||
709 | (data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) { | ||
710 | dprintk("Invalid control/status registers (%x - %x)\n", | ||
711 | data->acpi_data.control_register.space_id, | ||
712 | data->acpi_data.status_register.space_id); | ||
713 | goto err_out; | ||
714 | } | ||
715 | |||
716 | /* fill in data->powernow_table */ | ||
717 | powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) | ||
718 | * (data->acpi_data.state_count + 1)), GFP_KERNEL); | ||
719 | if (!powernow_table) { | ||
720 | dprintk("powernow_table memory alloc failure\n"); | ||
721 | goto err_out; | ||
722 | } | ||
723 | |||
724 | for (i = 0; i < data->acpi_data.state_count; i++) { | ||
725 | u32 fid = data->acpi_data.states[i].control & FID_MASK; | ||
726 | u32 vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK; | ||
727 | |||
728 | dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid); | ||
729 | |||
730 | powernow_table[i].index = fid; /* lower 8 bits */ | ||
731 | powernow_table[i].index |= (vid << 8); /* upper 8 bits */ | ||
732 | powernow_table[i].frequency = find_khz_freq_from_fid(fid); | ||
733 | |||
734 | /* verify frequency is OK */ | ||
735 | if ((powernow_table[i].frequency > (MAX_FREQ * 1000)) || | ||
736 | (powernow_table[i].frequency < (MIN_FREQ * 1000))) { | ||
737 | dprintk("invalid freq %u kHz, ignoring\n", powernow_table[i].frequency); | ||
738 | powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; | ||
739 | continue; | ||
740 | } | ||
741 | |||
742 | /* verify voltage is OK - BIOSs are using "off" to indicate invalid */ | ||
743 | if (vid == 0x1f) { | ||
744 | dprintk("invalid vid %u, ignoring\n", vid); | ||
745 | powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; | ||
746 | continue; | ||
747 | } | ||
748 | |||
749 | if (fid < HI_FID_TABLE_BOTTOM) { | ||
750 | if (cntlofreq) { | ||
751 | /* if both entries are the same, ignore this | ||
752 | * one... | ||
753 | */ | ||
754 | if ((powernow_table[i].frequency != powernow_table[cntlofreq].frequency) || | ||
755 | (powernow_table[i].index != powernow_table[cntlofreq].index)) { | ||
756 | printk(KERN_ERR PFX "Too many lo freq table entries\n"); | ||
757 | goto err_out_mem; | ||
758 | } | ||
759 | |||
760 | dprintk("double low frequency table entry, ignoring it.\n"); | ||
761 | powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; | ||
762 | continue; | ||
763 | } else | ||
764 | cntlofreq = i; | ||
765 | } | ||
766 | |||
767 | if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) { | ||
768 | printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n", | ||
769 | powernow_table[i].frequency, | ||
770 | (unsigned int) (data->acpi_data.states[i].core_frequency * 1000)); | ||
771 | powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; | ||
772 | continue; | ||
773 | } | ||
774 | } | ||
775 | |||
776 | powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END; | ||
777 | powernow_table[data->acpi_data.state_count].index = 0; | ||
778 | data->powernow_table = powernow_table; | ||
779 | |||
780 | /* fill in data */ | ||
781 | data->numps = data->acpi_data.state_count; | ||
782 | print_basics(data); | ||
783 | powernow_k8_acpi_pst_values(data, 0); | ||
784 | |||
785 | /* notify BIOS that we exist */ | ||
786 | acpi_processor_notify_smm(THIS_MODULE); | ||
787 | |||
788 | return 0; | ||
789 | |||
790 | err_out_mem: | ||
791 | kfree(powernow_table); | ||
792 | |||
793 | err_out: | ||
794 | acpi_processor_unregister_performance(&data->acpi_data, data->cpu); | ||
795 | |||
796 | /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */ | ||
797 | data->acpi_data.state_count = 0; | ||
798 | |||
799 | return -ENODEV; | ||
800 | } | ||
801 | |||
802 | static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) | ||
803 | { | ||
804 | if (data->acpi_data.state_count) | ||
805 | acpi_processor_unregister_performance(&data->acpi_data, data->cpu); | ||
806 | } | ||
807 | |||
808 | #else | ||
809 | static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; } | ||
810 | static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; } | ||
811 | static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; } | ||
812 | #endif /* CONFIG_X86_POWERNOW_K8_ACPI */ | ||
813 | |||
814 | /* Take a frequency, and issue the fid/vid transition command */ | ||
815 | static int transition_frequency(struct powernow_k8_data *data, unsigned int index) | ||
816 | { | ||
817 | u32 fid; | ||
818 | u32 vid; | ||
819 | int res; | ||
820 | struct cpufreq_freqs freqs; | ||
821 | |||
822 | dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); | ||
823 | |||
824 | /* The fid is the lower 8 bits of the index we stored into | ||
825 | * the cpufreq frequency table in find_psb_table; the vid is | ||
826 | * the upper 8 bits. | ||
827 | */ | ||
828 | |||
829 | fid = data->powernow_table[index].index & 0xFF; | ||
830 | vid = (data->powernow_table[index].index & 0xFF00) >> 8; | ||
831 | |||
832 | dprintk("table matched fid 0x%x, giving vid 0x%x\n", fid, vid); | ||
833 | |||
834 | if (query_current_values_with_pending_wait(data)) | ||
835 | return 1; | ||
836 | |||
837 | if ((data->currvid == vid) && (data->currfid == fid)) { | ||
838 | dprintk("target matches current values (fid 0x%x, vid 0x%x)\n", | ||
839 | fid, vid); | ||
840 | return 0; | ||
841 | } | ||
842 | |||
843 | if ((fid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) { | ||
844 | printk("ignoring illegal change in lo freq table-%x to 0x%x\n", | ||
845 | data->currfid, fid); | ||
846 | return 1; | ||
847 | } | ||
848 | |||
849 | dprintk("cpu %d, changing to fid 0x%x, vid 0x%x\n", | ||
850 | smp_processor_id(), fid, vid); | ||
851 | |||
852 | freqs.cpu = data->cpu; | ||
853 | |||
854 | freqs.old = find_khz_freq_from_fid(data->currfid); | ||
855 | freqs.new = find_khz_freq_from_fid(fid); | ||
856 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
857 | |||
858 | down(&fidvid_sem); | ||
859 | res = transition_fid_vid(data, fid, vid); | ||
860 | up(&fidvid_sem); | ||
861 | |||
862 | freqs.new = find_khz_freq_from_fid(data->currfid); | ||
863 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
864 | |||
865 | return res; | ||
866 | } | ||
867 | |||
868 | /* Driver entry point to switch to the target frequency */ | ||
869 | static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation) | ||
870 | { | ||
871 | cpumask_t oldmask = CPU_MASK_ALL; | ||
872 | struct powernow_k8_data *data = powernow_data[pol->cpu]; | ||
873 | u32 checkfid = data->currfid; | ||
874 | u32 checkvid = data->currvid; | ||
875 | unsigned int newstate; | ||
876 | int ret = -EIO; | ||
877 | |||
878 | /* only run on specific CPU from here on */ | ||
879 | oldmask = current->cpus_allowed; | ||
880 | set_cpus_allowed(current, cpumask_of_cpu(pol->cpu)); | ||
881 | schedule(); | ||
882 | |||
883 | if (smp_processor_id() != pol->cpu) { | ||
884 | printk(KERN_ERR "limiting to cpu %u failed\n", pol->cpu); | ||
885 | goto err_out; | ||
886 | } | ||
887 | |||
888 | if (pending_bit_stuck()) { | ||
889 | printk(KERN_ERR PFX "failing targ, change pending bit set\n"); | ||
890 | goto err_out; | ||
891 | } | ||
892 | |||
893 | dprintk("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n", | ||
894 | pol->cpu, targfreq, pol->min, pol->max, relation); | ||
895 | |||
896 | if (query_current_values_with_pending_wait(data)) { | ||
897 | ret = -EIO; | ||
898 | goto err_out; | ||
899 | } | ||
900 | |||
901 | dprintk("targ: curr fid 0x%x, vid 0x%x\n", | ||
902 | data->currfid, data->currvid); | ||
903 | |||
904 | if ((checkvid != data->currvid) || (checkfid != data->currfid)) { | ||
905 | printk(KERN_ERR PFX | ||
906 | "error - out of sync, fid 0x%x 0x%x, vid 0x%x 0x%x\n", | ||
907 | checkfid, data->currfid, checkvid, data->currvid); | ||
908 | } | ||
909 | |||
910 | if (cpufreq_frequency_table_target(pol, data->powernow_table, targfreq, relation, &newstate)) | ||
911 | goto err_out; | ||
912 | |||
913 | powernow_k8_acpi_pst_values(data, newstate); | ||
914 | |||
915 | if (transition_frequency(data, newstate)) { | ||
916 | printk(KERN_ERR PFX "transition frequency failed\n"); | ||
917 | ret = 1; | ||
918 | goto err_out; | ||
919 | } | ||
920 | |||
921 | pol->cur = find_khz_freq_from_fid(data->currfid); | ||
922 | ret = 0; | ||
923 | |||
924 | err_out: | ||
925 | set_cpus_allowed(current, oldmask); | ||
926 | schedule(); | ||
927 | |||
928 | return ret; | ||
929 | } | ||
930 | |||
931 | /* Driver entry point to verify the policy and range of frequencies */ | ||
932 | static int powernowk8_verify(struct cpufreq_policy *pol) | ||
933 | { | ||
934 | struct powernow_k8_data *data = powernow_data[pol->cpu]; | ||
935 | |||
936 | return cpufreq_frequency_table_verify(pol, data->powernow_table); | ||
937 | } | ||
938 | |||
939 | /* per CPU init entry point to the driver */ | ||
940 | static int __init powernowk8_cpu_init(struct cpufreq_policy *pol) | ||
941 | { | ||
942 | struct powernow_k8_data *data; | ||
943 | cpumask_t oldmask = CPU_MASK_ALL; | ||
944 | int rc; | ||
945 | |||
946 | if (!check_supported_cpu(pol->cpu)) | ||
947 | return -ENODEV; | ||
948 | |||
949 | data = kmalloc(sizeof(struct powernow_k8_data), GFP_KERNEL); | ||
950 | if (!data) { | ||
951 | printk(KERN_ERR PFX "unable to alloc powernow_k8_data"); | ||
952 | return -ENOMEM; | ||
953 | } | ||
954 | memset(data, 0, sizeof(struct powernow_k8_data)); | ||
955 | |||
956 | data->cpu = pol->cpu; | ||
957 | |||
958 | if (powernow_k8_cpu_init_acpi(data)) { | ||
959 | /* | ||
960 | * Use the PSB BIOS structure. This is only available on | ||
961 | * a UP system, and is deprecated by AMD. | ||
962 | */ | ||
963 | |||
964 | if ((num_online_cpus() != 1) || (num_possible_cpus() != 1)) { | ||
965 | printk(KERN_INFO PFX "MP systems not supported by PSB BIOS structure\n"); | ||
966 | kfree(data); | ||
967 | return -ENODEV; | ||
968 | } | ||
969 | if (pol->cpu != 0) { | ||
970 | printk(KERN_ERR PFX "init not cpu 0\n"); | ||
971 | kfree(data); | ||
972 | return -ENODEV; | ||
973 | } | ||
974 | rc = find_psb_table(data); | ||
975 | if (rc) { | ||
976 | kfree(data); | ||
977 | return -ENODEV; | ||
978 | } | ||
979 | } | ||
980 | |||
981 | /* only run on specific CPU from here on */ | ||
982 | oldmask = current->cpus_allowed; | ||
983 | set_cpus_allowed(current, cpumask_of_cpu(pol->cpu)); | ||
984 | schedule(); | ||
985 | |||
986 | if (smp_processor_id() != pol->cpu) { | ||
987 | printk(KERN_ERR "limiting to cpu %u failed\n", pol->cpu); | ||
988 | goto err_out; | ||
989 | } | ||
990 | |||
991 | if (pending_bit_stuck()) { | ||
992 | printk(KERN_ERR PFX "failing init, change pending bit set\n"); | ||
993 | goto err_out; | ||
994 | } | ||
995 | |||
996 | if (query_current_values_with_pending_wait(data)) | ||
997 | goto err_out; | ||
998 | |||
999 | fidvid_msr_init(); | ||
1000 | |||
1001 | /* run on any CPU again */ | ||
1002 | set_cpus_allowed(current, oldmask); | ||
1003 | schedule(); | ||
1004 | |||
1005 | pol->governor = CPUFREQ_DEFAULT_GOVERNOR; | ||
1006 | |||
1007 | /* Take a crude guess here. | ||
1008 | * That guess was in microseconds, so multiply with 1000 */ | ||
1009 | pol->cpuinfo.transition_latency = (((data->rvo + 8) * data->vstable * VST_UNITS_20US) | ||
1010 | + (3 * (1 << data->irt) * 10)) * 1000; | ||
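/*
 * Worked example of the crude guess above (illustrative values:
 * rvo = 0, vstable = 5, irt = 1, and VST_UNITS_20US assumed to be 20):
 *	(8 * 5 * 20) + (3 * 2 * 10) = 860 us  ->  860000 ns reported
 */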
1011 | |||
1012 | pol->cur = find_khz_freq_from_fid(data->currfid); | ||
1013 | dprintk("policy current frequency %d kHz\n", pol->cur); | ||
1014 | |||
1015 | /* min/max the cpu is capable of */ | ||
1016 | if (cpufreq_frequency_table_cpuinfo(pol, data->powernow_table)) { | ||
1017 | printk(KERN_ERR PFX "invalid powernow_table\n"); | ||
1018 | powernow_k8_cpu_exit_acpi(data); | ||
1019 | kfree(data->powernow_table); | ||
1020 | kfree(data); | ||
1021 | return -EINVAL; | ||
1022 | } | ||
1023 | |||
1024 | cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); | ||
1025 | |||
1026 | printk("cpu_init done, current fid 0x%x, vid 0x%x\n", | ||
1027 | data->currfid, data->currvid); | ||
1028 | |||
1029 | powernow_data[pol->cpu] = data; | ||
1030 | |||
1031 | return 0; | ||
1032 | |||
1033 | err_out: | ||
1034 | set_cpus_allowed(current, oldmask); | ||
1035 | schedule(); | ||
1036 | powernow_k8_cpu_exit_acpi(data); | ||
1037 | |||
1038 | kfree(data); | ||
1039 | return -ENODEV; | ||
1040 | } | ||
1041 | |||
1042 | static int __devexit powernowk8_cpu_exit (struct cpufreq_policy *pol) | ||
1043 | { | ||
1044 | struct powernow_k8_data *data = powernow_data[pol->cpu]; | ||
1045 | |||
1046 | if (!data) | ||
1047 | return -EINVAL; | ||
1048 | |||
1049 | powernow_k8_cpu_exit_acpi(data); | ||
1050 | |||
1051 | cpufreq_frequency_table_put_attr(pol->cpu); | ||
1052 | |||
1053 | kfree(data->powernow_table); | ||
1054 | kfree(data); | ||
1055 | |||
1056 | return 0; | ||
1057 | } | ||
1058 | |||
1059 | static unsigned int powernowk8_get (unsigned int cpu) | ||
1060 | { | ||
1061 | struct powernow_k8_data *data = powernow_data[cpu]; | ||
1062 | cpumask_t oldmask = current->cpus_allowed; | ||
1063 | unsigned int khz = 0; | ||
1064 | |||
1065 | set_cpus_allowed(current, cpumask_of_cpu(cpu)); | ||
1066 | if (smp_processor_id() != cpu) { | ||
1067 | printk(KERN_ERR PFX "limiting to CPU %d failed in powernowk8_get\n", cpu); | ||
1068 | set_cpus_allowed(current, oldmask); | ||
1069 | return 0; | ||
1070 | } | ||
1071 | preempt_disable(); | ||
1072 | |||
1073 | if (query_current_values_with_pending_wait(data)) | ||
1074 | goto out; | ||
1075 | |||
1076 | khz = find_khz_freq_from_fid(data->currfid); | ||
1077 | |||
1078 | out: | ||
1079 | preempt_enable_no_resched(); | ||
1080 | set_cpus_allowed(current, oldmask); | ||
1081 | |||
1082 | return khz; | ||
1083 | } | ||
1084 | |||
1085 | static struct freq_attr* powernow_k8_attr[] = { | ||
1086 | &cpufreq_freq_attr_scaling_available_freqs, | ||
1087 | NULL, | ||
1088 | }; | ||
1089 | |||
1090 | static struct cpufreq_driver cpufreq_amd64_driver = { | ||
1091 | .verify = powernowk8_verify, | ||
1092 | .target = powernowk8_target, | ||
1093 | .init = powernowk8_cpu_init, | ||
1094 | .exit = __devexit_p(powernowk8_cpu_exit), | ||
1095 | .get = powernowk8_get, | ||
1096 | .name = "powernow-k8", | ||
1097 | .owner = THIS_MODULE, | ||
1098 | .attr = powernow_k8_attr, | ||
1099 | }; | ||
1100 | |||
1101 | /* driver entry point for init */ | ||
1102 | static int __init powernowk8_init(void) | ||
1103 | { | ||
1104 | unsigned int i, supported_cpus = 0; | ||
1105 | |||
1106 | for (i=0; i<NR_CPUS; i++) { | ||
1107 | if (!cpu_online(i)) | ||
1108 | continue; | ||
1109 | if (check_supported_cpu(i)) | ||
1110 | supported_cpus++; | ||
1111 | } | ||
1112 | |||
1113 | if (supported_cpus == num_online_cpus()) { | ||
1114 | printk(KERN_INFO PFX "Found %d AMD Athlon 64 / Opteron processors (" VERSION ")\n", | ||
1115 | supported_cpus); | ||
1116 | return cpufreq_register_driver(&cpufreq_amd64_driver); | ||
1117 | } | ||
1118 | |||
1119 | return -ENODEV; | ||
1120 | } | ||
1121 | |||
1122 | /* driver entry point for term */ | ||
1123 | static void __exit powernowk8_exit(void) | ||
1124 | { | ||
1125 | dprintk("exit\n"); | ||
1126 | |||
1127 | cpufreq_unregister_driver(&cpufreq_amd64_driver); | ||
1128 | } | ||
1129 | |||
1130 | MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com>"); | ||
1131 | MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver."); | ||
1132 | MODULE_LICENSE("GPL"); | ||
1133 | |||
1134 | late_initcall(powernowk8_init); | ||
1135 | module_exit(powernowk8_exit); | ||
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h new file mode 100644 index 000000000000..63ebc8470f52 --- /dev/null +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h | |||
@@ -0,0 +1,176 @@ | |||
1 | /* | ||
2 | * (c) 2003, 2004 Advanced Micro Devices, Inc. | ||
3 | * Your use of this code is subject to the terms and conditions of the | ||
4 | * GNU general public license version 2. See "COPYING" or | ||
5 | * http://www.gnu.org/licenses/gpl.html | ||
6 | */ | ||
7 | |||
8 | struct powernow_k8_data { | ||
9 | unsigned int cpu; | ||
10 | |||
11 | u32 numps; /* number of p-states */ | ||
12 | u32 batps; /* number of p-states supported on battery */ | ||
13 | |||
14 | /* these values are constant when the PSB is used to determine | ||
15 | * vid/fid pairings, but are modified during the ->target() call | ||
16 | * when ACPI is used */ | ||
17 | u32 rvo; /* ramp voltage offset */ | ||
18 | u32 irt; /* isochronous relief time */ | ||
19 | u32 vidmvs; /* usable value calculated from mvs */ | ||
20 | u32 vstable; /* voltage stabilization time, units 20 us */ | ||
21 | u32 plllock; /* pll lock time, units 1 us */ | ||
22 | |||
23 | /* keep track of the current fid / vid */ | ||
24 | u32 currvid, currfid; | ||
25 | |||
26 | /* the powernow_table includes all frequency and vid/fid pairings: | ||
27 | * the fid is in the lower 8 bits of the index, the vid in the upper 8 bits. | ||
28 | * frequency is in kHz */ | ||
29 | struct cpufreq_frequency_table *powernow_table; | ||
30 | |||
31 | #ifdef CONFIG_X86_POWERNOW_K8_ACPI | ||
32 | /* the acpi table needs to be kept. it's only available if ACPI was | ||
33 | * used to determine valid frequency/vid/fid states */ | ||
34 | struct acpi_processor_performance acpi_data; | ||
35 | #endif | ||
36 | }; | ||
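Given the index encoding described in the comment above, decoding a table entry would look like this sketch (hypothetical helpers, not part of the driver):

static inline u32 entry_fid(const struct cpufreq_frequency_table *e)
{
	return e->index & 0xff;			/* fid: lower 8 bits of the index */
}

static inline u32 entry_vid(const struct cpufreq_frequency_table *e)
{
	return (e->index >> 8) & 0xff;		/* vid: next 8 bits of the index */
}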
37 | |||
38 | |||
39 | /* processor's cpuid instruction support */ | ||
40 | #define CPUID_PROCESSOR_SIGNATURE 1 /* function 1 */ | ||
41 | #define CPUID_XFAM 0x0ff00000 /* extended family */ | ||
42 | #define CPUID_XFAM_K8 0 | ||
43 | #define CPUID_XMOD 0x000f0000 /* extended model */ | ||
44 | #define CPUID_XMOD_REV_E 0x00020000 | ||
45 | #define CPUID_USE_XFAM_XMOD 0x00000f00 | ||
46 | #define CPUID_GET_MAX_CAPABILITIES 0x80000000 | ||
47 | #define CPUID_FREQ_VOLT_CAPABILITIES 0x80000007 | ||
48 | #define P_STATE_TRANSITION_CAPABLE 6 | ||
49 | |||
50 | /* Model Specific Registers for p-state transitions. MSRs are 64-bit. For | ||
51 | * writes (wrmsr - opcode 0f 30), the register number is placed in ecx, and | ||
52 | * the value to write is placed in edx:eax. For reads (rdmsr - opcode 0f 32), | ||
53 | * the register number is placed in ecx, and the data is returned in edx:eax. */ | ||
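As a concrete illustration of that convention, a status read could be wrapped as follows; the helper itself is hypothetical, only the MSR and field names (defined just below) come from this header:

static inline u32 read_current_fid(void)
{
	u32 lo, hi;

	rdmsr(MSR_FIDVID_STATUS, lo, hi);	/* ecx = 0xc0010042, data returned in edx:eax (hi:lo) */
	return lo & MSR_S_LO_CURRENT_FID;	/* current fid sits in the low bits of eax */
}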
54 | |||
55 | #define MSR_FIDVID_CTL 0xc0010041 | ||
56 | #define MSR_FIDVID_STATUS 0xc0010042 | ||
57 | |||
58 | /* Field definitions within the FID VID Low Control MSR : */ | ||
59 | #define MSR_C_LO_INIT_FID_VID 0x00010000 | ||
60 | #define MSR_C_LO_NEW_VID 0x00001f00 | ||
61 | #define MSR_C_LO_NEW_FID 0x0000002f | ||
62 | #define MSR_C_LO_VID_SHIFT 8 | ||
63 | |||
64 | /* Field definitions within the FID VID High Control MSR : */ | ||
65 | #define MSR_C_HI_STP_GNT_TO 0x000fffff | ||
66 | |||
67 | /* Field definitions within the FID VID Low Status MSR : */ | ||
68 | #define MSR_S_LO_CHANGE_PENDING 0x80000000 /* cleared when completed */ | ||
69 | #define MSR_S_LO_MAX_RAMP_VID 0x1f000000 | ||
70 | #define MSR_S_LO_MAX_FID 0x003f0000 | ||
71 | #define MSR_S_LO_START_FID 0x00003f00 | ||
72 | #define MSR_S_LO_CURRENT_FID 0x0000003f | ||
73 | |||
74 | /* Field definitions within the FID VID High Status MSR : */ | ||
75 | #define MSR_S_HI_MAX_WORKING_VID 0x001f0000 | ||
76 | #define MSR_S_HI_START_VID 0x00001f00 | ||
77 | #define MSR_S_HI_CURRENT_VID 0x0000001f | ||
78 | #define MSR_C_HI_STP_GNT_BENIGN 0x00000001 | ||
79 | |||
80 | /* | ||
81 | * There are restrictions the frequencies have to follow: | ||
82 | * - only 1 entry in the low fid table ( <=1.4GHz ) | ||
83 | * - lowest entry in the high fid table must be >= 2 * the entry in the | ||
84 | * low fid table | ||
85 | * - lowest entry in the high fid table must be <= 200MHz + 2 * the entry | ||
86 | * in the low fid table | ||
87 | * - the parts can only step at 200 MHz intervals, so 1.9 GHz is never valid | ||
88 | * - lowest frequency must be >= interprocessor hypertransport link speed | ||
89 | * (only applies to MP systems obviously) | ||
90 | */ | ||
91 | |||
92 | /* fids (frequency identifiers) are arranged in 2 tables - lo and hi */ | ||
93 | #define LO_FID_TABLE_TOP 6 /* fid values marking the boundary */ | ||
94 | #define HI_FID_TABLE_BOTTOM 8 /* between the low and high tables */ | ||
95 | |||
96 | #define LO_VCOFREQ_TABLE_TOP 1400 /* corresponding vco frequency values */ | ||
97 | #define HI_VCOFREQ_TABLE_BOTTOM 1600 | ||
98 | |||
99 | #define MIN_FREQ_RESOLUTION 200 /* fids jump by 2 matching freq jumps by 200 */ | ||
100 | |||
101 | #define MAX_FID 0x2a /* Spec only gives FID values as far as 5 GHz */ | ||
102 | #define LEAST_VID 0x1e /* Lowest (numerically highest) useful vid value */ | ||
103 | |||
104 | #define MIN_FREQ 800 /* Min and max freqs, per spec */ | ||
105 | #define MAX_FREQ 5000 | ||
106 | |||
107 | #define INVALID_FID_MASK 0xffffffc1 /* not a valid fid if these bits are set */ | ||
108 | #define INVALID_VID_MASK 0xffffffe0 /* not a valid vid if these bits are set */ | ||
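These constants are mutually consistent under the fid-to-frequency mapping used elsewhere in this driver (find_khz_freq_from_fid(), which works out to 800 + 100 * fid MHz): fid 0 gives MIN_FREQ (800 MHz), MAX_FID 0x2a = 42 gives 800 + 4200 = MAX_FREQ (5000 MHz), and since fids step by 2, one step is exactly MIN_FREQ_RESOLUTION (200 MHz).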
109 | |||
110 | #define STOP_GRANT_5NS 1 /* min poss memory access latency for voltage change */ | ||
111 | |||
112 | #define PLL_LOCK_CONVERSION (1000/5) /* us to ns, then divide by clock period */ | ||
113 | |||
114 | #define MAXIMUM_VID_STEPS 1 /* Current cpus only allow a single step of 25mV */ | ||
115 | #define VST_UNITS_20US 20 /* Voltage Stabilization Time is in units of 20us */ | ||
116 | |||
117 | /* | ||
118 | * Most values of interest are encoded in a single field of the _PSS | ||
119 | * entries: the "control" value. | ||
120 | */ | ||
121 | |||
122 | #define IRT_SHIFT 30 | ||
123 | #define RVO_SHIFT 28 | ||
124 | #define PLL_L_SHIFT 20 | ||
125 | #define MVS_SHIFT 18 | ||
126 | #define VST_SHIFT 11 | ||
127 | #define VID_SHIFT 6 | ||
128 | #define IRT_MASK 3 | ||
129 | #define RVO_MASK 3 | ||
130 | #define PLL_L_MASK 0x7f | ||
131 | #define MVS_MASK 3 | ||
132 | #define VST_MASK 0x7f | ||
133 | #define VID_MASK 0x1f | ||
134 | #define FID_MASK 0x3f | ||
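Applied to a _PSS "control" value, these shift/mask pairs would be used along the following lines (hypothetical helper macros, for illustration only):

#define PSS_IRT(ctl)	(((ctl) >> IRT_SHIFT) & IRT_MASK)	/* isochronous relief time */
#define PSS_RVO(ctl)	(((ctl) >> RVO_SHIFT) & RVO_MASK)	/* ramp voltage offset */
#define PSS_VST(ctl)	(((ctl) >> VST_SHIFT) & VST_MASK)	/* voltage stabilization time */
#define PSS_VID(ctl)	(((ctl) >> VID_SHIFT) & VID_MASK)	/* voltage id */
#define PSS_FID(ctl)	((ctl) & FID_MASK)			/* frequency id (lowest bits) */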
135 | |||
136 | |||
137 | /* | ||
138 | * Version 1.4 of the PSB table. This table is constructed by the BIOS | ||
139 | * to tell the OS's power management driver which VIDs and FIDs are | ||
140 | * supported by this particular processor. | ||
141 | * If the data in the PSB / PST is wrong, then this driver will program the | ||
142 | * wrong values into hardware, which is very likely to lead to a crash. | ||
143 | */ | ||
144 | |||
145 | #define PSB_ID_STRING "AMDK7PNOW!" | ||
146 | #define PSB_ID_STRING_LEN 10 | ||
147 | |||
148 | #define PSB_VERSION_1_4 0x14 | ||
149 | |||
150 | struct psb_s { | ||
151 | u8 signature[10]; | ||
152 | u8 tableversion; | ||
153 | u8 flags1; | ||
154 | u16 vstable; | ||
155 | u8 flags2; | ||
156 | u8 num_tables; | ||
157 | u32 cpuid; | ||
158 | u8 plllocktime; | ||
159 | u8 maxfid; | ||
160 | u8 maxvid; | ||
161 | u8 numps; | ||
162 | }; | ||
163 | |||
164 | /* Pairs of fid/vid values are appended to the version 1.4 PSB table. */ | ||
165 | struct pst_s { | ||
166 | u8 fid; | ||
167 | u8 vid; | ||
168 | }; | ||
169 | |||
170 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k8", msg) | ||
171 | |||
172 | static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid); | ||
173 | static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid); | ||
174 | static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid); | ||
175 | |||
176 | static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index); | ||
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c new file mode 100644 index 000000000000..07d5612dc00f --- /dev/null +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c | |||
@@ -0,0 +1,715 @@ | |||
1 | /* | ||
2 | * cpufreq driver for Enhanced SpeedStep, as found in Intel's Pentium | ||
3 | * M (part of the Centrino chipset). | ||
4 | * | ||
5 | * Despite the "SpeedStep" in the name, this is almost entirely unlike | ||
6 | * traditional SpeedStep. | ||
7 | * | ||
8 | * Modelled on speedstep.c | ||
9 | * | ||
10 | * Copyright (C) 2003 Jeremy Fitzhardinge <jeremy@goop.org> | ||
11 | * | ||
12 | * WARNING WARNING WARNING | ||
13 | * | ||
14 | * This driver manipulates the PERF_CTL MSR, which is only somewhat | ||
15 | * documented. While it seems to work on my laptop, it has not been | ||
16 | * tested anywhere else, and it may not work for you, may do strange | ||
17 | * things, or may simply crash. | ||
18 | */ | ||
19 | |||
20 | #include <linux/kernel.h> | ||
21 | #include <linux/module.h> | ||
22 | #include <linux/init.h> | ||
23 | #include <linux/cpufreq.h> | ||
24 | #include <linux/config.h> | ||
25 | #include <linux/delay.h> | ||
26 | #include <linux/compiler.h> | ||
27 | |||
28 | #ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI | ||
29 | #include <linux/acpi.h> | ||
30 | #include <acpi/processor.h> | ||
31 | #endif | ||
32 | |||
33 | #include <asm/msr.h> | ||
34 | #include <asm/processor.h> | ||
35 | #include <asm/cpufeature.h> | ||
36 | |||
37 | #include "speedstep-est-common.h" | ||
38 | |||
39 | #define PFX "speedstep-centrino: " | ||
40 | #define MAINTAINER "Jeremy Fitzhardinge <jeremy@goop.org>" | ||
41 | |||
42 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg) | ||
43 | |||
44 | |||
45 | struct cpu_id | ||
46 | { | ||
47 | __u8 x86; /* CPU family */ | ||
48 | __u8 x86_model; /* model */ | ||
49 | __u8 x86_mask; /* stepping */ | ||
50 | }; | ||
51 | |||
52 | enum { | ||
53 | CPU_BANIAS, | ||
54 | CPU_DOTHAN_A1, | ||
55 | CPU_DOTHAN_A2, | ||
56 | CPU_DOTHAN_B0, | ||
57 | }; | ||
58 | |||
59 | static const struct cpu_id cpu_ids[] = { | ||
60 | [CPU_BANIAS] = { 6, 9, 5 }, | ||
61 | [CPU_DOTHAN_A1] = { 6, 13, 1 }, | ||
62 | [CPU_DOTHAN_A2] = { 6, 13, 2 }, | ||
63 | [CPU_DOTHAN_B0] = { 6, 13, 6 }, | ||
64 | }; | ||
65 | #define N_IDS (sizeof(cpu_ids)/sizeof(cpu_ids[0])) | ||
66 | |||
67 | struct cpu_model | ||
68 | { | ||
69 | const struct cpu_id *cpu_id; | ||
70 | const char *model_name; | ||
71 | unsigned max_freq; /* max clock in kHz */ | ||
72 | |||
73 | struct cpufreq_frequency_table *op_points; /* clock/voltage pairs */ | ||
74 | }; | ||
75 | static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x); | ||
76 | |||
77 | /* Operating points for current CPU */ | ||
78 | static struct cpu_model *centrino_model[NR_CPUS]; | ||
79 | static const struct cpu_id *centrino_cpu[NR_CPUS]; | ||
80 | |||
81 | static struct cpufreq_driver centrino_driver; | ||
82 | |||
83 | #ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE | ||
84 | |||
85 | /* Computes the correct form for IA32_PERF_CTL MSR for a particular | ||
86 | frequency/voltage operating point; frequency in MHz, volts in mV. | ||
87 | This is stored as "index" in the structure. */ | ||
88 | #define OP(mhz, mv) \ | ||
89 | { \ | ||
90 | .frequency = (mhz) * 1000, \ | ||
91 | .index = (((mhz)/100) << 8) | ((mv - 700) / 16) \ | ||
92 | } | ||
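To make the encoding concrete (the arithmetic is mine, not from the datasheet):

	/* OP(1300, 1388) expands to .frequency = 1300000 kHz and
	 * .index = ((1300/100) << 8) | ((1388 - 700) / 16)
	 *        = (13 << 8) | 43 = 0x0d2b,
	 * i.e. the bus ratio in bits 15:8 and the voltage code in
	 * bits 7:0, the layout PERF_CTL expects. */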
93 | |||
94 | /* | ||
95 | * These voltage tables were derived from the Intel Pentium M | ||
96 | * datasheet, document 25261202.pdf, Table 5. I have verified they | ||
97 | * are consistent with my IBM ThinkPad X31, which has a 1.3GHz Pentium | ||
98 | * M. | ||
99 | */ | ||
100 | |||
101 | /* Ultra Low Voltage Intel Pentium M processor 900MHz (Banias) */ | ||
102 | static struct cpufreq_frequency_table banias_900[] = | ||
103 | { | ||
104 | OP(600, 844), | ||
105 | OP(800, 988), | ||
106 | OP(900, 1004), | ||
107 | { .frequency = CPUFREQ_TABLE_END } | ||
108 | }; | ||
109 | |||
110 | /* Ultra Low Voltage Intel Pentium M processor 1000MHz (Banias) */ | ||
111 | static struct cpufreq_frequency_table banias_1000[] = | ||
112 | { | ||
113 | OP(600, 844), | ||
114 | OP(800, 972), | ||
115 | OP(900, 988), | ||
116 | OP(1000, 1004), | ||
117 | { .frequency = CPUFREQ_TABLE_END } | ||
118 | }; | ||
119 | |||
120 | /* Low Voltage Intel Pentium M processor 1.10GHz (Banias) */ | ||
121 | static struct cpufreq_frequency_table banias_1100[] = | ||
122 | { | ||
123 | OP( 600, 956), | ||
124 | OP( 800, 1020), | ||
125 | OP( 900, 1100), | ||
126 | OP(1000, 1164), | ||
127 | OP(1100, 1180), | ||
128 | { .frequency = CPUFREQ_TABLE_END } | ||
129 | }; | ||
130 | |||
131 | |||
132 | /* Low Voltage Intel Pentium M processor 1.20GHz (Banias) */ | ||
133 | static struct cpufreq_frequency_table banias_1200[] = | ||
134 | { | ||
135 | OP( 600, 956), | ||
136 | OP( 800, 1004), | ||
137 | OP( 900, 1020), | ||
138 | OP(1000, 1100), | ||
139 | OP(1100, 1164), | ||
140 | OP(1200, 1180), | ||
141 | { .frequency = CPUFREQ_TABLE_END } | ||
142 | }; | ||
143 | |||
144 | /* Intel Pentium M processor 1.30GHz (Banias) */ | ||
145 | static struct cpufreq_frequency_table banias_1300[] = | ||
146 | { | ||
147 | OP( 600, 956), | ||
148 | OP( 800, 1260), | ||
149 | OP(1000, 1292), | ||
150 | OP(1200, 1356), | ||
151 | OP(1300, 1388), | ||
152 | { .frequency = CPUFREQ_TABLE_END } | ||
153 | }; | ||
154 | |||
155 | /* Intel Pentium M processor 1.40GHz (Banias) */ | ||
156 | static struct cpufreq_frequency_table banias_1400[] = | ||
157 | { | ||
158 | OP( 600, 956), | ||
159 | OP( 800, 1180), | ||
160 | OP(1000, 1308), | ||
161 | OP(1200, 1436), | ||
162 | OP(1400, 1484), | ||
163 | { .frequency = CPUFREQ_TABLE_END } | ||
164 | }; | ||
165 | |||
166 | /* Intel Pentium M processor 1.50GHz (Banias) */ | ||
167 | static struct cpufreq_frequency_table banias_1500[] = | ||
168 | { | ||
169 | OP( 600, 956), | ||
170 | OP( 800, 1116), | ||
171 | OP(1000, 1228), | ||
172 | OP(1200, 1356), | ||
173 | OP(1400, 1452), | ||
174 | OP(1500, 1484), | ||
175 | { .frequency = CPUFREQ_TABLE_END } | ||
176 | }; | ||
177 | |||
178 | /* Intel Pentium M processor 1.60GHz (Banias) */ | ||
179 | static struct cpufreq_frequency_table banias_1600[] = | ||
180 | { | ||
181 | OP( 600, 956), | ||
182 | OP( 800, 1036), | ||
183 | OP(1000, 1164), | ||
184 | OP(1200, 1276), | ||
185 | OP(1400, 1420), | ||
186 | OP(1600, 1484), | ||
187 | { .frequency = CPUFREQ_TABLE_END } | ||
188 | }; | ||
189 | |||
190 | /* Intel Pentium M processor 1.70GHz (Banias) */ | ||
191 | static struct cpufreq_frequency_table banias_1700[] = | ||
192 | { | ||
193 | OP( 600, 956), | ||
194 | OP( 800, 1004), | ||
195 | OP(1000, 1116), | ||
196 | OP(1200, 1228), | ||
197 | OP(1400, 1308), | ||
198 | OP(1700, 1484), | ||
199 | { .frequency = CPUFREQ_TABLE_END } | ||
200 | }; | ||
201 | #undef OP | ||
202 | |||
203 | #define _BANIAS(cpuid, max, name) \ | ||
204 | { .cpu_id = cpuid, \ | ||
205 | .model_name = "Intel(R) Pentium(R) M processor " name "MHz", \ | ||
206 | .max_freq = (max)*1000, \ | ||
207 | .op_points = banias_##max, \ | ||
208 | } | ||
209 | #define BANIAS(max) _BANIAS(&cpu_ids[CPU_BANIAS], max, #max) | ||
210 | |||
211 | /* CPU models, their operating frequency range, and freq/voltage | ||
212 | operating points */ | ||
213 | static struct cpu_model models[] = | ||
214 | { | ||
215 | _BANIAS(&cpu_ids[CPU_BANIAS], 900, " 900"), | ||
216 | BANIAS(1000), | ||
217 | BANIAS(1100), | ||
218 | BANIAS(1200), | ||
219 | BANIAS(1300), | ||
220 | BANIAS(1400), | ||
221 | BANIAS(1500), | ||
222 | BANIAS(1600), | ||
223 | BANIAS(1700), | ||
224 | |||
225 | /* NULL model_name is a wildcard */ | ||
226 | { &cpu_ids[CPU_DOTHAN_A1], NULL, 0, NULL }, | ||
227 | { &cpu_ids[CPU_DOTHAN_A2], NULL, 0, NULL }, | ||
228 | { &cpu_ids[CPU_DOTHAN_B0], NULL, 0, NULL }, | ||
229 | |||
230 | { NULL, } | ||
231 | }; | ||
232 | #undef _BANIAS | ||
233 | #undef BANIAS | ||
234 | |||
235 | static int centrino_cpu_init_table(struct cpufreq_policy *policy) | ||
236 | { | ||
237 | struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu]; | ||
238 | struct cpu_model *model; | ||
239 | |||
240 | for(model = models; model->cpu_id != NULL; model++) | ||
241 | if (centrino_verify_cpu_id(cpu, model->cpu_id) && | ||
242 | (model->model_name == NULL || | ||
243 | strcmp(cpu->x86_model_id, model->model_name) == 0)) | ||
244 | break; | ||
245 | |||
246 | if (model->cpu_id == NULL) { | ||
247 | /* No match at all */ | ||
248 | dprintk(KERN_INFO PFX "no support for CPU model \"%s\": " | ||
249 | "send /proc/cpuinfo to " MAINTAINER "\n", | ||
250 | cpu->x86_model_id); | ||
251 | return -ENOENT; | ||
252 | } | ||
253 | |||
254 | if (model->op_points == NULL) { | ||
255 | /* Matched a non-match */ | ||
256 | dprintk(KERN_INFO PFX "no table support for CPU model \"%s\": \n", | ||
257 | cpu->x86_model_id); | ||
258 | #ifndef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI | ||
259 | dprintk(KERN_INFO PFX "try compiling with CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI enabled\n"); | ||
260 | #endif | ||
261 | return -ENOENT; | ||
262 | } | ||
263 | |||
264 | centrino_model[policy->cpu] = model; | ||
265 | |||
266 | dprintk("found \"%s\": max frequency: %dkHz\n", | ||
267 | model->model_name, model->max_freq); | ||
268 | |||
269 | return 0; | ||
270 | } | ||
271 | |||
272 | #else | ||
273 | static inline int centrino_cpu_init_table(struct cpufreq_policy *policy) { return -ENODEV; } | ||
274 | #endif /* CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE */ | ||
275 | |||
276 | static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x) | ||
277 | { | ||
278 | if ((c->x86 == x->x86) && | ||
279 | (c->x86_model == x->x86_model) && | ||
280 | (c->x86_mask == x->x86_mask)) | ||
281 | return 1; | ||
282 | return 0; | ||
283 | } | ||
284 | |||
285 | /* To be called only after centrino_model is initialized */ | ||
286 | static unsigned extract_clock(unsigned msr, unsigned int cpu, int failsafe) | ||
287 | { | ||
288 | int i; | ||
289 | |||
290 | /* | ||
291 | * Extract clock in kHz from PERF_CTL value | ||
292 | * for centrino, as some DSDTs are buggy. | ||
293 | * Ideally, this can be done using the acpi_data structure. | ||
294 | */ | ||
295 | if ((centrino_cpu[cpu] == &cpu_ids[CPU_BANIAS]) || | ||
296 | (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_A1]) || | ||
297 | (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_B0])) { | ||
298 | msr = (msr >> 8) & 0xff; | ||
299 | return msr * 100000; | ||
300 | } | ||
301 | |||
302 | if ((!centrino_model[cpu]) || (!centrino_model[cpu]->op_points)) | ||
303 | return 0; | ||
304 | |||
305 | msr &= 0xffff; | ||
306 | for (i=0;centrino_model[cpu]->op_points[i].frequency != CPUFREQ_TABLE_END; i++) { | ||
307 | if (msr == centrino_model[cpu]->op_points[i].index) | ||
308 | return centrino_model[cpu]->op_points[i].frequency; | ||
309 | } | ||
310 | if (failsafe) | ||
311 | return centrino_model[cpu]->op_points[i-1].frequency; | ||
312 | else | ||
313 | return 0; | ||
314 | } | ||
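To illustrate the two paths above (the numbers are my own):

	/* On a Banias part, a PERF_STATUS value with 0x0d in bits 15:8
	 * decodes directly as 13 * 100000 = 1300000 kHz.  On the other
	 * models the low 16 bits are matched against op_points[].index,
	 * and the failsafe path falls back to the last table entry. */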
315 | |||
316 | /* Return the current CPU frequency in kHz */ | ||
317 | static unsigned int get_cur_freq(unsigned int cpu) | ||
318 | { | ||
319 | unsigned l, h; | ||
320 | unsigned clock_freq; | ||
321 | cpumask_t saved_mask; | ||
322 | |||
323 | saved_mask = current->cpus_allowed; | ||
324 | set_cpus_allowed(current, cpumask_of_cpu(cpu)); | ||
325 | if (smp_processor_id() != cpu) { | ||
326 | /* restore the saved affinity mask before bailing out */ | ||
326 | set_cpus_allowed(current, saved_mask); | ||
326 | return 0; | ||
326 | } | ||
327 | |||
328 | rdmsr(MSR_IA32_PERF_STATUS, l, h); | ||
329 | clock_freq = extract_clock(l, cpu, 0); | ||
330 | |||
331 | if (unlikely(clock_freq == 0)) { | ||
332 | /* | ||
333 | * On some CPUs, we can see transient MSR values (which are | ||
334 | * not present in _PSS), while CPU is doing some automatic | ||
335 | * P-state transition (like TM2). Get the last freq set | ||
336 | * in PERF_CTL. | ||
337 | */ | ||
338 | rdmsr(MSR_IA32_PERF_CTL, l, h); | ||
339 | clock_freq = extract_clock(l, cpu, 1); | ||
340 | } | ||
341 | |||
342 | set_cpus_allowed(current, saved_mask); | ||
343 | return clock_freq; | ||
344 | } | ||
345 | |||
346 | |||
347 | #ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI | ||
348 | |||
349 | static struct acpi_processor_performance p; | ||
350 | |||
351 | /* | ||
352 | * centrino_cpu_init_acpi - register with ACPI P-States library | ||
353 | * | ||
354 | * Register with the ACPI P-States library (part of drivers/acpi/processor.c) | ||
355 | * in order to determine correct frequency and voltage pairings by reading | ||
356 | * the _PSS of the ACPI DSDT or SSDT tables. | ||
357 | */ | ||
358 | static int centrino_cpu_init_acpi(struct cpufreq_policy *policy) | ||
359 | { | ||
360 | union acpi_object arg0 = {ACPI_TYPE_BUFFER}; | ||
361 | u32 arg0_buf[3]; | ||
362 | struct acpi_object_list arg_list = {1, &arg0}; | ||
363 | unsigned long cur_freq; | ||
364 | int result = 0, i; | ||
365 | unsigned int cpu = policy->cpu; | ||
366 | |||
367 | /* _PDC settings */ | ||
368 | arg0.buffer.length = 12; | ||
369 | arg0.buffer.pointer = (u8 *) arg0_buf; | ||
370 | arg0_buf[0] = ACPI_PDC_REVISION_ID; | ||
371 | arg0_buf[1] = 1; | ||
372 | arg0_buf[2] = ACPI_PDC_EST_CAPABILITY_SMP | ACPI_PDC_EST_CAPABILITY_MSR; | ||
373 | |||
374 | p.pdc = &arg_list; | ||
375 | |||
376 | /* register with ACPI core */ | ||
377 | if (acpi_processor_register_performance(&p, cpu)) { | ||
378 | dprintk(KERN_INFO PFX "obtaining ACPI data failed\n"); | ||
379 | return -EIO; | ||
380 | } | ||
381 | |||
382 | /* verify the acpi_data */ | ||
383 | if (p.state_count <= 1) { | ||
384 | dprintk("No P-States\n"); | ||
385 | result = -ENODEV; | ||
386 | goto err_unreg; | ||
387 | } | ||
388 | |||
389 | if ((p.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) || | ||
390 | (p.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) { | ||
391 | dprintk("Invalid control/status registers (%x - %x)\n", | ||
392 | p.control_register.space_id, p.status_register.space_id); | ||
393 | result = -EIO; | ||
394 | goto err_unreg; | ||
395 | } | ||
396 | |||
397 | for (i=0; i<p.state_count; i++) { | ||
398 | if (p.states[i].control != p.states[i].status) { | ||
399 | dprintk("Different control (%x) and status values (%x)\n", | ||
400 | p.states[i].control, p.states[i].status); | ||
401 | result = -EINVAL; | ||
402 | goto err_unreg; | ||
403 | } | ||
404 | |||
405 | if (!p.states[i].core_frequency) { | ||
406 | dprintk("Zero core frequency for state %u\n", i); | ||
407 | result = -EINVAL; | ||
408 | goto err_unreg; | ||
409 | } | ||
410 | |||
411 | if (p.states[i].core_frequency > p.states[0].core_frequency) { | ||
412 | dprintk("P%u has larger frequency (%u) than P0 (%u), skipping\n", i, | ||
413 | p.states[i].core_frequency, p.states[0].core_frequency); | ||
414 | p.states[i].core_frequency = 0; | ||
415 | continue; | ||
416 | } | ||
417 | } | ||
418 | |||
419 | centrino_model[cpu] = kmalloc(sizeof(struct cpu_model), GFP_KERNEL); | ||
420 | if (!centrino_model[cpu]) { | ||
421 | result = -ENOMEM; | ||
422 | goto err_unreg; | ||
423 | } | ||
424 | memset(centrino_model[cpu], 0, sizeof(struct cpu_model)); | ||
425 | |||
426 | centrino_model[cpu]->model_name=NULL; | ||
427 | centrino_model[cpu]->max_freq = p.states[0].core_frequency * 1000; | ||
428 | centrino_model[cpu]->op_points = kmalloc(sizeof(struct cpufreq_frequency_table) * | ||
429 | (p.state_count + 1), GFP_KERNEL); | ||
430 | if (!centrino_model[cpu]->op_points) { | ||
431 | result = -ENOMEM; | ||
432 | goto err_kfree; | ||
433 | } | ||
434 | |||
435 | for (i=0; i<p.state_count; i++) { | ||
436 | centrino_model[cpu]->op_points[i].index = p.states[i].control; | ||
437 | centrino_model[cpu]->op_points[i].frequency = p.states[i].core_frequency * 1000; | ||
438 | dprintk("adding state %i with frequency %u and control value %04x\n", | ||
439 | i, centrino_model[cpu]->op_points[i].frequency, centrino_model[cpu]->op_points[i].index); | ||
440 | } | ||
441 | centrino_model[cpu]->op_points[p.state_count].frequency = CPUFREQ_TABLE_END; | ||
442 | |||
443 | cur_freq = get_cur_freq(cpu); | ||
444 | |||
445 | for (i=0; i<p.state_count; i++) { | ||
446 | if (!p.states[i].core_frequency) { | ||
447 | dprintk("skipping state %u\n", i); | ||
448 | centrino_model[cpu]->op_points[i].frequency = CPUFREQ_ENTRY_INVALID; | ||
449 | continue; | ||
450 | } | ||
451 | |||
452 | if (extract_clock(centrino_model[cpu]->op_points[i].index, cpu, 0) != | ||
453 | (centrino_model[cpu]->op_points[i].frequency)) { | ||
454 | dprintk("Invalid encoded frequency (%u vs. %u)\n", | ||
455 | extract_clock(centrino_model[cpu]->op_points[i].index, cpu, 0), | ||
456 | centrino_model[cpu]->op_points[i].frequency); | ||
457 | result = -EINVAL; | ||
458 | goto err_kfree_all; | ||
459 | } | ||
460 | |||
461 | if (cur_freq == centrino_model[cpu]->op_points[i].frequency) | ||
462 | p.state = i; | ||
463 | } | ||
464 | |||
465 | /* notify BIOS that we exist */ | ||
466 | acpi_processor_notify_smm(THIS_MODULE); | ||
467 | |||
468 | return 0; | ||
469 | |||
470 | err_kfree_all: | ||
471 | kfree(centrino_model[cpu]->op_points); | ||
472 | err_kfree: | ||
473 | kfree(centrino_model[cpu]); | ||
474 | err_unreg: | ||
475 | acpi_processor_unregister_performance(&p, cpu); | ||
476 | dprintk(KERN_INFO PFX "invalid ACPI data\n"); | ||
477 | return (result); | ||
478 | } | ||
479 | #else | ||
480 | static inline int centrino_cpu_init_acpi(struct cpufreq_policy *policy) { return -ENODEV; } | ||
481 | #endif | ||
482 | |||
483 | static int centrino_cpu_init(struct cpufreq_policy *policy) | ||
484 | { | ||
485 | struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu]; | ||
486 | unsigned freq; | ||
487 | unsigned l, h; | ||
488 | int ret; | ||
489 | int i; | ||
490 | |||
491 | /* Only Intel makes Enhanced Speedstep-capable CPUs */ | ||
492 | if (cpu->x86_vendor != X86_VENDOR_INTEL || !cpu_has(cpu, X86_FEATURE_EST)) | ||
493 | return -ENODEV; | ||
494 | |||
495 | for (i = 0; i < N_IDS; i++) | ||
496 | if (centrino_verify_cpu_id(cpu, &cpu_ids[i])) | ||
497 | break; | ||
498 | |||
499 | if (i != N_IDS) | ||
500 | centrino_cpu[policy->cpu] = &cpu_ids[i]; | ||
501 | |||
502 | if (is_const_loops_cpu(policy->cpu)) { | ||
503 | centrino_driver.flags |= CPUFREQ_CONST_LOOPS; | ||
504 | } | ||
505 | |||
506 | if (centrino_cpu_init_acpi(policy)) { | ||
507 | if (policy->cpu != 0) | ||
508 | return -ENODEV; | ||
509 | |||
510 | if (!centrino_cpu[policy->cpu]) { | ||
511 | dprintk(KERN_INFO PFX "found unsupported CPU with " | ||
512 | "Enhanced SpeedStep: send /proc/cpuinfo to " | ||
513 | MAINTAINER "\n"); | ||
514 | return -ENODEV; | ||
515 | } | ||
516 | |||
517 | if (centrino_cpu_init_table(policy)) { | ||
518 | return -ENODEV; | ||
519 | } | ||
520 | } | ||
521 | |||
522 | /* Check to see if Enhanced SpeedStep is enabled, and try to | ||
523 | enable it if not. */ | ||
524 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | ||
525 | |||
526 | if (!(l & (1<<16))) { | ||
527 | l |= (1<<16); | ||
528 | dprintk("trying to enable Enhanced SpeedStep (%x)\n", l); | ||
529 | wrmsr(MSR_IA32_MISC_ENABLE, l, h); | ||
530 | |||
531 | /* check to see if it stuck */ | ||
532 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | ||
533 | if (!(l & (1<<16))) { | ||
534 | printk(KERN_INFO PFX "couldn't enable Enhanced SpeedStep\n"); | ||
535 | return -ENODEV; | ||
536 | } | ||
537 | } | ||
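A note on the bit being poked above, for readers without the manual open:

	/* Bit 16 of MSR_IA32_MISC_ENABLE is the "Enhanced Intel SpeedStep
	 * Technology Enable" bit.  Some BIOSes leave it clear, which is why
	 * the code sets it and then re-reads the MSR to verify the write
	 * stuck before trusting PERF_CTL transitions. */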
538 | |||
539 | freq = get_cur_freq(policy->cpu); | ||
540 | |||
541 | policy->governor = CPUFREQ_DEFAULT_GOVERNOR; | ||
542 | policy->cpuinfo.transition_latency = 10000; /* 10uS transition latency */ | ||
543 | policy->cur = freq; | ||
544 | |||
545 | dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur); | ||
546 | |||
547 | ret = cpufreq_frequency_table_cpuinfo(policy, centrino_model[policy->cpu]->op_points); | ||
548 | if (ret) | ||
549 | return (ret); | ||
550 | |||
551 | cpufreq_frequency_table_get_attr(centrino_model[policy->cpu]->op_points, policy->cpu); | ||
552 | |||
553 | return 0; | ||
554 | } | ||
555 | |||
556 | static int centrino_cpu_exit(struct cpufreq_policy *policy) | ||
557 | { | ||
558 | unsigned int cpu = policy->cpu; | ||
559 | |||
560 | if (!centrino_model[cpu]) | ||
561 | return -ENODEV; | ||
562 | |||
563 | cpufreq_frequency_table_put_attr(cpu); | ||
564 | |||
565 | #ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI | ||
566 | if (!centrino_model[cpu]->model_name) { | ||
567 | dprintk("unregistering and freeing ACPI data\n"); | ||
568 | acpi_processor_unregister_performance(&p, cpu); | ||
569 | kfree(centrino_model[cpu]->op_points); | ||
570 | kfree(centrino_model[cpu]); | ||
571 | } | ||
572 | #endif | ||
573 | |||
574 | centrino_model[cpu] = NULL; | ||
575 | |||
576 | return 0; | ||
577 | } | ||
578 | |||
579 | /** | ||
580 | * centrino_verify - verifies a new CPUFreq policy | ||
581 | * @policy: new policy | ||
582 | * | ||
583 | * Limit must be within this model's frequency range, with at least | ||
584 | * one border included. | ||
585 | */ | ||
586 | static int centrino_verify (struct cpufreq_policy *policy) | ||
587 | { | ||
588 | return cpufreq_frequency_table_verify(policy, centrino_model[policy->cpu]->op_points); | ||
589 | } | ||
590 | |||
591 | /** | ||
592 | * centrino_target - set a new CPUFreq policy | ||
593 | * @policy: new policy | ||
594 | * @target_freq: the target frequency | ||
595 | * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) | ||
596 | * | ||
597 | * Sets a new CPUFreq policy. | ||
598 | */ | ||
599 | static int centrino_target (struct cpufreq_policy *policy, | ||
600 | unsigned int target_freq, | ||
601 | unsigned int relation) | ||
602 | { | ||
603 | unsigned int newstate = 0; | ||
604 | unsigned int msr, oldmsr, h, cpu = policy->cpu; | ||
605 | struct cpufreq_freqs freqs; | ||
606 | cpumask_t saved_mask; | ||
607 | int retval; | ||
608 | |||
609 | if (centrino_model[cpu] == NULL) | ||
610 | return -ENODEV; | ||
611 | |||
612 | /* | ||
613 | * Support for SMP systems. | ||
614 | * Make sure we are running on the CPU that wants to change frequency | ||
615 | */ | ||
616 | saved_mask = current->cpus_allowed; | ||
617 | set_cpus_allowed(current, policy->cpus); | ||
618 | if (!cpu_isset(smp_processor_id(), policy->cpus)) { | ||
619 | dprintk("couldn't limit to CPUs in this domain\n"); | ||
620 | /* fall through to migrate_end so saved_mask is restored */ | ||
620 | retval = -EAGAIN; | ||
620 | goto migrate_end; | ||
621 | } | ||
622 | |||
623 | if (cpufreq_frequency_table_target(policy, centrino_model[cpu]->op_points, target_freq, | ||
624 | relation, &newstate)) { | ||
625 | retval = -EINVAL; | ||
626 | goto migrate_end; | ||
627 | } | ||
628 | |||
629 | msr = centrino_model[cpu]->op_points[newstate].index; | ||
630 | rdmsr(MSR_IA32_PERF_CTL, oldmsr, h); | ||
631 | |||
632 | if (msr == (oldmsr & 0xffff)) { | ||
633 | retval = 0; | ||
634 | dprintk("no change needed - msr was and needs to be %x\n", oldmsr); | ||
635 | goto migrate_end; | ||
636 | } | ||
637 | |||
638 | freqs.cpu = cpu; | ||
639 | freqs.old = extract_clock(oldmsr, cpu, 0); | ||
640 | freqs.new = extract_clock(msr, cpu, 0); | ||
641 | |||
642 | dprintk("target=%dkHz old=%d new=%d msr=%04x\n", | ||
643 | target_freq, freqs.old, freqs.new, msr); | ||
644 | |||
645 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
646 | |||
647 | /* all but 16 LSB are "reserved", so treat them with | ||
648 | care */ | ||
649 | oldmsr &= ~0xffff; | ||
650 | msr &= 0xffff; | ||
651 | oldmsr |= msr; | ||
652 | |||
653 | wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); | ||
654 | |||
655 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
656 | |||
657 | retval = 0; | ||
658 | migrate_end: | ||
659 | set_cpus_allowed(current, saved_mask); | ||
660 | return (retval); | ||
661 | } | ||
662 | |||
663 | static struct freq_attr* centrino_attr[] = { | ||
664 | &cpufreq_freq_attr_scaling_available_freqs, | ||
665 | NULL, | ||
666 | }; | ||
667 | |||
668 | static struct cpufreq_driver centrino_driver = { | ||
669 | .name = "centrino", /* should be speedstep-centrino, | ||
670 | but there's a 16 char limit */ | ||
671 | .init = centrino_cpu_init, | ||
672 | .exit = centrino_cpu_exit, | ||
673 | .verify = centrino_verify, | ||
674 | .target = centrino_target, | ||
675 | .get = get_cur_freq, | ||
676 | .attr = centrino_attr, | ||
677 | .owner = THIS_MODULE, | ||
678 | }; | ||
679 | |||
680 | |||
681 | /** | ||
682 | * centrino_init - initializes the Enhanced SpeedStep CPUFreq driver | ||
683 | * | ||
684 | * Initializes the Enhanced SpeedStep support. Returns -ENODEV on | ||
685 | * unsupported devices, -ENOENT if there's no voltage table for this | ||
686 | * particular CPU model, -EINVAL on problems during initialization, | ||
687 | * and zero on success. | ||
688 | * | ||
689 | * This is quite picky. Not only does the CPU have to advertise the | ||
690 | * "est" flag in the cpuid capability flags, we look for a specific | ||
691 | * CPU model and stepping, and we need to have the exact model name in | ||
692 | * our voltage tables. That is, be paranoid about not releasing | ||
693 | * someone's valuable magic smoke. | ||
694 | */ | ||
695 | static int __init centrino_init(void) | ||
696 | { | ||
697 | struct cpuinfo_x86 *cpu = cpu_data; | ||
698 | |||
699 | if (!cpu_has(cpu, X86_FEATURE_EST)) | ||
700 | return -ENODEV; | ||
701 | |||
702 | return cpufreq_register_driver(¢rino_driver); | ||
703 | } | ||
704 | |||
705 | static void __exit centrino_exit(void) | ||
706 | { | ||
707 | cpufreq_unregister_driver(¢rino_driver); | ||
708 | } | ||
709 | |||
710 | MODULE_AUTHOR ("Jeremy Fitzhardinge <jeremy@goop.org>"); | ||
711 | MODULE_DESCRIPTION ("Enhanced SpeedStep driver for Intel Pentium M processors."); | ||
712 | MODULE_LICENSE ("GPL"); | ||
713 | |||
714 | late_initcall(centrino_init); | ||
715 | module_exit(centrino_exit); | ||
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-est-common.h b/arch/i386/kernel/cpu/cpufreq/speedstep-est-common.h new file mode 100644 index 000000000000..5ce995c9d866 --- /dev/null +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-est-common.h | |||
@@ -0,0 +1,25 @@ | |||
1 | /* | ||
2 | * Routines common for drivers handling Enhanced Speedstep Technology | ||
3 | * Copyright (C) 2004 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> | ||
4 | * | ||
5 | * Licensed under the terms of the GNU GPL License version 2 -- see | ||
6 | * COPYING for details. | ||
7 | */ | ||
8 | |||
9 | static inline int is_const_loops_cpu(unsigned int cpu) | ||
10 | { | ||
11 | struct cpuinfo_x86 *c = cpu_data + cpu; | ||
12 | |||
13 | if (c->x86_vendor != X86_VENDOR_INTEL || !cpu_has(c, X86_FEATURE_EST)) | ||
14 | return 0; | ||
15 | |||
16 | /* | ||
17 | * on P-4s, the TSC runs with constant frequency independent of cpu freq | ||
18 | * when we use EST | ||
19 | */ | ||
20 | if (c->x86 == 0xf) | ||
21 | return 1; | ||
22 | |||
23 | return 0; | ||
24 | } | ||
25 | |||
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c b/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c new file mode 100644 index 000000000000..5b7d18a06afa --- /dev/null +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c | |||
@@ -0,0 +1,424 @@ | |||
1 | /* | ||
2 | * (C) 2001 Dave Jones, Arjan van de ven. | ||
3 | * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de> | ||
4 | * | ||
5 | * Licensed under the terms of the GNU GPL License version 2. | ||
6 | * Based upon reverse engineered information, and on Intel documentation | ||
7 | * for chipsets ICH2-M and ICH3-M. | ||
8 | * | ||
9 | * Many thanks to Ducrot Bruno for finding and fixing the last | ||
10 | * "missing link" for ICH2-M/ICH3-M support, and to Thomas Winkler | ||
11 | * for extensive testing. | ||
12 | * | ||
13 | * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* | ||
14 | */ | ||
15 | |||
16 | |||
17 | /********************************************************************* | ||
18 | * SPEEDSTEP - DEFINITIONS * | ||
19 | *********************************************************************/ | ||
20 | |||
21 | #include <linux/kernel.h> | ||
22 | #include <linux/module.h> | ||
23 | #include <linux/init.h> | ||
24 | #include <linux/cpufreq.h> | ||
25 | #include <linux/pci.h> | ||
26 | #include <linux/slab.h> | ||
27 | |||
28 | #include "speedstep-lib.h" | ||
29 | |||
30 | |||
31 | /* speedstep_chipset: | ||
32 | * It is necessary to know which chipset is used. As accesses to | ||
33 | * this device occur at various places in this module, we need a | ||
34 | * static struct pci_dev * pointing to that device. | ||
35 | */ | ||
36 | static struct pci_dev *speedstep_chipset_dev; | ||
37 | |||
38 | |||
39 | /* speedstep_processor | ||
40 | */ | ||
41 | static unsigned int speedstep_processor = 0; | ||
42 | |||
43 | |||
44 | /* | ||
45 | * There are only two frequency states for each processor. Values | ||
46 | * are in kHz for the time being. | ||
47 | */ | ||
48 | static struct cpufreq_frequency_table speedstep_freqs[] = { | ||
49 | {SPEEDSTEP_HIGH, 0}, | ||
50 | {SPEEDSTEP_LOW, 0}, | ||
51 | {0, CPUFREQ_TABLE_END}, | ||
52 | }; | ||
53 | |||
54 | |||
55 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-ich", msg) | ||
56 | |||
57 | |||
58 | /** | ||
59 | * speedstep_set_state - set the SpeedStep state | ||
60 | * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) | ||
61 | * | ||
62 | * Tries to change the SpeedStep state. | ||
63 | */ | ||
64 | static void speedstep_set_state (unsigned int state) | ||
65 | { | ||
66 | u32 pmbase; | ||
67 | u8 pm2_blk; | ||
68 | u8 value; | ||
69 | unsigned long flags; | ||
70 | |||
71 | if (!speedstep_chipset_dev || (state > 0x1)) | ||
72 | return; | ||
73 | |||
74 | /* get PMBASE */ | ||
75 | pci_read_config_dword(speedstep_chipset_dev, 0x40, &pmbase); | ||
76 | if (!(pmbase & 0x01)) { | ||
77 | printk(KERN_ERR "speedstep-ich: could not find speedstep register\n"); | ||
78 | return; | ||
79 | } | ||
80 | |||
81 | pmbase &= 0xFFFFFFFE; | ||
82 | if (!pmbase) { | ||
83 | printk(KERN_ERR "speedstep-ich: could not find speedstep register\n"); | ||
84 | return; | ||
85 | } | ||
86 | |||
87 | /* Disable IRQs */ | ||
88 | local_irq_save(flags); | ||
89 | |||
90 | /* read state */ | ||
91 | value = inb(pmbase + 0x50); | ||
92 | |||
93 | dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value); | ||
94 | |||
95 | /* write new state */ | ||
96 | value &= 0xFE; | ||
97 | value |= state; | ||
98 | |||
99 | dprintk("writing 0x%x to pmbase 0x%x + 0x50\n", value, pmbase); | ||
100 | |||
101 | /* Disable bus master arbitration */ | ||
102 | pm2_blk = inb(pmbase + 0x20); | ||
103 | pm2_blk |= 0x01; | ||
104 | outb(pm2_blk, (pmbase + 0x20)); | ||
105 | |||
106 | /* Actual transition */ | ||
107 | outb(value, (pmbase + 0x50)); | ||
108 | |||
109 | /* Restore bus master arbitration */ | ||
110 | pm2_blk &= 0xfe; | ||
111 | outb(pm2_blk, (pmbase + 0x20)); | ||
112 | |||
113 | /* check if transition was successful */ | ||
114 | value = inb(pmbase + 0x50); | ||
115 | |||
116 | /* Enable IRQs */ | ||
117 | local_irq_restore(flags); | ||
118 | |||
119 | dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value); | ||
120 | |||
121 | if (state == (value & 0x1)) { | ||
122 | dprintk("change to %u MHz succeeded\n", (speedstep_get_processor_frequency(speedstep_processor) / 1000)); | ||
123 | } else { | ||
124 | printk (KERN_ERR "cpufreq: change failed - I/O error\n"); | ||
125 | } | ||
126 | |||
127 | return; | ||
128 | } | ||
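One design note on the I/O sequence above (my reading of the chipset documentation):

	/* PMBASE + 0x20 is the PM2 control block; bit 0 there (ARB_DIS)
	 * disables bus-master arbitration, so no bus master can touch the
	 * bus while the write to PMBASE + 0x50 flips the SpeedStep state.
	 * The bit is cleared again once the transition write is done. */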
129 | |||
130 | |||
131 | /** | ||
132 | * speedstep_activate - activate SpeedStep control in the chipset | ||
133 | * | ||
134 | * Tries to activate the SpeedStep status and control registers. | ||
135 | * Returns -EINVAL on an unsupported chipset, and zero on success. | ||
136 | */ | ||
137 | static int speedstep_activate (void) | ||
138 | { | ||
139 | u16 value = 0; | ||
140 | |||
141 | if (!speedstep_chipset_dev) | ||
142 | return -EINVAL; | ||
143 | |||
144 | pci_read_config_word(speedstep_chipset_dev, 0x00A0, &value); | ||
145 | if (!(value & 0x08)) { | ||
146 | value |= 0x08; | ||
147 | dprintk("activating SpeedStep (TM) registers\n"); | ||
148 | pci_write_config_word(speedstep_chipset_dev, 0x00A0, value); | ||
149 | } | ||
150 | |||
151 | return 0; | ||
152 | } | ||
153 | |||
154 | |||
155 | /** | ||
156 | * speedstep_detect_chipset - detect the Southbridge which contains SpeedStep logic | ||
157 | * | ||
158 | * Detects ICH2-M, ICH3-M and ICH4-M so far. The pci_dev points to | ||
159 | * the LPC bridge / PM module which contains all power-management | ||
160 | * functions. Returns the SPEEDSTEP_CHIPSET_-number for the detected | ||
161 | * chipset, or zero on failure. | ||
162 | */ | ||
163 | static unsigned int speedstep_detect_chipset (void) | ||
164 | { | ||
165 | speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, | ||
166 | PCI_DEVICE_ID_INTEL_82801DB_12, | ||
167 | PCI_ANY_ID, | ||
168 | PCI_ANY_ID, | ||
169 | NULL); | ||
170 | if (speedstep_chipset_dev) | ||
171 | return 4; /* 4-M */ | ||
172 | |||
173 | speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, | ||
174 | PCI_DEVICE_ID_INTEL_82801CA_12, | ||
175 | PCI_ANY_ID, | ||
176 | PCI_ANY_ID, | ||
177 | NULL); | ||
178 | if (speedstep_chipset_dev) | ||
179 | return 3; /* 3-M */ | ||
180 | |||
181 | |||
182 | speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, | ||
183 | PCI_DEVICE_ID_INTEL_82801BA_10, | ||
184 | PCI_ANY_ID, | ||
185 | PCI_ANY_ID, | ||
186 | NULL); | ||
187 | if (speedstep_chipset_dev) { | ||
188 | /* speedstep.c causes lockups on Dell Inspirons 8000 and | ||
189 | * 8100 which use a pretty old revision of the 82815 | ||
190 | * host bridge. Abort on these systems. | ||
191 | */ | ||
192 | static struct pci_dev *hostbridge; | ||
193 | u8 rev = 0; | ||
194 | |||
195 | hostbridge = pci_get_subsys(PCI_VENDOR_ID_INTEL, | ||
196 | PCI_DEVICE_ID_INTEL_82815_MC, | ||
197 | PCI_ANY_ID, | ||
198 | PCI_ANY_ID, | ||
199 | NULL); | ||
200 | |||
201 | if (!hostbridge) | ||
202 | return 2; /* 2-M */ | ||
203 | |||
204 | pci_read_config_byte(hostbridge, PCI_REVISION_ID, &rev); | ||
205 | if (rev < 5) { | ||
206 | dprintk("hostbridge does not support speedstep\n"); | ||
207 | speedstep_chipset_dev = NULL; | ||
208 | pci_dev_put(hostbridge); | ||
209 | return 0; | ||
210 | } | ||
211 | |||
212 | pci_dev_put(hostbridge); | ||
213 | return 2; /* 2-M */ | ||
214 | } | ||
215 | |||
216 | return 0; | ||
217 | } | ||
218 | |||
219 | static unsigned int _speedstep_get(cpumask_t cpus) | ||
220 | { | ||
221 | unsigned int speed; | ||
222 | cpumask_t cpus_allowed; | ||
223 | |||
224 | cpus_allowed = current->cpus_allowed; | ||
225 | set_cpus_allowed(current, cpus); | ||
226 | speed = speedstep_get_processor_frequency(speedstep_processor); | ||
227 | set_cpus_allowed(current, cpus_allowed); | ||
228 | dprintk("detected %u kHz as current frequency\n", speed); | ||
229 | return speed; | ||
230 | } | ||
231 | |||
232 | static unsigned int speedstep_get(unsigned int cpu) | ||
233 | { | ||
234 | return _speedstep_get(cpumask_of_cpu(cpu)); | ||
235 | } | ||
236 | |||
237 | /** | ||
238 | * speedstep_target - set a new CPUFreq policy | ||
239 | * @policy: new policy | ||
240 | * @target_freq: the target frequency | ||
241 | * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) | ||
242 | * | ||
243 | * Sets a new CPUFreq policy. | ||
244 | */ | ||
245 | static int speedstep_target (struct cpufreq_policy *policy, | ||
246 | unsigned int target_freq, | ||
247 | unsigned int relation) | ||
248 | { | ||
249 | unsigned int newstate = 0; | ||
250 | struct cpufreq_freqs freqs; | ||
251 | cpumask_t cpus_allowed; | ||
252 | int i; | ||
253 | |||
254 | if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate)) | ||
255 | return -EINVAL; | ||
256 | |||
257 | freqs.old = _speedstep_get(policy->cpus); | ||
258 | freqs.new = speedstep_freqs[newstate].frequency; | ||
259 | freqs.cpu = policy->cpu; | ||
260 | |||
261 | dprintk("transiting from %u to %u kHz\n", freqs.old, freqs.new); | ||
262 | |||
263 | /* no transition necessary */ | ||
264 | if (freqs.old == freqs.new) | ||
265 | return 0; | ||
266 | |||
267 | cpus_allowed = current->cpus_allowed; | ||
268 | |||
269 | for_each_cpu_mask(i, policy->cpus) { | ||
270 | freqs.cpu = i; | ||
271 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
272 | } | ||
273 | |||
274 | /* switch to physical CPU where state is to be changed */ | ||
275 | set_cpus_allowed(current, policy->cpus); | ||
276 | |||
277 | speedstep_set_state(newstate); | ||
278 | |||
279 | /* allow to be run on all CPUs */ | ||
280 | set_cpus_allowed(current, cpus_allowed); | ||
281 | |||
282 | for_each_cpu_mask(i, policy->cpus) { | ||
283 | freqs.cpu = i; | ||
284 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
285 | } | ||
286 | |||
287 | return 0; | ||
288 | } | ||
289 | |||
290 | |||
291 | /** | ||
292 | * speedstep_verify - verifies a new CPUFreq policy | ||
293 | * @policy: new policy | ||
294 | * | ||
295 | * Limit must be within speedstep_low_freq and speedstep_high_freq, with | ||
296 | * at least one border included. | ||
297 | */ | ||
298 | static int speedstep_verify (struct cpufreq_policy *policy) | ||
299 | { | ||
300 | return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]); | ||
301 | } | ||
302 | |||
303 | |||
304 | static int speedstep_cpu_init(struct cpufreq_policy *policy) | ||
305 | { | ||
306 | int result = 0; | ||
307 | unsigned int speed; | ||
308 | cpumask_t cpus_allowed; | ||
309 | |||
310 | /* only run on CPU to be set, or on its sibling */ | ||
311 | #ifdef CONFIG_SMP | ||
312 | policy->cpus = cpu_sibling_map[policy->cpu]; | ||
313 | #endif | ||
314 | |||
315 | cpus_allowed = current->cpus_allowed; | ||
316 | set_cpus_allowed(current, policy->cpus); | ||
317 | |||
318 | /* detect low and high frequency */ | ||
319 | result = speedstep_get_freqs(speedstep_processor, | ||
320 | &speedstep_freqs[SPEEDSTEP_LOW].frequency, | ||
321 | &speedstep_freqs[SPEEDSTEP_HIGH].frequency, | ||
322 | &speedstep_set_state); | ||
323 | set_cpus_allowed(current, cpus_allowed); | ||
324 | if (result) | ||
325 | return result; | ||
326 | |||
327 | /* get current speed setting */ | ||
328 | speed = _speedstep_get(policy->cpus); | ||
329 | if (!speed) | ||
330 | return -EIO; | ||
331 | |||
332 | dprintk("currently at %s speed setting - %i MHz\n", | ||
333 | (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? "low" : "high", | ||
334 | (speed / 1000)); | ||
335 | |||
336 | /* cpuinfo and default policy values */ | ||
337 | policy->governor = CPUFREQ_DEFAULT_GOVERNOR; | ||
338 | policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; | ||
339 | policy->cur = speed; | ||
340 | |||
341 | result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs); | ||
342 | if (result) | ||
343 | return (result); | ||
344 | |||
345 | cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu); | ||
346 | |||
347 | return 0; | ||
348 | } | ||
349 | |||
350 | |||
351 | static int speedstep_cpu_exit(struct cpufreq_policy *policy) | ||
352 | { | ||
353 | cpufreq_frequency_table_put_attr(policy->cpu); | ||
354 | return 0; | ||
355 | } | ||
356 | |||
357 | static struct freq_attr* speedstep_attr[] = { | ||
358 | &cpufreq_freq_attr_scaling_available_freqs, | ||
359 | NULL, | ||
360 | }; | ||
361 | |||
362 | |||
363 | static struct cpufreq_driver speedstep_driver = { | ||
364 | .name = "speedstep-ich", | ||
365 | .verify = speedstep_verify, | ||
366 | .target = speedstep_target, | ||
367 | .init = speedstep_cpu_init, | ||
368 | .exit = speedstep_cpu_exit, | ||
369 | .get = speedstep_get, | ||
370 | .owner = THIS_MODULE, | ||
371 | .attr = speedstep_attr, | ||
372 | }; | ||
373 | |||
374 | |||
375 | /** | ||
376 | * speedstep_init - initializes the SpeedStep CPUFreq driver | ||
377 | * | ||
378 | * Initializes the SpeedStep support. Returns -ENODEV on unsupported | ||
379 | * devices, -EINVAL on problems during initialization, and zero on | ||
380 | * success. | ||
381 | */ | ||
382 | static int __init speedstep_init(void) | ||
383 | { | ||
384 | /* detect processor */ | ||
385 | speedstep_processor = speedstep_detect_processor(); | ||
386 | if (!speedstep_processor) { | ||
387 | dprintk("Intel(R) SpeedStep(TM) capable processor not found\n"); | ||
388 | return -ENODEV; | ||
389 | } | ||
390 | |||
391 | /* detect chipset */ | ||
392 | if (!speedstep_detect_chipset()) { | ||
393 | dprintk("Intel(R) SpeedStep(TM) for this chipset not (yet) available.\n"); | ||
394 | return -ENODEV; | ||
395 | } | ||
396 | |||
397 | /* activate speedstep support */ | ||
398 | if (speedstep_activate()) { | ||
399 | pci_dev_put(speedstep_chipset_dev); | ||
400 | return -EINVAL; | ||
401 | } | ||
402 | |||
403 | return cpufreq_register_driver(&speedstep_driver); | ||
404 | } | ||
405 | |||
406 | |||
407 | /** | ||
408 | * speedstep_exit - unregisters SpeedStep support | ||
409 | * | ||
410 | * Unregisters SpeedStep support. | ||
411 | */ | ||
412 | static void __exit speedstep_exit(void) | ||
413 | { | ||
414 | pci_dev_put(speedstep_chipset_dev); | ||
415 | cpufreq_unregister_driver(&speedstep_driver); | ||
416 | } | ||
417 | |||
418 | |||
419 | MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>, Dominik Brodowski <linux@brodo.de>"); | ||
420 | MODULE_DESCRIPTION ("Speedstep driver for Intel mobile processors on chipsets with ICH-M southbridges."); | ||
421 | MODULE_LICENSE ("GPL"); | ||
422 | |||
423 | module_init(speedstep_init); | ||
424 | module_exit(speedstep_exit); | ||
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c new file mode 100644 index 000000000000..8ba430a9c3a2 --- /dev/null +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c | |||
@@ -0,0 +1,385 @@ | |||
1 | /* | ||
2 | * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de> | ||
3 | * | ||
4 | * Licensed under the terms of the GNU GPL License version 2. | ||
5 | * | ||
6 | * Library for common functions for Intel SpeedStep v.1 and v.2 support | ||
7 | * | ||
8 | * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* | ||
9 | */ | ||
10 | |||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/module.h> | ||
13 | #include <linux/moduleparam.h> | ||
14 | #include <linux/init.h> | ||
15 | #include <linux/cpufreq.h> | ||
16 | #include <linux/pci.h> | ||
17 | #include <linux/slab.h> | ||
18 | |||
19 | #include <asm/msr.h> | ||
20 | #include "speedstep-lib.h" | ||
21 | |||
22 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-lib", msg) | ||
23 | |||
24 | #ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK | ||
25 | static int relaxed_check = 0; | ||
26 | #else | ||
27 | #define relaxed_check 0 | ||
28 | #endif | ||
29 | |||
30 | /********************************************************************* | ||
31 | * GET PROCESSOR CORE SPEED IN KHZ * | ||
32 | *********************************************************************/ | ||
33 | |||
34 | static unsigned int pentium3_get_frequency (unsigned int processor) | ||
35 | { | ||
36 | /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */ | ||
37 | struct { | ||
38 | unsigned int ratio; /* Frequency Multiplier (x10) */ | ||
39 | u8 bitmap; /* power on configuration bits | ||
40 | [27, 25:22] (in MSR 0x2a) */ | ||
41 | } msr_decode_mult [] = { | ||
42 | { 30, 0x01 }, | ||
43 | { 35, 0x05 }, | ||
44 | { 40, 0x02 }, | ||
45 | { 45, 0x06 }, | ||
46 | { 50, 0x00 }, | ||
47 | { 55, 0x04 }, | ||
48 | { 60, 0x0b }, | ||
49 | { 65, 0x0f }, | ||
50 | { 70, 0x09 }, | ||
51 | { 75, 0x0d }, | ||
52 | { 80, 0x0a }, | ||
53 | { 85, 0x26 }, | ||
54 | { 90, 0x20 }, | ||
55 | { 100, 0x2b }, | ||
56 | { 0, 0xff } /* error or unknown value */ | ||
57 | }; | ||
58 | |||
59 | /* PIII(-M) FSB settings: see table b1-b of 24547206.pdf */ | ||
60 | struct { | ||
61 | unsigned int value; /* Front Side Bus speed in MHz */ | ||
62 | u8 bitmap; /* power on configuration bits [19:18] | ||
63 | (in MSR 0x2a) */ | ||
64 | } msr_decode_fsb [] = { | ||
65 | { 66, 0x0 }, | ||
66 | { 100, 0x2 }, | ||
67 | { 133, 0x1 }, | ||
68 | { 0, 0xff} | ||
69 | }; | ||
70 | |||
71 | u32 msr_lo, msr_tmp; | ||
72 | int i = 0, j = 0; | ||
73 | |||
74 | /* read MSR 0x2a - we only need the low 32 bits */ | ||
75 | rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); | ||
76 | dprintk("P3 - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); | ||
77 | msr_tmp = msr_lo; | ||
78 | |||
79 | /* decode the FSB */ | ||
80 | msr_tmp &= 0x00c0000; | ||
81 | msr_tmp >>= 18; | ||
82 | while (msr_tmp != msr_decode_fsb[i].bitmap) { | ||
83 | if (msr_decode_fsb[i].bitmap == 0xff) | ||
84 | return 0; | ||
85 | i++; | ||
86 | } | ||
87 | |||
88 | /* decode the multiplier */ | ||
89 | if (processor == SPEEDSTEP_PROCESSOR_PIII_C_EARLY) { | ||
90 | dprintk("workaround for early PIIIs\n"); | ||
91 | msr_lo &= 0x03c00000; | ||
92 | } else | ||
93 | msr_lo &= 0x0bc00000; | ||
94 | msr_lo >>= 22; | ||
95 | while (msr_lo != msr_decode_mult[j].bitmap) { | ||
96 | if (msr_decode_mult[j].bitmap == 0xff) | ||
97 | return 0; | ||
98 | j++; | ||
99 | } | ||
100 | |||
101 | dprintk("speed is %u\n", (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100)); | ||
102 | |||
103 | return (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100); | ||
104 | } | ||
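A worked decode, with illustrative bit values of my own choosing:

	/* If bits 19:18 of msr_lo are 0x2, the FSB is 100 MHz; if the
	 * multiplier bits decode to ratio 80 (a x10 value, i.e. 8.0x),
	 * the result is 80 * 100 * 100 = 800000 kHz - an 800 MHz part. */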
105 | |||
106 | |||
107 | static unsigned int pentiumM_get_frequency(void) | ||
108 | { | ||
109 | u32 msr_lo, msr_tmp; | ||
110 | |||
111 | rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); | ||
112 | dprintk("PM - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); | ||
113 | |||
114 | /* see table B-2 of 24547212.pdf */ | ||
115 | if (msr_lo & 0x00040000) { | ||
116 | printk(KERN_DEBUG "speedstep-lib: PM - invalid FSB: 0x%x 0x%x\n", msr_lo, msr_tmp); | ||
117 | return 0; | ||
118 | } | ||
119 | |||
120 | msr_tmp = (msr_lo >> 22) & 0x1f; | ||
121 | dprintk("bits 22-26 are 0x%x, speed is %u\n", msr_tmp, (msr_tmp * 100 * 1000)); | ||
122 | |||
123 | return (msr_tmp * 100 * 1000); | ||
124 | } | ||
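For example (my own numbers):

	/* Bits 22-26 holding 13 decode to 13 * 100 * 1000 = 1300000 kHz,
	 * i.e. a 13x ratio on the Pentium M's fixed 100 MHz bus. */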
125 | |||
126 | |||
127 | static unsigned int pentium4_get_frequency(void) | ||
128 | { | ||
129 | struct cpuinfo_x86 *c = &boot_cpu_data; | ||
130 | u32 msr_lo, msr_hi, mult; | ||
131 | unsigned int fsb = 0; | ||
132 | |||
133 | rdmsr(0x2c, msr_lo, msr_hi); | ||
134 | |||
135 | dprintk("P4 - MSR_EBC_FREQUENCY_ID: 0x%x 0x%x\n", msr_lo, msr_hi); | ||
136 | |||
137 | /* decode the FSB: see IA-32 Intel (C) Architecture Software | ||
138 | * Developer's Manual, Volume 3: System Programming Guide, | ||
139 | * revision #12 in Table B-1: MSRs in the Pentium 4 and | ||
140 | * Intel Xeon Processors, on page B-4 and B-5. | ||
141 | */ | ||
142 | if (c->x86_model < 2) | ||
143 | fsb = 100 * 1000; | ||
144 | else { | ||
145 | u8 fsb_code = (msr_lo >> 16) & 0x7; | ||
146 | switch (fsb_code) { | ||
147 | case 0: | ||
148 | fsb = 100 * 1000; | ||
149 | break; | ||
150 | case 1: | ||
151 | fsb = 13333 * 10; | ||
152 | break; | ||
153 | case 2: | ||
154 | fsb = 200 * 1000; | ||
155 | break; | ||
156 | } | ||
157 | } | ||
158 | |||
159 | if (!fsb) | ||
160 | printk(KERN_DEBUG "speedstep-lib: couldn't detect FSB speed. Please send an e-mail to <linux@brodo.de>\n"); | ||
161 | |||
162 | /* Multiplier. */ | ||
163 | if (c->x86_model < 2) | ||
164 | mult = msr_lo >> 27; | ||
165 | else | ||
166 | mult = msr_lo >> 24; | ||
167 | |||
168 | dprintk("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n", fsb, mult, (fsb * mult)); | ||
169 | |||
170 | return (fsb * mult); | ||
171 | } | ||
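A worked example of the P4 path above, with illustrative values:

	/* Example: fsb_code 1 -> 13333 * 10 = 133330 kHz (the 133 MHz bus);
	 * with a multiplier field of 16 the result is
	 *	133330 * 16 = 2133280 kHz, i.e. roughly a 2.13 GHz part. */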
172 | |||
173 | |||
174 | unsigned int speedstep_get_processor_frequency(unsigned int processor) | ||
175 | { | ||
176 | switch (processor) { | ||
177 | case SPEEDSTEP_PROCESSOR_PM: | ||
178 | return pentiumM_get_frequency(); | ||
179 | case SPEEDSTEP_PROCESSOR_P4D: | ||
180 | case SPEEDSTEP_PROCESSOR_P4M: | ||
181 | return pentium4_get_frequency(); | ||
182 | case SPEEDSTEP_PROCESSOR_PIII_T: | ||
183 | case SPEEDSTEP_PROCESSOR_PIII_C: | ||
184 | case SPEEDSTEP_PROCESSOR_PIII_C_EARLY: | ||
185 | return pentium3_get_frequency(processor); | ||
186 | default: | ||
187 | return 0; | ||
188 | } | ||
189 | return 0; | ||
190 | } | ||
191 | EXPORT_SYMBOL_GPL(speedstep_get_processor_frequency); | ||
192 | |||
193 | |||
194 | /********************************************************************* | ||
195 | * DETECT SPEEDSTEP-CAPABLE PROCESSOR * | ||
196 | *********************************************************************/ | ||
197 | |||
198 | unsigned int speedstep_detect_processor (void) | ||
199 | { | ||
200 | struct cpuinfo_x86 *c = cpu_data; | ||
201 | u32 ebx, msr_lo, msr_hi; | ||
202 | |||
203 | dprintk("x86: %x, model: %x\n", c->x86, c->x86_model); | ||
204 | |||
205 | if ((c->x86_vendor != X86_VENDOR_INTEL) || | ||
206 | ((c->x86 != 6) && (c->x86 != 0xF))) | ||
207 | return 0; | ||
208 | |||
209 | if (c->x86 == 0xF) { | ||
210 | /* Intel Mobile Pentium 4-M | ||
211 | * or Intel Mobile Pentium 4 with 533 MHz FSB */ | ||
212 | if (c->x86_model != 2) | ||
213 | return 0; | ||
214 | |||
215 | ebx = cpuid_ebx(0x00000001); | ||
216 | ebx &= 0x000000FF; | ||
217 | |||
218 | dprintk("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask); | ||
219 | |||
220 | switch (c->x86_mask) { | ||
221 | case 4: | ||
222 | /* | ||
223 | * B-stepping [M-P4-M] | ||
224 | * sample has ebx = 0x0f, production has 0x0e. | ||
225 | */ | ||
226 | if ((ebx == 0x0e) || (ebx == 0x0f)) | ||
227 | return SPEEDSTEP_PROCESSOR_P4M; | ||
228 | break; | ||
229 | case 7: | ||
230 | /* | ||
231 | * C-stepping [M-P4-M] | ||
232 | * needs to have ebx=0x0e, else it's a Celeron: | ||
233 | * cf. 25130917.pdf / page 7, footnote 5 even | ||
234 | * though 25072120.pdf / page 7 doesn't say | ||
235 | * samples are only of B-stepping... | ||
236 | */ | ||
237 | if (ebx == 0x0e) | ||
238 | return SPEEDSTEP_PROCESSOR_P4M; | ||
239 | break; | ||
240 | case 9: | ||
241 | /* | ||
242 | * D-stepping [M-P4-M or M-P4/533] | ||
243 | * | ||
244 | * this is totally strange: CPUID 0x0F29 is | ||
245 | * used by M-P4-M, M-P4/533 and(!) Celeron CPUs. | ||
246 | * The latter need to be sorted out as they don't | ||
247 | * support speedstep. | ||
248 | * Celerons with CPUID 0x0F29 may have either | ||
249 | * ebx=0x8 or 0xf -- 25130917.pdf doesn't say anything | ||
250 | * specific. | ||
251 | * M-P4-Ms may have either ebx=0xe or 0xf [see above] | ||
252 | * M-P4/533 have either ebx=0xe or 0xf. [25317607.pdf] | ||
253 | * also, M-P4M HTs have ebx=0x8, too | ||
254 | * For now, they are distinguished by the model_id string | ||
255 | */ | ||
256 | if ((ebx == 0x0e) || (strstr(c->x86_model_id,"Mobile Intel(R) Pentium(R) 4") != NULL)) | ||
257 | return SPEEDSTEP_PROCESSOR_P4M; | ||
258 | break; | ||
259 | default: | ||
260 | break; | ||
261 | } | ||
262 | return 0; | ||
263 | } | ||
264 | |||
265 | switch (c->x86_model) { | ||
266 | case 0x0B: /* Intel PIII [Tualatin] */ | ||
267 | /* cpuid_ebx(1) is 0x04 for desktop PIII, | ||
268 | 0x06 for mobile PIII-M */ | ||
269 | ebx = cpuid_ebx(0x00000001); | ||
270 | dprintk("ebx is %x\n", ebx); | ||
271 | |||
272 | ebx &= 0x000000FF; | ||
273 | |||
274 | if (ebx != 0x06) | ||
275 | return 0; | ||
276 | |||
277 | /* So far all PIII-M processors support SpeedStep. See | ||
278 | * Intel's 24540640.pdf of June 2003 | ||
279 | */ | ||
280 | |||
281 | return SPEEDSTEP_PROCESSOR_PIII_T; | ||
282 | |||
283 | case 0x08: /* Intel PIII [Coppermine] */ | ||
284 | |||
285 | /* all mobile PIII Coppermines have FSB 100 MHz | ||
286 | * ==> sort out a few desktop PIIIs. */ | ||
287 | rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_hi); | ||
288 | dprintk("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n", msr_lo, msr_hi); | ||
289 | msr_lo &= 0x00c0000; | ||
290 | if (msr_lo != 0x0080000) | ||
291 | return 0; | ||
292 | |||
293 | /* | ||
294 | * If the processor is a mobile version, | ||
295 | * platform ID has bit 50 set; | ||
296 | * it has SpeedStep technology if either | ||
297 | * bit 56 or 57 is set | ||
298 | */ | ||
299 | rdmsr(MSR_IA32_PLATFORM_ID, msr_lo, msr_hi); | ||
300 | dprintk("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n", msr_lo, msr_hi); | ||
301 | if ((msr_hi & (1<<18)) && (relaxed_check ? 1 : (msr_hi & (3<<24)))) { | ||
302 | if (c->x86_mask == 0x01) { | ||
303 | dprintk("early PIII version\n"); | ||
304 | return SPEEDSTEP_PROCESSOR_PIII_C_EARLY; | ||
305 | } else | ||
306 | return SPEEDSTEP_PROCESSOR_PIII_C; | ||
307 | } | ||
308 | |||
309 | default: | ||
310 | return 0; | ||
311 | } | ||
312 | } | ||
313 | EXPORT_SYMBOL_GPL(speedstep_detect_processor); | ||
314 | |||
315 | |||
316 | /********************************************************************* | ||
317 | * DETECT SPEEDSTEP SPEEDS * | ||
318 | *********************************************************************/ | ||
319 | |||
320 | unsigned int speedstep_get_freqs(unsigned int processor, | ||
321 | unsigned int *low_speed, | ||
322 | unsigned int *high_speed, | ||
323 | void (*set_state) (unsigned int state)) | ||
324 | { | ||
325 | unsigned int prev_speed; | ||
326 | unsigned int ret = 0; | ||
327 | unsigned long flags; | ||
328 | |||
329 | if ((!processor) || (!low_speed) || (!high_speed) || (!set_state)) | ||
330 | return -EINVAL; | ||
331 | |||
332 | dprintk("trying to determine both speeds\n"); | ||
333 | |||
334 | /* get current speed */ | ||
335 | prev_speed = speedstep_get_processor_frequency(processor); | ||
336 | if (!prev_speed) | ||
337 | return -EIO; | ||
338 | |||
339 | dprintk("previous seped is %u\n", prev_speed); | ||
340 | |||
341 | local_irq_save(flags); | ||
342 | |||
343 | /* switch to low state */ | ||
344 | set_state(SPEEDSTEP_LOW); | ||
345 | *low_speed = speedstep_get_processor_frequency(processor); | ||
346 | if (!*low_speed) { | ||
347 | ret = -EIO; | ||
348 | goto out; | ||
349 | } | ||
350 | |||
351 | dprintk("low seped is %u\n", *low_speed); | ||
352 | |||
353 | /* switch to high state */ | ||
354 | set_state(SPEEDSTEP_HIGH); | ||
355 | *high_speed = speedstep_get_processor_frequency(processor); | ||
356 | if (!*high_speed) { | ||
357 | ret = -EIO; | ||
358 | goto out; | ||
359 | } | ||
360 | |||
361 | dprintk("high seped is %u\n", *high_speed); | ||
362 | |||
363 | if (*low_speed == *high_speed) { | ||
364 | ret = -ENODEV; | ||
365 | goto out; | ||
366 | } | ||
367 | |||
368 | /* switch to previous state, if necessary */ | ||
369 | if (*high_speed != prev_speed) | ||
370 | set_state(SPEEDSTEP_LOW); | ||
371 | |||
372 | out: | ||
373 | local_irq_restore(flags); | ||
374 | return (ret); | ||
375 | } | ||
376 | EXPORT_SYMBOL_GPL(speedstep_get_freqs); | ||
377 | |||
378 | #ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK | ||
379 | module_param(relaxed_check, int, 0444); | ||
380 | MODULE_PARM_DESC(relaxed_check, "Don't do all checks for speedstep capability."); | ||
381 | #endif | ||
382 | |||
383 | MODULE_AUTHOR ("Dominik Brodowski <linux@brodo.de>"); | ||
384 | MODULE_DESCRIPTION ("Library for Intel SpeedStep 1 or 2 cpufreq drivers."); | ||
385 | MODULE_LICENSE ("GPL"); | ||
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h new file mode 100644 index 000000000000..261a2c9b7f6b --- /dev/null +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h | |||
@@ -0,0 +1,47 @@ | |||
1 | /* | ||
2 | * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de> | ||
3 | * | ||
4 | * Licensed under the terms of the GNU GPL License version 2. | ||
5 | * | ||
6 | * Library for common functions for Intel SpeedStep v.1 and v.2 support | ||
7 | * | ||
8 | * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* | ||
9 | */ | ||
10 | |||
11 | |||
12 | |||
13 | /* processors */ | ||
14 | |||
15 | #define SPEEDSTEP_PROCESSOR_PIII_C_EARLY 0x00000001 /* Coppermine core */ | ||
16 | #define SPEEDSTEP_PROCESSOR_PIII_C 0x00000002 /* Coppermine core */ | ||
17 | #define SPEEDSTEP_PROCESSOR_PIII_T 0x00000003 /* Tualatin core */ | ||
18 | #define SPEEDSTEP_PROCESSOR_P4M 0x00000004 /* P4-M */ | ||
19 | |||
20 | /* the following processors are not speedstep-capable and are not auto-detected | ||
21 | * in speedstep_detect_processor(). However, their speed can be detected using | ||
22 | * the speedstep_get_processor_frequency() call. */ | ||
23 | #define SPEEDSTEP_PROCESSOR_PM 0xFFFFFF03 /* Pentium M */ | ||
24 | #define SPEEDSTEP_PROCESSOR_P4D 0xFFFFFF04 /* desktop P4 */ | ||
25 | |||
26 | /* speedstep states -- only two of them */ | ||
27 | |||
28 | #define SPEEDSTEP_HIGH 0x00000000 | ||
29 | #define SPEEDSTEP_LOW 0x00000001 | ||
30 | |||
31 | |||
32 | /* detect a speedstep-capable processor */ | ||
33 | extern unsigned int speedstep_detect_processor (void); | ||
34 | |||
35 | /* detect the current speed (in khz) of the processor */ | ||
36 | extern unsigned int speedstep_get_processor_frequency(unsigned int processor); | ||
37 | |||
38 | |||
39 | /* detect the low and high speeds of the processor. The callback | ||
40 | * set_state"'s first argument is either SPEEDSTEP_HIGH or | ||
41 | * SPEEDSTEP_LOW; the second argument is zero so that no | ||
42 | * cpufreq_notify_transition calls are initiated. | ||
43 | */ | ||
44 | extern unsigned int speedstep_get_freqs(unsigned int processor, | ||
45 | unsigned int *low_speed, | ||
46 | unsigned int *high_speed, | ||
47 | void (*set_state) (unsigned int state)); | ||
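To make the contract above concrete, a hypothetical caller might look like this (my_set_state and my_init are illustrative names only; a real driver passes its own frequency-transition routine):

	static void my_set_state(unsigned int state)
	{
		/* switch the hardware to SPEEDSTEP_LOW or SPEEDSTEP_HIGH */
	}

	static int my_init(unsigned int processor)
	{
		unsigned int low, high;

		/* probes both states and fills in both speeds in kHz */
		if (speedstep_get_freqs(processor, &low, &high, my_set_state))
			return -EIO;
		return 0;
	}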
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c b/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c new file mode 100644 index 000000000000..79440b3f087e --- /dev/null +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c | |||
@@ -0,0 +1,424 @@ | |||
1 | /* | ||
2 | * Intel SpeedStep SMI driver. | ||
3 | * | ||
4 | * (C) 2003 Hiroshi Miura <miura@da-cha.org> | ||
5 | * | ||
6 | * Licensed under the terms of the GNU GPL License version 2. | ||
7 | * | ||
8 | */ | ||
9 | |||
10 | |||
11 | /********************************************************************* | ||
12 | * SPEEDSTEP - DEFINITIONS * | ||
13 | *********************************************************************/ | ||
14 | |||
15 | #include <linux/kernel.h> | ||
16 | #include <linux/module.h> | ||
17 | #include <linux/moduleparam.h> | ||
18 | #include <linux/init.h> | ||
19 | #include <linux/cpufreq.h> | ||
20 | #include <linux/pci.h> | ||
21 | #include <linux/slab.h> | ||
22 | #include <linux/delay.h> | ||
23 | #include <asm/ist.h> | ||
24 | |||
25 | #include "speedstep-lib.h" | ||
26 | |||
27 | /* speedstep system management interface port/command. | ||
28 | * | ||
29 | * These parameters are obtained from the IST-SMI BIOS call. | ||
30 | * If the user supplies them, those values are used instead. | ||
31 | * | ||
32 | */ | ||
33 | static int smi_port = 0; | ||
34 | static int smi_cmd = 0; | ||
35 | static unsigned int smi_sig = 0; | ||
36 | |||
37 | /* info about the processor */ | ||
38 | static unsigned int speedstep_processor = 0; | ||
39 | |||
40 | /* | ||
41 | * There are only two frequency states for each processor. Values | ||
42 | * are in kHz for the time being. | ||
43 | */ | ||
44 | static struct cpufreq_frequency_table speedstep_freqs[] = { | ||
45 | {SPEEDSTEP_HIGH, 0}, | ||
46 | {SPEEDSTEP_LOW, 0}, | ||
47 | {0, CPUFREQ_TABLE_END}, | ||
48 | }; | ||
49 | |||
50 | #define GET_SPEEDSTEP_OWNER 0 | ||
51 | #define GET_SPEEDSTEP_STATE 1 | ||
52 | #define SET_SPEEDSTEP_STATE 2 | ||
53 | #define GET_SPEEDSTEP_FREQS 4 | ||
54 | |||
55 | /* how often shall the SMI call be tried if it failed, e.g. because | ||
56 | * of DMA activity going on? */ | ||
57 | #define SMI_TRIES 5 | ||
58 | |||
59 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-smi", msg) | ||
60 | |||
61 | /** | ||
62 | * speedstep_smi_ownership | ||
63 | */ | ||
64 | static int speedstep_smi_ownership (void) | ||
65 | { | ||
66 | u32 command, result, magic; | ||
67 | u32 function = GET_SPEEDSTEP_OWNER; | ||
68 | unsigned char magic_data[] = "Copyright (c) 1999 Intel Corporation"; | ||
69 | |||
70 | command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); | ||
71 | magic = virt_to_phys(magic_data); | ||
72 | |||
73 | dprintk("trying to obtain ownership with command %x at port %x\n", command, smi_port); | ||
74 | |||
75 | __asm__ __volatile__( | ||
76 | "out %%al, (%%dx)\n" | ||
77 | : "=D" (result) | ||
78 | : "a" (command), "b" (function), "c" (0), "d" (smi_port), "D" (0), "S" (magic) | ||
79 | ); | ||
80 | |||
81 | dprintk("result is %x\n", result); | ||
82 | |||
83 | return result; | ||
84 | } | ||
85 | |||
86 | /** | ||
87 | * speedstep_smi_get_freqs - get SpeedStep preferred & current freq. | ||
88 | * @low: the low frequency value is placed here | ||
89 | * @high: the high frequency value is placed here | ||
90 | * | ||
91 | * Only available on later SpeedStep-enabled systems, returns false results or | ||
92 | * even hangs [cf. bugme.osdl.org # 1422] on earlier systems. Empirical testing | ||
93 | * shows that the latter occurs if !(ist_info.event & 0xFFFF). | ||
94 | */ | ||
95 | static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high) | ||
96 | { | ||
97 | u32 command, result = 0, edi, high_mhz, low_mhz; | ||
98 | u32 state=0; | ||
99 | u32 function = GET_SPEEDSTEP_FREQS; | ||
100 | |||
101 | if (!(ist_info.event & 0xFFFF)) { | ||
102 | dprintk("bug #1422 -- can't read freqs from BIOS\n", result); | ||
103 | return -ENODEV; | ||
104 | } | ||
105 | |||
106 | command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); | ||
107 | |||
108 | dprintk("trying to determine frequencies with command %x at port %x\n", command, smi_port); | ||
109 | |||
110 | __asm__ __volatile__("movl $0, %%edi\n" | ||
111 | "out %%al, (%%dx)\n" | ||
112 | : "=a" (result), "=b" (high_mhz), "=c" (low_mhz), "=d" (state), "=D" (edi) | ||
113 | : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0) | ||
114 | ); | ||
115 | |||
116 | dprintk("result %x, low_freq %u, high_freq %u\n", result, low_mhz, high_mhz); | ||
117 | |||
118 | /* abort if results are obviously incorrect... */ | ||
119 | if ((high_mhz + low_mhz) < 600) | ||
120 | return -EINVAL; | ||
121 | |||
122 | *high = high_mhz * 1000; | ||
123 | *low = low_mhz * 1000; | ||
124 | |||
125 | return result; | ||
126 | } | ||
127 | |||
128 | /** | ||
129 | * speedstep_get_state - read the current SpeedStep state | ||
130 | * Returns SPEEDSTEP_LOW or SPEEDSTEP_HIGH. | ||
131 | * | ||
132 | */ | ||
133 | static int speedstep_get_state (void) | ||
134 | { | ||
135 | u32 function=GET_SPEEDSTEP_STATE; | ||
136 | u32 result, state, edi, command; | ||
137 | |||
138 | command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); | ||
139 | |||
140 | dprintk("trying to determine current setting with command %x at port %x\n", command, smi_port); | ||
141 | |||
142 | __asm__ __volatile__("movl $0, %%edi\n" | ||
143 | "out %%al, (%%dx)\n" | ||
144 | : "=a" (result), "=b" (state), "=D" (edi) | ||
145 | : "a" (command), "b" (function), "c" (0), "d" (smi_port), "S" (0) | ||
146 | ); | ||
147 | |||
148 | dprintk("state is %x, result is %x\n", state, result); | ||
149 | |||
150 | return (state & 1); | ||
151 | } | ||
152 | |||
153 | |||
154 | /** | ||
155 | * speedstep_set_state - set the SpeedStep state | ||
156 | * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) | ||
157 | * | ||
158 | */ | ||
159 | static void speedstep_set_state (unsigned int state) | ||
160 | { | ||
161 | unsigned int result = 0, command, new_state; | ||
162 | unsigned long flags; | ||
163 | unsigned int function=SET_SPEEDSTEP_STATE; | ||
164 | unsigned int retry = 0; | ||
165 | |||
166 | if (state > 0x1) | ||
167 | return; | ||
168 | |||
169 | /* Disable IRQs */ | ||
170 | local_irq_save(flags); | ||
171 | |||
172 | command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); | ||
173 | |||
174 | dprintk("trying to set frequency to state %u with command %x at port %x\n", state, command, smi_port); | ||
175 | |||
176 | do { | ||
177 | if (retry) { | ||
178 | dprintk("retry %u, previous result %u, waiting...\n", retry, result); | ||
179 | mdelay(retry * 50); | ||
180 | } | ||
181 | retry++; | ||
182 | __asm__ __volatile__( | ||
183 | "movl $0, %%edi\n" | ||
184 | "out %%al, (%%dx)\n" | ||
185 | : "=b" (new_state), "=D" (result) | ||
186 | : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0) | ||
187 | ); | ||
188 | } while ((new_state != state) && (retry <= SMI_TRIES)); | ||
189 | |||
190 | /* enable IRQs */ | ||
191 | local_irq_restore(flags); | ||
192 | |||
193 | if (new_state == state) { | ||
194 | dprintk("change to %u MHz succeeded after %u tries with result %u\n", (speedstep_freqs[new_state].frequency / 1000), retry, result); | ||
195 | } else { | ||
196 | printk(KERN_ERR "cpufreq: change failed with new_state %u and result %u\n", new_state, result); | ||
197 | } | ||
198 | |||
199 | return; | ||
200 | } | ||
201 | |||
202 | |||
203 | /** | ||
204 | * speedstep_target - set a new CPUFreq policy | ||
205 | * @policy: new policy | ||
206 | * @target_freq: new freq | ||
207 | * @relation: CPUFREQ_RELATION_L or CPUFREQ_RELATION_H | ||
208 | * | ||
209 | * Sets a new CPUFreq policy/freq. | ||
210 | */ | ||
211 | static int speedstep_target (struct cpufreq_policy *policy, | ||
212 | unsigned int target_freq, unsigned int relation) | ||
213 | { | ||
214 | unsigned int newstate = 0; | ||
215 | struct cpufreq_freqs freqs; | ||
216 | |||
217 | if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate)) | ||
218 | return -EINVAL; | ||
219 | |||
220 | freqs.old = speedstep_freqs[speedstep_get_state()].frequency; | ||
221 | freqs.new = speedstep_freqs[newstate].frequency; | ||
222 | freqs.cpu = 0; /* speedstep-smi is a UP-only driver */ | ||
223 | |||
224 | if (freqs.old == freqs.new) | ||
225 | return 0; | ||
226 | |||
227 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
228 | speedstep_set_state(newstate); | ||
229 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
230 | |||
231 | return 0; | ||
232 | } | ||
233 | |||
234 | |||
235 | /** | ||
236 | * speedstep_verify - verifies a new CPUFreq policy | ||
237 | * @policy: new policy | ||
238 | * | ||
239 | * Limit must be within speedstep_low_freq and speedstep_high_freq, with | ||
240 | * at least one border included. | ||
241 | */ | ||
242 | static int speedstep_verify (struct cpufreq_policy *policy) | ||
243 | { | ||
244 | return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]); | ||
245 | } | ||
246 | |||
247 | |||
248 | static int speedstep_cpu_init(struct cpufreq_policy *policy) | ||
249 | { | ||
250 | int result; | ||
251 | unsigned int speed,state; | ||
252 | |||
253 | /* capability check */ | ||
254 | if (policy->cpu != 0) | ||
255 | return -ENODEV; | ||
256 | |||
257 | result = speedstep_smi_ownership(); | ||
258 | if (result) { | ||
259 | dprintk("fails in aquiring ownership of a SMI interface.\n"); | ||
260 | return -EINVAL; | ||
261 | } | ||
262 | |||
263 | /* detect low and high frequency */ | ||
264 | result = speedstep_smi_get_freqs(&speedstep_freqs[SPEEDSTEP_LOW].frequency, | ||
265 | &speedstep_freqs[SPEEDSTEP_HIGH].frequency); | ||
266 | if (result) { | ||
267 | /* fall back to the speedstep-lib.c detection mechanism: try both states out */ | ||
268 | dprintk("could not detect low and high frequencies by SMI call.\n"); | ||
269 | result = speedstep_get_freqs(speedstep_processor, | ||
270 | &speedstep_freqs[SPEEDSTEP_LOW].frequency, | ||
271 | &speedstep_freqs[SPEEDSTEP_HIGH].frequency, | ||
272 | &speedstep_set_state); | ||
273 | |||
274 | if (result) { | ||
275 | dprintk("could not detect two different speeds -- aborting.\n"); | ||
276 | return result; | ||
277 | } else | ||
278 | dprintk("workaround worked.\n"); | ||
279 | } | ||
280 | |||
281 | /* get current speed setting */ | ||
282 | state = speedstep_get_state(); | ||
283 | speed = speedstep_freqs[state].frequency; | ||
284 | |||
285 | dprintk("currently at %s speed setting - %i MHz\n", | ||
286 | (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? "low" : "high", | ||
287 | (speed / 1000)); | ||
288 | |||
289 | /* cpuinfo and default policy values */ | ||
290 | policy->governor = CPUFREQ_DEFAULT_GOVERNOR; | ||
291 | policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; | ||
292 | policy->cur = speed; | ||
293 | |||
294 | result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs); | ||
295 | if (result) | ||
296 | return (result); | ||
297 | |||
298 | cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu); | ||
299 | |||
300 | return 0; | ||
301 | } | ||
302 | |||
303 | static int speedstep_cpu_exit(struct cpufreq_policy *policy) | ||
304 | { | ||
305 | cpufreq_frequency_table_put_attr(policy->cpu); | ||
306 | return 0; | ||
307 | } | ||
308 | |||
309 | static unsigned int speedstep_get(unsigned int cpu) | ||
310 | { | ||
311 | if (cpu) | ||
312 | return -ENODEV; | ||
313 | return speedstep_get_processor_frequency(speedstep_processor); | ||
314 | } | ||
315 | |||
316 | |||
317 | static int speedstep_resume(struct cpufreq_policy *policy) | ||
318 | { | ||
319 | int result = speedstep_smi_ownership(); | ||
320 | |||
321 | if (result) | ||
322 | dprintk("fails in re-aquiring ownership of a SMI interface.\n"); | ||
323 | |||
324 | return result; | ||
325 | } | ||
326 | |||
327 | static struct freq_attr* speedstep_attr[] = { | ||
328 | &cpufreq_freq_attr_scaling_available_freqs, | ||
329 | NULL, | ||
330 | }; | ||
331 | |||
332 | static struct cpufreq_driver speedstep_driver = { | ||
333 | .name = "speedstep-smi", | ||
334 | .verify = speedstep_verify, | ||
335 | .target = speedstep_target, | ||
336 | .init = speedstep_cpu_init, | ||
337 | .exit = speedstep_cpu_exit, | ||
338 | .get = speedstep_get, | ||
339 | .resume = speedstep_resume, | ||
340 | .owner = THIS_MODULE, | ||
341 | .attr = speedstep_attr, | ||
342 | }; | ||
343 | |||
344 | /** | ||
345 | * speedstep_init - initializes the SpeedStep CPUFreq driver | ||
346 | * | ||
347 | * Initializes the SpeedStep support. Returns -ENODEV on unsupported | ||
348 | * BIOS, -EINVAL on problems during initialization, and zero on | ||
349 | * success. | ||
350 | */ | ||
351 | static int __init speedstep_init(void) | ||
352 | { | ||
353 | speedstep_processor = speedstep_detect_processor(); | ||
354 | |||
355 | switch (speedstep_processor) { | ||
356 | case SPEEDSTEP_PROCESSOR_PIII_T: | ||
357 | case SPEEDSTEP_PROCESSOR_PIII_C: | ||
358 | case SPEEDSTEP_PROCESSOR_PIII_C_EARLY: | ||
359 | break; | ||
360 | default: | ||
361 | speedstep_processor = 0; | ||
362 | } | ||
363 | |||
364 | if (!speedstep_processor) { | ||
365 | dprintk ("No supported Intel CPU detected.\n"); | ||
366 | return -ENODEV; | ||
367 | } | ||
368 | |||
369 | dprintk("signature:0x%.8lx, command:0x%.8lx, event:0x%.8lx, perf_level:0x%.8lx.\n", | ||
370 | ist_info.signature, ist_info.command, ist_info.event, ist_info.perf_level); | ||
371 | |||
372 | |||
373 | /* Error out unless either an IST-SMI BIOS or user-supplied | ||
374 | port/command parameters are present; sig = 'ISGE' aka 'Intel Speedstep Gate E' */ | ||
375 | if ((ist_info.signature != 0x47534943) && ( | ||
376 | (smi_port == 0) || (smi_cmd == 0))) | ||
377 | return -ENODEV; | ||
378 | |||
379 | if (smi_sig == 1) | ||
380 | smi_sig = 0x47534943; | ||
381 | else | ||
382 | smi_sig = ist_info.signature; | ||
383 | |||
384 | /* set up smi_port from the module parameter or the BIOS */ | ||
385 | if ((smi_port > 0xff) || (smi_port < 0)) { | ||
386 | return -EINVAL; | ||
387 | } else if (smi_port == 0) { | ||
388 | smi_port = ist_info.command & 0xff; | ||
389 | } | ||
390 | |||
391 | if ((smi_cmd > 0xff) || (smi_cmd < 0)) { | ||
392 | return -EINVAL; | ||
393 | } else if (smi_cmd == 0) { | ||
394 | smi_cmd = (ist_info.command >> 16) & 0xff; | ||
395 | } | ||
396 | |||
397 | return cpufreq_register_driver(&speedstep_driver); | ||
398 | } | ||
399 | |||
400 | |||
401 | /** | ||
402 | * speedstep_exit - unregisters SpeedStep support | ||
403 | * | ||
404 | * Unregisters SpeedStep support. | ||
405 | */ | ||
406 | static void __exit speedstep_exit(void) | ||
407 | { | ||
408 | cpufreq_unregister_driver(&speedstep_driver); | ||
409 | } | ||
410 | |||
411 | module_param(smi_port, int, 0444); | ||
412 | module_param(smi_cmd, int, 0444); | ||
413 | module_param(smi_sig, uint, 0444); | ||
414 | |||
415 | MODULE_PARM_DESC(smi_port, "Override the BIOS-given IST port with this value -- Intel's default setting is 0xb2"); | ||
416 | MODULE_PARM_DESC(smi_cmd, "Override the BIOS-given IST command with this value -- Intel's default setting is 0x82"); | ||
417 | MODULE_PARM_DESC(smi_sig, "Set to 1 to fake the IST signature when using the SMI interface."); | ||
418 | |||
419 | MODULE_AUTHOR ("Hiroshi Miura"); | ||
420 | MODULE_DESCRIPTION ("Speedstep driver for IST applet SMI interface."); | ||
421 | MODULE_LICENSE ("GPL"); | ||
422 | |||
423 | module_init(speedstep_init); | ||
424 | module_exit(speedstep_exit); | ||
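The three inline-assembly blocks above all follow the same register convention, inferred from this file rather than from any documented API: the command word goes in EAX, the function code in EBX, an optional argument in ECX and the SMI port in EDX; the OUT of AL to that port traps into the BIOS IST handler, which passes results back in registers. A condensed sketch of that pattern (the real calls above differ in which output registers they collect):

	static u32 smi_call(u32 command, u32 function, u32 arg, u32 port)
	{
		u32 result;

		__asm__ __volatile__(
			"movl $0, %%edi\n"
			"out %%al, (%%dx)\n"
			: "=a" (result)
			: "0" (command), "b" (function), "c" (arg),
			  "d" (port), "S" (0)
			: "edi", "memory");
		return result;
	}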
diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c new file mode 100644 index 000000000000..ba4b01138c8f --- /dev/null +++ b/arch/i386/kernel/cpu/cyrix.c | |||
@@ -0,0 +1,439 @@ | |||
1 | #include <linux/init.h> | ||
2 | #include <linux/bitops.h> | ||
3 | #include <linux/delay.h> | ||
4 | #include <linux/pci.h> | ||
5 | #include <asm/dma.h> | ||
6 | #include <asm/io.h> | ||
7 | #include <asm/processor.h> | ||
8 | #include <asm/timer.h> | ||
9 | |||
10 | #include "cpu.h" | ||
11 | |||
12 | /* | ||
13 | * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info about the CPU | ||
14 | */ | ||
15 | static void __init do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) | ||
16 | { | ||
17 | unsigned char ccr2, ccr3; | ||
18 | unsigned long flags; | ||
19 | |||
20 | /* we test for DEVID by checking whether CCR3 is writable */ | ||
21 | local_irq_save(flags); | ||
22 | ccr3 = getCx86(CX86_CCR3); | ||
23 | setCx86(CX86_CCR3, ccr3 ^ 0x80); | ||
24 | getCx86(0xc0); /* dummy to change bus */ | ||
25 | |||
26 | if (getCx86(CX86_CCR3) == ccr3) { /* no DEVID regs. */ | ||
27 | ccr2 = getCx86(CX86_CCR2); | ||
28 | setCx86(CX86_CCR2, ccr2 ^ 0x04); | ||
29 | getCx86(0xc0); /* dummy */ | ||
30 | |||
31 | if (getCx86(CX86_CCR2) == ccr2) /* old Cx486SLC/DLC */ | ||
32 | *dir0 = 0xfd; | ||
33 | else { /* Cx486S A step */ | ||
34 | setCx86(CX86_CCR2, ccr2); | ||
35 | *dir0 = 0xfe; | ||
36 | } | ||
37 | } | ||
38 | else { | ||
39 | setCx86(CX86_CCR3, ccr3); /* restore CCR3 */ | ||
40 | |||
41 | /* read DIR0 and DIR1 CPU registers */ | ||
42 | *dir0 = getCx86(CX86_DIR0); | ||
43 | *dir1 = getCx86(CX86_DIR1); | ||
44 | } | ||
45 | local_irq_restore(flags); | ||
46 | } | ||
47 | |||
48 | /* | ||
49 | * Cx86_dir0_msb is a HACK needed by check_cx686_cpuid/slop in bugs.h in | ||
50 | * order to identify the Cyrix CPU model after we're out of setup.c | ||
51 | * | ||
52 | * Actually, since bugs.h doesn't even reference this, perhaps someone | ||
53 | * should fix the documentation ??? | ||
54 | */ | ||
55 | static unsigned char Cx86_dir0_msb __initdata = 0; | ||
56 | |||
57 | static char Cx86_model[][9] __initdata = { | ||
58 | "Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ", | ||
59 | "M II ", "Unknown" | ||
60 | }; | ||
61 | static char Cx486_name[][5] __initdata = { | ||
62 | "SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx", | ||
63 | "SRx2", "DRx2" | ||
64 | }; | ||
65 | static char Cx486S_name[][4] __initdata = { | ||
66 | "S", "S2", "Se", "S2e" | ||
67 | }; | ||
68 | static char Cx486D_name[][4] __initdata = { | ||
69 | "DX", "DX2", "?", "?", "?", "DX4" | ||
70 | }; | ||
71 | static char Cx86_cb[] __initdata = "?.5x Core/Bus Clock"; | ||
72 | static char cyrix_model_mult1[] __initdata = "12??43"; | ||
73 | static char cyrix_model_mult2[] __initdata = "12233445"; | ||
74 | |||
75 | /* | ||
76 | * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old | ||
77 | * BIOSes for compatibility with DOS games. This makes the udelay loop | ||
78 | * work correctly, and improves performance. | ||
79 | * | ||
80 | * FIXME: our newer udelay uses the tsc. We don't need to frob with SLOP | ||
81 | */ | ||
82 | |||
83 | extern void calibrate_delay(void) __init; | ||
84 | |||
85 | static void __init check_cx686_slop(struct cpuinfo_x86 *c) | ||
86 | { | ||
87 | unsigned long flags; | ||
88 | |||
89 | if (Cx86_dir0_msb == 3) { | ||
90 | unsigned char ccr3, ccr5; | ||
91 | |||
92 | local_irq_save(flags); | ||
93 | ccr3 = getCx86(CX86_CCR3); | ||
94 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ | ||
95 | ccr5 = getCx86(CX86_CCR5); | ||
96 | if (ccr5 & 2) | ||
97 | setCx86(CX86_CCR5, ccr5 & 0xfd); /* reset SLOP */ | ||
98 | setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ | ||
99 | local_irq_restore(flags); | ||
100 | |||
101 | if (ccr5 & 2) { /* possible wrong calibration done */ | ||
102 | printk(KERN_INFO "Recalibrating delay loop with SLOP bit reset\n"); | ||
103 | calibrate_delay(); | ||
104 | c->loops_per_jiffy = loops_per_jiffy; | ||
105 | } | ||
106 | } | ||
107 | } | ||
108 | |||
109 | |||
110 | static void __init set_cx86_reorder(void) | ||
111 | { | ||
112 | u8 ccr3; | ||
113 | |||
114 | printk(KERN_INFO "Enable Memory access reorder on Cyrix/NSC processor.\n"); | ||
115 | ccr3 = getCx86(CX86_CCR3); | ||
116 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ | ||
117 | |||
118 | /* Load/Store Serialize to mem access disable (=reorder it) */ | ||
119 | setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80); | ||
120 | /* set load/store serialize from 1GB to 4GB */ | ||
121 | ccr3 |= 0xe0; | ||
122 | setCx86(CX86_CCR3, ccr3); | ||
123 | } | ||
124 | |||
125 | static void __init set_cx86_memwb(void) | ||
126 | { | ||
127 | u32 cr0; | ||
128 | |||
129 | printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n"); | ||
130 | |||
131 | /* CCR2 bit 2: unlock NW bit */ | ||
132 | setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04); | ||
133 | /* set 'Not Write-through' */ | ||
134 | cr0 = 0x20000000; | ||
135 | __asm__("movl %%cr0,%%eax\n\t" | ||
136 | "orl %0,%%eax\n\t" | ||
137 | "movl %%eax,%%cr0\n" | ||
138 | : : "r" (cr0) | ||
139 | :"ax"); | ||
140 | /* CCR2 bit 2: lock NW bit and set WT1 */ | ||
141 | setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 ); | ||
142 | } | ||
143 | |||
144 | static void __init set_cx86_inc(void) | ||
145 | { | ||
146 | unsigned char ccr3; | ||
147 | |||
148 | printk(KERN_INFO "Enable Incrementor on Cyrix/NSC processor.\n"); | ||
149 | |||
150 | ccr3 = getCx86(CX86_CCR3); | ||
151 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ | ||
152 | /* PCR1 -- Performance Control */ | ||
153 | /* Incrementor on, whatever that is */ | ||
154 | setCx86(CX86_PCR1, getCx86(CX86_PCR1) | 0x02); | ||
155 | /* PCR0 -- Performance Control */ | ||
156 | /* Incrementor Margin 10 */ | ||
157 | setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04); | ||
158 | setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ | ||
159 | } | ||
160 | |||
161 | /* | ||
162 | * Configure later MediaGX and/or Geode processor. | ||
163 | */ | ||
164 | |||
165 | static void __init geode_configure(void) | ||
166 | { | ||
167 | unsigned long flags; | ||
168 | u8 ccr3, ccr4; | ||
169 | local_irq_save(flags); | ||
170 | |||
171 | /* Suspend on halt power saving and enable #SUSP pin */ | ||
172 | setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); | ||
173 | |||
174 | ccr3 = getCx86(CX86_CCR3); | ||
175 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* Enable */ | ||
176 | |||
177 | ccr4 = getCx86(CX86_CCR4); | ||
178 | ccr4 |= 0x38; /* FPU fast, DTE cache, Mem bypass */ | ||
179 | |||
180 | setCx86(CX86_CCR3, ccr3); | ||
181 | |||
182 | set_cx86_memwb(); | ||
183 | set_cx86_reorder(); | ||
184 | set_cx86_inc(); | ||
185 | |||
186 | local_irq_restore(flags); | ||
187 | } | ||
188 | |||
189 | |||
190 | #ifdef CONFIG_PCI | ||
191 | static struct pci_device_id cyrix_55x0[] = { | ||
192 | { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510) }, | ||
193 | { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520) }, | ||
194 | { }, | ||
195 | }; | ||
196 | #endif | ||
197 | |||
198 | static void __init init_cyrix(struct cpuinfo_x86 *c) | ||
199 | { | ||
200 | unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0; | ||
201 | char *buf = c->x86_model_id; | ||
202 | const char *p = NULL; | ||
203 | |||
204 | /* Bit 31 in normal CPUID is used for the nonstandard 3DNow ID; | ||
205 | 3DNow is identified by bit 31 in extended CPUID (1*32+31) anyway */ | ||
206 | clear_bit(0*32+31, c->x86_capability); | ||
207 | |||
208 | /* Cyrix used bit 24 in extended (AMD) CPUID for Cyrix MMX extensions */ | ||
209 | if ( test_bit(1*32+24, c->x86_capability) ) { | ||
210 | clear_bit(1*32+24, c->x86_capability); | ||
211 | set_bit(X86_FEATURE_CXMMX, c->x86_capability); | ||
212 | } | ||
213 | |||
214 | do_cyrix_devid(&dir0, &dir1); | ||
215 | |||
216 | check_cx686_slop(c); | ||
217 | |||
218 | Cx86_dir0_msb = dir0_msn = dir0 >> 4; /* identifies CPU "family" */ | ||
219 | dir0_lsn = dir0 & 0xf; /* model or clock multiplier */ | ||
220 | |||
221 | /* common case step number/rev -- exceptions handled below */ | ||
222 | c->x86_model = (dir1 >> 4) + 1; | ||
223 | c->x86_mask = dir1 & 0xf; | ||
224 | |||
225 | /* Now cook; the original recipe is by Channing Corn, from Cyrix. | ||
226 | * We do the same thing for each generation: we work out | ||
227 | * the model, multiplier and stepping. Black magic included, | ||
228 | * to make the silicon step/rev numbers match the printed ones. | ||
229 | */ | ||
230 | |||
231 | switch (dir0_msn) { | ||
232 | unsigned char tmp; | ||
233 | |||
234 | case 0: /* Cx486SLC/DLC/SRx/DRx */ | ||
235 | p = Cx486_name[dir0_lsn & 7]; | ||
236 | break; | ||
237 | |||
238 | case 1: /* Cx486S/DX/DX2/DX4 */ | ||
239 | p = (dir0_lsn & 8) ? Cx486D_name[dir0_lsn & 5] | ||
240 | : Cx486S_name[dir0_lsn & 3]; | ||
241 | break; | ||
242 | |||
243 | case 2: /* 5x86 */ | ||
244 | Cx86_cb[2] = cyrix_model_mult1[dir0_lsn & 5]; | ||
245 | p = Cx86_cb+2; | ||
246 | break; | ||
247 | |||
248 | case 3: /* 6x86/6x86L */ | ||
249 | Cx86_cb[1] = ' '; | ||
250 | Cx86_cb[2] = cyrix_model_mult1[dir0_lsn & 5]; | ||
251 | if (dir1 > 0x21) { /* 686L */ | ||
252 | Cx86_cb[0] = 'L'; | ||
253 | p = Cx86_cb; | ||
254 | (c->x86_model)++; | ||
255 | } else /* 686 */ | ||
256 | p = Cx86_cb+1; | ||
257 | /* Emulate MTRRs using Cyrix's ARRs. */ | ||
258 | set_bit(X86_FEATURE_CYRIX_ARR, c->x86_capability); | ||
259 | /* 6x86's contain this bug */ | ||
260 | c->coma_bug = 1; | ||
261 | break; | ||
262 | |||
263 | case 4: /* MediaGX/GXm or Geode GXM/GXLV/GX1 */ | ||
264 | #ifdef CONFIG_PCI | ||
265 | /* It isn't really a PCI quirk directly, but the cure is the | ||
266 | same. The MediaGX has deep magic SMM stuff that handles the | ||
267 | SB emulation. It throws away the FIFO on disable_dma(), which | ||
268 | is wrong and ruins the audio. | ||
269 | |||
270 | Bug2: VSA1 has a wrap bug so that using maximum sized DMA | ||
271 | causes bad things. According to NatSemi VSA2 has another | ||
272 | bug to do with 'hlt'. I've not seen any boards using VSA2 | ||
273 | and X doesn't seem to support it either so who cares 8). | ||
274 | VSA1 we work around however. | ||
275 | */ | ||
276 | |||
277 | printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n"); | ||
278 | isa_dma_bridge_buggy = 2; | ||
279 | #endif | ||
280 | c->x86_cache_size = 16; /* Yep, 16K integrated cache, that's it */ | ||
281 | |||
282 | /* | ||
283 | * The 5510/5520 companion chips have a funky PIT. | ||
284 | */ | ||
285 | if (pci_dev_present(cyrix_55x0)) | ||
286 | pit_latch_buggy = 1; | ||
287 | |||
288 | /* GXm supports extended cpuid levels a la AMD */ | ||
289 | if (c->cpuid_level == 2) { | ||
290 | /* Enable cxMMX extensions (GX1 Datasheet 54) */ | ||
291 | setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1); | ||
292 | |||
293 | /* GXlv/GXm/GX1 */ | ||
294 | if((dir1 >= 0x50 && dir1 <= 0x54) || dir1 >= 0x63) | ||
295 | geode_configure(); | ||
296 | get_model_name(c); /* get CPU marketing name */ | ||
297 | return; | ||
298 | } | ||
299 | else { /* MediaGX */ | ||
300 | Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4'; | ||
301 | p = Cx86_cb+2; | ||
302 | c->x86_model = (dir1 & 0x20) ? 1 : 2; | ||
303 | } | ||
304 | break; | ||
305 | |||
306 | case 5: /* 6x86MX/M II */ | ||
307 | if (dir1 > 7) | ||
308 | { | ||
309 | dir0_msn++; /* M II */ | ||
310 | /* Enable MMX extensions (App note 108) */ | ||
311 | setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1); | ||
312 | } | ||
313 | else | ||
314 | { | ||
315 | c->coma_bug = 1; /* 6x86MX, it has the bug. */ | ||
316 | } | ||
317 | tmp = (!(dir0_lsn & 7) || dir0_lsn & 1) ? 2 : 0; | ||
318 | Cx86_cb[tmp] = cyrix_model_mult2[dir0_lsn & 7]; | ||
319 | p = Cx86_cb+tmp; | ||
320 | if (((dir1 & 0x0f) > 4) || ((dir1 & 0xf0) == 0x20)) | ||
321 | (c->x86_model)++; | ||
322 | /* Emulate MTRRs using Cyrix's ARRs. */ | ||
323 | set_bit(X86_FEATURE_CYRIX_ARR, c->x86_capability); | ||
324 | break; | ||
325 | |||
326 | case 0xf: /* Cyrix 486 without DEVID registers */ | ||
327 | switch (dir0_lsn) { | ||
328 | case 0xd: /* either a 486SLC or DLC w/o DEVID */ | ||
329 | dir0_msn = 0; | ||
330 | p = Cx486_name[(c->hard_math) ? 1 : 0]; | ||
331 | break; | ||
332 | |||
333 | case 0xe: /* a 486S A step */ | ||
334 | dir0_msn = 0; | ||
335 | p = Cx486S_name[0]; | ||
336 | break; | ||
337 | } | ||
338 | break; | ||
339 | |||
340 | default: /* unknown (shouldn't happen, we know everyone ;-) */ | ||
341 | dir0_msn = 7; | ||
342 | break; | ||
343 | } | ||
344 | strcpy(buf, Cx86_model[dir0_msn & 7]); | ||
345 | if (p) strcat(buf, p); | ||
346 | return; | ||
347 | } | ||
348 | |||
349 | /* | ||
350 | * Cyrix CPUs without cpuid or with cpuid not yet enabled can be detected | ||
351 | * by the fact that they preserve the flags across the division of 5/2. | ||
352 | * PII and PPro exhibit this behavior too, but they have cpuid available. | ||
353 | */ | ||
354 | |||
355 | /* | ||
356 | * Perform the Cyrix 5/2 test. A Cyrix won't change | ||
357 | * the flags, while other 486 chips will. | ||
358 | */ | ||
359 | static inline int test_cyrix_52div(void) | ||
360 | { | ||
361 | unsigned int test; | ||
362 | |||
363 | __asm__ __volatile__( | ||
364 | "sahf\n\t" /* clear flags (%eax = 0x0005) */ | ||
365 | "div %b2\n\t" /* divide 5 by 2 */ | ||
366 | "lahf" /* store flags into %ah */ | ||
367 | : "=a" (test) | ||
368 | : "0" (5), "q" (2) | ||
369 | : "cc"); | ||
370 | |||
371 | /* AH is 0x02 on Cyrix after the divide. */ | ||
372 | return (unsigned char) (test >> 8) == 0x02; | ||
373 | } | ||
374 | |||
375 | static void cyrix_identify(struct cpuinfo_x86 * c) | ||
376 | { | ||
377 | /* Detect Cyrix with disabled CPUID */ | ||
378 | if ( c->x86 == 4 && test_cyrix_52div() ) { | ||
379 | unsigned char dir0, dir1; | ||
380 | |||
381 | strcpy(c->x86_vendor_id, "CyrixInstead"); | ||
382 | c->x86_vendor = X86_VENDOR_CYRIX; | ||
383 | |||
384 | /* Actually enable cpuid on the older Cyrix */ | ||
385 | |||
386 | /* Retrieve CPU revisions */ | ||
387 | |||
388 | do_cyrix_devid(&dir0, &dir1); | ||
389 | |||
390 | dir0>>=4; | ||
391 | |||
392 | /* Check it is an affected model */ | ||
393 | |||
394 | if (dir0 == 5 || dir0 == 3) | ||
395 | { | ||
396 | unsigned char ccr3, ccr4; | ||
397 | unsigned long flags; | ||
398 | printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n"); | ||
399 | local_irq_save(flags); | ||
400 | ccr3 = getCx86(CX86_CCR3); | ||
401 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ | ||
402 | ccr4 = getCx86(CX86_CCR4); | ||
403 | setCx86(CX86_CCR4, ccr4 | 0x80); /* enable cpuid */ | ||
404 | setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ | ||
405 | local_irq_restore(flags); | ||
406 | } | ||
407 | } | ||
408 | generic_identify(c); | ||
409 | } | ||
410 | |||
411 | static struct cpu_dev cyrix_cpu_dev __initdata = { | ||
412 | .c_vendor = "Cyrix", | ||
413 | .c_ident = { "CyrixInstead" }, | ||
414 | .c_init = init_cyrix, | ||
415 | .c_identify = cyrix_identify, | ||
416 | }; | ||
417 | |||
418 | int __init cyrix_init_cpu(void) | ||
419 | { | ||
420 | cpu_devs[X86_VENDOR_CYRIX] = &cyrix_cpu_dev; | ||
421 | return 0; | ||
422 | } | ||
423 | |||
424 | //early_arch_initcall(cyrix_init_cpu); | ||
425 | |||
426 | static struct cpu_dev nsc_cpu_dev __initdata = { | ||
427 | .c_vendor = "NSC", | ||
428 | .c_ident = { "Geode by NSC" }, | ||
429 | .c_init = init_cyrix, | ||
430 | .c_identify = generic_identify, | ||
431 | }; | ||
432 | |||
433 | int __init nsc_init_cpu(void) | ||
434 | { | ||
435 | cpu_devs[X86_VENDOR_NSC] = &nsc_cpu_dev; | ||
436 | return 0; | ||
437 | } | ||
438 | |||
439 | //early_arch_initcall(nsc_init_cpu); | ||
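The test_cyrix_52div() trick above also works from user space, which can be handy for verifying the behaviour on real hardware. A standalone sketch (assumes a 486-class x86 and GCC inline assembly; on a Cyrix the DIV leaves the flags untouched, so LAHF reads back only the always-set bit 1, i.e. 0x02):

	#include <stdio.h>

	static int is_cyrix_52div(void)
	{
		unsigned int test;

		__asm__ __volatile__(
			"sahf\n\t"	/* load flags from AH (EAX = 0x0005) */
			"div %b2\n\t"	/* divide 5 by 2 */
			"lahf"		/* store flags back into AH */
			: "=a" (test)
			: "0" (5), "q" (2)
			: "cc");
		return (unsigned char)(test >> 8) == 0x02;
	}

	int main(void)
	{
		printf("Cyrix-style 5/2 flags behaviour: %d\n", is_cyrix_52div());
		return 0;
	}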
diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c new file mode 100644 index 000000000000..b8d847b850dc --- /dev/null +++ b/arch/i386/kernel/cpu/intel.c | |||
@@ -0,0 +1,248 @@ | |||
1 | #include <linux/config.h> | ||
2 | #include <linux/init.h> | ||
3 | #include <linux/kernel.h> | ||
4 | |||
5 | #include <linux/string.h> | ||
6 | #include <linux/bitops.h> | ||
7 | #include <linux/smp.h> | ||
8 | #include <linux/thread_info.h> | ||
9 | |||
10 | #include <asm/processor.h> | ||
11 | #include <asm/msr.h> | ||
12 | #include <asm/uaccess.h> | ||
13 | |||
14 | #include "cpu.h" | ||
15 | |||
16 | #ifdef CONFIG_X86_LOCAL_APIC | ||
17 | #include <asm/mpspec.h> | ||
18 | #include <asm/apic.h> | ||
19 | #include <mach_apic.h> | ||
20 | #endif | ||
21 | |||
22 | extern int trap_init_f00f_bug(void); | ||
23 | |||
24 | #ifdef CONFIG_X86_INTEL_USERCOPY | ||
25 | /* | ||
26 | * Alignment at which movsl is preferred for bulk memory copies. | ||
27 | */ | ||
28 | struct movsl_mask movsl_mask; | ||
29 | #endif | ||
30 | |||
31 | void __init early_intel_workaround(struct cpuinfo_x86 *c) | ||
32 | { | ||
33 | if (c->x86_vendor != X86_VENDOR_INTEL) | ||
34 | return; | ||
35 | /* Netburst reports a 64 byte clflush size, but does IO in 128 byte chunks */ | ||
36 | if (c->x86 == 15 && c->x86_cache_alignment == 64) | ||
37 | c->x86_cache_alignment = 128; | ||
38 | } | ||
39 | |||
40 | /* | ||
41 | * Early probe support logic for ppro memory erratum #50 | ||
42 | * | ||
43 | * This is called before we do cpu ident work | ||
44 | */ | ||
45 | |||
46 | int __init ppro_with_ram_bug(void) | ||
47 | { | ||
48 | /* Uses data from early_cpu_detect now */ | ||
49 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && | ||
50 | boot_cpu_data.x86 == 6 && | ||
51 | boot_cpu_data.x86_model == 1 && | ||
52 | boot_cpu_data.x86_mask < 8) { | ||
53 | printk(KERN_INFO "Pentium Pro with Errata#50 detected. Taking evasive action.\n"); | ||
54 | return 1; | ||
55 | } | ||
56 | return 0; | ||
57 | } | ||
58 | |||
59 | |||
60 | /* | ||
61 | * P4 Xeon errata 037 workaround. | ||
62 | * Hardware prefetcher may cause stale data to be loaded into the cache. | ||
63 | */ | ||
64 | static void __init Intel_errata_workarounds(struct cpuinfo_x86 *c) | ||
65 | { | ||
66 | unsigned long lo, hi; | ||
67 | |||
68 | if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { | ||
69 | rdmsr (MSR_IA32_MISC_ENABLE, lo, hi); | ||
70 | if ((lo & (1<<9)) == 0) { | ||
71 | printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); | ||
72 | printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); | ||
73 | lo |= (1<<9); /* Disable hw prefetching */ | ||
74 | wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); | ||
75 | } | ||
76 | } | ||
77 | } | ||
78 | |||
79 | |||
80 | static void __init init_intel(struct cpuinfo_x86 *c) | ||
81 | { | ||
82 | unsigned int l2 = 0; | ||
83 | char *p = NULL; | ||
84 | |||
85 | #ifdef CONFIG_X86_F00F_BUG | ||
86 | /* | ||
87 | * All current models of Pentium and Pentium with MMX technology CPUs | ||
88 | * have the F0 0F bug, which lets nonprivileged users lock up the system. | ||
89 | * Note that the workaround only should be initialized once... | ||
90 | */ | ||
91 | c->f00f_bug = 0; | ||
92 | if ( c->x86 == 5 ) { | ||
93 | static int f00f_workaround_enabled = 0; | ||
94 | |||
95 | c->f00f_bug = 1; | ||
96 | if ( !f00f_workaround_enabled ) { | ||
97 | trap_init_f00f_bug(); | ||
98 | printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); | ||
99 | f00f_workaround_enabled = 1; | ||
100 | } | ||
101 | } | ||
102 | #endif | ||
103 | |||
104 | select_idle_routine(c); | ||
105 | l2 = init_intel_cacheinfo(c); | ||
106 | |||
107 | /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ | ||
108 | if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) | ||
109 | clear_bit(X86_FEATURE_SEP, c->x86_capability); | ||
110 | |||
111 | /* Names for the Pentium II/Celeron processors | ||
112 | detectable only by also checking the cache size. | ||
113 | Dixon is NOT a Celeron. */ | ||
114 | if (c->x86 == 6) { | ||
115 | switch (c->x86_model) { | ||
116 | case 5: | ||
117 | if (c->x86_mask == 0) { | ||
118 | if (l2 == 0) | ||
119 | p = "Celeron (Covington)"; | ||
120 | else if (l2 == 256) | ||
121 | p = "Mobile Pentium II (Dixon)"; | ||
122 | } | ||
123 | break; | ||
124 | |||
125 | case 6: | ||
126 | if (l2 == 128) | ||
127 | p = "Celeron (Mendocino)"; | ||
128 | else if (c->x86_mask == 0 || c->x86_mask == 5) | ||
129 | p = "Celeron-A"; | ||
130 | break; | ||
131 | |||
132 | case 8: | ||
133 | if (l2 == 128) | ||
134 | p = "Celeron (Coppermine)"; | ||
135 | break; | ||
136 | } | ||
137 | } | ||
138 | |||
139 | if ( p ) | ||
140 | strcpy(c->x86_model_id, p); | ||
141 | |||
142 | detect_ht(c); | ||
143 | |||
144 | /* Work around errata */ | ||
145 | Intel_errata_workarounds(c); | ||
146 | |||
147 | #ifdef CONFIG_X86_INTEL_USERCOPY | ||
148 | /* | ||
149 | * Set up the preferred alignment for movsl bulk memory moves | ||
150 | */ | ||
151 | switch (c->x86) { | ||
152 | case 4: /* 486: untested */ | ||
153 | break; | ||
154 | case 5: /* Old Pentia: untested */ | ||
155 | break; | ||
156 | case 6: /* PII/PIII only like movsl with 8-byte alignment */ | ||
157 | movsl_mask.mask = 7; | ||
158 | break; | ||
159 | case 15: /* P4 is OK down to 8-byte alignment */ | ||
160 | movsl_mask.mask = 7; | ||
161 | break; | ||
162 | } | ||
163 | #endif | ||
164 | |||
165 | if (c->x86 == 15) | ||
166 | set_bit(X86_FEATURE_P4, c->x86_capability); | ||
167 | if (c->x86 == 6) | ||
168 | set_bit(X86_FEATURE_P3, c->x86_capability); | ||
169 | } | ||
170 | |||
171 | |||
172 | static unsigned int intel_size_cache(struct cpuinfo_x86 * c, unsigned int size) | ||
173 | { | ||
174 | /* Intel PIII Tualatin. This comes in two flavours. | ||
175 | * One has 256kb of cache, the other 512kb. We have no way | ||
176 | * to determine which, so we use a boot-time override | ||
177 | * for the 512kb model, and assume 256kb otherwise. | ||
178 | */ | ||
179 | if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0)) | ||
180 | size = 256; | ||
181 | return size; | ||
182 | } | ||
183 | |||
184 | static struct cpu_dev intel_cpu_dev __initdata = { | ||
185 | .c_vendor = "Intel", | ||
186 | .c_ident = { "GenuineIntel" }, | ||
187 | .c_models = { | ||
188 | { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names = | ||
189 | { | ||
190 | [0] = "486 DX-25/33", | ||
191 | [1] = "486 DX-50", | ||
192 | [2] = "486 SX", | ||
193 | [3] = "486 DX/2", | ||
194 | [4] = "486 SL", | ||
195 | [5] = "486 SX/2", | ||
196 | [7] = "486 DX/2-WB", | ||
197 | [8] = "486 DX/4", | ||
198 | [9] = "486 DX/4-WB" | ||
199 | } | ||
200 | }, | ||
201 | { .vendor = X86_VENDOR_INTEL, .family = 5, .model_names = | ||
202 | { | ||
203 | [0] = "Pentium 60/66 A-step", | ||
204 | [1] = "Pentium 60/66", | ||
205 | [2] = "Pentium 75 - 200", | ||
206 | [3] = "OverDrive PODP5V83", | ||
207 | [4] = "Pentium MMX", | ||
208 | [7] = "Mobile Pentium 75 - 200", | ||
209 | [8] = "Mobile Pentium MMX" | ||
210 | } | ||
211 | }, | ||
212 | { .vendor = X86_VENDOR_INTEL, .family = 6, .model_names = | ||
213 | { | ||
214 | [0] = "Pentium Pro A-step", | ||
215 | [1] = "Pentium Pro", | ||
216 | [3] = "Pentium II (Klamath)", | ||
217 | [4] = "Pentium II (Deschutes)", | ||
218 | [5] = "Pentium II (Deschutes)", | ||
219 | [6] = "Mobile Pentium II", | ||
220 | [7] = "Pentium III (Katmai)", | ||
221 | [8] = "Pentium III (Coppermine)", | ||
222 | [10] = "Pentium III (Cascades)", | ||
223 | [11] = "Pentium III (Tualatin)", | ||
224 | } | ||
225 | }, | ||
226 | { .vendor = X86_VENDOR_INTEL, .family = 15, .model_names = | ||
227 | { | ||
228 | [0] = "Pentium 4 (Unknown)", | ||
229 | [1] = "Pentium 4 (Willamette)", | ||
230 | [2] = "Pentium 4 (Northwood)", | ||
231 | [4] = "Pentium 4 (Foster)", | ||
232 | [5] = "Pentium 4 (Foster)", | ||
233 | } | ||
234 | }, | ||
235 | }, | ||
236 | .c_init = init_intel, | ||
237 | .c_identify = generic_identify, | ||
238 | .c_size_cache = intel_size_cache, | ||
239 | }; | ||
240 | |||
241 | __init int intel_cpu_init(void) | ||
242 | { | ||
243 | cpu_devs[X86_VENDOR_INTEL] = &intel_cpu_dev; | ||
244 | return 0; | ||
245 | } | ||
246 | |||
247 | // arch_initcall(intel_cpu_init); | ||
248 | |||
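One detail in init_intel() above that is easy to misread is the packed family/model/stepping comparison used for the SEP CPUID bug. Spelling the arithmetic out (stepping values chosen for illustration):

	/* (family << 8) | (model << 4) | stepping, compared against 0x633:
	 *   Pentium Pro, family 6 model 1 stepping 7:
	 *	(6 << 8) | (1 << 4) | 7 = 0x617  < 0x633 -> SEP bit cleared
	 *   PII Klamath, family 6 model 3 stepping 3:
	 *	(6 << 8) | (3 << 4) | 3 = 0x633 !< 0x633 -> SEP bit kept
	 */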
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c new file mode 100644 index 000000000000..aeb5b4ef8c8b --- /dev/null +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c | |||
@@ -0,0 +1,598 @@ | |||
1 | /* | ||
2 | * Routines to identify caches on Intel CPUs. | ||
3 | * | ||
4 | * Changes: | ||
5 | * Venkatesh Pallipadi : Adding cache identification through cpuid(4) | ||
6 | */ | ||
7 | |||
8 | #include <linux/init.h> | ||
9 | #include <linux/slab.h> | ||
10 | #include <linux/device.h> | ||
11 | #include <linux/compiler.h> | ||
12 | #include <linux/cpu.h> | ||
13 | |||
14 | #include <asm/processor.h> | ||
15 | #include <asm/smp.h> | ||
16 | |||
17 | #define LVL_1_INST 1 | ||
18 | #define LVL_1_DATA 2 | ||
19 | #define LVL_2 3 | ||
20 | #define LVL_3 4 | ||
21 | #define LVL_TRACE 5 | ||
22 | |||
23 | struct _cache_table | ||
24 | { | ||
25 | unsigned char descriptor; | ||
26 | char cache_type; | ||
27 | short size; | ||
28 | }; | ||
29 | |||
30 | /* all the cache descriptor types we care about (no TLB entries) */ | ||
31 | static struct _cache_table cache_table[] __initdata = | ||
32 | { | ||
33 | { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ | ||
34 | { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */ | ||
35 | { 0x0a, LVL_1_DATA, 8 }, /* 2-way set assoc, 32 byte line size */ | ||
36 | { 0x0c, LVL_1_DATA, 16 }, /* 4-way set assoc, 32 byte line size */ | ||
37 | { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ | ||
38 | { 0x23, LVL_3, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */ | ||
39 | { 0x25, LVL_3, 2048 }, /* 8-way set assoc, sectored cache, 64 byte line size */ | ||
40 | { 0x29, LVL_3, 4096 }, /* 8-way set assoc, sectored cache, 64 byte line size */ | ||
41 | { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */ | ||
42 | { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */ | ||
43 | { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */ | ||
44 | { 0x3b, LVL_2, 128 }, /* 2-way set assoc, sectored cache, 64 byte line size */ | ||
45 | { 0x3c, LVL_2, 256 }, /* 4-way set assoc, sectored cache, 64 byte line size */ | ||
46 | { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */ | ||
47 | { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */ | ||
48 | { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */ | ||
49 | { 0x44, LVL_2, 1024 }, /* 4-way set assoc, 32 byte line size */ | ||
50 | { 0x45, LVL_2, 2048 }, /* 4-way set assoc, 32 byte line size */ | ||
51 | { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */ | ||
52 | { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */ | ||
53 | { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */ | ||
54 | { 0x68, LVL_1_DATA, 32 }, /* 4-way set assoc, sectored cache, 64 byte line size */ | ||
55 | { 0x70, LVL_TRACE, 12 }, /* 8-way set assoc */ | ||
56 | { 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */ | ||
57 | { 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */ | ||
58 | { 0x78, LVL_2, 1024 }, /* 4-way set assoc, 64 byte line size */ | ||
59 | { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */ | ||
60 | { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */ | ||
61 | { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */ | ||
62 | { 0x7c, LVL_2, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */ | ||
63 | { 0x7d, LVL_2, 2048 }, /* 8-way set assoc, 64 byte line size */ | ||
64 | { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */ | ||
65 | { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */ | ||
66 | { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */ | ||
67 | { 0x84, LVL_2, 1024 }, /* 8-way set assoc, 32 byte line size */ | ||
68 | { 0x85, LVL_2, 2048 }, /* 8-way set assoc, 32 byte line size */ | ||
69 | { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */ | ||
70 | { 0x87, LVL_2, 1024 }, /* 8-way set assoc, 64 byte line size */ | ||
71 | { 0x00, 0, 0} | ||
72 | }; | ||
73 | |||
74 | |||
75 | enum _cache_type | ||
76 | { | ||
77 | CACHE_TYPE_NULL = 0, | ||
78 | CACHE_TYPE_DATA = 1, | ||
79 | CACHE_TYPE_INST = 2, | ||
80 | CACHE_TYPE_UNIFIED = 3 | ||
81 | }; | ||
82 | |||
83 | union _cpuid4_leaf_eax { | ||
84 | struct { | ||
85 | enum _cache_type type:5; | ||
86 | unsigned int level:3; | ||
87 | unsigned int is_self_initializing:1; | ||
88 | unsigned int is_fully_associative:1; | ||
89 | unsigned int reserved:4; | ||
90 | unsigned int num_threads_sharing:12; | ||
91 | unsigned int num_cores_on_die:6; | ||
92 | } split; | ||
93 | u32 full; | ||
94 | }; | ||
95 | |||
96 | union _cpuid4_leaf_ebx { | ||
97 | struct { | ||
98 | unsigned int coherency_line_size:12; | ||
99 | unsigned int physical_line_partition:10; | ||
100 | unsigned int ways_of_associativity:10; | ||
101 | } split; | ||
102 | u32 full; | ||
103 | }; | ||
104 | |||
105 | union _cpuid4_leaf_ecx { | ||
106 | struct { | ||
107 | unsigned int number_of_sets:32; | ||
108 | } split; | ||
109 | u32 full; | ||
110 | }; | ||
111 | |||
112 | struct _cpuid4_info { | ||
113 | union _cpuid4_leaf_eax eax; | ||
114 | union _cpuid4_leaf_ebx ebx; | ||
115 | union _cpuid4_leaf_ecx ecx; | ||
116 | unsigned long size; | ||
117 | cpumask_t shared_cpu_map; | ||
118 | }; | ||
119 | |||
120 | #define MAX_CACHE_LEAVES 4 | ||
121 | static unsigned short __devinitdata num_cache_leaves; | ||
122 | |||
123 | static int __devinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) | ||
124 | { | ||
125 | unsigned int eax, ebx, ecx, edx; | ||
126 | union _cpuid4_leaf_eax cache_eax; | ||
127 | |||
128 | cpuid_count(4, index, &eax, &ebx, &ecx, &edx); | ||
129 | cache_eax.full = eax; | ||
130 | if (cache_eax.split.type == CACHE_TYPE_NULL) | ||
131 | return -1; | ||
132 | |||
133 | this_leaf->eax.full = eax; | ||
134 | this_leaf->ebx.full = ebx; | ||
135 | this_leaf->ecx.full = ecx; | ||
136 | this_leaf->size = (this_leaf->ecx.split.number_of_sets + 1) * | ||
137 | (this_leaf->ebx.split.coherency_line_size + 1) * | ||
138 | (this_leaf->ebx.split.physical_line_partition + 1) * | ||
139 | (this_leaf->ebx.split.ways_of_associativity + 1); | ||
140 | return 0; | ||
141 | } | ||
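The size computation in cpuid4_cache_lookup() above relies on cpuid(4) reporting each field as the actual value minus one. A worked example for a 512 KB, 8-way cache with 64-byte lines and one line per sector:

	/* ways = 7, line_size = 63, partitions = 0, sets = 1023:
	 *	(1023+1) * (63+1) * (0+1) * (7+1) = 524288 bytes = 512 KB */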
142 | |||
143 | static int __init find_num_cache_leaves(void) | ||
144 | { | ||
145 | unsigned int eax, ebx, ecx, edx; | ||
146 | union _cpuid4_leaf_eax cache_eax; | ||
147 | int i; | ||
148 | int retval; | ||
149 | |||
150 | retval = MAX_CACHE_LEAVES; | ||
151 | /* Do cpuid(4) loop to find out num_cache_leaves */ | ||
152 | for (i = 0; i < MAX_CACHE_LEAVES; i++) { | ||
153 | cpuid_count(4, i, &eax, &ebx, &ecx, &edx); | ||
154 | cache_eax.full = eax; | ||
155 | if (cache_eax.split.type == CACHE_TYPE_NULL) { | ||
156 | retval = i; | ||
157 | break; | ||
158 | } | ||
159 | } | ||
160 | return retval; | ||
161 | } | ||
162 | |||
163 | unsigned int __init init_intel_cacheinfo(struct cpuinfo_x86 *c) | ||
164 | { | ||
165 | unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ | ||
166 | unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ | ||
167 | unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ | ||
168 | |||
169 | if (c->cpuid_level > 4) { | ||
170 | static int is_initialized; | ||
171 | |||
172 | if (is_initialized == 0) { | ||
173 | /* Init num_cache_leaves from boot CPU */ | ||
174 | num_cache_leaves = find_num_cache_leaves(); | ||
175 | is_initialized++; | ||
176 | } | ||
177 | |||
178 | /* | ||
179 | * Whenever possible, use cpuid(4), the deterministic cache | ||
180 | * parameters leaf, to find the cache details. | ||
181 | */ | ||
182 | for (i = 0; i < num_cache_leaves; i++) { | ||
183 | struct _cpuid4_info this_leaf; | ||
184 | |||
185 | int retval; | ||
186 | |||
187 | retval = cpuid4_cache_lookup(i, &this_leaf); | ||
188 | if (retval >= 0) { | ||
189 | switch(this_leaf.eax.split.level) { | ||
190 | case 1: | ||
191 | if (this_leaf.eax.split.type == | ||
192 | CACHE_TYPE_DATA) | ||
193 | new_l1d = this_leaf.size/1024; | ||
194 | else if (this_leaf.eax.split.type == | ||
195 | CACHE_TYPE_INST) | ||
196 | new_l1i = this_leaf.size/1024; | ||
197 | break; | ||
198 | case 2: | ||
199 | new_l2 = this_leaf.size/1024; | ||
200 | break; | ||
201 | case 3: | ||
202 | new_l3 = this_leaf.size/1024; | ||
203 | break; | ||
204 | default: | ||
205 | break; | ||
206 | } | ||
207 | } | ||
208 | } | ||
209 | } | ||
210 | if (c->cpuid_level > 1) { | ||
211 | /* supports eax=2 call */ | ||
212 | int i, j, n; | ||
213 | int regs[4]; | ||
214 | unsigned char *dp = (unsigned char *)regs; | ||
215 | |||
216 | /* Number of times to iterate */ | ||
217 | n = cpuid_eax(2) & 0xFF; | ||
218 | |||
219 | for ( i = 0 ; i < n ; i++ ) { | ||
220 | cpuid(2, ®s[0], ®s[1], ®s[2], ®s[3]); | ||
221 | |||
222 | /* If bit 31 is set, this is an unknown format */ | ||
223 | for ( j = 0 ; j < 3 ; j++ ) { | ||
224 | if ( regs[j] < 0 ) regs[j] = 0; | ||
225 | } | ||
226 | |||
227 | /* Byte 0 is the repeat count, not a descriptor */ | ||
228 | for ( j = 1 ; j < 16 ; j++ ) { | ||
229 | unsigned char des = dp[j]; | ||
230 | unsigned char k = 0; | ||
231 | |||
232 | /* look up this descriptor in the table */ | ||
233 | while (cache_table[k].descriptor != 0) | ||
234 | { | ||
235 | if (cache_table[k].descriptor == des) { | ||
236 | switch (cache_table[k].cache_type) { | ||
237 | case LVL_1_INST: | ||
238 | l1i += cache_table[k].size; | ||
239 | break; | ||
240 | case LVL_1_DATA: | ||
241 | l1d += cache_table[k].size; | ||
242 | break; | ||
243 | case LVL_2: | ||
244 | l2 += cache_table[k].size; | ||
245 | break; | ||
246 | case LVL_3: | ||
247 | l3 += cache_table[k].size; | ||
248 | break; | ||
249 | case LVL_TRACE: | ||
250 | trace += cache_table[k].size; | ||
251 | break; | ||
252 | } | ||
253 | |||
254 | break; | ||
255 | } | ||
256 | |||
257 | k++; | ||
258 | } | ||
259 | } | ||
260 | } | ||
261 | |||
262 | if (new_l1d) | ||
263 | l1d = new_l1d; | ||
264 | |||
265 | if (new_l1i) | ||
266 | l1i = new_l1i; | ||
267 | |||
268 | if (new_l2) | ||
269 | l2 = new_l2; | ||
270 | |||
271 | if (new_l3) | ||
272 | l3 = new_l3; | ||
273 | |||
274 | if ( trace ) | ||
275 | printk (KERN_INFO "CPU: Trace cache: %dK uops", trace); | ||
276 | else if ( l1i ) | ||
277 | printk (KERN_INFO "CPU: L1 I cache: %dK", l1i); | ||
278 | if ( l1d ) | ||
279 | printk(", L1 D cache: %dK\n", l1d); | ||
280 | else | ||
281 | printk("\n"); | ||
282 | if ( l2 ) | ||
283 | printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); | ||
284 | if ( l3 ) | ||
285 | printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); | ||
286 | |||
287 | /* | ||
288 | * This assumes the L3 cache is shared; it typically lives in | ||
289 | * the northbridge. The L1 caches are included in the L2 | ||
290 | * cache, and so should not be counted for the purpose of | ||
291 | * SMP switching weights. | ||
292 | */ | ||
293 | c->x86_cache_size = l2 ? l2 : (l1i+l1d); | ||
294 | } | ||
295 | |||
296 | return l2; | ||
297 | } | ||
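For older CPUs the cpuid(2) path above treats the four output registers as sixteen descriptor bytes, skipping byte 0 (the repeat count in AL) and any register whose bit 31 is set. A hedged user-space sketch of the same walk, again assuming GCC/clang's <cpuid.h> (and checking all four registers' validity bits, where the kernel loop above checks eax/ebx/ecx):

        #include <stdio.h>
        #include <cpuid.h>

        int main(void)
        {
                int regs[4];
                unsigned char *dp = (unsigned char *)regs;
                int i, j, n;

                __cpuid(2, regs[0], regs[1], regs[2], regs[3]);
                n = regs[0] & 0xff;              /* AL: number of iterations */
                for (i = 0; i < n; i++) {
                        __cpuid(2, regs[0], regs[1], regs[2], regs[3]);
                        for (j = 0; j < 4; j++)  /* bit 31 set => register invalid */
                                if (regs[j] < 0)
                                        regs[j] = 0;
                        for (j = 1; j < 16; j++) /* byte 0 is the count, skip it */
                                if (dp[j])
                                        printf("descriptor 0x%02x\n", dp[j]);
                }
                return 0;
        }

Each nonzero byte would then be looked up in a table like cache_table above to classify it as L1/L2/L3/trace cache and accumulate the sizes.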
298 | |||
299 | /* pointer to _cpuid4_info array (for each cache leaf) */ | ||
300 | static struct _cpuid4_info *cpuid4_info[NR_CPUS]; | ||
301 | #define CPUID4_INFO_IDX(x,y) (&((cpuid4_info[x])[y])) | ||
302 | |||
303 | #ifdef CONFIG_SMP | ||
304 | static void __devinit cache_shared_cpu_map_setup(unsigned int cpu, int index) | ||
305 | { | ||
306 | struct _cpuid4_info *this_leaf; | ||
307 | unsigned long num_threads_sharing; | ||
308 | |||
309 | this_leaf = CPUID4_INFO_IDX(cpu, index); | ||
310 | num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; | ||
311 | |||
312 | if (num_threads_sharing == 1) | ||
313 | cpu_set(cpu, this_leaf->shared_cpu_map); | ||
314 | #ifdef CONFIG_X86_HT | ||
315 | else if (num_threads_sharing == smp_num_siblings) | ||
316 | this_leaf->shared_cpu_map = cpu_sibling_map[cpu]; | ||
317 | #endif | ||
318 | else | ||
319 | printk(KERN_INFO "Number of CPUs sharing cache didn't match " | ||
320 | "any known set of CPUs\n"); | ||
321 | } | ||
322 | #else | ||
323 | static void __init cache_shared_cpu_map_setup(unsigned int cpu, int index) {} | ||
324 | #endif | ||
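The sharing test above relies on bits 25:14 of leaf 4's EAX, which hold one less than the number of hardware threads sharing the cache, as the num_threads_sharing bitfield earlier makes explicit. A small hedged sketch of the extraction, in user space with GCC/clang's <cpuid.h>:

        #include <stdio.h>
        #include <cpuid.h>

        int main(void)
        {
                unsigned int eax, ebx, ecx, edx;

                __cpuid_count(4, 0, eax, ebx, ecx, edx);  /* leaf 4, index 0 */
                printf("threads sharing this cache: %u\n",
                       ((eax >> 14) & 0xfff) + 1);
                return 0;
        }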
325 | |||
326 | static void free_cache_attributes(unsigned int cpu) | ||
327 | { | ||
328 | kfree(cpuid4_info[cpu]); | ||
329 | cpuid4_info[cpu] = NULL; | ||
330 | } | ||
331 | |||
332 | static int __devinit detect_cache_attributes(unsigned int cpu) | ||
333 | { | ||
334 | struct _cpuid4_info *this_leaf; | ||
335 | unsigned long j; | ||
336 | int retval; | ||
337 | |||
338 | if (num_cache_leaves == 0) | ||
339 | return -ENOENT; | ||
340 | |||
341 | cpuid4_info[cpu] = kmalloc( | ||
342 | sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); | ||
343 | if (unlikely(cpuid4_info[cpu] == NULL)) | ||
344 | return -ENOMEM; | ||
345 | memset(cpuid4_info[cpu], 0, | ||
346 | sizeof(struct _cpuid4_info) * num_cache_leaves); | ||
347 | |||
348 | /* Do cpuid and store the results */ | ||
349 | for (j = 0; j < num_cache_leaves; j++) { | ||
350 | this_leaf = CPUID4_INFO_IDX(cpu, j); | ||
351 | retval = cpuid4_cache_lookup(j, this_leaf); | ||
352 | if (unlikely(retval < 0)) | ||
353 | goto err_out; | ||
354 | cache_shared_cpu_map_setup(cpu, j); | ||
355 | } | ||
356 | return 0; | ||
357 | |||
358 | err_out: | ||
359 | free_cache_attributes(cpu); | ||
360 | return -ENOMEM; | ||
361 | } | ||
362 | |||
363 | #ifdef CONFIG_SYSFS | ||
364 | |||
365 | #include <linux/kobject.h> | ||
366 | #include <linux/sysfs.h> | ||
367 | |||
368 | extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */ | ||
369 | |||
370 | /* pointer to kobject for cpuX/cache */ | ||
371 | static struct kobject * cache_kobject[NR_CPUS]; | ||
372 | |||
373 | struct _index_kobject { | ||
374 | struct kobject kobj; | ||
375 | unsigned int cpu; | ||
376 | unsigned short index; | ||
377 | }; | ||
378 | |||
379 | /* pointer to array of kobjects for cpuX/cache/indexY */ | ||
380 | static struct _index_kobject *index_kobject[NR_CPUS]; | ||
381 | #define INDEX_KOBJECT_PTR(x,y) (&((index_kobject[x])[y])) | ||
382 | |||
383 | #define show_one_plus(file_name, object, val) \ | ||
384 | static ssize_t show_##file_name \ | ||
385 | (struct _cpuid4_info *this_leaf, char *buf) \ | ||
386 | { \ | ||
387 | return sprintf (buf, "%lu\n", (unsigned long)this_leaf->object + val); \ | ||
388 | } | ||
389 | |||
390 | show_one_plus(level, eax.split.level, 0); | ||
391 | show_one_plus(coherency_line_size, ebx.split.coherency_line_size, 1); | ||
392 | show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1); | ||
393 | show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1); | ||
394 | show_one_plus(number_of_sets, ecx.split.number_of_sets, 1); | ||
395 | |||
396 | static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf) | ||
397 | { | ||
398 | return sprintf (buf, "%luK\n", this_leaf->size / 1024); | ||
399 | } | ||
400 | |||
401 | static ssize_t show_shared_cpu_map(struct _cpuid4_info *this_leaf, char *buf) | ||
402 | { | ||
403 | char mask_str[NR_CPUS]; | ||
404 | cpumask_scnprintf(mask_str, NR_CPUS, this_leaf->shared_cpu_map); | ||
405 | return sprintf(buf, "%s\n", mask_str); | ||
406 | } | ||
407 | |||
408 | static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) { | ||
409 | switch(this_leaf->eax.split.type) { | ||
410 | case CACHE_TYPE_DATA: | ||
411 | return sprintf(buf, "Data\n"); | ||
412 | break; | ||
413 | case CACHE_TYPE_INST: | ||
414 | return sprintf(buf, "Instruction\n"); | ||
415 | break; | ||
416 | case CACHE_TYPE_UNIFIED: | ||
417 | return sprintf(buf, "Unified\n"); | ||
418 | break; | ||
419 | default: | ||
420 | return sprintf(buf, "Unknown\n"); | ||
421 | break; | ||
422 | } | ||
423 | } | ||
424 | |||
425 | struct _cache_attr { | ||
426 | struct attribute attr; | ||
427 | ssize_t (*show)(struct _cpuid4_info *, char *); | ||
428 | ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count); | ||
429 | }; | ||
430 | |||
431 | #define define_one_ro(_name) \ | ||
432 | static struct _cache_attr _name = \ | ||
433 | __ATTR(_name, 0444, show_##_name, NULL) | ||
434 | |||
435 | define_one_ro(level); | ||
436 | define_one_ro(type); | ||
437 | define_one_ro(coherency_line_size); | ||
438 | define_one_ro(physical_line_partition); | ||
439 | define_one_ro(ways_of_associativity); | ||
440 | define_one_ro(number_of_sets); | ||
441 | define_one_ro(size); | ||
442 | define_one_ro(shared_cpu_map); | ||
443 | |||
444 | static struct attribute * default_attrs[] = { | ||
445 | &type.attr, | ||
446 | &level.attr, | ||
447 | &coherency_line_size.attr, | ||
448 | &physical_line_partition.attr, | ||
449 | &ways_of_associativity.attr, | ||
450 | &number_of_sets.attr, | ||
451 | &size.attr, | ||
452 | &shared_cpu_map.attr, | ||
453 | NULL | ||
454 | }; | ||
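Given the kobject names set up below ("cache" under each CPU's sysdev, "indexN" under that), these attributes should surface as files like /sys/devices/system/cpu/cpu0/cache/index0/size. A hedged user-space sketch of consuming them, assuming that path layout:

        #include <stdio.h>

        static void show(const char *name)
        {
                char path[128], buf[64];
                FILE *f;

                snprintf(path, sizeof(path),
                         "/sys/devices/system/cpu/cpu0/cache/index0/%s", name);
                f = fopen(path, "r");
                if (!f)
                        return;
                if (fgets(buf, sizeof(buf), f))
                        printf("%-16s %s", name, buf);
                fclose(f);
        }

        int main(void)
        {
                show("type");
                show("level");
                show("size");
                show("shared_cpu_map");
                return 0;
        }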
455 | |||
456 | #define to_object(k) container_of(k, struct _index_kobject, kobj) | ||
457 | #define to_attr(a) container_of(a, struct _cache_attr, attr) | ||
458 | |||
459 | static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) | ||
460 | { | ||
461 | struct _cache_attr *fattr = to_attr(attr); | ||
462 | struct _index_kobject *this_leaf = to_object(kobj); | ||
463 | ssize_t ret; | ||
464 | |||
465 | ret = fattr->show ? | ||
466 | fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), | ||
467 | buf) : | ||
468 | 0; | ||
469 | return ret; | ||
470 | } | ||
471 | |||
472 | static ssize_t store(struct kobject * kobj, struct attribute * attr, | ||
473 | const char * buf, size_t count) | ||
474 | { | ||
475 | return 0; | ||
476 | } | ||
477 | |||
478 | static struct sysfs_ops sysfs_ops = { | ||
479 | .show = show, | ||
480 | .store = store, | ||
481 | }; | ||
482 | |||
483 | static struct kobj_type ktype_cache = { | ||
484 | .sysfs_ops = &sysfs_ops, | ||
485 | .default_attrs = default_attrs, | ||
486 | }; | ||
487 | |||
488 | static struct kobj_type ktype_percpu_entry = { | ||
489 | .sysfs_ops = &sysfs_ops, | ||
490 | }; | ||
491 | |||
492 | static void cpuid4_cache_sysfs_exit(unsigned int cpu) | ||
493 | { | ||
494 | kfree(cache_kobject[cpu]); | ||
495 | kfree(index_kobject[cpu]); | ||
496 | cache_kobject[cpu] = NULL; | ||
497 | index_kobject[cpu] = NULL; | ||
498 | free_cache_attributes(cpu); | ||
499 | } | ||
500 | |||
501 | static int __devinit cpuid4_cache_sysfs_init(unsigned int cpu) | ||
502 | { | ||
503 | |||
504 | if (num_cache_leaves == 0) | ||
505 | return -ENOENT; | ||
506 | |||
507 | detect_cache_attributes(cpu); | ||
508 | if (cpuid4_info[cpu] == NULL) | ||
509 | return -ENOENT; | ||
510 | |||
511 | /* Allocate all required memory */ | ||
512 | cache_kobject[cpu] = kmalloc(sizeof(struct kobject), GFP_KERNEL); | ||
513 | if (unlikely(cache_kobject[cpu] == NULL)) | ||
514 | goto err_out; | ||
515 | memset(cache_kobject[cpu], 0, sizeof(struct kobject)); | ||
516 | |||
517 | index_kobject[cpu] = kmalloc( | ||
518 | sizeof(struct _index_kobject ) * num_cache_leaves, GFP_KERNEL); | ||
519 | if (unlikely(index_kobject[cpu] == NULL)) | ||
520 | goto err_out; | ||
521 | memset(index_kobject[cpu], 0, | ||
522 | sizeof(struct _index_kobject) * num_cache_leaves); | ||
523 | |||
524 | return 0; | ||
525 | |||
526 | err_out: | ||
527 | cpuid4_cache_sysfs_exit(cpu); | ||
528 | return -ENOMEM; | ||
529 | } | ||
530 | |||
531 | /* Add/Remove cache interface for CPU device */ | ||
532 | static int __devinit cache_add_dev(struct sys_device * sys_dev) | ||
533 | { | ||
534 | unsigned int cpu = sys_dev->id; | ||
535 | unsigned long i, j; | ||
536 | struct _index_kobject *this_object; | ||
537 | int retval = 0; | ||
538 | |||
539 | retval = cpuid4_cache_sysfs_init(cpu); | ||
540 | if (unlikely(retval < 0)) | ||
541 | return retval; | ||
542 | |||
543 | cache_kobject[cpu]->parent = &sys_dev->kobj; | ||
544 | kobject_set_name(cache_kobject[cpu], "%s", "cache"); | ||
545 | cache_kobject[cpu]->ktype = &ktype_percpu_entry; | ||
546 | retval = kobject_register(cache_kobject[cpu]); | ||
547 | |||
548 | for (i = 0; i < num_cache_leaves; i++) { | ||
549 | this_object = INDEX_KOBJECT_PTR(cpu,i); | ||
550 | this_object->cpu = cpu; | ||
551 | this_object->index = i; | ||
552 | this_object->kobj.parent = cache_kobject[cpu]; | ||
553 | kobject_set_name(&(this_object->kobj), "index%1lu", i); | ||
554 | this_object->kobj.ktype = &ktype_cache; | ||
555 | retval = kobject_register(&(this_object->kobj)); | ||
556 | if (unlikely(retval)) { | ||
557 | for (j = 0; j < i; j++) { | ||
558 | kobject_unregister( | ||
559 | &(INDEX_KOBJECT_PTR(cpu,j)->kobj)); | ||
560 | } | ||
561 | kobject_unregister(cache_kobject[cpu]); | ||
562 | cpuid4_cache_sysfs_exit(cpu); | ||
563 | break; | ||
564 | } | ||
565 | } | ||
566 | return retval; | ||
567 | } | ||
568 | |||
569 | static int __devexit cache_remove_dev(struct sys_device * sys_dev) | ||
570 | { | ||
571 | unsigned int cpu = sys_dev->id; | ||
572 | unsigned long i; | ||
573 | |||
574 | for (i = 0; i < num_cache_leaves; i++) | ||
575 | kobject_unregister(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); | ||
576 | kobject_unregister(cache_kobject[cpu]); | ||
577 | cpuid4_cache_sysfs_exit(cpu); | ||
578 | return 0; | ||
579 | } | ||
580 | |||
581 | static struct sysdev_driver cache_sysdev_driver = { | ||
582 | .add = cache_add_dev, | ||
583 | .remove = __devexit_p(cache_remove_dev), | ||
584 | }; | ||
585 | |||
586 | /* Register/Unregister the cpu_cache driver */ | ||
587 | static int __devinit cache_register_driver(void) | ||
588 | { | ||
589 | if (num_cache_leaves == 0) | ||
590 | return 0; | ||
591 | |||
592 | return sysdev_driver_register(&cpu_sysdev_class,&cache_sysdev_driver); | ||
593 | } | ||
594 | |||
595 | device_initcall(cache_register_driver); | ||
596 | |||
597 | #endif | ||
598 | |||
diff --git a/arch/i386/kernel/cpu/mcheck/Makefile b/arch/i386/kernel/cpu/mcheck/Makefile new file mode 100644 index 000000000000..30808f3d6715 --- /dev/null +++ b/arch/i386/kernel/cpu/mcheck/Makefile | |||
@@ -0,0 +1,2 @@ | |||
1 | obj-y = mce.o k7.o p4.o p5.o p6.o winchip.o | ||
2 | obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o | ||
diff --git a/arch/i386/kernel/cpu/mcheck/k7.c b/arch/i386/kernel/cpu/mcheck/k7.c new file mode 100644 index 000000000000..8df52e86c4d2 --- /dev/null +++ b/arch/i386/kernel/cpu/mcheck/k7.c | |||
@@ -0,0 +1,97 @@ | |||
1 | /* | ||
2 | * Athlon/Hammer specific Machine Check Exception Reporting | ||
3 | * (C) Copyright 2002 Dave Jones <davej@codemonkey.org.uk> | ||
4 | */ | ||
5 | |||
6 | #include <linux/init.h> | ||
7 | #include <linux/types.h> | ||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/config.h> | ||
10 | #include <linux/irq.h> | ||
11 | #include <linux/interrupt.h> | ||
12 | #include <linux/smp.h> | ||
13 | |||
14 | #include <asm/processor.h> | ||
15 | #include <asm/system.h> | ||
16 | #include <asm/msr.h> | ||
17 | |||
18 | #include "mce.h" | ||
19 | |||
20 | /* Machine Check Handler For AMD Athlon/Duron */ | ||
21 | static fastcall void k7_machine_check(struct pt_regs * regs, long error_code) | ||
22 | { | ||
23 | int recover=1; | ||
24 | u32 alow, ahigh, high, low; | ||
25 | u32 mcgstl, mcgsth; | ||
26 | int i; | ||
27 | |||
28 | rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | ||
29 | if (mcgstl & (1<<0)) /* Recoverable ? */ | ||
30 | recover=0; | ||
31 | |||
32 | printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", | ||
33 | smp_processor_id(), mcgsth, mcgstl); | ||
34 | |||
35 | for (i=1; i<nr_mce_banks; i++) { | ||
36 | rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high); | ||
37 | if (high&(1<<31)) { | ||
38 | if (high & (1<<29)) | ||
39 | recover |= 1; | ||
40 | if (high & (1<<25)) | ||
41 | recover |= 2; | ||
42 | printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low); | ||
43 | high &= ~(1<<31); | ||
44 | if (high & (1<<27)) { | ||
45 | rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh); | ||
46 | printk ("[%08x%08x]", ahigh, alow); | ||
47 | } | ||
48 | if (high & (1<<26)) { | ||
49 | rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh); | ||
50 | printk (" at %08x%08x", ahigh, alow); | ||
51 | } | ||
52 | printk ("\n"); | ||
53 | /* Clear it */ | ||
54 | wrmsr (MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL); | ||
55 | /* Serialize */ | ||
56 | wmb(); | ||
57 | add_taint(TAINT_MACHINE_CHECK); | ||
58 | } | ||
59 | } | ||
60 | |||
61 | if (recover&2) | ||
62 | panic ("CPU context corrupt"); | ||
63 | if (recover&1) | ||
64 | panic ("Unable to continue"); | ||
65 | printk (KERN_EMERG "Attempting to continue.\n"); | ||
66 | mcgstl &= ~(1<<2); | ||
67 | wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth); | ||
68 | } | ||
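The bit tests in the loop above decode the top half of the 64-bit IA32_MCi_STATUS register: bit 31 of the high word is VAL (bit 63, entry valid), bit 29 is UC (61, uncorrected), bit 27 is MISCV (59, MCi_MISC valid), bit 26 is ADDRV (58, MCi_ADDR valid) and bit 25 is PCC (57, processor context corrupt). A reference sketch of the decode, usable outside the kernel:

        #include <stdio.h>

        /* high = upper 32 bits of IA32_MCi_STATUS */
        static void decode_mci_status(unsigned int high)
        {
                printf("VAL=%u UC=%u MISCV=%u ADDRV=%u PCC=%u\n",
                       (high >> 31) & 1,   /* bit 63: entry valid   */
                       (high >> 29) & 1,   /* bit 61: uncorrected   */
                       (high >> 27) & 1,   /* bit 59: MISC valid    */
                       (high >> 26) & 1,   /* bit 58: ADDR valid    */
                       (high >> 25) & 1);  /* bit 57: ctx corrupt   */
        }

        int main(void)
        {
                decode_mci_status(0xA6000000);  /* hypothetical: VAL|UC|ADDRV|PCC */
                return 0;
        }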
69 | |||
70 | |||
71 | /* AMD K7 machine check is Intel like */ | ||
72 | void __init amd_mcheck_init(struct cpuinfo_x86 *c) | ||
73 | { | ||
74 | u32 l, h; | ||
75 | int i; | ||
76 | |||
77 | machine_check_vector = k7_machine_check; | ||
78 | wmb(); | ||
79 | |||
80 | printk (KERN_INFO "Intel machine check architecture supported.\n"); | ||
81 | rdmsr (MSR_IA32_MCG_CAP, l, h); | ||
82 | if (l & (1<<8)) /* Control register present ? */ | ||
83 | wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); | ||
84 | nr_mce_banks = l & 0xff; | ||
85 | |||
86 | /* Clear status for MC index 0 separately; we don't touch CTL, | ||
87 | * as some Athlons cause spurious MCEs when it is enabled. */ | ||
88 | wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0); | ||
89 | for (i=1; i<nr_mce_banks; i++) { | ||
90 | wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); | ||
91 | wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); | ||
92 | } | ||
93 | |||
94 | set_in_cr4 (X86_CR4_MCE); | ||
95 | printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", | ||
96 | smp_processor_id()); | ||
97 | } | ||
diff --git a/arch/i386/kernel/cpu/mcheck/mce.c b/arch/i386/kernel/cpu/mcheck/mce.c new file mode 100644 index 000000000000..bf6d1aefafc0 --- /dev/null +++ b/arch/i386/kernel/cpu/mcheck/mce.c | |||
@@ -0,0 +1,77 @@ | |||
1 | /* | ||
2 | * mce.c - x86 Machine Check Exception Reporting | ||
3 | * (c) 2002 Alan Cox <alan@redhat.com>, Dave Jones <davej@codemonkey.org.uk> | ||
4 | */ | ||
5 | |||
6 | #include <linux/init.h> | ||
7 | #include <linux/types.h> | ||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/config.h> | ||
10 | #include <linux/module.h> | ||
11 | #include <linux/smp.h> | ||
12 | #include <linux/thread_info.h> | ||
13 | |||
14 | #include <asm/processor.h> | ||
15 | #include <asm/system.h> | ||
16 | |||
17 | #include "mce.h" | ||
18 | |||
19 | int mce_disabled __initdata = 0; | ||
20 | int nr_mce_banks; | ||
21 | |||
22 | EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ | ||
23 | |||
24 | /* Handle unconfigured int18 (should never happen) */ | ||
25 | static fastcall void unexpected_machine_check(struct pt_regs * regs, long error_code) | ||
26 | { | ||
27 | printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id()); | ||
28 | } | ||
29 | |||
30 | /* Call the installed machine check handler for this CPU setup. */ | ||
31 | void fastcall (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check; | ||
32 | |||
33 | /* This has to be run for each processor */ | ||
34 | void __init mcheck_init(struct cpuinfo_x86 *c) | ||
35 | { | ||
36 | if (mce_disabled==1) | ||
37 | return; | ||
38 | |||
39 | switch (c->x86_vendor) { | ||
40 | case X86_VENDOR_AMD: | ||
41 | if (c->x86==6 || c->x86==15) | ||
42 | amd_mcheck_init(c); | ||
43 | break; | ||
44 | |||
45 | case X86_VENDOR_INTEL: | ||
46 | if (c->x86==5) | ||
47 | intel_p5_mcheck_init(c); | ||
48 | if (c->x86==6) | ||
49 | intel_p6_mcheck_init(c); | ||
50 | if (c->x86==15) | ||
51 | intel_p4_mcheck_init(c); | ||
52 | break; | ||
53 | |||
54 | case X86_VENDOR_CENTAUR: | ||
55 | if (c->x86==5) | ||
56 | winchip_mcheck_init(c); | ||
57 | break; | ||
58 | |||
59 | default: | ||
60 | break; | ||
61 | } | ||
62 | } | ||
63 | |||
64 | static int __init mcheck_disable(char *str) | ||
65 | { | ||
66 | mce_disabled = 1; | ||
67 | return 0; | ||
68 | } | ||
69 | |||
70 | static int __init mcheck_enable(char *str) | ||
71 | { | ||
72 | mce_disabled = -1; | ||
73 | return 0; | ||
74 | } | ||
75 | |||
76 | __setup("nomce", mcheck_disable); | ||
77 | __setup("mce", mcheck_enable); | ||
diff --git a/arch/i386/kernel/cpu/mcheck/mce.h b/arch/i386/kernel/cpu/mcheck/mce.h new file mode 100644 index 000000000000..dc2416dfef15 --- /dev/null +++ b/arch/i386/kernel/cpu/mcheck/mce.h | |||
@@ -0,0 +1,14 @@ | |||
1 | #include <linux/init.h> | ||
2 | |||
3 | void amd_mcheck_init(struct cpuinfo_x86 *c); | ||
4 | void intel_p4_mcheck_init(struct cpuinfo_x86 *c); | ||
5 | void intel_p5_mcheck_init(struct cpuinfo_x86 *c); | ||
6 | void intel_p6_mcheck_init(struct cpuinfo_x86 *c); | ||
7 | void winchip_mcheck_init(struct cpuinfo_x86 *c); | ||
8 | |||
9 | /* Call the installed machine check handler for this CPU setup. */ | ||
10 | extern fastcall void (*machine_check_vector)(struct pt_regs *, long error_code); | ||
11 | |||
12 | extern int mce_disabled __initdata; | ||
13 | extern int nr_mce_banks; | ||
14 | |||
diff --git a/arch/i386/kernel/cpu/mcheck/non-fatal.c b/arch/i386/kernel/cpu/mcheck/non-fatal.c new file mode 100644 index 000000000000..7864ddfccf07 --- /dev/null +++ b/arch/i386/kernel/cpu/mcheck/non-fatal.c | |||
@@ -0,0 +1,93 @@ | |||
1 | /* | ||
2 | * Non Fatal Machine Check Exception Reporting | ||
3 | * | ||
4 | * (C) Copyright 2002 Dave Jones. <davej@codemonkey.org.uk> | ||
5 | * | ||
6 | * This file contains routines to check for non-fatal MCEs every 15s | ||
7 | * | ||
8 | */ | ||
9 | |||
10 | #include <linux/init.h> | ||
11 | #include <linux/types.h> | ||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/jiffies.h> | ||
14 | #include <linux/config.h> | ||
15 | #include <linux/irq.h> | ||
16 | #include <linux/workqueue.h> | ||
17 | #include <linux/interrupt.h> | ||
18 | #include <linux/smp.h> | ||
19 | #include <linux/module.h> | ||
20 | |||
21 | #include <asm/processor.h> | ||
22 | #include <asm/system.h> | ||
23 | #include <asm/msr.h> | ||
24 | |||
25 | #include "mce.h" | ||
26 | |||
27 | static int firstbank; | ||
28 | |||
29 | #define MCE_RATE (15*HZ) /* timer rate is 15s */ | ||
30 | |||
31 | static void mce_checkregs (void *info) | ||
32 | { | ||
33 | u32 low, high; | ||
34 | int i; | ||
35 | |||
36 | for (i=firstbank; i<nr_mce_banks; i++) { | ||
37 | rdmsr (MSR_IA32_MC0_STATUS+i*4, low, high); | ||
38 | |||
39 | if (high & (1<<31)) { | ||
40 | printk(KERN_INFO "MCE: The hardware reports a non-" | ||
41 | "fatal, correctable incident that occurred on " | ||
42 | "CPU %d.\n", | ||
43 | smp_processor_id()); | ||
44 | printk (KERN_INFO "Bank %d: %08x%08x\n", i, high, low); | ||
45 | |||
46 | /* Scrub the error so we don't pick it up in MCE_RATE seconds time. */ | ||
47 | wrmsr (MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL); | ||
48 | |||
49 | /* Serialize */ | ||
50 | wmb(); | ||
51 | add_taint(TAINT_MACHINE_CHECK); | ||
52 | } | ||
53 | } | ||
54 | } | ||
55 | |||
56 | static void mce_work_fn(void *data); | ||
57 | static DECLARE_WORK(mce_work, mce_work_fn, NULL); | ||
58 | |||
59 | static void mce_work_fn(void *data) | ||
60 | { | ||
61 | on_each_cpu(mce_checkregs, NULL, 1, 1); | ||
62 | schedule_delayed_work(&mce_work, MCE_RATE); | ||
63 | } | ||
64 | |||
65 | static int __init init_nonfatal_mce_checker(void) | ||
66 | { | ||
67 | struct cpuinfo_x86 *c = &boot_cpu_data; | ||
68 | |||
69 | /* Check for MCE support */ | ||
70 | if (!cpu_has(c, X86_FEATURE_MCE)) | ||
71 | return -ENODEV; | ||
72 | |||
73 | /* Check for PPro style MCA */ | ||
74 | if (!cpu_has(c, X86_FEATURE_MCA)) | ||
75 | return -ENODEV; | ||
76 | |||
77 | /* Some Athlons misbehave when we frob bank 0 */ | ||
78 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && | ||
79 | boot_cpu_data.x86 == 6) | ||
80 | firstbank = 1; | ||
81 | else | ||
82 | firstbank = 0; | ||
83 | |||
84 | /* | ||
85 | * Check for non-fatal errors every MCE_RATE s | ||
86 | */ | ||
87 | schedule_delayed_work(&mce_work, MCE_RATE); | ||
88 | printk(KERN_INFO "Machine check exception polling timer started.\n"); | ||
89 | return 0; | ||
90 | } | ||
91 | module_init(init_nonfatal_mce_checker); | ||
92 | |||
93 | MODULE_LICENSE("GPL"); | ||
diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c new file mode 100644 index 000000000000..8b16ceb929b4 --- /dev/null +++ b/arch/i386/kernel/cpu/mcheck/p4.c | |||
@@ -0,0 +1,271 @@ | |||
1 | /* | ||
2 | * P4 specific Machine Check Exception Reporting | ||
3 | */ | ||
4 | |||
5 | #include <linux/init.h> | ||
6 | #include <linux/types.h> | ||
7 | #include <linux/kernel.h> | ||
8 | #include <linux/config.h> | ||
9 | #include <linux/irq.h> | ||
10 | #include <linux/interrupt.h> | ||
11 | #include <linux/smp.h> | ||
12 | |||
13 | #include <asm/processor.h> | ||
14 | #include <asm/system.h> | ||
15 | #include <asm/msr.h> | ||
16 | #include <asm/apic.h> | ||
17 | |||
18 | #include "mce.h" | ||
19 | |||
20 | /* as supported by the P4/Xeon family */ | ||
21 | struct intel_mce_extended_msrs { | ||
22 | u32 eax; | ||
23 | u32 ebx; | ||
24 | u32 ecx; | ||
25 | u32 edx; | ||
26 | u32 esi; | ||
27 | u32 edi; | ||
28 | u32 ebp; | ||
29 | u32 esp; | ||
30 | u32 eflags; | ||
31 | u32 eip; | ||
32 | /* u32 *reserved[]; */ | ||
33 | }; | ||
34 | |||
35 | static int mce_num_extended_msrs = 0; | ||
36 | |||
37 | |||
38 | #ifdef CONFIG_X86_MCE_P4THERMAL | ||
39 | static void unexpected_thermal_interrupt(struct pt_regs *regs) | ||
40 | { | ||
41 | printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", | ||
42 | smp_processor_id()); | ||
43 | add_taint(TAINT_MACHINE_CHECK); | ||
44 | } | ||
45 | |||
46 | /* P4/Xeon Thermal transition interrupt handler */ | ||
47 | static void intel_thermal_interrupt(struct pt_regs *regs) | ||
48 | { | ||
49 | u32 l, h; | ||
50 | unsigned int cpu = smp_processor_id(); | ||
51 | static unsigned long next[NR_CPUS]; | ||
52 | |||
53 | ack_APIC_irq(); | ||
54 | |||
55 | if (time_after(next[cpu], jiffies)) | ||
56 | return; | ||
57 | |||
58 | next[cpu] = jiffies + HZ*5; | ||
59 | rdmsr(MSR_IA32_THERM_STATUS, l, h); | ||
60 | if (l & 0x1) { | ||
61 | printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu); | ||
62 | printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n", | ||
63 | cpu); | ||
64 | add_taint(TAINT_MACHINE_CHECK); | ||
65 | } else { | ||
66 | printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu); | ||
67 | } | ||
68 | } | ||
69 | |||
70 | /* Thermal interrupt handler for this CPU setup */ | ||
71 | static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt; | ||
72 | |||
73 | fastcall void smp_thermal_interrupt(struct pt_regs *regs) | ||
74 | { | ||
75 | irq_enter(); | ||
76 | vendor_thermal_interrupt(regs); | ||
77 | irq_exit(); | ||
78 | } | ||
79 | |||
80 | /* P4/Xeon Thermal regulation detect and init */ | ||
81 | static void __init intel_init_thermal(struct cpuinfo_x86 *c) | ||
82 | { | ||
83 | u32 l, h; | ||
84 | unsigned int cpu = smp_processor_id(); | ||
85 | |||
86 | /* Thermal monitoring */ | ||
87 | if (!cpu_has(c, X86_FEATURE_ACPI)) | ||
88 | return; /* -ENODEV */ | ||
89 | |||
90 | /* Clock modulation */ | ||
91 | if (!cpu_has(c, X86_FEATURE_ACC)) | ||
92 | return; /* -ENODEV */ | ||
93 | |||
94 | /* First check if it's already enabled, in which case there might | ||
95 | * be some SMM goo which handles it; we can't even install a handler, | ||
96 | * since events might already be delivered via SMI. -zwanem. | ||
97 | */ | ||
98 | rdmsr (MSR_IA32_MISC_ENABLE, l, h); | ||
99 | h = apic_read(APIC_LVTTHMR); | ||
100 | if ((l & (1<<3)) && (h & APIC_DM_SMI)) { | ||
101 | printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", | ||
102 | cpu); | ||
103 | return; /* -EBUSY */ | ||
104 | } | ||
105 | |||
106 | /* check whether a vector already exists (it may be temporarily masked) */ | ||
107 | if (h & APIC_VECTOR_MASK) { | ||
108 | printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already " | ||
109 | "installed\n", | ||
110 | cpu, (h & APIC_VECTOR_MASK)); | ||
111 | return; /* -EBUSY */ | ||
112 | } | ||
113 | |||
114 | /* The temperature transition interrupt handler setup */ | ||
115 | h = THERMAL_APIC_VECTOR; /* our delivery vector */ | ||
116 | h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ | ||
117 | apic_write_around(APIC_LVTTHMR, h); | ||
118 | |||
119 | rdmsr (MSR_IA32_THERM_INTERRUPT, l, h); | ||
120 | wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); | ||
121 | |||
122 | /* ok we're good to go... */ | ||
123 | vendor_thermal_interrupt = intel_thermal_interrupt; | ||
124 | |||
125 | rdmsr (MSR_IA32_MISC_ENABLE, l, h); | ||
126 | wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h); | ||
127 | |||
128 | l = apic_read (APIC_LVTTHMR); | ||
129 | apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED); | ||
130 | printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); | ||
131 | return; | ||
132 | } | ||
133 | #endif /* CONFIG_X86_MCE_P4THERMAL */ | ||
134 | |||
135 | |||
136 | /* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ | ||
137 | static inline int intel_get_extended_msrs(struct intel_mce_extended_msrs *r) | ||
138 | { | ||
139 | u32 h; | ||
140 | |||
141 | if (mce_num_extended_msrs == 0) | ||
142 | goto done; | ||
143 | |||
144 | rdmsr (MSR_IA32_MCG_EAX, r->eax, h); | ||
145 | rdmsr (MSR_IA32_MCG_EBX, r->ebx, h); | ||
146 | rdmsr (MSR_IA32_MCG_ECX, r->ecx, h); | ||
147 | rdmsr (MSR_IA32_MCG_EDX, r->edx, h); | ||
148 | rdmsr (MSR_IA32_MCG_ESI, r->esi, h); | ||
149 | rdmsr (MSR_IA32_MCG_EDI, r->edi, h); | ||
150 | rdmsr (MSR_IA32_MCG_EBP, r->ebp, h); | ||
151 | rdmsr (MSR_IA32_MCG_ESP, r->esp, h); | ||
152 | rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h); | ||
153 | rdmsr (MSR_IA32_MCG_EIP, r->eip, h); | ||
154 | |||
155 | /* can we rely on kmalloc to do a dynamic | ||
156 | * allocation for the reserved registers? | ||
157 | */ | ||
158 | done: | ||
159 | return mce_num_extended_msrs; | ||
160 | } | ||
161 | |||
162 | static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) | ||
163 | { | ||
164 | int recover=1; | ||
165 | u32 alow, ahigh, high, low; | ||
166 | u32 mcgstl, mcgsth; | ||
167 | int i; | ||
168 | struct intel_mce_extended_msrs dbg; | ||
169 | |||
170 | rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | ||
171 | if (mcgstl & (1<<0)) /* Recoverable ? */ | ||
172 | recover=0; | ||
173 | |||
174 | printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", | ||
175 | smp_processor_id(), mcgsth, mcgstl); | ||
176 | |||
177 | if (intel_get_extended_msrs(&dbg)) { | ||
178 | printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n", | ||
179 | smp_processor_id(), dbg.eip, dbg.eflags); | ||
180 | printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n", | ||
181 | dbg.eax, dbg.ebx, dbg.ecx, dbg.edx); | ||
182 | printk (KERN_DEBUG "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n", | ||
183 | dbg.esi, dbg.edi, dbg.ebp, dbg.esp); | ||
184 | } | ||
185 | |||
186 | for (i=0; i<nr_mce_banks; i++) { | ||
187 | rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high); | ||
188 | if (high & (1<<31)) { | ||
189 | if (high & (1<<29)) | ||
190 | recover |= 1; | ||
191 | if (high & (1<<25)) | ||
192 | recover |= 2; | ||
193 | printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low); | ||
194 | high &= ~(1<<31); | ||
195 | if (high & (1<<27)) { | ||
196 | rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh); | ||
197 | printk ("[%08x%08x]", ahigh, alow); | ||
198 | } | ||
199 | if (high & (1<<26)) { | ||
200 | rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh); | ||
201 | printk (" at %08x%08x", ahigh, alow); | ||
202 | } | ||
203 | printk ("\n"); | ||
204 | } | ||
205 | } | ||
206 | |||
207 | if (recover & 2) | ||
208 | panic ("CPU context corrupt"); | ||
209 | if (recover & 1) | ||
210 | panic ("Unable to continue"); | ||
211 | |||
212 | printk(KERN_EMERG "Attempting to continue.\n"); | ||
213 | /* | ||
214 | * Do not clear the MSR_IA32_MCi_STATUS if the error is not | ||
215 | * recoverable/continuable. This allows the BIOS to look at the MSRs | ||
216 | * for errors if the OS could not log the error. | ||
217 | */ | ||
218 | for (i=0; i<nr_mce_banks; i++) { | ||
219 | u32 msr; | ||
220 | msr = MSR_IA32_MC0_STATUS+i*4; | ||
221 | rdmsr (msr, low, high); | ||
222 | if (high&(1<<31)) { | ||
223 | /* Clear it */ | ||
224 | wrmsr(msr, 0UL, 0UL); | ||
225 | /* Serialize */ | ||
226 | wmb(); | ||
227 | add_taint(TAINT_MACHINE_CHECK); | ||
228 | } | ||
229 | } | ||
230 | mcgstl &= ~(1<<2); | ||
231 | wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth); | ||
232 | } | ||
233 | |||
234 | |||
235 | void __init intel_p4_mcheck_init(struct cpuinfo_x86 *c) | ||
236 | { | ||
237 | u32 l, h; | ||
238 | int i; | ||
239 | |||
240 | machine_check_vector = intel_machine_check; | ||
241 | wmb(); | ||
242 | |||
243 | printk (KERN_INFO "Intel machine check architecture supported.\n"); | ||
244 | rdmsr (MSR_IA32_MCG_CAP, l, h); | ||
245 | if (l & (1<<8)) /* Control register present ? */ | ||
246 | wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); | ||
247 | nr_mce_banks = l & 0xff; | ||
248 | |||
249 | for (i=0; i<nr_mce_banks; i++) { | ||
250 | wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); | ||
251 | wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); | ||
252 | } | ||
253 | |||
254 | set_in_cr4 (X86_CR4_MCE); | ||
255 | printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", | ||
256 | smp_processor_id()); | ||
257 | |||
258 | /* Check for P4/Xeon extended MCE MSRs */ | ||
259 | rdmsr (MSR_IA32_MCG_CAP, l, h); | ||
260 | if (l & (1<<9)) {/* MCG_EXT_P */ | ||
261 | mce_num_extended_msrs = (l >> 16) & 0xff; | ||
262 | printk (KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)" | ||
263 | " available\n", | ||
264 | smp_processor_id(), mce_num_extended_msrs); | ||
265 | |||
266 | #ifdef CONFIG_X86_MCE_P4THERMAL | ||
267 | /* Check for P4/Xeon Thermal monitor */ | ||
268 | intel_init_thermal(c); | ||
269 | #endif | ||
270 | } | ||
271 | } | ||
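The MCG_CAP reads in the init routines above all decode IA32_MCG_CAP the same way: the low byte is the bank count, bit 8 is MCG_CTL_P (control register present), bit 9 is MCG_EXT_P, and bits 23:16 give the extended MSR count. A reference sketch with a purely hypothetical register value:

        #include <stdio.h>

        static void decode_mcg_cap(unsigned int l)  /* low 32 bits of MCG_CAP */
        {
                printf("banks=%u ctl_p=%u ext_p=%u ext_cnt=%u\n",
                       l & 0xff,           /* bits 7:0   bank count           */
                       (l >> 8) & 1,       /* bit 8      MCG_CTL present      */
                       (l >> 9) & 1,       /* bit 9      extended MSRs        */
                       (l >> 16) & 0xff);  /* bits 23:16 extended MSR count   */
        }

        int main(void)
        {
                decode_mcg_cap(0x0A0304);  /* hypothetical: 4 banks, CTL, 10 ext MSRs */
                return 0;
        }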
diff --git a/arch/i386/kernel/cpu/mcheck/p5.c b/arch/i386/kernel/cpu/mcheck/p5.c new file mode 100644 index 000000000000..c45a1b485c80 --- /dev/null +++ b/arch/i386/kernel/cpu/mcheck/p5.c | |||
@@ -0,0 +1,54 @@ | |||
1 | /* | ||
2 | * P5 specific Machine Check Exception Reporting | ||
3 | * (C) Copyright 2002 Alan Cox <alan@redhat.com> | ||
4 | */ | ||
5 | |||
6 | #include <linux/init.h> | ||
7 | #include <linux/types.h> | ||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/irq.h> | ||
10 | #include <linux/interrupt.h> | ||
11 | #include <linux/smp.h> | ||
12 | |||
13 | #include <asm/processor.h> | ||
14 | #include <asm/system.h> | ||
15 | #include <asm/msr.h> | ||
16 | |||
17 | #include "mce.h" | ||
18 | |||
19 | /* Machine check handler for Pentium class Intel */ | ||
20 | static fastcall void pentium_machine_check(struct pt_regs * regs, long error_code) | ||
21 | { | ||
22 | u32 loaddr, hi, lotype; | ||
23 | rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); | ||
24 | rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); | ||
25 | printk(KERN_EMERG "CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n", smp_processor_id(), loaddr, lotype); | ||
26 | if (lotype & (1<<5)) | ||
27 | printk(KERN_EMERG "CPU#%d: Possible thermal failure (CPU on fire ?).\n", smp_processor_id()); | ||
28 | add_taint(TAINT_MACHINE_CHECK); | ||
29 | } | ||
30 | |||
31 | /* Set up machine check reporting for processors with Intel style MCE */ | ||
32 | void __init intel_p5_mcheck_init(struct cpuinfo_x86 *c) | ||
33 | { | ||
34 | u32 l, h; | ||
35 | |||
36 | /* Check for MCE support */ | ||
37 | if (!cpu_has(c, X86_FEATURE_MCE)) | ||
38 | return; | ||
39 | |||
40 | /* Default P5 to off as it's often misconnected */ | ||
41 | if (mce_disabled != -1) | ||
42 | return; | ||
43 | machine_check_vector = pentium_machine_check; | ||
44 | wmb(); | ||
45 | |||
46 | /* Read registers before enabling */ | ||
47 | rdmsr(MSR_IA32_P5_MC_ADDR, l, h); | ||
48 | rdmsr(MSR_IA32_P5_MC_TYPE, l, h); | ||
49 | printk(KERN_INFO "Intel old style machine check architecture supported.\n"); | ||
50 | |||
51 | /* Enable MCE */ | ||
52 | set_in_cr4(X86_CR4_MCE); | ||
53 | printk(KERN_INFO "Intel old style machine check reporting enabled on CPU#%d.\n", smp_processor_id()); | ||
54 | } | ||
diff --git a/arch/i386/kernel/cpu/mcheck/p6.c b/arch/i386/kernel/cpu/mcheck/p6.c new file mode 100644 index 000000000000..46640f8c2494 --- /dev/null +++ b/arch/i386/kernel/cpu/mcheck/p6.c | |||
@@ -0,0 +1,115 @@ | |||
1 | /* | ||
2 | * P6 specific Machine Check Exception Reporting | ||
3 | * (C) Copyright 2002 Alan Cox <alan@redhat.com> | ||
4 | */ | ||
5 | |||
6 | #include <linux/init.h> | ||
7 | #include <linux/types.h> | ||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/irq.h> | ||
10 | #include <linux/interrupt.h> | ||
11 | #include <linux/smp.h> | ||
12 | |||
13 | #include <asm/processor.h> | ||
14 | #include <asm/system.h> | ||
15 | #include <asm/msr.h> | ||
16 | |||
17 | #include "mce.h" | ||
18 | |||
19 | /* Machine Check Handler For PII/PIII */ | ||
20 | static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) | ||
21 | { | ||
22 | int recover=1; | ||
23 | u32 alow, ahigh, high, low; | ||
24 | u32 mcgstl, mcgsth; | ||
25 | int i; | ||
26 | |||
27 | rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | ||
28 | if (mcgstl & (1<<0)) /* Recoverable ? */ | ||
29 | recover=0; | ||
30 | |||
31 | printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", | ||
32 | smp_processor_id(), mcgsth, mcgstl); | ||
33 | |||
34 | for (i=0; i<nr_mce_banks; i++) { | ||
35 | rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high); | ||
36 | if (high & (1<<31)) { | ||
37 | if (high & (1<<29)) | ||
38 | recover |= 1; | ||
39 | if (high & (1<<25)) | ||
40 | recover |= 2; | ||
41 | printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low); | ||
42 | high &= ~(1<<31); | ||
43 | if (high & (1<<27)) { | ||
44 | rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh); | ||
45 | printk ("[%08x%08x]", ahigh, alow); | ||
46 | } | ||
47 | if (high & (1<<26)) { | ||
48 | rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh); | ||
49 | printk (" at %08x%08x", ahigh, alow); | ||
50 | } | ||
51 | printk ("\n"); | ||
52 | } | ||
53 | } | ||
54 | |||
55 | if (recover & 2) | ||
56 | panic ("CPU context corrupt"); | ||
57 | if (recover & 1) | ||
58 | panic ("Unable to continue"); | ||
59 | |||
60 | printk (KERN_EMERG "Attempting to continue.\n"); | ||
61 | /* | ||
62 | * Do not clear the MSR_IA32_MCi_STATUS if the error is not | ||
63 | * recoverable/continuable. This allows the BIOS to look at the MSRs | ||
64 | * for errors if the OS could not log the error. | ||
65 | */ | ||
66 | for (i=0; i<nr_mce_banks; i++) { | ||
67 | unsigned int msr; | ||
68 | msr = MSR_IA32_MC0_STATUS+i*4; | ||
69 | rdmsr (msr,low, high); | ||
70 | if (high & (1<<31)) { | ||
71 | /* Clear it */ | ||
72 | wrmsr (msr, 0UL, 0UL); | ||
73 | /* Serialize */ | ||
74 | wmb(); | ||
75 | add_taint(TAINT_MACHINE_CHECK); | ||
76 | } | ||
77 | } | ||
78 | mcgstl &= ~(1<<2); | ||
79 | wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth); | ||
80 | } | ||
81 | |||
82 | /* Set up machine check reporting for processors with Intel style MCE */ | ||
83 | void __init intel_p6_mcheck_init(struct cpuinfo_x86 *c) | ||
84 | { | ||
85 | u32 l, h; | ||
86 | int i; | ||
87 | |||
88 | /* Check for MCE support */ | ||
89 | if (!cpu_has(c, X86_FEATURE_MCE)) | ||
90 | return; | ||
91 | |||
92 | /* Check for PPro style MCA */ | ||
93 | if (!cpu_has(c, X86_FEATURE_MCA)) | ||
94 | return; | ||
95 | |||
96 | /* OK, machine check is available */ | ||
97 | machine_check_vector = intel_machine_check; | ||
98 | wmb(); | ||
99 | |||
100 | printk (KERN_INFO "Intel machine check architecture supported.\n"); | ||
101 | rdmsr (MSR_IA32_MCG_CAP, l, h); | ||
102 | if (l & (1<<8)) /* Control register present ? */ | ||
103 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); | ||
104 | nr_mce_banks = l & 0xff; | ||
105 | |||
106 | /* Don't enable bank 0 on Intel P6 cores; it goes bang quickly. */ | ||
107 | for (i=1; i<nr_mce_banks; i++) { | ||
108 | wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); | ||
109 | wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); | ||
110 | } | ||
111 | |||
112 | set_in_cr4 (X86_CR4_MCE); | ||
113 | printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", | ||
114 | smp_processor_id()); | ||
115 | } | ||
diff --git a/arch/i386/kernel/cpu/mcheck/winchip.c b/arch/i386/kernel/cpu/mcheck/winchip.c new file mode 100644 index 000000000000..753fa7acb984 --- /dev/null +++ b/arch/i386/kernel/cpu/mcheck/winchip.c | |||
@@ -0,0 +1,37 @@ | |||
1 | /* | ||
2 | * IDT Winchip specific Machine Check Exception Reporting | ||
3 | * (C) Copyright 2002 Alan Cox <alan@redhat.com> | ||
4 | */ | ||
5 | |||
6 | #include <linux/init.h> | ||
7 | #include <linux/types.h> | ||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/irq.h> | ||
10 | #include <linux/interrupt.h> | ||
11 | |||
12 | #include <asm/processor.h> | ||
13 | #include <asm/system.h> | ||
14 | #include <asm/msr.h> | ||
15 | |||
16 | #include "mce.h" | ||
17 | |||
18 | /* Machine check handler for WinChip C6 */ | ||
19 | static fastcall void winchip_machine_check(struct pt_regs * regs, long error_code) | ||
20 | { | ||
21 | printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); | ||
22 | add_taint(TAINT_MACHINE_CHECK); | ||
23 | } | ||
24 | |||
25 | /* Set up machine check reporting on the Winchip C6 series */ | ||
26 | void __init winchip_mcheck_init(struct cpuinfo_x86 *c) | ||
27 | { | ||
28 | u32 lo, hi; | ||
29 | machine_check_vector = winchip_machine_check; | ||
30 | wmb(); | ||
31 | rdmsr(MSR_IDT_FCR1, lo, hi); | ||
32 | lo|= (1<<2); /* Enable EIERRINT (int 18 MCE) */ | ||
33 | lo&= ~(1<<4); /* Enable MCE */ | ||
34 | wrmsr(MSR_IDT_FCR1, lo, hi); | ||
35 | set_in_cr4(X86_CR4_MCE); | ||
36 | printk(KERN_INFO "Winchip machine check reporting enabled on CPU#0.\n"); | ||
37 | } | ||
diff --git a/arch/i386/kernel/cpu/mtrr/Makefile b/arch/i386/kernel/cpu/mtrr/Makefile new file mode 100644 index 000000000000..a25b701ab84e --- /dev/null +++ b/arch/i386/kernel/cpu/mtrr/Makefile | |||
@@ -0,0 +1,5 @@ | |||
1 | obj-y := main.o if.o generic.o state.o | ||
2 | obj-y += amd.o | ||
3 | obj-y += cyrix.o | ||
4 | obj-y += centaur.o | ||
5 | |||
diff --git a/arch/i386/kernel/cpu/mtrr/amd.c b/arch/i386/kernel/cpu/mtrr/amd.c new file mode 100644 index 000000000000..1a1e04b6fd00 --- /dev/null +++ b/arch/i386/kernel/cpu/mtrr/amd.c | |||
@@ -0,0 +1,121 @@ | |||
1 | #include <linux/init.h> | ||
2 | #include <linux/mm.h> | ||
3 | #include <asm/mtrr.h> | ||
4 | #include <asm/msr.h> | ||
5 | |||
6 | #include "mtrr.h" | ||
7 | |||
8 | static void | ||
9 | amd_get_mtrr(unsigned int reg, unsigned long *base, | ||
10 | unsigned int *size, mtrr_type * type) | ||
11 | { | ||
12 | unsigned long low, high; | ||
13 | |||
14 | rdmsr(MSR_K6_UWCCR, low, high); | ||
15 | /* Upper dword is region 1, lower is region 0 */ | ||
16 | if (reg == 1) | ||
17 | low = high; | ||
18 | /* The base is 128K aligned and lives in bits 31:17 */ | ||
19 | *base = (low & 0xFFFE0000) >> PAGE_SHIFT; | ||
20 | *type = 0; | ||
21 | if (low & 1) | ||
22 | *type = MTRR_TYPE_UNCACHABLE; | ||
23 | if (low & 2) | ||
24 | *type = MTRR_TYPE_WRCOMB; | ||
25 | if (!(low & 3)) { | ||
26 | *size = 0; | ||
27 | return; | ||
28 | } | ||
29 | /* | ||
30 | * This needs a little explaining. The size is stored as an | ||
31 | * inverted mask of 128K-granularity bits, 15 bits long, at an | ||
32 | * offset of 2 bits. | ||
33 | * | ||
34 | * So to get a size we invert the mask and add 1 at the lowest | ||
35 | * mask bit (4, as it's 2 bits in). This gives us a count we then | ||
36 | * shift to turn into 128K blocks. | ||
37 | * | ||
38 | * e.g. 111 1111 1111 1100 is 512K | ||
39 | * | ||
40 | * invert 000 0000 0000 0011 | ||
41 | * +1 000 0000 0000 0100 | ||
42 | * *128K = 512K | ||
43 | */ | ||
44 | low = (~low) & 0x1FFFC; | ||
45 | *size = (low + 4) << (15 - PAGE_SHIFT); | ||
46 | return; | ||
47 | } | ||
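To make the inverted-mask decode concrete, here is a hedged user-space rendering of the same arithmetic in bytes rather than pages: for a 512K region the register's 15-bit mask field holds 111 1111 1111 1100 at bit offset 2, and the decode recovers 512K.

        #include <stdio.h>

        int main(void)
        {
                unsigned long low = (0x7FFCUL << 2) | 2; /* 512K region, WC type */
                unsigned long bytes = (((~low) & 0x1FFFC) + 4) << 15;

                printf("size = %luK\n", bytes >> 10);    /* prints "size = 512K" */
                return 0;
        }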
48 | |||
49 | static void amd_set_mtrr(unsigned int reg, unsigned long base, | ||
50 | unsigned long size, mtrr_type type) | ||
51 | /* [SUMMARY] Set variable MTRR register on the local CPU. | ||
52 | <reg> The register to set. | ||
53 | <base> The base address of the region. | ||
54 | <size> The size of the region. If this is 0 the region is disabled. | ||
55 | <type> The type of the region. | ||
56 | <do_safe> If TRUE, do the change safely. If FALSE, safety measures should | ||
57 | be done externally. | ||
58 | [RETURNS] Nothing. | ||
59 | */ | ||
60 | { | ||
61 | u32 regs[2]; | ||
62 | |||
63 | /* | ||
64 | * Low is MTRR0 , High MTRR 1 | ||
65 | */ | ||
66 | rdmsr(MSR_K6_UWCCR, regs[0], regs[1]); | ||
67 | /* | ||
68 | * Blank to disable | ||
69 | */ | ||
70 | if (size == 0) | ||
71 | regs[reg] = 0; | ||
72 | else | ||
73 | /* Set the register to the base, the type (off by one) and an | ||
74 | inverted bitmask of the size. The size is the only odd | ||
75 | bit: we are fed, say, 512K; we invert this and get 111 1111 | ||
76 | 1111 1011, but if you subtract one and invert you get the | ||
77 | desired 111 1111 1111 1100 mask. | ||
78 | |||
79 | But ~(x - 1) == ~x + 1 == -x. Two's complement rocks! */ | ||
80 | regs[reg] = (-size >> (15 - PAGE_SHIFT) & 0x0001FFFC) | ||
81 | | (base << PAGE_SHIFT) | (type + 1); | ||
82 | |||
83 | /* | ||
84 | * The writeback rule is quite specific. See the manual. It is: | ||
85 | * disable local interrupts, write back the cache, set the MTRR. | ||
86 | */ | ||
87 | wbinvd(); | ||
88 | wrmsr(MSR_K6_UWCCR, regs[0], regs[1]); | ||
89 | } | ||
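The encode side leans on the identity in the comment: since ~(x - 1) == -x, negating the size (in pages) and shifting gives the inverted mask directly. A quick hedged check of that identity against the decode example above, assuming PAGE_SHIFT == 12:

        #include <stdio.h>

        int main(void)
        {
                unsigned long pages = (512 * 1024) >> 12;        /* 512K in 4K pages */
                unsigned long mask = (-pages >> 3) & 0x0001FFFC; /* 15 - PAGE_SHIFT = 3 */

                printf("mask = %#lx\n", mask);                   /* prints 0x1fff0 */
                return 0;
        }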
90 | |||
91 | static int amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type) | ||
92 | { | ||
93 | /* Apply the K6 block alignment and size rules | ||
94 | In order | ||
95 | o Uncached or gathering only | ||
96 | o 128K or bigger block | ||
97 | o Power of 2 block | ||
98 | o base suitably aligned to the power | ||
99 | */ | ||
100 | if (type > MTRR_TYPE_WRCOMB || size < (1 << (17 - PAGE_SHIFT)) | ||
101 | || (size & ~(size - 1)) - size || (base & (size - 1))) | ||
102 | return -EINVAL; | ||
103 | return 0; | ||
104 | } | ||
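The middle test above is a compact power-of-two check: size & ~(size - 1) isolates the lowest set bit, so it equals size exactly when size has a single bit set. Restated as a hedged stand-alone helper:

        #include <stdio.h>

        static int is_pow2(unsigned long size)
        {
                return size && (size & ~(size - 1)) == size;
        }

        int main(void)
        {
                /* prints "1 0 1": 128 and 131072 are powers of two, 96 is not */
                printf("%d %d %d\n", is_pow2(128), is_pow2(96), is_pow2(131072));
                return 0;
        }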
105 | |||
106 | static struct mtrr_ops amd_mtrr_ops = { | ||
107 | .vendor = X86_VENDOR_AMD, | ||
108 | .set = amd_set_mtrr, | ||
109 | .get = amd_get_mtrr, | ||
110 | .get_free_region = generic_get_free_region, | ||
111 | .validate_add_page = amd_validate_add_page, | ||
112 | .have_wrcomb = positive_have_wrcomb, | ||
113 | }; | ||
114 | |||
115 | int __init amd_init_mtrr(void) | ||
116 | { | ||
117 | set_mtrr_ops(&amd_mtrr_ops); | ||
118 | return 0; | ||
119 | } | ||
120 | |||
121 | //arch_initcall(amd_mtrr_init); | ||
diff --git a/arch/i386/kernel/cpu/mtrr/centaur.c b/arch/i386/kernel/cpu/mtrr/centaur.c new file mode 100644 index 000000000000..33f00ac314ef --- /dev/null +++ b/arch/i386/kernel/cpu/mtrr/centaur.c | |||
@@ -0,0 +1,223 @@ | |||
1 | #include <linux/init.h> | ||
2 | #include <linux/mm.h> | ||
3 | #include <asm/mtrr.h> | ||
4 | #include <asm/msr.h> | ||
5 | #include "mtrr.h" | ||
6 | |||
7 | static struct { | ||
8 | unsigned long high; | ||
9 | unsigned long low; | ||
10 | } centaur_mcr[8]; | ||
11 | |||
12 | static u8 centaur_mcr_reserved; | ||
13 | static u8 centaur_mcr_type; /* 0 for winchip, 1 for winchip2 */ | ||
14 | |||
15 | /* | ||
16 | * Report boot time MCR setups | ||
17 | */ | ||
18 | |||
19 | static int | ||
20 | centaur_get_free_region(unsigned long base, unsigned long size) | ||
21 | /* [SUMMARY] Get a free MTRR. | ||
22 | <base> The starting (base) address of the region. | ||
23 | <size> The size (in bytes) of the region. | ||
24 | [RETURNS] The index of the region on success, else a negative errno. | ||
25 | */ | ||
26 | { | ||
27 | int i, max; | ||
28 | mtrr_type ltype; | ||
29 | unsigned long lbase; | ||
30 | unsigned int lsize; | ||
31 | |||
32 | max = num_var_ranges; | ||
33 | for (i = 0; i < max; ++i) { | ||
34 | if (centaur_mcr_reserved & (1 << i)) | ||
35 | continue; | ||
36 | mtrr_if->get(i, &lbase, &lsize, <ype); | ||
37 | if (lsize == 0) | ||
38 | return i; | ||
39 | } | ||
40 | return -ENOSPC; | ||
41 | } | ||
42 | |||
43 | void | ||
44 | mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) | ||
45 | { | ||
46 | centaur_mcr[mcr].low = lo; | ||
47 | centaur_mcr[mcr].high = hi; | ||
48 | } | ||
49 | |||
50 | static void | ||
51 | centaur_get_mcr(unsigned int reg, unsigned long *base, | ||
52 | unsigned int *size, mtrr_type * type) | ||
53 | { | ||
54 | *base = centaur_mcr[reg].high >> PAGE_SHIFT; | ||
55 | *size = -(centaur_mcr[reg].low & 0xfffff000) >> PAGE_SHIFT; | ||
56 | *type = MTRR_TYPE_WRCOMB; /* If it is there, it is write-combining */ | ||
57 | if (centaur_mcr_type == 1 && ((centaur_mcr[reg].low & 31) & 2)) | ||
58 | *type = MTRR_TYPE_UNCACHABLE; | ||
59 | if (centaur_mcr_type == 1 && (centaur_mcr[reg].low & 31) == 25) | ||
60 | *type = MTRR_TYPE_WRBACK; | ||
61 | if (centaur_mcr_type == 0 && (centaur_mcr[reg].low & 31) == 31) | ||
62 | *type = MTRR_TYPE_WRBACK; | ||
63 | |||
64 | } | ||
65 | |||
66 | static void centaur_set_mcr(unsigned int reg, unsigned long base, | ||
67 | unsigned long size, mtrr_type type) | ||
68 | { | ||
69 | unsigned long low, high; | ||
70 | |||
71 | if (size == 0) { | ||
72 | /* Disable */ | ||
73 | high = low = 0; | ||
74 | } else { | ||
75 | high = base << PAGE_SHIFT; | ||
76 | if (centaur_mcr_type == 0) | ||
77 | low = -size << PAGE_SHIFT | 0x1f; /* only support write-combining... */ | ||
78 | else { | ||
79 | if (type == MTRR_TYPE_UNCACHABLE) | ||
80 | low = -size << PAGE_SHIFT | 0x02; /* NC */ | ||
81 | else | ||
82 | low = -size << PAGE_SHIFT | 0x09; /* WWO,WC */ | ||
83 | } | ||
84 | } | ||
85 | centaur_mcr[reg].high = high; | ||
86 | centaur_mcr[reg].low = low; | ||
87 | wrmsr(MSR_IDT_MCR0 + reg, low, high); | ||
88 | } | ||
89 | |||
90 | #if 0 | ||
91 | /* | ||
92 | * Initialise the later (saner) Winchip MCR variant. In this version | ||
93 | * the BIOS can pass us the registers it has used (but not their values) | ||
94 | * and the control register is read/write | ||
95 | */ | ||
96 | |||
97 | static void __init | ||
98 | centaur_mcr1_init(void) | ||
99 | { | ||
100 | unsigned i; | ||
101 | u32 lo, hi; | ||
102 | |||
103 | /* Unfortunately, MCRs are read-only, so there is no way to | ||
104 | * find out what the BIOS might have done. | ||
105 | */ | ||
106 | |||
107 | rdmsr(MSR_IDT_MCR_CTRL, lo, hi); | ||
108 | if (((lo >> 17) & 7) == 1) { /* Type 1 Winchip2 MCR */ | ||
109 | lo &= ~0x1C0; /* clear key */ | ||
110 | lo |= 0x040; /* set key to 1 */ | ||
111 | wrmsr(MSR_IDT_MCR_CTRL, lo, hi); /* unlock MCR */ | ||
112 | } | ||
113 | |||
114 | centaur_mcr_type = 1; | ||
115 | |||
116 | /* | ||
117 | * Clear any unconfigured MCR's. | ||
118 | */ | ||
119 | |||
120 | for (i = 0; i < 8; ++i) { | ||
121 | if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0) { | ||
122 | if (!(lo & (1 << (9 + i)))) | ||
123 | wrmsr(MSR_IDT_MCR0 + i, 0, 0); | ||
124 | else | ||
125 | /* | ||
126 | * If the BIOS set up an MCR we cannot see it | ||
127 | * but we don't wish to obliterate it | ||
128 | */ | ||
129 | centaur_mcr_reserved |= (1 << i); | ||
130 | } | ||
131 | } | ||
132 | /* | ||
133 | * Throw the main write-combining switch... | ||
134 | * However if OOSTORE is enabled then people have already done far | ||
135 | * cleverer things and we should behave. | ||
136 | */ | ||
137 | |||
138 | lo |= 15; /* Write combine enables */ | ||
139 | wrmsr(MSR_IDT_MCR_CTRL, lo, hi); | ||
140 | } | ||
141 | |||
142 | /* | ||
143 | * Initialise the original winchip with read only MCR registers | ||
144 | * no used bitmask for the BIOS to pass on and write only control | ||
145 | */ | ||
146 | |||
147 | static void __init | ||
148 | centaur_mcr0_init(void) | ||
149 | { | ||
150 | unsigned i; | ||
151 | |||
152 | /* Unfortunately, MCRs are read-only, so there is no way to | ||
153 | * find out what the BIOS might have done. | ||
154 | */ | ||
155 | |||
156 | /* Clear any unconfigured MCR's. | ||
157 | * This way we are sure that the centaur_mcr array contains the actual | ||
158 | * values. The disadvantage is that any BIOS tweaks are thus undone. | ||
159 | * | ||
160 | */ | ||
161 | for (i = 0; i < 8; ++i) { | ||
162 | if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0) | ||
163 | wrmsr(MSR_IDT_MCR0 + i, 0, 0); | ||
164 | } | ||
165 | |||
166 | wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0); /* Write only */ | ||
167 | } | ||
168 | |||
169 | /* | ||
170 | * Initialise Winchip series MCR registers | ||
171 | */ | ||
172 | |||
173 | static void __init | ||
174 | centaur_mcr_init(void) | ||
175 | { | ||
176 | struct set_mtrr_context ctxt; | ||
177 | |||
178 | set_mtrr_prepare_save(&ctxt); | ||
179 | set_mtrr_cache_disable(&ctxt); | ||
180 | |||
181 | if (boot_cpu_data.x86_model == 4) | ||
182 | centaur_mcr0_init(); | ||
183 | else if (boot_cpu_data.x86_model == 8 || boot_cpu_data.x86_model == 9) | ||
184 | centaur_mcr1_init(); | ||
185 | |||
186 | set_mtrr_done(&ctxt); | ||
187 | } | ||
188 | #endif | ||
189 | |||
190 | static int centaur_validate_add_page(unsigned long base, | ||
191 | unsigned long size, unsigned int type) | ||
192 | { | ||
193 | /* | ||
194 | * FIXME: Winchip2 supports uncached | ||
195 | */ | ||
196 | if (type != MTRR_TYPE_WRCOMB && | ||
197 | (centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE)) { | ||
198 | printk(KERN_WARNING | ||
199 | "mtrr: only write-combining%s supported\n", | ||
200 | centaur_mcr_type ? " and uncacheable are" | ||
201 | : " is"); | ||
202 | return -EINVAL; | ||
203 | } | ||
204 | return 0; | ||
205 | } | ||
206 | |||
207 | static struct mtrr_ops centaur_mtrr_ops = { | ||
208 | .vendor = X86_VENDOR_CENTAUR, | ||
209 | // .init = centaur_mcr_init, | ||
210 | .set = centaur_set_mcr, | ||
211 | .get = centaur_get_mcr, | ||
212 | .get_free_region = centaur_get_free_region, | ||
213 | .validate_add_page = centaur_validate_add_page, | ||
214 | .have_wrcomb = positive_have_wrcomb, | ||
215 | }; | ||
216 | |||
217 | int __init centaur_init_mtrr(void) | ||
218 | { | ||
219 | set_mtrr_ops(¢aur_mtrr_ops); | ||
220 | return 0; | ||
221 | } | ||
222 | |||
223 | //arch_initcall(centaur_init_mtrr); | ||
diff --git a/arch/i386/kernel/cpu/mtrr/changelog b/arch/i386/kernel/cpu/mtrr/changelog new file mode 100644 index 000000000000..af1368535955 --- /dev/null +++ b/arch/i386/kernel/cpu/mtrr/changelog | |||
@@ -0,0 +1,229 @@ | |||
1 | ChangeLog | ||
2 | |||
3 | Prehistory Martin Tischhäuser <martin@ikcbarka.fzk.de> | ||
4 | Initial register-setting code (from proform-1.0). | ||
5 | 19971216 Richard Gooch <rgooch@atnf.csiro.au> | ||
6 | Original version for /proc/mtrr interface, SMP-safe. | ||
7 | v1.0 | ||
8 | 19971217 Richard Gooch <rgooch@atnf.csiro.au> | ||
9 | Bug fix for ioctl()s. | ||
10 | Added sample code in Documentation/mtrr.txt | ||
11 | v1.1 | ||
12 | 19971218 Richard Gooch <rgooch@atnf.csiro.au> | ||
13 | Disallow overlapping regions. | ||
14 | 19971219 Jens Maurer <jmaurer@menuett.rhein-main.de> | ||
15 | Register-setting fixups. | ||
16 | v1.2 | ||
17 | 19971222 Richard Gooch <rgooch@atnf.csiro.au> | ||
18 | Fixups for kernel 2.1.75. | ||
19 | v1.3 | ||
20 | 19971229 David Wragg <dpw@doc.ic.ac.uk> | ||
21 | Register-setting fixups and conformity with Intel conventions. | ||
22 | 19971229 Richard Gooch <rgooch@atnf.csiro.au> | ||
23 | Cosmetic changes and wrote this ChangeLog ;-) | ||
24 | 19980106 Richard Gooch <rgooch@atnf.csiro.au> | ||
25 | Fixups for kernel 2.1.78. | ||
26 | v1.4 | ||
27 | 19980119 David Wragg <dpw@doc.ic.ac.uk> | ||
28 | Included passive-release enable code (elsewhere in PCI setup). | ||
29 | v1.5 | ||
30 | 19980131 Richard Gooch <rgooch@atnf.csiro.au> | ||
31 | Replaced global kernel lock with private spinlock. | ||
32 | v1.6 | ||
33 | 19980201 Richard Gooch <rgooch@atnf.csiro.au> | ||
34 | Added wait for other CPUs to complete changes. | ||
35 | v1.7 | ||
36 | 19980202 Richard Gooch <rgooch@atnf.csiro.au> | ||
37 | Bug fix in definition of <set_mtrr> for UP. | ||
38 | v1.8 | ||
39 | 19980319 Richard Gooch <rgooch@atnf.csiro.au> | ||
40 | Fixups for kernel 2.1.90. | ||
41 | 19980323 Richard Gooch <rgooch@atnf.csiro.au> | ||
42 | Move SMP BIOS fixup before secondary CPUs call <calibrate_delay> | ||
43 | v1.9 | ||
44 | 19980325 Richard Gooch <rgooch@atnf.csiro.au> | ||
45 | Fixed test for overlapping regions: confused by adjacent regions | ||
46 | 19980326 Richard Gooch <rgooch@atnf.csiro.au> | ||
47 | Added wbinvd in <set_mtrr_prepare>. | ||
48 | 19980401 Richard Gooch <rgooch@atnf.csiro.au> | ||
49 | Bug fix for non-SMP compilation. | ||
50 | 19980418 David Wragg <dpw@doc.ic.ac.uk> | ||
51 | Fixed-MTRR synchronisation for SMP and use atomic operations | ||
52 | instead of spinlocks. | ||
53 | 19980418 Richard Gooch <rgooch@atnf.csiro.au> | ||
54 | Differentiate different MTRR register classes for BIOS fixup. | ||
55 | v1.10 | ||
56 | 19980419 David Wragg <dpw@doc.ic.ac.uk> | ||
57 | Bug fix in variable MTRR synchronisation. | ||
58 | v1.11 | ||
59 | 19980419 Richard Gooch <rgooch@atnf.csiro.au> | ||
60 | Fixups for kernel 2.1.97. | ||
61 | v1.12 | ||
62 | 19980421 Richard Gooch <rgooch@atnf.csiro.au> | ||
63 | Safer synchronisation across CPUs when changing MTRRs. | ||
64 | v1.13 | ||
65 | 19980423 Richard Gooch <rgooch@atnf.csiro.au> | ||
66 | Bugfix for SMP systems without MTRR support. | ||
67 | v1.14 | ||
68 | 19980427 Richard Gooch <rgooch@atnf.csiro.au> | ||
69 | Trap calls to <mtrr_add> and <mtrr_del> on non-MTRR machines. | ||
70 | v1.15 | ||
71 | 19980427 Richard Gooch <rgooch@atnf.csiro.au> | ||
72 | Use atomic bitops for setting SMP change mask. | ||
73 | v1.16 | ||
74 | 19980428 Richard Gooch <rgooch@atnf.csiro.au> | ||
75 | Removed spurious diagnostic message. | ||
76 | v1.17 | ||
77 | 19980429 Richard Gooch <rgooch@atnf.csiro.au> | ||
78 | Moved register-setting macros into this file. | ||
79 | Moved setup code from init/main.c to i386-specific areas. | ||
80 | v1.18 | ||
81 | 19980502 Richard Gooch <rgooch@atnf.csiro.au> | ||
82 | Moved MTRR detection outside conditionals in <mtrr_init>. | ||
83 | v1.19 | ||
84 | 19980502 Richard Gooch <rgooch@atnf.csiro.au> | ||
85 | Documentation improvement: mention Pentium II and AGP. | ||
86 | v1.20 | ||
87 | 19980521 Richard Gooch <rgooch@atnf.csiro.au> | ||
88 | Only manipulate interrupt enable flag on local CPU. | ||
89 | Allow enclosed uncachable regions. | ||
90 | v1.21 | ||
91 | 19980611 Richard Gooch <rgooch@atnf.csiro.au> | ||
92 | Always define <main_lock>. | ||
93 | v1.22 | ||
94 | 19980901 Richard Gooch <rgooch@atnf.csiro.au> | ||
95 | Removed module support in order to tidy up code. | ||
96 | Added sanity check for <mtrr_add>/<mtrr_del> before <mtrr_init>. | ||
97 | Created an addition queue for use before SMP commences. | ||
98 | v1.23 | ||
99 | 19980902 Richard Gooch <rgooch@atnf.csiro.au> | ||
100 | Ported patch to kernel 2.1.120-pre3. | ||
101 | v1.24 | ||
102 | 19980910 Richard Gooch <rgooch@atnf.csiro.au> | ||
103 | Removed sanity checks and addition queue: Linus prefers an OOPS. | ||
104 | v1.25 | ||
105 | 19981001 Richard Gooch <rgooch@atnf.csiro.au> | ||
106 | Fixed harmless compiler warning in include/asm-i386/mtrr.h | ||
107 | Fixed version numbering and history for v1.23 -> v1.24. | ||
108 | v1.26 | ||
109 | 19990118 Richard Gooch <rgooch@atnf.csiro.au> | ||
110 | Added devfs support. | ||
111 | v1.27 | ||
112 | 19990123 Richard Gooch <rgooch@atnf.csiro.au> | ||
113 | Changed locking to spin with reschedule. | ||
114 | Made use of new <smp_call_function>. | ||
115 | v1.28 | ||
116 | 19990201 Zoltán Böszörményi <zboszor@mail.externet.hu> | ||
117 | Extended the driver to be able to use Cyrix style ARRs. | ||
118 | 19990204 Richard Gooch <rgooch@atnf.csiro.au> | ||
119 | Restructured Cyrix support. | ||
120 | v1.29 | ||
121 | 19990204 Zoltán Böszörményi <zboszor@mail.externet.hu> | ||
122 | Refined ARR support: enable MAPEN in set_mtrr_prepare() | ||
123 | and disable MAPEN in set_mtrr_done(). | ||
124 | 19990205 Richard Gooch <rgooch@atnf.csiro.au> | ||
125 | Minor cleanups. | ||
126 | v1.30 | ||
127 | 19990208 Zoltán Böszörményi <zboszor@mail.externet.hu> | ||
128 | Protect plain 6x86s (and other processors without the | ||
129 | Page Global Enable feature) against accessing CR4 in | ||
130 | set_mtrr_prepare() and set_mtrr_done(). | ||
131 | 19990210 Richard Gooch <rgooch@atnf.csiro.au> | ||
132 | Turned <set_mtrr_up> and <get_mtrr> into function pointers. | ||
133 | v1.31 | ||
134 | 19990212 Zoltán Böszörményi <zboszor@mail.externet.hu> | ||
135 | Major rewrite of cyrix_arr_init(): do not touch ARRs, | ||
136 | leave them as the BIOS has set them up. | ||
137 | Enable usage of all 8 ARRs. | ||
138 | Avoid multiplications by 3 everywhere and other | ||
139 | code clean ups/speed ups. | ||
140 | 19990213 Zoltán Böszörményi <zboszor@mail.externet.hu> | ||
141 | Set up other Cyrix processors identical to the boot cpu. | ||
142 | Since Cyrix doesn't support the Intel APIC, this is l'art pour l'art (art for art's sake). | ||
143 | Weigh ARRs by size: | ||
144 | If size <= 32M is given, set up ARR# we were given. | ||
145 | If size > 32M is given, set up ARR7 only if it is free, | ||
146 | fail otherwise. | ||
147 | 19990214 Zoltán Böszörményi <zboszor@mail.externet.hu> | ||
148 | Also check for size >= 256K if we are to set up ARR7, | ||
149 | mtrr_add() returns the value it gets from set_mtrr() | ||
150 | 19990218 Zoltán Böszörményi <zboszor@mail.externet.hu> | ||
151 | Remove Cyrix "coma bug" workaround from here. | ||
152 | Moved to linux/arch/i386/kernel/setup.c and | ||
153 | linux/include/asm-i386/bugs.h | ||
154 | 19990228 Richard Gooch <rgooch@atnf.csiro.au> | ||
155 | Added MTRRIOC_KILL_ENTRY ioctl(2) | ||
156 | Trap for counter underflow in <mtrr_file_del>. | ||
157 | Trap for 4 MiB aligned regions for PPro, stepping <= 7. | ||
158 | 19990301 Richard Gooch <rgooch@atnf.csiro.au> | ||
159 | Created <get_free_region> hook. | ||
160 | 19990305 Richard Gooch <rgooch@atnf.csiro.au> | ||
161 | Temporarily disable AMD support now MTRR capability flag is set. | ||
162 | v1.32 | ||
163 | 19990308 Zoltán Böszörményi <zboszor@mail.externet.hu> | ||
164 | Adjust my changes (19990212-19990218) to Richard Gooch's | ||
165 | latest changes. (19990228-19990305) | ||
166 | v1.33 | ||
167 | 19990309 Richard Gooch <rgooch@atnf.csiro.au> | ||
168 | Fixed typo in <printk> message. | ||
169 | 19990310 Richard Gooch <rgooch@atnf.csiro.au> | ||
170 | Support K6-II/III based on Alan Cox's <alan@redhat.com> patches. | ||
171 | v1.34 | ||
172 | 19990511 Bart Hartgers <bart@etpmod.phys.tue.nl> | ||
173 | Support Centaur C6 MCR's. | ||
174 | 19990512 Richard Gooch <rgooch@atnf.csiro.au> | ||
175 | Minor cleanups. | ||
176 | v1.35 | ||
177 | 19990707 Zoltán Böszörményi <zboszor@mail.externet.hu> | ||
178 | Check whether ARR3 is protected in cyrix_get_free_region() | ||
179 | and mtrr_del(). The code won't attempt to delete or change it | ||
180 | from now on if the BIOS protected ARR3. It silently skips ARR3 | ||
181 | in cyrix_get_free_region() or returns with an error code from | ||
182 | mtrr_del(). | ||
183 | 19990711 Zoltán Böszörményi <zboszor@mail.externet.hu> | ||
184 | Reset some bits in the CCRs in cyrix_arr_init() to disable SMM | ||
185 | if ARR3 isn't protected. This is needed because if SMM is active | ||
186 | and ARR3 isn't protected then deleting and setting ARR3 again | ||
187 | may lock up the processor. With SMM entirely disabled, it does | ||
188 | not happen. | ||
189 | 19990812 Zoltán Böszörményi <zboszor@mail.externet.hu> | ||
190 | Rearrange switch() statements so the driver accommodates | ||
191 | the fact that the AMD Athlon handles its MTRRs the same way | ||
192 | as Intel does. | ||
193 | 19990814 Zoltán Böszörményi <zboszor@mail.externet.hu> | ||
194 | Double check for Intel in mtrr_add()'s big switch() because | ||
195 | that revision check is only valid for Intel CPUs. | ||
196 | 19990819 Alan Cox <alan@redhat.com> | ||
197 | Tested Zoltan's changes on a pre production Athlon - 100% | ||
198 | success. | ||
199 | 19991008 Manfred Spraul <manfreds@colorfullife.com> | ||
200 | replaced spin_lock_reschedule() with a normal semaphore. | ||
201 | v1.36 | ||
202 | 20000221 Richard Gooch <rgooch@atnf.csiro.au> | ||
203 | Compile fix if procfs and devfs not enabled. | ||
204 | Formatting changes. | ||
205 | v1.37 | ||
206 | 20001109 H. Peter Anvin <hpa@zytor.com> | ||
207 | Use the new centralized CPU feature detects. | ||
208 | |||
209 | v1.38 | ||
210 | 20010309 Dave Jones <davej@suse.de> | ||
211 | Add support for Cyrix III. | ||
212 | |||
213 | v1.39 | ||
214 | 20010312 Dave Jones <davej@suse.de> | ||
215 | Ugh, I broke AMD support. | ||
216 | Reworked fix by Troels Walsted Hansen <troels@thule.no> | ||
217 | |||
218 | v1.40 | ||
219 | 20010327 Dave Jones <davej@suse.de> | ||
220 | Adapted Cyrix III support to include VIA C3. | ||
221 | |||
222 | v2.0 | ||
223 | 20020306 Patrick Mochel <mochel@osdl.org> | ||
224 | Split mtrr.c -> mtrr/*.c | ||
225 | Converted to Linux Kernel Coding Style | ||
226 | Fixed several minor nits in form | ||
227 | Moved some SMP-only functions out, so they can be used | ||
228 | for power management in the future. | ||
229 | TODO: Fix user interface cruft. | ||
diff --git a/arch/i386/kernel/cpu/mtrr/cyrix.c b/arch/i386/kernel/cpu/mtrr/cyrix.c new file mode 100644 index 000000000000..933b0dd62f48 --- /dev/null +++ b/arch/i386/kernel/cpu/mtrr/cyrix.c | |||
@@ -0,0 +1,364 @@ | |||
1 | #include <linux/init.h> | ||
2 | #include <linux/mm.h> | ||
3 | #include <asm/mtrr.h> | ||
4 | #include <asm/msr.h> | ||
5 | #include <asm/io.h> | ||
6 | #include "mtrr.h" | ||
7 | |||
8 | int arr3_protected; | ||
9 | |||
10 | static void | ||
11 | cyrix_get_arr(unsigned int reg, unsigned long *base, | ||
12 | unsigned int *size, mtrr_type * type) | ||
13 | { | ||
14 | unsigned long flags; | ||
15 | unsigned char arr, ccr3, rcr, shift; | ||
16 | |||
17 | arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */ | ||
18 | |||
19 | /* Save flags and disable interrupts */ | ||
20 | local_irq_save(flags); | ||
21 | |||
22 | ccr3 = getCx86(CX86_CCR3); | ||
23 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ | ||
24 | ((unsigned char *) base)[3] = getCx86(arr); | ||
25 | ((unsigned char *) base)[2] = getCx86(arr + 1); | ||
26 | ((unsigned char *) base)[1] = getCx86(arr + 2); | ||
27 | rcr = getCx86(CX86_RCR_BASE + reg); | ||
28 | setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ | ||
29 | |||
30 | /* Enable interrupts if it was enabled previously */ | ||
31 | local_irq_restore(flags); | ||
32 | shift = ((unsigned char *) base)[1] & 0x0f; | ||
33 | *base >>= PAGE_SHIFT; | ||
34 | |||
35 | /* Power of two, at least 4K on ARR0-ARR6, 256K on ARR7 | ||
36 | * Note: shift==0xf means 4G, this is unsupported. | ||
37 | */ | ||
38 | if (shift) | ||
39 | *size = (reg < 7 ? 0x1UL : 0x40UL) << (shift - 1); | ||
40 | else | ||
41 | *size = 0; | ||
42 | |||
43 | /* Bit 0 is Cache Enable on ARR7, Cache Disable on ARR0-ARR6 */ | ||
44 | if (reg < 7) { | ||
45 | switch (rcr) { | ||
46 | case 1: | ||
47 | *type = MTRR_TYPE_UNCACHABLE; | ||
48 | break; | ||
49 | case 8: | ||
50 | *type = MTRR_TYPE_WRBACK; | ||
51 | break; | ||
52 | case 9: | ||
53 | *type = MTRR_TYPE_WRCOMB; | ||
54 | break; | ||
55 | case 24: | ||
56 | default: | ||
57 | *type = MTRR_TYPE_WRTHROUGH; | ||
58 | break; | ||
59 | } | ||
60 | } else { | ||
61 | switch (rcr) { | ||
62 | case 0: | ||
63 | *type = MTRR_TYPE_UNCACHABLE; | ||
64 | break; | ||
65 | case 8: | ||
66 | *type = MTRR_TYPE_WRCOMB; | ||
67 | break; | ||
68 | case 9: | ||
69 | *type = MTRR_TYPE_WRBACK; | ||
70 | break; | ||
71 | case 25: | ||
72 | default: | ||
73 | *type = MTRR_TYPE_WRTHROUGH; | ||
74 | break; | ||
75 | } | ||
76 | } | ||
77 | } | ||
78 | |||
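The byte pokes into *base in cyrix_get_arr() above rely on i386's little-endian layout of unsigned long. A sketch (not part of the commit) of the same 24-bit base assembly written with explicit shifts; getCx86() is the register accessor used throughout this file:

	static unsigned long cyrix_arr_base(unsigned char arr)
	{
		unsigned long base = 0;

		base |= (unsigned long) getCx86(arr)     << 24;	/* bits 31..24 */
		base |= (unsigned long) getCx86(arr + 1) << 16;	/* bits 23..16 */
		base |= (unsigned long) getCx86(arr + 2) <<  8;	/* bits 15..8; the low
							   nibble of this byte is
							   the size field read above */
		return base;	/* the caller then shifts right by PAGE_SHIFT */
	}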
79 | static int | ||
80 | cyrix_get_free_region(unsigned long base, unsigned long size) | ||
81 | /* [SUMMARY] Get a free ARR. | ||
82 | <base> The starting (base) address of the region. | ||
83 | <size> The size (in pages) of the region. | ||
84 | [RETURNS] The index of a free ARR on success, else -ENOSPC. | ||
85 | */ | ||
86 | { | ||
87 | int i; | ||
88 | mtrr_type ltype; | ||
89 | unsigned long lbase; | ||
90 | unsigned int lsize; | ||
91 | |||
92 | /* If we are to set up a region >32M then look at ARR7 immediately */ | ||
93 | if (size > 0x2000) { | ||
94 | cyrix_get_arr(7, &lbase, &lsize, <ype); | ||
95 | if (lsize == 0) | ||
96 | return 7; | ||
97 | /* Else try ARR0-ARR6 first */ | ||
98 | } else { | ||
99 | for (i = 0; i < 7; i++) { | ||
100 | cyrix_get_arr(i, &lbase, &lsize, <ype); | ||
101 | if ((i == 3) && arr3_protected) | ||
102 | continue; | ||
103 | if (lsize == 0) | ||
104 | return i; | ||
105 | } | ||
106 | /* ARR0-ARR6 aren't free; try ARR7, but the region must be at least 256K */ | ||
107 | cyrix_get_arr(i, &lbase, &lsize, <ype); | ||
108 | if ((lsize == 0) && (size >= 0x40)) | ||
109 | return i; | ||
110 | } | ||
111 | return -ENOSPC; | ||
112 | } | ||
113 | |||
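Sizes in this driver are in pages, which is what the bare constants in cyrix_get_free_region() encode. A sketch of the decode, assuming 4 KiB pages (PAGE_SHIFT == 12); the enum names are invented:

	/* Sketch only: the page-unit constants used above */
	enum {
		ARR7_FIRST_THRESH = 0x2000 << 12,	/* 0x2000 pages = 32 MiB:
							   larger regions go
							   straight to ARR7 */
		ARR7_MIN_SIZE     = 0x40 << 12,		/* 0x40 pages = 256 KiB:
							   ARR7's minimum size */
	};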
114 | static u32 cr4 = 0; | ||
115 | static u32 ccr3; | ||
116 | |||
117 | static void prepare_set(void) | ||
118 | { | ||
119 | u32 cr0; | ||
120 | |||
121 | /* Save value of CR4 and clear Page Global Enable (bit 7) */ | ||
122 | if ( cpu_has_pge ) { | ||
123 | cr4 = read_cr4(); | ||
124 | write_cr4(cr4 & (unsigned char) ~(1 << 7)); | ||
125 | } | ||
126 | |||
127 | /* Disable and flush caches. Note that wbinvd flushes the TLBs as | ||
128 | a side-effect */ | ||
129 | cr0 = read_cr0() | 0x40000000; | ||
130 | wbinvd(); | ||
131 | write_cr0(cr0); | ||
132 | wbinvd(); | ||
133 | |||
134 | /* Cyrix ARRs - everything else was excluded at the top */ | ||
135 | ccr3 = getCx86(CX86_CCR3); | ||
136 | |||
137 | /* Set MAPEN (bit 4 of CCR3) so the ARR registers become accessible */ | ||
138 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); | ||
139 | |||
140 | } | ||
141 | |||
142 | static void post_set(void) | ||
143 | { | ||
144 | /* Flush caches and TLBs */ | ||
145 | wbinvd(); | ||
146 | |||
147 | /* Cyrix ARRs - everything else was excluded at the top */ | ||
148 | setCx86(CX86_CCR3, ccr3); | ||
149 | |||
150 | /* Enable caches */ | ||
151 | write_cr0(read_cr0() & 0xbfffffff); | ||
152 | |||
153 | /* Restore value of CR4 */ | ||
154 | if ( cpu_has_pge ) | ||
155 | write_cr4(cr4); | ||
156 | } | ||
157 | |||
158 | static void cyrix_set_arr(unsigned int reg, unsigned long base, | ||
159 | unsigned long size, mtrr_type type) | ||
160 | { | ||
161 | unsigned char arr, arr_type, arr_size; | ||
162 | |||
163 | arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */ | ||
164 | |||
165 | /* count down from 32M (ARR0-ARR6) or from 2G (ARR7) */ | ||
166 | if (reg >= 7) | ||
167 | size >>= 6; | ||
168 | |||
169 | size &= 0x7fff; /* make sure arr_size <= 14 */ | ||
170 | for (arr_size = 0; size; arr_size++, size >>= 1) ; | ||
171 | |||
172 | if (reg < 7) { | ||
173 | switch (type) { | ||
174 | case MTRR_TYPE_UNCACHABLE: | ||
175 | arr_type = 1; | ||
176 | break; | ||
177 | case MTRR_TYPE_WRCOMB: | ||
178 | arr_type = 9; | ||
179 | break; | ||
180 | case MTRR_TYPE_WRTHROUGH: | ||
181 | arr_type = 24; | ||
182 | break; | ||
183 | default: | ||
184 | arr_type = 8; | ||
185 | break; | ||
186 | } | ||
187 | } else { | ||
188 | switch (type) { | ||
189 | case MTRR_TYPE_UNCACHABLE: | ||
190 | arr_type = 0; | ||
191 | break; | ||
192 | case MTRR_TYPE_WRCOMB: | ||
193 | arr_type = 8; | ||
194 | break; | ||
195 | case MTRR_TYPE_WRTHROUGH: | ||
196 | arr_type = 25; | ||
197 | break; | ||
198 | default: | ||
199 | arr_type = 9; | ||
200 | break; | ||
201 | } | ||
202 | } | ||
203 | |||
204 | prepare_set(); | ||
205 | |||
206 | base <<= PAGE_SHIFT; | ||
207 | setCx86(arr, ((unsigned char *) &base)[3]); | ||
208 | setCx86(arr + 1, ((unsigned char *) &base)[2]); | ||
209 | setCx86(arr + 2, (((unsigned char *) &base)[1]) | arr_size); | ||
210 | setCx86(CX86_RCR_BASE + reg, arr_type); | ||
211 | |||
212 | post_set(); | ||
213 | } | ||
214 | |||
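The empty-bodied for loop in cyrix_set_arr() computes the hardware's power-of-two size code: the index of the highest set bit plus one. A standalone sketch with a worked value (function name invented):

	static unsigned char arr_size_code(unsigned long size_pages, unsigned int reg)
	{
		unsigned char code;

		if (reg >= 7)
			size_pages >>= 6;	/* ARR7 counts 256 KiB units */
		size_pages &= 0x7fff;		/* keep the 4-bit size field sane */
		for (code = 0; size_pages; code++, size_pages >>= 1)
			;
		/* e.g. a 32 MiB region on ARR0-ARR6: 0x2000 pages -> code 14,
		 * which cyrix_get_arr() decodes back as 0x1UL << (14 - 1) pages */
		return code;
	}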
215 | typedef struct { | ||
216 | unsigned long base; | ||
217 | unsigned int size; | ||
218 | mtrr_type type; | ||
219 | } arr_state_t; | ||
220 | |||
221 | static arr_state_t arr_state[8] __initdata = { | ||
222 | {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, | ||
223 | {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL} | ||
224 | }; | ||
225 | |||
226 | static unsigned char ccr_state[7] __initdata = { 0, 0, 0, 0, 0, 0, 0 }; | ||
227 | |||
228 | static void cyrix_set_all(void) | ||
229 | { | ||
230 | int i; | ||
231 | |||
232 | prepare_set(); | ||
233 | |||
234 | /* the CCRs are not contiguous */ | ||
235 | for (i = 0; i < 4; i++) | ||
236 | setCx86(CX86_CCR0 + i, ccr_state[i]); | ||
237 | for (; i < 7; i++) | ||
238 | setCx86(CX86_CCR4 + i, ccr_state[i]); | ||
239 | for (i = 0; i < 8; i++) | ||
240 | cyrix_set_arr(i, arr_state[i].base, | ||
241 | arr_state[i].size, arr_state[i].type); | ||
242 | |||
243 | post_set(); | ||
244 | } | ||
245 | |||
246 | #if 0 | ||
247 | /* | ||
248 | * On Cyrix 6x86(MX) and M II, ARR3 is special: it is tied to | ||
249 | * SMM (System Management Mode). So we need the following: | ||
250 | * Check whether SMI_LOCK (CCR3 bit 0) is set | ||
251 | * if it is set, write a warning message: ARR3 cannot be changed! | ||
252 | * (it cannot be changed until the next processor reset) | ||
253 | * if it is reset, then we can change it, set all the needed bits: | ||
254 | * - disable access to SMM memory through ARR3 range (CCR1 bit 7 reset) | ||
255 | * - disable access to SMM memory (CCR1 bit 2 reset) | ||
256 | * - disable SMM mode (CCR1 bit 1 reset) | ||
257 | * - disable write protection of ARR3 (CCR6 bit 1 reset) | ||
258 | * - (maybe) disable ARR3 | ||
259 | * Just to be sure, we enable ARR usage by the processor (CCR5 bit 5 set) | ||
260 | */ | ||
261 | static void __init | ||
262 | cyrix_arr_init(void) | ||
263 | { | ||
264 | struct set_mtrr_context ctxt; | ||
265 | unsigned char ccr[7]; | ||
266 | int ccrc[7] = { 0, 0, 0, 0, 0, 0, 0 }; | ||
267 | #ifdef CONFIG_SMP | ||
268 | int i; | ||
269 | #endif | ||
270 | |||
271 | /* flush cache and enable MAPEN */ | ||
272 | set_mtrr_prepare_save(&ctxt); | ||
273 | set_mtrr_cache_disable(&ctxt); | ||
274 | |||
275 | /* Save all CCRs locally */ | ||
276 | ccr[0] = getCx86(CX86_CCR0); | ||
277 | ccr[1] = getCx86(CX86_CCR1); | ||
278 | ccr[2] = getCx86(CX86_CCR2); | ||
279 | ccr[3] = ctxt.ccr3; | ||
280 | ccr[4] = getCx86(CX86_CCR4); | ||
281 | ccr[5] = getCx86(CX86_CCR5); | ||
282 | ccr[6] = getCx86(CX86_CCR6); | ||
283 | |||
284 | if (ccr[3] & 1) { | ||
285 | ccrc[3] = 1; | ||
286 | arr3_protected = 1; | ||
287 | } else { | ||
288 | /* Disable SMM mode (bit 1), access to SMM memory (bit 2) and | ||
289 | * access to SMM memory through ARR3 (bit 7). | ||
290 | */ | ||
291 | if (ccr[1] & 0x80) { | ||
292 | ccr[1] &= 0x7f; | ||
293 | ccrc[1] |= 0x80; | ||
294 | } | ||
295 | if (ccr[1] & 0x04) { | ||
296 | ccr[1] &= 0xfb; | ||
297 | ccrc[1] |= 0x04; | ||
298 | } | ||
299 | if (ccr[1] & 0x02) { | ||
300 | ccr[1] &= 0xfd; | ||
301 | ccrc[1] |= 0x02; | ||
302 | } | ||
303 | arr3_protected = 0; | ||
304 | if (ccr[6] & 0x02) { | ||
305 | ccr[6] &= 0xfd; | ||
306 | ccrc[6] = 1; /* Disable write protection of ARR3 */ | ||
307 | setCx86(CX86_CCR6, ccr[6]); | ||
308 | } | ||
309 | /* Disable ARR3. This is safe now that we disabled SMM. */ | ||
310 | /* cyrix_set_arr_up (3, 0, 0, 0, FALSE); */ | ||
311 | } | ||
312 | /* If we changed CCR1 in memory, change it in the processor, too. */ | ||
313 | if (ccrc[1]) | ||
314 | setCx86(CX86_CCR1, ccr[1]); | ||
315 | |||
316 | /* Enable ARR usage by the processor */ | ||
317 | if (!(ccr[5] & 0x20)) { | ||
318 | ccr[5] |= 0x20; | ||
319 | ccrc[5] = 1; | ||
320 | setCx86(CX86_CCR5, ccr[5]); | ||
321 | } | ||
322 | #ifdef CONFIG_SMP | ||
323 | for (i = 0; i < 7; i++) | ||
324 | ccr_state[i] = ccr[i]; | ||
325 | for (i = 0; i < 8; i++) | ||
326 | cyrix_get_arr(i, | ||
327 | &arr_state[i].base, &arr_state[i].size, | ||
328 | &arr_state[i].type); | ||
329 | #endif | ||
330 | |||
331 | set_mtrr_done(&ctxt); /* flush cache and disable MAPEN */ | ||
332 | |||
333 | if (ccrc[5]) | ||
334 | printk(KERN_INFO "mtrr: ARR usage was not enabled, enabled manually\n"); | ||
335 | if (ccrc[3]) | ||
336 | printk(KERN_INFO "mtrr: ARR3 cannot be changed\n"); | ||
337 | /* | ||
338 | if ( ccrc[1] & 0x80) printk ("mtrr: SMM memory access through ARR3 disabled\n"); | ||
339 | if ( ccrc[1] & 0x04) printk ("mtrr: SMM memory access disabled\n"); | ||
340 | if ( ccrc[1] & 0x02) printk ("mtrr: SMM mode disabled\n"); | ||
341 | */ | ||
342 | if (ccrc[6]) | ||
343 | printk(KERN_INFO "mtrr: ARR3 was write protected, unprotected\n"); | ||
344 | } | ||
345 | #endif | ||
346 | |||
347 | static struct mtrr_ops cyrix_mtrr_ops = { | ||
348 | .vendor = X86_VENDOR_CYRIX, | ||
349 | // .init = cyrix_arr_init, | ||
350 | .set_all = cyrix_set_all, | ||
351 | .set = cyrix_set_arr, | ||
352 | .get = cyrix_get_arr, | ||
353 | .get_free_region = cyrix_get_free_region, | ||
354 | .validate_add_page = generic_validate_add_page, | ||
355 | .have_wrcomb = positive_have_wrcomb, | ||
356 | }; | ||
357 | |||
358 | int __init cyrix_init_mtrr(void) | ||
359 | { | ||
360 | set_mtrr_ops(&cyrix_mtrr_ops); | ||
361 | return 0; | ||
362 | } | ||
363 | |||
364 | //arch_initcall(cyrix_init_mtrr); | ||
diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c new file mode 100644 index 000000000000..a4cce454d09b --- /dev/null +++ b/arch/i386/kernel/cpu/mtrr/generic.c | |||
@@ -0,0 +1,417 @@ | |||
1 | /* This only handles 32-bit MTRRs on 32-bit hosts. This is strictly wrong | ||
2 | because MTRRs can span up to 40 bits (36 bits on most modern x86) */ | ||
3 | #include <linux/init.h> | ||
4 | #include <linux/slab.h> | ||
5 | #include <linux/mm.h> | ||
6 | #include <asm/io.h> | ||
7 | #include <asm/mtrr.h> | ||
8 | #include <asm/msr.h> | ||
9 | #include <asm/system.h> | ||
10 | #include <asm/cpufeature.h> | ||
11 | #include <asm/tlbflush.h> | ||
12 | #include "mtrr.h" | ||
13 | |||
14 | struct mtrr_state { | ||
15 | struct mtrr_var_range *var_ranges; | ||
16 | mtrr_type fixed_ranges[NUM_FIXED_RANGES]; | ||
17 | unsigned char enabled; | ||
18 | mtrr_type def_type; | ||
19 | }; | ||
20 | |||
21 | static unsigned long smp_changes_mask; | ||
22 | static struct mtrr_state mtrr_state = {}; | ||
23 | |||
24 | /* Get the MSR pair relating to a var range */ | ||
25 | static void __init | ||
26 | get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr) | ||
27 | { | ||
28 | rdmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi); | ||
29 | rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); | ||
30 | } | ||
31 | |||
32 | static void __init | ||
33 | get_fixed_ranges(mtrr_type * frs) | ||
34 | { | ||
35 | unsigned int *p = (unsigned int *) frs; | ||
36 | int i; | ||
37 | |||
38 | rdmsr(MTRRfix64K_00000_MSR, p[0], p[1]); | ||
39 | |||
40 | for (i = 0; i < 2; i++) | ||
41 | rdmsr(MTRRfix16K_80000_MSR + i, p[2 + i * 2], p[3 + i * 2]); | ||
42 | for (i = 0; i < 8; i++) | ||
43 | rdmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], p[7 + i * 2]); | ||
44 | } | ||
45 | |||
46 | /* Grab all of the MTRR state for this CPU into *state */ | ||
47 | void __init get_mtrr_state(void) | ||
48 | { | ||
49 | unsigned int i; | ||
50 | struct mtrr_var_range *vrs; | ||
51 | unsigned lo, dummy; | ||
52 | |||
53 | if (!mtrr_state.var_ranges) { | ||
54 | mtrr_state.var_ranges = kmalloc(num_var_ranges * sizeof (struct mtrr_var_range), | ||
55 | GFP_KERNEL); | ||
56 | if (!mtrr_state.var_ranges) | ||
57 | return; | ||
58 | } | ||
59 | vrs = mtrr_state.var_ranges; | ||
60 | |||
61 | for (i = 0; i < num_var_ranges; i++) | ||
62 | get_mtrr_var_range(i, &vrs[i]); | ||
63 | get_fixed_ranges(mtrr_state.fixed_ranges); | ||
64 | |||
65 | rdmsr(MTRRdefType_MSR, lo, dummy); | ||
66 | mtrr_state.def_type = (lo & 0xff); | ||
67 | mtrr_state.enabled = (lo & 0xc00) >> 10; | ||
68 | } | ||
69 | |||
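The two-line decode at the end of get_mtrr_state() follows the MTRRdefType layout (default type in bits 7:0, fixed-range enable in bit 10, global MTRR enable in bit 11; MSR 0x2ff in the Intel manuals). A sketch of the matching encode, as set_mtrr_state() below assumes; the helper is invented:

	/* Sketch only: MTRRdefType bit layout assumed above */
	static u32 encode_deftype(mtrr_type def_type, unsigned char enabled)
	{
		/* enabled bit 0 = fixed-range enable, bit 1 = MTRR enable */
		return (def_type & 0xff) | ((u32) (enabled & 0x3) << 10);
	}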
70 | /* Free resources associated with a struct mtrr_state */ | ||
71 | void __init finalize_mtrr_state(void) | ||
72 | { | ||
73 | if (mtrr_state.var_ranges) | ||
74 | kfree(mtrr_state.var_ranges); | ||
75 | mtrr_state.var_ranges = NULL; | ||
76 | } | ||
77 | |||
78 | /* Some BIOSes are broken and don't set all MTRRs the same! */ | ||
79 | void __init mtrr_state_warn(void) | ||
80 | { | ||
81 | unsigned long mask = smp_changes_mask; | ||
82 | |||
83 | if (!mask) | ||
84 | return; | ||
85 | if (mask & MTRR_CHANGE_MASK_FIXED) | ||
86 | printk(KERN_WARNING "mtrr: your CPUs had inconsistent fixed MTRR settings\n"); | ||
87 | if (mask & MTRR_CHANGE_MASK_VARIABLE) | ||
88 | printk(KERN_WARNING "mtrr: your CPUs had inconsistent variable MTRR settings\n"); | ||
89 | if (mask & MTRR_CHANGE_MASK_DEFTYPE) | ||
90 | printk(KERN_WARNING "mtrr: your CPUs had inconsistent MTRRdefType settings\n"); | ||
91 | printk(KERN_INFO "mtrr: probably your BIOS does not setup all CPUs.\n"); | ||
92 | printk(KERN_INFO "mtrr: corrected configuration.\n"); | ||
93 | } | ||
94 | |||
95 | /* Doesn't attempt to pass an error back to MTRR users | ||
96 | because it's quite complicated in some cases, and the best | ||
97 | error handling is usually to ignore the failure anyway. */ | ||
98 | void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b) | ||
99 | { | ||
100 | if (wrmsr_safe(msr, a, b) < 0) | ||
101 | printk(KERN_ERR | ||
102 | "MTRR: CPU %u: Writing MSR %x to %x:%x failed\n", | ||
103 | smp_processor_id(), msr, a, b); | ||
104 | } | ||
105 | |||
106 | int generic_get_free_region(unsigned long base, unsigned long size) | ||
107 | /* [SUMMARY] Get a free MTRR. | ||
108 | <base> The starting (base) address of the region. | ||
109 | <size> The size (in pages) of the region. | ||
110 | [RETURNS] The index of a free variable range on success, else -ENOSPC. | ||
111 | */ | ||
112 | { | ||
113 | int i, max; | ||
114 | mtrr_type ltype; | ||
115 | unsigned long lbase; | ||
116 | unsigned lsize; | ||
117 | |||
118 | max = num_var_ranges; | ||
119 | for (i = 0; i < max; ++i) { | ||
120 | mtrr_if->get(i, &lbase, &lsize, <ype); | ||
121 | if (lsize == 0) | ||
122 | return i; | ||
123 | } | ||
124 | return -ENOSPC; | ||
125 | } | ||
126 | |||
127 | void generic_get_mtrr(unsigned int reg, unsigned long *base, | ||
128 | unsigned int *size, mtrr_type * type) | ||
129 | { | ||
130 | unsigned int mask_lo, mask_hi, base_lo, base_hi; | ||
131 | |||
132 | rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); | ||
133 | if ((mask_lo & 0x800) == 0) { | ||
134 | /* Invalid (i.e. free) range */ | ||
135 | *base = 0; | ||
136 | *size = 0; | ||
137 | *type = 0; | ||
138 | return; | ||
139 | } | ||
140 | |||
141 | rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi); | ||
142 | |||
143 | /* Work out the shifted address mask. */ | ||
144 | mask_lo = size_or_mask | mask_hi << (32 - PAGE_SHIFT) | ||
145 | | mask_lo >> PAGE_SHIFT; | ||
146 | |||
147 | /* This works correctly if size is a power of two, i.e. a | ||
148 | contiguous range. */ | ||
149 | *size = -mask_lo; | ||
150 | *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT; | ||
151 | *type = base_lo & 0xff; | ||
152 | } | ||
153 | |||
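The *size = -mask_lo line works because, for a contiguous power-of-two region, the page-shifted mask is ~(size_in_pages - 1) with the bits above the physical address width filled in by size_or_mask; two's-complement negation then recovers the page count. Worked sketch (helper name invented):

	/* Sketch: 256 MiB with 4 KiB pages is 0x10000 pages, so the shifted
	 * mask is ~(0x10000 - 1) = 0xffff0000 and -0xffff0000 == 0x10000. */
	static unsigned int pages_from_shifted_mask(unsigned int shifted_mask)
	{
		return -shifted_mask;	/* valid only for power-of-two sizes */
	}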
154 | static int set_fixed_ranges(mtrr_type * frs) | ||
155 | { | ||
156 | unsigned int *p = (unsigned int *) frs; | ||
157 | int changed = FALSE; | ||
158 | int i; | ||
159 | unsigned int lo, hi; | ||
160 | |||
161 | rdmsr(MTRRfix64K_00000_MSR, lo, hi); | ||
162 | if (p[0] != lo || p[1] != hi) { | ||
163 | mtrr_wrmsr(MTRRfix64K_00000_MSR, p[0], p[1]); | ||
164 | changed = TRUE; | ||
165 | } | ||
166 | |||
167 | for (i = 0; i < 2; i++) { | ||
168 | rdmsr(MTRRfix16K_80000_MSR + i, lo, hi); | ||
169 | if (p[2 + i * 2] != lo || p[3 + i * 2] != hi) { | ||
170 | mtrr_wrmsr(MTRRfix16K_80000_MSR + i, p[2 + i * 2], | ||
171 | p[3 + i * 2]); | ||
172 | changed = TRUE; | ||
173 | } | ||
174 | } | ||
175 | |||
176 | for (i = 0; i < 8; i++) { | ||
177 | rdmsr(MTRRfix4K_C0000_MSR + i, lo, hi); | ||
178 | if (p[6 + i * 2] != lo || p[7 + i * 2] != hi) { | ||
179 | mtrr_wrmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], | ||
180 | p[7 + i * 2]); | ||
181 | changed = TRUE; | ||
182 | } | ||
183 | } | ||
184 | return changed; | ||
185 | } | ||
186 | |||
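The p[] index arithmetic in get_fixed_ranges() and set_fixed_ranges() treats the 88 one-byte type fields as 22 u32 words spread over 11 MSRs: one MSR for the eight 64 KiB ranges, two for the sixteen 16 KiB ranges, eight for the sixty-four 4 KiB ranges. A sketch of the mapping (addresses per the Intel fixed-range layout):

	/* Sketch only: which u32 words of the fixed-range array belong to
	 * which MSR (each MSR supplies a lo/hi pair = 8 type bytes):
	 *   MTRRfix64K_00000     -> p[0], p[1]         0x00000-0x7ffff, 64K each
	 *   MTRRfix16K_80000 + i -> p[2+2i], p[3+2i]   0x80000-0xbffff, 16K each
	 *   MTRRfix4K_C0000  + i -> p[6+2i], p[7+2i]   0xc0000-0xfffff,  4K each
	 * covering the first 1 MiB of physical address space in total. */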
187 | /* Set the MSR pair relating to a var range. Returns TRUE if | ||
188 | changes are made */ | ||
189 | static int set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) | ||
190 | { | ||
191 | unsigned int lo, hi; | ||
192 | int changed = FALSE; | ||
193 | |||
194 | rdmsr(MTRRphysBase_MSR(index), lo, hi); | ||
195 | if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL) | ||
196 | || (vr->base_hi & 0xfUL) != (hi & 0xfUL)) { | ||
197 | mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi); | ||
198 | changed = TRUE; | ||
199 | } | ||
200 | |||
201 | rdmsr(MTRRphysMask_MSR(index), lo, hi); | ||
202 | |||
203 | if ((vr->mask_lo & 0xfffff800UL) != (lo & 0xfffff800UL) | ||
204 | || (vr->mask_hi & 0xfUL) != (hi & 0xfUL)) { | ||
205 | mtrr_wrmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); | ||
206 | changed = TRUE; | ||
207 | } | ||
208 | return changed; | ||
209 | } | ||
210 | |||
211 | static unsigned long set_mtrr_state(u32 deftype_lo, u32 deftype_hi) | ||
212 | /* [SUMMARY] Set the MTRR state for this CPU. | ||
213 | <state> The MTRR state information to read. | ||
214 | <ctxt> Some relevant CPU context. | ||
215 | [NOTE] The CPU must already be in a safe state for MTRR changes. | ||
216 | [RETURNS] 0 if no changes made, else a mask indication what was changed. | ||
217 | */ | ||
218 | { | ||
219 | unsigned int i; | ||
220 | unsigned long change_mask = 0; | ||
221 | |||
222 | for (i = 0; i < num_var_ranges; i++) | ||
223 | if (set_mtrr_var_ranges(i, &mtrr_state.var_ranges[i])) | ||
224 | change_mask |= MTRR_CHANGE_MASK_VARIABLE; | ||
225 | |||
226 | if (set_fixed_ranges(mtrr_state.fixed_ranges)) | ||
227 | change_mask |= MTRR_CHANGE_MASK_FIXED; | ||
228 | |||
229 | /* Set_mtrr_restore restores the old value of MTRRdefType, | ||
230 | so to set it we fiddle with the saved value */ | ||
231 | if ((deftype_lo & 0xff) != mtrr_state.def_type | ||
232 | || ((deftype_lo & 0xc00) >> 10) != mtrr_state.enabled) { | ||
233 | deftype_lo |= (mtrr_state.def_type | mtrr_state.enabled << 10); | ||
234 | change_mask |= MTRR_CHANGE_MASK_DEFTYPE; | ||
235 | } | ||
236 | |||
237 | return change_mask; | ||
238 | } | ||
239 | |||
240 | |||
241 | static unsigned long cr4 = 0; | ||
242 | static u32 deftype_lo, deftype_hi; | ||
243 | static DEFINE_SPINLOCK(set_atomicity_lock); | ||
244 | |||
245 | /* | ||
246 | * Since we are disabling the cache don't allow any interrupts - they | ||
247 | * would run extremely slow and would only increase the pain. The caller must | ||
248 | * ensure that local interrupts are disabled and are reenabled after post_set() | ||
249 | * has been called. | ||
250 | */ | ||
251 | |||
252 | static void prepare_set(void) | ||
253 | { | ||
254 | unsigned long cr0; | ||
255 | |||
256 | /* Note that this is not ideal, since the cache is only flushed/disabled | ||
257 | for this CPU while the MTRRs are changed, but changing this requires | ||
258 | more invasive changes to the way the kernel boots */ | ||
259 | |||
260 | spin_lock(&set_atomicity_lock); | ||
261 | |||
262 | /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */ | ||
263 | cr0 = read_cr0() | 0x40000000; /* set CD flag */ | ||
264 | write_cr0(cr0); | ||
265 | wbinvd(); | ||
266 | |||
267 | /* Save value of CR4 and clear Page Global Enable (bit 7) */ | ||
268 | if ( cpu_has_pge ) { | ||
269 | cr4 = read_cr4(); | ||
270 | write_cr4(cr4 & ~X86_CR4_PGE); | ||
271 | } | ||
272 | |||
273 | /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */ | ||
274 | __flush_tlb(); | ||
275 | |||
276 | /* Save MTRR state */ | ||
277 | rdmsr(MTRRdefType_MSR, deftype_lo, deftype_hi); | ||
278 | |||
279 | /* Disable MTRRs, and set the default type to uncached */ | ||
280 | mtrr_wrmsr(MTRRdefType_MSR, deftype_lo & 0xf300UL, deftype_hi); | ||
281 | } | ||
282 | |||
283 | static void post_set(void) | ||
284 | { | ||
285 | /* Flush TLBs (no need to flush caches - they are disabled) */ | ||
286 | __flush_tlb(); | ||
287 | |||
288 | /* Intel (P6) standard MTRRs */ | ||
289 | mtrr_wrmsr(MTRRdefType_MSR, deftype_lo, deftype_hi); | ||
290 | |||
291 | /* Enable caches */ | ||
292 | write_cr0(read_cr0() & 0xbfffffff); | ||
293 | |||
294 | /* Restore value of CR4 */ | ||
295 | if ( cpu_has_pge ) | ||
296 | write_cr4(cr4); | ||
297 | spin_unlock(&set_atomicity_lock); | ||
298 | } | ||
299 | |||
300 | static void generic_set_all(void) | ||
301 | { | ||
302 | unsigned long mask, count; | ||
303 | unsigned long flags; | ||
304 | |||
305 | local_irq_save(flags); | ||
306 | prepare_set(); | ||
307 | |||
308 | /* Actually set the state */ | ||
309 | mask = set_mtrr_state(deftype_lo,deftype_hi); | ||
310 | |||
311 | post_set(); | ||
312 | local_irq_restore(flags); | ||
313 | |||
314 | /* Use the atomic bitops to update the global mask */ | ||
315 | for (count = 0; count < sizeof mask * 8; ++count) { | ||
316 | if (mask & 0x01) | ||
317 | set_bit(count, &smp_changes_mask); | ||
318 | mask >>= 1; | ||
319 | } | ||
320 | |||
321 | } | ||
322 | |||
323 | static void generic_set_mtrr(unsigned int reg, unsigned long base, | ||
324 | unsigned long size, mtrr_type type) | ||
325 | /* [SUMMARY] Set variable MTRR register on the local CPU. | ||
326 | <reg> The register to set. | ||
327 | <base> The base address of the region. | ||
328 | <size> The size of the region. If this is 0 the region is disabled. | ||
329 | <type> The type of the region. | ||
330 | [NOTE] The change is made safely here: prepare_set()/post_set() handle | ||
331 | the cache disable, and interrupts are masked for the duration. | ||
332 | [RETURNS] Nothing. | ||
333 | */ | ||
334 | { | ||
335 | unsigned long flags; | ||
336 | |||
337 | local_irq_save(flags); | ||
338 | prepare_set(); | ||
339 | |||
340 | if (size == 0) { | ||
341 | /* The invalid bit is kept in the mask, so we simply clear the | ||
342 | relevant mask register to disable a range. */ | ||
343 | mtrr_wrmsr(MTRRphysMask_MSR(reg), 0, 0); | ||
344 | } else { | ||
345 | mtrr_wrmsr(MTRRphysBase_MSR(reg), base << PAGE_SHIFT | type, | ||
346 | (base & size_and_mask) >> (32 - PAGE_SHIFT)); | ||
347 | mtrr_wrmsr(MTRRphysMask_MSR(reg), -size << PAGE_SHIFT | 0x800, | ||
348 | (-size & size_and_mask) >> (32 - PAGE_SHIFT)); | ||
349 | } | ||
350 | |||
351 | post_set(); | ||
352 | local_irq_restore(flags); | ||
353 | } | ||
354 | |||
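For reference, how the two mtrr_wrmsr() calls above encode a concrete region. Assuming 4 KiB pages and 36-bit physical addressing (so size_and_mask covers bits 35:32), a 4 MiB write-combining region at physical 0xf8000000 arrives here as base = 0xf8000 pages, size = 0x400 pages, type = 1; the arithmetic below is a worked example, not driver code:

	/* Worked example (sketch only):
	 *   PHYSBASE lo = (0xf8000 << 12) | 1              = 0xf8000001
	 *   PHYSBASE hi = (0xf8000 & size_and_mask) >> 20  = 0
	 *   PHYSMASK lo = ((-0x400) << 12) | 0x800         = 0xffc00800
	 *   PHYSMASK hi = ((-0x400) & size_and_mask) >> 20 = 0xf
	 * Bit 11 of PHYSMASK is the valid bit; the mask has ones over every
	 * address bit that must match the base, hence the power-of-two and
	 * alignment requirements checked by generic_validate_add_page(). */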
355 | int generic_validate_add_page(unsigned long base, unsigned long size, unsigned int type) | ||
356 | { | ||
357 | unsigned long lbase, last; | ||
358 | |||
359 | /* For Intel PPro stepping <= 7, must be 4 MiB aligned | ||
360 | and not touch 0x70000000->0x7003FFFF */ | ||
361 | if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 && | ||
362 | boot_cpu_data.x86_model == 1 && | ||
363 | boot_cpu_data.x86_mask <= 7) { | ||
364 | if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) { | ||
365 | printk(KERN_WARNING "mtrr: base(0x%lx000) is not 4 MiB aligned\n", base); | ||
366 | return -EINVAL; | ||
367 | } | ||
368 | if (!(base + size < 0x70000000 || base > 0x7003FFFF) && | ||
369 | (type == MTRR_TYPE_WRCOMB | ||
370 | || type == MTRR_TYPE_WRBACK)) { | ||
371 | printk(KERN_WARNING "mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n"); | ||
372 | return -EINVAL; | ||
373 | } | ||
374 | } | ||
375 | |||
376 | if (base + size < 0x100) { | ||
377 | printk(KERN_WARNING "mtrr: cannot set region below 1 MiB (0x%lx000,0x%lx000)\n", | ||
378 | base, size); | ||
379 | return -EINVAL; | ||
380 | } | ||
381 | /* Check upper bits of base and last are equal and lower bits are 0 | ||
382 | for base and 1 for last */ | ||
383 | last = base + size - 1; | ||
384 | for (lbase = base; !(lbase & 1) && (last & 1); | ||
385 | lbase = lbase >> 1, last = last >> 1) ; | ||
386 | if (lbase != last) { | ||
387 | printk(KERN_WARNING "mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", | ||
388 | base, size); | ||
389 | return -EINVAL; | ||
390 | } | ||
391 | return 0; | ||
392 | } | ||
393 | |||
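The shifting loop above is a compact alignment test: it strips zero bits off base and one bits off last in lockstep, and the two collapse to the same value exactly when size is a power of two and base is aligned to it. Standalone sketch (values in pages; function name invented):

	static int is_size_aligned(unsigned long base, unsigned long size)
	{
		unsigned long lbase = base, last = base + size - 1;

		for (; !(lbase & 1) && (last & 1); lbase >>= 1, last >>= 1)
			;
		/* base 0x400, size 0x400: both reduce to 1 -> accepted
		 * base 0x200, size 0x400: lbase 1, last 2  -> rejected */
		return lbase == last;
	}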
394 | |||
395 | static int generic_have_wrcomb(void) | ||
396 | { | ||
397 | unsigned long config, dummy; | ||
398 | rdmsr(MTRRcap_MSR, config, dummy); | ||
399 | return (config & (1 << 10)); | ||
400 | } | ||
401 | |||
402 | int positive_have_wrcomb(void) | ||
403 | { | ||
404 | return 1; | ||
405 | } | ||
406 | |||
407 | /* generic structure... | ||
408 | */ | ||
409 | struct mtrr_ops generic_mtrr_ops = { | ||
410 | .use_intel_if = 1, | ||
411 | .set_all = generic_set_all, | ||
412 | .get = generic_get_mtrr, | ||
413 | .get_free_region = generic_get_free_region, | ||
414 | .set = generic_set_mtrr, | ||
415 | .validate_add_page = generic_validate_add_page, | ||
416 | .have_wrcomb = generic_have_wrcomb, | ||
417 | }; | ||
diff --git a/arch/i386/kernel/cpu/mtrr/if.c b/arch/i386/kernel/cpu/mtrr/if.c new file mode 100644 index 000000000000..1923e0aed26a --- /dev/null +++ b/arch/i386/kernel/cpu/mtrr/if.c | |||
@@ -0,0 +1,374 @@ | |||
1 | #include <linux/init.h> | ||
2 | #include <linux/proc_fs.h> | ||
3 | #include <linux/ctype.h> | ||
4 | #include <linux/module.h> | ||
5 | #include <linux/seq_file.h> | ||
6 | #include <asm/uaccess.h> | ||
7 | |||
8 | #define LINE_SIZE 80 | ||
9 | |||
10 | #include <asm/mtrr.h> | ||
11 | #include "mtrr.h" | ||
12 | |||
13 | /* RED-PEN: this is accessed without any locking */ | ||
14 | extern unsigned int *usage_table; | ||
15 | |||
16 | |||
17 | #define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private) | ||
18 | |||
19 | static char *mtrr_strings[MTRR_NUM_TYPES] = | ||
20 | { | ||
21 | "uncachable", /* 0 */ | ||
22 | "write-combining", /* 1 */ | ||
23 | "?", /* 2 */ | ||
24 | "?", /* 3 */ | ||
25 | "write-through", /* 4 */ | ||
26 | "write-protect", /* 5 */ | ||
27 | "write-back", /* 6 */ | ||
28 | }; | ||
29 | |||
30 | char *mtrr_attrib_to_str(int x) | ||
31 | { | ||
32 | return (x <= 6) ? mtrr_strings[x] : "?"; | ||
33 | } | ||
34 | |||
35 | #ifdef CONFIG_PROC_FS | ||
36 | |||
37 | static int | ||
38 | mtrr_file_add(unsigned long base, unsigned long size, | ||
39 | unsigned int type, char increment, struct file *file, int page) | ||
40 | { | ||
41 | int reg, max; | ||
42 | unsigned int *fcount = FILE_FCOUNT(file); | ||
43 | |||
44 | max = num_var_ranges; | ||
45 | if (fcount == NULL) { | ||
46 | fcount = kmalloc(max * sizeof *fcount, GFP_KERNEL); | ||
47 | if (!fcount) | ||
48 | return -ENOMEM; | ||
49 | memset(fcount, 0, max * sizeof *fcount); | ||
50 | FILE_FCOUNT(file) = fcount; | ||
51 | } | ||
52 | if (!page) { | ||
53 | if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) | ||
54 | return -EINVAL; | ||
55 | base >>= PAGE_SHIFT; | ||
56 | size >>= PAGE_SHIFT; | ||
57 | } | ||
58 | reg = mtrr_add_page(base, size, type, 1); | ||
59 | if (reg >= 0) | ||
60 | ++fcount[reg]; | ||
61 | return reg; | ||
62 | } | ||
63 | |||
64 | static int | ||
65 | mtrr_file_del(unsigned long base, unsigned long size, | ||
66 | struct file *file, int page) | ||
67 | { | ||
68 | int reg; | ||
69 | unsigned int *fcount = FILE_FCOUNT(file); | ||
70 | |||
71 | if (!page) { | ||
72 | if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) | ||
73 | return -EINVAL; | ||
74 | base >>= PAGE_SHIFT; | ||
75 | size >>= PAGE_SHIFT; | ||
76 | } | ||
77 | reg = mtrr_del_page(-1, base, size); | ||
78 | if (reg < 0) | ||
79 | return reg; | ||
80 | if (fcount == NULL) | ||
81 | return reg; | ||
82 | if (fcount[reg] < 1) | ||
83 | return -EINVAL; | ||
84 | --fcount[reg]; | ||
85 | return reg; | ||
86 | } | ||
87 | |||
88 | /* RED-PEN: seq_file can seek now. this is ignored. */ | ||
89 | static ssize_t | ||
90 | mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) | ||
91 | /* Format of control line: | ||
92 | "base=%Lx size=%Lx type=%s" OR: | ||
93 | "disable=%d" | ||
94 | */ | ||
95 | { | ||
96 | int i, err; | ||
97 | unsigned long reg; | ||
98 | unsigned long long base, size; | ||
99 | char *ptr; | ||
100 | char line[LINE_SIZE]; | ||
101 | size_t linelen; | ||
102 | |||
103 | if (!capable(CAP_SYS_ADMIN)) | ||
104 | return -EPERM; | ||
105 | if (!len) | ||
106 | return -EINVAL; | ||
107 | memset(line, 0, LINE_SIZE); | ||
108 | if (len > LINE_SIZE) | ||
109 | len = LINE_SIZE; | ||
110 | if (copy_from_user(line, buf, len - 1)) | ||
111 | return -EFAULT; | ||
112 | linelen = strlen(line); | ||
113 | ptr = line + linelen - 1; | ||
114 | if (linelen && *ptr == '\n') | ||
115 | *ptr = '\0'; | ||
116 | if (!strncmp(line, "disable=", 8)) { | ||
117 | reg = simple_strtoul(line + 8, &ptr, 0); | ||
118 | err = mtrr_del_page(reg, 0, 0); | ||
119 | if (err < 0) | ||
120 | return err; | ||
121 | return len; | ||
122 | } | ||
123 | if (strncmp(line, "base=", 5)) | ||
124 | return -EINVAL; | ||
125 | base = simple_strtoull(line + 5, &ptr, 0); | ||
126 | for (; isspace(*ptr); ++ptr) ; | ||
127 | if (strncmp(ptr, "size=", 5)) | ||
128 | return -EINVAL; | ||
129 | size = simple_strtoull(ptr + 5, &ptr, 0); | ||
130 | if ((base & 0xfff) || (size & 0xfff)) | ||
131 | return -EINVAL; | ||
132 | for (; isspace(*ptr); ++ptr) ; | ||
133 | if (strncmp(ptr, "type=", 5)) | ||
134 | return -EINVAL; | ||
135 | ptr += 5; | ||
136 | for (; isspace(*ptr); ++ptr) ; | ||
137 | for (i = 0; i < MTRR_NUM_TYPES; ++i) { | ||
138 | if (strcmp(ptr, mtrr_strings[i])) | ||
139 | continue; | ||
140 | base >>= PAGE_SHIFT; | ||
141 | size >>= PAGE_SHIFT; | ||
142 | err = | ||
143 | mtrr_add_page((unsigned long) base, (unsigned long) size, i, | ||
144 | 1); | ||
145 | if (err < 0) | ||
146 | return err; | ||
147 | return len; | ||
148 | } | ||
149 | return -EINVAL; | ||
150 | } | ||
151 | |||
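From userspace, the parser above is driven by writing a single control line to /proc/mtrr; "disable=N" frees register N. A sketch of the text interface (the range is invented; see also the Documentation/mtrr.txt sample code mentioned in the changelog):

	/* Userspace sketch, not kernel code */
	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		const char *line =
			"base=0xf8000000 size=0x400000 type=write-combining\n";
		int fd = open("/proc/mtrr", O_WRONLY);

		if (fd < 0 || write(fd, line, strlen(line)) < 0) {
			perror("/proc/mtrr");
			return 1;
		}
		close(fd);
		return 0;
	}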
152 | static int | ||
153 | mtrr_ioctl(struct inode *inode, struct file *file, | ||
154 | unsigned int cmd, unsigned long __arg) | ||
155 | { | ||
156 | int err; | ||
157 | mtrr_type type; | ||
158 | struct mtrr_sentry sentry; | ||
159 | struct mtrr_gentry gentry; | ||
160 | void __user *arg = (void __user *) __arg; | ||
161 | |||
162 | switch (cmd) { | ||
163 | default: | ||
164 | return -ENOTTY; | ||
165 | case MTRRIOC_ADD_ENTRY: | ||
166 | if (!capable(CAP_SYS_ADMIN)) | ||
167 | return -EPERM; | ||
168 | if (copy_from_user(&sentry, arg, sizeof sentry)) | ||
169 | return -EFAULT; | ||
170 | err = | ||
171 | mtrr_file_add(sentry.base, sentry.size, sentry.type, 1, | ||
172 | file, 0); | ||
173 | if (err < 0) | ||
174 | return err; | ||
175 | break; | ||
176 | case MTRRIOC_SET_ENTRY: | ||
177 | if (!capable(CAP_SYS_ADMIN)) | ||
178 | return -EPERM; | ||
179 | if (copy_from_user(&sentry, arg, sizeof sentry)) | ||
180 | return -EFAULT; | ||
181 | err = mtrr_add(sentry.base, sentry.size, sentry.type, 0); | ||
182 | if (err < 0) | ||
183 | return err; | ||
184 | break; | ||
185 | case MTRRIOC_DEL_ENTRY: | ||
186 | if (!capable(CAP_SYS_ADMIN)) | ||
187 | return -EPERM; | ||
188 | if (copy_from_user(&sentry, arg, sizeof sentry)) | ||
189 | return -EFAULT; | ||
190 | err = mtrr_file_del(sentry.base, sentry.size, file, 0); | ||
191 | if (err < 0) | ||
192 | return err; | ||
193 | break; | ||
194 | case MTRRIOC_KILL_ENTRY: | ||
195 | if (!capable(CAP_SYS_ADMIN)) | ||
196 | return -EPERM; | ||
197 | if (copy_from_user(&sentry, arg, sizeof sentry)) | ||
198 | return -EFAULT; | ||
199 | err = mtrr_del(-1, sentry.base, sentry.size); | ||
200 | if (err < 0) | ||
201 | return err; | ||
202 | break; | ||
203 | case MTRRIOC_GET_ENTRY: | ||
204 | if (copy_from_user(&gentry, arg, sizeof gentry)) | ||
205 | return -EFAULT; | ||
206 | if (gentry.regnum >= num_var_ranges) | ||
207 | return -EINVAL; | ||
208 | mtrr_if->get(gentry.regnum, &gentry.base, &gentry.size, &type); | ||
209 | |||
210 | /* Hide entries that go above 4GB */ | ||
211 | if (gentry.base + gentry.size > 0x100000 | ||
212 | || gentry.size == 0x100000) | ||
213 | gentry.base = gentry.size = gentry.type = 0; | ||
214 | else { | ||
215 | gentry.base <<= PAGE_SHIFT; | ||
216 | gentry.size <<= PAGE_SHIFT; | ||
217 | gentry.type = type; | ||
218 | } | ||
219 | |||
220 | if (copy_to_user(arg, &gentry, sizeof gentry)) | ||
221 | return -EFAULT; | ||
222 | break; | ||
223 | case MTRRIOC_ADD_PAGE_ENTRY: | ||
224 | if (!capable(CAP_SYS_ADMIN)) | ||
225 | return -EPERM; | ||
226 | if (copy_from_user(&sentry, arg, sizeof sentry)) | ||
227 | return -EFAULT; | ||
228 | err = | ||
229 | mtrr_file_add(sentry.base, sentry.size, sentry.type, 1, | ||
230 | file, 1); | ||
231 | if (err < 0) | ||
232 | return err; | ||
233 | break; | ||
234 | case MTRRIOC_SET_PAGE_ENTRY: | ||
235 | if (!capable(CAP_SYS_ADMIN)) | ||
236 | return -EPERM; | ||
237 | if (copy_from_user(&sentry, arg, sizeof sentry)) | ||
238 | return -EFAULT; | ||
239 | err = mtrr_add_page(sentry.base, sentry.size, sentry.type, 0); | ||
240 | if (err < 0) | ||
241 | return err; | ||
242 | break; | ||
243 | case MTRRIOC_DEL_PAGE_ENTRY: | ||
244 | if (!capable(CAP_SYS_ADMIN)) | ||
245 | return -EPERM; | ||
246 | if (copy_from_user(&sentry, arg, sizeof sentry)) | ||
247 | return -EFAULT; | ||
248 | err = mtrr_file_del(sentry.base, sentry.size, file, 1); | ||
249 | if (err < 0) | ||
250 | return err; | ||
251 | break; | ||
252 | case MTRRIOC_KILL_PAGE_ENTRY: | ||
253 | if (!capable(CAP_SYS_ADMIN)) | ||
254 | return -EPERM; | ||
255 | if (copy_from_user(&sentry, arg, sizeof sentry)) | ||
256 | return -EFAULT; | ||
257 | err = mtrr_del_page(-1, sentry.base, sentry.size); | ||
258 | if (err < 0) | ||
259 | return err; | ||
260 | break; | ||
261 | case MTRRIOC_GET_PAGE_ENTRY: | ||
262 | if (copy_from_user(&gentry, arg, sizeof gentry)) | ||
263 | return -EFAULT; | ||
264 | if (gentry.regnum >= num_var_ranges) | ||
265 | return -EINVAL; | ||
266 | mtrr_if->get(gentry.regnum, &gentry.base, &gentry.size, &type); | ||
267 | gentry.type = type; | ||
268 | |||
269 | if (copy_to_user(arg, &gentry, sizeof gentry)) | ||
270 | return -EFAULT; | ||
271 | break; | ||
272 | } | ||
273 | return 0; | ||
274 | } | ||
275 | |||
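The ioctl path is the binary twin of the write interface; struct mtrr_sentry and the MTRRIOC_* commands are exported through <asm/mtrr.h>. A userspace sketch of MTRRIOC_ADD_ENTRY with the same invented range as before:

	/* Userspace sketch, not kernel code */
	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <asm/mtrr.h>

	int main(void)
	{
		struct mtrr_sentry sentry = {
			.base = 0xf8000000,
			.size = 0x400000,
			.type = MTRR_TYPE_WRCOMB,
		};
		int fd = open("/proc/mtrr", O_RDWR);

		if (fd < 0 || ioctl(fd, MTRRIOC_ADD_ENTRY, &sentry) < 0) {
			perror("MTRRIOC_ADD_ENTRY");
			return 1;
		}
		close(fd);
		return 0;
	}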
276 | static int | ||
277 | mtrr_close(struct inode *ino, struct file *file) | ||
278 | { | ||
279 | int i, max; | ||
280 | unsigned int *fcount = FILE_FCOUNT(file); | ||
281 | |||
282 | if (fcount != NULL) { | ||
283 | max = num_var_ranges; | ||
284 | for (i = 0; i < max; ++i) { | ||
285 | while (fcount[i] > 0) { | ||
286 | mtrr_del(i, 0, 0); | ||
287 | --fcount[i]; | ||
288 | } | ||
289 | } | ||
290 | kfree(fcount); | ||
291 | FILE_FCOUNT(file) = NULL; | ||
292 | } | ||
293 | return single_release(ino, file); | ||
294 | } | ||
295 | |||
296 | static int mtrr_seq_show(struct seq_file *seq, void *offset); | ||
297 | |||
298 | static int mtrr_open(struct inode *inode, struct file *file) | ||
299 | { | ||
300 | if (!mtrr_if) | ||
301 | return -EIO; | ||
302 | if (!mtrr_if->get) | ||
303 | return -ENXIO; | ||
304 | return single_open(file, mtrr_seq_show, NULL); | ||
305 | } | ||
306 | |||
307 | static struct file_operations mtrr_fops = { | ||
308 | .owner = THIS_MODULE, | ||
309 | .open = mtrr_open, | ||
310 | .read = seq_read, | ||
311 | .llseek = seq_lseek, | ||
312 | .write = mtrr_write, | ||
313 | .ioctl = mtrr_ioctl, | ||
314 | .release = mtrr_close, | ||
315 | }; | ||
316 | |||
317 | |||
318 | static struct proc_dir_entry *proc_root_mtrr; | ||
319 | |||
320 | |||
321 | static int mtrr_seq_show(struct seq_file *seq, void *offset) | ||
322 | { | ||
323 | char factor; | ||
324 | int i, max, len; | ||
325 | mtrr_type type; | ||
326 | unsigned long base; | ||
327 | unsigned int size; | ||
328 | |||
329 | len = 0; | ||
330 | max = num_var_ranges; | ||
331 | for (i = 0; i < max; i++) { | ||
332 | mtrr_if->get(i, &base, &size, &type); | ||
333 | if (size == 0) | ||
334 | usage_table[i] = 0; | ||
335 | else { | ||
336 | if (size < (0x100000 >> PAGE_SHIFT)) { | ||
337 | /* less than 1MB */ | ||
338 | factor = 'K'; | ||
339 | size <<= PAGE_SHIFT - 10; | ||
340 | } else { | ||
341 | factor = 'M'; | ||
342 | size >>= 20 - PAGE_SHIFT; | ||
343 | } | ||
344 | /* RED-PEN: base can be > 32bit */ | ||
345 | len += seq_printf(seq, | ||
346 | "reg%02i: base=0x%05lx000 (%4liMB), size=%4i%cB: %s, count=%d\n", | ||
347 | i, base, base >> (20 - PAGE_SHIFT), size, factor, | ||
348 | mtrr_attrib_to_str(type), usage_table[i]); | ||
349 | } | ||
350 | } | ||
351 | return 0; | ||
352 | } | ||
353 | |||
354 | static int __init mtrr_if_init(void) | ||
355 | { | ||
356 | struct cpuinfo_x86 *c = &boot_cpu_data; | ||
357 | |||
358 | if ((!cpu_has(c, X86_FEATURE_MTRR)) && | ||
359 | (!cpu_has(c, X86_FEATURE_K6_MTRR)) && | ||
360 | (!cpu_has(c, X86_FEATURE_CYRIX_ARR)) && | ||
361 | (!cpu_has(c, X86_FEATURE_CENTAUR_MCR))) | ||
362 | return -ENODEV; | ||
363 | |||
364 | proc_root_mtrr = | ||
365 | create_proc_entry("mtrr", S_IWUSR | S_IRUGO, &proc_root); | ||
366 | if (proc_root_mtrr) { | ||
367 | proc_root_mtrr->owner = THIS_MODULE; | ||
368 | proc_root_mtrr->proc_fops = &mtrr_fops; | ||
369 | } | ||
370 | return 0; | ||
371 | } | ||
372 | |||
373 | arch_initcall(mtrr_if_init); | ||
374 | #endif /* CONFIG_PROC_FS */ | ||
diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c new file mode 100644 index 000000000000..8f67b490a7fd --- /dev/null +++ b/arch/i386/kernel/cpu/mtrr/main.c | |||
@@ -0,0 +1,693 @@ | |||
1 | /* Generic MTRR (Memory Type Range Register) driver. | ||
2 | |||
3 | Copyright (C) 1997-2000 Richard Gooch | ||
4 | Copyright (c) 2002 Patrick Mochel | ||
5 | |||
6 | This library is free software; you can redistribute it and/or | ||
7 | modify it under the terms of the GNU Library General Public | ||
8 | License as published by the Free Software Foundation; either | ||
9 | version 2 of the License, or (at your option) any later version. | ||
10 | |||
11 | This library is distributed in the hope that it will be useful, | ||
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
14 | Library General Public License for more details. | ||
15 | |||
16 | You should have received a copy of the GNU Library General Public | ||
17 | License along with this library; if not, write to the Free | ||
18 | Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
19 | |||
20 | Richard Gooch may be reached by email at rgooch@atnf.csiro.au | ||
21 | The postal address is: | ||
22 | Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia. | ||
23 | |||
24 | Source: "Pentium Pro Family Developer's Manual, Volume 3: | ||
25 | Operating System Writer's Guide" (Intel document number 242692), | ||
26 | section 11.11.7 | ||
27 | |||
28 | This was cleaned and made readable by Patrick Mochel <mochel@osdl.org> | ||
29 | on 6-7 March 2002. | ||
30 | Source: Intel Architecture Software Developers Manual, Volume 3: | ||
31 | System Programming Guide; Section 9.11. (1997 edition - PPro). | ||
32 | */ | ||
33 | |||
34 | #include <linux/module.h> | ||
35 | #include <linux/init.h> | ||
36 | #include <linux/pci.h> | ||
37 | #include <linux/smp.h> | ||
38 | #include <linux/cpu.h> | ||
39 | |||
40 | #include <asm/mtrr.h> | ||
41 | |||
42 | #include <asm/uaccess.h> | ||
43 | #include <asm/processor.h> | ||
44 | #include <asm/msr.h> | ||
45 | #include "mtrr.h" | ||
46 | |||
47 | #define MTRR_VERSION "2.0 (20020519)" | ||
48 | |||
49 | u32 num_var_ranges = 0; | ||
50 | |||
51 | unsigned int *usage_table; | ||
52 | static DECLARE_MUTEX(main_lock); | ||
53 | |||
54 | u32 size_or_mask, size_and_mask; | ||
55 | |||
56 | static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {}; | ||
57 | |||
58 | struct mtrr_ops * mtrr_if = NULL; | ||
59 | |||
60 | static void set_mtrr(unsigned int reg, unsigned long base, | ||
61 | unsigned long size, mtrr_type type); | ||
62 | |||
63 | extern int arr3_protected; | ||
64 | |||
65 | void set_mtrr_ops(struct mtrr_ops * ops) | ||
66 | { | ||
67 | if (ops->vendor && ops->vendor < X86_VENDOR_NUM) | ||
68 | mtrr_ops[ops->vendor] = ops; | ||
69 | } | ||
70 | |||
71 | /* Returns non-zero if we have the write-combining memory type */ | ||
72 | static int have_wrcomb(void) | ||
73 | { | ||
74 | struct pci_dev *dev; | ||
75 | |||
76 | if ((dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL)) != NULL) { | ||
77 | /* ServerWorks LE chipsets have problems with write-combining | ||
78 | Don't allow it and leave room for other chipsets to be tagged */ | ||
79 | if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS && | ||
80 | dev->device == PCI_DEVICE_ID_SERVERWORKS_LE) { | ||
81 | printk(KERN_INFO "mtrr: Serverworks LE detected. Write-combining disabled.\n"); | ||
82 | pci_dev_put(dev); | ||
83 | return 0; | ||
84 | } | ||
85 | /* Intel 450NX errata #23: non-ascending cacheline evictions to | ||
86 | write-combining memory may result in data corruption */ | ||
87 | if (dev->vendor == PCI_VENDOR_ID_INTEL && | ||
88 | dev->device == PCI_DEVICE_ID_INTEL_82451NX) { | ||
89 | printk(KERN_INFO "mtrr: Intel 450NX MMC detected. Write-combining disabled.\n"); | ||
90 | pci_dev_put(dev); | ||
91 | return 0; | ||
92 | } | ||
93 | pci_dev_put(dev); | ||
94 | } | ||
95 | return (mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0); | ||
96 | } | ||
97 | |||
98 | /* This function sets the number of variable MTRRs */ | ||
99 | static void __init set_num_var_ranges(void) | ||
100 | { | ||
101 | unsigned long config = 0, dummy; | ||
102 | |||
103 | if (use_intel()) { | ||
104 | rdmsr(MTRRcap_MSR, config, dummy); | ||
105 | } else if (is_cpu(AMD)) | ||
106 | config = 2; | ||
107 | else if (is_cpu(CYRIX) || is_cpu(CENTAUR)) | ||
108 | config = 8; | ||
109 | num_var_ranges = config & 0xff; | ||
110 | } | ||
111 | |||
112 | static void __init init_table(void) | ||
113 | { | ||
114 | int i, max; | ||
115 | |||
116 | max = num_var_ranges; | ||
117 | if ((usage_table = kmalloc(max * sizeof *usage_table, GFP_KERNEL)) | ||
118 | == NULL) { | ||
119 | printk(KERN_ERR "mtrr: could not allocate\n"); | ||
120 | return; | ||
121 | } | ||
122 | for (i = 0; i < max; i++) | ||
123 | usage_table[i] = 1; | ||
124 | } | ||
125 | |||
126 | struct set_mtrr_data { | ||
127 | atomic_t count; | ||
128 | atomic_t gate; | ||
129 | unsigned long smp_base; | ||
130 | unsigned long smp_size; | ||
131 | unsigned int smp_reg; | ||
132 | mtrr_type smp_type; | ||
133 | }; | ||
134 | |||
135 | #ifdef CONFIG_SMP | ||
136 | |||
137 | static void ipi_handler(void *info) | ||
138 | /* [SUMMARY] Synchronisation handler. Executed by "other" CPUs. | ||
139 | [RETURNS] Nothing. | ||
140 | */ | ||
141 | { | ||
142 | struct set_mtrr_data *data = info; | ||
143 | unsigned long flags; | ||
144 | |||
145 | local_irq_save(flags); | ||
146 | |||
147 | atomic_dec(&data->count); | ||
148 | while(!atomic_read(&data->gate)) | ||
149 | cpu_relax(); | ||
150 | |||
151 | /* The master has cleared me to execute */ | ||
152 | if (data->smp_reg != ~0U) | ||
153 | mtrr_if->set(data->smp_reg, data->smp_base, | ||
154 | data->smp_size, data->smp_type); | ||
155 | else | ||
156 | mtrr_if->set_all(); | ||
157 | |||
158 | atomic_dec(&data->count); | ||
159 | while(atomic_read(&data->gate)) | ||
160 | cpu_relax(); | ||
161 | |||
162 | atomic_dec(&data->count); | ||
163 | local_irq_restore(flags); | ||
164 | } | ||
165 | |||
166 | #endif | ||
167 | |||
168 | /** | ||
169 | * set_mtrr - update mtrrs on all processors | ||
170 | * @reg: mtrr in question | ||
171 | * @base: mtrr base | ||
172 | * @size: mtrr size | ||
173 | * @type: mtrr type | ||
174 | * | ||
175 | * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly: | ||
176 | * | ||
177 | * 1. Send IPI to do the following: | ||
178 | * 2. Disable Interrupts | ||
179 | * 3. Wait for all procs to do so | ||
180 | * 4. Enter no-fill cache mode | ||
181 | * 5. Flush caches | ||
182 | * 6. Clear PGE bit | ||
183 | * 7. Flush all TLBs | ||
184 | * 8. Disable all range registers | ||
185 | * 9. Update the MTRRs | ||
186 | * 10. Enable all range registers | ||
187 | * 11. Flush all TLBs and caches again | ||
188 | * 12. Enter normal cache mode and reenable caching | ||
189 | * 13. Set PGE | ||
190 | * 14. Wait for buddies to catch up | ||
191 | * 15. Enable interrupts. | ||
192 | * | ||
193 | * What does that mean for us? Well, first we set data.count to the number | ||
194 | * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait | ||
195 | * until it hits 0 and proceed. We set the data.gate flag and reset data.count. | ||
196 | * Meanwhile, they are waiting for that flag to be set. Once it's set, each | ||
197 | * CPU goes through the transition of updating MTRRs. The CPU vendors may each do it | ||
198 | * differently, so we call the mtrr_if->set() callback and let it take care of it. | ||
199 | * When they're done, they again decrement data->count and wait for data.gate to | ||
200 | * be reset. | ||
201 | * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag. | ||
202 | * Everyone then enables interrupts and we all continue on. | ||
203 | * | ||
204 | * Note that the mechanism is the same for UP systems, too; all the SMP stuff | ||
205 | * becomes nops. | ||
206 | */ | ||
207 | static void set_mtrr(unsigned int reg, unsigned long base, | ||
208 | unsigned long size, mtrr_type type) | ||
209 | { | ||
210 | struct set_mtrr_data data; | ||
211 | unsigned long flags; | ||
212 | |||
213 | data.smp_reg = reg; | ||
214 | data.smp_base = base; | ||
215 | data.smp_size = size; | ||
216 | data.smp_type = type; | ||
217 | atomic_set(&data.count, num_booting_cpus() - 1); | ||
218 | atomic_set(&data.gate,0); | ||
219 | |||
220 | /* Start the ball rolling on other CPUs */ | ||
221 | if (smp_call_function(ipi_handler, &data, 1, 0) != 0) | ||
222 | panic("mtrr: timed out waiting for other CPUs\n"); | ||
223 | |||
224 | local_irq_save(flags); | ||
225 | |||
226 | while(atomic_read(&data.count)) | ||
227 | cpu_relax(); | ||
228 | |||
229 | /* ok, reset count and toggle gate */ | ||
230 | atomic_set(&data.count, num_booting_cpus() - 1); | ||
231 | atomic_set(&data.gate,1); | ||
232 | |||
233 | /* do our MTRR business */ | ||
234 | |||
235 | /* HACK! | ||
236 | * We use this same function to initialize the MTRRs on boot. | ||
237 | * The state of the boot CPU's MTRRs has been saved, and we want | ||
238 | * to replicate that across all the APs. | ||
239 | * If we're doing that, @reg is set to ~0U (see init_other_cpus()). | ||
240 | */ | ||
241 | if (reg != ~0U) | ||
242 | mtrr_if->set(reg,base,size,type); | ||
243 | |||
244 | /* wait for the others */ | ||
245 | while(atomic_read(&data.count)) | ||
246 | cpu_relax(); | ||
247 | |||
248 | atomic_set(&data.count, num_booting_cpus() - 1); | ||
249 | atomic_set(&data.gate,0); | ||
250 | |||
251 | /* | ||
252 | * Wait here for everyone to have seen the gate change | ||
253 | * So we're the last ones to touch 'data' | ||
254 | */ | ||
255 | while(atomic_read(&data.count)) | ||
256 | cpu_relax(); | ||
257 | |||
258 | local_irq_restore(flags); | ||
259 | } | ||
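
The count/gate handshake above is easier to follow in isolation. Here is a
minimal userspace sketch of the same two-phase rendezvous, with C11 atomics
and pthreads standing in for IPIs and local_irq_save(); the names are
invented for illustration and nothing below is kernel code.

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    #define NWORKERS 3

    static atomic_int count;
    static atomic_int gate;

    /* Analogue of ipi_handler(): check in, wait for the gate, do the
     * update, then hand-shake twice more so the master knows when the
     * shared data can go out of scope. */
    static void *worker(void *arg)
    {
        atomic_fetch_sub(&count, 1);        /* "interrupts disabled" */
        while (!atomic_load(&gate))
            ;                               /* spin, like cpu_relax() */

        printf("worker %ld: updating my MTRRs\n", (long)arg);

        atomic_fetch_sub(&count, 1);        /* "my update is done" */
        while (atomic_load(&gate))
            ;
        atomic_fetch_sub(&count, 1);        /* final check-out */
        return NULL;
    }

    int main(void)
    {
        pthread_t t[NWORKERS];
        long i;

        atomic_store(&count, NWORKERS);     /* like num_booting_cpus() - 1 */
        atomic_store(&gate, 0);
        for (i = 0; i < NWORKERS; i++)
            pthread_create(&t[i], NULL, worker, (void *)i);

        while (atomic_load(&count))         /* wait: everyone checked in */
            ;
        atomic_store(&count, NWORKERS);
        atomic_store(&gate, 1);             /* open the gate: phase 1 */

        printf("master: updating my MTRRs\n");

        while (atomic_load(&count))         /* wait: everyone updated */
            ;
        atomic_store(&count, NWORKERS);
        atomic_store(&gate, 0);             /* close the gate: phase 2 */

        while (atomic_load(&count))         /* wait: last ones out */
            ;
        for (i = 0; i < NWORKERS; i++)
            pthread_join(t[i], NULL);
        return 0;
    }
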
260 | |||
261 | /** | ||
262 | * mtrr_add_page - Add a memory type region | ||
263 | * @base: Physical base address of region in pages (4 KB) | ||
264 | * @size: Physical size of region in pages (4 KB) | ||
265 | * @type: Type of MTRR desired | ||
266 | * @increment: If this is true do usage counting on the region | ||
267 | * | ||
268 | * Memory type region registers control the caching on newer Intel and | ||
269 | * non-Intel processors. This function allows drivers to request that an | ||
270 | * MTRR be added. The details and hardware specifics of each processor's | ||
271 | * implementation are hidden from the caller, but nevertheless the | ||
272 | * caller should expect to provide a power-of-two size aligned on an | ||
273 | * equivalent power-of-two boundary. | ||
274 | * | ||
275 | * If the region cannot be added, either because all regions are in use | ||
276 | * or because the CPU cannot support it, a negative value is returned. On success | ||
277 | * the register number for this entry is returned, but should be treated | ||
278 | * as a cookie only. | ||
279 | * | ||
280 | * On a multiprocessor machine the changes are made to all processors. | ||
281 | * This is required on x86 by the Intel processors. | ||
282 | * | ||
283 | * The available types are | ||
284 | * | ||
285 | * %MTRR_TYPE_UNCACHABLE - No caching | ||
286 | * | ||
287 | * %MTRR_TYPE_WRBACK - Write data back in bursts whenever possible | ||
288 | * | ||
289 | * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts | ||
290 | * | ||
291 | * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes | ||
292 | * | ||
293 | * BUGS: Needs a quiet flag for the cases where drivers do not mind | ||
294 | * failures and do not wish system log messages to be sent. | ||
295 | */ | ||
296 | |||
297 | int mtrr_add_page(unsigned long base, unsigned long size, | ||
298 | unsigned int type, char increment) | ||
299 | { | ||
300 | int i; | ||
301 | mtrr_type ltype; | ||
302 | unsigned long lbase; | ||
303 | unsigned int lsize; | ||
304 | int error; | ||
305 | |||
306 | if (!mtrr_if) | ||
307 | return -ENXIO; | ||
308 | |||
309 | if ((error = mtrr_if->validate_add_page(base,size,type))) | ||
310 | return error; | ||
311 | |||
312 | if (type >= MTRR_NUM_TYPES) { | ||
313 | printk(KERN_WARNING "mtrr: type: %u invalid\n", type); | ||
314 | return -EINVAL; | ||
315 | } | ||
316 | |||
317 | /* If the type is WC, check that this processor supports it */ | ||
318 | if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) { | ||
319 | printk(KERN_WARNING | ||
320 | "mtrr: your processor doesn't support write-combining\n"); | ||
321 | return -ENOSYS; | ||
322 | } | ||
323 | |||
324 | if (base & size_or_mask || size & size_or_mask) { | ||
325 | printk(KERN_WARNING "mtrr: base or size exceeds the MTRR width\n"); | ||
326 | return -EINVAL; | ||
327 | } | ||
328 | |||
329 | error = -EINVAL; | ||
330 | |||
331 | /* Search for existing MTRR */ | ||
332 | down(&main_lock); | ||
333 | for (i = 0; i < num_var_ranges; ++i) { | ||
334 | mtrr_if->get(i, &lbase, &lsize, &ltype); | ||
335 | if (base >= lbase + lsize) | ||
336 | continue; | ||
337 | if ((base < lbase) && (base + size <= lbase)) | ||
338 | continue; | ||
339 | /* At this point we know there is some kind of overlap/enclosure */ | ||
340 | if ((base < lbase) || (base + size > lbase + lsize)) { | ||
341 | printk(KERN_WARNING | ||
342 | "mtrr: 0x%lx000,0x%lx000 overlaps existing" | ||
343 | " 0x%lx000,0x%x000\n", base, size, lbase, | ||
344 | lsize); | ||
345 | goto out; | ||
346 | } | ||
347 | /* New region is enclosed by an existing region */ | ||
348 | if (ltype != type) { | ||
349 | if (type == MTRR_TYPE_UNCACHABLE) | ||
350 | continue; | ||
351 | printk (KERN_WARNING "mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n", | ||
352 | base, size, mtrr_attrib_to_str(ltype), | ||
353 | mtrr_attrib_to_str(type)); | ||
354 | goto out; | ||
355 | } | ||
356 | if (increment) | ||
357 | ++usage_table[i]; | ||
358 | error = i; | ||
359 | goto out; | ||
360 | } | ||
361 | /* Search for an empty MTRR */ | ||
362 | i = mtrr_if->get_free_region(base, size); | ||
363 | if (i >= 0) { | ||
364 | set_mtrr(i, base, size, type); | ||
365 | usage_table[i] = 1; | ||
366 | } else | ||
367 | printk(KERN_INFO "mtrr: no more MTRRs available\n"); | ||
368 | error = i; | ||
369 | out: | ||
370 | up(&main_lock); | ||
371 | return error; | ||
372 | } | ||
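
To make the page units concrete: a driver wanting to cover a 4 MB frame
buffer at physical 0xf8000000 would pass base = 0xf8000000 >> 12 = 0xf8000
and size = 0x400 pages. A hypothetical check mirroring the power-of-two
rule described above (an illustration, not the kernel's validate_add_page()):

    /* size must be a power of two, and base must be aligned to size */
    static int mtrr_region_is_aligned(unsigned long base, unsigned long size)
    {
        if (size == 0 || (size & (size - 1)))
            return 0;                   /* not a power of two */
        return (base & (size - 1)) == 0;
    }
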
373 | |||
374 | /** | ||
375 | * mtrr_add - Add a memory type region | ||
376 | * @base: Physical base address of region | ||
377 | * @size: Physical size of region | ||
378 | * @type: Type of MTRR desired | ||
379 | * @increment: If this is true do usage counting on the region | ||
380 | * | ||
381 | * Memory type region registers control the caching on newer Intel and | ||
382 | * non-Intel processors. This function allows drivers to request that an | ||
383 | * MTRR be added. The details and hardware specifics of each processor's | ||
384 | * implementation are hidden from the caller, but nevertheless the | ||
385 | * caller should expect to provide a power-of-two size aligned on an | ||
386 | * equivalent power-of-two boundary. | ||
387 | * | ||
388 | * If the region cannot be added, either because all regions are in use | ||
389 | * or because the CPU cannot support it, a negative value is returned. On success | ||
390 | * the register number for this entry is returned, but should be treated | ||
391 | * as a cookie only. | ||
392 | * | ||
393 | * On a multiprocessor machine the changes are made to all processors. | ||
394 | * This is required on x86 by the Intel processors. | ||
395 | * | ||
396 | * The available types are | ||
397 | * | ||
398 | * %MTRR_TYPE_UNCACHABLE - No caching | ||
399 | * | ||
400 | * %MTRR_TYPE_WRBACK - Write data back in bursts whenever possible | ||
401 | * | ||
402 | * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts | ||
403 | * | ||
404 | * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes | ||
405 | * | ||
406 | * BUGS: Needs a quiet flag for the cases where drivers do not mind | ||
407 | * failures and do not wish system log messages to be sent. | ||
408 | */ | ||
409 | |||
410 | int | ||
411 | mtrr_add(unsigned long base, unsigned long size, unsigned int type, | ||
412 | char increment) | ||
413 | { | ||
414 | if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { | ||
415 | printk(KERN_WARNING "mtrr: size and base must be multiples of 4 kiB\n"); | ||
416 | printk(KERN_DEBUG "mtrr: size: 0x%lx base: 0x%lx\n", size, base); | ||
417 | return -EINVAL; | ||
418 | } | ||
419 | return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type, | ||
420 | increment); | ||
421 | } | ||
422 | |||
423 | /** | ||
424 | * mtrr_del_page - delete a memory type region | ||
425 | * @reg: Register returned by mtrr_add | ||
426 | * @base: Physical base address | ||
427 | * @size: Size of region | ||
428 | * | ||
429 | * If a register is supplied, then base and size are ignored; this is | ||
430 | * how drivers should call it. | ||
431 | * | ||
432 | * Releases an MTRR region. If the usage count drops to zero the | ||
433 | * register is freed and the region returns to default state. | ||
434 | * On success the register number is returned; on failure a negative | ||
435 | * error code is returned. | ||
436 | */ | ||
437 | |||
438 | int mtrr_del_page(int reg, unsigned long base, unsigned long size) | ||
439 | { | ||
440 | int i, max; | ||
441 | mtrr_type ltype; | ||
442 | unsigned long lbase; | ||
443 | unsigned int lsize; | ||
444 | int error = -EINVAL; | ||
445 | |||
446 | if (!mtrr_if) | ||
447 | return -ENXIO; | ||
448 | |||
449 | max = num_var_ranges; | ||
450 | down(&main_lock); | ||
451 | if (reg < 0) { | ||
452 | /* Search for existing MTRR */ | ||
453 | for (i = 0; i < max; ++i) { | ||
454 | mtrr_if->get(i, &lbase, &lsize, &ltype); | ||
455 | if (lbase == base && lsize == size) { | ||
456 | reg = i; | ||
457 | break; | ||
458 | } | ||
459 | } | ||
460 | if (reg < 0) { | ||
461 | printk(KERN_DEBUG "mtrr: no MTRR for %lx000,%lx000 found\n", base, | ||
462 | size); | ||
463 | goto out; | ||
464 | } | ||
465 | } | ||
466 | if (reg >= max) { | ||
467 | printk(KERN_WARNING "mtrr: register: %d too big\n", reg); | ||
468 | goto out; | ||
469 | } | ||
470 | if (is_cpu(CYRIX) && !use_intel()) { | ||
471 | if ((reg == 3) && arr3_protected) { | ||
472 | printk(KERN_WARNING "mtrr: ARR3 cannot be changed\n"); | ||
473 | goto out; | ||
474 | } | ||
475 | } | ||
476 | mtrr_if->get(reg, &lbase, &lsize, &ltype); | ||
477 | if (lsize < 1) { | ||
478 | printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg); | ||
479 | goto out; | ||
480 | } | ||
481 | if (usage_table[reg] < 1) { | ||
482 | printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg); | ||
483 | goto out; | ||
484 | } | ||
485 | if (--usage_table[reg] < 1) | ||
486 | set_mtrr(reg, 0, 0, 0); | ||
487 | error = reg; | ||
488 | out: | ||
489 | up(&main_lock); | ||
490 | return error; | ||
491 | } | ||
492 | /** | ||
493 | * mtrr_del - delete a memory type region | ||
494 | * @reg: Register returned by mtrr_add | ||
495 | * @base: Physical base address | ||
496 | * @size: Size of region | ||
497 | * | ||
498 | * If a register is supplied, then base and size are ignored; this is | ||
499 | * how drivers should call it. | ||
500 | * | ||
501 | * Releases an MTRR region. If the usage count drops to zero the | ||
502 | * register is freed and the region returns to default state. | ||
503 | * On success the register number is returned; on failure a negative | ||
504 | * error code is returned. | ||
505 | */ | ||
506 | |||
507 | int | ||
508 | mtrr_del(int reg, unsigned long base, unsigned long size) | ||
509 | { | ||
510 | if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { | ||
511 | printk(KERN_WARNING "mtrr: size and base must be multiples of 4 kiB\n"); | ||
512 | printk(KERN_DEBUG "mtrr: size: 0x%lx base: 0x%lx\n", size, base); | ||
513 | return -EINVAL; | ||
514 | } | ||
515 | return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT); | ||
516 | } | ||
517 | |||
518 | EXPORT_SYMBOL(mtrr_add); | ||
519 | EXPORT_SYMBOL(mtrr_del); | ||
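
For reference, a typical consumer of these two exports (say, a video driver
mapping its linear frame buffer write-combined) would look roughly like the
sketch below; the function names and the policy of treating a failed WC
request as non-fatal are illustrative, not taken from any particular driver.

    static int example_enable_wc(unsigned long fb_base, unsigned long fb_len)
    {
        /* base and length must be 4 KB multiples; mtrr_add() checks this
         * and returns a register number to keep as a cookie for mtrr_del() */
        return mtrr_add(fb_base, fb_len, MTRR_TYPE_WRCOMB, 1);
    }

    static void example_disable_wc(int reg, unsigned long fb_base,
                                   unsigned long fb_len)
    {
        if (reg >= 0)           /* WC was optional; negative means no MTRR */
            mtrr_del(reg, fb_base, fb_len);
    }
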
520 | |||
521 | /* HACK ALERT! | ||
522 | * These should be called implicitly, but we can't do that until all the | ||
523 | * initcall infrastructure is in place... | ||
524 | */ | ||
525 | extern void amd_init_mtrr(void); | ||
526 | extern void cyrix_init_mtrr(void); | ||
527 | extern void centaur_init_mtrr(void); | ||
528 | |||
529 | static void __init init_ifs(void) | ||
530 | { | ||
531 | amd_init_mtrr(); | ||
532 | cyrix_init_mtrr(); | ||
533 | centaur_init_mtrr(); | ||
534 | } | ||
535 | |||
536 | static void __init init_other_cpus(void) | ||
537 | { | ||
538 | if (use_intel()) | ||
539 | get_mtrr_state(); | ||
540 | |||
541 | /* bring up the other processors */ | ||
542 | set_mtrr(~0U,0,0,0); | ||
543 | |||
544 | if (use_intel()) { | ||
545 | finalize_mtrr_state(); | ||
546 | mtrr_state_warn(); | ||
547 | } | ||
548 | } | ||
549 | |||
550 | |||
551 | struct mtrr_value { | ||
552 | mtrr_type ltype; | ||
553 | unsigned long lbase; | ||
554 | unsigned int lsize; | ||
555 | }; | ||
556 | |||
557 | static struct mtrr_value * mtrr_state; | ||
558 | |||
559 | static int mtrr_save(struct sys_device * sysdev, u32 state) | ||
560 | { | ||
561 | int i; | ||
562 | int size = num_var_ranges * sizeof(struct mtrr_value); | ||
563 | |||
564 | mtrr_state = kmalloc(size, GFP_ATOMIC); | ||
565 | if (!mtrr_state) | ||
566 | return -ENOMEM; | ||
567 | memset(mtrr_state, 0, size); | ||
568 | |||
569 | |||
570 | for (i = 0; i < num_var_ranges; i++) { | ||
571 | mtrr_if->get(i, | ||
572 | &mtrr_state[i].lbase, | ||
573 | &mtrr_state[i].lsize, | ||
574 | &mtrr_state[i].ltype); | ||
575 | } | ||
576 | return 0; | ||
577 | } | ||
578 | |||
579 | static int mtrr_restore(struct sys_device * sysdev) | ||
580 | { | ||
581 | int i; | ||
582 | |||
583 | for (i = 0; i < num_var_ranges; i++) { | ||
584 | if (mtrr_state[i].lsize) | ||
585 | set_mtrr(i, | ||
586 | mtrr_state[i].lbase, | ||
587 | mtrr_state[i].lsize, | ||
588 | mtrr_state[i].ltype); | ||
589 | } | ||
590 | kfree(mtrr_state); | ||
591 | return 0; | ||
592 | } | ||
593 | |||
594 | |||
595 | |||
596 | static struct sysdev_driver mtrr_sysdev_driver = { | ||
597 | .suspend = mtrr_save, | ||
598 | .resume = mtrr_restore, | ||
599 | }; | ||
600 | |||
601 | |||
602 | /** | ||
603 | * mtrr_init - initialize mtrrs on the boot CPU | ||
604 | * | ||
605 | * This needs to be called early; before any of the other CPUs are | ||
606 | * initialized (i.e. before smp_init()). | ||
607 | * | ||
608 | */ | ||
609 | static int __init mtrr_init(void) | ||
610 | { | ||
611 | init_ifs(); | ||
612 | |||
613 | if (cpu_has_mtrr) { | ||
614 | mtrr_if = &generic_mtrr_ops; | ||
615 | size_or_mask = 0xff000000; /* 36 bits */ | ||
616 | size_and_mask = 0x00f00000; | ||
617 | |||
618 | switch (boot_cpu_data.x86_vendor) { | ||
619 | case X86_VENDOR_AMD: | ||
620 | /* The original Athlon docs said that | ||
621 | total addressable memory is 44 bits wide. | ||
622 | It was not really clear whether its MTRRs | ||
623 | follow this or not. (Read: 44 or 36 bits). | ||
624 | However, "x86-64_overview.pdf" explicitly | ||
625 | states that "previous implementations support | ||
626 | 36 bit MTRRs" and also provides a way to | ||
627 | query the width (in bits) of the physical | ||
628 | addressable memory on the Hammer family. | ||
629 | */ | ||
630 | if (boot_cpu_data.x86 == 15 | ||
631 | && (cpuid_eax(0x80000000) >= 0x80000008)) { | ||
632 | u32 phys_addr; | ||
633 | phys_addr = cpuid_eax(0x80000008) & 0xff; | ||
634 | size_or_mask = | ||
635 | ~((1 << (phys_addr - PAGE_SHIFT)) - 1); | ||
636 | size_and_mask = ~size_or_mask & 0xfff00000; | ||
637 | } | ||
638 | /* Athlon MTRRs use an Intel-compatible interface for | ||
639 | * getting and setting */ | ||
640 | break; | ||
641 | case X86_VENDOR_CENTAUR: | ||
642 | if (boot_cpu_data.x86 == 6) { | ||
643 | /* The VIA Cyrix family has Intel-style MTRRs, but doesn't support PAE */ | ||
644 | size_or_mask = 0xfff00000; /* 32 bits */ | ||
645 | size_and_mask = 0; | ||
646 | } | ||
647 | break; | ||
648 | |||
649 | default: | ||
650 | break; | ||
651 | } | ||
652 | } else { | ||
653 | switch (boot_cpu_data.x86_vendor) { | ||
654 | case X86_VENDOR_AMD: | ||
655 | if (cpu_has_k6_mtrr) { | ||
656 | /* Pre-Athlon (K6) AMD CPU MTRRs */ | ||
657 | mtrr_if = mtrr_ops[X86_VENDOR_AMD]; | ||
658 | size_or_mask = 0xfff00000; /* 32 bits */ | ||
659 | size_and_mask = 0; | ||
660 | } | ||
661 | break; | ||
662 | case X86_VENDOR_CENTAUR: | ||
663 | if (cpu_has_centaur_mcr) { | ||
664 | mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR]; | ||
665 | size_or_mask = 0xfff00000; /* 32 bits */ | ||
666 | size_and_mask = 0; | ||
667 | } | ||
668 | break; | ||
669 | case X86_VENDOR_CYRIX: | ||
670 | if (cpu_has_cyrix_arr) { | ||
671 | mtrr_if = mtrr_ops[X86_VENDOR_CYRIX]; | ||
672 | size_or_mask = 0xfff00000; /* 32 bits */ | ||
673 | size_and_mask = 0; | ||
674 | } | ||
675 | break; | ||
676 | default: | ||
677 | break; | ||
678 | } | ||
679 | } | ||
680 | printk(KERN_INFO "mtrr: v%s\n",MTRR_VERSION); | ||
681 | |||
682 | if (mtrr_if) { | ||
683 | set_num_var_ranges(); | ||
684 | init_table(); | ||
685 | init_other_cpus(); | ||
686 | |||
687 | return sysdev_driver_register(&cpu_sysdev_class, | ||
688 | &mtrr_sysdev_driver); | ||
689 | } | ||
690 | return -ENXIO; | ||
691 | } | ||
692 | |||
693 | subsys_initcall(mtrr_init); | ||
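
The size_or_mask/size_and_mask arithmetic in mtrr_init() is easiest to
verify with numbers. A standalone sketch (plain C, assuming PAGE_SHIFT is
12 as on i386):

    #include <stdio.h>

    #define PAGE_SHIFT 12

    int main(void)
    {
        unsigned int phys_addr = 40;    /* e.g. CPUID 0x80000008 reports 40 bits */
        unsigned int size_or_mask, size_and_mask;

        size_or_mask  = ~((1 << (phys_addr - PAGE_SHIFT)) - 1);
        size_and_mask = ~size_or_mask & 0xfff00000;

        /* prints or = 0xf0000000, and = 0x0ff00000; with phys_addr = 36
         * this reproduces the 0xff000000/0x00f00000 defaults above */
        printf("or  = 0x%08x\nand = 0x%08x\n", size_or_mask, size_and_mask);
        return 0;
    }
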
diff --git a/arch/i386/kernel/cpu/mtrr/mtrr.h b/arch/i386/kernel/cpu/mtrr/mtrr.h new file mode 100644 index 000000000000..de1351245599 --- /dev/null +++ b/arch/i386/kernel/cpu/mtrr/mtrr.h | |||
@@ -0,0 +1,98 @@ | |||
1 | /* | ||
2 | * local mtrr defines. | ||
3 | */ | ||
4 | |||
5 | #ifndef TRUE | ||
6 | #define TRUE 1 | ||
7 | #define FALSE 0 | ||
8 | #endif | ||
9 | |||
10 | #define MTRRcap_MSR 0x0fe | ||
11 | #define MTRRdefType_MSR 0x2ff | ||
12 | |||
13 | #define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg)) | ||
14 | #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) | ||
15 | |||
16 | #define NUM_FIXED_RANGES 88 | ||
17 | #define MTRRfix64K_00000_MSR 0x250 | ||
18 | #define MTRRfix16K_80000_MSR 0x258 | ||
19 | #define MTRRfix16K_A0000_MSR 0x259 | ||
20 | #define MTRRfix4K_C0000_MSR 0x268 | ||
21 | #define MTRRfix4K_C8000_MSR 0x269 | ||
22 | #define MTRRfix4K_D0000_MSR 0x26a | ||
23 | #define MTRRfix4K_D8000_MSR 0x26b | ||
24 | #define MTRRfix4K_E0000_MSR 0x26c | ||
25 | #define MTRRfix4K_E8000_MSR 0x26d | ||
26 | #define MTRRfix4K_F0000_MSR 0x26e | ||
27 | #define MTRRfix4K_F8000_MSR 0x26f | ||
28 | |||
29 | #define MTRR_CHANGE_MASK_FIXED 0x01 | ||
30 | #define MTRR_CHANGE_MASK_VARIABLE 0x02 | ||
31 | #define MTRR_CHANGE_MASK_DEFTYPE 0x04 | ||
32 | |||
33 | /* In the Intel processor's MTRR interface, the MTRR type is always held in | ||
34 | an 8 bit field: */ | ||
35 | typedef u8 mtrr_type; | ||
36 | |||
37 | struct mtrr_ops { | ||
38 | u32 vendor; | ||
39 | u32 use_intel_if; | ||
40 | // void (*init)(void); | ||
41 | void (*set)(unsigned int reg, unsigned long base, | ||
42 | unsigned long size, mtrr_type type); | ||
43 | void (*set_all)(void); | ||
44 | |||
45 | void (*get)(unsigned int reg, unsigned long *base, | ||
46 | unsigned int *size, mtrr_type * type); | ||
47 | int (*get_free_region) (unsigned long base, unsigned long size); | ||
48 | |||
49 | int (*validate_add_page)(unsigned long base, unsigned long size, | ||
50 | unsigned int type); | ||
51 | int (*have_wrcomb)(void); | ||
52 | }; | ||
53 | |||
54 | extern int generic_get_free_region(unsigned long base, unsigned long size); | ||
55 | extern int generic_validate_add_page(unsigned long base, unsigned long size, | ||
56 | unsigned int type); | ||
57 | |||
58 | extern struct mtrr_ops generic_mtrr_ops; | ||
59 | |||
60 | extern int positive_have_wrcomb(void); | ||
61 | |||
62 | /* library functions for processor-specific routines */ | ||
63 | struct set_mtrr_context { | ||
64 | unsigned long flags; | ||
65 | unsigned long deftype_lo; | ||
66 | unsigned long deftype_hi; | ||
67 | unsigned long cr4val; | ||
68 | unsigned long ccr3; | ||
69 | }; | ||
70 | |||
71 | struct mtrr_var_range { | ||
72 | unsigned long base_lo; | ||
73 | unsigned long base_hi; | ||
74 | unsigned long mask_lo; | ||
75 | unsigned long mask_hi; | ||
76 | }; | ||
77 | |||
78 | void set_mtrr_done(struct set_mtrr_context *ctxt); | ||
79 | void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); | ||
80 | void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); | ||
81 | |||
82 | void get_mtrr_state(void); | ||
83 | |||
84 | extern void set_mtrr_ops(struct mtrr_ops * ops); | ||
85 | |||
86 | extern u32 size_or_mask, size_and_mask; | ||
87 | extern struct mtrr_ops * mtrr_if; | ||
88 | |||
89 | #define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) | ||
90 | #define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) | ||
91 | |||
92 | extern unsigned int num_var_ranges; | ||
93 | |||
94 | void finalize_mtrr_state(void); | ||
95 | void mtrr_state_warn(void); | ||
96 | char *mtrr_attrib_to_str(int x); | ||
97 | void mtrr_wrmsr(unsigned, unsigned, unsigned); | ||
98 | |||
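
The variable-range MSRs interleave base and mask registers, so the
MTRRphysBase_MSR()/MTRRphysMask_MSR() macros above expand to consecutive
pairs. A quick self-check (macros copied from this header):

    #include <assert.h>

    #define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
    #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)

    int main(void)
    {
        /* pairs run 0x200/0x201, 0x202/0x203, ... 0x20e/0x20f */
        assert(MTRRphysBase_MSR(0) == 0x200 && MTRRphysMask_MSR(0) == 0x201);
        assert(MTRRphysBase_MSR(7) == 0x20e && MTRRphysMask_MSR(7) == 0x20f);
        return 0;
    }
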
diff --git a/arch/i386/kernel/cpu/mtrr/state.c b/arch/i386/kernel/cpu/mtrr/state.c new file mode 100644 index 000000000000..f62ecd15811a --- /dev/null +++ b/arch/i386/kernel/cpu/mtrr/state.c | |||
@@ -0,0 +1,78 @@ | |||
1 | #include <linux/mm.h> | ||
2 | #include <linux/init.h> | ||
3 | #include <asm/io.h> | ||
4 | #include <asm/mtrr.h> | ||
5 | #include <asm/msr.h> | ||
6 | #include "mtrr.h" | ||
7 | |||
8 | |||
9 | /* Put the processor into a state where MTRRs can be safely set */ | ||
10 | void set_mtrr_prepare_save(struct set_mtrr_context *ctxt) | ||
11 | { | ||
12 | unsigned int cr0; | ||
13 | |||
14 | /* Disable interrupts locally */ | ||
15 | local_irq_save(ctxt->flags); | ||
16 | |||
17 | if (use_intel() || is_cpu(CYRIX)) { | ||
18 | |||
19 | /* Save value of CR4 and clear Page Global Enable (bit 7) */ | ||
20 | if ( cpu_has_pge ) { | ||
21 | ctxt->cr4val = read_cr4(); | ||
22 | write_cr4(ctxt->cr4val & (unsigned char) ~(1 << 7)); | ||
23 | } | ||
24 | |||
25 | /* Disable and flush caches. Note that wbinvd flushes the TLBs as | ||
26 | a side-effect */ | ||
27 | cr0 = read_cr0() | 0x40000000; | ||
28 | wbinvd(); | ||
29 | write_cr0(cr0); | ||
30 | wbinvd(); | ||
31 | |||
32 | if (use_intel()) | ||
33 | /* Save MTRR state */ | ||
34 | rdmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi); | ||
35 | else | ||
36 | /* Cyrix ARRs - everything else was excluded at the top */ | ||
37 | ctxt->ccr3 = getCx86(CX86_CCR3); | ||
38 | } | ||
39 | } | ||
40 | |||
41 | void set_mtrr_cache_disable(struct set_mtrr_context *ctxt) | ||
42 | { | ||
43 | if (use_intel()) | ||
44 | /* Disable MTRRs, and set the default type to uncached */ | ||
45 | mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo & 0xf300UL, | ||
46 | ctxt->deftype_hi); | ||
47 | else if (is_cpu(CYRIX)) | ||
48 | /* Cyrix ARRs - everything else was excluded at the top */ | ||
49 | setCx86(CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10); | ||
50 | } | ||
51 | |||
52 | /* Restore the processor after a set_mtrr_prepare */ | ||
53 | void set_mtrr_done(struct set_mtrr_context *ctxt) | ||
54 | { | ||
55 | if (use_intel() || is_cpu(CYRIX)) { | ||
56 | |||
57 | /* Flush caches and TLBs */ | ||
58 | wbinvd(); | ||
59 | |||
60 | /* Restore MTRRdefType */ | ||
61 | if (use_intel()) | ||
62 | /* Intel (P6) standard MTRRs */ | ||
63 | mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi); | ||
64 | else | ||
65 | /* Cyrix ARRs - everything else was excluded at the top */ | ||
66 | setCx86(CX86_CCR3, ctxt->ccr3); | ||
67 | |||
68 | /* Enable caches */ | ||
69 | write_cr0(read_cr0() & 0xbfffffff); | ||
70 | |||
71 | /* Restore value of CR4 */ | ||
72 | if ( cpu_has_pge ) | ||
73 | write_cr4(ctxt->cr4val); | ||
74 | } | ||
75 | /* Re-enable interrupts locally (if enabled previously) */ | ||
76 | local_irq_restore(ctxt->flags); | ||
77 | } | ||
78 | |||
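
These three helpers are meant to bracket a vendor-specific set() routine.
A minimal sketch of the expected calling sequence (the MSR write in the
middle is a placeholder, not a real vendor implementation):

    static void example_vendor_set(unsigned int reg, unsigned long base,
                                   unsigned long size, mtrr_type type)
    {
        struct set_mtrr_context ctxt;

        set_mtrr_prepare_save(&ctxt);   /* IRQs off, caches flushed, state saved */
        set_mtrr_cache_disable(&ctxt);  /* MTRRs/ARRs off, default type uncached */

        /* ... program MTRRphysBase_MSR(reg)/MTRRphysMask_MSR(reg) here ... */

        set_mtrr_done(&ctxt);           /* restore MTRR state, caches, IRQs */
    }
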
diff --git a/arch/i386/kernel/cpu/nexgen.c b/arch/i386/kernel/cpu/nexgen.c new file mode 100644 index 000000000000..30898a260a5c --- /dev/null +++ b/arch/i386/kernel/cpu/nexgen.c | |||
@@ -0,0 +1,63 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | #include <linux/init.h> | ||
3 | #include <linux/string.h> | ||
4 | #include <asm/processor.h> | ||
5 | |||
6 | #include "cpu.h" | ||
7 | |||
8 | /* | ||
9 | * Detect a NexGen CPU running without BIOS hypercode new enough | ||
10 | * to have CPUID. (Thanks to Herbert Oppmann) | ||
11 | */ | ||
12 | |||
13 | static int __init deep_magic_nexgen_probe(void) | ||
14 | { | ||
15 | int ret; | ||
16 | |||
17 | __asm__ __volatile__ ( | ||
18 | " movw $0x5555, %%ax\n" | ||
19 | " xorw %%dx,%%dx\n" | ||
20 | " movw $2, %%cx\n" | ||
21 | " divw %%cx\n" | ||
22 | " movl $0, %%eax\n" | ||
23 | " jnz 1f\n" | ||
24 | " movl $1, %%eax\n" | ||
25 | "1:\n" | ||
26 | : "=a" (ret) : : "cx", "dx" ); | ||
27 | return ret; | ||
28 | } | ||
29 | |||
30 | static void __init init_nexgen(struct cpuinfo_x86 * c) | ||
31 | { | ||
32 | c->x86_cache_size = 256; /* A few had 1 MB... */ | ||
33 | } | ||
34 | |||
35 | static void __init nexgen_identify(struct cpuinfo_x86 * c) | ||
36 | { | ||
37 | /* Detect NexGen with old hypercode */ | ||
38 | if ( deep_magic_nexgen_probe() ) { | ||
39 | strcpy(c->x86_vendor_id, "NexGenDriven"); | ||
40 | } | ||
41 | generic_identify(c); | ||
42 | } | ||
43 | |||
44 | static struct cpu_dev nexgen_cpu_dev __initdata = { | ||
45 | .c_vendor = "Nexgen", | ||
46 | .c_ident = { "NexGenDriven" }, | ||
47 | .c_models = { | ||
48 | { .vendor = X86_VENDOR_NEXGEN, | ||
49 | .family = 5, | ||
50 | .model_names = { [1] = "Nx586" } | ||
51 | }, | ||
52 | }, | ||
53 | .c_init = init_nexgen, | ||
54 | .c_identify = nexgen_identify, | ||
55 | }; | ||
56 | |||
57 | int __init nexgen_init_cpu(void) | ||
58 | { | ||
59 | cpu_devs[X86_VENDOR_NEXGEN] = &nexgen_cpu_dev; | ||
60 | return 0; | ||
61 | } | ||
62 | |||
63 | //early_arch_initcall(nexgen_init_cpu); | ||
diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c new file mode 100644 index 000000000000..c8d83fdc237a --- /dev/null +++ b/arch/i386/kernel/cpu/proc.c | |||
@@ -0,0 +1,149 @@ | |||
1 | #include <linux/smp.h> | ||
2 | #include <linux/timex.h> | ||
3 | #include <linux/string.h> | ||
4 | #include <asm/semaphore.h> | ||
5 | #include <linux/seq_file.h> | ||
6 | |||
7 | /* | ||
8 | * Get CPU information for use by the procfs. | ||
9 | */ | ||
10 | static int show_cpuinfo(struct seq_file *m, void *v) | ||
11 | { | ||
12 | /* | ||
13 | * These flag bits must match the definitions in <asm/cpufeature.h>. | ||
14 | * NULL means this bit is undefined or reserved; either way it doesn't | ||
15 | * have meaning as far as Linux is concerned. Note that it's important | ||
16 | * to realize there is a difference between this table and CPUID -- if | ||
17 | * applications want to get the raw CPUID data, they should access | ||
18 | * /dev/cpu/<cpu_nr>/cpuid instead. | ||
19 | */ | ||
20 | static char *x86_cap_flags[] = { | ||
21 | /* Intel-defined */ | ||
22 | "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", | ||
23 | "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", | ||
24 | "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx", | ||
25 | "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe", | ||
26 | |||
27 | /* AMD-defined */ | ||
28 | "pni", NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
29 | NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, | ||
30 | NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL, | ||
31 | NULL, "fxsr_opt", NULL, NULL, NULL, "lm", "3dnowext", "3dnow", | ||
32 | |||
33 | /* Transmeta-defined */ | ||
34 | "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, | ||
35 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
36 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
37 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
38 | |||
39 | /* Other (Linux-defined) */ | ||
40 | "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", | ||
41 | NULL, NULL, NULL, NULL, | ||
42 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
43 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
44 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
45 | |||
46 | /* Intel-defined (#2) */ | ||
47 | "pni", NULL, NULL, "monitor", "ds_cpl", NULL, NULL, "est", | ||
48 | "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL, | ||
49 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
50 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
51 | |||
52 | /* VIA/Cyrix/Centaur-defined */ | ||
53 | NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en", | ||
54 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
55 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
56 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
57 | |||
58 | /* AMD-defined (#2) */ | ||
59 | "lahf_lm", "cmp_legacy", NULL, NULL, NULL, NULL, NULL, NULL, | ||
60 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
61 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
62 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | ||
63 | }; | ||
64 | struct cpuinfo_x86 *c = v; | ||
65 | int i, n = c - cpu_data; | ||
66 | int fpu_exception; | ||
67 | |||
68 | #ifdef CONFIG_SMP | ||
69 | if (!cpu_online(n)) | ||
70 | return 0; | ||
71 | #endif | ||
72 | seq_printf(m, "processor\t: %d\n" | ||
73 | "vendor_id\t: %s\n" | ||
74 | "cpu family\t: %d\n" | ||
75 | "model\t\t: %d\n" | ||
76 | "model name\t: %s\n", | ||
77 | n, | ||
78 | c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown", | ||
79 | c->x86, | ||
80 | c->x86_model, | ||
81 | c->x86_model_id[0] ? c->x86_model_id : "unknown"); | ||
82 | |||
83 | if (c->x86_mask || c->cpuid_level >= 0) | ||
84 | seq_printf(m, "stepping\t: %d\n", c->x86_mask); | ||
85 | else | ||
86 | seq_printf(m, "stepping\t: unknown\n"); | ||
87 | |||
88 | if ( cpu_has(c, X86_FEATURE_TSC) ) { | ||
89 | seq_printf(m, "cpu MHz\t\t: %lu.%03lu\n", | ||
90 | cpu_khz / 1000, (cpu_khz % 1000)); | ||
91 | } | ||
92 | |||
93 | /* Cache size */ | ||
94 | if (c->x86_cache_size >= 0) | ||
95 | seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); | ||
96 | #ifdef CONFIG_X86_HT | ||
97 | seq_printf(m, "physical id\t: %d\n", phys_proc_id[n]); | ||
98 | seq_printf(m, "siblings\t: %d\n", c->x86_num_cores * smp_num_siblings); | ||
99 | #endif | ||
100 | |||
101 | /* We use exception 16 if we have hardware math and we've either seen it or the CPU claims it is internal */ | ||
102 | fpu_exception = c->hard_math && (ignore_fpu_irq || cpu_has_fpu); | ||
103 | seq_printf(m, "fdiv_bug\t: %s\n" | ||
104 | "hlt_bug\t\t: %s\n" | ||
105 | "f00f_bug\t: %s\n" | ||
106 | "coma_bug\t: %s\n" | ||
107 | "fpu\t\t: %s\n" | ||
108 | "fpu_exception\t: %s\n" | ||
109 | "cpuid level\t: %d\n" | ||
110 | "wp\t\t: %s\n" | ||
111 | "flags\t\t:", | ||
112 | c->fdiv_bug ? "yes" : "no", | ||
113 | c->hlt_works_ok ? "no" : "yes", | ||
114 | c->f00f_bug ? "yes" : "no", | ||
115 | c->coma_bug ? "yes" : "no", | ||
116 | c->hard_math ? "yes" : "no", | ||
117 | fpu_exception ? "yes" : "no", | ||
118 | c->cpuid_level, | ||
119 | c->wp_works_ok ? "yes" : "no"); | ||
120 | |||
121 | for ( i = 0 ; i < 32*NCAPINTS ; i++ ) | ||
122 | if ( test_bit(i, c->x86_capability) && | ||
123 | x86_cap_flags[i] != NULL ) | ||
124 | seq_printf(m, " %s", x86_cap_flags[i]); | ||
125 | |||
126 | seq_printf(m, "\nbogomips\t: %lu.%02lu\n\n", | ||
127 | c->loops_per_jiffy/(500000/HZ), | ||
128 | (c->loops_per_jiffy/(5000/HZ)) % 100); | ||
129 | return 0; | ||
130 | } | ||
131 | |||
132 | static void *c_start(struct seq_file *m, loff_t *pos) | ||
133 | { | ||
134 | return *pos < NR_CPUS ? cpu_data + *pos : NULL; | ||
135 | } | ||
136 | static void *c_next(struct seq_file *m, void *v, loff_t *pos) | ||
137 | { | ||
138 | ++*pos; | ||
139 | return c_start(m, pos); | ||
140 | } | ||
141 | static void c_stop(struct seq_file *m, void *v) | ||
142 | { | ||
143 | } | ||
144 | struct seq_operations cpuinfo_op = { | ||
145 | .start = c_start, | ||
146 | .next = c_next, | ||
147 | .stop = c_stop, | ||
148 | .show = show_cpuinfo, | ||
149 | }; | ||
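
The c_start/c_next/c_stop trio is the standard seq_file iterator contract:
the core calls start() with a position, show() once per element, next() to
advance, and stop() at the end. A simplified model of how the core drives
cpuinfo_op (the real loop lives in fs/seq_file.c and also handles buffer
management):

    static void seq_walk(struct seq_file *m, struct seq_operations *op)
    {
        loff_t pos = 0;
        void *v = op->start(m, &pos);

        while (v != NULL) {
            if (op->show(m, v))         /* one CPU's /proc/cpuinfo block */
                break;
            v = op->next(m, v, &pos);   /* bumps pos to the next CPU */
        }
        op->stop(m, v);
    }
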
diff --git a/arch/i386/kernel/cpu/rise.c b/arch/i386/kernel/cpu/rise.c new file mode 100644 index 000000000000..8602425628ca --- /dev/null +++ b/arch/i386/kernel/cpu/rise.c | |||
@@ -0,0 +1,53 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | #include <linux/init.h> | ||
3 | #include <linux/bitops.h> | ||
4 | #include <asm/processor.h> | ||
5 | |||
6 | #include "cpu.h" | ||
7 | |||
8 | static void __init init_rise(struct cpuinfo_x86 *c) | ||
9 | { | ||
10 | printk("CPU: Rise iDragon"); | ||
11 | if (c->x86_model > 2) | ||
12 | printk(" II"); | ||
13 | printk("\n"); | ||
14 | |||
15 | /* Unhide possibly hidden capability flags | ||
16 | The mp6 iDragon family don't have MSRs. | ||
17 | We switch on extra features with this cpuid weirdness: */ | ||
18 | __asm__ ( | ||
19 | "movl $0x6363452a, %%eax\n\t" | ||
20 | "movl $0x3231206c, %%ecx\n\t" | ||
21 | "movl $0x2a32313a, %%edx\n\t" | ||
22 | "cpuid\n\t" | ||
23 | "movl $0x63634523, %%eax\n\t" | ||
24 | "movl $0x32315f6c, %%ecx\n\t" | ||
25 | "movl $0x2333313a, %%edx\n\t" | ||
26 | "cpuid\n\t" : : : "eax", "ebx", "ecx", "edx" | ||
27 | ); | ||
28 | set_bit(X86_FEATURE_CX8, c->x86_capability); | ||
29 | } | ||
30 | |||
31 | static struct cpu_dev rise_cpu_dev __initdata = { | ||
32 | .c_vendor = "Rise", | ||
33 | .c_ident = { "RiseRiseRise" }, | ||
34 | .c_models = { | ||
35 | { .vendor = X86_VENDOR_RISE, .family = 5, .model_names = | ||
36 | { | ||
37 | [0] = "iDragon", | ||
38 | [2] = "iDragon", | ||
39 | [8] = "iDragon II", | ||
40 | [9] = "iDragon II" | ||
41 | } | ||
42 | }, | ||
43 | }, | ||
44 | .c_init = init_rise, | ||
45 | }; | ||
46 | |||
47 | int __init rise_init_cpu(void) | ||
48 | { | ||
49 | cpu_devs[X86_VENDOR_RISE] = &rise_cpu_dev; | ||
50 | return 0; | ||
51 | } | ||
52 | |||
53 | //early_arch_initcall(rise_init_cpu); | ||
diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c new file mode 100644 index 000000000000..f57e5ee94943 --- /dev/null +++ b/arch/i386/kernel/cpu/transmeta.c | |||
@@ -0,0 +1,107 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | #include <linux/init.h> | ||
3 | #include <asm/processor.h> | ||
4 | #include <asm/msr.h> | ||
5 | #include "cpu.h" | ||
6 | |||
7 | static void __init init_transmeta(struct cpuinfo_x86 *c) | ||
8 | { | ||
9 | unsigned int cap_mask, uk, max, dummy; | ||
10 | unsigned int cms_rev1, cms_rev2; | ||
11 | unsigned int cpu_rev, cpu_freq, cpu_flags, new_cpu_rev; | ||
12 | char cpu_info[65]; | ||
13 | |||
14 | get_model_name(c); /* Same as AMD/Cyrix */ | ||
15 | display_cacheinfo(c); | ||
16 | |||
17 | /* Print CMS and CPU revision */ | ||
18 | max = cpuid_eax(0x80860000); | ||
19 | cpu_rev = 0; | ||
20 | if ( max >= 0x80860001 ) { | ||
21 | cpuid(0x80860001, &dummy, &cpu_rev, &cpu_freq, &cpu_flags); | ||
22 | if (cpu_rev != 0x02000000) { | ||
23 | printk(KERN_INFO "CPU: Processor revision %u.%u.%u.%u, %u MHz\n", | ||
24 | (cpu_rev >> 24) & 0xff, | ||
25 | (cpu_rev >> 16) & 0xff, | ||
26 | (cpu_rev >> 8) & 0xff, | ||
27 | cpu_rev & 0xff, | ||
28 | cpu_freq); | ||
29 | } | ||
30 | } | ||
31 | if ( max >= 0x80860002 ) { | ||
32 | cpuid(0x80860002, &new_cpu_rev, &cms_rev1, &cms_rev2, &dummy); | ||
33 | if (cpu_rev == 0x02000000) { | ||
34 | printk(KERN_INFO "CPU: Processor revision %08X, %u MHz\n", | ||
35 | new_cpu_rev, cpu_freq); | ||
36 | } | ||
37 | printk(KERN_INFO "CPU: Code Morphing Software revision %u.%u.%u-%u-%u\n", | ||
38 | (cms_rev1 >> 24) & 0xff, | ||
39 | (cms_rev1 >> 16) & 0xff, | ||
40 | (cms_rev1 >> 8) & 0xff, | ||
41 | cms_rev1 & 0xff, | ||
42 | cms_rev2); | ||
43 | } | ||
44 | if ( max >= 0x80860006 ) { | ||
45 | cpuid(0x80860003, | ||
46 | (void *)&cpu_info[0], | ||
47 | (void *)&cpu_info[4], | ||
48 | (void *)&cpu_info[8], | ||
49 | (void *)&cpu_info[12]); | ||
50 | cpuid(0x80860004, | ||
51 | (void *)&cpu_info[16], | ||
52 | (void *)&cpu_info[20], | ||
53 | (void *)&cpu_info[24], | ||
54 | (void *)&cpu_info[28]); | ||
55 | cpuid(0x80860005, | ||
56 | (void *)&cpu_info[32], | ||
57 | (void *)&cpu_info[36], | ||
58 | (void *)&cpu_info[40], | ||
59 | (void *)&cpu_info[44]); | ||
60 | cpuid(0x80860006, | ||
61 | (void *)&cpu_info[48], | ||
62 | (void *)&cpu_info[52], | ||
63 | (void *)&cpu_info[56], | ||
64 | (void *)&cpu_info[60]); | ||
65 | cpu_info[64] = '\0'; | ||
66 | printk(KERN_INFO "CPU: %s\n", cpu_info); | ||
67 | } | ||
68 | |||
69 | /* Unhide possibly hidden capability flags */ | ||
70 | rdmsr(0x80860004, cap_mask, uk); | ||
71 | wrmsr(0x80860004, ~0, uk); | ||
72 | c->x86_capability[0] = cpuid_edx(0x00000001); | ||
73 | wrmsr(0x80860004, cap_mask, uk); | ||
74 | |||
75 | /* If we can run i686 user-space code, call us an i686 */ | ||
76 | #define USER686 (X86_FEATURE_TSC|X86_FEATURE_CX8|X86_FEATURE_CMOV) | ||
77 | if ( c->x86 == 5 && (c->x86_capability[0] & USER686) == USER686 ) | ||
78 | c->x86 = 6; | ||
79 | } | ||
80 | |||
81 | static void transmeta_identify(struct cpuinfo_x86 * c) | ||
82 | { | ||
83 | u32 xlvl; | ||
84 | generic_identify(c); | ||
85 | |||
86 | /* Transmeta-defined flags: level 0x80860001 */ | ||
87 | xlvl = cpuid_eax(0x80860000); | ||
88 | if ( (xlvl & 0xffff0000) == 0x80860000 ) { | ||
89 | if ( xlvl >= 0x80860001 ) | ||
90 | c->x86_capability[2] = cpuid_edx(0x80860001); | ||
91 | } | ||
92 | } | ||
93 | |||
94 | static struct cpu_dev transmeta_cpu_dev __initdata = { | ||
95 | .c_vendor = "Transmeta", | ||
96 | .c_ident = { "GenuineTMx86", "TransmetaCPU" }, | ||
97 | .c_init = init_transmeta, | ||
98 | .c_identify = transmeta_identify, | ||
99 | }; | ||
100 | |||
101 | int __init transmeta_init_cpu(void) | ||
102 | { | ||
103 | cpu_devs[X86_VENDOR_TRANSMETA] = &transmeta_cpu_dev; | ||
104 | return 0; | ||
105 | } | ||
106 | |||
107 | //early_arch_initcall(transmeta_init_cpu); | ||
diff --git a/arch/i386/kernel/cpu/umc.c b/arch/i386/kernel/cpu/umc.c new file mode 100644 index 000000000000..264fcad559d5 --- /dev/null +++ b/arch/i386/kernel/cpu/umc.c | |||
@@ -0,0 +1,33 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | #include <linux/init.h> | ||
3 | #include <asm/processor.h> | ||
4 | #include "cpu.h" | ||
5 | |||
6 | /* UMC chips appear to be only 386- or 486-class, so no special init takes place. | ||
7 | */ | ||
8 | static void __init init_umc(struct cpuinfo_x86 * c) | ||
9 | { | ||
10 | |||
11 | } | ||
12 | |||
13 | static struct cpu_dev umc_cpu_dev __initdata = { | ||
14 | .c_vendor = "UMC", | ||
15 | .c_ident = { "UMC UMC UMC" }, | ||
16 | .c_models = { | ||
17 | { .vendor = X86_VENDOR_UMC, .family = 4, .model_names = | ||
18 | { | ||
19 | [1] = "U5D", | ||
20 | [2] = "U5S", | ||
21 | } | ||
22 | }, | ||
23 | }, | ||
24 | .c_init = init_umc, | ||
25 | }; | ||
26 | |||
27 | int __init umc_init_cpu(void) | ||
28 | { | ||
29 | cpu_devs[X86_VENDOR_UMC] = &umc_cpu_dev; | ||
30 | return 0; | ||
31 | } | ||
32 | |||
33 | //early_arch_initcall(umc_init_cpu); | ||
diff --git a/arch/i386/kernel/cpuid.c b/arch/i386/kernel/cpuid.c new file mode 100644 index 000000000000..2e2756345bb2 --- /dev/null +++ b/arch/i386/kernel/cpuid.c | |||
@@ -0,0 +1,246 @@ | |||
1 | /* ----------------------------------------------------------------------- * | ||
2 | * | ||
3 | * Copyright 2000 H. Peter Anvin - All Rights Reserved | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, | ||
8 | * USA; either version 2 of the License, or (at your option) any later | ||
9 | * version; incorporated herein by reference. | ||
10 | * | ||
11 | * ----------------------------------------------------------------------- */ | ||
12 | |||
13 | /* | ||
14 | * cpuid.c | ||
15 | * | ||
16 | * x86 CPUID access device | ||
17 | * | ||
18 | * This device is accessed by lseek() to the appropriate CPUID level | ||
19 | * and then read in chunks of 16 bytes. A larger size means multiple | ||
20 | * reads of consecutive levels. | ||
21 | * | ||
22 | * This driver uses /dev/cpu/%d/cpuid where %d is the minor number, and on | ||
23 | * an SMP box will direct the access to CPU %d. | ||
24 | */ | ||
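
From userspace that interface reads like the hypothetical test program
below (assuming the /dev/cpu/0/cpuid node exists):

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        uint32_t regs[4];
        int fd = open("/dev/cpu/0/cpuid", O_RDONLY);

        if (fd < 0)
            return 1;
        lseek(fd, 0, SEEK_SET);             /* file position == CPUID level */
        if (read(fd, regs, 16) != 16)       /* 16 bytes == eax,ebx,ecx,edx */
            return 1;
        printf("eax=%08x ebx=%08x ecx=%08x edx=%08x\n",
               regs[0], regs[1], regs[2], regs[3]);
        close(fd);
        return 0;
    }
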
25 | |||
26 | #include <linux/module.h> | ||
27 | #include <linux/config.h> | ||
28 | |||
29 | #include <linux/types.h> | ||
30 | #include <linux/errno.h> | ||
31 | #include <linux/fcntl.h> | ||
32 | #include <linux/init.h> | ||
33 | #include <linux/poll.h> | ||
34 | #include <linux/smp.h> | ||
35 | #include <linux/major.h> | ||
36 | #include <linux/fs.h> | ||
37 | #include <linux/smp_lock.h> | ||
38 | |||
39 | #include <linux/device.h> | ||
40 | #include <linux/cpu.h> | ||
41 | #include <linux/notifier.h> | ||
42 | |||
43 | #include <asm/processor.h> | ||
44 | #include <asm/msr.h> | ||
45 | #include <asm/uaccess.h> | ||
46 | #include <asm/system.h> | ||
47 | |||
48 | static struct class_simple *cpuid_class; | ||
49 | |||
50 | #ifdef CONFIG_SMP | ||
51 | |||
52 | struct cpuid_command { | ||
53 | int cpu; | ||
54 | u32 reg; | ||
55 | u32 *data; | ||
56 | }; | ||
57 | |||
58 | static void cpuid_smp_cpuid(void *cmd_block) | ||
59 | { | ||
60 | struct cpuid_command *cmd = (struct cpuid_command *)cmd_block; | ||
61 | |||
62 | if (cmd->cpu == smp_processor_id()) | ||
63 | cpuid(cmd->reg, &cmd->data[0], &cmd->data[1], &cmd->data[2], | ||
64 | &cmd->data[3]); | ||
65 | } | ||
66 | |||
67 | static inline void do_cpuid(int cpu, u32 reg, u32 * data) | ||
68 | { | ||
69 | struct cpuid_command cmd; | ||
70 | |||
71 | preempt_disable(); | ||
72 | if (cpu == smp_processor_id()) { | ||
73 | cpuid(reg, &data[0], &data[1], &data[2], &data[3]); | ||
74 | } else { | ||
75 | cmd.cpu = cpu; | ||
76 | cmd.reg = reg; | ||
77 | cmd.data = data; | ||
78 | |||
79 | smp_call_function(cpuid_smp_cpuid, &cmd, 1, 1); | ||
80 | } | ||
81 | preempt_enable(); | ||
82 | } | ||
83 | #else /* ! CONFIG_SMP */ | ||
84 | |||
85 | static inline void do_cpuid(int cpu, u32 reg, u32 * data) | ||
86 | { | ||
87 | cpuid(reg, &data[0], &data[1], &data[2], &data[3]); | ||
88 | } | ||
89 | |||
90 | #endif /* ! CONFIG_SMP */ | ||
91 | |||
92 | static loff_t cpuid_seek(struct file *file, loff_t offset, int orig) | ||
93 | { | ||
94 | loff_t ret; | ||
95 | |||
96 | lock_kernel(); | ||
97 | |||
98 | switch (orig) { | ||
99 | case 0: | ||
100 | file->f_pos = offset; | ||
101 | ret = file->f_pos; | ||
102 | break; | ||
103 | case 1: | ||
104 | file->f_pos += offset; | ||
105 | ret = file->f_pos; | ||
106 | break; | ||
107 | default: | ||
108 | ret = -EINVAL; | ||
109 | } | ||
110 | |||
111 | unlock_kernel(); | ||
112 | return ret; | ||
113 | } | ||
114 | |||
115 | static ssize_t cpuid_read(struct file *file, char __user *buf, | ||
116 | size_t count, loff_t * ppos) | ||
117 | { | ||
118 | char __user *tmp = buf; | ||
119 | u32 data[4]; | ||
120 | size_t rv; | ||
121 | u32 reg = *ppos; | ||
122 | int cpu = iminor(file->f_dentry->d_inode); | ||
123 | |||
124 | if (count % 16) | ||
125 | return -EINVAL; /* Invalid chunk size */ | ||
126 | |||
127 | for (rv = 0; count; count -= 16) { | ||
128 | do_cpuid(cpu, reg, data); | ||
129 | if (copy_to_user(tmp, &data, 16)) | ||
130 | return -EFAULT; | ||
131 | tmp += 16; | ||
132 | *ppos = ++reg; /* advance to the next CPUID level */ | ||
133 | } | ||
134 | |||
135 | return tmp - buf; | ||
136 | } | ||
137 | |||
138 | static int cpuid_open(struct inode *inode, struct file *file) | ||
139 | { | ||
140 | unsigned int cpu = iminor(file->f_dentry->d_inode); | ||
141 | struct cpuinfo_x86 *c = &(cpu_data)[cpu]; | ||
142 | |||
143 | if (cpu >= NR_CPUS || !cpu_online(cpu)) | ||
144 | return -ENXIO; /* No such CPU */ | ||
145 | if (c->cpuid_level < 0) | ||
146 | return -EIO; /* CPUID not supported */ | ||
147 | |||
148 | return 0; | ||
149 | } | ||
150 | |||
151 | /* | ||
152 | * File operations we support | ||
153 | */ | ||
154 | static struct file_operations cpuid_fops = { | ||
155 | .owner = THIS_MODULE, | ||
156 | .llseek = cpuid_seek, | ||
157 | .read = cpuid_read, | ||
158 | .open = cpuid_open, | ||
159 | }; | ||
160 | |||
161 | static int cpuid_class_simple_device_add(int i) | ||
162 | { | ||
163 | int err = 0; | ||
164 | struct class_device *class_err; | ||
165 | |||
166 | class_err = class_simple_device_add(cpuid_class, MKDEV(CPUID_MAJOR, i), NULL, "cpu%d",i); | ||
167 | if (IS_ERR(class_err)) | ||
168 | err = PTR_ERR(class_err); | ||
169 | return err; | ||
170 | } | ||
171 | |||
172 | static int __devinit cpuid_class_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | ||
173 | { | ||
174 | unsigned int cpu = (unsigned long)hcpu; | ||
175 | |||
176 | switch (action) { | ||
177 | case CPU_ONLINE: | ||
178 | cpuid_class_simple_device_add(cpu); | ||
179 | break; | ||
180 | case CPU_DEAD: | ||
181 | class_simple_device_remove(MKDEV(CPUID_MAJOR, cpu)); | ||
182 | break; | ||
183 | } | ||
184 | return NOTIFY_OK; | ||
185 | } | ||
186 | |||
187 | static struct notifier_block cpuid_class_cpu_notifier = | ||
188 | { | ||
189 | .notifier_call = cpuid_class_cpu_callback, | ||
190 | }; | ||
191 | |||
192 | static int __init cpuid_init(void) | ||
193 | { | ||
194 | int i, err = 0; | ||
195 | i = 0; | ||
196 | |||
197 | if (register_chrdev(CPUID_MAJOR, "cpu/cpuid", &cpuid_fops)) { | ||
198 | printk(KERN_ERR "cpuid: unable to get major %d for cpuid\n", | ||
199 | CPUID_MAJOR); | ||
200 | err = -EBUSY; | ||
201 | goto out; | ||
202 | } | ||
203 | cpuid_class = class_simple_create(THIS_MODULE, "cpuid"); | ||
204 | if (IS_ERR(cpuid_class)) { | ||
205 | err = PTR_ERR(cpuid_class); | ||
206 | goto out_chrdev; | ||
207 | } | ||
208 | for_each_online_cpu(i) { | ||
209 | err = cpuid_class_simple_device_add(i); | ||
210 | if (err != 0) | ||
211 | goto out_class; | ||
212 | } | ||
213 | register_cpu_notifier(&cpuid_class_cpu_notifier); | ||
214 | |||
215 | err = 0; | ||
216 | goto out; | ||
217 | |||
218 | out_class: | ||
219 | i = 0; | ||
220 | for_each_online_cpu(i) { | ||
221 | class_simple_device_remove(MKDEV(CPUID_MAJOR, i)); | ||
222 | } | ||
223 | class_simple_destroy(cpuid_class); | ||
224 | out_chrdev: | ||
225 | unregister_chrdev(CPUID_MAJOR, "cpu/cpuid"); | ||
226 | out: | ||
227 | return err; | ||
228 | } | ||
229 | |||
230 | static void __exit cpuid_exit(void) | ||
231 | { | ||
232 | int cpu = 0; | ||
233 | |||
234 | for_each_online_cpu(cpu) | ||
235 | class_simple_device_remove(MKDEV(CPUID_MAJOR, cpu)); | ||
236 | class_simple_destroy(cpuid_class); | ||
237 | unregister_chrdev(CPUID_MAJOR, "cpu/cpuid"); | ||
238 | unregister_cpu_notifier(&cpuid_class_cpu_notifier); | ||
239 | } | ||
240 | |||
241 | module_init(cpuid_init); | ||
242 | module_exit(cpuid_exit); | ||
243 | |||
244 | MODULE_AUTHOR("H. Peter Anvin <hpa@zytor.com>"); | ||
245 | MODULE_DESCRIPTION("x86 generic CPUID driver"); | ||
246 | MODULE_LICENSE("GPL"); | ||
diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c new file mode 100644 index 000000000000..6ed7e28f306c --- /dev/null +++ b/arch/i386/kernel/dmi_scan.c | |||
@@ -0,0 +1,487 @@ | |||
1 | #include <linux/types.h> | ||
2 | #include <linux/kernel.h> | ||
3 | #include <linux/string.h> | ||
4 | #include <linux/init.h> | ||
5 | #include <linux/module.h> | ||
6 | #include <linux/slab.h> | ||
7 | #include <linux/acpi.h> | ||
8 | #include <asm/io.h> | ||
9 | #include <linux/pm.h> | ||
10 | #include <asm/system.h> | ||
11 | #include <linux/dmi.h> | ||
12 | #include <linux/bootmem.h> | ||
13 | |||
14 | |||
15 | struct dmi_header | ||
16 | { | ||
17 | u8 type; | ||
18 | u8 length; | ||
19 | u16 handle; | ||
20 | }; | ||
21 | |||
22 | #undef DMI_DEBUG | ||
23 | |||
24 | #ifdef DMI_DEBUG | ||
25 | #define dmi_printk(x) printk x | ||
26 | #else | ||
27 | #define dmi_printk(x) | ||
28 | #endif | ||
29 | |||
30 | static char * __init dmi_string(struct dmi_header *dm, u8 s) | ||
31 | { | ||
32 | u8 *bp=(u8 *)dm; | ||
33 | bp+=dm->length; | ||
34 | if(!s) | ||
35 | return ""; | ||
36 | s--; | ||
37 | while(s>0 && *bp) | ||
38 | { | ||
39 | bp+=strlen(bp); | ||
40 | bp++; | ||
41 | s--; | ||
42 | } | ||
43 | return bp; | ||
44 | } | ||
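
dmi_string() walks the NUL-separated string pool that follows each
formatted area, where string number 1 is the first entry. A standalone
model of the same walk, with a made-up two-string pool:

    #include <stdio.h>
    #include <string.h>

    static const char *dmi_string_demo(const char *pool, int s)
    {
        if (!s)
            return "";
        for (s--; s > 0 && *pool; s--)
            pool += strlen(pool) + 1;   /* skip one string and its NUL */
        return pool;
    }

    int main(void)
    {
        /* pool layout: "Acme BIOS\0" "1.02\0" "\0" (double NUL ends it) */
        const char pool[] = "Acme BIOS\0" "1.02\0";

        printf("%s\n", dmi_string_demo(pool, 1));   /* Acme BIOS */
        printf("%s\n", dmi_string_demo(pool, 2));   /* 1.02 */
        return 0;
    }
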
45 | |||
46 | /* | ||
47 | * We have to be cautious here. We have seen BIOSes with DMI pointers | ||
48 | * pointing to completely the wrong place for example | ||
49 | */ | ||
50 | |||
51 | static int __init dmi_table(u32 base, int len, int num, void (*decode)(struct dmi_header *)) | ||
52 | { | ||
53 | u8 *buf; | ||
54 | struct dmi_header *dm; | ||
55 | u8 *data; | ||
56 | int i=0; | ||
57 | |||
58 | buf = bt_ioremap(base, len); | ||
59 | if(buf==NULL) | ||
60 | return -1; | ||
61 | |||
62 | data = buf; | ||
63 | |||
64 | /* | ||
65 | * Stop when we see all the items the table claimed to have | ||
66 | * OR we run off the end of the table (also happens) | ||
67 | */ | ||
68 | |||
69 | while(i<num && data-buf+sizeof(struct dmi_header)<=len) | ||
70 | { | ||
71 | dm=(struct dmi_header *)data; | ||
72 | /* | ||
73 | * We want to know the total length (formatted area and strings) | ||
74 | * before decoding to make sure we won't run off the table in | ||
75 | * dmi_decode or dmi_string | ||
76 | */ | ||
77 | data+=dm->length; | ||
78 | while(data-buf<len-1 && (data[0] || data[1])) | ||
79 | data++; | ||
80 | if(data-buf<len-1) | ||
81 | decode(dm); | ||
82 | data+=2; | ||
83 | i++; | ||
84 | } | ||
85 | bt_iounmap(buf, len); | ||
86 | return 0; | ||
87 | } | ||
88 | |||
89 | |||
90 | static inline int __init dmi_checksum(u8 *buf) | ||
91 | { | ||
92 | u8 sum=0; | ||
93 | int a; | ||
94 | |||
95 | for(a=0; a<15; a++) | ||
96 | sum+=buf[a]; | ||
97 | return (sum==0); | ||
98 | } | ||
99 | |||
100 | static int __init dmi_iterate(void (*decode)(struct dmi_header *)) | ||
101 | { | ||
102 | u8 buf[15]; | ||
103 | char __iomem *p, *q; | ||
104 | |||
105 | /* | ||
106 | * no iounmap() for that ioremap(); it would be a no-op, but it's | ||
107 | * so early in setup that sucker gets confused into doing what | ||
108 | * it shouldn't if we actually call it. | ||
109 | */ | ||
110 | p = ioremap(0xF0000, 0x10000); | ||
111 | if (p == NULL) | ||
112 | return -1; | ||
113 | for (q = p; q < p + 0x10000; q += 16) { | ||
114 | memcpy_fromio(buf, q, 15); | ||
115 | if(memcmp(buf, "_DMI_", 5)==0 && dmi_checksum(buf)) | ||
116 | { | ||
117 | u16 num=buf[13]<<8|buf[12]; | ||
118 | u16 len=buf[7]<<8|buf[6]; | ||
119 | u32 base=buf[11]<<24|buf[10]<<16|buf[9]<<8|buf[8]; | ||
120 | |||
121 | /* | ||
122 | * DMI version 0.0 means that the real version is taken from | ||
123 | * the SMBIOS version, which we don't know at this point. | ||
124 | */ | ||
125 | if(buf[14]!=0) | ||
126 | printk(KERN_INFO "DMI %d.%d present.\n", | ||
127 | buf[14]>>4, buf[14]&0x0F); | ||
128 | else | ||
129 | printk(KERN_INFO "DMI present.\n"); | ||
130 | dmi_printk((KERN_INFO "%d structures occupying %d bytes.\n", | ||
131 | num, len)); | ||
132 | dmi_printk((KERN_INFO "DMI table at 0x%08X.\n", | ||
133 | base)); | ||
134 | if(dmi_table(base,len, num, decode)==0) | ||
135 | return 0; | ||
136 | } | ||
137 | } | ||
138 | return -1; | ||
139 | } | ||
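
The byte-twiddling on the _DMI_ anchor is plain little-endian field
extraction. A worked example with a hypothetical 15-byte anchor (the
checksum byte is left as zero since the demo never verifies it):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint8_t buf[15] = { '_', 'D', 'M', 'I', '_', 0,
                            0xc7, 0x04,             /* len  = 0x04c7 = 1223 */
                            0x00, 0x08, 0x0f, 0x00, /* base = 0x000f0800 */
                            0x26, 0x00,             /* num  = 0x0026 = 38 */
                            0x23 };                 /* BCD version 2.3 */
        uint16_t num  = buf[13] << 8 | buf[12];
        uint16_t len  = buf[7]  << 8 | buf[6];
        uint32_t base = buf[11] << 24 | buf[10] << 16 | buf[9] << 8 | buf[8];

        printf("DMI %d.%d: %u structures, %u bytes at 0x%08x\n",
               buf[14] >> 4, buf[14] & 0x0f, num, len, base);
        return 0;
    }
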
140 | |||
141 | static char *dmi_ident[DMI_STRING_MAX]; | ||
142 | |||
143 | /* | ||
144 | * Save a DMI string | ||
145 | */ | ||
146 | |||
147 | static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string) | ||
148 | { | ||
149 | char *d = (char*)dm; | ||
150 | char *p = dmi_string(dm, d[string]); | ||
151 | if(p==NULL || *p == 0) | ||
152 | return; | ||
153 | if (dmi_ident[slot]) | ||
154 | return; | ||
155 | dmi_ident[slot] = alloc_bootmem(strlen(p)+1); | ||
156 | if(dmi_ident[slot]) | ||
157 | strcpy(dmi_ident[slot], p); | ||
158 | else | ||
159 | printk(KERN_ERR "dmi_save_ident: out of memory.\n"); | ||
160 | } | ||
161 | |||
162 | /* | ||
163 | * Ugly compatibility crap. | ||
164 | */ | ||
165 | #define dmi_blacklist dmi_system_id | ||
166 | #define NO_MATCH { DMI_NONE, NULL} | ||
167 | #define MATCH DMI_MATCH | ||
168 | |||
169 | /* | ||
170 | * Toshiba keyboard likes to repeat keys when they are not repeated. | ||
171 | */ | ||
172 | |||
173 | static __init int broken_toshiba_keyboard(struct dmi_blacklist *d) | ||
174 | { | ||
175 | printk(KERN_WARNING "Toshiba with broken keyboard detected. If your keyboard sometimes generates 3 keypresses instead of one, see http://davyd.ucc.asn.au/projects/toshiba/README\n"); | ||
176 | return 0; | ||
177 | } | ||
178 | |||
179 | |||
180 | #ifdef CONFIG_ACPI_SLEEP | ||
181 | static __init int reset_videomode_after_s3(struct dmi_blacklist *d) | ||
182 | { | ||
183 | /* See acpi_wakeup.S */ | ||
184 | extern long acpi_video_flags; | ||
185 | acpi_video_flags |= 2; | ||
186 | return 0; | ||
187 | } | ||
188 | #endif | ||
189 | |||
190 | |||
191 | #ifdef CONFIG_ACPI_BOOT | ||
192 | extern int acpi_force; | ||
193 | |||
194 | static __init __attribute__((unused)) int dmi_disable_acpi(struct dmi_blacklist *d) | ||
195 | { | ||
196 | if (!acpi_force) { | ||
197 | printk(KERN_NOTICE "%s detected: acpi off\n",d->ident); | ||
198 | disable_acpi(); | ||
199 | } else { | ||
200 | printk(KERN_NOTICE | ||
201 | "Warning: DMI blacklist says broken, but acpi forced\n"); | ||
202 | } | ||
203 | return 0; | ||
204 | } | ||
205 | |||
206 | /* | ||
207 | * Limit ACPI to CPU enumeration for HT | ||
208 | */ | ||
209 | static __init __attribute__((unused)) int force_acpi_ht(struct dmi_blacklist *d) | ||
210 | { | ||
211 | if (!acpi_force) { | ||
212 | printk(KERN_NOTICE "%s detected: force use of acpi=ht\n", d->ident); | ||
213 | disable_acpi(); | ||
214 | acpi_ht = 1; | ||
215 | } else { | ||
216 | printk(KERN_NOTICE | ||
217 | "Warning: acpi=force overrules DMI blacklist: acpi=ht\n"); | ||
218 | } | ||
219 | return 0; | ||
220 | } | ||
221 | #endif | ||
222 | |||
223 | #ifdef CONFIG_ACPI_PCI | ||
224 | static __init int disable_acpi_irq(struct dmi_blacklist *d) | ||
225 | { | ||
226 | if (!acpi_force) { | ||
227 | printk(KERN_NOTICE "%s detected: force use of acpi=noirq\n", | ||
228 | d->ident); | ||
229 | acpi_noirq_set(); | ||
230 | } | ||
231 | return 0; | ||
232 | } | ||
233 | static __init int disable_acpi_pci(struct dmi_blacklist *d) | ||
234 | { | ||
235 | if (!acpi_force) { | ||
236 | printk(KERN_NOTICE "%s detected: force use of pci=noacpi\n", | ||
237 | d->ident); | ||
238 | acpi_disable_pci(); | ||
239 | } | ||
240 | return 0; | ||
241 | } | ||
242 | #endif | ||
243 | |||
244 | /* | ||
245 | * Process the DMI blacklists | ||
246 | */ | ||
247 | |||
248 | |||
249 | /* | ||
250 | * This will be expanded over time to force things like the APM | ||
251 | * interrupt mask settings according to the laptop | ||
252 | */ | ||
253 | |||
254 | static __initdata struct dmi_blacklist dmi_blacklist[]={ | ||
255 | |||
256 | { broken_toshiba_keyboard, "Toshiba Satellite 4030cdt", { /* Keyboard generates spurious repeats */ | ||
257 | MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"), | ||
258 | NO_MATCH, NO_MATCH, NO_MATCH | ||
259 | } }, | ||
260 | #ifdef CONFIG_ACPI_SLEEP | ||
261 | { reset_videomode_after_s3, "Toshiba Satellite 4030cdt", { /* Reset video mode after returning from ACPI S3 sleep */ | ||
262 | MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"), | ||
263 | NO_MATCH, NO_MATCH, NO_MATCH | ||
264 | } }, | ||
265 | #endif | ||
266 | |||
267 | #ifdef CONFIG_ACPI_BOOT | ||
268 | /* | ||
269 | * If your system is blacklisted here, but you find that acpi=force | ||
270 | * works for you, please contact acpi-devel@sourceforge.net | ||
271 | */ | ||
272 | |||
273 | /* | ||
274 | * Boxes that need ACPI disabled | ||
275 | */ | ||
276 | |||
277 | { dmi_disable_acpi, "IBM Thinkpad", { | ||
278 | MATCH(DMI_BOARD_VENDOR, "IBM"), | ||
279 | MATCH(DMI_BOARD_NAME, "2629H1G"), | ||
280 | NO_MATCH, NO_MATCH }}, | ||
281 | |||
282 | /* | ||
283 | * Boxes that need acpi=ht | ||
284 | */ | ||
285 | |||
286 | { force_acpi_ht, "FSC Primergy T850", { | ||
287 | MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), | ||
288 | MATCH(DMI_PRODUCT_NAME, "PRIMERGY T850"), | ||
289 | NO_MATCH, NO_MATCH }}, | ||
290 | |||
291 | { force_acpi_ht, "DELL GX240", { | ||
292 | MATCH(DMI_BOARD_VENDOR, "Dell Computer Corporation"), | ||
293 | MATCH(DMI_BOARD_NAME, "OptiPlex GX240"), | ||
294 | NO_MATCH, NO_MATCH }}, | ||
295 | |||
296 | { force_acpi_ht, "HP VISUALIZE NT Workstation", { | ||
297 | MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"), | ||
298 | MATCH(DMI_PRODUCT_NAME, "HP VISUALIZE NT Workstation"), | ||
299 | NO_MATCH, NO_MATCH }}, | ||
300 | |||
301 | { force_acpi_ht, "Compaq Workstation W8000", { | ||
302 | MATCH(DMI_SYS_VENDOR, "Compaq"), | ||
303 | MATCH(DMI_PRODUCT_NAME, "Workstation W8000"), | ||
304 | NO_MATCH, NO_MATCH }}, | ||
305 | |||
306 | { force_acpi_ht, "ASUS P4B266", { | ||
307 | MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), | ||
308 | MATCH(DMI_BOARD_NAME, "P4B266"), | ||
309 | NO_MATCH, NO_MATCH }}, | ||
310 | |||
311 | { force_acpi_ht, "ASUS P2B-DS", { | ||
312 | MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), | ||
313 | MATCH(DMI_BOARD_NAME, "P2B-DS"), | ||
314 | NO_MATCH, NO_MATCH }}, | ||
315 | |||
316 | { force_acpi_ht, "ASUS CUR-DLS", { | ||
317 | MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), | ||
318 | MATCH(DMI_BOARD_NAME, "CUR-DLS"), | ||
319 | NO_MATCH, NO_MATCH }}, | ||
320 | |||
321 | { force_acpi_ht, "ABIT i440BX-W83977", { | ||
322 | MATCH(DMI_BOARD_VENDOR, "ABIT <http://www.abit.com>"), | ||
323 | MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"), | ||
324 | NO_MATCH, NO_MATCH }}, | ||
325 | |||
326 | { force_acpi_ht, "IBM Bladecenter", { | ||
327 | MATCH(DMI_BOARD_VENDOR, "IBM"), | ||
328 | MATCH(DMI_BOARD_NAME, "IBM eServer BladeCenter HS20"), | ||
329 | NO_MATCH, NO_MATCH }}, | ||
330 | |||
331 | { force_acpi_ht, "IBM eServer xSeries 360", { | ||
332 | MATCH(DMI_BOARD_VENDOR, "IBM"), | ||
333 | MATCH(DMI_BOARD_NAME, "eServer xSeries 360"), | ||
334 | NO_MATCH, NO_MATCH }}, | ||
335 | |||
336 | { force_acpi_ht, "IBM eserver xSeries 330", { | ||
337 | MATCH(DMI_BOARD_VENDOR, "IBM"), | ||
338 | MATCH(DMI_BOARD_NAME, "eserver xSeries 330"), | ||
339 | NO_MATCH, NO_MATCH }}, | ||
340 | |||
341 | { force_acpi_ht, "IBM eserver xSeries 440", { | ||
342 | MATCH(DMI_BOARD_VENDOR, "IBM"), | ||
343 | MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"), | ||
344 | NO_MATCH, NO_MATCH }}, | ||
345 | |||
346 | #endif /* CONFIG_ACPI_BOOT */ | ||
347 | |||
348 | #ifdef CONFIG_ACPI_PCI | ||
349 | /* | ||
350 | * Boxes that need ACPI PCI IRQ routing disabled | ||
351 | */ | ||
352 | |||
353 | { disable_acpi_irq, "ASUS A7V", { | ||
354 | MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"), | ||
355 | MATCH(DMI_BOARD_NAME, "<A7V>"), | ||
356 | /* newer BIOS, Revision 1011, does work */ | ||
357 | MATCH(DMI_BIOS_VERSION, "ASUS A7V ACPI BIOS Revision 1007"), | ||
358 | NO_MATCH }}, | ||
359 | |||
360 | /* | ||
361 | * Boxes that need ACPI PCI IRQ routing and PCI scan disabled | ||
362 | */ | ||
363 | { disable_acpi_pci, "ASUS PR-DLS", { /* _BBN 0 bug */ | ||
364 | MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), | ||
365 | MATCH(DMI_BOARD_NAME, "PR-DLS"), | ||
366 | MATCH(DMI_BIOS_VERSION, "ASUS PR-DLS ACPI BIOS Revision 1010"), | ||
367 | MATCH(DMI_BIOS_DATE, "03/21/2003") }}, | ||
368 | |||
369 | { disable_acpi_pci, "Acer TravelMate 36x Laptop", { | ||
370 | MATCH(DMI_SYS_VENDOR, "Acer"), | ||
371 | MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), | ||
372 | NO_MATCH, NO_MATCH | ||
373 | } }, | ||
374 | |||
375 | #endif | ||
376 | |||
377 | { NULL, } | ||
378 | }; | ||
379 | |||
380 | /* | ||
381 | * Process a DMI table entry. Right now all we care about are the BIOS | ||
382 | * and machine entries. For 2.5 we should pull the smbus controller info | ||
383 | * out of here. | ||
384 | */ | ||
385 | |||
386 | static void __init dmi_decode(struct dmi_header *dm) | ||
387 | { | ||
388 | #ifdef DMI_DEBUG | ||
389 | u8 *data = (u8 *)dm; | ||
390 | #endif | ||
391 | |||
392 | switch(dm->type) | ||
393 | { | ||
394 | case 0: | ||
395 | dmi_printk(("BIOS Vendor: %s\n", | ||
396 | dmi_string(dm, data[4]))); | ||
397 | dmi_save_ident(dm, DMI_BIOS_VENDOR, 4); | ||
398 | dmi_printk(("BIOS Version: %s\n", | ||
399 | dmi_string(dm, data[5]))); | ||
400 | dmi_save_ident(dm, DMI_BIOS_VERSION, 5); | ||
401 | dmi_printk(("BIOS Release: %s\n", | ||
402 | dmi_string(dm, data[8]))); | ||
403 | dmi_save_ident(dm, DMI_BIOS_DATE, 8); | ||
404 | break; | ||
405 | case 1: | ||
406 | dmi_printk(("System Vendor: %s\n", | ||
407 | dmi_string(dm, data[4]))); | ||
408 | dmi_save_ident(dm, DMI_SYS_VENDOR, 4); | ||
409 | dmi_printk(("Product Name: %s\n", | ||
410 | dmi_string(dm, data[5]))); | ||
411 | dmi_save_ident(dm, DMI_PRODUCT_NAME, 5); | ||
412 | dmi_printk(("Version: %s\n", | ||
413 | dmi_string(dm, data[6]))); | ||
414 | dmi_save_ident(dm, DMI_PRODUCT_VERSION, 6); | ||
415 | dmi_printk(("Serial Number: %s\n", | ||
416 | dmi_string(dm, data[7]))); | ||
417 | break; | ||
418 | case 2: | ||
419 | dmi_printk(("Board Vendor: %s\n", | ||
420 | dmi_string(dm, data[4]))); | ||
421 | dmi_save_ident(dm, DMI_BOARD_VENDOR, 4); | ||
422 | dmi_printk(("Board Name: %s\n", | ||
423 | dmi_string(dm, data[5]))); | ||
424 | dmi_save_ident(dm, DMI_BOARD_NAME, 5); | ||
425 | dmi_printk(("Board Version: %s\n", | ||
426 | dmi_string(dm, data[6]))); | ||
427 | dmi_save_ident(dm, DMI_BOARD_VERSION, 6); | ||
428 | break; | ||
429 | } | ||
430 | } | ||
431 | |||
432 | void __init dmi_scan_machine(void) | ||
433 | { | ||
434 | int err = dmi_iterate(dmi_decode); | ||
435 | if (err == 0) | ||
436 | dmi_check_system(dmi_blacklist); | ||
437 | else | ||
438 | printk(KERN_INFO "DMI not present.\n"); | ||
439 | } | ||
440 | |||
441 | |||
442 | /** | ||
443 | * dmi_check_system - check system DMI data | ||
444 | * @list: array of dmi_system_id structures to match against | ||
445 | * | ||
446 | * Walk the blacklist table running matching functions until someone | ||
447 | * returns non-zero or we hit the end. The callback function is called for | ||
448 | * each successful match. Returns the number of matches. | ||
449 | */ | ||
450 | int dmi_check_system(struct dmi_system_id *list) | ||
451 | { | ||
452 | int i, count = 0; | ||
453 | struct dmi_system_id *d = list; | ||
454 | |||
455 | while (d->ident) { | ||
456 | for (i = 0; i < ARRAY_SIZE(d->matches); i++) { | ||
457 | int s = d->matches[i].slot; | ||
458 | if (s == DMI_NONE) | ||
459 | continue; | ||
460 | if (dmi_ident[s] && strstr(dmi_ident[s], d->matches[i].substr)) | ||
461 | continue; | ||
462 | /* No match */ | ||
463 | goto fail; | ||
464 | } | ||
465 | if (d->callback && d->callback(d)) | ||
466 | break; | ||
467 | count++; | ||
468 | fail: d++; | ||
469 | } | ||
470 | |||
471 | return count; | ||
472 | } | ||
473 | |||
474 | EXPORT_SYMBOL(dmi_check_system); | ||
475 | |||
476 | /** | ||
477 | * dmi_get_system_info - return DMI data value | ||
478 | * @field: data index (see enum dmi_field) | ||
479 | * | ||
480 | * Returns one DMI data value, can be used to perform | ||
481 | * complex DMI data checks. | ||
482 | */ | ||
483 | char * dmi_get_system_info(int field) | ||
484 | { | ||
485 | return dmi_ident[field]; | ||
486 | } | ||
487 | |||
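For illustration, here is a minimal sketch of how a driver elsewhere in the tree could consume the dmi_check_system() interface exported above. It uses the public DMI_MATCH() initializer from linux/dmi.h rather than this file's local MATCH/NO_MATCH macros; the table, callback, and init-function names are hypothetical, not part of this file:

#include <linux/dmi.h>
#include <linux/init.h>
#include <linux/kernel.h>

/* hypothetical callback: returning 0 lets the walk continue,
 * non-zero stops it (see dmi_check_system() above) */
static int __init apply_example_quirk(struct dmi_system_id *d)
{
	printk(KERN_INFO "%s detected, applying quirk\n", d->ident);
	return 0;
}

static __initdata struct dmi_system_id example_quirk_table[] = {
	{ apply_example_quirk, "Example Box", {
		DMI_MATCH(DMI_SYS_VENDOR, "Example Vendor"),
		DMI_MATCH(DMI_PRODUCT_NAME, "Example Product"),
		/* unused slots stay zeroed (DMI_NONE) and are skipped */
	} },
	{ NULL, }
};

static int __init example_quirk_init(void)
{
	/* runs after dmi_scan_machine() has filled dmi_ident[] */
	if (dmi_check_system(example_quirk_table))
		printk(KERN_INFO "example quirk active\n");
	return 0;
}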
diff --git a/arch/i386/kernel/doublefault.c b/arch/i386/kernel/doublefault.c new file mode 100644 index 000000000000..789af3e9fb1f --- /dev/null +++ b/arch/i386/kernel/doublefault.c | |||
@@ -0,0 +1,65 @@ | |||
1 | #include <linux/mm.h> | ||
2 | #include <linux/sched.h> | ||
3 | #include <linux/init.h> | ||
4 | #include <linux/init_task.h> | ||
5 | #include <linux/fs.h> | ||
6 | |||
7 | #include <asm/uaccess.h> | ||
8 | #include <asm/pgtable.h> | ||
9 | #include <asm/processor.h> | ||
10 | #include <asm/desc.h> | ||
11 | |||
12 | #define DOUBLEFAULT_STACKSIZE (1024) | ||
13 | static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE]; | ||
14 | #define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE) | ||
15 | |||
16 | #define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + 0x1000000) | ||
17 | |||
18 | static void doublefault_fn(void) | ||
19 | { | ||
20 | struct Xgt_desc_struct gdt_desc = {0, 0}; | ||
21 | unsigned long gdt, tss; | ||
22 | |||
23 | __asm__ __volatile__("sgdt %0": "=m" (gdt_desc): :"memory"); | ||
24 | gdt = gdt_desc.address; | ||
25 | |||
26 | printk("double fault, gdt at %08lx [%d bytes]\n", gdt, gdt_desc.size); | ||
27 | |||
28 | if (ptr_ok(gdt)) { | ||
29 | gdt += GDT_ENTRY_TSS << 3; | ||
30 | tss = *(u16 *)(gdt+2); | ||
31 | tss += *(u8 *)(gdt+4) << 16; | ||
32 | tss += *(u8 *)(gdt+7) << 24; | ||
33 | printk("double fault, tss at %08lx\n", tss); | ||
34 | |||
35 | if (ptr_ok(tss)) { | ||
36 | struct tss_struct *t = (struct tss_struct *)tss; | ||
37 | |||
38 | printk("eip = %08lx, esp = %08lx\n", t->eip, t->esp); | ||
39 | |||
40 | printk("eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n", | ||
41 | t->eax, t->ebx, t->ecx, t->edx); | ||
42 | printk("esi = %08lx, edi = %08lx\n", | ||
43 | t->esi, t->edi); | ||
44 | } | ||
45 | } | ||
46 | |||
47 | for (;;) /* nothing */; | ||
48 | } | ||
49 | |||
50 | struct tss_struct doublefault_tss __cacheline_aligned = { | ||
51 | .esp0 = STACK_START, | ||
52 | .ss0 = __KERNEL_DS, | ||
53 | .ldt = 0, | ||
54 | .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, | ||
55 | |||
56 | .eip = (unsigned long) doublefault_fn, | ||
57 | .eflags = X86_EFLAGS_SF | 0x2, /* 0x2 bit is always set */ | ||
58 | .esp = STACK_START, | ||
59 | .es = __USER_DS, | ||
60 | .cs = __KERNEL_CS, | ||
61 | .ss = __KERNEL_DS, | ||
62 | .ds = __USER_DS, | ||
63 | |||
64 | .__cr3 = __pa(swapper_pg_dir) | ||
65 | }; | ||
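The byte-poking in doublefault_fn() above follows from how an x86 segment descriptor scatters a 32-bit base address across its 8-byte GDT entry. As a sketch (the helper name is illustrative, not from this file), the reassembly is:

#include <linux/types.h>

/* Reassemble the 32-bit base address of an x86 descriptor:
 * bits 15..0 live in bytes 2-3, bits 23..16 in byte 4, and
 * bits 31..24 in byte 7 of the 8-byte GDT entry. */
static unsigned long descriptor_base(const u8 *desc)
{
	unsigned long base;

	base  = *(const u16 *)(desc + 2);
	base += (unsigned long)desc[4] << 16;
	base += (unsigned long)desc[7] << 24;
	return base;
}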
diff --git a/arch/i386/kernel/early_printk.c b/arch/i386/kernel/early_printk.c new file mode 100644 index 000000000000..92f812ba275c --- /dev/null +++ b/arch/i386/kernel/early_printk.c | |||
@@ -0,0 +1,2 @@ | |||
1 | |||
2 | #include "../../x86_64/kernel/early_printk.c" | ||
diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c new file mode 100644 index 000000000000..9e5e0d8bd36e --- /dev/null +++ b/arch/i386/kernel/efi.c | |||
@@ -0,0 +1,635 @@ | |||
1 | /* | ||
2 | * Extensible Firmware Interface | ||
3 | * | ||
4 | * Based on Extensible Firmware Interface Specification version 1.0 | ||
5 | * | ||
6 | * Copyright (C) 1999 VA Linux Systems | ||
7 | * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> | ||
8 | * Copyright (C) 1999-2002 Hewlett-Packard Co. | ||
9 | * David Mosberger-Tang <davidm@hpl.hp.com> | ||
10 | * Stephane Eranian <eranian@hpl.hp.com> | ||
11 | * | ||
12 | * Not all EFI Runtime Services are implemented yet, as EFI only | ||
13 | * supports physical mode addressing on SoftSDV. This is to be fixed | ||
14 | * in a future version. --drummond 1999-07-20 | ||
15 | * | ||
16 | * Implemented EFI runtime services and virtual mode calls. --davidm | ||
17 | * | ||
18 | * Goutham Rao: <goutham.rao@intel.com> | ||
19 | * Skip non-WB memory and ignore empty memory ranges. | ||
20 | */ | ||
21 | |||
22 | #include <linux/config.h> | ||
23 | #include <linux/kernel.h> | ||
24 | #include <linux/init.h> | ||
25 | #include <linux/mm.h> | ||
26 | #include <linux/types.h> | ||
27 | #include <linux/time.h> | ||
28 | #include <linux/spinlock.h> | ||
29 | #include <linux/bootmem.h> | ||
30 | #include <linux/ioport.h> | ||
31 | #include <linux/module.h> | ||
32 | #include <linux/efi.h> | ||
33 | |||
34 | #include <asm/setup.h> | ||
35 | #include <asm/io.h> | ||
36 | #include <asm/page.h> | ||
37 | #include <asm/pgtable.h> | ||
38 | #include <asm/processor.h> | ||
39 | #include <asm/desc.h> | ||
40 | #include <asm/tlbflush.h> | ||
41 | |||
42 | #define EFI_DEBUG 0 | ||
43 | #define PFX "EFI: " | ||
44 | |||
45 | extern efi_status_t asmlinkage efi_call_phys(void *, ...); | ||
46 | |||
47 | struct efi efi; | ||
48 | EXPORT_SYMBOL(efi); | ||
49 | static struct efi efi_phys __initdata; | ||
50 | struct efi_memory_map memmap __initdata; | ||
51 | |||
52 | /* | ||
53 | * We require an early boot_ioremap mapping mechanism. | ||
54 | */ | ||
55 | extern void * boot_ioremap(unsigned long, unsigned long); | ||
56 | |||
57 | /* | ||
58 | * To call an EFI runtime service in physical addressing mode we need a | ||
59 | * prolog/epilog around the invocation to disable interrupts, to claim the | ||
60 | * EFI runtime service handler exclusively, and to duplicate the kernel | ||
61 | * mapping into low memory (0 - 3G). | ||
62 | */ | ||
63 | |||
64 | static unsigned long efi_rt_eflags; | ||
65 | static DEFINE_SPINLOCK(efi_rt_lock); | ||
66 | static pgd_t efi_bak_pg_dir_pointer[2]; | ||
67 | |||
68 | static void efi_call_phys_prelog(void) | ||
69 | { | ||
70 | unsigned long cr4; | ||
71 | unsigned long temp; | ||
72 | |||
73 | spin_lock(&efi_rt_lock); | ||
74 | local_irq_save(efi_rt_eflags); | ||
75 | |||
76 | /* | ||
77 | * Without PSE we must duplicate two entries in the page | ||
78 | * directory; with PSE, duplicating a single entry is | ||
79 | * enough. | ||
80 | */ | ||
81 | __asm__ __volatile__("movl %%cr4, %0":"=r"(cr4)); | ||
82 | |||
83 | if (cr4 & X86_CR4_PSE) { | ||
84 | efi_bak_pg_dir_pointer[0].pgd = | ||
85 | swapper_pg_dir[pgd_index(0)].pgd; | ||
86 | swapper_pg_dir[0].pgd = | ||
87 | swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd; | ||
88 | } else { | ||
89 | efi_bak_pg_dir_pointer[0].pgd = | ||
90 | swapper_pg_dir[pgd_index(0)].pgd; | ||
91 | efi_bak_pg_dir_pointer[1].pgd = | ||
92 | swapper_pg_dir[pgd_index(0x400000)].pgd; | ||
93 | swapper_pg_dir[pgd_index(0)].pgd = | ||
94 | swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd; | ||
95 | temp = PAGE_OFFSET + 0x400000; | ||
96 | swapper_pg_dir[pgd_index(0x400000)].pgd = | ||
97 | swapper_pg_dir[pgd_index(temp)].pgd; | ||
98 | } | ||
99 | |||
100 | /* | ||
101 | * Flush the TLB so the temporary low-memory mapping takes effect. | ||
102 | */ | ||
103 | local_flush_tlb(); | ||
104 | |||
105 | cpu_gdt_descr[0].address = __pa(cpu_gdt_descr[0].address); | ||
106 | __asm__ __volatile__("lgdt %0":"=m" | ||
107 | (*(struct Xgt_desc_struct *) __pa(&cpu_gdt_descr[0]))); | ||
108 | } | ||
109 | |||
110 | static void efi_call_phys_epilog(void) | ||
111 | { | ||
112 | unsigned long cr4; | ||
113 | |||
114 | cpu_gdt_descr[0].address = | ||
115 | (unsigned long) __va(cpu_gdt_descr[0].address); | ||
116 | __asm__ __volatile__("lgdt %0":"=m"(cpu_gdt_descr)); | ||
117 | __asm__ __volatile__("movl %%cr4, %0":"=r"(cr4)); | ||
118 | |||
119 | if (cr4 & X86_CR4_PSE) { | ||
120 | swapper_pg_dir[pgd_index(0)].pgd = | ||
121 | efi_bak_pg_dir_pointer[0].pgd; | ||
122 | } else { | ||
123 | swapper_pg_dir[pgd_index(0)].pgd = | ||
124 | efi_bak_pg_dir_pointer[0].pgd; | ||
125 | swapper_pg_dir[pgd_index(0x400000)].pgd = | ||
126 | efi_bak_pg_dir_pointer[1].pgd; | ||
127 | } | ||
128 | |||
129 | /* | ||
130 | * Flush the TLB now that the original page table is restored. | ||
131 | */ | ||
132 | local_flush_tlb(); | ||
133 | |||
134 | local_irq_restore(efi_rt_eflags); | ||
135 | spin_unlock(&efi_rt_lock); | ||
136 | } | ||
137 | |||
138 | static efi_status_t | ||
139 | phys_efi_set_virtual_address_map(unsigned long memory_map_size, | ||
140 | unsigned long descriptor_size, | ||
141 | u32 descriptor_version, | ||
142 | efi_memory_desc_t *virtual_map) | ||
143 | { | ||
144 | efi_status_t status; | ||
145 | |||
146 | efi_call_phys_prelog(); | ||
147 | status = efi_call_phys(efi_phys.set_virtual_address_map, | ||
148 | memory_map_size, descriptor_size, | ||
149 | descriptor_version, virtual_map); | ||
150 | efi_call_phys_epilog(); | ||
151 | return status; | ||
152 | } | ||
153 | |||
154 | static efi_status_t | ||
155 | phys_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) | ||
156 | { | ||
157 | efi_status_t status; | ||
158 | |||
159 | efi_call_phys_prelog(); | ||
160 | status = efi_call_phys(efi_phys.get_time, tm, tc); | ||
161 | efi_call_phys_epilog(); | ||
162 | return status; | ||
163 | } | ||
164 | |||
165 | inline int efi_set_rtc_mmss(unsigned long nowtime) | ||
166 | { | ||
167 | int real_seconds, real_minutes; | ||
168 | efi_status_t status; | ||
169 | efi_time_t eft; | ||
170 | efi_time_cap_t cap; | ||
171 | |||
172 | spin_lock(&efi_rt_lock); | ||
173 | status = efi.get_time(&eft, &cap); | ||
174 | spin_unlock(&efi_rt_lock); | ||
175 | if (status != EFI_SUCCESS) | ||
176 | panic("Ooops, efitime: can't read time!\n"); | ||
177 | real_seconds = nowtime % 60; | ||
178 | real_minutes = nowtime / 60; | ||
179 | |||
180 | if (((abs(real_minutes - eft.minute) + 15)/30) & 1) | ||
181 | real_minutes += 30; /* correct for half-hour time zones */ | ||
182 | real_minutes %= 60; | ||
183 | |||
184 | eft.minute = real_minutes; | ||
185 | eft.second = real_seconds; | ||
186 | |||
187 | spin_lock(&efi_rt_lock); | ||
188 | status = efi.set_time(&eft); /* write the adjusted time back */ | ||
189 | spin_unlock(&efi_rt_lock); | ||
190 | |||
191 | return (status == EFI_SUCCESS) ? 0 : -1; | ||
192 | } | ||
193 | /* | ||
194 | * This should only be used during kernel init, before runtime | ||
195 | * services have been remapped; therefore we call it in physical | ||
196 | * mode. It isn't used later, so it is marked __init. | ||
197 | */ | ||
198 | inline unsigned long __init efi_get_time(void) | ||
199 | { | ||
200 | efi_status_t status; | ||
201 | efi_time_t eft; | ||
202 | efi_time_cap_t cap; | ||
203 | |||
204 | status = phys_efi_get_time(&eft, &cap); | ||
205 | if (status != EFI_SUCCESS) | ||
206 | printk("Oops: efitime: can't read time status: 0x%lx\n",status); | ||
207 | |||
208 | return mktime(eft.year, eft.month, eft.day, eft.hour, | ||
209 | eft.minute, eft.second); | ||
210 | } | ||
211 | |||
212 | int is_available_memory(efi_memory_desc_t * md) | ||
213 | { | ||
214 | if (!(md->attribute & EFI_MEMORY_WB)) | ||
215 | return 0; | ||
216 | |||
217 | switch (md->type) { | ||
218 | case EFI_LOADER_CODE: | ||
219 | case EFI_LOADER_DATA: | ||
220 | case EFI_BOOT_SERVICES_CODE: | ||
221 | case EFI_BOOT_SERVICES_DATA: | ||
222 | case EFI_CONVENTIONAL_MEMORY: | ||
223 | return 1; | ||
224 | } | ||
225 | return 0; | ||
226 | } | ||
227 | |||
228 | /* | ||
229 | * We need to map the EFI memory map again after paging_init(). | ||
230 | */ | ||
231 | void __init efi_map_memmap(void) | ||
232 | { | ||
233 | memmap.map = NULL; | ||
234 | |||
235 | memmap.map = (efi_memory_desc_t *) | ||
236 | bt_ioremap((unsigned long) memmap.phys_map, | ||
237 | (memmap.nr_map * sizeof(efi_memory_desc_t))); | ||
238 | |||
239 | if (memmap.map == NULL) | ||
240 | printk(KERN_ERR PFX "Could not remap the EFI memmap!\n"); | ||
241 | } | ||
242 | |||
243 | #if EFI_DEBUG | ||
244 | static void __init print_efi_memmap(void) | ||
245 | { | ||
246 | efi_memory_desc_t *md; | ||
247 | int i; | ||
248 | |||
249 | for (i = 0; i < memmap.nr_map; i++) { | ||
250 | md = &memmap.map[i]; | ||
251 | printk(KERN_INFO "mem%02u: type=%u, attr=0x%llx, " | ||
252 | "range=[0x%016llx-0x%016llx) (%lluMB)\n", | ||
253 | i, md->type, md->attribute, md->phys_addr, | ||
254 | md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), | ||
255 | (md->num_pages >> (20 - EFI_PAGE_SHIFT))); | ||
256 | } | ||
257 | } | ||
258 | #endif /* EFI_DEBUG */ | ||
259 | |||
260 | /* | ||
261 | * Walks the EFI memory map and calls CALLBACK once for each EFI | ||
262 | * memory descriptor that has memory that is available for kernel use. | ||
263 | */ | ||
264 | void efi_memmap_walk(efi_freemem_callback_t callback, void *arg) | ||
265 | { | ||
266 | int prev_valid = 0; | ||
267 | struct range { | ||
268 | unsigned long start; | ||
269 | unsigned long end; | ||
270 | } prev, curr; | ||
271 | efi_memory_desc_t *md; | ||
272 | unsigned long start, end; | ||
273 | int i; | ||
274 | |||
275 | for (i = 0; i < memmap.nr_map; i++) { | ||
276 | md = &memmap.map[i]; | ||
277 | |||
278 | if ((md->num_pages == 0) || (!is_available_memory(md))) | ||
279 | continue; | ||
280 | |||
281 | curr.start = md->phys_addr; | ||
282 | curr.end = curr.start + (md->num_pages << EFI_PAGE_SHIFT); | ||
283 | |||
284 | if (!prev_valid) { | ||
285 | prev = curr; | ||
286 | prev_valid = 1; | ||
287 | } else { | ||
288 | if (curr.start < prev.start) | ||
289 | printk(KERN_INFO PFX "Unordered memory map\n"); | ||
290 | if (prev.end == curr.start) | ||
291 | prev.end = curr.end; | ||
292 | else { | ||
293 | start = | ||
294 | (unsigned long) (PAGE_ALIGN(prev.start)); | ||
295 | end = (unsigned long) (prev.end & PAGE_MASK); | ||
296 | if ((end > start) | ||
297 | && (*callback) (start, end, arg) < 0) | ||
298 | return; | ||
299 | prev = curr; | ||
300 | } | ||
301 | } | ||
302 | } | ||
303 | if (prev_valid) { | ||
304 | start = (unsigned long) PAGE_ALIGN(prev.start); | ||
305 | end = (unsigned long) (prev.end & PAGE_MASK); | ||
306 | if (end > start) | ||
307 | (*callback) (start, end, arg); | ||
308 | } | ||
309 | } | ||
310 | |||
311 | void __init efi_init(void) | ||
312 | { | ||
313 | efi_config_table_t *config_tables; | ||
314 | efi_runtime_services_t *runtime; | ||
315 | efi_char16_t *c16; | ||
316 | char vendor[100] = "unknown"; | ||
317 | unsigned long num_config_tables; | ||
318 | int i = 0; | ||
319 | |||
320 | memset(&efi, 0, sizeof(efi) ); | ||
321 | memset(&efi_phys, 0, sizeof(efi_phys)); | ||
322 | |||
323 | efi_phys.systab = EFI_SYSTAB; | ||
324 | memmap.phys_map = EFI_MEMMAP; | ||
325 | memmap.nr_map = EFI_MEMMAP_SIZE/EFI_MEMDESC_SIZE; | ||
326 | memmap.desc_version = EFI_MEMDESC_VERSION; | ||
327 | |||
328 | efi.systab = (efi_system_table_t *) | ||
329 | boot_ioremap((unsigned long) efi_phys.systab, | ||
330 | sizeof(efi_system_table_t)); | ||
331 | /* | ||
332 | * Verify the EFI Table | ||
333 | */ | ||
334 | if (efi.systab == NULL) | ||
335 | printk(KERN_ERR PFX "Woah! Couldn't map the EFI system table.\n"); | ||
336 | if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) | ||
337 | printk(KERN_ERR PFX "Woah! EFI system table signature incorrect\n"); | ||
338 | if ((efi.systab->hdr.revision ^ EFI_SYSTEM_TABLE_REVISION) >> 16 != 0) | ||
339 | printk(KERN_ERR PFX | ||
340 | "Warning: EFI system table major version mismatch: " | ||
341 | "got %d.%02d, expected %d.%02d\n", | ||
342 | efi.systab->hdr.revision >> 16, | ||
343 | efi.systab->hdr.revision & 0xffff, | ||
344 | EFI_SYSTEM_TABLE_REVISION >> 16, | ||
345 | EFI_SYSTEM_TABLE_REVISION & 0xffff); | ||
346 | /* | ||
347 | * Grab some details from the system table | ||
348 | */ | ||
349 | num_config_tables = efi.systab->nr_tables; | ||
350 | config_tables = (efi_config_table_t *)efi.systab->tables; | ||
351 | runtime = efi.systab->runtime; | ||
352 | |||
353 | /* | ||
354 | * Show what we know for posterity | ||
355 | */ | ||
356 | c16 = (efi_char16_t *) boot_ioremap(efi.systab->fw_vendor, 2); | ||
357 | if (c16) { | ||
358 | for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i) | ||
359 | vendor[i] = *c16++; | ||
360 | vendor[i] = '\0'; | ||
361 | } else | ||
362 | printk(KERN_ERR PFX "Could not map the firmware vendor!\n"); | ||
363 | |||
364 | printk(KERN_INFO PFX "EFI v%u.%.02u by %s\n", | ||
365 | efi.systab->hdr.revision >> 16, | ||
366 | efi.systab->hdr.revision & 0xffff, vendor); | ||
367 | |||
368 | /* | ||
369 | * Let's see what config tables the firmware passed to us. | ||
370 | */ | ||
371 | config_tables = (efi_config_table_t *) | ||
372 | boot_ioremap((unsigned long) config_tables, | ||
373 | num_config_tables * sizeof(efi_config_table_t)); | ||
374 | |||
375 | if (config_tables == NULL) | ||
376 | printk(KERN_ERR PFX "Could not map EFI Configuration Table!\n"); | ||
377 | |||
378 | for (i = 0; i < num_config_tables; i++) { | ||
379 | if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) { | ||
380 | efi.mps = (void *)config_tables[i].table; | ||
381 | printk(KERN_INFO " MPS=0x%lx ", config_tables[i].table); | ||
382 | } else | ||
383 | if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) { | ||
384 | efi.acpi20 = __va(config_tables[i].table); | ||
385 | printk(KERN_INFO " ACPI 2.0=0x%lx ", config_tables[i].table); | ||
386 | } else | ||
387 | if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) { | ||
388 | efi.acpi = __va(config_tables[i].table); | ||
389 | printk(KERN_INFO " ACPI=0x%lx ", config_tables[i].table); | ||
390 | } else | ||
391 | if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) { | ||
392 | efi.smbios = (void *) config_tables[i].table; | ||
393 | printk(KERN_INFO " SMBIOS=0x%lx ", config_tables[i].table); | ||
394 | } else | ||
395 | if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) { | ||
396 | efi.hcdp = (void *)config_tables[i].table; | ||
397 | printk(KERN_INFO " HCDP=0x%lx ", config_tables[i].table); | ||
398 | } else | ||
399 | if (efi_guidcmp(config_tables[i].guid, UGA_IO_PROTOCOL_GUID) == 0) { | ||
400 | efi.uga = (void *)config_tables[i].table; | ||
401 | printk(KERN_INFO " UGA=0x%lx ", config_tables[i].table); | ||
402 | } | ||
403 | } | ||
404 | printk("\n"); | ||
405 | |||
406 | /* | ||
407 | * Check out the runtime services table. We need to map | ||
408 | * the runtime services table so that we can grab the physical | ||
409 | * address of several of the EFI runtime functions, needed to | ||
410 | * set the firmware into virtual mode. | ||
411 | */ | ||
412 | |||
413 | runtime = (efi_runtime_services_t *) boot_ioremap((unsigned long) | ||
414 | runtime, | ||
415 | sizeof(efi_runtime_services_t)); | ||
416 | if (runtime != NULL) { | ||
417 | /* | ||
418 | * We will only need *early* access to the following | ||
419 | * two EFI runtime services before set_virtual_address_map | ||
420 | * is invoked. | ||
421 | */ | ||
422 | efi_phys.get_time = (efi_get_time_t *) runtime->get_time; | ||
423 | efi_phys.set_virtual_address_map = | ||
424 | (efi_set_virtual_address_map_t *) | ||
425 | runtime->set_virtual_address_map; | ||
426 | } else | ||
427 | printk(KERN_ERR PFX "Could not map the runtime service table!\n"); | ||
428 | |||
429 | /* Map the EFI memory map for use until paging_init() */ | ||
430 | |||
431 | memmap.map = (efi_memory_desc_t *) | ||
432 | boot_ioremap((unsigned long) EFI_MEMMAP, EFI_MEMMAP_SIZE); | ||
433 | |||
434 | if (memmap.map == NULL) | ||
435 | printk(KERN_ERR PFX "Could not map the EFI memory map!\n"); | ||
436 | |||
437 | if (EFI_MEMDESC_SIZE != sizeof(efi_memory_desc_t)) { | ||
438 | printk(KERN_WARNING PFX "Warning! Kernel-defined memdesc doesn't " | ||
439 | "match the one from EFI!\n"); | ||
440 | } | ||
441 | #if EFI_DEBUG | ||
442 | print_efi_memmap(); | ||
443 | #endif | ||
444 | } | ||
445 | |||
446 | /* | ||
447 | * This function will switch the EFI runtime services to virtual mode. | ||
448 | * Essentially, look through the EFI memmap and map every region that | ||
449 | * has the runtime attribute bit set in its memory descriptor and update | ||
450 | * that memory descriptor with the virtual address obtained from ioremap(). | ||
451 | * This enables the runtime services to be called without having to | ||
452 | * thunk back into physical mode for every invocation. | ||
453 | */ | ||
454 | |||
455 | void __init efi_enter_virtual_mode(void) | ||
456 | { | ||
457 | efi_memory_desc_t *md; | ||
458 | efi_status_t status; | ||
459 | int i; | ||
460 | |||
461 | efi.systab = NULL; | ||
462 | |||
463 | for (i = 0; i < memmap.nr_map; i++) { | ||
464 | md = &memmap.map[i]; | ||
465 | |||
466 | if (md->attribute & EFI_MEMORY_RUNTIME) { | ||
467 | md->virt_addr = | ||
468 | (unsigned long)ioremap(md->phys_addr, | ||
469 | md->num_pages << EFI_PAGE_SHIFT); | ||
470 | if (!(unsigned long)md->virt_addr) { | ||
471 | printk(KERN_ERR PFX "ioremap of 0x%lX failed\n", | ||
472 | (unsigned long)md->phys_addr); | ||
473 | } | ||
474 | |||
475 | if (((unsigned long)md->phys_addr <= | ||
476 | (unsigned long)efi_phys.systab) && | ||
477 | ((unsigned long)efi_phys.systab < | ||
478 | md->phys_addr + | ||
479 | ((unsigned long)md->num_pages << | ||
480 | EFI_PAGE_SHIFT))) { | ||
481 | unsigned long addr; | ||
482 | |||
483 | addr = md->virt_addr - md->phys_addr + | ||
484 | (unsigned long)efi_phys.systab; | ||
485 | efi.systab = (efi_system_table_t *)addr; | ||
486 | } | ||
487 | } | ||
488 | } | ||
489 | |||
490 | if (!efi.systab) | ||
491 | BUG(); | ||
492 | |||
493 | status = phys_efi_set_virtual_address_map( | ||
494 | sizeof(efi_memory_desc_t) * memmap.nr_map, | ||
495 | sizeof(efi_memory_desc_t), | ||
496 | memmap.desc_version, | ||
497 | memmap.phys_map); | ||
498 | |||
499 | if (status != EFI_SUCCESS) { | ||
500 | printk (KERN_ALERT "You are screwed! " | ||
501 | "Unable to switch EFI into virtual mode " | ||
502 | "(status=%lx)\n", status); | ||
503 | panic("EFI call to SetVirtualAddressMap() failed!"); | ||
504 | } | ||
505 | |||
506 | /* | ||
507 | * Now that EFI is in virtual mode, update the function | ||
508 | * pointers in the runtime service table to the new virtual addresses. | ||
509 | */ | ||
510 | |||
511 | efi.get_time = (efi_get_time_t *) efi.systab->runtime->get_time; | ||
512 | efi.set_time = (efi_set_time_t *) efi.systab->runtime->set_time; | ||
513 | efi.get_wakeup_time = (efi_get_wakeup_time_t *) | ||
514 | efi.systab->runtime->get_wakeup_time; | ||
515 | efi.set_wakeup_time = (efi_set_wakeup_time_t *) | ||
516 | efi.systab->runtime->set_wakeup_time; | ||
517 | efi.get_variable = (efi_get_variable_t *) | ||
518 | efi.systab->runtime->get_variable; | ||
519 | efi.get_next_variable = (efi_get_next_variable_t *) | ||
520 | efi.systab->runtime->get_next_variable; | ||
521 | efi.set_variable = (efi_set_variable_t *) | ||
522 | efi.systab->runtime->set_variable; | ||
523 | efi.get_next_high_mono_count = (efi_get_next_high_mono_count_t *) | ||
524 | efi.systab->runtime->get_next_high_mono_count; | ||
525 | efi.reset_system = (efi_reset_system_t *) | ||
526 | efi.systab->runtime->reset_system; | ||
527 | } | ||
528 | |||
529 | void __init | ||
530 | efi_initialize_iomem_resources(struct resource *code_resource, | ||
531 | struct resource *data_resource) | ||
532 | { | ||
533 | struct resource *res; | ||
534 | efi_memory_desc_t *md; | ||
535 | int i; | ||
536 | |||
537 | for (i = 0; i < memmap.nr_map; i++) { | ||
538 | md = &memmap.map[i]; | ||
539 | |||
540 | if ((md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) > | ||
541 | 0x100000000ULL) | ||
542 | continue; | ||
543 | res = alloc_bootmem_low(sizeof(struct resource)); | ||
544 | switch (md->type) { | ||
545 | case EFI_RESERVED_TYPE: | ||
546 | res->name = "Reserved Memory"; | ||
547 | break; | ||
548 | case EFI_LOADER_CODE: | ||
549 | res->name = "Loader Code"; | ||
550 | break; | ||
551 | case EFI_LOADER_DATA: | ||
552 | res->name = "Loader Data"; | ||
553 | break; | ||
554 | case EFI_BOOT_SERVICES_DATA: | ||
555 | res->name = "BootServices Data"; | ||
556 | break; | ||
557 | case EFI_BOOT_SERVICES_CODE: | ||
558 | res->name = "BootServices Code"; | ||
559 | break; | ||
560 | case EFI_RUNTIME_SERVICES_CODE: | ||
561 | res->name = "Runtime Service Code"; | ||
562 | break; | ||
563 | case EFI_RUNTIME_SERVICES_DATA: | ||
564 | res->name = "Runtime Service Data"; | ||
565 | break; | ||
566 | case EFI_CONVENTIONAL_MEMORY: | ||
567 | res->name = "Conventional Memory"; | ||
568 | break; | ||
569 | case EFI_UNUSABLE_MEMORY: | ||
570 | res->name = "Unusable Memory"; | ||
571 | break; | ||
572 | case EFI_ACPI_RECLAIM_MEMORY: | ||
573 | res->name = "ACPI Reclaim"; | ||
574 | break; | ||
575 | case EFI_ACPI_MEMORY_NVS: | ||
576 | res->name = "ACPI NVS"; | ||
577 | break; | ||
578 | case EFI_MEMORY_MAPPED_IO: | ||
579 | res->name = "Memory Mapped IO"; | ||
580 | break; | ||
581 | case EFI_MEMORY_MAPPED_IO_PORT_SPACE: | ||
582 | res->name = "Memory Mapped IO Port Space"; | ||
583 | break; | ||
584 | default: | ||
585 | res->name = "Reserved"; | ||
586 | break; | ||
587 | } | ||
588 | res->start = md->phys_addr; | ||
589 | res->end = res->start + ((md->num_pages << EFI_PAGE_SHIFT) - 1); | ||
590 | res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; | ||
591 | if (request_resource(&iomem_resource, res) < 0) | ||
592 | printk(KERN_ERR PFX "Failed to allocate res %s : 0x%lx-0x%lx\n", | ||
593 | res->name, res->start, res->end); | ||
594 | /* | ||
595 | * We don't know which region contains kernel data so we try | ||
596 | * it repeatedly and let the resource manager test it. | ||
597 | */ | ||
598 | if (md->type == EFI_CONVENTIONAL_MEMORY) { | ||
599 | request_resource(res, code_resource); | ||
600 | request_resource(res, data_resource); | ||
601 | } | ||
602 | } | ||
603 | } | ||
604 | |||
605 | /* | ||
606 | * Convenience functions to obtain memory types and attributes | ||
607 | */ | ||
608 | |||
609 | u32 efi_mem_type(unsigned long phys_addr) | ||
610 | { | ||
611 | efi_memory_desc_t *md; | ||
612 | int i; | ||
613 | |||
614 | for (i = 0; i < memmap.nr_map; i++) { | ||
615 | md = &memmap.map[i]; | ||
616 | if ((md->phys_addr <= phys_addr) && (phys_addr < | ||
617 | (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)))) | ||
618 | return md->type; | ||
619 | } | ||
620 | return 0; | ||
621 | } | ||
622 | |||
623 | u64 efi_mem_attributes(unsigned long phys_addr) | ||
624 | { | ||
625 | efi_memory_desc_t *md; | ||
626 | int i; | ||
627 | |||
628 | for (i = 0; i < memmap.nr_map; i++) { | ||
629 | md = &memmap.map[i]; | ||
630 | if ((md->phys_addr <= phys_addr) && (phys_addr < | ||
631 | (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)))) | ||
632 | return md->attribute; | ||
633 | } | ||
634 | return 0; | ||
635 | } | ||
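As a usage sketch for the walker above: efi_memmap_walk() hands each coalesced, page-aligned range of kernel-usable memory to the callback, which can stop the walk early by returning a negative value. The callback and counter names below are illustrative, not from this file:

#include <linux/efi.h>
#include <asm/page.h>

/* count the pages of WB, kernel-usable memory reported by EFI */
static int __init count_usable_pages(unsigned long start, unsigned long end,
				     void *arg)
{
	unsigned long *pages = arg;

	*pages += (end - start) >> PAGE_SHIFT;
	return 0;	/* keep walking; a negative value would stop early */
}

/* e.g. from setup code, once the memory map has been mapped:
 *
 *	unsigned long pages = 0;
 *	efi_memmap_walk(count_usable_pages, &pages);
 */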
diff --git a/arch/i386/kernel/efi_stub.S b/arch/i386/kernel/efi_stub.S new file mode 100644 index 000000000000..08c0312d9b6c --- /dev/null +++ b/arch/i386/kernel/efi_stub.S | |||
@@ -0,0 +1,124 @@ | |||
1 | /* | ||
2 | * EFI call stub for IA32. | ||
3 | * | ||
4 | * This stub allows us to make EFI calls in physical mode with interrupts | ||
5 | * turned off. | ||
6 | */ | ||
7 | |||
8 | #include <linux/config.h> | ||
9 | #include <linux/linkage.h> | ||
10 | #include <asm/page.h> | ||
11 | #include <asm/pgtable.h> | ||
12 | |||
13 | /* | ||
14 | * efi_call_phys(void *, ...) is a function with variable parameters. | ||
15 | * All callers of this function must ensure that every parameter is 4 bytes wide. | ||
16 | */ | ||
17 | |||
18 | /* | ||
19 | * In the gcc calling convention, EBX, ESP, EBP, ESI and EDI are callee-saved. | ||
20 | * We save all of them at the beginning of this function and restore them at | ||
21 | * the end, no matter how many we actually use, because we cannot assume that | ||
22 | * EFI runtime service functions comply with the gcc calling convention. | ||
23 | */ | ||
24 | |||
25 | .text | ||
26 | ENTRY(efi_call_phys) | ||
27 | /* | ||
28 | * 0. This function can only be called from within the Linux kernel, so | ||
29 | * CS is 0x0010 and DS and SS are 0x0018. EFI uses the same selector | ||
30 | * values, and the corresponding GDT entries are identical, so we leave | ||
31 | * the segment registers and GDT entries alone and only switch the GDT | ||
32 | * base register in the prolog and epilog. | ||
33 | */ | ||
34 | |||
35 | /* | ||
36 | * 1. We are currently running with EIP = <physical address> + PAGE_OFFSET. | ||
37 | * To switch smoothly from virtual mode to flat mode, a mapping of low | ||
38 | * virtual memory is set up by the prolog and torn down again by the | ||
39 | * epilog. | ||
40 | */ | ||
41 | movl $1f, %edx | ||
42 | subl $__PAGE_OFFSET, %edx | ||
43 | jmp *%edx | ||
44 | 1: | ||
45 | |||
46 | /* | ||
47 | * 2. On top of the stack is the return | ||
48 | * address in the caller of efi_call_phys(), then parameter 1, | ||
49 | * parameter 2, ..., param n. To make things easy, we save the return | ||
50 | * address of efi_call_phys in a global variable. | ||
51 | */ | ||
52 | popl %edx | ||
53 | movl %edx, saved_return_addr | ||
54 | /* get the function pointer into ECX*/ | ||
55 | popl %ecx | ||
56 | movl %ecx, efi_rt_function_ptr | ||
57 | movl $2f, %edx | ||
58 | subl $__PAGE_OFFSET, %edx | ||
59 | pushl %edx | ||
60 | |||
61 | /* | ||
62 | * 3. Clear PG bit in %CR0. | ||
63 | */ | ||
64 | movl %cr0, %edx | ||
65 | andl $0x7fffffff, %edx | ||
66 | movl %edx, %cr0 | ||
67 | jmp 1f | ||
68 | 1: | ||
69 | |||
70 | /* | ||
71 | * 4. Adjust stack pointer. | ||
72 | */ | ||
73 | subl $__PAGE_OFFSET, %esp | ||
74 | |||
75 | /* | ||
76 | * 5. Call the physical function. | ||
77 | */ | ||
78 | jmp *%ecx | ||
79 | |||
80 | 2: | ||
81 | /* | ||
82 | * 6. After EFI runtime service returns, control will return to | ||
83 | * following instruction. We'd better readjust stack pointer first. | ||
84 | */ | ||
85 | addl $__PAGE_OFFSET, %esp | ||
86 | |||
87 | /* | ||
88 | * 7. Restore PG bit | ||
89 | */ | ||
90 | movl %cr0, %edx | ||
91 | orl $0x80000000, %edx | ||
92 | movl %edx, %cr0 | ||
93 | jmp 1f | ||
94 | 1: | ||
95 | /* | ||
96 | * 8. Now return to virtual mode from flat mode by jumping to the | ||
97 | * link-time (PAGE_OFFSET-based) address of the next label. | ||
98 | */ | ||
99 | movl $1f, %edx | ||
100 | jmp *%edx | ||
101 | 1: | ||
102 | |||
103 | /* | ||
104 | * 9. Balance the stack. EAX contains the return value, so | ||
105 | * we must not clobber it. | ||
106 | */ | ||
107 | leal efi_rt_function_ptr, %edx | ||
108 | movl (%edx), %ecx | ||
109 | pushl %ecx | ||
110 | |||
111 | /* | ||
112 | * 10. Push the saved return address onto the stack and return. | ||
113 | */ | ||
114 | leal saved_return_addr, %edx | ||
115 | movl (%edx), %ecx | ||
116 | pushl %ecx | ||
117 | ret | ||
118 | .previous | ||
119 | |||
120 | .data | ||
121 | saved_return_addr: | ||
122 | .long 0 | ||
123 | efi_rt_function_ptr: | ||
124 | .long 0 | ||
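From the C side, the contract of this stub is: every argument must be 32 bits wide, and the first argument is the physical address of the EFI service to invoke, with efi_call_phys_prelog()/efi_call_phys_epilog() bracketing the call to set up and tear down the low-memory mapping and GDT base. A hedged sketch of such a wrapper, mirroring phys_efi_get_time() in efi.c above (the function name here is illustrative):

/* physical-mode wrapper, shaped like phys_efi_get_time() in efi.c */
static efi_status_t example_phys_get_time(efi_time_t *tm, efi_time_cap_t *tc)
{
	efi_status_t status;

	efi_call_phys_prelog();		/* map low memory, switch GDT base */
	status = efi_call_phys(efi_phys.get_time, tm, tc);
	efi_call_phys_epilog();		/* restore page tables and GDT base */
	return status;
}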
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S new file mode 100644 index 000000000000..1e45ff292bc9 --- /dev/null +++ b/arch/i386/kernel/entry.S | |||
@@ -0,0 +1,950 @@ | |||
1 | /* | ||
2 | * linux/arch/i386/entry.S | ||
3 | * | ||
4 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
5 | */ | ||
6 | |||
7 | /* | ||
8 | * entry.S contains the system-call and fault low-level handling routines. | ||
9 | * This also contains the timer-interrupt handler, as well as all interrupts | ||
10 | * and faults that can result in a task-switch. | ||
11 | * | ||
12 | * NOTE: This code handles signal-recognition, which happens every time | ||
13 | * after a timer-interrupt and after each system call. | ||
14 | * | ||
15 | * I changed all the .align's to 4 (16 byte alignment), as that's faster | ||
16 | * on a 486. | ||
17 | * | ||
18 | * Stack layout in 'ret_from_system_call': | ||
19 | * ptrace needs to have all regs on the stack. | ||
20 | * if the order here is changed, it needs to be | ||
21 | * updated in fork.c:copy_process, signal.c:do_signal, | ||
22 | * ptrace.c and ptrace.h | ||
23 | * | ||
24 | * 0(%esp) - %ebx | ||
25 | * 4(%esp) - %ecx | ||
26 | * 8(%esp) - %edx | ||
27 | * C(%esp) - %esi | ||
28 | * 10(%esp) - %edi | ||
29 | * 14(%esp) - %ebp | ||
30 | * 18(%esp) - %eax | ||
31 | * 1C(%esp) - %ds | ||
32 | * 20(%esp) - %es | ||
33 | * 24(%esp) - orig_eax | ||
34 | * 28(%esp) - %eip | ||
35 | * 2C(%esp) - %cs | ||
36 | * 30(%esp) - %eflags | ||
37 | * 34(%esp) - %oldesp | ||
38 | * 38(%esp) - %oldss | ||
39 | * | ||
40 | * "current" is in register %ebx during any slow entries. | ||
41 | */ | ||
42 | |||
43 | #include <linux/config.h> | ||
44 | #include <linux/linkage.h> | ||
45 | #include <asm/thread_info.h> | ||
46 | #include <asm/errno.h> | ||
47 | #include <asm/segment.h> | ||
48 | #include <asm/smp.h> | ||
49 | #include <asm/page.h> | ||
50 | #include <asm/desc.h> | ||
51 | #include "irq_vectors.h" | ||
52 | |||
53 | #define nr_syscalls ((syscall_table_size)/4) | ||
54 | |||
55 | EBX = 0x00 | ||
56 | ECX = 0x04 | ||
57 | EDX = 0x08 | ||
58 | ESI = 0x0C | ||
59 | EDI = 0x10 | ||
60 | EBP = 0x14 | ||
61 | EAX = 0x18 | ||
62 | DS = 0x1C | ||
63 | ES = 0x20 | ||
64 | ORIG_EAX = 0x24 | ||
65 | EIP = 0x28 | ||
66 | CS = 0x2C | ||
67 | EFLAGS = 0x30 | ||
68 | OLDESP = 0x34 | ||
69 | OLDSS = 0x38 | ||
70 | |||
71 | CF_MASK = 0x00000001 | ||
72 | TF_MASK = 0x00000100 | ||
73 | IF_MASK = 0x00000200 | ||
74 | DF_MASK = 0x00000400 | ||
75 | NT_MASK = 0x00004000 | ||
76 | VM_MASK = 0x00020000 | ||
77 | |||
78 | #ifdef CONFIG_PREEMPT | ||
79 | #define preempt_stop cli | ||
80 | #else | ||
81 | #define preempt_stop | ||
82 | #define resume_kernel restore_nocheck | ||
83 | #endif | ||
84 | |||
85 | #define SAVE_ALL \ | ||
86 | cld; \ | ||
87 | pushl %es; \ | ||
88 | pushl %ds; \ | ||
89 | pushl %eax; \ | ||
90 | pushl %ebp; \ | ||
91 | pushl %edi; \ | ||
92 | pushl %esi; \ | ||
93 | pushl %edx; \ | ||
94 | pushl %ecx; \ | ||
95 | pushl %ebx; \ | ||
96 | movl $(__USER_DS), %edx; \ | ||
97 | movl %edx, %ds; \ | ||
98 | movl %edx, %es; | ||
99 | |||
100 | #define RESTORE_INT_REGS \ | ||
101 | popl %ebx; \ | ||
102 | popl %ecx; \ | ||
103 | popl %edx; \ | ||
104 | popl %esi; \ | ||
105 | popl %edi; \ | ||
106 | popl %ebp; \ | ||
107 | popl %eax | ||
108 | |||
109 | #define RESTORE_REGS \ | ||
110 | RESTORE_INT_REGS; \ | ||
111 | 1: popl %ds; \ | ||
112 | 2: popl %es; \ | ||
113 | .section .fixup,"ax"; \ | ||
114 | 3: movl $0,(%esp); \ | ||
115 | jmp 1b; \ | ||
116 | 4: movl $0,(%esp); \ | ||
117 | jmp 2b; \ | ||
118 | .previous; \ | ||
119 | .section __ex_table,"a";\ | ||
120 | .align 4; \ | ||
121 | .long 1b,3b; \ | ||
122 | .long 2b,4b; \ | ||
123 | .previous | ||
124 | |||
125 | |||
126 | ENTRY(ret_from_fork) | ||
127 | pushl %eax | ||
128 | call schedule_tail | ||
129 | GET_THREAD_INFO(%ebp) | ||
130 | popl %eax | ||
131 | jmp syscall_exit | ||
132 | |||
133 | /* | ||
134 | * Return to user mode is not as complex as all this looks, | ||
135 | * but we want the default path for a system call return to | ||
136 | * go as quickly as possible which is why some of this is | ||
137 | * less clear than it otherwise should be. | ||
138 | */ | ||
139 | |||
140 | # userspace resumption stub bypassing syscall exit tracing | ||
141 | ALIGN | ||
142 | ret_from_exception: | ||
143 | preempt_stop | ||
144 | ret_from_intr: | ||
145 | GET_THREAD_INFO(%ebp) | ||
146 | movl EFLAGS(%esp), %eax # mix EFLAGS and CS | ||
147 | movb CS(%esp), %al | ||
148 | testl $(VM_MASK | 3), %eax | ||
149 | jz resume_kernel | ||
150 | ENTRY(resume_userspace) | ||
151 | cli # make sure we don't miss an interrupt | ||
152 | # setting need_resched or sigpending | ||
153 | # between sampling and the iret | ||
154 | movl TI_flags(%ebp), %ecx | ||
155 | andl $_TIF_WORK_MASK, %ecx # is there any work to be done on | ||
156 | # int/exception return? | ||
157 | jne work_pending | ||
158 | jmp restore_all | ||
159 | |||
160 | #ifdef CONFIG_PREEMPT | ||
161 | ENTRY(resume_kernel) | ||
162 | cli | ||
163 | cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? | ||
164 | jnz restore_nocheck | ||
165 | need_resched: | ||
166 | movl TI_flags(%ebp), %ecx # need_resched set ? | ||
167 | testb $_TIF_NEED_RESCHED, %cl | ||
168 | jz restore_all | ||
169 | testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ? | ||
170 | jz restore_all | ||
171 | call preempt_schedule_irq | ||
172 | jmp need_resched | ||
173 | #endif | ||
174 | |||
175 | /* SYSENTER_RETURN points to after the "sysenter" instruction in | ||
176 | the vsyscall page. See vsyscall-sysenter.S, which defines the symbol. */ | ||
177 | |||
178 | # sysenter call handler stub | ||
179 | ENTRY(sysenter_entry) | ||
180 | movl TSS_sysenter_esp0(%esp),%esp | ||
181 | sysenter_past_esp: | ||
182 | sti | ||
183 | pushl $(__USER_DS) | ||
184 | pushl %ebp | ||
185 | pushfl | ||
186 | pushl $(__USER_CS) | ||
187 | pushl $SYSENTER_RETURN | ||
188 | |||
189 | /* | ||
190 | * Load the potential sixth argument from user stack. | ||
191 | * Careful about security. | ||
192 | */ | ||
193 | cmpl $__PAGE_OFFSET-3,%ebp | ||
194 | jae syscall_fault | ||
195 | 1: movl (%ebp),%ebp | ||
196 | .section __ex_table,"a" | ||
197 | .align 4 | ||
198 | .long 1b,syscall_fault | ||
199 | .previous | ||
200 | |||
201 | pushl %eax | ||
202 | SAVE_ALL | ||
203 | GET_THREAD_INFO(%ebp) | ||
204 | |||
205 | /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ | ||
206 | testw $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),TI_flags(%ebp) | ||
207 | jnz syscall_trace_entry | ||
208 | cmpl $(nr_syscalls), %eax | ||
209 | jae syscall_badsys | ||
210 | call *sys_call_table(,%eax,4) | ||
211 | movl %eax,EAX(%esp) | ||
212 | cli | ||
213 | movl TI_flags(%ebp), %ecx | ||
214 | testw $_TIF_ALLWORK_MASK, %cx | ||
215 | jne syscall_exit_work | ||
216 | /* if something modifies registers it must also disable sysexit */ | ||
217 | movl EIP(%esp), %edx | ||
218 | movl OLDESP(%esp), %ecx | ||
219 | xorl %ebp,%ebp | ||
220 | sti | ||
221 | sysexit | ||
222 | |||
223 | |||
224 | # system call handler stub | ||
225 | ENTRY(system_call) | ||
226 | pushl %eax # save orig_eax | ||
227 | SAVE_ALL | ||
228 | GET_THREAD_INFO(%ebp) | ||
229 | # system call tracing in operation | ||
230 | /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ | ||
231 | testw $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),TI_flags(%ebp) | ||
232 | jnz syscall_trace_entry | ||
233 | cmpl $(nr_syscalls), %eax | ||
234 | jae syscall_badsys | ||
235 | syscall_call: | ||
236 | call *sys_call_table(,%eax,4) | ||
237 | movl %eax,EAX(%esp) # store the return value | ||
238 | syscall_exit: | ||
239 | cli # make sure we don't miss an interrupt | ||
240 | # setting need_resched or sigpending | ||
241 | # between sampling and the iret | ||
242 | movl TI_flags(%ebp), %ecx | ||
243 | testw $_TIF_ALLWORK_MASK, %cx # current->work | ||
244 | jne syscall_exit_work | ||
245 | |||
246 | restore_all: | ||
247 | movl EFLAGS(%esp), %eax # mix EFLAGS, SS and CS | ||
248 | movb OLDSS(%esp), %ah | ||
249 | movb CS(%esp), %al | ||
250 | andl $(VM_MASK | (4 << 8) | 3), %eax | ||
251 | cmpl $((4 << 8) | 3), %eax | ||
252 | je ldt_ss # returning to user-space with LDT SS | ||
253 | restore_nocheck: | ||
254 | RESTORE_REGS | ||
255 | addl $4, %esp | ||
256 | 1: iret | ||
257 | .section .fixup,"ax" | ||
258 | iret_exc: | ||
259 | sti | ||
260 | movl $__USER_DS, %edx | ||
261 | movl %edx, %ds | ||
262 | movl %edx, %es | ||
263 | movl $11,%eax | ||
264 | call do_exit | ||
265 | .previous | ||
266 | .section __ex_table,"a" | ||
267 | .align 4 | ||
268 | .long 1b,iret_exc | ||
269 | .previous | ||
270 | |||
271 | ldt_ss: | ||
272 | larl OLDSS(%esp), %eax | ||
273 | jnz restore_nocheck | ||
274 | testl $0x00400000, %eax # returning to 32bit stack? | ||
275 | jnz restore_nocheck # all right, normal return | ||
276 | /* If returning to userspace with 16bit stack, | ||
277 | * try to fix the higher word of ESP, as the CPU | ||
278 | * won't restore it. | ||
279 | * This is an "official" bug of all the x86-compatible | ||
280 | * CPUs, which we can try to work around to make | ||
281 | * dosemu and wine happy. */ | ||
282 | subl $8, %esp # reserve space for switch16 pointer | ||
283 | cli | ||
284 | movl %esp, %eax | ||
285 | /* Set up the 16bit stack frame with switch32 pointer on top, | ||
286 | * and a switch16 pointer on top of the current frame. */ | ||
287 | call setup_x86_bogus_stack | ||
288 | RESTORE_REGS | ||
289 | lss 20+4(%esp), %esp # switch to 16bit stack | ||
290 | 1: iret | ||
291 | .section __ex_table,"a" | ||
292 | .align 4 | ||
293 | .long 1b,iret_exc | ||
294 | .previous | ||
295 | |||
296 | # perform work that needs to be done immediately before resumption | ||
297 | ALIGN | ||
298 | work_pending: | ||
299 | testb $_TIF_NEED_RESCHED, %cl | ||
300 | jz work_notifysig | ||
301 | work_resched: | ||
302 | call schedule | ||
303 | cli # make sure we don't miss an interrupt | ||
304 | # setting need_resched or sigpending | ||
305 | # between sampling and the iret | ||
306 | movl TI_flags(%ebp), %ecx | ||
307 | andl $_TIF_WORK_MASK, %ecx # is there any work to be done other | ||
308 | # than syscall tracing? | ||
309 | jz restore_all | ||
310 | testb $_TIF_NEED_RESCHED, %cl | ||
311 | jnz work_resched | ||
312 | |||
313 | work_notifysig: # deal with pending signals and | ||
314 | # notify-resume requests | ||
315 | testl $VM_MASK, EFLAGS(%esp) | ||
316 | movl %esp, %eax | ||
317 | jne work_notifysig_v86 # returning to | ||
318 | # vm86-space | ||
319 | xorl %edx, %edx | ||
320 | call do_notify_resume | ||
321 | jmp restore_all | ||
322 | |||
323 | ALIGN | ||
324 | work_notifysig_v86: | ||
325 | pushl %ecx # save ti_flags for do_notify_resume | ||
326 | call save_v86_state # %eax contains pt_regs pointer | ||
327 | popl %ecx | ||
328 | movl %eax, %esp | ||
329 | xorl %edx, %edx | ||
330 | call do_notify_resume | ||
331 | jmp restore_all | ||
332 | |||
333 | # perform syscall exit tracing | ||
334 | ALIGN | ||
335 | syscall_trace_entry: | ||
336 | movl $-ENOSYS,EAX(%esp) | ||
337 | movl %esp, %eax | ||
338 | xorl %edx,%edx | ||
339 | call do_syscall_trace | ||
340 | movl ORIG_EAX(%esp), %eax | ||
341 | cmpl $(nr_syscalls), %eax | ||
342 | jnae syscall_call | ||
343 | jmp syscall_exit | ||
344 | |||
345 | # perform syscall exit tracing | ||
346 | ALIGN | ||
347 | syscall_exit_work: | ||
348 | testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl | ||
349 | jz work_pending | ||
350 | sti # could let do_syscall_trace() call | ||
351 | # schedule() instead | ||
352 | movl %esp, %eax | ||
353 | movl $1, %edx | ||
354 | call do_syscall_trace | ||
355 | jmp resume_userspace | ||
356 | |||
357 | ALIGN | ||
358 | syscall_fault: | ||
359 | pushl %eax # save orig_eax | ||
360 | SAVE_ALL | ||
361 | GET_THREAD_INFO(%ebp) | ||
362 | movl $-EFAULT,EAX(%esp) | ||
363 | jmp resume_userspace | ||
364 | |||
365 | ALIGN | ||
366 | syscall_badsys: | ||
367 | movl $-ENOSYS,EAX(%esp) | ||
368 | jmp resume_userspace | ||
369 | |||
370 | #define FIXUP_ESPFIX_STACK \ | ||
371 | movl %esp, %eax; \ | ||
372 | /* switch to 32bit stack using the pointer on top of 16bit stack */ \ | ||
373 | lss %ss:CPU_16BIT_STACK_SIZE-8, %esp; \ | ||
374 | /* copy data from 16bit stack to 32bit stack */ \ | ||
375 | call fixup_x86_bogus_stack; \ | ||
376 | /* put ESP to the proper location */ \ | ||
377 | movl %eax, %esp; | ||
378 | #define UNWIND_ESPFIX_STACK \ | ||
379 | pushl %eax; \ | ||
380 | movl %ss, %eax; \ | ||
381 | /* see if on 16bit stack */ \ | ||
382 | cmpw $__ESPFIX_SS, %ax; \ | ||
383 | jne 28f; \ | ||
384 | movl $__KERNEL_DS, %edx; \ | ||
385 | movl %edx, %ds; \ | ||
386 | movl %edx, %es; \ | ||
387 | /* switch to 32bit stack */ \ | ||
388 | FIXUP_ESPFIX_STACK \ | ||
389 | 28: popl %eax; | ||
390 | |||
391 | /* | ||
392 | * Build the entry stubs and pointer table with | ||
393 | * some assembler magic. | ||
394 | */ | ||
395 | .data | ||
396 | ENTRY(interrupt) | ||
397 | .text | ||
398 | |||
399 | vector=0 | ||
400 | ENTRY(irq_entries_start) | ||
401 | .rept NR_IRQS | ||
402 | ALIGN | ||
403 | 1: pushl $vector-256 | ||
404 | jmp common_interrupt | ||
405 | .data | ||
406 | .long 1b | ||
407 | .text | ||
408 | vector=vector+1 | ||
409 | .endr | ||
410 | |||
411 | ALIGN | ||
412 | common_interrupt: | ||
413 | SAVE_ALL | ||
414 | movl %esp,%eax | ||
415 | call do_IRQ | ||
416 | jmp ret_from_intr | ||
417 | |||
418 | #define BUILD_INTERRUPT(name, nr) \ | ||
419 | ENTRY(name) \ | ||
420 | pushl $nr-256; \ | ||
421 | SAVE_ALL \ | ||
422 | movl %esp,%eax; \ | ||
423 | call smp_/**/name; \ | ||
424 | jmp ret_from_intr; | ||
425 | |||
426 | /* The include is where all of the SMP etc. interrupts come from */ | ||
427 | #include "entry_arch.h" | ||
428 | |||
429 | ENTRY(divide_error) | ||
430 | pushl $0 # no error code | ||
431 | pushl $do_divide_error | ||
432 | ALIGN | ||
433 | error_code: | ||
434 | pushl %ds | ||
435 | pushl %eax | ||
436 | xorl %eax, %eax | ||
437 | pushl %ebp | ||
438 | pushl %edi | ||
439 | pushl %esi | ||
440 | pushl %edx | ||
441 | decl %eax # eax = -1 | ||
442 | pushl %ecx | ||
443 | pushl %ebx | ||
444 | cld | ||
445 | pushl %es | ||
446 | UNWIND_ESPFIX_STACK | ||
447 | popl %ecx | ||
448 | movl ES(%esp), %edi # get the function address | ||
449 | movl ORIG_EAX(%esp), %edx # get the error code | ||
450 | movl %eax, ORIG_EAX(%esp) | ||
451 | movl %ecx, ES(%esp) | ||
452 | movl $(__USER_DS), %ecx | ||
453 | movl %ecx, %ds | ||
454 | movl %ecx, %es | ||
455 | movl %esp,%eax # pt_regs pointer | ||
456 | call *%edi | ||
457 | jmp ret_from_exception | ||
458 | |||
459 | ENTRY(coprocessor_error) | ||
460 | pushl $0 | ||
461 | pushl $do_coprocessor_error | ||
462 | jmp error_code | ||
463 | |||
464 | ENTRY(simd_coprocessor_error) | ||
465 | pushl $0 | ||
466 | pushl $do_simd_coprocessor_error | ||
467 | jmp error_code | ||
468 | |||
469 | ENTRY(device_not_available) | ||
470 | pushl $-1 # mark this as an int | ||
471 | SAVE_ALL | ||
472 | movl %cr0, %eax | ||
473 | testl $0x4, %eax # EM (math emulation bit) | ||
474 | jne device_not_available_emulate | ||
475 | preempt_stop | ||
476 | call math_state_restore | ||
477 | jmp ret_from_exception | ||
478 | device_not_available_emulate: | ||
479 | pushl $0 # temporary storage for ORIG_EIP | ||
480 | call math_emulate | ||
481 | addl $4, %esp | ||
482 | jmp ret_from_exception | ||
483 | |||
484 | /* | ||
485 | * Debug traps and NMI can happen at the one SYSENTER instruction | ||
486 | * that sets up the real kernel stack. Check here, since we can't | ||
487 | * allow the wrong stack to be used. | ||
488 | * | ||
489 | * "TSS_sysenter_esp0+12" is because the NMI/debug handler will have | ||
490 | * already pushed 3 words if it hits on the sysenter instruction: | ||
491 | * eflags, cs and eip. | ||
492 | * | ||
493 | * We just load the right stack, and push the three (known) values | ||
494 | * by hand onto the new stack - while updating the return eip past | ||
495 | * the instruction that would have done it for sysenter. | ||
496 | */ | ||
497 | #define FIX_STACK(offset, ok, label) \ | ||
498 | cmpw $__KERNEL_CS,4(%esp); \ | ||
499 | jne ok; \ | ||
500 | label: \ | ||
501 | movl TSS_sysenter_esp0+offset(%esp),%esp; \ | ||
502 | pushfl; \ | ||
503 | pushl $__KERNEL_CS; \ | ||
504 | pushl $sysenter_past_esp | ||
505 | |||
506 | ENTRY(debug) | ||
507 | cmpl $sysenter_entry,(%esp) | ||
508 | jne debug_stack_correct | ||
509 | FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) | ||
510 | debug_stack_correct: | ||
511 | pushl $-1 # mark this as an int | ||
512 | SAVE_ALL | ||
513 | xorl %edx,%edx # error code 0 | ||
514 | movl %esp,%eax # pt_regs pointer | ||
515 | call do_debug | ||
516 | testl %eax,%eax | ||
517 | jnz restore_all | ||
518 | jmp ret_from_exception | ||
519 | |||
520 | /* | ||
521 | * NMI is doubly nasty. It can happen _while_ we're handling | ||
522 | * a debug fault, and the debug fault hasn't yet been able to | ||
523 | * clear up the stack. So we first check whether we got an | ||
524 | * NMI on the sysenter entry path, but after that we need to | ||
525 | * check whether we got an NMI on the debug path where the debug | ||
526 | * fault happened on the sysenter path. | ||
527 | */ | ||
528 | ENTRY(nmi) | ||
529 | pushl %eax | ||
530 | movl %ss, %eax | ||
531 | cmpw $__ESPFIX_SS, %ax | ||
532 | popl %eax | ||
533 | je nmi_16bit_stack | ||
534 | cmpl $sysenter_entry,(%esp) | ||
535 | je nmi_stack_fixup | ||
536 | pushl %eax | ||
537 | movl %esp,%eax | ||
538 | /* Do not access memory above the end of our stack page, | ||
539 | * it might not exist. | ||
540 | */ | ||
541 | andl $(THREAD_SIZE-1),%eax | ||
542 | cmpl $(THREAD_SIZE-20),%eax | ||
543 | popl %eax | ||
544 | jae nmi_stack_correct | ||
545 | cmpl $sysenter_entry,12(%esp) | ||
546 | je nmi_debug_stack_check | ||
547 | nmi_stack_correct: | ||
548 | pushl %eax | ||
549 | SAVE_ALL | ||
550 | xorl %edx,%edx # zero error code | ||
551 | movl %esp,%eax # pt_regs pointer | ||
552 | call do_nmi | ||
553 | jmp restore_all | ||
554 | |||
555 | nmi_stack_fixup: | ||
556 | FIX_STACK(12,nmi_stack_correct, 1) | ||
557 | jmp nmi_stack_correct | ||
558 | nmi_debug_stack_check: | ||
559 | cmpw $__KERNEL_CS,16(%esp) | ||
560 | jne nmi_stack_correct | ||
561 | cmpl $debug - 1,(%esp) | ||
562 | jle nmi_stack_correct | ||
563 | cmpl $debug_esp_fix_insn,(%esp) | ||
564 | jle nmi_debug_stack_fixup | ||
565 | nmi_debug_stack_fixup: | ||
566 | FIX_STACK(24,nmi_stack_correct, 1) | ||
567 | jmp nmi_stack_correct | ||
568 | |||
569 | nmi_16bit_stack: | ||
570 | /* create the pointer to lss back */ | ||
571 | pushl %ss | ||
572 | pushl %esp | ||
573 | movzwl %sp, %esp | ||
574 | addw $4, (%esp) | ||
575 | /* copy the iret frame of 12 bytes */ | ||
576 | .rept 3 | ||
577 | pushl 16(%esp) | ||
578 | .endr | ||
579 | pushl %eax | ||
580 | SAVE_ALL | ||
581 | FIXUP_ESPFIX_STACK # %eax == %esp | ||
582 | xorl %edx,%edx # zero error code | ||
583 | call do_nmi | ||
584 | RESTORE_REGS | ||
585 | lss 12+4(%esp), %esp # back to 16bit stack | ||
586 | 1: iret | ||
587 | .section __ex_table,"a" | ||
588 | .align 4 | ||
589 | .long 1b,iret_exc | ||
590 | .previous | ||
591 | |||
592 | ENTRY(int3) | ||
593 | pushl $-1 # mark this as an int | ||
594 | SAVE_ALL | ||
595 | xorl %edx,%edx # zero error code | ||
596 | movl %esp,%eax # pt_regs pointer | ||
597 | call do_int3 | ||
598 | testl %eax,%eax | ||
599 | jnz restore_all | ||
600 | jmp ret_from_exception | ||
601 | |||
602 | ENTRY(overflow) | ||
603 | pushl $0 | ||
604 | pushl $do_overflow | ||
605 | jmp error_code | ||
606 | |||
607 | ENTRY(bounds) | ||
608 | pushl $0 | ||
609 | pushl $do_bounds | ||
610 | jmp error_code | ||
611 | |||
612 | ENTRY(invalid_op) | ||
613 | pushl $0 | ||
614 | pushl $do_invalid_op | ||
615 | jmp error_code | ||
616 | |||
617 | ENTRY(coprocessor_segment_overrun) | ||
618 | pushl $0 | ||
619 | pushl $do_coprocessor_segment_overrun | ||
620 | jmp error_code | ||
621 | |||
622 | ENTRY(invalid_TSS) | ||
623 | pushl $do_invalid_TSS | ||
624 | jmp error_code | ||
625 | |||
626 | ENTRY(segment_not_present) | ||
627 | pushl $do_segment_not_present | ||
628 | jmp error_code | ||
629 | |||
630 | ENTRY(stack_segment) | ||
631 | pushl $do_stack_segment | ||
632 | jmp error_code | ||
633 | |||
634 | ENTRY(general_protection) | ||
635 | pushl $do_general_protection | ||
636 | jmp error_code | ||
637 | |||
638 | ENTRY(alignment_check) | ||
639 | pushl $do_alignment_check | ||
640 | jmp error_code | ||
641 | |||
642 | ENTRY(page_fault) | ||
643 | pushl $do_page_fault | ||
644 | jmp error_code | ||
645 | |||
646 | #ifdef CONFIG_X86_MCE | ||
647 | ENTRY(machine_check) | ||
648 | pushl $0 | ||
649 | pushl machine_check_vector | ||
650 | jmp error_code | ||
651 | #endif | ||
652 | |||
653 | ENTRY(spurious_interrupt_bug) | ||
654 | pushl $0 | ||
655 | pushl $do_spurious_interrupt_bug | ||
656 | jmp error_code | ||
657 | |||
658 | .data | ||
659 | ENTRY(sys_call_table) | ||
660 | .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ | ||
661 | .long sys_exit | ||
662 | .long sys_fork | ||
663 | .long sys_read | ||
664 | .long sys_write | ||
665 | .long sys_open /* 5 */ | ||
666 | .long sys_close | ||
667 | .long sys_waitpid | ||
668 | .long sys_creat | ||
669 | .long sys_link | ||
670 | .long sys_unlink /* 10 */ | ||
671 | .long sys_execve | ||
672 | .long sys_chdir | ||
673 | .long sys_time | ||
674 | .long sys_mknod | ||
675 | .long sys_chmod /* 15 */ | ||
676 | .long sys_lchown16 | ||
677 | .long sys_ni_syscall /* old break syscall holder */ | ||
678 | .long sys_stat | ||
679 | .long sys_lseek | ||
680 | .long sys_getpid /* 20 */ | ||
681 | .long sys_mount | ||
682 | .long sys_oldumount | ||
683 | .long sys_setuid16 | ||
684 | .long sys_getuid16 | ||
685 | .long sys_stime /* 25 */ | ||
686 | .long sys_ptrace | ||
687 | .long sys_alarm | ||
688 | .long sys_fstat | ||
689 | .long sys_pause | ||
690 | .long sys_utime /* 30 */ | ||
691 | .long sys_ni_syscall /* old stty syscall holder */ | ||
692 | .long sys_ni_syscall /* old gtty syscall holder */ | ||
693 | .long sys_access | ||
694 | .long sys_nice | ||
695 | .long sys_ni_syscall /* 35 - old ftime syscall holder */ | ||
696 | .long sys_sync | ||
697 | .long sys_kill | ||
698 | .long sys_rename | ||
699 | .long sys_mkdir | ||
700 | .long sys_rmdir /* 40 */ | ||
701 | .long sys_dup | ||
702 | .long sys_pipe | ||
703 | .long sys_times | ||
704 | .long sys_ni_syscall /* old prof syscall holder */ | ||
705 | .long sys_brk /* 45 */ | ||
706 | .long sys_setgid16 | ||
707 | .long sys_getgid16 | ||
708 | .long sys_signal | ||
709 | .long sys_geteuid16 | ||
710 | .long sys_getegid16 /* 50 */ | ||
711 | .long sys_acct | ||
712 | .long sys_umount /* recycled never used phys() */ | ||
713 | .long sys_ni_syscall /* old lock syscall holder */ | ||
714 | .long sys_ioctl | ||
715 | .long sys_fcntl /* 55 */ | ||
716 | .long sys_ni_syscall /* old mpx syscall holder */ | ||
717 | .long sys_setpgid | ||
718 | .long sys_ni_syscall /* old ulimit syscall holder */ | ||
719 | .long sys_olduname | ||
720 | .long sys_umask /* 60 */ | ||
721 | .long sys_chroot | ||
722 | .long sys_ustat | ||
723 | .long sys_dup2 | ||
724 | .long sys_getppid | ||
725 | .long sys_getpgrp /* 65 */ | ||
726 | .long sys_setsid | ||
727 | .long sys_sigaction | ||
728 | .long sys_sgetmask | ||
729 | .long sys_ssetmask | ||
730 | .long sys_setreuid16 /* 70 */ | ||
731 | .long sys_setregid16 | ||
732 | .long sys_sigsuspend | ||
733 | .long sys_sigpending | ||
734 | .long sys_sethostname | ||
735 | .long sys_setrlimit /* 75 */ | ||
736 | .long sys_old_getrlimit | ||
737 | .long sys_getrusage | ||
738 | .long sys_gettimeofday | ||
739 | .long sys_settimeofday | ||
740 | .long sys_getgroups16 /* 80 */ | ||
741 | .long sys_setgroups16 | ||
742 | .long old_select | ||
743 | .long sys_symlink | ||
744 | .long sys_lstat | ||
745 | .long sys_readlink /* 85 */ | ||
746 | .long sys_uselib | ||
747 | .long sys_swapon | ||
748 | .long sys_reboot | ||
749 | .long old_readdir | ||
750 | .long old_mmap /* 90 */ | ||
751 | .long sys_munmap | ||
752 | .long sys_truncate | ||
753 | .long sys_ftruncate | ||
754 | .long sys_fchmod | ||
755 | .long sys_fchown16 /* 95 */ | ||
756 | .long sys_getpriority | ||
757 | .long sys_setpriority | ||
758 | .long sys_ni_syscall /* old profil syscall holder */ | ||
759 | .long sys_statfs | ||
760 | .long sys_fstatfs /* 100 */ | ||
761 | .long sys_ioperm | ||
762 | .long sys_socketcall | ||
763 | .long sys_syslog | ||
764 | .long sys_setitimer | ||
765 | .long sys_getitimer /* 105 */ | ||
766 | .long sys_newstat | ||
767 | .long sys_newlstat | ||
768 | .long sys_newfstat | ||
769 | .long sys_uname | ||
770 | .long sys_iopl /* 110 */ | ||
771 | .long sys_vhangup | ||
772 | .long sys_ni_syscall /* old "idle" system call */ | ||
773 | .long sys_vm86old | ||
774 | .long sys_wait4 | ||
775 | .long sys_swapoff /* 115 */ | ||
776 | .long sys_sysinfo | ||
777 | .long sys_ipc | ||
778 | .long sys_fsync | ||
779 | .long sys_sigreturn | ||
780 | .long sys_clone /* 120 */ | ||
781 | .long sys_setdomainname | ||
782 | .long sys_newuname | ||
783 | .long sys_modify_ldt | ||
784 | .long sys_adjtimex | ||
785 | .long sys_mprotect /* 125 */ | ||
786 | .long sys_sigprocmask | ||
787 | .long sys_ni_syscall /* old "create_module" */ | ||
788 | .long sys_init_module | ||
789 | .long sys_delete_module | ||
790 | .long sys_ni_syscall /* 130: old "get_kernel_syms" */ | ||
791 | .long sys_quotactl | ||
792 | .long sys_getpgid | ||
793 | .long sys_fchdir | ||
794 | .long sys_bdflush | ||
795 | .long sys_sysfs /* 135 */ | ||
796 | .long sys_personality | ||
797 | .long sys_ni_syscall /* reserved for afs_syscall */ | ||
798 | .long sys_setfsuid16 | ||
799 | .long sys_setfsgid16 | ||
800 | .long sys_llseek /* 140 */ | ||
801 | .long sys_getdents | ||
802 | .long sys_select | ||
803 | .long sys_flock | ||
804 | .long sys_msync | ||
805 | .long sys_readv /* 145 */ | ||
806 | .long sys_writev | ||
807 | .long sys_getsid | ||
808 | .long sys_fdatasync | ||
809 | .long sys_sysctl | ||
810 | .long sys_mlock /* 150 */ | ||
811 | .long sys_munlock | ||
812 | .long sys_mlockall | ||
813 | .long sys_munlockall | ||
814 | .long sys_sched_setparam | ||
815 | .long sys_sched_getparam /* 155 */ | ||
816 | .long sys_sched_setscheduler | ||
817 | .long sys_sched_getscheduler | ||
818 | .long sys_sched_yield | ||
819 | .long sys_sched_get_priority_max | ||
820 | .long sys_sched_get_priority_min /* 160 */ | ||
821 | .long sys_sched_rr_get_interval | ||
822 | .long sys_nanosleep | ||
823 | .long sys_mremap | ||
824 | .long sys_setresuid16 | ||
825 | .long sys_getresuid16 /* 165 */ | ||
826 | .long sys_vm86 | ||
827 | .long sys_ni_syscall /* Old sys_query_module */ | ||
828 | .long sys_poll | ||
829 | .long sys_nfsservctl | ||
830 | .long sys_setresgid16 /* 170 */ | ||
831 | .long sys_getresgid16 | ||
832 | .long sys_prctl | ||
833 | .long sys_rt_sigreturn | ||
834 | .long sys_rt_sigaction | ||
835 | .long sys_rt_sigprocmask /* 175 */ | ||
836 | .long sys_rt_sigpending | ||
837 | .long sys_rt_sigtimedwait | ||
838 | .long sys_rt_sigqueueinfo | ||
839 | .long sys_rt_sigsuspend | ||
840 | .long sys_pread64 /* 180 */ | ||
841 | .long sys_pwrite64 | ||
842 | .long sys_chown16 | ||
843 | .long sys_getcwd | ||
844 | .long sys_capget | ||
845 | .long sys_capset /* 185 */ | ||
846 | .long sys_sigaltstack | ||
847 | .long sys_sendfile | ||
848 | .long sys_ni_syscall /* reserved for streams1 */ | ||
849 | .long sys_ni_syscall /* reserved for streams2 */ | ||
850 | .long sys_vfork /* 190 */ | ||
851 | .long sys_getrlimit | ||
852 | .long sys_mmap2 | ||
853 | .long sys_truncate64 | ||
854 | .long sys_ftruncate64 | ||
855 | .long sys_stat64 /* 195 */ | ||
856 | .long sys_lstat64 | ||
857 | .long sys_fstat64 | ||
858 | .long sys_lchown | ||
859 | .long sys_getuid | ||
860 | .long sys_getgid /* 200 */ | ||
861 | .long sys_geteuid | ||
862 | .long sys_getegid | ||
863 | .long sys_setreuid | ||
864 | .long sys_setregid | ||
865 | .long sys_getgroups /* 205 */ | ||
866 | .long sys_setgroups | ||
867 | .long sys_fchown | ||
868 | .long sys_setresuid | ||
869 | .long sys_getresuid | ||
870 | .long sys_setresgid /* 210 */ | ||
871 | .long sys_getresgid | ||
872 | .long sys_chown | ||
873 | .long sys_setuid | ||
874 | .long sys_setgid | ||
875 | .long sys_setfsuid /* 215 */ | ||
876 | .long sys_setfsgid | ||
877 | .long sys_pivot_root | ||
878 | .long sys_mincore | ||
879 | .long sys_madvise | ||
880 | .long sys_getdents64 /* 220 */ | ||
881 | .long sys_fcntl64 | ||
882 | .long sys_ni_syscall /* reserved for TUX */ | ||
883 | .long sys_ni_syscall | ||
884 | .long sys_gettid | ||
885 | .long sys_readahead /* 225 */ | ||
886 | .long sys_setxattr | ||
887 | .long sys_lsetxattr | ||
888 | .long sys_fsetxattr | ||
889 | .long sys_getxattr | ||
890 | .long sys_lgetxattr /* 230 */ | ||
891 | .long sys_fgetxattr | ||
892 | .long sys_listxattr | ||
893 | .long sys_llistxattr | ||
894 | .long sys_flistxattr | ||
895 | .long sys_removexattr /* 235 */ | ||
896 | .long sys_lremovexattr | ||
897 | .long sys_fremovexattr | ||
898 | .long sys_tkill | ||
899 | .long sys_sendfile64 | ||
900 | .long sys_futex /* 240 */ | ||
901 | .long sys_sched_setaffinity | ||
902 | .long sys_sched_getaffinity | ||
903 | .long sys_set_thread_area | ||
904 | .long sys_get_thread_area | ||
905 | .long sys_io_setup /* 245 */ | ||
906 | .long sys_io_destroy | ||
907 | .long sys_io_getevents | ||
908 | .long sys_io_submit | ||
909 | .long sys_io_cancel | ||
910 | .long sys_fadvise64 /* 250 */ | ||
911 | .long sys_ni_syscall | ||
912 | .long sys_exit_group | ||
913 | .long sys_lookup_dcookie | ||
914 | .long sys_epoll_create | ||
915 | .long sys_epoll_ctl /* 255 */ | ||
916 | .long sys_epoll_wait | ||
917 | .long sys_remap_file_pages | ||
918 | .long sys_set_tid_address | ||
919 | .long sys_timer_create | ||
920 | .long sys_timer_settime /* 260 */ | ||
921 | .long sys_timer_gettime | ||
922 | .long sys_timer_getoverrun | ||
923 | .long sys_timer_delete | ||
924 | .long sys_clock_settime | ||
925 | .long sys_clock_gettime /* 265 */ | ||
926 | .long sys_clock_getres | ||
927 | .long sys_clock_nanosleep | ||
928 | .long sys_statfs64 | ||
929 | .long sys_fstatfs64 | ||
930 | .long sys_tgkill /* 270 */ | ||
931 | .long sys_utimes | ||
932 | .long sys_fadvise64_64 | ||
933 | .long sys_ni_syscall /* sys_vserver */ | ||
934 | .long sys_mbind | ||
935 | .long sys_get_mempolicy | ||
936 | .long sys_set_mempolicy | ||
937 | .long sys_mq_open | ||
938 | .long sys_mq_unlink | ||
939 | .long sys_mq_timedsend | ||
940 | .long sys_mq_timedreceive /* 280 */ | ||
941 | .long sys_mq_notify | ||
942 | .long sys_mq_getsetattr | ||
943 | .long sys_ni_syscall /* reserved for kexec */ | ||
944 | .long sys_waitid | ||
945 | .long sys_ni_syscall /* 285 */ /* available */ | ||
946 | .long sys_add_key | ||
947 | .long sys_request_key | ||
948 | .long sys_keyctl | ||
949 | |||
950 | syscall_table_size=(.-sys_call_table) | ||
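The table above is just a flat array of 32-bit function pointers, indexed by the syscall number userspace loads into %eax; the system_call entry point earlier in this file bounds-checks that number against the table size and then calls through the table. A minimal user-space C sketch of the same dispatch pattern (the demo_* names are placeholders, not the kernel's sys_* handlers):

	#include <stdio.h>

	typedef long (*syscall_fn)(long, long, long);

	static long demo_ni_syscall(long a, long b, long c)
	{
		(void)a; (void)b; (void)c;
		return -38;				/* -ENOSYS */
	}

	static long demo_write(long fd, long buf, long len)
	{
		(void)fd;				/* always stdout in this sketch */
		return (long)fwrite((const void *)buf, 1, (size_t)len, stdout);
	}

	static const syscall_fn demo_call_table[] = {
		demo_ni_syscall,			/* 0 */
		demo_ni_syscall,			/* 1 */
		demo_ni_syscall,			/* 2 */
		demo_ni_syscall,			/* 3 */
		demo_write,				/* 4 - stands in for sys_write */
	};
	#define NR_DEMO_CALLS (sizeof(demo_call_table) / sizeof(demo_call_table[0]))

	static long demo_dispatch(unsigned int nr, long a, long b, long c)
	{
		if (nr >= NR_DEMO_CALLS)		/* the "badsys" bounds check */
			return -38;			/* -ENOSYS */
		return demo_call_table[nr](a, b, c);	/* call *table(,%eax,4) */
	}

	int main(void)
	{
		static const char msg[] = "hello from the table\n";
		return demo_dispatch(4, 1, (long)msg, sizeof(msg) - 1) > 0 ? 0 : 1;
	}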
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S new file mode 100644 index 000000000000..d273fd746192 --- /dev/null +++ b/arch/i386/kernel/head.S | |||
@@ -0,0 +1,521 @@ | |||
1 | /* | ||
2 | * linux/arch/i386/kernel/head.S -- the 32-bit startup code. | ||
3 | * | ||
4 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
5 | * | ||
6 | * Enhanced CPU detection and feature setting code by Mike Jagdis | ||
7 | * and Martin Mares, November 1997. | ||
8 | */ | ||
9 | |||
10 | .text | ||
11 | #include <linux/config.h> | ||
12 | #include <linux/threads.h> | ||
13 | #include <linux/linkage.h> | ||
14 | #include <asm/segment.h> | ||
15 | #include <asm/page.h> | ||
16 | #include <asm/pgtable.h> | ||
17 | #include <asm/desc.h> | ||
18 | #include <asm/cache.h> | ||
19 | #include <asm/thread_info.h> | ||
20 | #include <asm/asm_offsets.h> | ||
21 | #include <asm/setup.h> | ||
22 | |||
23 | /* | ||
24 | * References to members of the new_cpu_data structure. | ||
25 | */ | ||
26 | |||
27 | #define X86 new_cpu_data+CPUINFO_x86 | ||
28 | #define X86_VENDOR new_cpu_data+CPUINFO_x86_vendor | ||
29 | #define X86_MODEL new_cpu_data+CPUINFO_x86_model | ||
30 | #define X86_MASK new_cpu_data+CPUINFO_x86_mask | ||
31 | #define X86_HARD_MATH new_cpu_data+CPUINFO_hard_math | ||
32 | #define X86_CPUID new_cpu_data+CPUINFO_cpuid_level | ||
33 | #define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability | ||
34 | #define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id | ||
35 | |||
36 | /* | ||
37 | * This is how much memory *in addition to the memory covered up to | ||
38 | * and including _end* we need mapped initially. We need one bit for | ||
39 | * each possible page, but only in low memory, which means | ||
40 | * 2^32/4096/8 = 128K worst case (4G/4G split). | ||
41 | * | ||
42 | * Modulo rounding, each megabyte assigned here requires a kilobyte of | ||
43 | * memory, which is currently unreclaimed. | ||
44 | * | ||
45 | * This should be a multiple of a page. | ||
46 | */ | ||
47 | #define INIT_MAP_BEYOND_END (128*1024) | ||
48 | |||
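The 128K figure in the comment is the worst-case bitmap arithmetic spelled out: one bit for every possible 4 KB page in a full 4 GB address space. A throwaway C sketch of the calculation:

	#include <stdio.h>

	int main(void)
	{
		unsigned long long pages = (1ULL << 32) / 4096;	/* 1048576 possible pages */
		unsigned long long bitmap_bytes = pages / 8;	/* one bit per page */

		printf("%llu KB\n", bitmap_bytes / 1024);	/* prints: 128 */
		return 0;
	}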
49 | |||
50 | /* | ||
51 | * 32-bit kernel entrypoint; only used by the boot CPU. On entry, | ||
52 | * %esi points to the real-mode code as a 32-bit pointer. | ||
53 | * CS and DS must be 4 GB flat segments, but we don't depend on | ||
54 | * any particular GDT layout, because we load our own as soon as we | ||
55 | * can. | ||
56 | */ | ||
57 | ENTRY(startup_32) | ||
58 | |||
59 | /* | ||
60 | * Set segments to known values. | ||
61 | */ | ||
62 | cld | ||
63 | lgdt boot_gdt_descr - __PAGE_OFFSET | ||
64 | movl $(__BOOT_DS),%eax | ||
65 | movl %eax,%ds | ||
66 | movl %eax,%es | ||
67 | movl %eax,%fs | ||
68 | movl %eax,%gs | ||
69 | |||
70 | /* | ||
71 | * Clear BSS first so that there are no surprises... | ||
72 | * No need to cld as DF is already clear from cld above... | ||
73 | */ | ||
74 | xorl %eax,%eax | ||
75 | movl $__bss_start - __PAGE_OFFSET,%edi | ||
76 | movl $__bss_stop - __PAGE_OFFSET,%ecx | ||
77 | subl %edi,%ecx | ||
78 | shrl $2,%ecx | ||
79 | rep ; stosl | ||
80 | |||
81 | /* | ||
82 | * Initialize page tables. This creates a PDE and a set of page | ||
83 | * tables, which are located immediately beyond _end. The variable | ||
84 | * init_pg_tables_end is set up to point to the first "safe" location. | ||
85 | * Mappings are created both at virtual address 0 (identity mapping) | ||
86 | * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END. | ||
87 | * | ||
88 | * Warning: don't use %esi or the stack in this code. However, %esp | ||
89 | * can be used as a GPR if you really need it... | ||
90 | */ | ||
91 | page_pde_offset = (__PAGE_OFFSET >> 20); | ||
92 | |||
93 | movl $(pg0 - __PAGE_OFFSET), %edi | ||
94 | movl $(swapper_pg_dir - __PAGE_OFFSET), %edx | ||
95 | movl $0x007, %eax /* 0x007 = PRESENT+RW+USER */ | ||
96 | 10: | ||
97 | leal 0x007(%edi),%ecx /* Create PDE entry */ | ||
98 | movl %ecx,(%edx) /* Store identity PDE entry */ | ||
99 | movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */ | ||
100 | addl $4,%edx | ||
101 | movl $1024, %ecx | ||
102 | 11: | ||
103 | stosl | ||
104 | addl $0x1000,%eax | ||
105 | loop 11b | ||
106 | /* End condition: we must map up to and including INIT_MAP_BEYOND_END */ | ||
107 | /* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */ | ||
108 | leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp | ||
109 | cmpl %ebp,%eax | ||
110 | jb 10b | ||
111 | movl %edi,(init_pg_tables_end - __PAGE_OFFSET) | ||
112 | |||
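In C terms, each pass of the loop above carves out one page table of 1024 entries, points both the identity slot and the PAGE_OFFSET slot of the page directory at it, and keeps going until the mapping extends INIT_MAP_BEYOND_END bytes past the tables themselves. A simplified sketch of the structure it builds (assumptions: a 32-bit build, a fixed pool of eight tables standing in for pg0 and the pages after it, and array addresses treated as physical); note that head.S's page_pde_offset is a byte offset into the directory, which becomes index PAGE_OFFSET >> 22 once the directory is treated as an array:

	#include <stdint.h>

	#define PTRS_PER_PDE	1024
	#define PTRS_PER_PTE	1024
	#define PAGE_OFFSET	0xC0000000u
	#define ATTRS		0x007u	/* PRESENT+RW+USER, the 0x007 above */

	static uint32_t pg_dir[PTRS_PER_PDE];		/* swapper_pg_dir */
	static uint32_t pg_tables[8][PTRS_PER_PTE];	/* pg0 and friends */

	static void build_boot_mappings(unsigned int ntables)
	{
		uint32_t paddr = 0;
		unsigned int i, j;

		for (i = 0; i < ntables; i++) {
			/* one PDE value, installed at two slots */
			uint32_t pde = (uint32_t)(uintptr_t)pg_tables[i] | ATTRS;

			pg_dir[i] = pde;			/* identity mapping */
			pg_dir[(PAGE_OFFSET >> 22) + i] = pde;	/* kernel mapping */

			/* fill the table with 1024 consecutive 4 KB mappings */
			for (j = 0; j < PTRS_PER_PTE; j++, paddr += 0x1000)
				pg_tables[i][j] = paddr | ATTRS;
		}
	}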
113 | #ifdef CONFIG_SMP | ||
114 | xorl %ebx,%ebx /* This is the boot CPU (BSP) */ | ||
115 | jmp 3f | ||
116 | |||
117 | /* | ||
118 | * Non-boot CPU entry point; entered from trampoline.S | ||
119 | * We can't lgdt here, because lgdt itself uses a data segment, but | ||
120 | * we know the trampoline has already loaded the boot_gdt_table GDT | ||
121 | * for us. | ||
122 | */ | ||
123 | ENTRY(startup_32_smp) | ||
124 | cld | ||
125 | movl $(__BOOT_DS),%eax | ||
126 | movl %eax,%ds | ||
127 | movl %eax,%es | ||
128 | movl %eax,%fs | ||
129 | movl %eax,%gs | ||
130 | |||
131 | /* | ||
132 | * New page tables may be in 4Mbyte page mode and may | ||
133 | * be using global pages. | ||
134 | * | ||
135 | * NOTE! If we are on a 486 we may have no cr4 at all! | ||
136 | * So we do not try to touch it unless we really have | ||
137 | * some bits in it to set. This won't work if the BSP | ||
138 | * implements cr4 but this AP does not -- very unlikely | ||
139 | * but be warned! The same applies to the pse feature | ||
140 | * if not equally supported. --macro | ||
141 | * | ||
142 | * NOTE! We have to correct for the fact that we're | ||
143 | * not yet offset PAGE_OFFSET.. | ||
144 | */ | ||
145 | #define cr4_bits mmu_cr4_features-__PAGE_OFFSET | ||
146 | movl cr4_bits,%edx | ||
147 | andl %edx,%edx | ||
148 | jz 6f | ||
149 | movl %cr4,%eax # Turn on paging options (PSE,PAE,..) | ||
150 | orl %edx,%eax | ||
151 | movl %eax,%cr4 | ||
152 | |||
153 | btl $5, %eax # check if PAE is enabled | ||
154 | jnc 6f | ||
155 | |||
156 | /* Check if extended functions are implemented */ | ||
157 | movl $0x80000000, %eax | ||
158 | cpuid | ||
159 | cmpl $0x80000000, %eax | ||
160 | jbe 6f | ||
161 | mov $0x80000001, %eax | ||
162 | cpuid | ||
163 | /* Execute Disable bit supported? */ | ||
164 | btl $20, %edx | ||
165 | jnc 6f | ||
166 | |||
167 | /* Setup EFER (Extended Feature Enable Register) */ | ||
168 | movl $0xc0000080, %ecx | ||
169 | rdmsr | ||
170 | |||
171 | btsl $11, %eax | ||
172 | /* Make changes effective */ | ||
173 | wrmsr | ||
174 | |||
175 | 6: | ||
176 | /* This is a secondary processor (AP) */ | ||
177 | xorl %ebx,%ebx | ||
178 | incl %ebx | ||
179 | |||
180 | 3: | ||
181 | #endif /* CONFIG_SMP */ | ||
182 | |||
183 | /* | ||
184 | * Enable paging | ||
185 | */ | ||
186 | movl $swapper_pg_dir-__PAGE_OFFSET,%eax | ||
187 | movl %eax,%cr3 /* set the page table pointer.. */ | ||
188 | movl %cr0,%eax | ||
189 | orl $0x80000000,%eax | ||
190 | movl %eax,%cr0 /* ..and set paging (PG) bit */ | ||
191 | ljmp $__BOOT_CS,$1f /* Clear prefetch and normalize %eip */ | ||
192 | 1: | ||
193 | /* Set up the stack pointer */ | ||
194 | lss stack_start,%esp | ||
195 | |||
196 | /* | ||
197 | * Initialize eflags. Some BIOSes leave bits like NT set. This would | ||
198 | * confuse the debugger if this code is traced. | ||
199 | * XXX - best to initialize before switching to protected mode. | ||
200 | */ | ||
201 | pushl $0 | ||
202 | popfl | ||
203 | |||
204 | #ifdef CONFIG_SMP | ||
205 | andl %ebx,%ebx | ||
206 | jz 1f /* Initial CPU cleans BSS */ | ||
207 | jmp checkCPUtype | ||
208 | 1: | ||
209 | #endif /* CONFIG_SMP */ | ||
210 | |||
211 | /* | ||
212 | * Start the 32-bit system setup. We need to re-do some of the things done | ||
213 | * in 16-bit mode for the "real" operations. | ||
214 | */ | ||
215 | call setup_idt | ||
216 | |||
217 | /* | ||
218 | * Copy bootup parameters out of the way. | ||
219 | * Note: %esi still has the pointer to the real-mode data. | ||
220 | */ | ||
221 | movl $boot_params,%edi | ||
222 | movl $(PARAM_SIZE/4),%ecx | ||
223 | cld | ||
224 | rep | ||
225 | movsl | ||
226 | movl boot_params+NEW_CL_POINTER,%esi | ||
227 | andl %esi,%esi | ||
228 | jnz 2f # New command line protocol | ||
229 | cmpw $(OLD_CL_MAGIC),OLD_CL_MAGIC_ADDR | ||
230 | jne 1f | ||
231 | movzwl OLD_CL_OFFSET,%esi | ||
232 | addl $(OLD_CL_BASE_ADDR),%esi | ||
233 | 2: | ||
234 | movl $saved_command_line,%edi | ||
235 | movl $(COMMAND_LINE_SIZE/4),%ecx | ||
236 | rep | ||
237 | movsl | ||
238 | 1: | ||
239 | checkCPUtype: | ||
240 | |||
241 | movl $-1,X86_CPUID # -1 for no CPUID initially | ||
242 | |||
243 | /* check if it is 486 or 386. */ | ||
244 | /* | ||
245 | * XXX - this does a lot of unnecessary setup. Alignment checks don't | ||
246 | * apply at our cpl of 0 and the stack ought to be aligned already, and | ||
247 | * we don't need to preserve eflags. | ||
248 | */ | ||
249 | |||
250 | movb $3,X86 # at least 386 | ||
251 | pushfl # push EFLAGS | ||
252 | popl %eax # get EFLAGS | ||
253 | movl %eax,%ecx # save original EFLAGS | ||
254 | xorl $0x240000,%eax # flip AC and ID bits in EFLAGS | ||
255 | pushl %eax # copy to EFLAGS | ||
256 | popfl # set EFLAGS | ||
257 | pushfl # get new EFLAGS | ||
258 | popl %eax # put it in eax | ||
259 | xorl %ecx,%eax # change in flags | ||
260 | pushl %ecx # restore original EFLAGS | ||
261 | popfl | ||
262 | testl $0x40000,%eax # check if AC bit changed | ||
263 | je is386 | ||
264 | |||
265 | movb $4,X86 # at least 486 | ||
266 | testl $0x200000,%eax # check if ID bit changed | ||
267 | je is486 | ||
268 | |||
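The pushfl/popfl dance above is the classic pre-CPUID detection trick: a 386 cannot toggle AC (EFLAGS bit 18), and a 486 without CPUID cannot toggle ID (bit 21). A hedged C rendition of just the ID-bit probe, assuming a 32-bit GCC build:

	/* Returns non-zero when the CPUID instruction is available. */
	static int have_cpuid(void)
	{
		unsigned long before, after;

		__asm__ volatile(
			"pushfl\n\t"
			"popl %0\n\t"			/* before = EFLAGS */
			"movl %0, %1\n\t"
			"xorl $0x200000, %1\n\t"	/* flip ID (bit 21) */
			"pushl %1\n\t"
			"popfl\n\t"
			"pushfl\n\t"
			"popl %1\n\t"			/* after = EFLAGS, maybe changed */
			"pushl %0\n\t"
			"popfl"				/* restore the original flags */
			: "=&r" (before), "=&r" (after)
			:
			: "cc");

		return ((before ^ after) & 0x200000) != 0;
	}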
269 | /* get vendor info */ | ||
270 | xorl %eax,%eax # call CPUID with 0 -> return vendor ID | ||
271 | cpuid | ||
272 | movl %eax,X86_CPUID # save CPUID level | ||
273 | movl %ebx,X86_VENDOR_ID # lo 4 chars | ||
274 | movl %edx,X86_VENDOR_ID+4 # next 4 chars | ||
275 | movl %ecx,X86_VENDOR_ID+8 # last 4 chars | ||
276 | |||
277 | orl %eax,%eax # do we have processor info as well? | ||
278 | je is486 | ||
279 | |||
280 | movl $1,%eax # Use the CPUID instruction to get CPU type | ||
281 | cpuid | ||
282 | movb %al,%cl # save reg for future use | ||
283 | andb $0x0f,%ah # mask processor family | ||
284 | movb %ah,X86 | ||
285 | andb $0xf0,%al # mask model | ||
286 | shrb $4,%al | ||
287 | movb %al,X86_MODEL | ||
288 | andb $0x0f,%cl # mask off the mask revision (stepping) | ||
289 | movb %cl,X86_MASK | ||
290 | movl %edx,X86_CAPABILITY | ||
291 | |||
292 | is486: movl $0x50022,%ecx # set AM, WP, NE and MP | ||
293 | jmp 2f | ||
294 | |||
295 | is386: movl $2,%ecx # set MP | ||
296 | 2: movl %cr0,%eax | ||
297 | andl $0x80000011,%eax # Save PG,PE,ET | ||
298 | orl %ecx,%eax | ||
299 | movl %eax,%cr0 | ||
300 | |||
301 | call check_x87 | ||
302 | incb ready | ||
303 | lgdt cpu_gdt_descr | ||
304 | lidt idt_descr | ||
305 | ljmp $(__KERNEL_CS),$1f | ||
306 | 1: movl $(__KERNEL_DS),%eax # reload all the segment registers | ||
307 | movl %eax,%ss # after changing gdt. | ||
308 | |||
309 | movl $(__USER_DS),%eax # DS/ES contains default USER segment | ||
310 | movl %eax,%ds | ||
311 | movl %eax,%es | ||
312 | |||
313 | xorl %eax,%eax # Clear FS/GS and LDT | ||
314 | movl %eax,%fs | ||
315 | movl %eax,%gs | ||
316 | lldt %ax | ||
317 | cld # gcc2 wants the direction flag cleared at all times | ||
318 | #ifdef CONFIG_SMP | ||
319 | movb ready, %cl | ||
320 | cmpb $1,%cl | ||
321 | je 1f # the first CPU calls start_kernel | ||
322 | # all other CPUs call initialize_secondary | ||
323 | call initialize_secondary | ||
324 | jmp L6 | ||
325 | 1: | ||
326 | #endif /* CONFIG_SMP */ | ||
327 | call start_kernel | ||
328 | L6: | ||
329 | jmp L6 # main should never return here, but | ||
330 | # just in case, we know what happens. | ||
331 | |||
332 | /* | ||
333 | * We depend on ET to be correct. This checks for 287/387. | ||
334 | */ | ||
335 | check_x87: | ||
336 | movb $0,X86_HARD_MATH | ||
337 | clts | ||
338 | fninit | ||
339 | fstsw %ax | ||
340 | cmpb $0,%al | ||
341 | je 1f | ||
342 | movl %cr0,%eax /* no coprocessor: have to set bits */ | ||
343 | xorl $4,%eax /* set EM */ | ||
344 | movl %eax,%cr0 | ||
345 | ret | ||
346 | ALIGN | ||
347 | 1: movb $1,X86_HARD_MATH | ||
348 | .byte 0xDB,0xE4 /* fsetpm for 287, ignored by 387 */ | ||
349 | ret | ||
350 | |||
351 | /* | ||
352 | * setup_idt | ||
353 | * | ||
354 | * sets up an IDT with 256 entries, all pointing to | ||
355 | * ignore_int as interrupt gates. It doesn't actually load the | ||
356 | * IDT - that can be done only after paging has been enabled | ||
357 | * and the kernel moved to PAGE_OFFSET. Interrupts | ||
358 | * are enabled elsewhere, when we can be relatively | ||
359 | * sure everything is ok. | ||
360 | * | ||
361 | * Warning: %esi is live across this function. | ||
362 | */ | ||
363 | setup_idt: | ||
364 | lea ignore_int,%edx | ||
365 | movl $(__KERNEL_CS << 16),%eax | ||
366 | movw %dx,%ax /* low word: offset 0..15 of ignore_int; high word: __KERNEL_CS */ | ||
367 | movw $0x8E00,%dx /* interrupt gate - dpl=0, present */ | ||
368 | |||
369 | lea idt_table,%edi | ||
370 | mov $256,%ecx | ||
371 | rp_sidt: | ||
372 | movl %eax,(%edi) | ||
373 | movl %edx,4(%edi) | ||
374 | addl $8,%edi | ||
375 | dec %ecx | ||
376 | jne rp_sidt | ||
377 | ret | ||
378 | |||
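setup_idt assembles each gate descriptor in a register pair: %eax ends up holding selector<<16 | offset[15:0], and %edx holds offset[31:16] in its high word with 0x8E00 (present, DPL 0, 32-bit interrupt gate) in its low word. The same packing written out as a C sketch:

	#include <stdint.h>

	struct idt_gate {
		uint32_t lo;	/* what rp_sidt stores from %eax */
		uint32_t hi;	/* what rp_sidt stores from %edx */
	};

	static struct idt_gate make_int_gate(uint32_t handler, uint16_t selector)
	{
		struct idt_gate g;

		g.lo = ((uint32_t)selector << 16) | (handler & 0xFFFFu);
		g.hi = (handler & 0xFFFF0000u) | 0x8E00u;	/* P=1, DPL=0, type=0xE */
		return g;
	}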
379 | /* This is the default interrupt "handler" :-) */ | ||
380 | ALIGN | ||
381 | ignore_int: | ||
382 | cld | ||
383 | pushl %eax | ||
384 | pushl %ecx | ||
385 | pushl %edx | ||
386 | pushl %es | ||
387 | pushl %ds | ||
388 | movl $(__KERNEL_DS),%eax | ||
389 | movl %eax,%ds | ||
390 | movl %eax,%es | ||
391 | pushl 16(%esp) | ||
392 | pushl 24(%esp) | ||
393 | pushl 32(%esp) | ||
394 | pushl 40(%esp) | ||
395 | pushl $int_msg | ||
396 | call printk | ||
397 | addl $(5*4),%esp | ||
398 | popl %ds | ||
399 | popl %es | ||
400 | popl %edx | ||
401 | popl %ecx | ||
402 | popl %eax | ||
403 | iret | ||
404 | |||
405 | /* | ||
406 | * Real beginning of normal "text" segment | ||
407 | */ | ||
408 | ENTRY(stext) | ||
409 | ENTRY(_stext) | ||
410 | |||
411 | /* | ||
412 | * BSS section | ||
413 | */ | ||
414 | .section ".bss.page_aligned","w" | ||
415 | ENTRY(swapper_pg_dir) | ||
416 | .fill 1024,4,0 | ||
417 | ENTRY(empty_zero_page) | ||
418 | .fill 4096,1,0 | ||
419 | |||
420 | /* | ||
421 | * This starts the data section. | ||
422 | */ | ||
423 | .data | ||
424 | |||
425 | ENTRY(stack_start) | ||
426 | .long init_thread_union+THREAD_SIZE | ||
427 | .long __BOOT_DS | ||
428 | |||
429 | ready: .byte 0 | ||
430 | |||
431 | int_msg: | ||
432 | .asciz "Unknown interrupt or fault at EIP %p %p %p\n" | ||
433 | |||
434 | /* | ||
435 | * The IDT and GDT 'descriptors' are strange 48-bit objects | ||
436 | * only used by the lidt and lgdt instructions. They are not | ||
437 | * like usual segment descriptors - they consist of a 16-bit | ||
438 | * segment size and a 32-bit linear address value: | ||
439 | */ | ||
440 | |||
441 | .globl boot_gdt_descr | ||
442 | .globl idt_descr | ||
443 | .globl cpu_gdt_descr | ||
444 | |||
445 | ALIGN | ||
446 | # early boot GDT descriptor (must use 1:1 address mapping) | ||
447 | .word 0 # 32-bit align gdt_desc.address | ||
448 | boot_gdt_descr: | ||
449 | .word __BOOT_DS+7 | ||
450 | .long boot_gdt_table - __PAGE_OFFSET | ||
451 | |||
452 | .word 0 # 32-bit align idt_desc.address | ||
453 | idt_descr: | ||
454 | .word IDT_ENTRIES*8-1 # idt contains 256 entries | ||
455 | .long idt_table | ||
456 | |||
457 | # boot GDT descriptor (later on used by CPU#0): | ||
458 | .word 0 # 32-bit align gdt_desc.address | ||
459 | cpu_gdt_descr: | ||
460 | .word GDT_ENTRIES*8-1 | ||
461 | .long cpu_gdt_table | ||
462 | |||
463 | .fill NR_CPUS-1,8,0 # space for the other GDT descriptors | ||
464 | |||
465 | /* | ||
466 | * The boot_gdt_table must mirror the equivalent in setup.S and is | ||
467 | * used only for booting. | ||
468 | */ | ||
469 | .align L1_CACHE_BYTES | ||
470 | ENTRY(boot_gdt_table) | ||
471 | .fill GDT_ENTRY_BOOT_CS,8,0 | ||
472 | .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */ | ||
473 | .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */ | ||
474 | |||
475 | /* | ||
476 | * The Global Descriptor Table contains 32 quadwords, per-CPU. | ||
477 | */ | ||
478 | .align PAGE_SIZE_asm | ||
479 | ENTRY(cpu_gdt_table) | ||
480 | .quad 0x0000000000000000 /* NULL descriptor */ | ||
481 | .quad 0x0000000000000000 /* 0x0b reserved */ | ||
482 | .quad 0x0000000000000000 /* 0x13 reserved */ | ||
483 | .quad 0x0000000000000000 /* 0x1b reserved */ | ||
484 | .quad 0x0000000000000000 /* 0x20 unused */ | ||
485 | .quad 0x0000000000000000 /* 0x28 unused */ | ||
486 | .quad 0x0000000000000000 /* 0x33 TLS entry 1 */ | ||
487 | .quad 0x0000000000000000 /* 0x3b TLS entry 2 */ | ||
488 | .quad 0x0000000000000000 /* 0x43 TLS entry 3 */ | ||
489 | .quad 0x0000000000000000 /* 0x4b reserved */ | ||
490 | .quad 0x0000000000000000 /* 0x53 reserved */ | ||
491 | .quad 0x0000000000000000 /* 0x5b reserved */ | ||
492 | |||
493 | .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */ | ||
494 | .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */ | ||
495 | .quad 0x00cffa000000ffff /* 0x73 user 4GB code at 0x00000000 */ | ||
496 | .quad 0x00cff2000000ffff /* 0x7b user 4GB data at 0x00000000 */ | ||
497 | |||
498 | .quad 0x0000000000000000 /* 0x80 TSS descriptor */ | ||
499 | .quad 0x0000000000000000 /* 0x88 LDT descriptor */ | ||
500 | |||
501 | /* Segments used for calling PnP BIOS */ | ||
502 | .quad 0x00c09a0000000000 /* 0x90 32-bit code */ | ||
503 | .quad 0x00809a0000000000 /* 0x98 16-bit code */ | ||
504 | .quad 0x0080920000000000 /* 0xa0 16-bit data */ | ||
505 | .quad 0x0080920000000000 /* 0xa8 16-bit data */ | ||
506 | .quad 0x0080920000000000 /* 0xb0 16-bit data */ | ||
507 | /* | ||
508 | * The APM segments have byte granularity and their bases | ||
509 | * and limits are set at run time. | ||
510 | */ | ||
511 | .quad 0x00409a0000000000 /* 0xb8 APM CS code */ | ||
512 | .quad 0x00009a0000000000 /* 0xc0 APM CS 16 code (16 bit) */ | ||
513 | .quad 0x0040920000000000 /* 0xc8 APM DS data */ | ||
514 | |||
515 | .quad 0x0000920000000000 /* 0xd0 - ESPFIX 16-bit SS */ | ||
516 | .quad 0x0000000000000000 /* 0xd8 - unused */ | ||
517 | .quad 0x0000000000000000 /* 0xe0 - unused */ | ||
518 | .quad 0x0000000000000000 /* 0xe8 - unused */ | ||
519 | .quad 0x0000000000000000 /* 0xf0 - unused */ | ||
520 | .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */ | ||
521 | |||
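Each .quad above is an ordinary x86 segment descriptor with the base and limit scattered across the quadword. A small stand-alone sketch that unpacks 0x00cf9a000000ffff to show why it reads as "kernel 4GB code at 0x00000000":

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t d = 0x00cf9a000000ffffull;
		uint32_t base   = (uint32_t)(((d >> 16) & 0xffffff) | (((d >> 56) & 0xff) << 24));
		uint32_t limit  = (uint32_t)((d & 0xffff) | (((d >> 48) & 0xf) << 16));
		uint32_t flags  = (uint32_t)((d >> 52) & 0xf);	/* AVL, L, D/B, G */
		uint32_t access = (uint32_t)((d >> 40) & 0xff);	/* 0x9a: present, ring 0, code */

		if (flags & 0x8)	/* G set: limit counts 4 KB pages */
			limit = (limit << 12) | 0xfff;

		/* prints: base=0 limit=0xffffffff access=0x9a */
		printf("base=%#x limit=%#x access=%#x\n", base, limit, access);
		return 0;
	}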
diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c new file mode 100644 index 000000000000..14ec354bec92 --- /dev/null +++ b/arch/i386/kernel/i386_ksyms.c | |||
@@ -0,0 +1,195 @@ | |||
1 | #include <linux/config.h> | ||
2 | #include <linux/module.h> | ||
3 | #include <linux/smp.h> | ||
4 | #include <linux/user.h> | ||
5 | #include <linux/elfcore.h> | ||
6 | #include <linux/mca.h> | ||
7 | #include <linux/sched.h> | ||
8 | #include <linux/in6.h> | ||
9 | #include <linux/interrupt.h> | ||
10 | #include <linux/smp_lock.h> | ||
11 | #include <linux/pm.h> | ||
12 | #include <linux/pci.h> | ||
13 | #include <linux/apm_bios.h> | ||
14 | #include <linux/kernel.h> | ||
15 | #include <linux/string.h> | ||
16 | #include <linux/tty.h> | ||
17 | #include <linux/highmem.h> | ||
18 | #include <linux/time.h> | ||
19 | |||
20 | #include <asm/semaphore.h> | ||
21 | #include <asm/processor.h> | ||
22 | #include <asm/i387.h> | ||
23 | #include <asm/uaccess.h> | ||
24 | #include <asm/checksum.h> | ||
25 | #include <asm/io.h> | ||
26 | #include <asm/delay.h> | ||
27 | #include <asm/irq.h> | ||
28 | #include <asm/mmx.h> | ||
29 | #include <asm/desc.h> | ||
30 | #include <asm/pgtable.h> | ||
31 | #include <asm/tlbflush.h> | ||
32 | #include <asm/nmi.h> | ||
33 | #include <asm/ist.h> | ||
34 | #include <asm/kdebug.h> | ||
35 | |||
36 | extern void dump_thread(struct pt_regs *, struct user *); | ||
37 | extern spinlock_t rtc_lock; | ||
38 | |||
39 | /* This is definitely a GPL-only symbol */ | ||
40 | EXPORT_SYMBOL_GPL(cpu_gdt_table); | ||
41 | |||
42 | #if defined(CONFIG_APM_MODULE) | ||
43 | extern void machine_real_restart(unsigned char *, int); | ||
44 | EXPORT_SYMBOL(machine_real_restart); | ||
45 | extern void default_idle(void); | ||
46 | EXPORT_SYMBOL(default_idle); | ||
47 | #endif | ||
48 | |||
49 | #ifdef CONFIG_SMP | ||
50 | extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); | ||
51 | extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); | ||
52 | #endif | ||
53 | |||
54 | #if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE) | ||
55 | extern struct drive_info_struct drive_info; | ||
56 | EXPORT_SYMBOL(drive_info); | ||
57 | #endif | ||
58 | |||
59 | extern unsigned long cpu_khz; | ||
60 | extern unsigned long get_cmos_time(void); | ||
61 | |||
62 | /* platform dependent support */ | ||
63 | EXPORT_SYMBOL(boot_cpu_data); | ||
64 | #ifdef CONFIG_DISCONTIGMEM | ||
65 | EXPORT_SYMBOL(node_data); | ||
66 | EXPORT_SYMBOL(physnode_map); | ||
67 | #endif | ||
68 | #ifdef CONFIG_X86_NUMAQ | ||
69 | EXPORT_SYMBOL(xquad_portio); | ||
70 | #endif | ||
71 | EXPORT_SYMBOL(dump_thread); | ||
72 | EXPORT_SYMBOL(dump_fpu); | ||
73 | EXPORT_SYMBOL_GPL(kernel_fpu_begin); | ||
74 | EXPORT_SYMBOL(__ioremap); | ||
75 | EXPORT_SYMBOL(ioremap_nocache); | ||
76 | EXPORT_SYMBOL(iounmap); | ||
77 | EXPORT_SYMBOL(kernel_thread); | ||
78 | EXPORT_SYMBOL(pm_idle); | ||
79 | EXPORT_SYMBOL(pm_power_off); | ||
80 | EXPORT_SYMBOL(get_cmos_time); | ||
81 | EXPORT_SYMBOL(cpu_khz); | ||
82 | EXPORT_SYMBOL(apm_info); | ||
83 | |||
84 | EXPORT_SYMBOL(__down_failed); | ||
85 | EXPORT_SYMBOL(__down_failed_interruptible); | ||
86 | EXPORT_SYMBOL(__down_failed_trylock); | ||
87 | EXPORT_SYMBOL(__up_wakeup); | ||
88 | /* Networking helper routines. */ | ||
89 | EXPORT_SYMBOL(csum_partial_copy_generic); | ||
90 | /* Delay loops */ | ||
91 | EXPORT_SYMBOL(__ndelay); | ||
92 | EXPORT_SYMBOL(__udelay); | ||
93 | EXPORT_SYMBOL(__delay); | ||
94 | EXPORT_SYMBOL(__const_udelay); | ||
95 | |||
96 | EXPORT_SYMBOL(__get_user_1); | ||
97 | EXPORT_SYMBOL(__get_user_2); | ||
98 | EXPORT_SYMBOL(__get_user_4); | ||
99 | |||
100 | EXPORT_SYMBOL(__put_user_1); | ||
101 | EXPORT_SYMBOL(__put_user_2); | ||
102 | EXPORT_SYMBOL(__put_user_4); | ||
103 | EXPORT_SYMBOL(__put_user_8); | ||
104 | |||
105 | EXPORT_SYMBOL(strpbrk); | ||
106 | EXPORT_SYMBOL(strstr); | ||
107 | |||
108 | EXPORT_SYMBOL(strncpy_from_user); | ||
109 | EXPORT_SYMBOL(__strncpy_from_user); | ||
110 | EXPORT_SYMBOL(clear_user); | ||
111 | EXPORT_SYMBOL(__clear_user); | ||
112 | EXPORT_SYMBOL(__copy_from_user_ll); | ||
113 | EXPORT_SYMBOL(__copy_to_user_ll); | ||
114 | EXPORT_SYMBOL(strnlen_user); | ||
115 | |||
116 | EXPORT_SYMBOL(dma_alloc_coherent); | ||
117 | EXPORT_SYMBOL(dma_free_coherent); | ||
118 | |||
119 | #ifdef CONFIG_PCI | ||
120 | EXPORT_SYMBOL(pci_mem_start); | ||
121 | #endif | ||
122 | |||
123 | #ifdef CONFIG_PCI_BIOS | ||
124 | EXPORT_SYMBOL(pcibios_set_irq_routing); | ||
125 | EXPORT_SYMBOL(pcibios_get_irq_routing_table); | ||
126 | #endif | ||
127 | |||
128 | #ifdef CONFIG_X86_USE_3DNOW | ||
129 | EXPORT_SYMBOL(_mmx_memcpy); | ||
130 | EXPORT_SYMBOL(mmx_clear_page); | ||
131 | EXPORT_SYMBOL(mmx_copy_page); | ||
132 | #endif | ||
133 | |||
134 | #ifdef CONFIG_X86_HT | ||
135 | EXPORT_SYMBOL(smp_num_siblings); | ||
136 | EXPORT_SYMBOL(cpu_sibling_map); | ||
137 | #endif | ||
138 | |||
139 | #ifdef CONFIG_SMP | ||
140 | EXPORT_SYMBOL(cpu_data); | ||
141 | EXPORT_SYMBOL(cpu_online_map); | ||
142 | EXPORT_SYMBOL(cpu_callout_map); | ||
143 | EXPORT_SYMBOL(__write_lock_failed); | ||
144 | EXPORT_SYMBOL(__read_lock_failed); | ||
145 | |||
146 | /* Global SMP stuff */ | ||
147 | EXPORT_SYMBOL(smp_call_function); | ||
148 | |||
149 | /* TLB flushing */ | ||
150 | EXPORT_SYMBOL(flush_tlb_page); | ||
151 | #endif | ||
152 | |||
153 | #ifdef CONFIG_X86_IO_APIC | ||
154 | EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); | ||
155 | #endif | ||
156 | |||
157 | #ifdef CONFIG_MCA | ||
158 | EXPORT_SYMBOL(machine_id); | ||
159 | #endif | ||
160 | |||
161 | #ifdef CONFIG_VT | ||
162 | EXPORT_SYMBOL(screen_info); | ||
163 | #endif | ||
164 | |||
165 | EXPORT_SYMBOL(get_wchan); | ||
166 | |||
167 | EXPORT_SYMBOL(rtc_lock); | ||
168 | |||
169 | EXPORT_SYMBOL_GPL(set_nmi_callback); | ||
170 | EXPORT_SYMBOL_GPL(unset_nmi_callback); | ||
171 | |||
172 | #undef memcmp | ||
173 | extern int memcmp(const void *,const void *,__kernel_size_t); | ||
174 | EXPORT_SYMBOL(memcmp); | ||
175 | |||
176 | EXPORT_SYMBOL(register_die_notifier); | ||
177 | #ifdef CONFIG_HAVE_DEC_LOCK | ||
178 | EXPORT_SYMBOL(_atomic_dec_and_lock); | ||
179 | #endif | ||
180 | |||
181 | EXPORT_SYMBOL(__PAGE_KERNEL); | ||
182 | |||
183 | #ifdef CONFIG_HIGHMEM | ||
184 | EXPORT_SYMBOL(kmap); | ||
185 | EXPORT_SYMBOL(kunmap); | ||
186 | EXPORT_SYMBOL(kmap_atomic); | ||
187 | EXPORT_SYMBOL(kunmap_atomic); | ||
188 | EXPORT_SYMBOL(kmap_atomic_to_page); | ||
189 | #endif | ||
190 | |||
191 | #if defined(CONFIG_X86_SPEEDSTEP_SMI) || defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE) | ||
192 | EXPORT_SYMBOL(ist_info); | ||
193 | #endif | ||
194 | |||
195 | EXPORT_SYMBOL(csum_partial); | ||
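Every EXPORT_SYMBOL line above adds an entry to the kernel's symbol table so that loadable modules can resolve the name at insmod time; the EXPORT_SYMBOL_GPL entries resolve only for GPL-compatible modules. A hypothetical out-of-tree module using one of these exports, purely as a sketch (the khz_demo_* names are invented for illustration):

	#include <linux/module.h>
	#include <linux/kernel.h>

	extern unsigned long cpu_khz;	/* exported by this file */

	static int __init khz_demo_init(void)
	{
		printk(KERN_INFO "cpu clocked at %lu kHz\n", cpu_khz);
		return 0;
	}

	static void __exit khz_demo_exit(void)
	{
	}

	module_init(khz_demo_init);
	module_exit(khz_demo_exit);
	MODULE_LICENSE("GPL");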
diff --git a/arch/i386/kernel/i387.c b/arch/i386/kernel/i387.c new file mode 100644 index 000000000000..c55e037f08f7 --- /dev/null +++ b/arch/i386/kernel/i387.c | |||
@@ -0,0 +1,555 @@ | |||
1 | /* | ||
2 | * linux/arch/i386/kernel/i387.c | ||
3 | * | ||
4 | * Copyright (C) 1994 Linus Torvalds | ||
5 | * | ||
6 | * Pentium III FXSR, SSE support | ||
7 | * General FPU state handling cleanups | ||
8 | * Gareth Hughes <gareth@valinux.com>, May 2000 | ||
9 | */ | ||
10 | |||
11 | #include <linux/config.h> | ||
12 | #include <linux/sched.h> | ||
13 | #include <asm/processor.h> | ||
14 | #include <asm/i387.h> | ||
15 | #include <asm/math_emu.h> | ||
16 | #include <asm/sigcontext.h> | ||
17 | #include <asm/user.h> | ||
18 | #include <asm/ptrace.h> | ||
19 | #include <asm/uaccess.h> | ||
20 | |||
21 | #ifdef CONFIG_MATH_EMULATION | ||
22 | #define HAVE_HWFP (boot_cpu_data.hard_math) | ||
23 | #else | ||
24 | #define HAVE_HWFP 1 | ||
25 | #endif | ||
26 | |||
27 | static unsigned long mxcsr_feature_mask = 0xffffffff; | ||
28 | |||
29 | void mxcsr_feature_mask_init(void) | ||
30 | { | ||
31 | unsigned long mask = 0; | ||
32 | clts(); | ||
33 | if (cpu_has_fxsr) { | ||
34 | memset(¤t->thread.i387.fxsave, 0, sizeof(struct i387_fxsave_struct)); | ||
35 | asm volatile("fxsave %0" : : "m" (current->thread.i387.fxsave)); | ||
36 | mask = current->thread.i387.fxsave.mxcsr_mask; | ||
37 | if (mask == 0) mask = 0x0000ffbf; | ||
38 | } | ||
39 | mxcsr_feature_mask &= mask; | ||
40 | stts(); | ||
41 | } | ||
42 | |||
43 | /* | ||
44 | * The _current_ task is using the FPU for the first time | ||
45 | * so initialize it, set the mxcsr to its default | ||
46 | * reset value if we support XMM instructions, and then | ||
47 | * remember that the current task has used the FPU. | ||
48 | */ | ||
49 | void init_fpu(struct task_struct *tsk) | ||
50 | { | ||
51 | if (cpu_has_fxsr) { | ||
52 | memset(&tsk->thread.i387.fxsave, 0, sizeof(struct i387_fxsave_struct)); | ||
53 | tsk->thread.i387.fxsave.cwd = 0x37f; | ||
54 | if (cpu_has_xmm) | ||
55 | tsk->thread.i387.fxsave.mxcsr = 0x1f80; | ||
56 | } else { | ||
57 | memset(&tsk->thread.i387.fsave, 0, sizeof(struct i387_fsave_struct)); | ||
58 | tsk->thread.i387.fsave.cwd = 0xffff037fu; | ||
59 | tsk->thread.i387.fsave.swd = 0xffff0000u; | ||
60 | tsk->thread.i387.fsave.twd = 0xffffffffu; | ||
61 | tsk->thread.i387.fsave.fos = 0xffff0000u; | ||
62 | } | ||
63 | /* only the device-not-available exception or ptrace can call init_fpu */ | ||
64 | set_stopped_child_used_math(tsk); | ||
65 | } | ||
66 | |||
67 | /* | ||
68 | * FPU lazy state save handling. | ||
69 | */ | ||
70 | |||
71 | void kernel_fpu_begin(void) | ||
72 | { | ||
73 | struct thread_info *thread = current_thread_info(); | ||
74 | |||
75 | preempt_disable(); | ||
76 | if (thread->status & TS_USEDFPU) { | ||
77 | __save_init_fpu(thread->task); | ||
78 | return; | ||
79 | } | ||
80 | clts(); | ||
81 | } | ||
82 | |||
83 | void restore_fpu( struct task_struct *tsk ) | ||
84 | { | ||
85 | if ( cpu_has_fxsr ) { | ||
86 | asm volatile( "fxrstor %0" | ||
87 | : : "m" (tsk->thread.i387.fxsave) ); | ||
88 | } else { | ||
89 | asm volatile( "frstor %0" | ||
90 | : : "m" (tsk->thread.i387.fsave) ); | ||
91 | } | ||
92 | } | ||
93 | |||
94 | /* | ||
95 | * FPU tag word conversions. | ||
96 | */ | ||
97 | |||
98 | static inline unsigned short twd_i387_to_fxsr( unsigned short twd ) | ||
99 | { | ||
100 | unsigned int tmp; /* to avoid 16 bit prefixes in the code */ | ||
101 | |||
102 | /* Transform each pair of bits into 01 (valid) or 00 (empty) */ | ||
103 | tmp = ~twd; | ||
104 | tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ | ||
105 | /* and move the valid bits to the lower byte. */ | ||
106 | tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ | ||
107 | tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ | ||
108 | tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ | ||
109 | return tmp; | ||
110 | } | ||
111 | |||
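The folding above compresses the i387 tag word (two bits per register, 11 = empty) into the FXSR tag byte (one bit per register, 1 = in use): inverting the word turns empty pairs into 00, then each OR/shift/mask step squeezes the surviving bits into half the width. A user-space test sketch of the same transform:

	#include <assert.h>

	static unsigned short fold(unsigned short twd)
	{
		unsigned int tmp = ~twd;
		tmp = (tmp | (tmp >> 1)) & 0x5555;	/* 0V0V0V0V0V0V0V0V */
		tmp = (tmp | (tmp >> 1)) & 0x3333;	/* 00VV00VV00VV00VV */
		tmp = (tmp | (tmp >> 2)) & 0x0f0f;	/* 0000VVVV0000VVVV */
		tmp = (tmp | (tmp >> 4)) & 0x00ff;	/* 00000000VVVVVVVV */
		return (unsigned short)tmp;
	}

	int main(void)
	{
		assert(fold(0xffff) == 0x00);	/* all empty -> no bits set */
		assert(fold(0xfffc) == 0x01);	/* st(0) valid (tag 00), rest empty */
		assert(fold(0xfff7) == 0x02);	/* st(1) zero (tag 01), rest empty */
		return 0;
	}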
112 | static inline unsigned long twd_fxsr_to_i387( struct i387_fxsave_struct *fxsave ) | ||
113 | { | ||
114 | struct _fpxreg *st = NULL; | ||
115 | unsigned long tos = (fxsave->swd >> 11) & 7; | ||
116 | unsigned long twd = (unsigned long) fxsave->twd; | ||
117 | unsigned long tag; | ||
118 | unsigned long ret = 0xffff0000u; | ||
119 | int i; | ||
120 | |||
121 | #define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16) | ||
122 | |||
123 | for ( i = 0 ; i < 8 ; i++ ) { | ||
124 | if ( twd & 0x1 ) { | ||
125 | st = FPREG_ADDR( fxsave, (i - tos) & 7 ); | ||
126 | |||
127 | switch ( st->exponent & 0x7fff ) { | ||
128 | case 0x7fff: | ||
129 | tag = 2; /* Special */ | ||
130 | break; | ||
131 | case 0x0000: | ||
132 | if ( !st->significand[0] && | ||
133 | !st->significand[1] && | ||
134 | !st->significand[2] && | ||
135 | !st->significand[3] ) { | ||
136 | tag = 1; /* Zero */ | ||
137 | } else { | ||
138 | tag = 2; /* Special */ | ||
139 | } | ||
140 | break; | ||
141 | default: | ||
142 | if ( st->significand[3] & 0x8000 ) { | ||
143 | tag = 0; /* Valid */ | ||
144 | } else { | ||
145 | tag = 2; /* Special */ | ||
146 | } | ||
147 | break; | ||
148 | } | ||
149 | } else { | ||
150 | tag = 3; /* Empty */ | ||
151 | } | ||
152 | ret |= (tag << (2 * i)); | ||
153 | twd = twd >> 1; | ||
154 | } | ||
155 | return ret; | ||
156 | } | ||
157 | |||
158 | /* | ||
159 | * FPU state interaction. | ||
160 | */ | ||
161 | |||
162 | unsigned short get_fpu_cwd( struct task_struct *tsk ) | ||
163 | { | ||
164 | if ( cpu_has_fxsr ) { | ||
165 | return tsk->thread.i387.fxsave.cwd; | ||
166 | } else { | ||
167 | return (unsigned short)tsk->thread.i387.fsave.cwd; | ||
168 | } | ||
169 | } | ||
170 | |||
171 | unsigned short get_fpu_swd( struct task_struct *tsk ) | ||
172 | { | ||
173 | if ( cpu_has_fxsr ) { | ||
174 | return tsk->thread.i387.fxsave.swd; | ||
175 | } else { | ||
176 | return (unsigned short)tsk->thread.i387.fsave.swd; | ||
177 | } | ||
178 | } | ||
179 | |||
180 | #if 0 | ||
181 | unsigned short get_fpu_twd( struct task_struct *tsk ) | ||
182 | { | ||
183 | if ( cpu_has_fxsr ) { | ||
184 | return tsk->thread.i387.fxsave.twd; | ||
185 | } else { | ||
186 | return (unsigned short)tsk->thread.i387.fsave.twd; | ||
187 | } | ||
188 | } | ||
189 | #endif /* 0 */ | ||
190 | |||
191 | unsigned short get_fpu_mxcsr( struct task_struct *tsk ) | ||
192 | { | ||
193 | if ( cpu_has_xmm ) { | ||
194 | return tsk->thread.i387.fxsave.mxcsr; | ||
195 | } else { | ||
196 | return 0x1f80; | ||
197 | } | ||
198 | } | ||
199 | |||
200 | #if 0 | ||
201 | |||
202 | void set_fpu_cwd( struct task_struct *tsk, unsigned short cwd ) | ||
203 | { | ||
204 | if ( cpu_has_fxsr ) { | ||
205 | tsk->thread.i387.fxsave.cwd = cwd; | ||
206 | } else { | ||
207 | tsk->thread.i387.fsave.cwd = ((long)cwd | 0xffff0000u); | ||
208 | } | ||
209 | } | ||
210 | |||
211 | void set_fpu_swd( struct task_struct *tsk, unsigned short swd ) | ||
212 | { | ||
213 | if ( cpu_has_fxsr ) { | ||
214 | tsk->thread.i387.fxsave.swd = swd; | ||
215 | } else { | ||
216 | tsk->thread.i387.fsave.swd = ((long)swd | 0xffff0000u); | ||
217 | } | ||
218 | } | ||
219 | |||
220 | void set_fpu_twd( struct task_struct *tsk, unsigned short twd ) | ||
221 | { | ||
222 | if ( cpu_has_fxsr ) { | ||
223 | tsk->thread.i387.fxsave.twd = twd_i387_to_fxsr(twd); | ||
224 | } else { | ||
225 | tsk->thread.i387.fsave.twd = ((long)twd | 0xffff0000u); | ||
226 | } | ||
227 | } | ||
228 | |||
229 | #endif /* 0 */ | ||
230 | |||
231 | /* | ||
232 | * FXSR floating point environment conversions. | ||
233 | */ | ||
234 | |||
235 | static int convert_fxsr_to_user( struct _fpstate __user *buf, | ||
236 | struct i387_fxsave_struct *fxsave ) | ||
237 | { | ||
238 | unsigned long env[7]; | ||
239 | struct _fpreg __user *to; | ||
240 | struct _fpxreg *from; | ||
241 | int i; | ||
242 | |||
243 | env[0] = (unsigned long)fxsave->cwd | 0xffff0000ul; | ||
244 | env[1] = (unsigned long)fxsave->swd | 0xffff0000ul; | ||
245 | env[2] = twd_fxsr_to_i387(fxsave); | ||
246 | env[3] = fxsave->fip; | ||
247 | env[4] = fxsave->fcs | ((unsigned long)fxsave->fop << 16); | ||
248 | env[5] = fxsave->foo; | ||
249 | env[6] = fxsave->fos; | ||
250 | |||
251 | if ( __copy_to_user( buf, env, 7 * sizeof(unsigned long) ) ) | ||
252 | return 1; | ||
253 | |||
254 | to = &buf->_st[0]; | ||
255 | from = (struct _fpxreg *) &fxsave->st_space[0]; | ||
256 | for ( i = 0 ; i < 8 ; i++, to++, from++ ) { | ||
257 | unsigned long __user *t = (unsigned long __user *)to; | ||
258 | unsigned long *f = (unsigned long *)from; | ||
259 | |||
260 | if (__put_user(*f, t) || | ||
261 | __put_user(*(f + 1), t + 1) || | ||
262 | __put_user(from->exponent, &to->exponent)) | ||
263 | return 1; | ||
264 | } | ||
265 | return 0; | ||
266 | } | ||
267 | |||
268 | static int convert_fxsr_from_user( struct i387_fxsave_struct *fxsave, | ||
269 | struct _fpstate __user *buf ) | ||
270 | { | ||
271 | unsigned long env[7]; | ||
272 | struct _fpxreg *to; | ||
273 | struct _fpreg __user *from; | ||
274 | int i; | ||
275 | |||
276 | if ( __copy_from_user( env, buf, 7 * sizeof(long) ) ) | ||
277 | return 1; | ||
278 | |||
279 | fxsave->cwd = (unsigned short)(env[0] & 0xffff); | ||
280 | fxsave->swd = (unsigned short)(env[1] & 0xffff); | ||
281 | fxsave->twd = twd_i387_to_fxsr((unsigned short)(env[2] & 0xffff)); | ||
282 | fxsave->fip = env[3]; | ||
283 | fxsave->fop = (unsigned short)((env[4] & 0xffff0000ul) >> 16); | ||
284 | fxsave->fcs = (env[4] & 0xffff); | ||
285 | fxsave->foo = env[5]; | ||
286 | fxsave->fos = env[6]; | ||
287 | |||
288 | to = (struct _fpxreg *) &fxsave->st_space[0]; | ||
289 | from = &buf->_st[0]; | ||
290 | for ( i = 0 ; i < 8 ; i++, to++, from++ ) { | ||
291 | unsigned long *t = (unsigned long *)to; | ||
292 | unsigned long __user *f = (unsigned long __user *)from; | ||
293 | |||
294 | if (__get_user(*t, f) || | ||
295 | __get_user(*(t + 1), f + 1) || | ||
296 | __get_user(to->exponent, &from->exponent)) | ||
297 | return 1; | ||
298 | } | ||
299 | return 0; | ||
300 | } | ||
301 | |||
302 | /* | ||
303 | * Signal frame handlers. | ||
304 | */ | ||
305 | |||
306 | static inline int save_i387_fsave( struct _fpstate __user *buf ) | ||
307 | { | ||
308 | struct task_struct *tsk = current; | ||
309 | |||
310 | unlazy_fpu( tsk ); | ||
311 | tsk->thread.i387.fsave.status = tsk->thread.i387.fsave.swd; | ||
312 | if ( __copy_to_user( buf, &tsk->thread.i387.fsave, | ||
313 | sizeof(struct i387_fsave_struct) ) ) | ||
314 | return -1; | ||
315 | return 1; | ||
316 | } | ||
317 | |||
318 | static int save_i387_fxsave( struct _fpstate __user *buf ) | ||
319 | { | ||
320 | struct task_struct *tsk = current; | ||
321 | int err = 0; | ||
322 | |||
323 | unlazy_fpu( tsk ); | ||
324 | |||
325 | if ( convert_fxsr_to_user( buf, &tsk->thread.i387.fxsave ) ) | ||
326 | return -1; | ||
327 | |||
328 | err |= __put_user( tsk->thread.i387.fxsave.swd, &buf->status ); | ||
329 | err |= __put_user( X86_FXSR_MAGIC, &buf->magic ); | ||
330 | if ( err ) | ||
331 | return -1; | ||
332 | |||
333 | if ( __copy_to_user( &buf->_fxsr_env[0], &tsk->thread.i387.fxsave, | ||
334 | sizeof(struct i387_fxsave_struct) ) ) | ||
335 | return -1; | ||
336 | return 1; | ||
337 | } | ||
338 | |||
339 | int save_i387( struct _fpstate __user *buf ) | ||
340 | { | ||
341 | if ( !used_math() ) | ||
342 | return 0; | ||
343 | |||
344 | /* This will cause a "finit" to be triggered by the next | ||
345 | * attempted FPU operation by the 'current' process. | ||
346 | */ | ||
347 | clear_used_math(); | ||
348 | |||
349 | if ( HAVE_HWFP ) { | ||
350 | if ( cpu_has_fxsr ) { | ||
351 | return save_i387_fxsave( buf ); | ||
352 | } else { | ||
353 | return save_i387_fsave( buf ); | ||
354 | } | ||
355 | } else { | ||
356 | return save_i387_soft( ¤t->thread.i387.soft, buf ); | ||
357 | } | ||
358 | } | ||
359 | |||
360 | static inline int restore_i387_fsave( struct _fpstate __user *buf ) | ||
361 | { | ||
362 | struct task_struct *tsk = current; | ||
363 | clear_fpu( tsk ); | ||
364 | return __copy_from_user( &tsk->thread.i387.fsave, buf, | ||
365 | sizeof(struct i387_fsave_struct) ); | ||
366 | } | ||
367 | |||
368 | static int restore_i387_fxsave( struct _fpstate __user *buf ) | ||
369 | { | ||
370 | int err; | ||
371 | struct task_struct *tsk = current; | ||
372 | clear_fpu( tsk ); | ||
373 | err = __copy_from_user( &tsk->thread.i387.fxsave, &buf->_fxsr_env[0], | ||
374 | sizeof(struct i387_fxsave_struct) ); | ||
375 | /* mxcsr reserved bits must be masked to zero for security reasons */ | ||
376 | tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; | ||
377 | return err ? 1 : convert_fxsr_from_user( &tsk->thread.i387.fxsave, buf ); | ||
378 | } | ||
379 | |||
380 | int restore_i387( struct _fpstate __user *buf ) | ||
381 | { | ||
382 | int err; | ||
383 | |||
384 | if ( HAVE_HWFP ) { | ||
385 | if ( cpu_has_fxsr ) { | ||
386 | err = restore_i387_fxsave( buf ); | ||
387 | } else { | ||
388 | err = restore_i387_fsave( buf ); | ||
389 | } | ||
390 | } else { | ||
391 | err = restore_i387_soft( ¤t->thread.i387.soft, buf ); | ||
392 | } | ||
393 | set_used_math(); | ||
394 | return err; | ||
395 | } | ||
396 | |||
397 | /* | ||
398 | * ptrace request handlers. | ||
399 | */ | ||
400 | |||
401 | static inline int get_fpregs_fsave( struct user_i387_struct __user *buf, | ||
402 | struct task_struct *tsk ) | ||
403 | { | ||
404 | return __copy_to_user( buf, &tsk->thread.i387.fsave, | ||
405 | sizeof(struct user_i387_struct) ); | ||
406 | } | ||
407 | |||
408 | static inline int get_fpregs_fxsave( struct user_i387_struct __user *buf, | ||
409 | struct task_struct *tsk ) | ||
410 | { | ||
411 | return convert_fxsr_to_user( (struct _fpstate __user *)buf, | ||
412 | &tsk->thread.i387.fxsave ); | ||
413 | } | ||
414 | |||
415 | int get_fpregs( struct user_i387_struct __user *buf, struct task_struct *tsk ) | ||
416 | { | ||
417 | if ( HAVE_HWFP ) { | ||
418 | if ( cpu_has_fxsr ) { | ||
419 | return get_fpregs_fxsave( buf, tsk ); | ||
420 | } else { | ||
421 | return get_fpregs_fsave( buf, tsk ); | ||
422 | } | ||
423 | } else { | ||
424 | return save_i387_soft( &tsk->thread.i387.soft, | ||
425 | (struct _fpstate __user *)buf ); | ||
426 | } | ||
427 | } | ||
428 | |||
429 | static inline int set_fpregs_fsave( struct task_struct *tsk, | ||
430 | struct user_i387_struct __user *buf ) | ||
431 | { | ||
432 | return __copy_from_user( &tsk->thread.i387.fsave, buf, | ||
433 | sizeof(struct user_i387_struct) ); | ||
434 | } | ||
435 | |||
436 | static inline int set_fpregs_fxsave( struct task_struct *tsk, | ||
437 | struct user_i387_struct __user *buf ) | ||
438 | { | ||
439 | return convert_fxsr_from_user( &tsk->thread.i387.fxsave, | ||
440 | (struct _fpstate __user *)buf ); | ||
441 | } | ||
442 | |||
443 | int set_fpregs( struct task_struct *tsk, struct user_i387_struct __user *buf ) | ||
444 | { | ||
445 | if ( HAVE_HWFP ) { | ||
446 | if ( cpu_has_fxsr ) { | ||
447 | return set_fpregs_fxsave( tsk, buf ); | ||
448 | } else { | ||
449 | return set_fpregs_fsave( tsk, buf ); | ||
450 | } | ||
451 | } else { | ||
452 | return restore_i387_soft( &tsk->thread.i387.soft, | ||
453 | (struct _fpstate __user *)buf ); | ||
454 | } | ||
455 | } | ||
456 | |||
457 | int get_fpxregs( struct user_fxsr_struct __user *buf, struct task_struct *tsk ) | ||
458 | { | ||
459 | if ( cpu_has_fxsr ) { | ||
460 | if (__copy_to_user( buf, &tsk->thread.i387.fxsave, | ||
461 | sizeof(struct user_fxsr_struct) )) | ||
462 | return -EFAULT; | ||
463 | return 0; | ||
464 | } else { | ||
465 | return -EIO; | ||
466 | } | ||
467 | } | ||
468 | |||
469 | int set_fpxregs( struct task_struct *tsk, struct user_fxsr_struct __user *buf ) | ||
470 | { | ||
471 | int ret = 0; | ||
472 | |||
473 | if ( cpu_has_fxsr ) { | ||
474 | if (__copy_from_user( &tsk->thread.i387.fxsave, buf, | ||
475 | sizeof(struct user_fxsr_struct) )) | ||
476 | ret = -EFAULT; | ||
477 | /* mxcsr reserved bits must be masked to zero for security reasons */ | ||
478 | tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; | ||
479 | } else { | ||
480 | ret = -EIO; | ||
481 | } | ||
482 | return ret; | ||
483 | } | ||
484 | |||
485 | /* | ||
486 | * FPU state for core dumps. | ||
487 | */ | ||
488 | |||
489 | static inline void copy_fpu_fsave( struct task_struct *tsk, | ||
490 | struct user_i387_struct *fpu ) | ||
491 | { | ||
492 | memcpy( fpu, &tsk->thread.i387.fsave, | ||
493 | sizeof(struct user_i387_struct) ); | ||
494 | } | ||
495 | |||
496 | static inline void copy_fpu_fxsave( struct task_struct *tsk, | ||
497 | struct user_i387_struct *fpu ) | ||
498 | { | ||
499 | unsigned short *to; | ||
500 | unsigned short *from; | ||
501 | int i; | ||
502 | |||
503 | memcpy( fpu, &tsk->thread.i387.fxsave, 7 * sizeof(long) ); | ||
504 | |||
505 | to = (unsigned short *)&fpu->st_space[0]; | ||
506 | from = (unsigned short *)&tsk->thread.i387.fxsave.st_space[0]; | ||
507 | for ( i = 0 ; i < 8 ; i++, to += 5, from += 8 ) { | ||
508 | memcpy( to, from, 5 * sizeof(unsigned short) ); | ||
509 | } | ||
510 | } | ||
511 | |||
512 | int dump_fpu( struct pt_regs *regs, struct user_i387_struct *fpu ) | ||
513 | { | ||
514 | int fpvalid; | ||
515 | struct task_struct *tsk = current; | ||
516 | |||
517 | fpvalid = !!used_math(); | ||
518 | if ( fpvalid ) { | ||
519 | unlazy_fpu( tsk ); | ||
520 | if ( cpu_has_fxsr ) { | ||
521 | copy_fpu_fxsave( tsk, fpu ); | ||
522 | } else { | ||
523 | copy_fpu_fsave( tsk, fpu ); | ||
524 | } | ||
525 | } | ||
526 | |||
527 | return fpvalid; | ||
528 | } | ||
529 | |||
530 | int dump_task_fpu(struct task_struct *tsk, struct user_i387_struct *fpu) | ||
531 | { | ||
532 | int fpvalid = !!tsk_used_math(tsk); | ||
533 | |||
534 | if (fpvalid) { | ||
535 | if (tsk == current) | ||
536 | unlazy_fpu(tsk); | ||
537 | if (cpu_has_fxsr) | ||
538 | copy_fpu_fxsave(tsk, fpu); | ||
539 | else | ||
540 | copy_fpu_fsave(tsk, fpu); | ||
541 | } | ||
542 | return fpvalid; | ||
543 | } | ||
544 | |||
545 | int dump_task_extended_fpu(struct task_struct *tsk, struct user_fxsr_struct *fpu) | ||
546 | { | ||
547 | int fpvalid = tsk_used_math(tsk) && cpu_has_fxsr; | ||
548 | |||
549 | if (fpvalid) { | ||
550 | if (tsk == current) | ||
551 | unlazy_fpu(tsk); | ||
552 | memcpy(fpu, &tsk->thread.i387.fxsave, sizeof(*fpu)); | ||
553 | } | ||
554 | return fpvalid; | ||
555 | } | ||
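For completeness, this is the calling convention kernel_fpu_begin() above exists to support: kernel-side FPU/MMX users (the MMX page-copy helpers, for example) bracket their register use so that a user task's lazily-held FPU state gets saved first and preemption stays off while the registers are live. A sketch, with kernel_fpu_end() being the counterpart defined in <asm/i387.h> and the copy_chunk_with_mmx name invented for illustration:

	#include <asm/i387.h>

	static void copy_chunk_with_mmx(void *to, const void *from)
	{
		kernel_fpu_begin();	/* save user FPU state, disable preemption */
		/* ... MMX/SSE register loads and stores would go here ... */
		(void)to; (void)from;
		kernel_fpu_end();	/* set TS again, re-enable preemption */
	}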
diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c new file mode 100644 index 000000000000..560bef1afb3b --- /dev/null +++ b/arch/i386/kernel/i8259.c | |||
@@ -0,0 +1,429 @@ | |||
1 | #include <linux/config.h> | ||
2 | #include <linux/errno.h> | ||
3 | #include <linux/signal.h> | ||
4 | #include <linux/sched.h> | ||
5 | #include <linux/ioport.h> | ||
6 | #include <linux/interrupt.h> | ||
7 | #include <linux/slab.h> | ||
8 | #include <linux/random.h> | ||
9 | #include <linux/smp_lock.h> | ||
10 | #include <linux/init.h> | ||
11 | #include <linux/kernel_stat.h> | ||
12 | #include <linux/sysdev.h> | ||
13 | #include <linux/bitops.h> | ||
14 | |||
15 | #include <asm/8253pit.h> | ||
16 | #include <asm/atomic.h> | ||
17 | #include <asm/system.h> | ||
18 | #include <asm/io.h> | ||
19 | #include <asm/irq.h> | ||
20 | #include <asm/timer.h> | ||
21 | #include <asm/pgtable.h> | ||
22 | #include <asm/delay.h> | ||
23 | #include <asm/desc.h> | ||
24 | #include <asm/apic.h> | ||
25 | #include <asm/arch_hooks.h> | ||
26 | #include <asm/i8259.h> | ||
27 | |||
28 | #include <linux/irq.h> | ||
29 | |||
30 | #include <io_ports.h> | ||
31 | |||
32 | /* | ||
33 | * This is the 'legacy' 8259A Programmable Interrupt Controller, | ||
34 | * present in the majority of PC/AT boxes. | ||
35 | * plus some generic x86-specific things, if generic specifics make | ||
36 | * any sense at all. | ||
37 | * This file should become arch/i386/kernel/irq.c when the old irq.c | ||
38 | * moves to arch-independent land. | ||
39 | */ | ||
40 | |||
41 | DEFINE_SPINLOCK(i8259A_lock); | ||
42 | |||
43 | static void end_8259A_irq(unsigned int irq) | ||
44 | { | ||
45 | if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)) && | ||
46 | irq_desc[irq].action) | ||
47 | enable_8259A_irq(irq); | ||
48 | } | ||
49 | |||
50 | #define shutdown_8259A_irq disable_8259A_irq | ||
51 | |||
52 | static void mask_and_ack_8259A(unsigned int); | ||
53 | |||
54 | unsigned int startup_8259A_irq(unsigned int irq) | ||
55 | { | ||
56 | enable_8259A_irq(irq); | ||
57 | return 0; /* never anything pending */ | ||
58 | } | ||
59 | |||
60 | static struct hw_interrupt_type i8259A_irq_type = { | ||
61 | .typename = "XT-PIC", | ||
62 | .startup = startup_8259A_irq, | ||
63 | .shutdown = shutdown_8259A_irq, | ||
64 | .enable = enable_8259A_irq, | ||
65 | .disable = disable_8259A_irq, | ||
66 | .ack = mask_and_ack_8259A, | ||
67 | .end = end_8259A_irq, | ||
68 | }; | ||
69 | |||
70 | /* | ||
71 | * 8259A PIC functions to handle ISA devices: | ||
72 | */ | ||
73 | |||
74 | /* | ||
75 | * This contains the irq mask for both 8259A irq controllers. | ||
76 | */ | ||
77 | unsigned int cached_irq_mask = 0xffff; | ||
78 | |||
79 | /* | ||
80 | * Not all IRQs can be routed through the IO-APIC, e.g. on certain (older) | ||
81 | * boards the timer interrupt is not really connected to any IO-APIC pin, | ||
82 | * it's fed to the master 8259A's IR0 line only. | ||
83 | * | ||
84 | * Any '1' bit in this mask means the IRQ is routed through the IO-APIC. | ||
85 | * This 'mixed mode' IRQ handling costs nothing because it's only used | ||
86 | * at IRQ setup time. | ||
87 | */ | ||
88 | unsigned long io_apic_irqs; | ||
89 | |||
90 | void disable_8259A_irq(unsigned int irq) | ||
91 | { | ||
92 | unsigned int mask = 1 << irq; | ||
93 | unsigned long flags; | ||
94 | |||
95 | spin_lock_irqsave(&i8259A_lock, flags); | ||
96 | cached_irq_mask |= mask; | ||
97 | if (irq & 8) | ||
98 | outb(cached_slave_mask, PIC_SLAVE_IMR); | ||
99 | else | ||
100 | outb(cached_master_mask, PIC_MASTER_IMR); | ||
101 | spin_unlock_irqrestore(&i8259A_lock, flags); | ||
102 | } | ||
103 | |||
104 | void enable_8259A_irq(unsigned int irq) | ||
105 | { | ||
106 | unsigned int mask = ~(1 << irq); | ||
107 | unsigned long flags; | ||
108 | |||
109 | spin_lock_irqsave(&i8259A_lock, flags); | ||
110 | cached_irq_mask &= mask; | ||
111 | if (irq & 8) | ||
112 | outb(cached_slave_mask, PIC_SLAVE_IMR); | ||
113 | else | ||
114 | outb(cached_master_mask, PIC_MASTER_IMR); | ||
115 | spin_unlock_irqrestore(&i8259A_lock, flags); | ||
116 | } | ||
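For reference, the two functions above share one convention: a single 16-bit software shadow of both interrupt mask registers, low byte for the master PIC, high byte for the slave. A minimal standalone sketch (plain user-space C, not kernel code; 0x21/0xA1 are the standard PC/AT IMR ports, and the mock_outb() helper is hypothetical):

#include <stdio.h>

static unsigned int cached_mask = 0xffff;	/* boot state: all IRQs masked */

static void mock_outb(unsigned char val, unsigned short port)
{
	printf("outb(0x%02x, 0x%02x)\n", val, port);
}

static void sketch_enable_irq(unsigned int irq)
{
	cached_mask &= ~(1u << irq);
	if (irq & 8)
		mock_outb((cached_mask >> 8) & 0xff, 0xA1);	/* slave IMR */
	else
		mock_outb(cached_mask & 0xff, 0x21);		/* master IMR */
}

int main(void)
{
	sketch_enable_irq(1);	/* keyboard: clear bit 1, rewrite master IMR */
	sketch_enable_irq(12);	/* PS/2 mouse: clear bit 12, rewrite slave IMR */
	return 0;
}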
117 | |||
118 | int i8259A_irq_pending(unsigned int irq) | ||
119 | { | ||
120 | unsigned int mask = 1<<irq; | ||
121 | unsigned long flags; | ||
122 | int ret; | ||
123 | |||
124 | spin_lock_irqsave(&i8259A_lock, flags); | ||
125 | if (irq < 8) | ||
126 | ret = inb(PIC_MASTER_CMD) & mask; | ||
127 | else | ||
128 | ret = inb(PIC_SLAVE_CMD) & (mask >> 8); | ||
129 | spin_unlock_irqrestore(&i8259A_lock, flags); | ||
130 | |||
131 | return ret; | ||
132 | } | ||
133 | |||
134 | void make_8259A_irq(unsigned int irq) | ||
135 | { | ||
136 | disable_irq_nosync(irq); | ||
137 | io_apic_irqs &= ~(1<<irq); | ||
138 | irq_desc[irq].handler = &i8259A_irq_type; | ||
139 | enable_irq(irq); | ||
140 | } | ||
141 | |||
142 | /* | ||
143 | * This function is assumed to be called rarely: switching between | ||
144 | * 8259A registers is slow. | ||
145 | * The caller must hold the irq controller spinlock | ||
146 | * when calling this. | ||
147 | */ | ||
148 | static inline int i8259A_irq_real(unsigned int irq) | ||
149 | { | ||
150 | int value; | ||
151 | int irqmask = 1<<irq; | ||
152 | |||
153 | if (irq < 8) { | ||
154 | outb(0x0B,PIC_MASTER_CMD); /* ISR register */ | ||
155 | value = inb(PIC_MASTER_CMD) & irqmask; | ||
156 | outb(0x0A,PIC_MASTER_CMD); /* back to the IRR register */ | ||
157 | return value; | ||
158 | } | ||
159 | outb(0x0B,PIC_SLAVE_CMD); /* ISR register */ | ||
160 | value = inb(PIC_SLAVE_CMD) & (irqmask >> 8); | ||
161 | outb(0x0A,PIC_SLAVE_CMD); /* back to the IRR register */ | ||
162 | return value; | ||
163 | } | ||
164 | |||
165 | /* | ||
166 | * Careful! The 8259A is a fragile beast; it pretty | ||
167 | * much _has_ to be done exactly like this (mask it | ||
168 | * first, _then_ send the EOI), and the order of EOI | ||
169 | * to the two 8259s is important! | ||
170 | */ | ||
171 | static void mask_and_ack_8259A(unsigned int irq) | ||
172 | { | ||
173 | unsigned int irqmask = 1 << irq; | ||
174 | unsigned long flags; | ||
175 | |||
176 | spin_lock_irqsave(&i8259A_lock, flags); | ||
177 | /* | ||
178 | * Lightweight spurious IRQ detection. We do not want | ||
179 | * to overdo spurious IRQ handling - it's usually a sign | ||
180 | * of hardware problems, so we only do the checks we can | ||
181 | * do without slowing down good hardware unnecessarily. | ||
182 | * | ||
183 | * Note that IRQ7 and IRQ15 (the two spurious IRQs | ||
184 | * usually resulting from the 8259A-1|2 PICs) occur | ||
185 | * even if the IRQ is masked in the 8259A. Thus we | ||
186 | * can check spurious 8259A IRQs without doing the | ||
187 | * quite slow i8259A_irq_real() call for every IRQ. | ||
188 | * This does not cover 100% of spurious interrupts, | ||
189 | * but should be enough to warn the user that there | ||
190 | * is something bad going on ... | ||
191 | */ | ||
192 | if (cached_irq_mask & irqmask) | ||
193 | goto spurious_8259A_irq; | ||
194 | cached_irq_mask |= irqmask; | ||
195 | |||
196 | handle_real_irq: | ||
197 | if (irq & 8) { | ||
198 | inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */ | ||
199 | outb(cached_slave_mask, PIC_SLAVE_IMR); | ||
200 | outb(0x60+(irq&7),PIC_SLAVE_CMD);/* 'Specific EOI' to slave */ | ||
201 | outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); /* 'Specific EOI' to master-IRQ2 */ | ||
202 | } else { | ||
203 | inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */ | ||
204 | outb(cached_master_mask, PIC_MASTER_IMR); | ||
205 | outb(0x60+irq,PIC_MASTER_CMD); /* 'Specific EOI' to master */ | ||
206 | } | ||
207 | spin_unlock_irqrestore(&i8259A_lock, flags); | ||
208 | return; | ||
209 | |||
210 | spurious_8259A_irq: | ||
211 | /* | ||
212 | * This is the slow path - it should happen rarely. | ||
213 | */ | ||
214 | if (i8259A_irq_real(irq)) | ||
215 | /* | ||
216 | * oops, the IRQ _is_ in service according to the | ||
217 | * 8259A - not spurious, go handle it. | ||
218 | */ | ||
219 | goto handle_real_irq; | ||
220 | |||
221 | { | ||
222 | static int spurious_irq_mask; | ||
223 | /* | ||
224 | * At this point we can be sure the IRQ is spurious, | ||
225 | * let's ACK and report it. [once per IRQ] | ||
226 | */ | ||
227 | if (!(spurious_irq_mask & irqmask)) { | ||
228 | printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq); | ||
229 | spurious_irq_mask |= irqmask; | ||
230 | } | ||
231 | atomic_inc(&irq_err_count); | ||
232 | /* | ||
233 | * Theoretically we do not have to handle this IRQ, | ||
234 | * but in Linux this does not cause problems and is | ||
235 | * simpler for us. | ||
236 | */ | ||
237 | goto handle_real_irq; | ||
238 | } | ||
239 | } | ||
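The magic 0x60 constants above are the 8259A 'specific EOI' command (OCW2), with the IR level in the low three bits; a slave IRQ needs two EOIs, one to the slave for (irq & 7) and one to the master for the cascade line IR2. A small illustrative sketch, assuming the standard encoding:

#include <stdio.h>

#define SPECIFIC_EOI(level)	(0x60 + ((level) & 7))	/* OCW2: specific EOI */
#define CASCADE_IR		2			/* slave hangs off master IR2 */

int main(void)
{
	unsigned int irq;

	for (irq = 0; irq < 16; irq++) {
		if (irq & 8)	/* slave IRQ: EOI the slave, then the cascade line */
			printf("IRQ%-2u: slave OCW2=0x%02x, master OCW2=0x%02x\n",
			       irq, SPECIFIC_EOI(irq), SPECIFIC_EOI(CASCADE_IR));
		else
			printf("IRQ%-2u: master OCW2=0x%02x\n",
			       irq, SPECIFIC_EOI(irq));
	}
	return 0;
}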
240 | |||
241 | static char irq_trigger[2]; | ||
242 | /* | ||
243 | * The ELCR registers (0x4d0, 0x4d1) control edge/level triggering of the IRQs. | ||
244 | */ | ||
245 | static void restore_ELCR(char *trigger) | ||
246 | { | ||
247 | outb(trigger[0], 0x4d0); | ||
248 | outb(trigger[1], 0x4d1); | ||
249 | } | ||
250 | |||
251 | static void save_ELCR(char *trigger) | ||
252 | { | ||
253 | /* IRQ 0,1,2,8,13 are marked as reserved */ | ||
254 | trigger[0] = inb(0x4d0) & 0xF8; | ||
255 | trigger[1] = inb(0x4d1) & 0xDE; | ||
256 | } | ||
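save_ELCR()/restore_ELCR() assume the usual ELCR layout: one trigger bit per IRQ (1 = level), port 0x4d0 covering IRQ0-7 and 0x4d1 covering IRQ8-15, which is also why the masks 0xF8 and 0xDE drop the reserved IRQs 0, 1, 2, 8 and 13. A standalone sketch of the lookup, with a made-up register snapshot:

#include <stdio.h>

/* 1 = level triggered; port 0x4d0 holds IRQ0-7, port 0x4d1 holds IRQ8-15 */
static int elcr_is_level(unsigned int irq, unsigned char elcr0, unsigned char elcr1)
{
	unsigned char reg = (irq < 8) ? elcr0 : elcr1;
	return (reg >> (irq & 7)) & 1;
}

int main(void)
{
	unsigned char elcr0 = 1 << 5;			/* IRQ5 level (example) */
	unsigned char elcr1 = (1 << 2) | (1 << 3);	/* IRQ10, IRQ11 level */
	unsigned int irq;

	for (irq = 0; irq < 16; irq++)
		printf("IRQ%-2u: %s\n", irq,
		       elcr_is_level(irq, elcr0, elcr1) ? "level" : "edge");
	return 0;
}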
257 | |||
258 | static int i8259A_resume(struct sys_device *dev) | ||
259 | { | ||
260 | init_8259A(0); | ||
261 | restore_ELCR(irq_trigger); | ||
262 | return 0; | ||
263 | } | ||
264 | |||
265 | static int i8259A_suspend(struct sys_device *dev, u32 state) | ||
266 | { | ||
267 | save_ELCR(irq_trigger); | ||
268 | return 0; | ||
269 | } | ||
270 | |||
271 | static struct sysdev_class i8259_sysdev_class = { | ||
272 | set_kset_name("i8259"), | ||
273 | .suspend = i8259A_suspend, | ||
274 | .resume = i8259A_resume, | ||
275 | }; | ||
276 | |||
277 | static struct sys_device device_i8259A = { | ||
278 | .id = 0, | ||
279 | .cls = &i8259_sysdev_class, | ||
280 | }; | ||
281 | |||
282 | static int __init i8259A_init_sysfs(void) | ||
283 | { | ||
284 | int error = sysdev_class_register(&i8259_sysdev_class); | ||
285 | if (!error) | ||
286 | error = sysdev_register(&device_i8259A); | ||
287 | return error; | ||
288 | } | ||
289 | |||
290 | device_initcall(i8259A_init_sysfs); | ||
291 | |||
292 | void init_8259A(int auto_eoi) | ||
293 | { | ||
294 | unsigned long flags; | ||
295 | |||
296 | spin_lock_irqsave(&i8259A_lock, flags); | ||
297 | |||
298 | outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ | ||
299 | outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ | ||
300 | |||
301 | /* | ||
302 | * outb_p - this has to work on a wide range of PC hardware. | ||
303 | */ | ||
304 | outb_p(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ | ||
305 | outb_p(0x20 + 0, PIC_MASTER_IMR); /* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */ | ||
306 | outb_p(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); /* 8259A-1 (the master) has a slave on IR2 */ | ||
307 | if (auto_eoi) /* master does Auto EOI */ | ||
308 | outb_p(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); | ||
309 | else /* master expects normal EOI */ | ||
310 | outb_p(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); | ||
311 | |||
312 | outb_p(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ | ||
313 | outb_p(0x20 + 8, PIC_SLAVE_IMR); /* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */ | ||
314 | outb_p(PIC_CASCADE_IR, PIC_SLAVE_IMR); /* 8259A-2 is a slave on master's IR2 */ | ||
315 | outb_p(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */ | ||
316 | if (auto_eoi) | ||
317 | /* | ||
318 | * in AEOI mode we just have to mask the interrupt | ||
319 | * when acking. | ||
320 | */ | ||
321 | i8259A_irq_type.ack = disable_8259A_irq; | ||
322 | else | ||
323 | i8259A_irq_type.ack = mask_and_ack_8259A; | ||
324 | |||
325 | udelay(100); /* wait for 8259A to initialize */ | ||
326 | |||
327 | outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ | ||
328 | outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ | ||
329 | |||
330 | spin_unlock_irqrestore(&i8259A_lock, flags); | ||
331 | } | ||
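The write sequence above is the standard four-byte ICW handshake: after ICW1 on the command port, the next three writes to the data port are interpreted as ICW2 (vector base), ICW3 (cascade wiring) and ICW4 (mode), which is why the code pokes them through PIC_*_IMR. Summarized as a table sketch, using the usual PC values (the ICW4 bytes are assumed from the standard MASTER/SLAVE_ICW4_DEFAULT definitions, not shown in this hunk):

#include <stdio.h>

struct icw_step {
	const char *what;
	unsigned char master, slave;
};

int main(void)
{
	static const struct icw_step seq[] = {
		{ "ICW1 (cmd port):  init, ICW4 needed", 0x11, 0x11 },
		{ "ICW2 (data port): vector base",       0x20, 0x28 },
		{ "ICW3 (data port): cascade via IR2",   0x04, 0x02 },
		{ "ICW4 (data port): 8086 mode",         0x01, 0x01 },
	};
	unsigned int i;

	for (i = 0; i < sizeof(seq) / sizeof(seq[0]); i++)
		printf("%-38s master=0x%02x slave=0x%02x\n",
		       seq[i].what, seq[i].master, seq[i].slave);
	return 0;
}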
332 | |||
333 | /* | ||
334 | * Note that on a 486, we don't want to do a SIGFPE on an irq13 | ||
335 | * as the irq is unreliable, and exception 16 works correctly | ||
336 | * (i.e. as explained in the Intel literature). On a 386, you | ||
337 | * can't use exception 16 due to bad IBM design, so we have to | ||
338 | * rely on the less exact irq13. | ||
339 | * | ||
340 | * Careful.. Not only is IRQ13 unreliable, it also | ||
341 | * leads to races. The IBM designers who came up with it should | ||
342 | * be shot. | ||
343 | */ | ||
344 | |||
345 | |||
346 | static irqreturn_t math_error_irq(int cpl, void *dev_id, struct pt_regs *regs) | ||
347 | { | ||
348 | extern void math_error(void __user *); | ||
349 | outb(0,0xF0); | ||
350 | if (ignore_fpu_irq || !boot_cpu_data.hard_math) | ||
351 | return IRQ_NONE; | ||
352 | math_error((void __user *)regs->eip); | ||
353 | return IRQ_HANDLED; | ||
354 | } | ||
355 | |||
356 | /* | ||
357 | * New motherboards sometimes make IRQ 13 be a PCI interrupt, | ||
358 | * so allow interrupt sharing. | ||
359 | */ | ||
360 | static struct irqaction fpu_irq = { math_error_irq, 0, CPU_MASK_NONE, "fpu", NULL, NULL }; | ||
361 | |||
362 | void __init init_ISA_irqs (void) | ||
363 | { | ||
364 | int i; | ||
365 | |||
366 | #ifdef CONFIG_X86_LOCAL_APIC | ||
367 | init_bsp_APIC(); | ||
368 | #endif | ||
369 | init_8259A(0); | ||
370 | |||
371 | for (i = 0; i < NR_IRQS; i++) { | ||
372 | irq_desc[i].status = IRQ_DISABLED; | ||
373 | irq_desc[i].action = NULL; | ||
374 | irq_desc[i].depth = 1; | ||
375 | |||
376 | if (i < 16) { | ||
377 | /* | ||
378 | * 16 old-style INTA-cycle interrupts: | ||
379 | */ | ||
380 | irq_desc[i].handler = &i8259A_irq_type; | ||
381 | } else { | ||
382 | /* | ||
383 | * 'high' PCI IRQs filled in on demand | ||
384 | */ | ||
385 | irq_desc[i].handler = &no_irq_type; | ||
386 | } | ||
387 | } | ||
388 | } | ||
389 | |||
390 | void __init init_IRQ(void) | ||
391 | { | ||
392 | int i; | ||
393 | |||
394 | /* all the setup before the call gates are initialised */ | ||
395 | pre_intr_init_hook(); | ||
396 | |||
397 | /* | ||
398 | * Cover the whole vector space, no vector can escape | ||
399 | * us. (some of these will be overridden and become | ||
400 | * 'special' SMP interrupts) | ||
401 | */ | ||
402 | for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { | ||
403 | int vector = FIRST_EXTERNAL_VECTOR + i; | ||
404 | if (i >= NR_IRQS) | ||
405 | break; | ||
406 | if (vector != SYSCALL_VECTOR) | ||
407 | set_intr_gate(vector, interrupt[i]); | ||
408 | } | ||
409 | |||
410 | /* setup after the call gates are initialised (usually adds in | ||
411 | * the architecture-specific gates) | ||
412 | */ | ||
413 | intr_init_hook(); | ||
414 | |||
415 | /* | ||
416 | * Set the clock to HZ Hz, we already have a valid | ||
417 | * vector now: | ||
418 | */ | ||
419 | setup_pit_timer(); | ||
420 | |||
421 | /* | ||
422 | * External FPU? Set up irq13 if so, for | ||
423 | * original braindamaged IBM FERR coupling. | ||
424 | */ | ||
425 | if (boot_cpu_data.hard_math && !cpu_has_fpu) | ||
426 | setup_irq(FPU_IRQ, &fpu_irq); | ||
427 | |||
428 | irq_ctx_init(smp_processor_id()); | ||
429 | } | ||
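A quick sanity sketch of the vector loop above, assuming the usual i386 values FIRST_EXTERNAL_VECTOR = 0x20 and SYSCALL_VECTOR = 0x80: every external vector gets an interrupt gate except 0x80, which stays free for the int $0x80 system-call gate (the loop also stops early if NR_IRQS is smaller than the vector space; that cap is omitted here):

#include <stdio.h>

#define FIRST_EXTERNAL_VECTOR	0x20	/* usual i386 values, assumed */
#define NR_VECTORS		256
#define SYSCALL_VECTOR		0x80

int main(void)
{
	int i, gates = 0;

	for (i = 0; i < NR_VECTORS - FIRST_EXTERNAL_VECTOR; i++) {
		int vector = FIRST_EXTERNAL_VECTOR + i;

		if (vector != SYSCALL_VECTOR)
			gates++;	/* stands in for set_intr_gate(vector, interrupt[i]) */
	}
	printf("interrupt gates installed: %d of %d external vectors\n",
	       gates, NR_VECTORS - FIRST_EXTERNAL_VECTOR);
	return 0;
}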
diff --git a/arch/i386/kernel/init_task.c b/arch/i386/kernel/init_task.c new file mode 100644 index 000000000000..9caa8e8db80c --- /dev/null +++ b/arch/i386/kernel/init_task.c | |||
@@ -0,0 +1,46 @@ | |||
1 | #include <linux/mm.h> | ||
2 | #include <linux/module.h> | ||
3 | #include <linux/sched.h> | ||
4 | #include <linux/init.h> | ||
5 | #include <linux/init_task.h> | ||
6 | #include <linux/fs.h> | ||
7 | #include <linux/mqueue.h> | ||
8 | |||
9 | #include <asm/uaccess.h> | ||
10 | #include <asm/pgtable.h> | ||
11 | #include <asm/desc.h> | ||
12 | |||
13 | static struct fs_struct init_fs = INIT_FS; | ||
14 | static struct files_struct init_files = INIT_FILES; | ||
15 | static struct signal_struct init_signals = INIT_SIGNALS(init_signals); | ||
16 | static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); | ||
17 | struct mm_struct init_mm = INIT_MM(init_mm); | ||
18 | |||
19 | EXPORT_SYMBOL(init_mm); | ||
20 | |||
21 | /* | ||
22 | * Initial thread structure. | ||
23 | * | ||
24 | * We need to make sure that this is THREAD_SIZE aligned due to the | ||
25 | * way process stacks are handled. This is done by having a special | ||
26 | * "init_task" linker map entry.. | ||
27 | */ | ||
28 | union thread_union init_thread_union | ||
29 | __attribute__((__section__(".data.init_task"))) = | ||
30 | { INIT_THREAD_INFO(init_task) }; | ||
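Why the THREAD_SIZE alignment matters, as a sketch: i386 recovers the thread_info pointer by masking the stack pointer, so the combined stack + thread_info block must start on a THREAD_SIZE boundary. Illustrative user-space C; the 8 KB THREAD_SIZE and the addresses are assumptions for the example:

#include <stdio.h>
#include <stdint.h>

#define THREAD_SIZE 8192	/* assumed 8 KB stacks for the example */

/* same trick as the i386 current_thread_info(): mask the stack pointer */
static uintptr_t thread_info_of(uintptr_t esp)
{
	return esp & ~((uintptr_t)THREAD_SIZE - 1);
}

int main(void)
{
	uintptr_t base = 0xc0104000UL;			/* hypothetical aligned block */
	uintptr_t esp = base + THREAD_SIZE - 64;	/* somewhere on that stack */

	printf("esp=0x%lx -> thread_info=0x%lx\n",
	       (unsigned long)esp, (unsigned long)thread_info_of(esp));
	return 0;
}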
31 | |||
32 | /* | ||
33 | * Initial task structure. | ||
34 | * | ||
35 | * All other task structs will be allocated on slabs in fork.c | ||
36 | */ | ||
37 | struct task_struct init_task = INIT_TASK(init_task); | ||
38 | |||
39 | EXPORT_SYMBOL(init_task); | ||
40 | |||
41 | /* | ||
42 | * per-CPU TSS segments. Threads are completely 'soft' on Linux, | ||
43 | * no more per-task TSS's. | ||
44 | */ | ||
45 | DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_maxaligned_in_smp = INIT_TSS; | ||
46 | |||
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c new file mode 100644 index 000000000000..9c1350e811d0 --- /dev/null +++ b/arch/i386/kernel/io_apic.c | |||
@@ -0,0 +1,2545 @@ | |||
1 | /* | ||
2 | * Intel IO-APIC support for multi-Pentium hosts. | ||
3 | * | ||
4 | * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo | ||
5 | * | ||
6 | * Many thanks to Stig Venaas for trying out countless experimental | ||
7 | * patches and reporting/debugging problems patiently! | ||
8 | * | ||
9 | * (c) 1999, Multiple IO-APIC support, developed by | ||
10 | * Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and | ||
11 | * Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>, | ||
12 | * further tested and cleaned up by Zach Brown <zab@redhat.com> | ||
13 | * and Ingo Molnar <mingo@redhat.com> | ||
14 | * | ||
15 | * Fixes | ||
16 | * Maciej W. Rozycki : Bits for genuine 82489DX APICs; | ||
17 | * thanks to Eric Gilmore | ||
18 | * and Rolf G. Tews | ||
19 | * for testing these extensively | ||
20 | * Paul Diefenbaugh : Added full ACPI support | ||
21 | */ | ||
22 | |||
23 | #include <linux/mm.h> | ||
24 | #include <linux/irq.h> | ||
25 | #include <linux/interrupt.h> | ||
26 | #include <linux/init.h> | ||
27 | #include <linux/delay.h> | ||
28 | #include <linux/sched.h> | ||
29 | #include <linux/config.h> | ||
30 | #include <linux/smp_lock.h> | ||
31 | #include <linux/mc146818rtc.h> | ||
32 | #include <linux/compiler.h> | ||
33 | #include <linux/acpi.h> | ||
34 | |||
35 | #include <linux/sysdev.h> | ||
36 | #include <asm/io.h> | ||
37 | #include <asm/smp.h> | ||
38 | #include <asm/desc.h> | ||
39 | #include <asm/timer.h> | ||
40 | |||
41 | #include <mach_apic.h> | ||
42 | |||
43 | #include "io_ports.h" | ||
44 | |||
45 | int (*ioapic_renumber_irq)(int ioapic, int irq); | ||
46 | atomic_t irq_mis_count; | ||
47 | |||
48 | static DEFINE_SPINLOCK(ioapic_lock); | ||
49 | |||
50 | /* | ||
51 | * Is the SiS APIC rmw bug present ? | ||
52 | * -1 = don't know, 0 = no, 1 = yes | ||
53 | */ | ||
54 | int sis_apic_bug = -1; | ||
55 | |||
56 | /* | ||
57 | * # of IRQ routing registers | ||
58 | */ | ||
59 | int nr_ioapic_registers[MAX_IO_APICS]; | ||
60 | |||
61 | /* | ||
62 | * A rough estimate of how many shared IRQs there are; this can | ||
63 | * be changed anytime. | ||
64 | */ | ||
65 | #define MAX_PLUS_SHARED_IRQS NR_IRQS | ||
66 | #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) | ||
67 | |||
68 | /* | ||
69 | * This is performance-critical; we want to do it O(1). | ||
70 | * | ||
71 | * The indexing order of this array favors 1:1 mappings | ||
72 | * between pins and IRQs. | ||
73 | */ | ||
74 | |||
75 | static struct irq_pin_list { | ||
76 | int apic, pin, next; | ||
77 | } irq_2_pin[PIN_MAP_SIZE]; | ||
78 | |||
79 | int vector_irq[NR_VECTORS] = { [0 ... NR_VECTORS - 1] = -1}; | ||
80 | #ifdef CONFIG_PCI_MSI | ||
81 | #define vector_to_irq(vector) \ | ||
82 | (platform_legacy_irq(vector) ? vector : vector_irq[vector]) | ||
83 | #else | ||
84 | #define vector_to_irq(vector) (vector) | ||
85 | #endif | ||
86 | |||
87 | /* | ||
88 | * The common case is 1:1 IRQ<->pin mappings. Sometimes there are | ||
89 | * shared ISA-space IRQs, so we have to support them. We are super | ||
90 | * fast in the common case, and fast for shared ISA-space IRQs. | ||
91 | */ | ||
92 | static void add_pin_to_irq(unsigned int irq, int apic, int pin) | ||
93 | { | ||
94 | static int first_free_entry = NR_IRQS; | ||
95 | struct irq_pin_list *entry = irq_2_pin + irq; | ||
96 | |||
97 | while (entry->next) | ||
98 | entry = irq_2_pin + entry->next; | ||
99 | |||
100 | if (entry->pin != -1) { | ||
101 | entry->next = first_free_entry; | ||
102 | entry = irq_2_pin + entry->next; | ||
103 | if (++first_free_entry >= PIN_MAP_SIZE) | ||
104 | panic("io_apic.c: whoops"); | ||
105 | } | ||
106 | entry->apic = apic; | ||
107 | entry->pin = pin; | ||
108 | } | ||
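The irq_2_pin structure built above is a fixed array whose first NR_IRQS slots act as list heads (one per IRQ) and whose remaining slots are overflow entries chained through 'next' for IRQs with more than one (apic, pin). A standalone sketch with made-up sizes; note the real kernel initializes every pin field to -1 elsewhere (not in this hunk), which the sketch does explicitly:

#include <stdio.h>

#define NR_IRQS		16	/* made-up sizes for the sketch */
#define PIN_MAP_SIZE	32

static struct pin_entry {
	int apic, pin, next;
} pin_map[PIN_MAP_SIZE];

static int first_free = NR_IRQS;

static void pin_map_init(void)
{
	int i;

	for (i = 0; i < PIN_MAP_SIZE; i++)
		pin_map[i].pin = -1;	/* -1 marks an unused slot */
}

static void sketch_add_pin(unsigned int irq, int apic, int pin)
{
	struct pin_entry *e = pin_map + irq;

	while (e->next)			/* walk to the end of this IRQ's chain */
		e = pin_map + e->next;
	if (e->pin != -1) {		/* head slot taken: chain an overflow entry */
		e->next = first_free++;
		e = pin_map + e->next;
	}
	e->apic = apic;
	e->pin = pin;
}

int main(void)
{
	pin_map_init();
	sketch_add_pin(9, 0, 9);	/* first mapping lands in the head slot */
	sketch_add_pin(9, 0, 11);	/* second mapping chains past NR_IRQS */
	printf("irq 9: head pin %d, overflow slot %d holds pin %d\n",
	       pin_map[9].pin, pin_map[9].next, pin_map[pin_map[9].next].pin);
	return 0;
}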
109 | |||
110 | /* | ||
111 | * Reroute an IRQ to a different pin. | ||
112 | */ | ||
113 | static void __init replace_pin_at_irq(unsigned int irq, | ||
114 | int oldapic, int oldpin, | ||
115 | int newapic, int newpin) | ||
116 | { | ||
117 | struct irq_pin_list *entry = irq_2_pin + irq; | ||
118 | |||
119 | while (1) { | ||
120 | if (entry->apic == oldapic && entry->pin == oldpin) { | ||
121 | entry->apic = newapic; | ||
122 | entry->pin = newpin; | ||
123 | } | ||
124 | if (!entry->next) | ||
125 | break; | ||
126 | entry = irq_2_pin + entry->next; | ||
127 | } | ||
128 | } | ||
129 | |||
130 | static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable) | ||
131 | { | ||
132 | struct irq_pin_list *entry = irq_2_pin + irq; | ||
133 | unsigned int pin, reg; | ||
134 | |||
135 | for (;;) { | ||
136 | pin = entry->pin; | ||
137 | if (pin == -1) | ||
138 | break; | ||
139 | reg = io_apic_read(entry->apic, 0x10 + pin*2); | ||
140 | reg &= ~disable; | ||
141 | reg |= enable; | ||
142 | io_apic_modify(entry->apic, 0x10 + pin*2, reg); | ||
143 | if (!entry->next) | ||
144 | break; | ||
145 | entry = irq_2_pin + entry->next; | ||
146 | } | ||
147 | } | ||
148 | |||
149 | /* mask = 1 */ | ||
150 | static void __mask_IO_APIC_irq (unsigned int irq) | ||
151 | { | ||
152 | __modify_IO_APIC_irq(irq, 0x00010000, 0); | ||
153 | } | ||
154 | |||
155 | /* mask = 0 */ | ||
156 | static void __unmask_IO_APIC_irq (unsigned int irq) | ||
157 | { | ||
158 | __modify_IO_APIC_irq(irq, 0, 0x00010000); | ||
159 | } | ||
160 | |||
161 | /* mask = 1, trigger = 0 */ | ||
162 | static void __mask_and_edge_IO_APIC_irq (unsigned int irq) | ||
163 | { | ||
164 | __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000); | ||
165 | } | ||
166 | |||
167 | /* mask = 0, trigger = 1 */ | ||
168 | static void __unmask_and_level_IO_APIC_irq (unsigned int irq) | ||
169 | { | ||
170 | __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000); | ||
171 | } | ||
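All four helpers above drive just two bits in the low dword of an I/O APIC redirection entry: bit 16 is the mask bit and bit 15 the trigger mode (1 = level). A small illustrative sketch of the read-modify-write semantics:

#include <stdio.h>

#define RTE_MASKED	0x00010000	/* bit 16 of the RTE low dword */
#define RTE_LEVEL	0x00008000	/* bit 15: 1 = level triggered */

/* same semantics as __modify_IO_APIC_irq(): clear 'disable', set 'enable' */
static unsigned int modify(unsigned int reg, unsigned int enable,
			   unsigned int disable)
{
	return (reg & ~disable) | enable;
}

int main(void)
{
	unsigned int reg = RTE_LEVEL;	/* start: unmasked, level triggered */

	reg = modify(reg, RTE_MASKED, 0);		/* __mask_IO_APIC_irq */
	reg = modify(reg, 0, RTE_MASKED);		/* __unmask_IO_APIC_irq */
	reg = modify(reg, RTE_MASKED, RTE_LEVEL);	/* mask and force edge */
	reg = modify(reg, RTE_LEVEL, RTE_MASKED);	/* unmask and force level */
	printf("final RTE low dword: 0x%08x\n", reg);
	return 0;
}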
172 | |||
173 | static void mask_IO_APIC_irq (unsigned int irq) | ||
174 | { | ||
175 | unsigned long flags; | ||
176 | |||
177 | spin_lock_irqsave(&ioapic_lock, flags); | ||
178 | __mask_IO_APIC_irq(irq); | ||
179 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
180 | } | ||
181 | |||
182 | static void unmask_IO_APIC_irq (unsigned int irq) | ||
183 | { | ||
184 | unsigned long flags; | ||
185 | |||
186 | spin_lock_irqsave(&ioapic_lock, flags); | ||
187 | __unmask_IO_APIC_irq(irq); | ||
188 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
189 | } | ||
190 | |||
191 | static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) | ||
192 | { | ||
193 | struct IO_APIC_route_entry entry; | ||
194 | unsigned long flags; | ||
195 | |||
196 | /* Check delivery_mode to be sure we're not clearing an SMI pin */ | ||
197 | spin_lock_irqsave(&ioapic_lock, flags); | ||
198 | *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); | ||
199 | *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin); | ||
200 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
201 | if (entry.delivery_mode == dest_SMI) | ||
202 | return; | ||
203 | |||
204 | /* | ||
205 | * Disable it in the IO-APIC irq-routing table: | ||
206 | */ | ||
207 | memset(&entry, 0, sizeof(entry)); | ||
208 | entry.mask = 1; | ||
209 | spin_lock_irqsave(&ioapic_lock, flags); | ||
210 | io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0)); | ||
211 | io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1)); | ||
212 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
213 | } | ||
214 | |||
215 | static void clear_IO_APIC (void) | ||
216 | { | ||
217 | int apic, pin; | ||
218 | |||
219 | for (apic = 0; apic < nr_ioapics; apic++) | ||
220 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) | ||
221 | clear_IO_APIC_pin(apic, pin); | ||
222 | } | ||
223 | |||
224 | static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) | ||
225 | { | ||
226 | unsigned long flags; | ||
227 | int pin; | ||
228 | struct irq_pin_list *entry = irq_2_pin + irq; | ||
229 | unsigned int apicid_value; | ||
230 | |||
231 | apicid_value = cpu_mask_to_apicid(cpumask); | ||
232 | /* Prepare to do the io_apic_write */ | ||
233 | apicid_value = apicid_value << 24; | ||
234 | spin_lock_irqsave(&ioapic_lock, flags); | ||
235 | for (;;) { | ||
236 | pin = entry->pin; | ||
237 | if (pin == -1) | ||
238 | break; | ||
239 | io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value); | ||
240 | if (!entry->next) | ||
241 | break; | ||
242 | entry = irq_2_pin + entry->next; | ||
243 | } | ||
244 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
245 | } | ||
246 | |||
247 | #if defined(CONFIG_IRQBALANCE) | ||
248 | # include <asm/processor.h> /* kernel_thread() */ | ||
249 | # include <linux/kernel_stat.h> /* kstat */ | ||
250 | # include <linux/slab.h> /* kmalloc() */ | ||
251 | # include <linux/timer.h> /* time_after() */ | ||
252 | |||
253 | # ifdef CONFIG_BALANCED_IRQ_DEBUG | ||
254 | # define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0) | ||
255 | # define Dprintk(x...) do { TDprintk(x); } while (0) | ||
256 | # else | ||
257 | # define TDprintk(x...) | ||
258 | # define Dprintk(x...) | ||
259 | # endif | ||
260 | |||
261 | cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS]; | ||
262 | |||
263 | #define IRQBALANCE_CHECK_ARCH -999 | ||
264 | static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH; | ||
265 | static int physical_balance = 0; | ||
266 | |||
267 | static struct irq_cpu_info { | ||
268 | unsigned long * last_irq; | ||
269 | unsigned long * irq_delta; | ||
270 | unsigned long irq; | ||
271 | } irq_cpu_data[NR_CPUS]; | ||
272 | |||
273 | #define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq) | ||
274 | #define LAST_CPU_IRQ(cpu,irq) (irq_cpu_data[cpu].last_irq[irq]) | ||
275 | #define IRQ_DELTA(cpu,irq) (irq_cpu_data[cpu].irq_delta[irq]) | ||
276 | |||
277 | #define IDLE_ENOUGH(cpu,now) \ | ||
278 | (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1)) | ||
279 | |||
280 | #define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask) | ||
281 | |||
282 | #define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i])) | ||
283 | |||
284 | #define MAX_BALANCED_IRQ_INTERVAL (5*HZ) | ||
285 | #define MIN_BALANCED_IRQ_INTERVAL (HZ/2) | ||
286 | #define BALANCED_IRQ_MORE_DELTA (HZ/10) | ||
287 | #define BALANCED_IRQ_LESS_DELTA (HZ) | ||
288 | |||
289 | static long balanced_irq_interval = MAX_BALANCED_IRQ_INTERVAL; | ||
290 | |||
291 | static unsigned long move(int curr_cpu, cpumask_t allowed_mask, | ||
292 | unsigned long now, int direction) | ||
293 | { | ||
294 | int search_idle = 1; | ||
295 | int cpu = curr_cpu; | ||
296 | |||
297 | goto inside; | ||
298 | |||
299 | do { | ||
300 | if (unlikely(cpu == curr_cpu)) | ||
301 | search_idle = 0; | ||
302 | inside: | ||
303 | if (direction == 1) { | ||
304 | cpu++; | ||
305 | if (cpu >= NR_CPUS) | ||
306 | cpu = 0; | ||
307 | } else { | ||
308 | cpu--; | ||
309 | if (cpu == -1) | ||
310 | cpu = NR_CPUS-1; | ||
311 | } | ||
312 | } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) || | ||
313 | (search_idle && !IDLE_ENOUGH(cpu,now))); | ||
314 | |||
315 | return cpu; | ||
316 | } | ||
317 | |||
318 | static inline void balance_irq(int cpu, int irq) | ||
319 | { | ||
320 | unsigned long now = jiffies; | ||
321 | cpumask_t allowed_mask; | ||
322 | unsigned int new_cpu; | ||
323 | |||
324 | if (irqbalance_disabled) | ||
325 | return; | ||
326 | |||
327 | cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]); | ||
328 | new_cpu = move(cpu, allowed_mask, now, 1); | ||
329 | if (cpu != new_cpu) { | ||
330 | irq_desc_t *desc = irq_desc + irq; | ||
331 | unsigned long flags; | ||
332 | |||
333 | spin_lock_irqsave(&desc->lock, flags); | ||
334 | pending_irq_balance_cpumask[irq] = cpumask_of_cpu(new_cpu); | ||
335 | spin_unlock_irqrestore(&desc->lock, flags); | ||
336 | } | ||
337 | } | ||
338 | |||
339 | static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold) | ||
340 | { | ||
341 | int i, j; | ||
342 | Dprintk("Rotating IRQs among CPUs.\n"); | ||
343 | for (i = 0; i < NR_CPUS; i++) { | ||
344 | for (j = 0; cpu_online(i) && (j < NR_IRQS); j++) { | ||
345 | if (!irq_desc[j].action) | ||
346 | continue; | ||
347 | /* Is it a significant load ? */ | ||
348 | if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) < | ||
349 | useful_load_threshold) | ||
350 | continue; | ||
351 | balance_irq(i, j); | ||
352 | } | ||
353 | } | ||
354 | balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, | ||
355 | balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); | ||
356 | return; | ||
357 | } | ||
358 | |||
359 | static void do_irq_balance(void) | ||
360 | { | ||
361 | int i, j; | ||
362 | unsigned long max_cpu_irq = 0, min_cpu_irq = (~0); | ||
363 | unsigned long move_this_load = 0; | ||
364 | int max_loaded = 0, min_loaded = 0; | ||
365 | int load; | ||
366 | unsigned long useful_load_threshold = balanced_irq_interval + 10; | ||
367 | int selected_irq; | ||
368 | int tmp_loaded, first_attempt = 1; | ||
369 | unsigned long tmp_cpu_irq; | ||
370 | unsigned long imbalance = 0; | ||
371 | cpumask_t allowed_mask, target_cpu_mask, tmp; | ||
372 | |||
373 | for (i = 0; i < NR_CPUS; i++) { | ||
374 | int package_index; | ||
375 | CPU_IRQ(i) = 0; | ||
376 | if (!cpu_online(i)) | ||
377 | continue; | ||
378 | package_index = CPU_TO_PACKAGEINDEX(i); | ||
379 | for (j = 0; j < NR_IRQS; j++) { | ||
380 | unsigned long value_now, delta; | ||
381 | /* Is this an active IRQ? */ | ||
382 | if (!irq_desc[j].action) | ||
383 | continue; | ||
384 | if ( package_index == i ) | ||
385 | IRQ_DELTA(package_index,j) = 0; | ||
386 | /* Determine the total count per processor per IRQ */ | ||
387 | value_now = (unsigned long) kstat_cpu(i).irqs[j]; | ||
388 | |||
389 | /* Determine the activity per processor per IRQ */ | ||
390 | delta = value_now - LAST_CPU_IRQ(i,j); | ||
391 | |||
392 | /* Update last_cpu_irq[][] for the next time */ | ||
393 | LAST_CPU_IRQ(i,j) = value_now; | ||
394 | |||
395 | /* Ignore IRQs whose rate is less than the clock */ | ||
396 | if (delta < useful_load_threshold) | ||
397 | continue; | ||
398 | /* update the load for the processor or package total */ | ||
399 | IRQ_DELTA(package_index,j) += delta; | ||
400 | |||
401 | /* Keep track of the higher numbered sibling as well */ | ||
402 | if (i != package_index) | ||
403 | CPU_IRQ(i) += delta; | ||
404 | /* | ||
405 | * We have sibling A and sibling B in the package | ||
406 | * | ||
407 | * cpu_irq[A] = load for cpu A + load for cpu B | ||
408 | * cpu_irq[B] = load for cpu B | ||
409 | */ | ||
410 | CPU_IRQ(package_index) += delta; | ||
411 | } | ||
412 | } | ||
413 | /* Find the least loaded processor package */ | ||
414 | for (i = 0; i < NR_CPUS; i++) { | ||
415 | if (!cpu_online(i)) | ||
416 | continue; | ||
417 | if (i != CPU_TO_PACKAGEINDEX(i)) | ||
418 | continue; | ||
419 | if (min_cpu_irq > CPU_IRQ(i)) { | ||
420 | min_cpu_irq = CPU_IRQ(i); | ||
421 | min_loaded = i; | ||
422 | } | ||
423 | } | ||
424 | max_cpu_irq = ULONG_MAX; | ||
425 | |||
426 | tryanothercpu: | ||
427 | /* Look for the heaviest loaded processor. | ||
428 | * We may come back to get the next heaviest loaded processor. | ||
429 | * Skip processors with trivial loads. | ||
430 | */ | ||
431 | tmp_cpu_irq = 0; | ||
432 | tmp_loaded = -1; | ||
433 | for (i = 0; i < NR_CPUS; i++) { | ||
434 | if (!cpu_online(i)) | ||
435 | continue; | ||
436 | if (i != CPU_TO_PACKAGEINDEX(i)) | ||
437 | continue; | ||
438 | if (max_cpu_irq <= CPU_IRQ(i)) | ||
439 | continue; | ||
440 | if (tmp_cpu_irq < CPU_IRQ(i)) { | ||
441 | tmp_cpu_irq = CPU_IRQ(i); | ||
442 | tmp_loaded = i; | ||
443 | } | ||
444 | } | ||
445 | |||
446 | if (tmp_loaded == -1) { | ||
447 | /* In the case of a small number of heavy interrupt sources, | ||
448 | * some cpus get loaded too much. We use Ingo's original | ||
449 | * approach of rotating them around. | ||
450 | */ | ||
451 | if (!first_attempt && imbalance >= useful_load_threshold) { | ||
452 | rotate_irqs_among_cpus(useful_load_threshold); | ||
453 | return; | ||
454 | } | ||
455 | goto not_worth_the_effort; | ||
456 | } | ||
457 | |||
458 | first_attempt = 0; /* heaviest search */ | ||
459 | max_cpu_irq = tmp_cpu_irq; /* load */ | ||
460 | max_loaded = tmp_loaded; /* processor */ | ||
461 | imbalance = (max_cpu_irq - min_cpu_irq) / 2; | ||
462 | |||
463 | Dprintk("max_loaded cpu = %d\n", max_loaded); | ||
464 | Dprintk("min_loaded cpu = %d\n", min_loaded); | ||
465 | Dprintk("max_cpu_irq load = %ld\n", max_cpu_irq); | ||
466 | Dprintk("min_cpu_irq load = %ld\n", min_cpu_irq); | ||
467 | Dprintk("load imbalance = %lu\n", imbalance); | ||
468 | |||
469 | /* If the imbalance is less than approx 12% (1/8) of the max load, | ||
470 | * we hit diminishing returns - quit. | ||
471 | */ | ||
472 | if (imbalance < (max_cpu_irq >> 3)) { | ||
473 | Dprintk("Imbalance too trivial\n"); | ||
474 | goto not_worth_the_effort; | ||
475 | } | ||
476 | |||
477 | tryanotherirq: | ||
478 | /* if we select an IRQ to move that can't go where we want, then | ||
479 | * see if there is another one to try. | ||
480 | */ | ||
481 | move_this_load = 0; | ||
482 | selected_irq = -1; | ||
483 | for (j = 0; j < NR_IRQS; j++) { | ||
484 | /* Is this an active IRQ? */ | ||
485 | if (!irq_desc[j].action) | ||
486 | continue; | ||
487 | if (imbalance <= IRQ_DELTA(max_loaded,j)) | ||
488 | continue; | ||
489 | /* Try to find the IRQ that is closest to the imbalance | ||
490 | * without going over. | ||
491 | */ | ||
492 | if (move_this_load < IRQ_DELTA(max_loaded,j)) { | ||
493 | move_this_load = IRQ_DELTA(max_loaded,j); | ||
494 | selected_irq = j; | ||
495 | } | ||
496 | } | ||
497 | if (selected_irq == -1) { | ||
498 | goto tryanothercpu; | ||
499 | } | ||
500 | |||
501 | imbalance = move_this_load; | ||
502 | |||
503 | /* For the physical_balance case, we accumulated both load | ||
504 | * values in one of the siblings' cpu_irq[] slots, | ||
505 | * to use the same code for physical and logical processors | ||
506 | * as much as possible. | ||
507 | * | ||
508 | * NOTE: the cpu_irq[] array holds the sum of the load for | ||
509 | * sibling A and sibling B in the slot for the lowest numbered | ||
510 | * sibling (A), _AND_ the load for sibling B in the slot for | ||
511 | * the higher numbered sibling. | ||
512 | * | ||
513 | * We seek the least loaded sibling by making the comparison | ||
514 | * (A+B)/2 vs B | ||
515 | */ | ||
516 | load = CPU_IRQ(min_loaded) >> 1; | ||
517 | for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) { | ||
518 | if (load > CPU_IRQ(j)) { | ||
519 | /* This won't change cpu_sibling_map[min_loaded] */ | ||
520 | load = CPU_IRQ(j); | ||
521 | min_loaded = j; | ||
522 | } | ||
523 | } | ||
524 | |||
525 | cpus_and(allowed_mask, cpu_online_map, irq_affinity[selected_irq]); | ||
526 | target_cpu_mask = cpumask_of_cpu(min_loaded); | ||
527 | cpus_and(tmp, target_cpu_mask, allowed_mask); | ||
528 | |||
529 | if (!cpus_empty(tmp)) { | ||
530 | irq_desc_t *desc = irq_desc + selected_irq; | ||
531 | unsigned long flags; | ||
532 | |||
533 | Dprintk("irq = %d moved to cpu = %d\n", | ||
534 | selected_irq, min_loaded); | ||
535 | /* mark for change destination */ | ||
536 | spin_lock_irqsave(&desc->lock, flags); | ||
537 | pending_irq_balance_cpumask[selected_irq] = | ||
538 | cpumask_of_cpu(min_loaded); | ||
539 | spin_unlock_irqrestore(&desc->lock, flags); | ||
540 | /* Since we made a change, come back sooner to | ||
541 | * check for more variation. | ||
542 | */ | ||
543 | balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, | ||
544 | balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); | ||
545 | return; | ||
546 | } | ||
547 | goto tryanotherirq; | ||
548 | |||
549 | not_worth_the_effort: | ||
550 | /* | ||
551 | * If we did not find an IRQ to move, adjust the time interval | ||
552 | * upward. | ||
553 | */ | ||
554 | balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL, | ||
555 | balanced_irq_interval + BALANCED_IRQ_MORE_DELTA); | ||
556 | Dprintk("IRQ worth rotating not found\n"); | ||
557 | return; | ||
558 | } | ||
559 | |||
560 | static int balanced_irq(void *unused) | ||
561 | { | ||
562 | int i; | ||
563 | unsigned long prev_balance_time = jiffies; | ||
564 | long time_remaining = balanced_irq_interval; | ||
565 | |||
566 | daemonize("kirqd"); | ||
567 | |||
568 | /* push everything to CPU 0 to give us a starting point. */ | ||
569 | for (i = 0 ; i < NR_IRQS ; i++) { | ||
570 | pending_irq_balance_cpumask[i] = cpumask_of_cpu(0); | ||
571 | } | ||
572 | |||
573 | for ( ; ; ) { | ||
574 | set_current_state(TASK_INTERRUPTIBLE); | ||
575 | time_remaining = schedule_timeout(time_remaining); | ||
576 | try_to_freeze(PF_FREEZE); | ||
577 | if (time_after(jiffies, | ||
578 | prev_balance_time+balanced_irq_interval)) { | ||
579 | do_irq_balance(); | ||
580 | prev_balance_time = jiffies; | ||
581 | time_remaining = balanced_irq_interval; | ||
582 | } | ||
583 | } | ||
584 | return 0; | ||
585 | } | ||
586 | |||
587 | static int __init balanced_irq_init(void) | ||
588 | { | ||
589 | int i; | ||
590 | struct cpuinfo_x86 *c; | ||
591 | cpumask_t tmp; | ||
592 | |||
593 | cpus_shift_right(tmp, cpu_online_map, 2); | ||
594 | c = &boot_cpu_data; | ||
595 | /* When not overridden on the command line, ask the subarchitecture. */ | ||
596 | if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH) | ||
597 | irqbalance_disabled = NO_BALANCE_IRQ; | ||
598 | if (irqbalance_disabled) | ||
599 | return 0; | ||
600 | |||
601 | /* disable irqbalance completely if there is only one processor online */ | ||
602 | if (num_online_cpus() < 2) { | ||
603 | irqbalance_disabled = 1; | ||
604 | return 0; | ||
605 | } | ||
606 | /* | ||
607 | * Enable physical balance only if more than 1 physical processor | ||
608 | * is present | ||
609 | */ | ||
610 | if (smp_num_siblings > 1 && !cpus_empty(tmp)) | ||
611 | physical_balance = 1; | ||
612 | |||
613 | for (i = 0; i < NR_CPUS; i++) { | ||
614 | if (!cpu_online(i)) | ||
615 | continue; | ||
616 | irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); | ||
617 | irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); | ||
618 | if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) { | ||
619 | printk(KERN_ERR "balanced_irq_init: out of memory"); | ||
620 | goto failed; | ||
621 | } | ||
622 | memset(irq_cpu_data[i].irq_delta,0,sizeof(unsigned long) * NR_IRQS); | ||
623 | memset(irq_cpu_data[i].last_irq,0,sizeof(unsigned long) * NR_IRQS); | ||
624 | } | ||
625 | |||
626 | printk(KERN_INFO "Starting balanced_irq\n"); | ||
627 | if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0) | ||
628 | return 0; | ||
629 | else | ||
630 | printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); | ||
631 | failed: | ||
632 | for (i = 0; i < NR_CPUS; i++) { | ||
633 | if(irq_cpu_data[i].irq_delta) | ||
634 | kfree(irq_cpu_data[i].irq_delta); | ||
635 | if(irq_cpu_data[i].last_irq) | ||
636 | kfree(irq_cpu_data[i].last_irq); | ||
637 | } | ||
638 | return 0; | ||
639 | } | ||
640 | |||
641 | int __init irqbalance_disable(char *str) | ||
642 | { | ||
643 | irqbalance_disabled = 1; | ||
644 | return 0; | ||
645 | } | ||
646 | |||
647 | __setup("noirqbalance", irqbalance_disable); | ||
648 | |||
649 | static inline void move_irq(int irq) | ||
650 | { | ||
651 | /* note - we hold the desc->lock */ | ||
652 | if (unlikely(!cpus_empty(pending_irq_balance_cpumask[irq]))) { | ||
653 | set_ioapic_affinity_irq(irq, pending_irq_balance_cpumask[irq]); | ||
654 | cpus_clear(pending_irq_balance_cpumask[irq]); | ||
655 | } | ||
656 | } | ||
657 | |||
658 | late_initcall(balanced_irq_init); | ||
659 | |||
660 | #else /* !CONFIG_IRQBALANCE */ | ||
661 | static inline void move_irq(int irq) { } | ||
662 | #endif /* CONFIG_IRQBALANCE */ | ||
663 | |||
664 | #ifndef CONFIG_SMP | ||
665 | void fastcall send_IPI_self(int vector) | ||
666 | { | ||
667 | unsigned int cfg; | ||
668 | |||
669 | /* | ||
670 | * Wait for idle. | ||
671 | */ | ||
672 | apic_wait_icr_idle(); | ||
673 | cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL; | ||
674 | /* | ||
675 | * Send the IPI. The write to APIC_ICR fires this off. | ||
676 | */ | ||
677 | apic_write_around(APIC_ICR, cfg); | ||
678 | } | ||
679 | #endif /* !CONFIG_SMP */ | ||
680 | |||
681 | |||
682 | /* | ||
683 | * Support for broken MP BIOSes: enables hand-redirection of PIRQ0-7 to | ||
684 | * specific CPU-side IRQs. | ||
685 | */ | ||
686 | |||
687 | #define MAX_PIRQS 8 | ||
688 | static int pirq_entries [MAX_PIRQS]; | ||
689 | static int pirqs_enabled; | ||
690 | int skip_ioapic_setup; | ||
691 | |||
692 | static int __init ioapic_setup(char *str) | ||
693 | { | ||
694 | skip_ioapic_setup = 1; | ||
695 | return 1; | ||
696 | } | ||
697 | |||
698 | __setup("noapic", ioapic_setup); | ||
699 | |||
700 | static int __init ioapic_pirq_setup(char *str) | ||
701 | { | ||
702 | int i, max; | ||
703 | int ints[MAX_PIRQS+1]; | ||
704 | |||
705 | get_options(str, ARRAY_SIZE(ints), ints); | ||
706 | |||
707 | for (i = 0; i < MAX_PIRQS; i++) | ||
708 | pirq_entries[i] = -1; | ||
709 | |||
710 | pirqs_enabled = 1; | ||
711 | apic_printk(APIC_VERBOSE, KERN_INFO | ||
712 | "PIRQ redirection, working around broken MP-BIOS.\n"); | ||
713 | max = MAX_PIRQS; | ||
714 | if (ints[0] < MAX_PIRQS) | ||
715 | max = ints[0]; | ||
716 | |||
717 | for (i = 0; i < max; i++) { | ||
718 | apic_printk(APIC_VERBOSE, KERN_DEBUG | ||
719 | "... PIRQ%d -> IRQ %d\n", i, ints[i+1]); | ||
720 | /* | ||
721 | * PIRQs are mapped upside down, usually. | ||
722 | */ | ||
723 | pirq_entries[MAX_PIRQS-i-1] = ints[i+1]; | ||
724 | } | ||
725 | return 1; | ||
726 | } | ||
727 | |||
728 | __setup("pirq=", ioapic_pirq_setup); | ||
729 | |||
730 | /* | ||
731 | * Find the IRQ entry number of a certain pin. | ||
732 | */ | ||
733 | static int find_irq_entry(int apic, int pin, int type) | ||
734 | { | ||
735 | int i; | ||
736 | |||
737 | for (i = 0; i < mp_irq_entries; i++) | ||
738 | if (mp_irqs[i].mpc_irqtype == type && | ||
739 | (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid || | ||
740 | mp_irqs[i].mpc_dstapic == MP_APIC_ALL) && | ||
741 | mp_irqs[i].mpc_dstirq == pin) | ||
742 | return i; | ||
743 | |||
744 | return -1; | ||
745 | } | ||
746 | |||
747 | /* | ||
748 | * Find the pin to which IRQ[irq] (ISA) is connected | ||
749 | */ | ||
750 | static int find_isa_irq_pin(int irq, int type) | ||
751 | { | ||
752 | int i; | ||
753 | |||
754 | for (i = 0; i < mp_irq_entries; i++) { | ||
755 | int lbus = mp_irqs[i].mpc_srcbus; | ||
756 | |||
757 | if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || | ||
758 | mp_bus_id_to_type[lbus] == MP_BUS_EISA || | ||
759 | mp_bus_id_to_type[lbus] == MP_BUS_MCA || | ||
760 | mp_bus_id_to_type[lbus] == MP_BUS_NEC98 | ||
761 | ) && | ||
762 | (mp_irqs[i].mpc_irqtype == type) && | ||
763 | (mp_irqs[i].mpc_srcbusirq == irq)) | ||
764 | |||
765 | return mp_irqs[i].mpc_dstirq; | ||
766 | } | ||
767 | return -1; | ||
768 | } | ||
769 | |||
770 | /* | ||
771 | * Find a specific PCI IRQ entry. | ||
772 | * Not an __init; possibly needed by modules. | ||
773 | */ | ||
774 | static int pin_2_irq(int idx, int apic, int pin); | ||
775 | |||
776 | int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) | ||
777 | { | ||
778 | int apic, i, best_guess = -1; | ||
779 | |||
780 | apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, " | ||
781 | "slot:%d, pin:%d.\n", bus, slot, pin); | ||
782 | if (mp_bus_id_to_pci_bus[bus] == -1) { | ||
783 | printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus); | ||
784 | return -1; | ||
785 | } | ||
786 | for (i = 0; i < mp_irq_entries; i++) { | ||
787 | int lbus = mp_irqs[i].mpc_srcbus; | ||
788 | |||
789 | for (apic = 0; apic < nr_ioapics; apic++) | ||
790 | if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic || | ||
791 | mp_irqs[i].mpc_dstapic == MP_APIC_ALL) | ||
792 | break; | ||
793 | |||
794 | if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) && | ||
795 | !mp_irqs[i].mpc_irqtype && | ||
796 | (bus == lbus) && | ||
797 | (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { | ||
798 | int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq); | ||
799 | |||
800 | if (!(apic || IO_APIC_IRQ(irq))) | ||
801 | continue; | ||
802 | |||
803 | if (pin == (mp_irqs[i].mpc_srcbusirq & 3)) | ||
804 | return irq; | ||
805 | /* | ||
806 | * Use the first all-but-pin matching entry as a | ||
807 | * best-guess fuzzy result for broken mptables. | ||
808 | */ | ||
809 | if (best_guess < 0) | ||
810 | best_guess = irq; | ||
811 | } | ||
812 | } | ||
813 | return best_guess; | ||
814 | } | ||
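The slot/pin arithmetic above relies on the MP-table encoding of mpc_srcbusirq for PCI source buses: the device number sits in bits 2..6 and the interrupt pin (INTA#..INTD#, 0..3) in bits 0..1. A one-liner decoder as an illustrative sketch:

#include <stdio.h>

int main(void)
{
	unsigned char srcbusirq = (3 << 2) | 1;	/* device 3, pin INTB# (example) */

	printf("slot %d, pin INT%c#\n",
	       (srcbusirq >> 2) & 0x1f, 'A' + (srcbusirq & 3));
	return 0;
}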
815 | |||
816 | /* | ||
817 | * This function currently is only a helper for the i386 smp boot process, where | ||
818 | * we need to reprogram the ioredtbls to cater for the cpus which have come online, | ||
819 | * so the mask in all cases should simply be TARGET_CPUS. | ||
820 | */ | ||
821 | void __init setup_ioapic_dest(void) | ||
822 | { | ||
823 | int pin, ioapic, irq, irq_entry; | ||
824 | |||
825 | if (skip_ioapic_setup == 1) | ||
826 | return; | ||
827 | |||
828 | for (ioapic = 0; ioapic < nr_ioapics; ioapic++) { | ||
829 | for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { | ||
830 | irq_entry = find_irq_entry(ioapic, pin, mp_INT); | ||
831 | if (irq_entry == -1) | ||
832 | continue; | ||
833 | irq = pin_2_irq(irq_entry, ioapic, pin); | ||
834 | set_ioapic_affinity_irq(irq, TARGET_CPUS); | ||
835 | } | ||
836 | |||
837 | } | ||
838 | } | ||
839 | |||
840 | /* | ||
841 | * EISA Edge/Level control register, ELCR | ||
842 | */ | ||
843 | static int EISA_ELCR(unsigned int irq) | ||
844 | { | ||
845 | if (irq < 16) { | ||
846 | unsigned int port = 0x4d0 + (irq >> 3); | ||
847 | return (inb(port) >> (irq & 7)) & 1; | ||
848 | } | ||
849 | apic_printk(APIC_VERBOSE, KERN_INFO | ||
850 | "Broken MPtable reports ISA irq %d\n", irq); | ||
851 | return 0; | ||
852 | } | ||
853 | |||
854 | /* EISA interrupts are always polarity zero and can be edge or level | ||
855 | * trigger depending on the ELCR value. If an interrupt is listed as | ||
856 | * EISA conforming in the MP table, that means its trigger type must | ||
857 | * be read in from the ELCR */ | ||
858 | |||
859 | #define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq)) | ||
860 | #define default_EISA_polarity(idx) (0) | ||
861 | |||
862 | /* ISA interrupts are always polarity zero edge triggered, | ||
863 | * when listed as conforming in the MP table. */ | ||
864 | |||
865 | #define default_ISA_trigger(idx) (0) | ||
866 | #define default_ISA_polarity(idx) (0) | ||
867 | |||
868 | /* PCI interrupts are always polarity one level triggered, | ||
869 | * when listed as conforming in the MP table. */ | ||
870 | |||
871 | #define default_PCI_trigger(idx) (1) | ||
872 | #define default_PCI_polarity(idx) (1) | ||
873 | |||
874 | /* MCA interrupts are always polarity zero level triggered, | ||
875 | * when listed as conforming in the MP table. */ | ||
876 | |||
877 | #define default_MCA_trigger(idx) (1) | ||
878 | #define default_MCA_polarity(idx) (0) | ||
879 | |||
880 | /* NEC98 interrupts are always polarity zero edge triggered, | ||
881 | * when listed as conforming in the MP table. */ | ||
882 | |||
883 | #define default_NEC98_trigger(idx) (0) | ||
884 | #define default_NEC98_polarity(idx) (0) | ||
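The per-bus defaults above, condensed into one table: trigger (1 = level, -1 = read from the ELCR) and polarity (1 = active-low) when the MP table marks an interrupt as 'conforming'. Illustrative C mirroring the macros:

#include <stdio.h>

struct bus_default {
	const char *bus;
	int trigger;	/* 0 = edge, 1 = level, -1 = read from ELCR */
	int polarity;	/* 0 = active-high, 1 = active-low */
};

int main(void)
{
	static const struct bus_default defaults[] = {
		{ "ISA",    0, 0 },
		{ "EISA",  -1, 0 },
		{ "PCI",    1, 1 },
		{ "MCA",    1, 0 },
		{ "NEC98",  0, 0 },
	};
	unsigned int i;

	for (i = 0; i < sizeof(defaults) / sizeof(defaults[0]); i++)
		printf("%-5s trigger=%2d polarity=%d\n", defaults[i].bus,
		       defaults[i].trigger, defaults[i].polarity);
	return 0;
}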
885 | |||
886 | static int __init MPBIOS_polarity(int idx) | ||
887 | { | ||
888 | int bus = mp_irqs[idx].mpc_srcbus; | ||
889 | int polarity; | ||
890 | |||
891 | /* | ||
892 | * Determine IRQ line polarity (high active or low active): | ||
893 | */ | ||
894 | switch (mp_irqs[idx].mpc_irqflag & 3) | ||
895 | { | ||
896 | case 0: /* conforms, i.e. bus-type dependent polarity */ | ||
897 | { | ||
898 | switch (mp_bus_id_to_type[bus]) | ||
899 | { | ||
900 | case MP_BUS_ISA: /* ISA pin */ | ||
901 | { | ||
902 | polarity = default_ISA_polarity(idx); | ||
903 | break; | ||
904 | } | ||
905 | case MP_BUS_EISA: /* EISA pin */ | ||
906 | { | ||
907 | polarity = default_EISA_polarity(idx); | ||
908 | break; | ||
909 | } | ||
910 | case MP_BUS_PCI: /* PCI pin */ | ||
911 | { | ||
912 | polarity = default_PCI_polarity(idx); | ||
913 | break; | ||
914 | } | ||
915 | case MP_BUS_MCA: /* MCA pin */ | ||
916 | { | ||
917 | polarity = default_MCA_polarity(idx); | ||
918 | break; | ||
919 | } | ||
920 | case MP_BUS_NEC98: /* NEC 98 pin */ | ||
921 | { | ||
922 | polarity = default_NEC98_polarity(idx); | ||
923 | break; | ||
924 | } | ||
925 | default: | ||
926 | { | ||
927 | printk(KERN_WARNING "broken BIOS!!\n"); | ||
928 | polarity = 1; | ||
929 | break; | ||
930 | } | ||
931 | } | ||
932 | break; | ||
933 | } | ||
934 | case 1: /* high active */ | ||
935 | { | ||
936 | polarity = 0; | ||
937 | break; | ||
938 | } | ||
939 | case 2: /* reserved */ | ||
940 | { | ||
941 | printk(KERN_WARNING "broken BIOS!!\n"); | ||
942 | polarity = 1; | ||
943 | break; | ||
944 | } | ||
945 | case 3: /* low active */ | ||
946 | { | ||
947 | polarity = 1; | ||
948 | break; | ||
949 | } | ||
950 | default: /* invalid */ | ||
951 | { | ||
952 | printk(KERN_WARNING "broken BIOS!!\n"); | ||
953 | polarity = 1; | ||
954 | break; | ||
955 | } | ||
956 | } | ||
957 | return polarity; | ||
958 | } | ||
959 | |||
960 | static int MPBIOS_trigger(int idx) | ||
961 | { | ||
962 | int bus = mp_irqs[idx].mpc_srcbus; | ||
963 | int trigger; | ||
964 | |||
965 | /* | ||
966 | * Determine IRQ trigger mode (edge or level sensitive): | ||
967 | */ | ||
968 | switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) | ||
969 | { | ||
970 | case 0: /* conforms, i.e. bus-type dependent */ | ||
971 | { | ||
972 | switch (mp_bus_id_to_type[bus]) | ||
973 | { | ||
974 | case MP_BUS_ISA: /* ISA pin */ | ||
975 | { | ||
976 | trigger = default_ISA_trigger(idx); | ||
977 | break; | ||
978 | } | ||
979 | case MP_BUS_EISA: /* EISA pin */ | ||
980 | { | ||
981 | trigger = default_EISA_trigger(idx); | ||
982 | break; | ||
983 | } | ||
984 | case MP_BUS_PCI: /* PCI pin */ | ||
985 | { | ||
986 | trigger = default_PCI_trigger(idx); | ||
987 | break; | ||
988 | } | ||
989 | case MP_BUS_MCA: /* MCA pin */ | ||
990 | { | ||
991 | trigger = default_MCA_trigger(idx); | ||
992 | break; | ||
993 | } | ||
994 | case MP_BUS_NEC98: /* NEC 98 pin */ | ||
995 | { | ||
996 | trigger = default_NEC98_trigger(idx); | ||
997 | break; | ||
998 | } | ||
999 | default: | ||
1000 | { | ||
1001 | printk(KERN_WARNING "broken BIOS!!\n"); | ||
1002 | trigger = 1; | ||
1003 | break; | ||
1004 | } | ||
1005 | } | ||
1006 | break; | ||
1007 | } | ||
1008 | case 1: /* edge */ | ||
1009 | { | ||
1010 | trigger = 0; | ||
1011 | break; | ||
1012 | } | ||
1013 | case 2: /* reserved */ | ||
1014 | { | ||
1015 | printk(KERN_WARNING "broken BIOS!!\n"); | ||
1016 | trigger = 1; | ||
1017 | break; | ||
1018 | } | ||
1019 | case 3: /* level */ | ||
1020 | { | ||
1021 | trigger = 1; | ||
1022 | break; | ||
1023 | } | ||
1024 | default: /* invalid */ | ||
1025 | { | ||
1026 | printk(KERN_WARNING "broken BIOS!!\n"); | ||
1027 | trigger = 0; | ||
1028 | break; | ||
1029 | } | ||
1030 | } | ||
1031 | return trigger; | ||
1032 | } | ||
1033 | |||
1034 | static inline int irq_polarity(int idx) | ||
1035 | { | ||
1036 | return MPBIOS_polarity(idx); | ||
1037 | } | ||
1038 | |||
1039 | static inline int irq_trigger(int idx) | ||
1040 | { | ||
1041 | return MPBIOS_trigger(idx); | ||
1042 | } | ||
1043 | |||
1044 | static int pin_2_irq(int idx, int apic, int pin) | ||
1045 | { | ||
1046 | int irq, i; | ||
1047 | int bus = mp_irqs[idx].mpc_srcbus; | ||
1048 | |||
1049 | /* | ||
1050 | * Debugging check, we are in big trouble if this message pops up! | ||
1051 | */ | ||
1052 | if (mp_irqs[idx].mpc_dstirq != pin) | ||
1053 | printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); | ||
1054 | |||
1055 | switch (mp_bus_id_to_type[bus]) | ||
1056 | { | ||
1057 | case MP_BUS_ISA: /* ISA pin */ | ||
1058 | case MP_BUS_EISA: | ||
1059 | case MP_BUS_MCA: | ||
1060 | case MP_BUS_NEC98: | ||
1061 | { | ||
1062 | irq = mp_irqs[idx].mpc_srcbusirq; | ||
1063 | break; | ||
1064 | } | ||
1065 | case MP_BUS_PCI: /* PCI pin */ | ||
1066 | { | ||
1067 | /* | ||
1068 | * PCI IRQs are mapped in order | ||
1069 | */ | ||
1070 | i = irq = 0; | ||
1071 | while (i < apic) | ||
1072 | irq += nr_ioapic_registers[i++]; | ||
1073 | irq += pin; | ||
1074 | |||
1075 | /* | ||
1076 | * For MPS mode, so far only needed by ES7000 platform | ||
1077 | */ | ||
1078 | if (ioapic_renumber_irq) | ||
1079 | irq = ioapic_renumber_irq(apic, irq); | ||
1080 | |||
1081 | break; | ||
1082 | } | ||
1083 | default: | ||
1084 | { | ||
1085 | printk(KERN_ERR "unknown bus type %d.\n",bus); | ||
1086 | irq = 0; | ||
1087 | break; | ||
1088 | } | ||
1089 | } | ||
1090 | |||
1091 | /* | ||
1092 | * PCI IRQ command line redirection. Yes, limits are hardcoded. | ||
1093 | */ | ||
1094 | if ((pin >= 16) && (pin <= 23)) { | ||
1095 | if (pirq_entries[pin-16] != -1) { | ||
1096 | if (!pirq_entries[pin-16]) { | ||
1097 | apic_printk(APIC_VERBOSE, KERN_DEBUG | ||
1098 | "disabling PIRQ%d\n", pin-16); | ||
1099 | } else { | ||
1100 | irq = pirq_entries[pin-16]; | ||
1101 | apic_printk(APIC_VERBOSE, KERN_DEBUG | ||
1102 | "using PIRQ%d -> IRQ %d\n", | ||
1103 | pin-16, irq); | ||
1104 | } | ||
1105 | } | ||
1106 | } | ||
1107 | return irq; | ||
1108 | } | ||
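The PCI branch of pin_2_irq() above numbers pins globally by summing the register counts of all lower-numbered I/O APICs before adding the local pin. A tiny sketch with made-up register counts:

#include <stdio.h>

int main(void)
{
	int nr_registers[2] = { 24, 16 };	/* pins per I/O APIC (example) */
	int apic = 1, pin = 3;
	int i = 0, irq = 0;

	while (i < apic)			/* skip over all earlier APICs */
		irq += nr_registers[i++];
	irq += pin;
	printf("apic %d pin %d -> global irq %d\n", apic, pin, irq);
	return 0;
}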
1109 | |||
1110 | static inline int IO_APIC_irq_trigger(int irq) | ||
1111 | { | ||
1112 | int apic, idx, pin; | ||
1113 | |||
1114 | for (apic = 0; apic < nr_ioapics; apic++) { | ||
1115 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { | ||
1116 | idx = find_irq_entry(apic,pin,mp_INT); | ||
1117 | if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin))) | ||
1118 | return irq_trigger(idx); | ||
1119 | } | ||
1120 | } | ||
1121 | /* | ||
1122 | * Nonexistent IRQs are edge by default. | ||
1123 | */ | ||
1124 | return 0; | ||
1125 | } | ||
1126 | |||
1127 | /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ | ||
1128 | u8 irq_vector[NR_IRQ_VECTORS] = { FIRST_DEVICE_VECTOR , 0 }; | ||
1129 | |||
1130 | int assign_irq_vector(int irq) | ||
1131 | { | ||
1132 | static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; | ||
1133 | |||
1134 | BUG_ON(irq >= NR_IRQ_VECTORS); | ||
1135 | if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) | ||
1136 | return IO_APIC_VECTOR(irq); | ||
1137 | next: | ||
1138 | current_vector += 8; | ||
1139 | if (current_vector == SYSCALL_VECTOR) | ||
1140 | goto next; | ||
1141 | |||
1142 | if (current_vector >= FIRST_SYSTEM_VECTOR) { | ||
1143 | offset++; | ||
1144 | if (!(offset%8)) | ||
1145 | return -ENOSPC; | ||
1146 | current_vector = FIRST_DEVICE_VECTOR + offset; | ||
1147 | } | ||
1148 | |||
1149 | vector_irq[current_vector] = irq; | ||
1150 | if (irq != AUTO_ASSIGN) | ||
1151 | IO_APIC_VECTOR(irq) = current_vector; | ||
1152 | |||
1153 | return current_vector; | ||
1154 | } | ||
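A standalone sketch of the allocator above: vectors are handed out in strides of 8 from FIRST_DEVICE_VECTOR so that consecutive devices land in different IDT priority classes, 0x80 is skipped for the system-call gate, and once a stride runs past FIRST_SYSTEM_VECTOR the walk restarts at a new offset within the 8-slot group. The vector constants are the usual i386 ones, assumed for illustration:

#include <stdio.h>

#define FIRST_DEVICE_VECTOR	0x31	/* usual i386 values, assumed */
#define FIRST_SYSTEM_VECTOR	0xef
#define SYSCALL_VECTOR		0x80

int main(void)
{
	int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
	int n;

	for (n = 0; n < 12; n++) {	/* hand out a dozen vectors */
next:
		current_vector += 8;	/* stride of 8: new priority class */
		if (current_vector == SYSCALL_VECTOR)
			goto next;	/* keep 0x80 free for int $0x80 */
		if (current_vector >= FIRST_SYSTEM_VECTOR) {
			offset++;
			if (!(offset % 8))
				break;	/* the kernel returns -ENOSPC here */
			current_vector = FIRST_DEVICE_VECTOR + offset;
		}
		printf("allocation %2d -> vector 0x%02x\n", n, current_vector);
	}
	return 0;
}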
1155 | |||
1156 | static struct hw_interrupt_type ioapic_level_type; | ||
1157 | static struct hw_interrupt_type ioapic_edge_type; | ||
1158 | |||
1159 | #define IOAPIC_AUTO -1 | ||
1160 | #define IOAPIC_EDGE 0 | ||
1161 | #define IOAPIC_LEVEL 1 | ||
1162 | |||
1163 | static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger) | ||
1164 | { | ||
1165 | if (use_pci_vector() && !platform_legacy_irq(irq)) { | ||
1166 | if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || | ||
1167 | trigger == IOAPIC_LEVEL) | ||
1168 | irq_desc[vector].handler = &ioapic_level_type; | ||
1169 | else | ||
1170 | irq_desc[vector].handler = &ioapic_edge_type; | ||
1171 | set_intr_gate(vector, interrupt[vector]); | ||
1172 | } else { | ||
1173 | if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || | ||
1174 | trigger == IOAPIC_LEVEL) | ||
1175 | irq_desc[irq].handler = &ioapic_level_type; | ||
1176 | else | ||
1177 | irq_desc[irq].handler = &ioapic_edge_type; | ||
1178 | set_intr_gate(vector, interrupt[irq]); | ||
1179 | } | ||
1180 | } | ||
1181 | |||
1182 | static void __init setup_IO_APIC_irqs(void) | ||
1183 | { | ||
1184 | struct IO_APIC_route_entry entry; | ||
1185 | int apic, pin, idx, irq, first_notcon = 1, vector; | ||
1186 | unsigned long flags; | ||
1187 | |||
1188 | apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); | ||
1189 | |||
1190 | for (apic = 0; apic < nr_ioapics; apic++) { | ||
1191 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { | ||
1192 | |||
1193 | /* | ||
1194 | * add it to the IO-APIC irq-routing table: | ||
1195 | */ | ||
1196 | memset(&entry,0,sizeof(entry)); | ||
1197 | |||
1198 | entry.delivery_mode = INT_DELIVERY_MODE; | ||
1199 | entry.dest_mode = INT_DEST_MODE; | ||
1200 | entry.mask = 0; /* enable IRQ */ | ||
1201 | entry.dest.logical.logical_dest = | ||
1202 | cpu_mask_to_apicid(TARGET_CPUS); | ||
1203 | |||
1204 | idx = find_irq_entry(apic,pin,mp_INT); | ||
1205 | if (idx == -1) { | ||
1206 | if (first_notcon) { | ||
1207 | apic_printk(APIC_VERBOSE, KERN_DEBUG | ||
1208 | " IO-APIC (apicid-pin) %d-%d", | ||
1209 | mp_ioapics[apic].mpc_apicid, | ||
1210 | pin); | ||
1211 | first_notcon = 0; | ||
1212 | } else | ||
1213 | apic_printk(APIC_VERBOSE, ", %d-%d", | ||
1214 | mp_ioapics[apic].mpc_apicid, pin); | ||
1215 | continue; | ||
1216 | } | ||
1217 | |||
1218 | entry.trigger = irq_trigger(idx); | ||
1219 | entry.polarity = irq_polarity(idx); | ||
1220 | |||
1221 | if (irq_trigger(idx)) { | ||
1222 | entry.trigger = 1; | ||
1223 | entry.mask = 1; | ||
1224 | } | ||
1225 | |||
1226 | irq = pin_2_irq(idx, apic, pin); | ||
1227 | /* | ||
1228 | * skip adding the timer int on secondary nodes, which causes | ||
1229 | * a small but painful rift in the time-space continuum | ||
1230 | */ | ||
1231 | if (multi_timer_check(apic, irq)) | ||
1232 | continue; | ||
1233 | else | ||
1234 | add_pin_to_irq(irq, apic, pin); | ||
1235 | |||
1236 | if (!apic && !IO_APIC_IRQ(irq)) | ||
1237 | continue; | ||
1238 | |||
1239 | if (IO_APIC_IRQ(irq)) { | ||
1240 | vector = assign_irq_vector(irq); | ||
1241 | entry.vector = vector; | ||
1242 | ioapic_register_intr(irq, vector, IOAPIC_AUTO); | ||
1243 | |||
1244 | if (!apic && (irq < 16)) | ||
1245 | disable_8259A_irq(irq); | ||
1246 | } | ||
1247 | spin_lock_irqsave(&ioapic_lock, flags); | ||
1248 | io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); | ||
1249 | io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); | ||
1250 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
1251 | } | ||
1252 | } | ||
1253 | |||
1254 | if (!first_notcon) | ||
1255 | apic_printk(APIC_VERBOSE, " not connected.\n"); | ||
1256 | } | ||
1257 | |||
1258 | /* | ||
1259 | * Set up the 8259A-master output pin: | ||
1260 | */ | ||
1261 | static void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector) | ||
1262 | { | ||
1263 | struct IO_APIC_route_entry entry; | ||
1264 | unsigned long flags; | ||
1265 | |||
1266 | memset(&entry,0,sizeof(entry)); | ||
1267 | |||
1268 | disable_8259A_irq(0); | ||
1269 | |||
1270 | /* mask LVT0 */ | ||
1271 | apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); | ||
1272 | |||
1273 | /* | ||
1274 | * We use logical delivery to get the timer IRQ | ||
1275 | * to the first CPU. | ||
1276 | */ | ||
1277 | entry.dest_mode = INT_DEST_MODE; | ||
1278 | entry.mask = 0; /* unmask IRQ now */ | ||
1279 | entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); | ||
1280 | entry.delivery_mode = INT_DELIVERY_MODE; | ||
1281 | entry.polarity = 0; | ||
1282 | entry.trigger = 0; | ||
1283 | entry.vector = vector; | ||
1284 | |||
1285 | /* | ||
1286 | * The timer IRQ doesn't have to know that behind the | ||
1287 | * scenes we have an 8259A master in AEOI mode ... | ||
1288 | */ | ||
1289 | irq_desc[0].handler = &ioapic_edge_type; | ||
1290 | |||
1291 | /* | ||
1292 | * Add it to the IO-APIC irq-routing table: | ||
1293 | */ | ||
1294 | spin_lock_irqsave(&ioapic_lock, flags); | ||
1295 | io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1)); | ||
1296 | io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0)); | ||
1297 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
1298 | |||
1299 | enable_8259A_irq(0); | ||
1300 | } | ||
1301 | |||
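 | /* Deliberately empty: a grep-able marker called whenever a register | ||
 | * value falls outside the set this code expects. */ | ||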
1302 | static inline void UNEXPECTED_IO_APIC(void) | ||
1303 | { | ||
1304 | } | ||
1305 | |||
1306 | void __init print_IO_APIC(void) | ||
1307 | { | ||
1308 | int apic, i; | ||
1309 | union IO_APIC_reg_00 reg_00; | ||
1310 | union IO_APIC_reg_01 reg_01; | ||
1311 | union IO_APIC_reg_02 reg_02; | ||
1312 | union IO_APIC_reg_03 reg_03; | ||
1313 | unsigned long flags; | ||
1314 | |||
1315 | if (apic_verbosity == APIC_QUIET) | ||
1316 | return; | ||
1317 | |||
1318 | printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); | ||
1319 | for (i = 0; i < nr_ioapics; i++) | ||
1320 | printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", | ||
1321 | mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]); | ||
1322 | |||
1323 | /* | ||
1324 | * We are a bit conservative about what we expect. We have to | ||
1325 | * know about every hardware change ASAP. | ||
1326 | */ | ||
1327 | printk(KERN_INFO "testing the IO APIC.......................\n"); | ||
1328 | |||
1329 | for (apic = 0; apic < nr_ioapics; apic++) { | ||
1330 | |||
1331 | spin_lock_irqsave(&ioapic_lock, flags); | ||
1332 | reg_00.raw = io_apic_read(apic, 0); | ||
1333 | reg_01.raw = io_apic_read(apic, 1); | ||
1334 | if (reg_01.bits.version >= 0x10) | ||
1335 | reg_02.raw = io_apic_read(apic, 2); | ||
1336 | if (reg_01.bits.version >= 0x20) | ||
1337 | reg_03.raw = io_apic_read(apic, 3); | ||
1338 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
1339 | |||
1340 | printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); | ||
1341 | printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); | ||
1342 | printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); | ||
1343 | printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); | ||
1344 | printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); | ||
1345 | if (reg_00.bits.ID >= get_physical_broadcast()) | ||
1346 | UNEXPECTED_IO_APIC(); | ||
1347 | if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2) | ||
1348 | UNEXPECTED_IO_APIC(); | ||
1349 | |||
1350 | printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw); | ||
1351 | printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); | ||
1352 | if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */ | ||
1353 | (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */ | ||
1354 | (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */ | ||
1355 | (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */ | ||
1356 | (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */ | ||
1357 | (reg_01.bits.entries != 0x2E) && | ||
1358 | (reg_01.bits.entries != 0x3F) | ||
1359 | ) | ||
1360 | UNEXPECTED_IO_APIC(); | ||
1361 | |||
1362 | printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); | ||
1363 | printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); | ||
1364 | if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */ | ||
1365 | (reg_01.bits.version != 0x10) && /* oldest IO-APICs */ | ||
1366 | (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */ | ||
1367 | (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */ | ||
1368 | (reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */ | ||
1369 | ) | ||
1370 | UNEXPECTED_IO_APIC(); | ||
1371 | if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2) | ||
1372 | UNEXPECTED_IO_APIC(); | ||
1373 | |||
1374 | /* | ||
1375 | * Some Intel chipsets with an IO APIC VERSION of 0x1? don't have reg_02; | ||
1376 | * reading reg_02 then returns the value of the previously read | ||
1377 | * register, so ignore it if reg_02 == reg_01. | ||
1378 | */ | ||
1379 | if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) { | ||
1380 | printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); | ||
1381 | printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); | ||
1382 | if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2) | ||
1383 | UNEXPECTED_IO_APIC(); | ||
1384 | } | ||
1385 | |||
1386 | /* | ||
1387 | * Some Intel chipsets with an IO APIC VERSION of 0x2? don't have reg_02 | ||
1388 | * or reg_03; reading reg_0[23] then returns the value of the previously | ||
1389 | * read register, so ignore it if reg_03 == reg_0[12]. | ||
1390 | */ | ||
1391 | if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw && | ||
1392 | reg_03.raw != reg_01.raw) { | ||
1393 | printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw); | ||
1394 | printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT); | ||
1395 | if (reg_03.bits.__reserved_1) | ||
1396 | UNEXPECTED_IO_APIC(); | ||
1397 | } | ||
1398 | |||
1399 | printk(KERN_DEBUG ".... IRQ redirection table:\n"); | ||
1400 | |||
1401 | printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol" | ||
1402 | " Stat Dest Deli Vect: \n"); | ||
1403 | |||
1404 | for (i = 0; i <= reg_01.bits.entries; i++) { | ||
1405 | struct IO_APIC_route_entry entry; | ||
1406 | |||
1407 | spin_lock_irqsave(&ioapic_lock, flags); | ||
1408 | *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2); | ||
1409 | *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2); | ||
1410 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
1411 | |||
1412 | printk(KERN_DEBUG " %02x %03X %02X ", | ||
1413 | i, | ||
1414 | entry.dest.logical.logical_dest, | ||
1415 | entry.dest.physical.physical_dest | ||
1416 | ); | ||
1417 | |||
1418 | printk("%1d %1d %1d %1d %1d %1d %1d %02X\n", | ||
1419 | entry.mask, | ||
1420 | entry.trigger, | ||
1421 | entry.irr, | ||
1422 | entry.polarity, | ||
1423 | entry.delivery_status, | ||
1424 | entry.dest_mode, | ||
1425 | entry.delivery_mode, | ||
1426 | entry.vector | ||
1427 | ); | ||
1428 | } | ||
1429 | } | ||
1430 | if (use_pci_vector()) | ||
1431 | printk(KERN_INFO "Using vector-based indexing\n"); | ||
1432 | printk(KERN_DEBUG "IRQ to pin mappings:\n"); | ||
1433 | for (i = 0; i < NR_IRQS; i++) { | ||
1434 | struct irq_pin_list *entry = irq_2_pin + i; | ||
1435 | if (entry->pin < 0) | ||
1436 | continue; | ||
1437 | if (use_pci_vector() && !platform_legacy_irq(i)) | ||
1438 | printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i)); | ||
1439 | else | ||
1440 | printk(KERN_DEBUG "IRQ%d ", i); | ||
1441 | for (;;) { | ||
1442 | printk("-> %d:%d", entry->apic, entry->pin); | ||
1443 | if (!entry->next) | ||
1444 | break; | ||
1445 | entry = irq_2_pin + entry->next; | ||
1446 | } | ||
1447 | printk("\n"); | ||
1448 | } | ||
1449 | |||
1450 | printk(KERN_INFO ".................................... done.\n"); | ||
1451 | |||
1452 | return; | ||
1453 | } | ||
1454 | |||
1455 | #if 0 | ||
1456 | |||
1457 | static void print_APIC_bitfield (int base) | ||
1458 | { | ||
1459 | unsigned int v; | ||
1460 | int i, j; | ||
1461 | |||
1462 | if (apic_verbosity == APIC_QUIET) | ||
1463 | return; | ||
1464 | |||
1465 | printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG); | ||
1466 | for (i = 0; i < 8; i++) { | ||
1467 | v = apic_read(base + i*0x10); | ||
1468 | for (j = 0; j < 32; j++) { | ||
1469 | if (v & (1<<j)) | ||
1470 | printk("1"); | ||
1471 | else | ||
1472 | printk("0"); | ||
1473 | } | ||
1474 | printk("\n"); | ||
1475 | } | ||
1476 | } | ||
1477 | |||
1478 | void /*__init*/ print_local_APIC(void * dummy) | ||
1479 | { | ||
1480 | unsigned int v, ver, maxlvt; | ||
1481 | |||
1482 | if (apic_verbosity == APIC_QUIET) | ||
1483 | return; | ||
1484 | |||
1485 | printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", | ||
1486 | smp_processor_id(), hard_smp_processor_id()); | ||
1487 | v = apic_read(APIC_ID); | ||
1488 | printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(v)); | ||
1489 | v = apic_read(APIC_LVR); | ||
1490 | printk(KERN_INFO "... APIC VERSION: %08x\n", v); | ||
1491 | ver = GET_APIC_VERSION(v); | ||
1492 | maxlvt = get_maxlvt(); | ||
1493 | |||
1494 | v = apic_read(APIC_TASKPRI); | ||
1495 | printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); | ||
1496 | |||
1497 | if (APIC_INTEGRATED(ver)) { /* !82489DX */ | ||
1498 | v = apic_read(APIC_ARBPRI); | ||
1499 | printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v, | ||
1500 | v & APIC_ARBPRI_MASK); | ||
1501 | v = apic_read(APIC_PROCPRI); | ||
1502 | printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v); | ||
1503 | } | ||
1504 | |||
1505 | v = apic_read(APIC_EOI); | ||
1506 | printk(KERN_DEBUG "... APIC EOI: %08x\n", v); | ||
1507 | v = apic_read(APIC_RRR); | ||
1508 | printk(KERN_DEBUG "... APIC RRR: %08x\n", v); | ||
1509 | v = apic_read(APIC_LDR); | ||
1510 | printk(KERN_DEBUG "... APIC LDR: %08x\n", v); | ||
1511 | v = apic_read(APIC_DFR); | ||
1512 | printk(KERN_DEBUG "... APIC DFR: %08x\n", v); | ||
1513 | v = apic_read(APIC_SPIV); | ||
1514 | printk(KERN_DEBUG "... APIC SPIV: %08x\n", v); | ||
1515 | |||
1516 | printk(KERN_DEBUG "... APIC ISR field:\n"); | ||
1517 | print_APIC_bitfield(APIC_ISR); | ||
1518 | printk(KERN_DEBUG "... APIC TMR field:\n"); | ||
1519 | print_APIC_bitfield(APIC_TMR); | ||
1520 | printk(KERN_DEBUG "... APIC IRR field:\n"); | ||
1521 | print_APIC_bitfield(APIC_IRR); | ||
1522 | |||
1523 | if (APIC_INTEGRATED(ver)) { /* !82489DX */ | ||
1524 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ | ||
1525 | apic_write(APIC_ESR, 0); | ||
1526 | v = apic_read(APIC_ESR); | ||
1527 | printk(KERN_DEBUG "... APIC ESR: %08x\n", v); | ||
1528 | } | ||
1529 | |||
1530 | v = apic_read(APIC_ICR); | ||
1531 | printk(KERN_DEBUG "... APIC ICR: %08x\n", v); | ||
1532 | v = apic_read(APIC_ICR2); | ||
1533 | printk(KERN_DEBUG "... APIC ICR2: %08x\n", v); | ||
1534 | |||
1535 | v = apic_read(APIC_LVTT); | ||
1536 | printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); | ||
1537 | |||
1538 | if (maxlvt > 3) { /* PC is LVT#4. */ | ||
1539 | v = apic_read(APIC_LVTPC); | ||
1540 | printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v); | ||
1541 | } | ||
1542 | v = apic_read(APIC_LVT0); | ||
1543 | printk(KERN_DEBUG "... APIC LVT0: %08x\n", v); | ||
1544 | v = apic_read(APIC_LVT1); | ||
1545 | printk(KERN_DEBUG "... APIC LVT1: %08x\n", v); | ||
1546 | |||
1547 | if (maxlvt > 2) { /* ERR is LVT#3. */ | ||
1548 | v = apic_read(APIC_LVTERR); | ||
1549 | printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v); | ||
1550 | } | ||
1551 | |||
1552 | v = apic_read(APIC_TMICT); | ||
1553 | printk(KERN_DEBUG "... APIC TMICT: %08x\n", v); | ||
1554 | v = apic_read(APIC_TMCCT); | ||
1555 | printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v); | ||
1556 | v = apic_read(APIC_TDCR); | ||
1557 | printk(KERN_DEBUG "... APIC TDCR: %08x\n", v); | ||
1558 | printk("\n"); | ||
1559 | } | ||
1560 | |||
1561 | void print_all_local_APICs (void) | ||
1562 | { | ||
1563 | on_each_cpu(print_local_APIC, NULL, 1, 1); | ||
1564 | } | ||
1565 | |||
1566 | void /*__init*/ print_PIC(void) | ||
1567 | { | ||
1568 | extern spinlock_t i8259A_lock; | ||
1569 | unsigned int v; | ||
1570 | unsigned long flags; | ||
1571 | |||
1572 | if (apic_verbosity == APIC_QUIET) | ||
1573 | return; | ||
1574 | |||
1575 | printk(KERN_DEBUG "\nprinting PIC contents\n"); | ||
1576 | |||
1577 | spin_lock_irqsave(&i8259A_lock, flags); | ||
1578 | |||
1579 | v = inb(0xa1) << 8 | inb(0x21); | ||
1580 | printk(KERN_DEBUG "... PIC IMR: %04x\n", v); | ||
1581 | |||
1582 | v = inb(0xa0) << 8 | inb(0x20); | ||
1583 | printk(KERN_DEBUG "... PIC IRR: %04x\n", v); | ||
1584 | |||
1585 | outb(0x0b,0xa0); | ||
1586 | outb(0x0b,0x20); | ||
1587 | v = inb(0xa0) << 8 | inb(0x20); | ||
1588 | outb(0x0a,0xa0); | ||
1589 | outb(0x0a,0x20); | ||
1590 | |||
1591 | spin_unlock_irqrestore(&i8259A_lock, flags); | ||
1592 | |||
1593 | printk(KERN_DEBUG "... PIC ISR: %04x\n", v); | ||
1594 | |||
1595 | v = inb(0x4d1) << 8 | inb(0x4d0); | ||
1596 | printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); | ||
1597 | } | ||
1598 | |||
1599 | #endif /* 0 */ | ||
1600 | |||
1601 | static void __init enable_IO_APIC(void) | ||
1602 | { | ||
1603 | union IO_APIC_reg_01 reg_01; | ||
1604 | int i; | ||
1605 | unsigned long flags; | ||
1606 | |||
1607 | for (i = 0; i < PIN_MAP_SIZE; i++) { | ||
1608 | irq_2_pin[i].pin = -1; | ||
1609 | irq_2_pin[i].next = 0; | ||
1610 | } | ||
1611 | if (!pirqs_enabled) | ||
1612 | for (i = 0; i < MAX_PIRQS; i++) | ||
1613 | pirq_entries[i] = -1; | ||
1614 | |||
1615 | /* | ||
1616 | * The number of IO-APIC IRQ registers (== #pins): | ||
1617 | */ | ||
1618 | for (i = 0; i < nr_ioapics; i++) { | ||
1619 | spin_lock_irqsave(&ioapic_lock, flags); | ||
1620 | reg_01.raw = io_apic_read(i, 1); | ||
1621 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
1622 | nr_ioapic_registers[i] = reg_01.bits.entries+1; | ||
1623 | } | ||
1624 | |||
1625 | /* | ||
1626 | * Do not trust the IO-APIC to be empty at bootup. | ||
1627 | */ | ||
1628 | clear_IO_APIC(); | ||
1629 | } | ||
1630 | |||
1631 | /* | ||
1632 | * Not an __init, needed by the reboot code | ||
1633 | */ | ||
1634 | void disable_IO_APIC(void) | ||
1635 | { | ||
1636 | /* | ||
1637 | * Clear the IO-APIC before rebooting: | ||
1638 | */ | ||
1639 | clear_IO_APIC(); | ||
1640 | |||
1641 | disconnect_bsp_APIC(); | ||
1642 | } | ||
1643 | |||
1644 | /* | ||
1645 | * Set the IO-APIC physical IDs based on the | ||
1646 | * values stored in the MPC table. | ||
1647 | * | ||
1648 | * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999 | ||
1649 | */ | ||
1650 | |||
1651 | #ifndef CONFIG_X86_NUMAQ | ||
1652 | static void __init setup_ioapic_ids_from_mpc(void) | ||
1653 | { | ||
1654 | union IO_APIC_reg_00 reg_00; | ||
1655 | physid_mask_t phys_id_present_map; | ||
1656 | int apic; | ||
1657 | int i; | ||
1658 | unsigned char old_id; | ||
1659 | unsigned long flags; | ||
1660 | |||
1661 | /* | ||
1662 | * This is broken; anything with a real cpu count has to | ||
1663 | * circumvent this idiocy regardless. | ||
1664 | */ | ||
1665 | phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map); | ||
1666 | |||
1667 | /* | ||
1668 | * Set the IOAPIC ID to the value stored in the MPC table. | ||
1669 | */ | ||
1670 | for (apic = 0; apic < nr_ioapics; apic++) { | ||
1671 | |||
1672 | /* Read the register 0 value */ | ||
1673 | spin_lock_irqsave(&ioapic_lock, flags); | ||
1674 | reg_00.raw = io_apic_read(apic, 0); | ||
1675 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
1676 | |||
1677 | old_id = mp_ioapics[apic].mpc_apicid; | ||
1678 | |||
1679 | if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) { | ||
1680 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", | ||
1681 | apic, mp_ioapics[apic].mpc_apicid); | ||
1682 | printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", | ||
1683 | reg_00.bits.ID); | ||
1684 | mp_ioapics[apic].mpc_apicid = reg_00.bits.ID; | ||
1685 | } | ||
1686 | |||
1687 | /* Don't check I/O APIC IDs for some xAPIC systems. They have | ||
1688 | * no meaning without the serial APIC bus. */ | ||
1689 | if (NO_IOAPIC_CHECK) | ||
1690 | continue; | ||
1691 | /* | ||
1692 | * Sanity check, is the ID really free? Every APIC in a | ||
1693 | * system must have a unique ID or we get lots of nice | ||
1694 | * 'stuck on smp_invalidate_needed IPI wait' messages. | ||
1695 | */ | ||
1696 | if (check_apicid_used(phys_id_present_map, | ||
1697 | mp_ioapics[apic].mpc_apicid)) { | ||
1698 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", | ||
1699 | apic, mp_ioapics[apic].mpc_apicid); | ||
1700 | for (i = 0; i < get_physical_broadcast(); i++) | ||
1701 | if (!physid_isset(i, phys_id_present_map)) | ||
1702 | break; | ||
1703 | if (i >= get_physical_broadcast()) | ||
1704 | panic("Max APIC ID exceeded!\n"); | ||
1705 | printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", | ||
1706 | i); | ||
1707 | physid_set(i, phys_id_present_map); | ||
1708 | mp_ioapics[apic].mpc_apicid = i; | ||
1709 | } else { | ||
1710 | physid_mask_t tmp; | ||
1711 | tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid); | ||
1712 | apic_printk(APIC_VERBOSE, "Setting %d in the " | ||
1713 | "phys_id_present_map\n", | ||
1714 | mp_ioapics[apic].mpc_apicid); | ||
1715 | physids_or(phys_id_present_map, phys_id_present_map, tmp); | ||
1716 | } | ||
1717 | |||
1718 | |||
1719 | /* | ||
1720 | * We need to adjust the IRQ routing table | ||
1721 | * if the ID changed. | ||
1722 | */ | ||
1723 | if (old_id != mp_ioapics[apic].mpc_apicid) | ||
1724 | for (i = 0; i < mp_irq_entries; i++) | ||
1725 | if (mp_irqs[i].mpc_dstapic == old_id) | ||
1726 | mp_irqs[i].mpc_dstapic | ||
1727 | = mp_ioapics[apic].mpc_apicid; | ||
1728 | |||
1729 | /* | ||
1730 | * Read the right value from the MPC table and | ||
1731 | * write it into the ID register. | ||
1732 | */ | ||
1733 | apic_printk(APIC_VERBOSE, KERN_INFO | ||
1734 | "...changing IO-APIC physical APIC ID to %d ...", | ||
1735 | mp_ioapics[apic].mpc_apicid); | ||
1736 | |||
1737 | reg_00.bits.ID = mp_ioapics[apic].mpc_apicid; | ||
1738 | spin_lock_irqsave(&ioapic_lock, flags); | ||
1739 | io_apic_write(apic, 0, reg_00.raw); | ||
1740 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
1741 | |||
1742 | /* | ||
1743 | * Sanity check | ||
1744 | */ | ||
1745 | spin_lock_irqsave(&ioapic_lock, flags); | ||
1746 | reg_00.raw = io_apic_read(apic, 0); | ||
1747 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
1748 | if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid) | ||
1749 | printk("could not set ID!\n"); | ||
1750 | else | ||
1751 | apic_printk(APIC_VERBOSE, " ok.\n"); | ||
1752 | } | ||
1753 | } | ||
1754 | #else | ||
1755 | static void __init setup_ioapic_ids_from_mpc(void) { } | ||
1756 | #endif | ||
1757 | |||
1758 | /* | ||
1759 | * There is a nasty bug in some older SMP boards: their mptable lies | ||
1760 | * about the timer IRQ. We do the following to work around the situation: | ||
1761 | * | ||
1762 | * - the timer IRQ defaults to an IO-APIC IRQ | ||
1763 | * - if this function detects that timer IRQs are defunct, then we fall | ||
1764 | * back to ISA timer IRQs | ||
1765 | */ | ||
1766 | static int __init timer_irq_works(void) | ||
1767 | { | ||
1768 | unsigned long t1 = jiffies; | ||
1769 | |||
1770 | local_irq_enable(); | ||
1771 | /* Let ten ticks pass... */ | ||
1772 | mdelay((10 * 1000) / HZ); | ||
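 | /* ((10 * 1000) / HZ above is ten timer periods expressed in | ||
 | * milliseconds, for whatever HZ the kernel was built with.) */ | ||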
1773 | |||
1774 | /* | ||
1775 | * Expect a few ticks at least, to be sure some possible | ||
1776 | * glue logic does not lock up after the first one or two | ||
1777 | * ticks in a non-ExtINT mode. Also the local APIC | ||
1778 | * might have cached one ExtINT interrupt. Finally, at | ||
1779 | * least one tick may be lost due to delays. | ||
1780 | */ | ||
1781 | if (jiffies - t1 > 4) | ||
1782 | return 1; | ||
1783 | |||
1784 | return 0; | ||
1785 | } | ||
1786 | |||
1787 | /* | ||
1788 | * In the SMP+IOAPIC case it might happen that an unspecified number | ||
1789 | * of pending IRQ events is left unhandled. These cases are very rare, | ||
1790 | * so we 'resend' these IRQs via IPIs to the same CPU. It's much | ||
1791 | * better to do it this way, since then we do not have to be aware of | ||
1792 | * 'pending' interrupts in the IRQ path, except at this point. | ||
1793 | */ | ||
1794 | /* | ||
1795 | * Edge-triggered interrupts need to be resent if they | ||
1796 | * were delayed, but this is now handled in the device- | ||
1797 | * independent code. | ||
1798 | */ | ||
1799 | |||
1800 | /* | ||
1801 | * Starting up an edge-triggered IO-APIC interrupt is | ||
1802 | * nasty - we need to make sure that we get the edge. | ||
1803 | * If it is already asserted for some reason, we need to | ||
1804 | * return 1 to indicate that it was pending. | ||
1805 | * | ||
1806 | * This is not complete - we should be able to fake | ||
1807 | * an edge even if it isn't on the 8259A... | ||
1808 | */ | ||
1809 | static unsigned int startup_edge_ioapic_irq(unsigned int irq) | ||
1810 | { | ||
1811 | int was_pending = 0; | ||
1812 | unsigned long flags; | ||
1813 | |||
1814 | spin_lock_irqsave(&ioapic_lock, flags); | ||
1815 | if (irq < 16) { | ||
1816 | disable_8259A_irq(irq); | ||
1817 | if (i8259A_irq_pending(irq)) | ||
1818 | was_pending = 1; | ||
1819 | } | ||
1820 | __unmask_IO_APIC_irq(irq); | ||
1821 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
1822 | |||
1823 | return was_pending; | ||
1824 | } | ||
1825 | |||
1826 | /* | ||
1827 | * Once we have recorded IRQ_PENDING already, we can mask the | ||
1828 | * interrupt for real. This prevents IRQ storms from unhandled | ||
1829 | * devices. | ||
1830 | */ | ||
1831 | static void ack_edge_ioapic_irq(unsigned int irq) | ||
1832 | { | ||
1833 | move_irq(irq); | ||
1834 | if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) | ||
1835 | == (IRQ_PENDING | IRQ_DISABLED)) | ||
1836 | mask_IO_APIC_irq(irq); | ||
1837 | ack_APIC_irq(); | ||
1838 | } | ||
1839 | |||
1840 | /* | ||
1841 | * Level-triggered interrupts can just be masked, | ||
1842 | * and shutting down and starting up the interrupt | ||
1843 | * is the same as enabling and disabling it -- except | ||
1844 | * that startup needs to return a "was pending" value. | ||
1845 | * | ||
1846 | * Level-triggered interrupts are special because we | ||
1847 | * do not touch any IO-APIC register while handling | ||
1848 | * them. We ack the APIC in the end-IRQ handler, not | ||
1849 | * in the start-IRQ-handler. Protection against reentrance | ||
1850 | * from the same interrupt is still provided, both by the | ||
1851 | * generic IRQ layer and by the fact that an unacked local | ||
1852 | * APIC does not accept IRQs. | ||
1853 | */ | ||
1854 | static unsigned int startup_level_ioapic_irq (unsigned int irq) | ||
1855 | { | ||
1856 | unmask_IO_APIC_irq(irq); | ||
1857 | |||
1858 | return 0; /* don't check for pending */ | ||
1859 | } | ||
1860 | |||
1861 | static void end_level_ioapic_irq (unsigned int irq) | ||
1862 | { | ||
1863 | unsigned long v; | ||
1864 | int i; | ||
1865 | |||
1866 | move_irq(irq); | ||
1867 | /* | ||
1868 | * It appears there is an erratum which affects at least version 0x11 | ||
1869 | * of I/O APIC (that's the 82093AA and cores integrated into various | ||
1870 | * chipsets). Under certain conditions a level-triggered interrupt is | ||
1871 | * erroneously delivered as an edge-triggered one but the respective IRR | ||
1872 | * bit gets set nevertheless. As a result the I/O unit expects an EOI | ||
1873 | * message but it will never arrive and further interrupts are blocked | ||
1874 | * from the source. The exact reason is so far unknown, but the | ||
1875 | * phenomenon was observed when two consecutive interrupt requests | ||
1876 | * from a given source get delivered to the same CPU and the source is | ||
1877 | * temporarily disabled in between. | ||
1878 | * | ||
1879 | * A workaround is to simulate an EOI message manually. We achieve it | ||
1880 | * by setting the trigger mode to edge and then to level when the edge | ||
1881 | * trigger mode gets detected in the TMR of a local APIC for a | ||
1882 | * level-triggered interrupt. We mask the source for the time of the | ||
1883 | * operation to prevent an edge-triggered interrupt escaping meanwhile. | ||
1884 | * The idea is from Manfred Spraul. --macro | ||
1885 | */ | ||
1886 | i = IO_APIC_VECTOR(irq); | ||
1887 | |||
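 | /* The TMR is eight 32-bit registers spaced 0x10 apart; vector i's | ||
 | * bit sits in register i/32, i.e. at byte offset (i/32)*0x10, | ||
 | * which is what ((i & ~0x1f) >> 1) computes. */ | ||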
1888 | v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); | ||
1889 | |||
1890 | ack_APIC_irq(); | ||
1891 | |||
1892 | if (!(v & (1 << (i & 0x1f)))) { | ||
1893 | atomic_inc(&irq_mis_count); | ||
1894 | spin_lock(&ioapic_lock); | ||
1895 | __mask_and_edge_IO_APIC_irq(irq); | ||
1896 | __unmask_and_level_IO_APIC_irq(irq); | ||
1897 | spin_unlock(&ioapic_lock); | ||
1898 | } | ||
1899 | } | ||
1900 | |||
1901 | #ifdef CONFIG_PCI_MSI | ||
1902 | static unsigned int startup_edge_ioapic_vector(unsigned int vector) | ||
1903 | { | ||
1904 | int irq = vector_to_irq(vector); | ||
1905 | |||
1906 | return startup_edge_ioapic_irq(irq); | ||
1907 | } | ||
1908 | |||
1909 | static void ack_edge_ioapic_vector(unsigned int vector) | ||
1910 | { | ||
1911 | int irq = vector_to_irq(vector); | ||
1912 | |||
1913 | ack_edge_ioapic_irq(irq); | ||
1914 | } | ||
1915 | |||
1916 | static unsigned int startup_level_ioapic_vector (unsigned int vector) | ||
1917 | { | ||
1918 | int irq = vector_to_irq(vector); | ||
1919 | |||
1920 | return startup_level_ioapic_irq (irq); | ||
1921 | } | ||
1922 | |||
1923 | static void end_level_ioapic_vector (unsigned int vector) | ||
1924 | { | ||
1925 | int irq = vector_to_irq(vector); | ||
1926 | |||
1927 | end_level_ioapic_irq(irq); | ||
1928 | } | ||
1929 | |||
1930 | static void mask_IO_APIC_vector (unsigned int vector) | ||
1931 | { | ||
1932 | int irq = vector_to_irq(vector); | ||
1933 | |||
1934 | mask_IO_APIC_irq(irq); | ||
1935 | } | ||
1936 | |||
1937 | static void unmask_IO_APIC_vector (unsigned int vector) | ||
1938 | { | ||
1939 | int irq = vector_to_irq(vector); | ||
1940 | |||
1941 | unmask_IO_APIC_irq(irq); | ||
1942 | } | ||
1943 | |||
1944 | static void set_ioapic_affinity_vector (unsigned int vector, | ||
1945 | cpumask_t cpu_mask) | ||
1946 | { | ||
1947 | int irq = vector_to_irq(vector); | ||
1948 | |||
1949 | set_ioapic_affinity_irq(irq, cpu_mask); | ||
1950 | } | ||
1951 | #endif | ||
1952 | |||
1953 | /* | ||
1954 | * Level- and edge-triggered IO-APIC interrupts need different handling, | ||
1955 | * so we use two separate IRQ descriptors. Edge-triggered IRQs can be | ||
1956 | * handled with the level-triggered descriptor, but that one has slightly | ||
1957 | * more overhead. Level-triggered interrupts cannot be handled with the | ||
1958 | * edge-triggered handler, without risking IRQ storms and other ugly | ||
1959 | * races. | ||
1960 | */ | ||
1961 | static struct hw_interrupt_type ioapic_edge_type = { | ||
1962 | .typename = "IO-APIC-edge", | ||
1963 | .startup = startup_edge_ioapic, | ||
1964 | .shutdown = shutdown_edge_ioapic, | ||
1965 | .enable = enable_edge_ioapic, | ||
1966 | .disable = disable_edge_ioapic, | ||
1967 | .ack = ack_edge_ioapic, | ||
1968 | .end = end_edge_ioapic, | ||
1969 | .set_affinity = set_ioapic_affinity, | ||
1970 | }; | ||
1971 | |||
1972 | static struct hw_interrupt_type ioapic_level_type = { | ||
1973 | .typename = "IO-APIC-level", | ||
1974 | .startup = startup_level_ioapic, | ||
1975 | .shutdown = shutdown_level_ioapic, | ||
1976 | .enable = enable_level_ioapic, | ||
1977 | .disable = disable_level_ioapic, | ||
1978 | .ack = mask_and_ack_level_ioapic, | ||
1979 | .end = end_level_ioapic, | ||
1980 | .set_affinity = set_ioapic_affinity, | ||
1981 | }; | ||
1982 | |||
1983 | static inline void init_IO_APIC_traps(void) | ||
1984 | { | ||
1985 | int irq; | ||
1986 | |||
1987 | /* | ||
1988 | * NOTE! The local APIC isn't very good at handling | ||
1989 | * multiple interrupts at the same interrupt level. | ||
1990 | * As the interrupt level is determined by taking the | ||
1991 | * vector number and shifting that right by 4, we | ||
1992 | * want to spread these out a bit so that they don't | ||
1993 | * all fall in the same interrupt level. | ||
1994 | * | ||
1995 | * Also, we've got to be careful not to trash gate | ||
1996 | * 0x80, because int 0x80 is hm, kind of importantish. ;) | ||
1997 | */ | ||
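 | /* For example, vectors 0x31 and 0x39 share priority level 3 | ||
 | * (0x31 >> 4 == 0x39 >> 4 == 3), while 0x41 lands in level 4. | ||
 | * (Illustrative values only.) */ | ||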
1998 | for (irq = 0; irq < NR_IRQS ; irq++) { | ||
1999 | int tmp = irq; | ||
2000 | if (use_pci_vector()) { | ||
2001 | if (!platform_legacy_irq(tmp)) | ||
2002 | if ((tmp = vector_to_irq(tmp)) == -1) | ||
2003 | continue; | ||
2004 | } | ||
2005 | if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) { | ||
2006 | /* | ||
2007 | * Hmm.. We don't have an entry for this, | ||
2008 | * so default to an old-fashioned 8259 | ||
2009 | * interrupt if we can.. | ||
2010 | */ | ||
2011 | if (irq < 16) | ||
2012 | make_8259A_irq(irq); | ||
2013 | else | ||
2014 | /* Strange. Oh, well.. */ | ||
2015 | irq_desc[irq].handler = &no_irq_type; | ||
2016 | } | ||
2017 | } | ||
2018 | } | ||
2019 | |||
2020 | static void enable_lapic_irq (unsigned int irq) | ||
2021 | { | ||
2022 | unsigned long v; | ||
2023 | |||
2024 | v = apic_read(APIC_LVT0); | ||
2025 | apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED); | ||
2026 | } | ||
2027 | |||
2028 | static void disable_lapic_irq (unsigned int irq) | ||
2029 | { | ||
2030 | unsigned long v; | ||
2031 | |||
2032 | v = apic_read(APIC_LVT0); | ||
2033 | apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); | ||
2034 | } | ||
2035 | |||
2036 | static void ack_lapic_irq (unsigned int irq) | ||
2037 | { | ||
2038 | ack_APIC_irq(); | ||
2039 | } | ||
2040 | |||
2041 | static void end_lapic_irq (unsigned int i) { /* nothing */ } | ||
2042 | |||
2043 | static struct hw_interrupt_type lapic_irq_type = { | ||
2044 | .typename = "local-APIC-edge", | ||
2045 | .startup = NULL, /* startup_irq() not used for IRQ0 */ | ||
2046 | .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */ | ||
2047 | .enable = enable_lapic_irq, | ||
2048 | .disable = disable_lapic_irq, | ||
2049 | .ack = ack_lapic_irq, | ||
2050 | .end = end_lapic_irq | ||
2051 | }; | ||
2052 | |||
2053 | static void setup_nmi (void) | ||
2054 | { | ||
2055 | /* | ||
2056 | * Dirty trick to enable the NMI watchdog ... | ||
2057 | * We put the 8259A master into AEOI mode and | ||
2058 | * unmask LVT0 as NMI on all local APICs. | ||
2059 | * | ||
2060 | * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') | ||
2061 | * is from Maciej W. Rozycki - so we do not have to EOI from | ||
2062 | * the NMI handler or the timer interrupt. | ||
2063 | */ | ||
2064 | apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); | ||
2065 | |||
2066 | on_each_cpu(enable_NMI_through_LVT0, NULL, 1, 1); | ||
2067 | |||
2068 | apic_printk(APIC_VERBOSE, " done.\n"); | ||
2069 | } | ||
2070 | |||
2071 | /* | ||
2072 | * This looks a bit hackish but it's about the only way of sending | ||
2073 | * a few INTA cycles to 8259As and any associated glue logic. ICR does | ||
2074 | * not support the ExtINT mode, unfortunately. We need to send these | ||
2075 | * cycles as some i82489DX-based boards have glue logic that keeps the | ||
2076 | * 8259A interrupt line asserted until INTA. --macro | ||
2077 | */ | ||
2078 | static inline void unlock_ExtINT_logic(void) | ||
2079 | { | ||
2080 | int pin, i; | ||
2081 | struct IO_APIC_route_entry entry0, entry1; | ||
2082 | unsigned char save_control, save_freq_select; | ||
2083 | unsigned long flags; | ||
2084 | |||
2085 | pin = find_isa_irq_pin(8, mp_INT); | ||
2086 | if (pin == -1) | ||
2087 | return; | ||
2088 | |||
2089 | spin_lock_irqsave(&ioapic_lock, flags); | ||
2090 | *(((int *)&entry0) + 1) = io_apic_read(0, 0x11 + 2 * pin); | ||
2091 | *(((int *)&entry0) + 0) = io_apic_read(0, 0x10 + 2 * pin); | ||
2092 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2093 | clear_IO_APIC_pin(0, pin); | ||
2094 | |||
2095 | memset(&entry1, 0, sizeof(entry1)); | ||
2096 | |||
2097 | entry1.dest_mode = 0; /* physical delivery */ | ||
2098 | entry1.mask = 0; /* unmask IRQ now */ | ||
2099 | entry1.dest.physical.physical_dest = hard_smp_processor_id(); | ||
2100 | entry1.delivery_mode = dest_ExtINT; | ||
2101 | entry1.polarity = entry0.polarity; | ||
2102 | entry1.trigger = 0; | ||
2103 | entry1.vector = 0; | ||
2104 | |||
2105 | spin_lock_irqsave(&ioapic_lock, flags); | ||
2106 | io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry1) + 1)); | ||
2107 | io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry1) + 0)); | ||
2108 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2109 | |||
2110 | save_control = CMOS_READ(RTC_CONTROL); | ||
2111 | save_freq_select = CMOS_READ(RTC_FREQ_SELECT); | ||
2112 | CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6, | ||
2113 | RTC_FREQ_SELECT); | ||
2114 | CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL); | ||
2115 | |||
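 | /* Rate select 6 yields a 1024 Hz periodic interrupt (32768 >> (6-1)); | ||
 | * the loop below polls RTC_INTR_FLAGS for up to ~1 s, counting down | ||
 | * faster once the periodic-interrupt flag has been seen. */ | ||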
2116 | i = 100; | ||
2117 | while (i-- > 0) { | ||
2118 | mdelay(10); | ||
2119 | if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF) | ||
2120 | i -= 10; | ||
2121 | } | ||
2122 | |||
2123 | CMOS_WRITE(save_control, RTC_CONTROL); | ||
2124 | CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); | ||
2125 | clear_IO_APIC_pin(0, pin); | ||
2126 | |||
2127 | spin_lock_irqsave(&ioapic_lock, flags); | ||
2128 | io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry0) + 1)); | ||
2129 | io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry0) + 0)); | ||
2130 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2131 | } | ||
2132 | |||
2133 | /* | ||
2134 | * This code may look a bit paranoid, but it's supposed to cooperate with | ||
2135 | * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ | ||
2136 | * is so screwy. Thanks to Brian Perkins for testing/hacking this beast | ||
2137 | * fanatically on his truly buggy board. | ||
2138 | */ | ||
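 | /* Fallback order tried below: IRQ0 through its IO-APIC pin, then | ||
 | * through the ExtINT pin, then as a local APIC "virtual wire" IRQ, | ||
 | * and finally as an 8259A ExtINT - panicking only if all four fail. */ | ||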
2139 | static inline void check_timer(void) | ||
2140 | { | ||
2141 | int pin1, pin2; | ||
2142 | int vector; | ||
2143 | |||
2144 | /* | ||
2145 | * get/set the timer IRQ vector: | ||
2146 | */ | ||
2147 | disable_8259A_irq(0); | ||
2148 | vector = assign_irq_vector(0); | ||
2149 | set_intr_gate(vector, interrupt[0]); | ||
2150 | |||
2151 | /* | ||
2152 | * Subtle: code in do_timer_interrupt() expects an AEOI | ||
2153 | * mode for the 8259A whenever interrupts are routed | ||
2154 | * through I/O APICs. Also IRQ0 has to be enabled in | ||
2155 | * the 8259A which implies the virtual wire has to be | ||
2156 | * disabled in the local APIC. | ||
2157 | */ | ||
2158 | apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); | ||
2159 | init_8259A(1); | ||
2160 | timer_ack = 1; | ||
2161 | enable_8259A_irq(0); | ||
2162 | |||
2163 | pin1 = find_isa_irq_pin(0, mp_INT); | ||
2164 | pin2 = find_isa_irq_pin(0, mp_ExtINT); | ||
2165 | |||
2166 | printk(KERN_INFO "..TIMER: vector=0x%02X pin1=%d pin2=%d\n", vector, pin1, pin2); | ||
2167 | |||
2168 | if (pin1 != -1) { | ||
2169 | /* | ||
2170 | * Ok, does IRQ0 through the IOAPIC work? | ||
2171 | */ | ||
2172 | unmask_IO_APIC_irq(0); | ||
2173 | if (timer_irq_works()) { | ||
2174 | if (nmi_watchdog == NMI_IO_APIC) { | ||
2175 | disable_8259A_irq(0); | ||
2176 | setup_nmi(); | ||
2177 | enable_8259A_irq(0); | ||
2178 | check_nmi_watchdog(); | ||
2179 | } | ||
2180 | return; | ||
2181 | } | ||
2182 | clear_IO_APIC_pin(0, pin1); | ||
2183 | printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n"); | ||
2184 | } | ||
2185 | |||
2186 | printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... "); | ||
2187 | if (pin2 != -1) { | ||
2188 | printk("\n..... (found pin %d) ...", pin2); | ||
2189 | /* | ||
2190 | * legacy devices should be connected to IO APIC #0 | ||
2191 | */ | ||
2192 | setup_ExtINT_IRQ0_pin(pin2, vector); | ||
2193 | if (timer_irq_works()) { | ||
2194 | printk("works.\n"); | ||
2195 | if (pin1 != -1) | ||
2196 | replace_pin_at_irq(0, 0, pin1, 0, pin2); | ||
2197 | else | ||
2198 | add_pin_to_irq(0, 0, pin2); | ||
2199 | if (nmi_watchdog == NMI_IO_APIC) { | ||
2200 | setup_nmi(); | ||
2201 | check_nmi_watchdog(); | ||
2202 | } | ||
2203 | return; | ||
2204 | } | ||
2205 | /* | ||
2206 | * Cleanup, just in case ... | ||
2207 | */ | ||
2208 | clear_IO_APIC_pin(0, pin2); | ||
2209 | } | ||
2210 | printk(" failed.\n"); | ||
2211 | |||
2212 | if (nmi_watchdog == NMI_IO_APIC) { | ||
2213 | printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); | ||
2214 | nmi_watchdog = 0; | ||
2215 | } | ||
2216 | |||
2217 | printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); | ||
2218 | |||
2219 | disable_8259A_irq(0); | ||
2220 | irq_desc[0].handler = &lapic_irq_type; | ||
2221 | apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ | ||
2222 | enable_8259A_irq(0); | ||
2223 | |||
2224 | if (timer_irq_works()) { | ||
2225 | printk(" works.\n"); | ||
2226 | return; | ||
2227 | } | ||
2228 | apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); | ||
2229 | printk(" failed.\n"); | ||
2230 | |||
2231 | printk(KERN_INFO "...trying to set up timer as ExtINT IRQ..."); | ||
2232 | |||
2233 | timer_ack = 0; | ||
2234 | init_8259A(0); | ||
2235 | make_8259A_irq(0); | ||
2236 | apic_write_around(APIC_LVT0, APIC_DM_EXTINT); | ||
2237 | |||
2238 | unlock_ExtINT_logic(); | ||
2239 | |||
2240 | if (timer_irq_works()) { | ||
2241 | printk(" works.\n"); | ||
2242 | return; | ||
2243 | } | ||
2244 | printk(" failed :(.\n"); | ||
2245 | panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " | ||
2246 | "report. Then try booting with the 'noapic' option"); | ||
2247 | } | ||
2248 | |||
2249 | /* | ||
2250 | * | ||
2251 | * IRQs that are handled by the PIC in the MPS IOAPIC case. | ||
2252 | * - IRQ2 is the cascade IRQ, and cannot be an IO-APIC IRQ. | ||
2253 | * Linux doesn't really care, as it's not actually used | ||
2254 | * for any interrupt handling anyway. | ||
2255 | */ | ||
2256 | #define PIC_IRQS (1 << PIC_CASCADE_IR) | ||
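 | /* With PIC_CASCADE_IR == 2 (its usual 8259 value) this is the | ||
 | * mask 0x04, i.e. only the cascade IRQ stays with the PIC. */ | ||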
2257 | |||
2258 | void __init setup_IO_APIC(void) | ||
2259 | { | ||
2260 | enable_IO_APIC(); | ||
2261 | |||
2262 | if (acpi_ioapic) | ||
2263 | io_apic_irqs = ~0; /* all IRQs go through IOAPIC */ | ||
2264 | else | ||
2265 | io_apic_irqs = ~PIC_IRQS; | ||
2266 | |||
2267 | printk("ENABLING IO-APIC IRQs\n"); | ||
2268 | |||
2269 | /* | ||
2270 | * Set up IO-APIC IRQ routing. | ||
2271 | */ | ||
2272 | if (!acpi_ioapic) | ||
2273 | setup_ioapic_ids_from_mpc(); | ||
2274 | sync_Arb_IDs(); | ||
2275 | setup_IO_APIC_irqs(); | ||
2276 | init_IO_APIC_traps(); | ||
2277 | check_timer(); | ||
2278 | if (!acpi_ioapic) | ||
2279 | print_IO_APIC(); | ||
2280 | } | ||
2281 | |||
2282 | /* | ||
2283 | * Called after all the initialization is done. If we didn't find any | ||
2284 | * APIC bugs then we can allow the modify fast path. | ||
2285 | */ | ||
2286 | |||
2287 | static int __init io_apic_bug_finalize(void) | ||
2288 | { | ||
2289 | if (sis_apic_bug == -1) | ||
2290 | sis_apic_bug = 0; | ||
2291 | return 0; | ||
2292 | } | ||
2293 | |||
2294 | late_initcall(io_apic_bug_finalize); | ||
2295 | |||
2296 | struct sysfs_ioapic_data { | ||
2297 | struct sys_device dev; | ||
2298 | struct IO_APIC_route_entry entry[0]; | ||
2299 | }; | ||
2300 | static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS]; | ||
2301 | |||
2302 | static int ioapic_suspend(struct sys_device *dev, u32 state) | ||
2303 | { | ||
2304 | struct IO_APIC_route_entry *entry; | ||
2305 | struct sysfs_ioapic_data *data; | ||
2306 | unsigned long flags; | ||
2307 | int i; | ||
2308 | |||
2309 | data = container_of(dev, struct sysfs_ioapic_data, dev); | ||
2310 | entry = data->entry; | ||
2311 | spin_lock_irqsave(&ioapic_lock, flags); | ||
2312 | for (i = 0; i < nr_ioapic_registers[dev->id]; i++, entry++) { | ||
2313 | *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i); | ||
2314 | *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i); | ||
2315 | } | ||
2316 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2317 | |||
2318 | return 0; | ||
2319 | } | ||
2320 | |||
2321 | static int ioapic_resume(struct sys_device *dev) | ||
2322 | { | ||
2323 | struct IO_APIC_route_entry *entry; | ||
2324 | struct sysfs_ioapic_data *data; | ||
2325 | unsigned long flags; | ||
2326 | union IO_APIC_reg_00 reg_00; | ||
2327 | int i; | ||
2328 | |||
2329 | data = container_of(dev, struct sysfs_ioapic_data, dev); | ||
2330 | entry = data->entry; | ||
2331 | |||
2332 | spin_lock_irqsave(&ioapic_lock, flags); | ||
2333 | reg_00.raw = io_apic_read(dev->id, 0); | ||
2334 | if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) { | ||
2335 | reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; | ||
2336 | io_apic_write(dev->id, 0, reg_00.raw); | ||
2337 | } | ||
2338 | for (i = 0; i < nr_ioapic_registers[dev->id]; i++, entry++) { | ||
2339 | io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1)); | ||
2340 | io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0)); | ||
2341 | } | ||
2342 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2343 | |||
2344 | return 0; | ||
2345 | } | ||
2346 | |||
2347 | static struct sysdev_class ioapic_sysdev_class = { | ||
2348 | set_kset_name("ioapic"), | ||
2349 | .suspend = ioapic_suspend, | ||
2350 | .resume = ioapic_resume, | ||
2351 | }; | ||
2352 | |||
2353 | static int __init ioapic_init_sysfs(void) | ||
2354 | { | ||
2355 | struct sys_device * dev; | ||
2356 | int i, size, error = 0; | ||
2357 | |||
2358 | error = sysdev_class_register(&ioapic_sysdev_class); | ||
2359 | if (error) | ||
2360 | return error; | ||
2361 | |||
2362 | for (i = 0; i < nr_ioapics; i++ ) { | ||
2363 | size = sizeof(struct sys_device) + nr_ioapic_registers[i] | ||
2364 | * sizeof(struct IO_APIC_route_entry); | ||
2365 | mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL); | ||
2366 | if (!mp_ioapic_data[i]) { | ||
2367 | printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); | ||
2368 | continue; | ||
2369 | } | ||
2370 | memset(mp_ioapic_data[i], 0, size); | ||
2371 | dev = &mp_ioapic_data[i]->dev; | ||
2372 | dev->id = i; | ||
2373 | dev->cls = &ioapic_sysdev_class; | ||
2374 | error = sysdev_register(dev); | ||
2375 | if (error) { | ||
2376 | kfree(mp_ioapic_data[i]); | ||
2377 | mp_ioapic_data[i] = NULL; | ||
2378 | printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); | ||
2379 | continue; | ||
2380 | } | ||
2381 | } | ||
2382 | |||
2383 | return 0; | ||
2384 | } | ||
2385 | |||
2386 | device_initcall(ioapic_init_sysfs); | ||
2387 | |||
2388 | /* -------------------------------------------------------------------------- | ||
2389 | ACPI-based IOAPIC Configuration | ||
2390 | -------------------------------------------------------------------------- */ | ||
2391 | |||
2392 | #ifdef CONFIG_ACPI_BOOT | ||
2393 | |||
2394 | int __init io_apic_get_unique_id (int ioapic, int apic_id) | ||
2395 | { | ||
2396 | union IO_APIC_reg_00 reg_00; | ||
2397 | static physid_mask_t apic_id_map = PHYSID_MASK_NONE; | ||
2398 | physid_mask_t tmp; | ||
2399 | unsigned long flags; | ||
2400 | int i = 0; | ||
2401 | |||
2402 | /* | ||
2403 | * The P4 platform supports up to 256 APIC IDs on two separate APIC | ||
2404 | * buses (one for LAPICs, one for IOAPICs), whereas its predecessors | ||
2405 | * support only up to 16 on one shared APIC bus. | ||
2406 | * | ||
2407 | * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full | ||
2408 | * advantage of new APIC bus architecture. | ||
2409 | */ | ||
2410 | |||
2411 | if (physids_empty(apic_id_map)) | ||
2412 | apic_id_map = ioapic_phys_id_map(phys_cpu_present_map); | ||
2413 | |||
2414 | spin_lock_irqsave(&ioapic_lock, flags); | ||
2415 | reg_00.raw = io_apic_read(ioapic, 0); | ||
2416 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2417 | |||
2418 | if (apic_id >= get_physical_broadcast()) { | ||
2419 | printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " | ||
2420 | "%d\n", ioapic, apic_id, reg_00.bits.ID); | ||
2421 | apic_id = reg_00.bits.ID; | ||
2422 | } | ||
2423 | |||
2424 | /* | ||
2425 | * Every APIC in a system must have a unique ID or we get lots of nice | ||
2426 | * 'stuck on smp_invalidate_needed IPI wait' messages. | ||
2427 | */ | ||
2428 | if (check_apicid_used(apic_id_map, apic_id)) { | ||
2429 | |||
2430 | for (i = 0; i < get_physical_broadcast(); i++) { | ||
2431 | if (!check_apicid_used(apic_id_map, i)) | ||
2432 | break; | ||
2433 | } | ||
2434 | |||
2435 | if (i == get_physical_broadcast()) | ||
2436 | panic("Max apic_id exceeded!\n"); | ||
2437 | |||
2438 | printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " | ||
2439 | "trying %d\n", ioapic, apic_id, i); | ||
2440 | |||
2441 | apic_id = i; | ||
2442 | } | ||
2443 | |||
2444 | tmp = apicid_to_cpu_present(apic_id); | ||
2445 | physids_or(apic_id_map, apic_id_map, tmp); | ||
2446 | |||
2447 | if (reg_00.bits.ID != apic_id) { | ||
2448 | reg_00.bits.ID = apic_id; | ||
2449 | |||
2450 | spin_lock_irqsave(&ioapic_lock, flags); | ||
2451 | io_apic_write(ioapic, 0, reg_00.raw); | ||
2452 | reg_00.raw = io_apic_read(ioapic, 0); | ||
2453 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2454 | |||
2455 | /* Sanity check */ | ||
2456 | if (reg_00.bits.ID != apic_id) | ||
2457 | panic("IOAPIC[%d]: Unable change apic_id!\n", ioapic); | ||
2458 | } | ||
2459 | |||
2460 | apic_printk(APIC_VERBOSE, KERN_INFO | ||
2461 | "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); | ||
2462 | |||
2463 | return apic_id; | ||
2464 | } | ||
2465 | |||
2466 | |||
2467 | int __init io_apic_get_version (int ioapic) | ||
2468 | { | ||
2469 | union IO_APIC_reg_01 reg_01; | ||
2470 | unsigned long flags; | ||
2471 | |||
2472 | spin_lock_irqsave(&ioapic_lock, flags); | ||
2473 | reg_01.raw = io_apic_read(ioapic, 1); | ||
2474 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2475 | |||
2476 | return reg_01.bits.version; | ||
2477 | } | ||
2478 | |||
2479 | |||
2480 | int __init io_apic_get_redir_entries (int ioapic) | ||
2481 | { | ||
2482 | union IO_APIC_reg_01 reg_01; | ||
2483 | unsigned long flags; | ||
2484 | |||
2485 | spin_lock_irqsave(&ioapic_lock, flags); | ||
2486 | reg_01.raw = io_apic_read(ioapic, 1); | ||
2487 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2488 | |||
2489 | return reg_01.bits.entries; | ||
2490 | } | ||
2491 | |||
2492 | |||
2493 | int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low) | ||
2494 | { | ||
2495 | struct IO_APIC_route_entry entry; | ||
2496 | unsigned long flags; | ||
2497 | |||
2498 | if (!IO_APIC_IRQ(irq)) { | ||
2499 | printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ %d\n", | ||
2500 | ioapic, irq); | ||
2501 | return -EINVAL; | ||
2502 | } | ||
2503 | |||
2504 | /* | ||
2505 | * Generate a PCI IRQ routing entry and program the IOAPIC accordingly. | ||
2506 | * Note that we mask (disable) IRQs now -- these get enabled when the | ||
2507 | * corresponding device driver registers for this IRQ. | ||
2508 | */ | ||
2509 | |||
2510 | memset(&entry,0,sizeof(entry)); | ||
2511 | |||
2512 | entry.delivery_mode = INT_DELIVERY_MODE; | ||
2513 | entry.dest_mode = INT_DEST_MODE; | ||
2514 | entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); | ||
2515 | entry.trigger = edge_level; | ||
2516 | entry.polarity = active_high_low; | ||
2517 | entry.mask = 1; | ||
2518 | |||
2519 | /* | ||
2520 | * IRQs < 16 are already in the irq_2_pin[] map | ||
2521 | */ | ||
2522 | if (irq >= 16) | ||
2523 | add_pin_to_irq(irq, ioapic, pin); | ||
2524 | |||
2525 | entry.vector = assign_irq_vector(irq); | ||
2526 | |||
2527 | apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry " | ||
2528 | "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic, | ||
2529 | mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, | ||
2530 | edge_level, active_high_low); | ||
2531 | |||
2532 | ioapic_register_intr(irq, entry.vector, edge_level); | ||
2533 | |||
2534 | if (!ioapic && (irq < 16)) | ||
2535 | disable_8259A_irq(irq); | ||
2536 | |||
2537 | spin_lock_irqsave(&ioapic_lock, flags); | ||
2538 | io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); | ||
2539 | io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0)); | ||
2540 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2541 | |||
2542 | return 0; | ||
2543 | } | ||
2544 | |||
2545 | #endif /*CONFIG_ACPI_BOOT*/ | ||
diff --git a/arch/i386/kernel/ioport.c b/arch/i386/kernel/ioport.c new file mode 100644 index 000000000000..8b25160393c1 --- /dev/null +++ b/arch/i386/kernel/ioport.c | |||
@@ -0,0 +1,147 @@ | |||
1 | /* | ||
2 | * linux/arch/i386/kernel/ioport.c | ||
3 | * | ||
4 | * This contains the io-permission bitmap code - written by obz, with changes | ||
5 | * by Linus. | ||
6 | */ | ||
7 | |||
8 | #include <linux/sched.h> | ||
9 | #include <linux/kernel.h> | ||
10 | #include <linux/errno.h> | ||
11 | #include <linux/types.h> | ||
12 | #include <linux/ioport.h> | ||
13 | #include <linux/smp.h> | ||
14 | #include <linux/smp_lock.h> | ||
15 | #include <linux/stddef.h> | ||
16 | #include <linux/slab.h> | ||
17 | #include <linux/thread_info.h> | ||
18 | |||
19 | /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ | ||
20 | static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value) | ||
21 | { | ||
22 | unsigned long mask; | ||
23 | unsigned long *bitmap_base = bitmap + (base / BITS_PER_LONG); | ||
24 | unsigned int low_index = base & (BITS_PER_LONG-1); | ||
25 | int length = low_index + extent; | ||
26 | |||
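 | /* Three phases: a partial leading word, any number of whole words, | ||
 | * then a partial trailing word. */ | ||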
27 | if (low_index != 0) { | ||
28 | mask = (~0UL << low_index); | ||
29 | if (length < BITS_PER_LONG) | ||
30 | mask &= ~(~0UL << length); | ||
31 | if (new_value) | ||
32 | *bitmap_base++ |= mask; | ||
33 | else | ||
34 | *bitmap_base++ &= ~mask; | ||
35 | length -= BITS_PER_LONG; | ||
36 | } | ||
37 | |||
38 | mask = (new_value ? ~0UL : 0UL); | ||
39 | while (length >= BITS_PER_LONG) { | ||
40 | *bitmap_base++ = mask; | ||
41 | length -= BITS_PER_LONG; | ||
42 | } | ||
43 | |||
44 | if (length > 0) { | ||
45 | mask = ~(~0UL << length); | ||
46 | if (new_value) | ||
47 | *bitmap_base++ |= mask; | ||
48 | else | ||
49 | *bitmap_base++ &= ~mask; | ||
50 | } | ||
51 | } | ||
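 | /* Worked example (illustrative values): with 32-bit longs, | ||
 | * set_bitmap(map, 3, 70, 1) sets bits 3..72 - the tail of word 0, | ||
 | * all of word 1, and bits 0..8 of word 2. */ | ||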
52 | |||
53 | |||
54 | /* | ||
55 | * This changes the I/O permission bitmap of the current task. | ||
56 | */ | ||
57 | asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) | ||
58 | { | ||
59 | unsigned long i, max_long, bytes, bytes_updated; | ||
60 | struct thread_struct * t = ¤t->thread; | ||
61 | struct tss_struct * tss; | ||
62 | unsigned long *bitmap; | ||
63 | |||
64 | if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) | ||
65 | return -EINVAL; | ||
66 | if (turn_on && !capable(CAP_SYS_RAWIO)) | ||
67 | return -EPERM; | ||
68 | |||
69 | /* | ||
70 | * If it's the first ioperm() call in this thread's lifetime, set the | ||
71 | * IO bitmap up. ioperm() is much less timing critical than clone(), | ||
72 | * which is why we delay this operation until now: | ||
73 | */ | ||
74 | if (!t->io_bitmap_ptr) { | ||
75 | bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); | ||
76 | if (!bitmap) | ||
77 | return -ENOMEM; | ||
78 | |||
79 | memset(bitmap, 0xff, IO_BITMAP_BYTES); | ||
80 | t->io_bitmap_ptr = bitmap; | ||
81 | } | ||
82 | |||
83 | /* | ||
84 | * do it in the per-thread copy and in the TSS ... | ||
85 | * | ||
86 | * Disable preemption via get_cpu() - we must not switch away | ||
87 | * because the ->io_bitmap_max value must match the bitmap | ||
88 | * contents: | ||
89 | */ | ||
90 | tss = &per_cpu(init_tss, get_cpu()); | ||
91 | |||
92 | set_bitmap(t->io_bitmap_ptr, from, num, !turn_on); | ||
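 | /* Note the inversion: in the hardware I/O bitmap a clear bit grants | ||
 | * access, so turning ports on means clearing their bits. */ | ||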
93 | |||
94 | /* | ||
95 | * Search for a (possibly new) maximum. This is simple and stupid, | ||
96 | * to keep it obviously correct: | ||
97 | */ | ||
98 | max_long = 0; | ||
99 | for (i = 0; i < IO_BITMAP_LONGS; i++) | ||
100 | if (t->io_bitmap_ptr[i] != ~0UL) | ||
101 | max_long = i; | ||
102 | |||
103 | bytes = (max_long + 1) * sizeof(long); | ||
104 | bytes_updated = max(bytes, t->io_bitmap_max); | ||
105 | |||
106 | t->io_bitmap_max = bytes; | ||
107 | |||
108 | /* | ||
109 | * Sets the lazy trigger so that the next I/O operation will | ||
110 | * reload the correct bitmap. | ||
111 | */ | ||
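 | /* (The next I/O access then faults, and the #GP handler is expected | ||
 | * to copy this thread's bitmap into the TSS before retrying.) */ | ||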
112 | tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY; | ||
113 | |||
114 | put_cpu(); | ||
115 | |||
116 | return 0; | ||
117 | } | ||
118 | |||
119 | /* | ||
120 | * sys_iopl has to be used when you want to access the IO ports | ||
121 | * beyond the 0x3ff range: to get the full 65536 ports bitmapped | ||
122 | * you'd need 8kB of bitmaps/process, which is a bit excessive. | ||
123 | * | ||
124 | * Here we just change the eflags value on the stack: we allow | ||
125 | * only the super-user to do it. This depends on the stack-layout | ||
126 | * on system-call entry - see also fork() and the signal handling | ||
127 | * code. | ||
128 | */ | ||
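 | /* IOPL lives in EFLAGS bits 12-13 (mask 0x3000); e.g. raising a task | ||
 | * to IOPL 3 turns eflags into (eflags & ~0x3000) | (3 << 12). */ | ||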
129 | |||
130 | asmlinkage long sys_iopl(unsigned long unused) | ||
131 | { | ||
132 | volatile struct pt_regs * regs = (struct pt_regs *) &unused; | ||
133 | unsigned int level = regs->ebx; | ||
134 | unsigned int old = (regs->eflags >> 12) & 3; | ||
135 | |||
136 | if (level > 3) | ||
137 | return -EINVAL; | ||
138 | /* Trying to gain more privileges? */ | ||
139 | if (level > old) { | ||
140 | if (!capable(CAP_SYS_RAWIO)) | ||
141 | return -EPERM; | ||
142 | } | ||
143 | regs->eflags = (regs->eflags & ~0x3000UL) | (level << 12); | ||
144 | /* Make sure we return the long way (not sysenter) */ | ||
145 | set_thread_flag(TIF_IRET); | ||
146 | return 0; | ||
147 | } | ||
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c new file mode 100644 index 000000000000..73945a3c53c4 --- /dev/null +++ b/arch/i386/kernel/irq.c | |||
@@ -0,0 +1,261 @@ | |||
1 | /* | ||
2 | * linux/arch/i386/kernel/irq.c | ||
3 | * | ||
4 | * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar | ||
5 | * | ||
6 | * This file contains the lowest level x86-specific interrupt | ||
7 | * entry, irq-stacks and irq statistics code. All the remaining | ||
8 | * irq logic is done by the generic kernel/irq/ code and | ||
9 | * by the x86-specific irq controller code. (e.g. i8259.c and | ||
10 | * io_apic.c.) | ||
11 | */ | ||
12 | |||
13 | #include <asm/uaccess.h> | ||
14 | #include <linux/module.h> | ||
15 | #include <linux/seq_file.h> | ||
16 | #include <linux/interrupt.h> | ||
17 | #include <linux/kernel_stat.h> | ||
18 | |||
19 | DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_maxaligned_in_smp; | ||
20 | EXPORT_PER_CPU_SYMBOL(irq_stat); | ||
21 | |||
22 | #ifndef CONFIG_X86_LOCAL_APIC | ||
23 | /* | ||
24 | * 'what should we do if we get a hw irq event on an illegal vector'. | ||
25 | * Each architecture has to answer this for itself. | ||
26 | */ | ||
27 | void ack_bad_irq(unsigned int irq) | ||
28 | { | ||
29 | printk("unexpected IRQ trap at vector %02x\n", irq); | ||
30 | } | ||
31 | #endif | ||
32 | |||
33 | #ifdef CONFIG_4KSTACKS | ||
34 | /* | ||
35 | * per-CPU IRQ handling contexts (thread information and stack) | ||
36 | */ | ||
37 | union irq_ctx { | ||
38 | struct thread_info tinfo; | ||
39 | u32 stack[THREAD_SIZE/sizeof(u32)]; | ||
40 | }; | ||
41 | |||
42 | static union irq_ctx *hardirq_ctx[NR_CPUS]; | ||
43 | static union irq_ctx *softirq_ctx[NR_CPUS]; | ||
44 | #endif | ||
45 | |||
46 | /* | ||
47 | * do_IRQ handles all normal device IRQs (the special | ||
48 | * SMP cross-CPU interrupts have their own specific | ||
49 | * handlers). | ||
50 | */ | ||
51 | fastcall unsigned int do_IRQ(struct pt_regs *regs) | ||
52 | { | ||
53 | /* high bits used in ret_from_ code */ | ||
54 | int irq = regs->orig_eax & 0xff; | ||
55 | #ifdef CONFIG_4KSTACKS | ||
56 | union irq_ctx *curctx, *irqctx; | ||
57 | u32 *isp; | ||
58 | #endif | ||
59 | |||
60 | irq_enter(); | ||
61 | #ifdef CONFIG_DEBUG_STACKOVERFLOW | ||
62 | /* Debugging check for stack overflow: is there less than 1KB free? */ | ||
63 | { | ||
64 | long esp; | ||
65 | |||
66 | __asm__ __volatile__("andl %%esp,%0" : | ||
67 | "=r" (esp) : "0" (THREAD_SIZE - 1)); | ||
68 | if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) { | ||
69 | printk("do_IRQ: stack overflow: %ld\n", | ||
70 | esp - sizeof(struct thread_info)); | ||
71 | dump_stack(); | ||
72 | } | ||
73 | } | ||
74 | #endif | ||
75 | |||
76 | #ifdef CONFIG_4KSTACKS | ||
77 | |||
78 | curctx = (union irq_ctx *) current_thread_info(); | ||
79 | irqctx = hardirq_ctx[smp_processor_id()]; | ||
80 | |||
81 | /* | ||
82 | * This is where we switch to the IRQ stack. However, if we are | ||
83 | * already using the IRQ stack (because we interrupted a hardirq | ||
84 | * handler), we can't do that and just have to keep using the | ||
85 | * current stack (which is the IRQ stack already, after all). | ||
86 | */ | ||
87 | if (curctx != irqctx) { | ||
88 | int arg1, arg2, ebx; | ||
89 | |||
90 | /* build the stack frame on the IRQ stack */ | ||
91 | isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); | ||
92 | irqctx->tinfo.task = curctx->tinfo.task; | ||
93 | irqctx->tinfo.previous_esp = current_stack_pointer; | ||
94 | |||
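/*
 * __do_IRQ() takes its arguments in registers (regparm): the "0"/"1"
 * constraints below preload irq into %eax and regs into %edx, while
 * "2" puts the new stack top into %ebx.  The xchgl parks the task's
 * %esp in %ebx for the duration of the call on the IRQ stack; the
 * movl afterwards restores it once __do_IRQ() returns.
 */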
95 | asm volatile( | ||
96 | " xchgl %%ebx,%%esp \n" | ||
97 | " call __do_IRQ \n" | ||
98 | " movl %%ebx,%%esp \n" | ||
99 | : "=a" (arg1), "=d" (arg2), "=b" (ebx) | ||
100 | : "0" (irq), "1" (regs), "2" (isp) | ||
101 | : "memory", "cc", "ecx" | ||
102 | ); | ||
103 | } else | ||
104 | #endif | ||
105 | __do_IRQ(irq, regs); | ||
106 | |||
107 | irq_exit(); | ||
108 | |||
109 | return 1; | ||
110 | } | ||
111 | |||
112 | #ifdef CONFIG_4KSTACKS | ||
113 | |||
114 | /* | ||
115 | * These should really be __section__(".bss.page_aligned") as well, but | ||
116 | * gcc 3.0 and earlier don't handle that correctly. | ||
117 | */ | ||
118 | static char softirq_stack[NR_CPUS * THREAD_SIZE] | ||
119 | __attribute__((__aligned__(THREAD_SIZE))); | ||
120 | |||
121 | static char hardirq_stack[NR_CPUS * THREAD_SIZE] | ||
122 | __attribute__((__aligned__(THREAD_SIZE))); | ||
123 | |||
124 | /* | ||
125 | * allocate per-cpu stacks for hardirq and for softirq processing | ||
126 | */ | ||
127 | void irq_ctx_init(int cpu) | ||
128 | { | ||
129 | union irq_ctx *irqctx; | ||
130 | |||
131 | if (hardirq_ctx[cpu]) | ||
132 | return; | ||
133 | |||
134 | irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE]; | ||
135 | irqctx->tinfo.task = NULL; | ||
136 | irqctx->tinfo.exec_domain = NULL; | ||
137 | irqctx->tinfo.cpu = cpu; | ||
138 | irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; | ||
139 | irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); | ||
140 | |||
141 | hardirq_ctx[cpu] = irqctx; | ||
142 | |||
143 | irqctx = (union irq_ctx*) &softirq_stack[cpu*THREAD_SIZE]; | ||
144 | irqctx->tinfo.task = NULL; | ||
145 | irqctx->tinfo.exec_domain = NULL; | ||
146 | irqctx->tinfo.cpu = cpu; | ||
147 | irqctx->tinfo.preempt_count = SOFTIRQ_OFFSET; | ||
148 | irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); | ||
149 | |||
150 | softirq_ctx[cpu] = irqctx; | ||
151 | |||
152 | printk("CPU %u irqstacks, hard=%p soft=%p\n", | ||
153 | cpu,hardirq_ctx[cpu],softirq_ctx[cpu]); | ||
154 | } | ||
155 | |||
156 | extern asmlinkage void __do_softirq(void); | ||
157 | |||
158 | asmlinkage void do_softirq(void) | ||
159 | { | ||
160 | unsigned long flags; | ||
161 | struct thread_info *curctx; | ||
162 | union irq_ctx *irqctx; | ||
163 | u32 *isp; | ||
164 | |||
165 | if (in_interrupt()) | ||
166 | return; | ||
167 | |||
168 | local_irq_save(flags); | ||
169 | |||
170 | if (local_softirq_pending()) { | ||
171 | curctx = current_thread_info(); | ||
172 | irqctx = softirq_ctx[smp_processor_id()]; | ||
173 | irqctx->tinfo.task = curctx->task; | ||
174 | irqctx->tinfo.previous_esp = current_stack_pointer; | ||
175 | |||
176 | /* build the stack frame on the softirq stack */ | ||
177 | isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); | ||
178 | |||
179 | asm volatile( | ||
180 | " xchgl %%ebx,%%esp \n" | ||
181 | " call __do_softirq \n" | ||
182 | " movl %%ebx,%%esp \n" | ||
183 | : "=b"(isp) | ||
184 | : "0"(isp) | ||
185 | : "memory", "cc", "edx", "ecx", "eax" | ||
186 | ); | ||
187 | } | ||
188 | |||
189 | local_irq_restore(flags); | ||
190 | } | ||
191 | |||
192 | EXPORT_SYMBOL(do_softirq); | ||
193 | #endif | ||
194 | |||
195 | /* | ||
196 | * Interrupt statistics: | ||
197 | */ | ||
198 | |||
199 | atomic_t irq_err_count; | ||
200 | |||
201 | /* | ||
202 | * /proc/interrupts printing: | ||
203 | */ | ||
204 | |||
205 | int show_interrupts(struct seq_file *p, void *v) | ||
206 | { | ||
207 | int i = *(loff_t *) v, j; | ||
208 | struct irqaction * action; | ||
209 | unsigned long flags; | ||
210 | |||
211 | if (i == 0) { | ||
212 | seq_printf(p, " "); | ||
213 | for (j=0; j<NR_CPUS; j++) | ||
214 | if (cpu_online(j)) | ||
215 | seq_printf(p, "CPU%d ",j); | ||
216 | seq_putc(p, '\n'); | ||
217 | } | ||
218 | |||
219 | if (i < NR_IRQS) { | ||
220 | spin_lock_irqsave(&irq_desc[i].lock, flags); | ||
221 | action = irq_desc[i].action; | ||
222 | if (!action) | ||
223 | goto skip; | ||
224 | seq_printf(p, "%3d: ",i); | ||
225 | #ifndef CONFIG_SMP | ||
226 | seq_printf(p, "%10u ", kstat_irqs(i)); | ||
227 | #else | ||
228 | for (j = 0; j < NR_CPUS; j++) | ||
229 | if (cpu_online(j)) | ||
230 | seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); | ||
231 | #endif | ||
232 | seq_printf(p, " %14s", irq_desc[i].handler->typename); | ||
233 | seq_printf(p, " %s", action->name); | ||
234 | |||
235 | for (action=action->next; action; action = action->next) | ||
236 | seq_printf(p, ", %s", action->name); | ||
237 | |||
238 | seq_putc(p, '\n'); | ||
239 | skip: | ||
240 | spin_unlock_irqrestore(&irq_desc[i].lock, flags); | ||
241 | } else if (i == NR_IRQS) { | ||
242 | seq_printf(p, "NMI: "); | ||
243 | for (j = 0; j < NR_CPUS; j++) | ||
244 | if (cpu_online(j)) | ||
245 | seq_printf(p, "%10u ", nmi_count(j)); | ||
246 | seq_putc(p, '\n'); | ||
247 | #ifdef CONFIG_X86_LOCAL_APIC | ||
248 | seq_printf(p, "LOC: "); | ||
249 | for (j = 0; j < NR_CPUS; j++) | ||
250 | if (cpu_online(j)) | ||
251 | seq_printf(p, "%10u ", | ||
252 | per_cpu(irq_stat,j).apic_timer_irqs); | ||
253 | seq_putc(p, '\n'); | ||
254 | #endif | ||
255 | seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); | ||
256 | #if defined(CONFIG_X86_IO_APIC) | ||
257 | seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); | ||
258 | #endif | ||
259 | } | ||
260 | return 0; | ||
261 | } | ||
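show_interrupts() above is the seq_file backend for /proc/interrupts: a header row of online CPUs, one row of per-CPU counts per registered IRQ (with the controller typename and action names), then the NMI, LOC (local APIC), ERR, and MIS rows. As a quick illustration of that output, here is a minimal userspace sketch that simply dumps the file; only the /proc path comes from the code above, the rest is illustrative:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/interrupts", "r");
	char line[256];

	if (!f) {
		perror("/proc/interrupts");
		return 1;
	}
	/* header row, per-IRQ counts, then the NMI/LOC/ERR/MIS rows */
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}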
diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c new file mode 100644 index 000000000000..671681659243 --- /dev/null +++ b/arch/i386/kernel/kprobes.c | |||
@@ -0,0 +1,385 @@ | |||
1 | /* | ||
2 | * Kernel Probes (KProbes) | ||
3 | * arch/i386/kernel/kprobes.c | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation; either version 2 of the License, or | ||
8 | * (at your option) any later version. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write to the Free Software | ||
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
18 | * | ||
19 | * Copyright (C) IBM Corporation, 2002, 2004 | ||
20 | * | ||
21 | * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel | ||
22 | * Probes initial implementation (includes contributions from | ||
23 | * Rusty Russell). | ||
24 | * 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes | ||
25 | * interface to access function arguments. | ||
26 | */ | ||
27 | |||
28 | #include <linux/config.h> | ||
29 | #include <linux/kprobes.h> | ||
30 | #include <linux/ptrace.h> | ||
31 | #include <linux/spinlock.h> | ||
32 | #include <linux/preempt.h> | ||
33 | #include <asm/kdebug.h> | ||
34 | #include <asm/desc.h> | ||
35 | |||
36 | /* kprobe_status settings */ | ||
37 | #define KPROBE_HIT_ACTIVE 0x00000001 | ||
38 | #define KPROBE_HIT_SS 0x00000002 | ||
39 | |||
40 | static struct kprobe *current_kprobe; | ||
41 | static unsigned long kprobe_status, kprobe_old_eflags, kprobe_saved_eflags; | ||
42 | static struct pt_regs jprobe_saved_regs; | ||
43 | static long *jprobe_saved_esp; | ||
44 | /* copy of the kernel stack at the probe fire time */ | ||
45 | static kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE]; | ||
46 | void jprobe_return_end(void); | ||
47 | |||
48 | /* | ||
49 | * returns non-zero if opcode modifies the interrupt flag. | ||
50 | */ | ||
51 | static inline int is_IF_modifier(kprobe_opcode_t opcode) | ||
52 | { | ||
53 | switch (opcode) { | ||
54 | case 0xfa: /* cli */ | ||
55 | case 0xfb: /* sti */ | ||
56 | case 0xcf: /* iret/iretd */ | ||
57 | case 0x9d: /* popf/popfd */ | ||
58 | return 1; | ||
59 | } | ||
60 | return 0; | ||
61 | } | ||
62 | |||
63 | int arch_prepare_kprobe(struct kprobe *p) | ||
64 | { | ||
65 | return 0; | ||
66 | } | ||
67 | |||
68 | void arch_copy_kprobe(struct kprobe *p) | ||
69 | { | ||
70 | memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); | ||
71 | } | ||
72 | |||
73 | void arch_remove_kprobe(struct kprobe *p) | ||
74 | { | ||
75 | } | ||
76 | |||
77 | static inline void disarm_kprobe(struct kprobe *p, struct pt_regs *regs) | ||
78 | { | ||
79 | *p->addr = p->opcode; | ||
80 | regs->eip = (unsigned long)p->addr; | ||
81 | } | ||
82 | |||
83 | static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) | ||
84 | { | ||
85 | regs->eflags |= TF_MASK; | ||
86 | regs->eflags &= ~IF_MASK; | ||
87 | /* single-step inline if the instruction is an int3 */ | ||
88 | if (p->opcode == BREAKPOINT_INSTRUCTION) | ||
89 | regs->eip = (unsigned long)p->addr; | ||
90 | else | ||
91 | regs->eip = (unsigned long)&p->ainsn.insn; | ||
92 | } | ||
93 | |||
94 | /* | ||
95 | * Interrupts are disabled on entry as trap3 is an interrupt gate and they | ||
96 | * remain disabled throughout this function. | ||
97 | */ | ||
98 | static int kprobe_handler(struct pt_regs *regs) | ||
99 | { | ||
100 | struct kprobe *p; | ||
101 | int ret = 0; | ||
102 | kprobe_opcode_t *addr = NULL; | ||
103 | unsigned long *lp; | ||
104 | |||
105 | /* We're in an interrupt, but this is clear and BUG()-safe. */ | ||
106 | preempt_disable(); | ||
107 | /* Check if the application is using LDT entry for its code segment and | ||
108 | * calculate the address by reading the base address from the LDT entry. | ||
109 | */ | ||
110 | if ((regs->xcs & 4) && (current->mm)) { | ||
111 | lp = (unsigned long *) ((unsigned long)((regs->xcs >> 3) * 8) | ||
112 | + (char *) current->mm->context.ldt); | ||
113 | addr = (kprobe_opcode_t *) (get_desc_base(lp) + regs->eip - | ||
114 | sizeof(kprobe_opcode_t)); | ||
115 | } else { | ||
116 | addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t)); | ||
117 | } | ||
118 | /* Check we're not actually recursing */ | ||
119 | if (kprobe_running()) { | ||
120 | /* We *are* holding lock here, so this is safe. | ||
121 | Disarm the probe we just hit, and ignore it. */ | ||
122 | p = get_kprobe(addr); | ||
123 | if (p) { | ||
124 | if (kprobe_status == KPROBE_HIT_SS) { | ||
125 | regs->eflags &= ~TF_MASK; | ||
126 | regs->eflags |= kprobe_saved_eflags; | ||
127 | unlock_kprobes(); | ||
128 | goto no_kprobe; | ||
129 | } | ||
130 | disarm_kprobe(p, regs); | ||
131 | ret = 1; | ||
132 | } else { | ||
133 | p = current_kprobe; | ||
134 | if (p->break_handler && p->break_handler(p, regs)) { | ||
135 | goto ss_probe; | ||
136 | } | ||
137 | } | ||
138 | /* If it's not ours, it can't be a delete race (we hold the lock). */ | ||
139 | goto no_kprobe; | ||
140 | } | ||
141 | |||
142 | lock_kprobes(); | ||
143 | p = get_kprobe(addr); | ||
144 | if (!p) { | ||
145 | unlock_kprobes(); | ||
146 | if (regs->eflags & VM_MASK) { | ||
147 | /* We are in virtual-8086 mode. Return 0 */ | ||
148 | goto no_kprobe; | ||
149 | } | ||
150 | |||
151 | if (*addr != BREAKPOINT_INSTRUCTION) { | ||
152 | /* | ||
153 | * The breakpoint instruction was removed right | ||
154 | * after we hit it. Another cpu has removed | ||
155 | * either a probepoint or a debugger breakpoint | ||
156 | * at this address. In either case, no further | ||
157 | * handling of this interrupt is appropriate. | ||
158 | */ | ||
159 | ret = 1; | ||
160 | } | ||
161 | /* Not one of ours: let kernel handle it */ | ||
162 | goto no_kprobe; | ||
163 | } | ||
164 | |||
165 | kprobe_status = KPROBE_HIT_ACTIVE; | ||
166 | current_kprobe = p; | ||
167 | kprobe_saved_eflags = kprobe_old_eflags | ||
168 | = (regs->eflags & (TF_MASK | IF_MASK)); | ||
169 | if (is_IF_modifier(p->opcode)) | ||
170 | kprobe_saved_eflags &= ~IF_MASK; | ||
171 | |||
172 | if (p->pre_handler && p->pre_handler(p, regs)) | ||
173 | /* handler has already set things up, so skip ss setup */ | ||
174 | return 1; | ||
175 | |||
176 | ss_probe: | ||
177 | prepare_singlestep(p, regs); | ||
178 | kprobe_status = KPROBE_HIT_SS; | ||
179 | return 1; | ||
180 | |||
181 | no_kprobe: | ||
182 | preempt_enable_no_resched(); | ||
183 | return ret; | ||
184 | } | ||
185 | |||
186 | /* | ||
187 | * Called after single-stepping. p->addr is the address of the | ||
188 | * instruction whose first byte has been replaced by the "int 3" | ||
189 | * instruction. To avoid the SMP problems that can occur when we | ||
190 | * temporarily put back the original opcode to single-step, we | ||
191 | * single-stepped a copy of the instruction. The address of this | ||
192 | * copy is p->ainsn.insn. | ||
193 | * | ||
194 | * This function prepares to return from the post-single-step | ||
195 | * interrupt. We have to fix up the stack as follows: | ||
196 | * | ||
197 | * 0) Except in the case of absolute or indirect jump or call instructions, | ||
198 | * the new eip is relative to the copied instruction. We need to make | ||
199 | * it relative to the original instruction. | ||
200 | * | ||
201 | * 1) If the single-stepped instruction was pushfl, then the TF and IF | ||
202 | * flags are set in the just-pushed eflags, and may need to be cleared. | ||
203 | * | ||
204 | * 2) If the single-stepped instruction was a call, the return address | ||
205 | * that is atop the stack is the address following the copied instruction. | ||
206 | * We need to make it the address following the original instruction. | ||
207 | */ | ||
208 | static void resume_execution(struct kprobe *p, struct pt_regs *regs) | ||
209 | { | ||
210 | unsigned long *tos = (unsigned long *)&regs->esp; | ||
211 | unsigned long next_eip = 0; | ||
212 | unsigned long copy_eip = (unsigned long)&p->ainsn.insn; | ||
213 | unsigned long orig_eip = (unsigned long)p->addr; | ||
214 | |||
215 | switch (p->ainsn.insn[0]) { | ||
216 | case 0x9c: /* pushfl */ | ||
217 | *tos &= ~(TF_MASK | IF_MASK); | ||
218 | *tos |= kprobe_old_eflags; | ||
219 | break; | ||
220 | case 0xe8: /* call relative - Fix return addr */ | ||
221 | *tos = orig_eip + (*tos - copy_eip); | ||
222 | break; | ||
223 | case 0xff: | ||
224 | if ((p->ainsn.insn[1] & 0x30) == 0x10) { | ||
225 | /* call absolute, indirect */ | ||
226 | /* Fix return addr; eip is correct. */ | ||
227 | next_eip = regs->eip; | ||
228 | *tos = orig_eip + (*tos - copy_eip); | ||
229 | } else if (((p->ainsn.insn[1] & 0x31) == 0x20) || /* jmp near, absolute indirect */ | ||
230 | ((p->ainsn.insn[1] & 0x31) == 0x21)) { /* jmp far, absolute indirect */ | ||
231 | /* eip is correct. */ | ||
232 | next_eip = regs->eip; | ||
233 | } | ||
234 | break; | ||
235 | case 0xea: /* jmp absolute -- eip is correct */ | ||
236 | next_eip = regs->eip; | ||
237 | break; | ||
238 | default: | ||
239 | break; | ||
240 | } | ||
241 | |||
242 | regs->eflags &= ~TF_MASK; | ||
243 | if (next_eip) { | ||
244 | regs->eip = next_eip; | ||
245 | } else { | ||
246 | regs->eip = orig_eip + (regs->eip - copy_eip); | ||
247 | } | ||
248 | } | ||
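/*
 * Worked example for the 0xe8 fixup above (illustrative addresses):
 * with the probed call at orig_eip 0xc0110000 and its copy at
 * copy_eip 0xd0800000, single-stepping the 5-byte "call rel32" pushes
 * copy_eip + 5; *tos = orig_eip + (*tos - copy_eip) rewrites that to
 * orig_eip + 5.  The final eip adjustment relocates eip back into the
 * original instruction stream in the same way.
 */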
249 | |||
250 | /* | ||
251 | * Interrupts are disabled on entry as trap1 is an interrupt gate and they | ||
252 | * remain disabled throughout this function. And we hold the kprobe lock. | ||
253 | */ | ||
254 | static inline int post_kprobe_handler(struct pt_regs *regs) | ||
255 | { | ||
256 | if (!kprobe_running()) | ||
257 | return 0; | ||
258 | |||
259 | if (current_kprobe->post_handler) | ||
260 | current_kprobe->post_handler(current_kprobe, regs, 0); | ||
261 | |||
262 | resume_execution(current_kprobe, regs); | ||
263 | regs->eflags |= kprobe_saved_eflags; | ||
264 | |||
265 | unlock_kprobes(); | ||
266 | preempt_enable_no_resched(); | ||
267 | |||
268 | /* | ||
269 | * if somebody else is singlestepping across a probe point, eflags | ||
270 | * will have TF set, in which case, continue the remaining processing | ||
271 | * of do_debug, as if this is not a probe hit. | ||
272 | */ | ||
273 | if (regs->eflags & TF_MASK) | ||
274 | return 0; | ||
275 | |||
276 | return 1; | ||
277 | } | ||
278 | |||
279 | /* Interrupts disabled, kprobe_lock held. */ | ||
280 | static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) | ||
281 | { | ||
282 | if (current_kprobe->fault_handler | ||
283 | && current_kprobe->fault_handler(current_kprobe, regs, trapnr)) | ||
284 | return 1; | ||
285 | |||
286 | if (kprobe_status & KPROBE_HIT_SS) { | ||
287 | resume_execution(current_kprobe, regs); | ||
288 | regs->eflags |= kprobe_old_eflags; | ||
289 | |||
290 | unlock_kprobes(); | ||
291 | preempt_enable_no_resched(); | ||
292 | } | ||
293 | return 0; | ||
294 | } | ||
295 | |||
296 | /* | ||
297 | * Wrapper routine for handling exceptions. | ||
298 | */ | ||
299 | int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, | ||
300 | void *data) | ||
301 | { | ||
302 | struct die_args *args = (struct die_args *)data; | ||
303 | switch (val) { | ||
304 | case DIE_INT3: | ||
305 | if (kprobe_handler(args->regs)) | ||
306 | return NOTIFY_STOP; | ||
307 | break; | ||
308 | case DIE_DEBUG: | ||
309 | if (post_kprobe_handler(args->regs)) | ||
310 | return NOTIFY_STOP; | ||
311 | break; | ||
312 | case DIE_GPF: | ||
313 | if (kprobe_running() && | ||
314 | kprobe_fault_handler(args->regs, args->trapnr)) | ||
315 | return NOTIFY_STOP; | ||
316 | break; | ||
317 | case DIE_PAGE_FAULT: | ||
318 | if (kprobe_running() && | ||
319 | kprobe_fault_handler(args->regs, args->trapnr)) | ||
320 | return NOTIFY_STOP; | ||
321 | break; | ||
322 | default: | ||
323 | break; | ||
324 | } | ||
325 | return NOTIFY_DONE; | ||
326 | } | ||
327 | |||
328 | int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) | ||
329 | { | ||
330 | struct jprobe *jp = container_of(p, struct jprobe, kp); | ||
331 | unsigned long addr; | ||
332 | |||
333 | jprobe_saved_regs = *regs; | ||
334 | jprobe_saved_esp = &regs->esp; | ||
335 | addr = (unsigned long)jprobe_saved_esp; | ||
336 | |||
337 | /* | ||
338 | * TBD: As Linus pointed out, gcc assumes that the callee | ||
339 | * owns the argument space and could overwrite it, e.g. | ||
340 | * tailcall optimization. So, to be absolutely safe | ||
341 | * we also save and restore enough stack bytes to cover | ||
342 | * the argument area. | ||
343 | */ | ||
344 | memcpy(jprobes_stack, (kprobe_opcode_t *) addr, MIN_STACK_SIZE(addr)); | ||
345 | regs->eflags &= ~IF_MASK; | ||
346 | regs->eip = (unsigned long)(jp->entry); | ||
347 | return 1; | ||
348 | } | ||
349 | |||
350 | void jprobe_return(void) | ||
351 | { | ||
352 | preempt_enable_no_resched(); | ||
353 | asm volatile (" xchgl %%ebx,%%esp \n" | ||
354 | " int3 \n" | ||
355 | " .globl jprobe_return_end \n" | ||
356 | " jprobe_return_end: \n" | ||
357 | " nop \n"::"b" | ||
358 | (jprobe_saved_esp):"memory"); | ||
359 | } | ||
360 | |||
361 | int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) | ||
362 | { | ||
363 | u8 *addr = (u8 *) (regs->eip - 1); | ||
364 | unsigned long stack_addr = (unsigned long)jprobe_saved_esp; | ||
365 | struct jprobe *jp = container_of(p, struct jprobe, kp); | ||
366 | |||
367 | if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) { | ||
368 | if (&regs->esp != jprobe_saved_esp) { | ||
369 | struct pt_regs *saved_regs = | ||
370 | container_of(jprobe_saved_esp, struct pt_regs, esp); | ||
371 | printk("current esp %p does not match saved esp %p\n", | ||
372 | &regs->esp, jprobe_saved_esp); | ||
373 | printk("Saved registers for jprobe %p\n", jp); | ||
374 | show_registers(saved_regs); | ||
375 | printk("Current registers\n"); | ||
376 | show_registers(regs); | ||
377 | BUG(); | ||
378 | } | ||
379 | *regs = jprobe_saved_regs; | ||
380 | memcpy((kprobe_opcode_t *) stack_addr, jprobes_stack, | ||
381 | MIN_STACK_SIZE(stack_addr)); | ||
382 | return 1; | ||
383 | } | ||
384 | return 0; | ||
385 | } | ||
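The notifier above dispatches int3 and debug traps into kprobe_handler() and post_kprobe_handler(). For the consumer side of this interface, here is a minimal module sketch, hedged: the probed address is supplied by hand (e.g. looked up in System.map), and the module and handler names are made up for illustration. Returning 0 from the pre_handler lets the single-step machinery above run the original instruction.

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/ptrace.h>
#include <linux/kprobes.h>

/* hypothetical parameter: address of the kernel text to probe,
 * e.g. taken from System.map */
static unsigned long addr;
module_param(addr, ulong, 0);

static struct kprobe kp;

/* called from kprobe_handler() when the probe fires */
static int sample_pre(struct kprobe *p, struct pt_regs *regs)
{
	printk(KERN_INFO "kprobe hit: eip=%08lx\n", regs->eip);
	return 0;	/* continue with the normal single-step setup */
}

static int __init sample_init(void)
{
	kp.addr = (kprobe_opcode_t *) addr;
	kp.pre_handler = sample_pre;
	return register_kprobe(&kp);
}

static void __exit sample_exit(void)
{
	unregister_kprobe(&kp);
}

module_init(sample_init);
module_exit(sample_exit);
MODULE_LICENSE("GPL");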
diff --git a/arch/i386/kernel/ldt.c b/arch/i386/kernel/ldt.c new file mode 100644 index 000000000000..bb50afbee921 --- /dev/null +++ b/arch/i386/kernel/ldt.c | |||
@@ -0,0 +1,255 @@ | |||
1 | /* | ||
2 | * linux/arch/i386/kernel/ldt.c | ||
3 | * | ||
4 | * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds | ||
5 | * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com> | ||
6 | */ | ||
7 | |||
8 | #include <linux/errno.h> | ||
9 | #include <linux/sched.h> | ||
10 | #include <linux/string.h> | ||
11 | #include <linux/mm.h> | ||
12 | #include <linux/smp.h> | ||
13 | #include <linux/smp_lock.h> | ||
14 | #include <linux/vmalloc.h> | ||
15 | #include <linux/slab.h> | ||
16 | |||
17 | #include <asm/uaccess.h> | ||
18 | #include <asm/system.h> | ||
19 | #include <asm/ldt.h> | ||
20 | #include <asm/desc.h> | ||
21 | |||
22 | #ifdef CONFIG_SMP /* avoids "defined but not used" warning */ | ||
23 | static void flush_ldt(void *null) | ||
24 | { | ||
25 | if (current->active_mm) | ||
26 | load_LDT(¤t->active_mm->context); | ||
27 | } | ||
28 | #endif | ||
29 | |||
30 | static int alloc_ldt(mm_context_t *pc, int mincount, int reload) | ||
31 | { | ||
32 | void *oldldt; | ||
33 | void *newldt; | ||
34 | int oldsize; | ||
35 | |||
36 | if (mincount <= pc->size) | ||
37 | return 0; | ||
38 | oldsize = pc->size; | ||
39 | mincount = (mincount+511)&(~511); | ||
40 | if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE) | ||
41 | newldt = vmalloc(mincount*LDT_ENTRY_SIZE); | ||
42 | else | ||
43 | newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); | ||
44 | |||
45 | if (!newldt) | ||
46 | return -ENOMEM; | ||
47 | |||
48 | if (oldsize) | ||
49 | memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE); | ||
50 | oldldt = pc->ldt; | ||
51 | memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE); | ||
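/*
 * Publish the new table before the new size: a racing reader that
 * sees the enlarged pc->size must also see the new pc->ldt pointer,
 * which is what the wmb() between the two stores guarantees.
 */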
52 | pc->ldt = newldt; | ||
53 | wmb(); | ||
54 | pc->size = mincount; | ||
55 | wmb(); | ||
56 | |||
57 | if (reload) { | ||
58 | #ifdef CONFIG_SMP | ||
59 | cpumask_t mask; | ||
60 | preempt_disable(); | ||
61 | load_LDT(pc); | ||
62 | mask = cpumask_of_cpu(smp_processor_id()); | ||
63 | if (!cpus_equal(current->mm->cpu_vm_mask, mask)) | ||
64 | smp_call_function(flush_ldt, NULL, 1, 1); | ||
65 | preempt_enable(); | ||
66 | #else | ||
67 | load_LDT(pc); | ||
68 | #endif | ||
69 | } | ||
70 | if (oldsize) { | ||
71 | if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE) | ||
72 | vfree(oldldt); | ||
73 | else | ||
74 | kfree(oldldt); | ||
75 | } | ||
76 | return 0; | ||
77 | } | ||
78 | |||
79 | static inline int copy_ldt(mm_context_t *new, mm_context_t *old) | ||
80 | { | ||
81 | int err = alloc_ldt(new, old->size, 0); | ||
82 | if (err < 0) | ||
83 | return err; | ||
84 | memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE); | ||
85 | return 0; | ||
86 | } | ||
87 | |||
88 | /* | ||
89 | * we do not have to muck with descriptors here, that is | ||
90 | * done in switch_mm() as needed. | ||
91 | */ | ||
92 | int init_new_context(struct task_struct *tsk, struct mm_struct *mm) | ||
93 | { | ||
94 | struct mm_struct * old_mm; | ||
95 | int retval = 0; | ||
96 | |||
97 | init_MUTEX(&mm->context.sem); | ||
98 | mm->context.size = 0; | ||
99 | old_mm = current->mm; | ||
100 | if (old_mm && old_mm->context.size > 0) { | ||
101 | down(&old_mm->context.sem); | ||
102 | retval = copy_ldt(&mm->context, &old_mm->context); | ||
103 | up(&old_mm->context.sem); | ||
104 | } | ||
105 | return retval; | ||
106 | } | ||
107 | |||
108 | /* | ||
109 | * No need to lock the MM as we are the last user | ||
110 | */ | ||
111 | void destroy_context(struct mm_struct *mm) | ||
112 | { | ||
113 | if (mm->context.size) { | ||
114 | if (mm == current->active_mm) | ||
115 | clear_LDT(); | ||
116 | if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE) | ||
117 | vfree(mm->context.ldt); | ||
118 | else | ||
119 | kfree(mm->context.ldt); | ||
120 | mm->context.size = 0; | ||
121 | } | ||
122 | } | ||
123 | |||
124 | static int read_ldt(void __user * ptr, unsigned long bytecount) | ||
125 | { | ||
126 | int err; | ||
127 | unsigned long size; | ||
128 | struct mm_struct * mm = current->mm; | ||
129 | |||
130 | if (!mm->context.size) | ||
131 | return 0; | ||
132 | if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES) | ||
133 | bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES; | ||
134 | |||
135 | down(&mm->context.sem); | ||
136 | size = mm->context.size*LDT_ENTRY_SIZE; | ||
137 | if (size > bytecount) | ||
138 | size = bytecount; | ||
139 | |||
140 | err = 0; | ||
141 | if (copy_to_user(ptr, mm->context.ldt, size)) | ||
142 | err = -EFAULT; | ||
143 | up(&mm->context.sem); | ||
144 | if (err < 0) | ||
145 | goto error_return; | ||
146 | if (size != bytecount) { | ||
147 | /* zero-fill the rest */ | ||
148 | if (clear_user(ptr+size, bytecount-size) != 0) { | ||
149 | err = -EFAULT; | ||
150 | goto error_return; | ||
151 | } | ||
152 | } | ||
153 | return bytecount; | ||
154 | error_return: | ||
155 | return err; | ||
156 | } | ||
157 | |||
158 | static int read_default_ldt(void __user * ptr, unsigned long bytecount) | ||
159 | { | ||
160 | int err; | ||
161 | unsigned long size; | ||
162 | void *address; | ||
163 | |||
164 | err = 0; | ||
165 | address = &default_ldt[0]; | ||
166 | size = 5*sizeof(struct desc_struct); | ||
167 | if (size > bytecount) | ||
168 | size = bytecount; | ||
169 | |||
170 | err = size; | ||
171 | if (copy_to_user(ptr, address, size)) | ||
172 | err = -EFAULT; | ||
173 | |||
174 | return err; | ||
175 | } | ||
176 | |||
177 | static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) | ||
178 | { | ||
179 | struct mm_struct * mm = current->mm; | ||
180 | __u32 entry_1, entry_2, *lp; | ||
181 | int error; | ||
182 | struct user_desc ldt_info; | ||
183 | |||
184 | error = -EINVAL; | ||
185 | if (bytecount != sizeof(ldt_info)) | ||
186 | goto out; | ||
187 | error = -EFAULT; | ||
188 | if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info))) | ||
189 | goto out; | ||
190 | |||
191 | error = -EINVAL; | ||
192 | if (ldt_info.entry_number >= LDT_ENTRIES) | ||
193 | goto out; | ||
194 | if (ldt_info.contents == 3) { | ||
195 | if (oldmode) | ||
196 | goto out; | ||
197 | if (ldt_info.seg_not_present == 0) | ||
198 | goto out; | ||
199 | } | ||
200 | |||
201 | down(&mm->context.sem); | ||
202 | if (ldt_info.entry_number >= mm->context.size) { | ||
203 | error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1); | ||
204 | if (error < 0) | ||
205 | goto out_unlock; | ||
206 | } | ||
207 | |||
208 | lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.ldt); | ||
209 | |||
210 | /* Allow LDTs to be cleared by the user. */ | ||
211 | if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { | ||
212 | if (oldmode || LDT_empty(&ldt_info)) { | ||
213 | entry_1 = 0; | ||
214 | entry_2 = 0; | ||
215 | goto install; | ||
216 | } | ||
217 | } | ||
218 | |||
219 | entry_1 = LDT_entry_a(&ldt_info); | ||
220 | entry_2 = LDT_entry_b(&ldt_info); | ||
221 | if (oldmode) | ||
222 | entry_2 &= ~(1 << 20); | ||
223 | |||
224 | /* Install the new entry ... */ | ||
225 | install: | ||
226 | *lp = entry_1; | ||
227 | *(lp+1) = entry_2; | ||
228 | error = 0; | ||
229 | |||
230 | out_unlock: | ||
231 | up(&mm->context.sem); | ||
232 | out: | ||
233 | return error; | ||
234 | } | ||
235 | |||
236 | asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount) | ||
237 | { | ||
238 | int ret = -ENOSYS; | ||
239 | |||
240 | switch (func) { | ||
241 | case 0: | ||
242 | ret = read_ldt(ptr, bytecount); | ||
243 | break; | ||
244 | case 1: | ||
245 | ret = write_ldt(ptr, bytecount, 1); | ||
246 | break; | ||
247 | case 2: | ||
248 | ret = read_default_ldt(ptr, bytecount); | ||
249 | break; | ||
250 | case 0x11: | ||
251 | ret = write_ldt(ptr, bytecount, 0); | ||
252 | break; | ||
253 | } | ||
254 | return ret; | ||
255 | } | ||
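sys_modify_ldt() is reached from userspace through the modify_ldt system call, with func 0 = read_ldt(), 1 = old-style write_ldt(), 2 = read_default_ldt(), and 0x11 = new-style write_ldt(). A minimal i386 userspace sketch exercising the read path; SYS_modify_ldt comes from <sys/syscall.h>, everything else here is illustrative:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
	unsigned char buf[16 * 8];	/* room for 16 eight-byte descriptors */
	long n, i;

	memset(buf, 0xaa, sizeof(buf));
	/* func 0: read_ldt(); returns bytecount with the tail zero-filled,
	 * or 0 if the task has no LDT */
	n = syscall(SYS_modify_ldt, 0, buf, sizeof(buf));
	if (n < 0) {
		perror("modify_ldt");
		return 1;
	}
	for (i = 0; i + 8 <= n; i += 8)
		printf("entry %2ld: %02x%02x%02x%02x%02x%02x%02x%02x\n", i / 8,
		       buf[i+7], buf[i+6], buf[i+5], buf[i+4],
		       buf[i+3], buf[i+2], buf[i+1], buf[i]);
	return 0;
}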
diff --git a/arch/i386/kernel/mca.c b/arch/i386/kernel/mca.c new file mode 100644 index 000000000000..8600faeea29d --- /dev/null +++ b/arch/i386/kernel/mca.c | |||
@@ -0,0 +1,474 @@ | |||
1 | /* | ||
2 | * linux/arch/i386/kernel/mca.c | ||
3 | * Written by Martin Kolinek, February 1996 | ||
4 | * | ||
5 | * Changes: | ||
6 | * | ||
7 | * Chris Beauregard July 28th, 1996 | ||
8 | * - Fixed up integrated SCSI detection | ||
9 | * | ||
10 | * Chris Beauregard August 3rd, 1996 | ||
11 | * - Made mca_info local | ||
12 | * - Made integrated registers accessible through standard function calls | ||
13 | * - Added name field | ||
14 | * - More sanity checking | ||
15 | * | ||
16 | * Chris Beauregard August 9th, 1996 | ||
17 | * - Rewrote /proc/mca | ||
18 | * | ||
19 | * Chris Beauregard January 7th, 1997 | ||
20 | * - Added basic NMI-processing | ||
21 | * - Added more information to mca_info structure | ||
22 | * | ||
23 | * David Weinehall October 12th, 1998 | ||
24 | * - Made a lot of cleaning up in the source | ||
25 | * - Added use of save_flags / restore_flags | ||
26 | * - Added the 'driver_loaded' flag in MCA_adapter | ||
27 | * - Added an alternative implementation of ZP Gu's mca_find_unused_adapter | ||
28 | * | ||
29 | * David Weinehall March 24th, 1999 | ||
30 | * - Fixed the output of 'Driver Installed' in /proc/mca/pos | ||
31 | * - Made the Integrated Video & SCSI show up even if they have id 0000 | ||
32 | * | ||
33 | * Alexander Viro November 9th, 1999 | ||
34 | * - Switched to regular procfs methods | ||
35 | * | ||
36 | * Alfred Arnold & David Weinehall August 23rd, 2000 | ||
37 | * - Added support for Planar POS-registers | ||
38 | */ | ||
39 | |||
40 | #include <linux/module.h> | ||
41 | #include <linux/types.h> | ||
42 | #include <linux/errno.h> | ||
43 | #include <linux/kernel.h> | ||
44 | #include <linux/mca.h> | ||
45 | #include <asm/system.h> | ||
46 | #include <asm/io.h> | ||
47 | #include <linux/proc_fs.h> | ||
48 | #include <linux/mman.h> | ||
49 | #include <linux/config.h> | ||
50 | #include <linux/mm.h> | ||
51 | #include <linux/pagemap.h> | ||
52 | #include <linux/ioport.h> | ||
53 | #include <asm/uaccess.h> | ||
54 | #include <linux/init.h> | ||
55 | #include <asm/arch_hooks.h> | ||
56 | |||
57 | static unsigned char which_scsi = 0; | ||
58 | |||
59 | int MCA_bus = 0; | ||
60 | EXPORT_SYMBOL(MCA_bus); | ||
61 | |||
62 | /* | ||
63 | * Motherboard register spinlock. Untested on SMP at the moment, but | ||
64 | * are there any MCA SMP boxes? | ||
65 | * | ||
66 | * Yes - Alan | ||
67 | */ | ||
68 | static DEFINE_SPINLOCK(mca_lock); | ||
69 | |||
70 | /* Build the status info for the adapter */ | ||
71 | |||
72 | static void mca_configure_adapter_status(struct mca_device *mca_dev) { | ||
73 | mca_dev->status = MCA_ADAPTER_NONE; | ||
74 | |||
75 | mca_dev->pos_id = mca_dev->pos[0] | ||
76 | + (mca_dev->pos[1] << 8); | ||
77 | |||
78 | if(!mca_dev->pos_id && mca_dev->slot < MCA_MAX_SLOT_NR) { | ||
79 | |||
80 | /* id = 0x0000 usually indicates hardware failure, | ||
81 | * however, ZP Gu (zpg@castle.net) reports that his 9556 | ||
82 | * has 0x0000 as id and everything still works. There | ||
83 | * also seems to be an adapter with id = 0x0000: the | ||
84 | * NCR Parallel Bus Memory Card. Until this is confirmed, | ||
85 | * however, this code will stay. | ||
86 | */ | ||
87 | |||
88 | mca_dev->status = MCA_ADAPTER_ERROR; | ||
89 | |||
90 | return; | ||
91 | } else if(mca_dev->pos_id != 0xffff) { | ||
92 | |||
93 | /* 0xffff usually indicates that there's no adapter, | ||
94 | * however, some integrated adapters may have 0xffff as | ||
95 | * their id and still be valid. Examples are on-board | ||
96 | * VGA of the 55sx, the integrated SCSI of the 56 & 57, | ||
97 | * and possibly also the 95 ULTIMEDIA. | ||
98 | */ | ||
99 | |||
100 | mca_dev->status = MCA_ADAPTER_NORMAL; | ||
101 | } | ||
102 | |||
103 | if((mca_dev->pos_id == 0xffff || | ||
104 | mca_dev->pos_id == 0x0000) && mca_dev->slot >= MCA_MAX_SLOT_NR) { | ||
105 | int j; | ||
106 | |||
107 | for(j = 2; j < 8; j++) { | ||
108 | if(mca_dev->pos[j] != 0xff) { | ||
109 | mca_dev->status = MCA_ADAPTER_NORMAL; | ||
110 | break; | ||
111 | } | ||
112 | } | ||
113 | } | ||
114 | |||
115 | if(!(mca_dev->pos[2] & MCA_ENABLED)) { | ||
116 | |||
117 | /* enabled bit is in POS 2 */ | ||
118 | |||
119 | mca_dev->status = MCA_ADAPTER_DISABLED; | ||
120 | } | ||
121 | } /* mca_configure_adapter_status */ | ||
122 | |||
123 | /*--------------------------------------------------------------------*/ | ||
124 | |||
125 | static struct resource mca_standard_resources[] = { | ||
126 | { .start = 0x60, .end = 0x60, .name = "system control port B (MCA)" }, | ||
127 | { .start = 0x90, .end = 0x90, .name = "arbitration (MCA)" }, | ||
128 | { .start = 0x91, .end = 0x91, .name = "card Select Feedback (MCA)" }, | ||
129 | { .start = 0x92, .end = 0x92, .name = "system Control port A (MCA)" }, | ||
130 | { .start = 0x94, .end = 0x94, .name = "system board setup (MCA)" }, | ||
131 | { .start = 0x96, .end = 0x97, .name = "POS (MCA)" }, | ||
132 | { .start = 0x100, .end = 0x107, .name = "POS (MCA)" } | ||
133 | }; | ||
134 | |||
135 | #define MCA_STANDARD_RESOURCES (sizeof(mca_standard_resources)/sizeof(struct resource)) | ||
136 | |||
137 | /** | ||
138 | * mca_read_and_store_pos - read the POS registers into a memory buffer | ||
139 | * @pos: a char pointer to 8 bytes, contains the POS register value on | ||
140 | * successful return | ||
141 | * | ||
142 | * Returns 1 if a card actually exists (i.e. the pos isn't | ||
143 | * all 0xff) or 0 otherwise | ||
144 | */ | ||
145 | static int mca_read_and_store_pos(unsigned char *pos) { | ||
146 | int j; | ||
147 | int found = 0; | ||
148 | |||
149 | for(j=0; j<8; j++) { | ||
150 | if((pos[j] = inb_p(MCA_POS_REG(j))) != 0xff) { | ||
151 | /* 0xff all across means no device. 0x00 means | ||
152 | * something's broken, but a device is | ||
153 | * probably there. However, if you get 0x00 | ||
154 | * from a motherboard register it won't matter | ||
155 | * what we find. For the record, on the | ||
156 | * 57SLC, the integrated SCSI adapter has | ||
157 | * 0xffff for the adapter ID, but nonzero for | ||
158 | * other registers. */ | ||
159 | |||
160 | found = 1; | ||
161 | } | ||
162 | } | ||
163 | return found; | ||
164 | } | ||
165 | |||
166 | static unsigned char mca_pc_read_pos(struct mca_device *mca_dev, int reg) | ||
167 | { | ||
168 | unsigned char byte; | ||
169 | unsigned long flags; | ||
170 | |||
171 | if(reg < 0 || reg >= 8) | ||
172 | return 0; | ||
173 | |||
174 | spin_lock_irqsave(&mca_lock, flags); | ||
175 | if(mca_dev->pos_register) { | ||
176 | /* Disable adapter setup, enable motherboard setup */ | ||
177 | |||
178 | outb_p(0, MCA_ADAPTER_SETUP_REG); | ||
179 | outb_p(mca_dev->pos_register, MCA_MOTHERBOARD_SETUP_REG); | ||
180 | |||
181 | byte = inb_p(MCA_POS_REG(reg)); | ||
182 | outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG); | ||
183 | } else { | ||
184 | |||
185 | /* Make sure motherboard setup is off */ | ||
186 | |||
187 | outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG); | ||
188 | |||
189 | /* Read the appropriate register */ | ||
190 | |||
191 | outb_p(0x8|(mca_dev->slot & 0xf), MCA_ADAPTER_SETUP_REG); | ||
192 | byte = inb_p(MCA_POS_REG(reg)); | ||
193 | outb_p(0, MCA_ADAPTER_SETUP_REG); | ||
194 | } | ||
195 | spin_unlock_irqrestore(&mca_lock, flags); | ||
196 | |||
197 | mca_dev->pos[reg] = byte; | ||
198 | |||
199 | return byte; | ||
200 | } | ||
201 | |||
202 | static void mca_pc_write_pos(struct mca_device *mca_dev, int reg, | ||
203 | unsigned char byte) | ||
204 | { | ||
205 | unsigned long flags; | ||
206 | |||
207 | if(reg < 0 || reg >= 8) | ||
208 | return; | ||
209 | |||
210 | spin_lock_irqsave(&mca_lock, flags); | ||
211 | |||
212 | /* Make sure motherboard setup is off */ | ||
213 | |||
214 | outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG); | ||
215 | |||
216 | /* Read in the appropriate register */ | ||
217 | |||
218 | outb_p(0x8|(mca_dev->slot&0xf), MCA_ADAPTER_SETUP_REG); | ||
219 | outb_p(byte, MCA_POS_REG(reg)); | ||
220 | outb_p(0, MCA_ADAPTER_SETUP_REG); | ||
221 | |||
222 | spin_unlock_irqrestore(&mca_lock, flags); | ||
223 | |||
224 | /* Update the global register list, while we have the byte */ | ||
225 | |||
226 | mca_dev->pos[reg] = byte; | ||
227 | |||
228 | } | ||
229 | |||
230 | /* for the primary MCA bus, we have identity transforms */ | ||
231 | static int mca_dummy_transform_irq(struct mca_device * mca_dev, int irq) | ||
232 | { | ||
233 | return irq; | ||
234 | } | ||
235 | |||
236 | static int mca_dummy_transform_ioport(struct mca_device * mca_dev, int port) | ||
237 | { | ||
238 | return port; | ||
239 | } | ||
240 | |||
241 | static void *mca_dummy_transform_memory(struct mca_device * mca_dev, void *mem) | ||
242 | { | ||
243 | return mem; | ||
244 | } | ||
245 | |||
246 | |||
247 | static int __init mca_init(void) | ||
248 | { | ||
249 | unsigned int i, j; | ||
250 | struct mca_device *mca_dev; | ||
251 | unsigned char pos[8]; | ||
252 | short mca_builtin_scsi_ports[] = {0xf7, 0xfd, 0x00}; | ||
253 | struct mca_bus *bus; | ||
254 | |||
255 | /* WARNING: Be careful when making changes here. Putting an adapter | ||
256 | * and the motherboard simultaneously into setup mode may result in | ||
257 | * damage to chips (according to The Indispensable PC Hardware Book | ||
258 | * by Hans-Peter Messmer). Also, we disable system interrupts (so | ||
259 | * that we are not disturbed in the middle of this). | ||
260 | */ | ||
261 | |||
262 | /* Make sure the MCA bus is present */ | ||
263 | |||
264 | if (mca_system_init()) { | ||
265 | printk(KERN_ERR "MCA bus system initialisation failed\n"); | ||
266 | return -ENODEV; | ||
267 | } | ||
268 | |||
269 | if (!MCA_bus) | ||
270 | return -ENODEV; | ||
271 | |||
272 | printk(KERN_INFO "Micro Channel bus detected.\n"); | ||
273 | |||
274 | /* All MCA systems have at least a primary bus */ | ||
275 | bus = mca_attach_bus(MCA_PRIMARY_BUS); | ||
276 | if (!bus) | ||
277 | goto out_nomem; | ||
278 | bus->default_dma_mask = 0xffffffffLL; | ||
279 | bus->f.mca_write_pos = mca_pc_write_pos; | ||
280 | bus->f.mca_read_pos = mca_pc_read_pos; | ||
281 | bus->f.mca_transform_irq = mca_dummy_transform_irq; | ||
282 | bus->f.mca_transform_ioport = mca_dummy_transform_ioport; | ||
283 | bus->f.mca_transform_memory = mca_dummy_transform_memory; | ||
284 | |||
285 | /* get the motherboard device */ | ||
286 | mca_dev = kmalloc(sizeof(struct mca_device), GFP_KERNEL); | ||
287 | if(unlikely(!mca_dev)) | ||
288 | goto out_nomem; | ||
289 | memset(mca_dev, 0, sizeof(struct mca_device)); | ||
290 | |||
291 | /* | ||
292 | * We do not expect many MCA interrupts during initialization, | ||
293 | * but let us be safe: | ||
294 | */ | ||
295 | spin_lock_irq(&mca_lock); | ||
296 | |||
297 | /* Make sure adapter setup is off */ | ||
298 | |||
299 | outb_p(0, MCA_ADAPTER_SETUP_REG); | ||
300 | |||
301 | /* Read motherboard POS registers */ | ||
302 | |||
303 | mca_dev->pos_register = 0x7f; | ||
304 | outb_p(mca_dev->pos_register, MCA_MOTHERBOARD_SETUP_REG); | ||
305 | mca_dev->name[0] = 0; | ||
306 | mca_read_and_store_pos(mca_dev->pos); | ||
307 | mca_configure_adapter_status(mca_dev); | ||
308 | /* fake POS and slot for a motherboard */ | ||
309 | mca_dev->pos_id = MCA_MOTHERBOARD_POS; | ||
310 | mca_dev->slot = MCA_MOTHERBOARD; | ||
311 | mca_register_device(MCA_PRIMARY_BUS, mca_dev); | ||
312 | |||
313 | mca_dev = kmalloc(sizeof(struct mca_device), GFP_ATOMIC); | ||
314 | if(unlikely(!mca_dev)) | ||
315 | goto out_unlock_nomem; | ||
316 | memset(mca_dev, 0, sizeof(struct mca_device)); | ||
317 | |||
318 | |||
319 | /* Put motherboard into video setup mode, read integrated video | ||
320 | * POS registers, and turn motherboard setup off. | ||
321 | */ | ||
322 | |||
323 | mca_dev->pos_register = 0xdf; | ||
324 | outb_p(mca_dev->pos_register, MCA_MOTHERBOARD_SETUP_REG); | ||
325 | mca_dev->name[0] = 0; | ||
326 | mca_read_and_store_pos(mca_dev->pos); | ||
327 | mca_configure_adapter_status(mca_dev); | ||
328 | /* fake POS and slot for the integrated video */ | ||
329 | mca_dev->pos_id = MCA_INTEGVIDEO_POS; | ||
330 | mca_dev->slot = MCA_INTEGVIDEO; | ||
331 | mca_register_device(MCA_PRIMARY_BUS, mca_dev); | ||
332 | |||
333 | /* Put motherboard into scsi setup mode, read integrated scsi | ||
334 | * POS registers, and turn motherboard setup off. | ||
335 | * | ||
336 | * It seems there are two possible SCSI registers. Martin says that | ||
337 | * for the 56,57, 0xf7 is the one, but fails on the 76. | ||
338 | * Alfredo (apena@vnet.ibm.com) says | ||
339 | * 0xfd works on his machine. We'll try both of them. I figure it's | ||
340 | * a good bet that only one could be valid at a time. This could | ||
341 | * screw up though if one is used for something else on the other | ||
342 | * machine. | ||
343 | */ | ||
344 | |||
345 | for(i = 0; (which_scsi = mca_builtin_scsi_ports[i]) != 0; i++) { | ||
346 | outb_p(which_scsi, MCA_MOTHERBOARD_SETUP_REG); | ||
347 | if(mca_read_and_store_pos(pos)) | ||
348 | break; | ||
349 | } | ||
350 | if(which_scsi) { | ||
351 | /* found a scsi card */ | ||
352 | mca_dev = kmalloc(sizeof(struct mca_device), GFP_ATOMIC); | ||
353 | if(unlikely(!mca_dev)) | ||
354 | goto out_unlock_nomem; | ||
355 | memset(mca_dev, 0, sizeof(struct mca_device)); | ||
356 | |||
357 | for(j = 0; j < 8; j++) | ||
358 | mca_dev->pos[j] = pos[j]; | ||
359 | |||
360 | mca_configure_adapter_status(mca_dev); | ||
361 | /* fake POS and slot for integrated SCSI controller */ | ||
362 | mca_dev->pos_id = MCA_INTEGSCSI_POS; | ||
363 | mca_dev->slot = MCA_INTEGSCSI; | ||
364 | mca_dev->pos_register = which_scsi; | ||
365 | mca_register_device(MCA_PRIMARY_BUS, mca_dev); | ||
366 | } | ||
367 | |||
368 | /* Turn off motherboard setup */ | ||
369 | |||
370 | outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG); | ||
371 | |||
372 | /* Now loop over MCA slots: put each adapter into setup mode, and | ||
373 | * read its POS registers. Then put adapter setup off. | ||
374 | */ | ||
375 | |||
376 | for(i=0; i<MCA_MAX_SLOT_NR; i++) { | ||
377 | outb_p(0x8|(i&0xf), MCA_ADAPTER_SETUP_REG); | ||
378 | if(!mca_read_and_store_pos(pos)) | ||
379 | continue; | ||
380 | |||
381 | mca_dev = kmalloc(sizeof(struct mca_device), GFP_ATOMIC); | ||
382 | if(unlikely(!mca_dev)) | ||
383 | goto out_unlock_nomem; | ||
384 | memset(mca_dev, 0, sizeof(struct mca_device)); | ||
385 | |||
386 | for(j=0; j<8; j++) | ||
387 | mca_dev->pos[j]=pos[j]; | ||
388 | |||
389 | mca_dev->driver_loaded = 0; | ||
390 | mca_dev->slot = i; | ||
391 | mca_dev->pos_register = 0; | ||
392 | mca_configure_adapter_status(mca_dev); | ||
393 | mca_register_device(MCA_PRIMARY_BUS, mca_dev); | ||
394 | } | ||
395 | outb_p(0, MCA_ADAPTER_SETUP_REG); | ||
396 | |||
397 | /* Enable interrupts and return memory start */ | ||
398 | spin_unlock_irq(&mca_lock); | ||
399 | |||
400 | for (i = 0; i < MCA_STANDARD_RESOURCES; i++) | ||
401 | request_resource(&ioport_resource, mca_standard_resources + i); | ||
402 | |||
403 | mca_do_proc_init(); | ||
404 | |||
405 | return 0; | ||
406 | |||
407 | out_unlock_nomem: | ||
408 | spin_unlock_irq(&mca_lock); | ||
409 | out_nomem: | ||
410 | printk(KERN_EMERG "Failed memory allocation in MCA setup!\n"); | ||
411 | return -ENOMEM; | ||
412 | } | ||
413 | |||
414 | subsys_initcall(mca_init); | ||
415 | |||
416 | /*--------------------------------------------------------------------*/ | ||
417 | |||
418 | static void mca_handle_nmi_device(struct mca_device *mca_dev, int check_flag) | ||
419 | { | ||
420 | int slot = mca_dev->slot; | ||
421 | |||
422 | if(slot == MCA_INTEGSCSI) { | ||
423 | printk(KERN_CRIT "NMI: caused by MCA integrated SCSI adapter (%s)\n", | ||
424 | mca_dev->name); | ||
425 | } else if(slot == MCA_INTEGVIDEO) { | ||
426 | printk(KERN_CRIT "NMI: caused by MCA integrated video adapter (%s)\n", | ||
427 | mca_dev->name); | ||
428 | } else if(slot == MCA_MOTHERBOARD) { | ||
429 | printk(KERN_CRIT "NMI: caused by motherboard (%s)\n", | ||
430 | mca_dev->name); | ||
431 | } | ||
432 | |||
433 | /* More info available in POS 6 and 7? */ | ||
434 | |||
435 | if(check_flag) { | ||
436 | unsigned char pos6, pos7; | ||
437 | |||
438 | pos6 = mca_device_read_pos(mca_dev, 6); | ||
439 | pos7 = mca_device_read_pos(mca_dev, 7); | ||
440 | |||
441 | printk(KERN_CRIT "NMI: POS 6 = 0x%x, POS 7 = 0x%x\n", pos6, pos7); | ||
442 | } | ||
443 | |||
444 | } /* mca_handle_nmi_device */ | ||
445 | |||
446 | /*--------------------------------------------------------------------*/ | ||
447 | |||
448 | static int mca_handle_nmi_callback(struct device *dev, void *data) | ||
449 | { | ||
450 | struct mca_device *mca_dev = to_mca_device(dev); | ||
451 | unsigned char pos5; | ||
452 | |||
453 | pos5 = mca_device_read_pos(mca_dev, 5); | ||
454 | |||
455 | if(!(pos5 & 0x80)) { | ||
456 | /* Bit 7 of POS 5 is reset when this adapter has a hardware | ||
457 | * error. Bit 6 is reset if there's error information | ||
458 | * available in POS 6 and 7. | ||
459 | */ | ||
460 | mca_handle_nmi_device(mca_dev, !(pos5 & 0x40)); | ||
461 | return 1; | ||
462 | } | ||
463 | return 0; | ||
464 | } | ||
465 | |||
466 | void mca_handle_nmi(void) | ||
467 | { | ||
468 | /* First try - scan the various adapters and see if a specific | ||
469 | * adapter was responsible for the error. | ||
470 | */ | ||
471 | bus_for_each_dev(&mca_bus_type, NULL, NULL, mca_handle_nmi_callback); | ||
472 | |||
473 | mca_nmi_hook(); | ||
474 | } /* mca_handle_nmi */ | ||
diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c new file mode 100644 index 000000000000..a77c612aad00 --- /dev/null +++ b/arch/i386/kernel/microcode.c | |||
@@ -0,0 +1,512 @@ | |||
1 | /* | ||
2 | * Intel CPU Microcode Update Driver for Linux | ||
3 | * | ||
4 | * Copyright (C) 2000-2004 Tigran Aivazian | ||
5 | * | ||
6 | * This driver allows upgrading the microcode on Intel processors | ||
7 | * belonging to the IA-32 family - PentiumPro, Pentium II, | ||
8 | * Pentium III, Xeon, Pentium 4, etc. | ||
9 | * | ||
10 | * Reference: Section 8.10 of Volume III, Intel Pentium 4 Manual, | ||
11 | * Order Number 245472 or free download from: | ||
12 | * | ||
13 | * http://developer.intel.com/design/pentium4/manuals/245472.htm | ||
14 | * | ||
15 | * For more information, go to http://www.urbanmyth.org/microcode | ||
16 | * | ||
17 | * This program is free software; you can redistribute it and/or | ||
18 | * modify it under the terms of the GNU General Public License | ||
19 | * as published by the Free Software Foundation; either version | ||
20 | * 2 of the License, or (at your option) any later version. | ||
21 | * | ||
22 | * 1.0 16 Feb 2000, Tigran Aivazian <tigran@sco.com> | ||
23 | * Initial release. | ||
24 | * 1.01 18 Feb 2000, Tigran Aivazian <tigran@sco.com> | ||
25 | * Added read() support + cleanups. | ||
26 | * 1.02 21 Feb 2000, Tigran Aivazian <tigran@sco.com> | ||
27 | * Added 'device trimming' support. open(O_WRONLY) zeroes | ||
28 | * and frees the saved copy of applied microcode. | ||
29 | * 1.03 29 Feb 2000, Tigran Aivazian <tigran@sco.com> | ||
30 | * Made to use devfs (/dev/cpu/microcode) + cleanups. | ||
31 | * 1.04 06 Jun 2000, Simon Trimmer <simon@veritas.com> | ||
32 | * Added misc device support (now uses both devfs and misc). | ||
33 | * Added MICROCODE_IOCFREE ioctl to clear memory. | ||
34 | * 1.05 09 Jun 2000, Simon Trimmer <simon@veritas.com> | ||
35 | * Messages for error cases (non Intel & no suitable microcode). | ||
36 | * 1.06 03 Aug 2000, Tigran Aivazian <tigran@veritas.com> | ||
37 | * Removed ->release(). Removed exclusive open and status bitmap. | ||
38 | * Added microcode_rwsem to serialize read()/write()/ioctl(). | ||
39 | * Removed global kernel lock usage. | ||
40 | * 1.07 07 Sep 2000, Tigran Aivazian <tigran@veritas.com> | ||
41 | * Write 0 to 0x8B msr and then cpuid before reading revision, | ||
42 | * so that it works even if there were no update done by the | ||
43 | * BIOS. Otherwise, reading from 0x8B gives junk (which happened | ||
44 | * to be 0 on my machine which is why it worked even when I | ||
45 | * disabled update by the BIOS) | ||
46 | * Thanks to Eric W. Biederman <ebiederman@lnxi.com> for the fix. | ||
47 | * 1.08 11 Dec 2000, Richard Schaal <richard.schaal@intel.com> and | ||
48 | * Tigran Aivazian <tigran@veritas.com> | ||
49 | * Intel Pentium 4 processor support and bugfixes. | ||
50 | * 1.09 30 Oct 2001, Tigran Aivazian <tigran@veritas.com> | ||
51 | * Bugfix for HT (Hyper-Threading) enabled processors | ||
52 | * whereby processor resources are shared by all logical processors | ||
53 | * in a single CPU package. | ||
54 | * 1.10 28 Feb 2002 Asit K Mallick <asit.k.mallick@intel.com> and | ||
55 | * Tigran Aivazian <tigran@veritas.com>, | ||
56 | * Serialize updates as required on HT processors due to speculative | ||
57 | * nature of implementation. | ||
58 | * 1.11 22 Mar 2002 Tigran Aivazian <tigran@veritas.com> | ||
59 | * Fix the panic when writing zero-length microcode chunk. | ||
60 | * 1.12 29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>, | ||
61 | * Jun Nakajima <jun.nakajima@intel.com> | ||
62 | * Support for the microcode updates in the new format. | ||
63 | * 1.13 10 Oct 2003 Tigran Aivazian <tigran@veritas.com> | ||
64 | * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl | ||
65 | * because we no longer hold a copy of applied microcode | ||
66 | * in kernel memory. | ||
67 | * 1.14 25 Jun 2004 Tigran Aivazian <tigran@veritas.com> | ||
68 | * Fix sigmatch() macro to handle old CPUs with pf == 0. | ||
69 | * Thanks to Stuart Swales for pointing out this bug. | ||
70 | */ | ||
71 | |||
72 | //#define DEBUG /* pr_debug */ | ||
73 | #include <linux/kernel.h> | ||
74 | #include <linux/init.h> | ||
75 | #include <linux/sched.h> | ||
76 | #include <linux/module.h> | ||
77 | #include <linux/slab.h> | ||
78 | #include <linux/vmalloc.h> | ||
79 | #include <linux/miscdevice.h> | ||
80 | #include <linux/spinlock.h> | ||
81 | #include <linux/mm.h> | ||
82 | |||
83 | #include <asm/msr.h> | ||
84 | #include <asm/uaccess.h> | ||
85 | #include <asm/processor.h> | ||
86 | |||
87 | MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver"); | ||
88 | MODULE_AUTHOR("Tigran Aivazian <tigran@veritas.com>"); | ||
89 | MODULE_LICENSE("GPL"); | ||
90 | |||
91 | #define MICROCODE_VERSION "1.14" | ||
92 | |||
93 | #define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */ | ||
94 | #define MC_HEADER_SIZE (sizeof (microcode_header_t)) /* 48 bytes */ | ||
95 | #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */ | ||
96 | #define EXT_HEADER_SIZE (sizeof (struct extended_sigtable)) /* 20 bytes */ | ||
97 | #define EXT_SIGNATURE_SIZE (sizeof (struct extended_signature)) /* 12 bytes */ | ||
98 | #define DWSIZE (sizeof (u32)) | ||
99 | #define get_totalsize(mc) \ | ||
100 | (((microcode_t *)mc)->hdr.totalsize ? \ | ||
101 | ((microcode_t *)mc)->hdr.totalsize : DEFAULT_UCODE_TOTALSIZE) | ||
102 | #define get_datasize(mc) \ | ||
103 | (((microcode_t *)mc)->hdr.datasize ? \ | ||
104 | ((microcode_t *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE) | ||
105 | |||
106 | #define sigmatch(s1, s2, p1, p2) \ | ||
107 | (((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0)))) | ||
108 | |||
109 | #define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) | ||
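/*
 * Worked example for sigmatch() (illustrative values): a CPU reporting
 * sig 0x00000f29 and pf 0x04 matches an update whose header carries the
 * same signature and a platform-flag mask covering that bit:
 *
 *	sigmatch(0x00000f29, 0x00000f29, 0x1d, 0x04) -> 1  (0x1d & 0x04 != 0)
 *	sigmatch(0x00000f29, 0x00000f29, 0x1d, 0x02) -> 0
 *
 * Old CPUs report pf == 0, hence the extra (p1 == 0 && p2 == 0) case
 * (see the 1.14 changelog entry above).
 */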
110 | |||
111 | /* serialize access to the physical write to MSR 0x79 */ | ||
112 | static DEFINE_SPINLOCK(microcode_update_lock); | ||
113 | |||
114 | /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ | ||
115 | static DECLARE_MUTEX(microcode_sem); | ||
116 | |||
117 | static void __user *user_buffer; /* user area microcode data buffer */ | ||
118 | static unsigned int user_buffer_size; /* its size */ | ||
119 | |||
120 | typedef enum mc_error_code { | ||
121 | MC_SUCCESS = 0, | ||
122 | MC_NOTFOUND = 1, | ||
123 | MC_MARKED = 2, | ||
124 | MC_ALLOCATED = 3, | ||
125 | } mc_error_code_t; | ||
126 | |||
127 | static struct ucode_cpu_info { | ||
128 | unsigned int sig; | ||
129 | unsigned int pf; | ||
130 | unsigned int rev; | ||
131 | unsigned int cksum; | ||
132 | mc_error_code_t err; | ||
133 | microcode_t *mc; | ||
134 | } ucode_cpu_info[NR_CPUS]; | ||
135 | |||
136 | static int microcode_open (struct inode *unused1, struct file *unused2) | ||
137 | { | ||
138 | return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; | ||
139 | } | ||
140 | |||
141 | static void collect_cpu_info (void *unused) | ||
142 | { | ||
143 | int cpu_num = smp_processor_id(); | ||
144 | struct cpuinfo_x86 *c = cpu_data + cpu_num; | ||
145 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; | ||
146 | unsigned int val[2]; | ||
147 | |||
148 | uci->sig = uci->pf = uci->rev = uci->cksum = 0; | ||
149 | uci->err = MC_NOTFOUND; | ||
150 | uci->mc = NULL; | ||
151 | |||
152 | if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || | ||
153 | cpu_has(c, X86_FEATURE_IA64)) { | ||
154 | printk(KERN_ERR "microcode: CPU%d not a capable Intel processor\n", cpu_num); | ||
155 | return; | ||
156 | } else { | ||
157 | uci->sig = cpuid_eax(0x00000001); | ||
158 | |||
159 | if ((c->x86_model >= 5) || (c->x86 > 6)) { | ||
160 | /* get processor flags from MSR 0x17 */ | ||
161 | rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); | ||
162 | uci->pf = 1 << ((val[1] >> 18) & 7); | ||
163 | } | ||
164 | } | ||
165 | |||
166 | wrmsr(MSR_IA32_UCODE_REV, 0, 0); | ||
167 | __asm__ __volatile__ ("cpuid" : : : "ax", "bx", "cx", "dx"); | ||
168 | /* get the current revision from MSR 0x8B */ | ||
169 | rdmsr(MSR_IA32_UCODE_REV, val[0], uci->rev); | ||
170 | pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n", | ||
171 | uci->sig, uci->pf, uci->rev); | ||
172 | } | ||
173 | |||
174 | static inline void mark_microcode_update (int cpu_num, microcode_header_t *mc_header, int sig, int pf, int cksum) | ||
175 | { | ||
176 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; | ||
177 | |||
178 | pr_debug("Microcode Found.\n"); | ||
179 | pr_debug(" Header Revision 0x%x\n", mc_header->hdrver); | ||
180 | pr_debug(" Loader Revision 0x%x\n", mc_header->ldrver); | ||
181 | pr_debug(" Revision 0x%x \n", mc_header->rev); | ||
182 | pr_debug(" Date %x/%x/%x\n", | ||
183 | ((mc_header->date >> 24 ) & 0xff), | ||
184 | ((mc_header->date >> 16 ) & 0xff), | ||
185 | (mc_header->date & 0xFFFF)); | ||
186 | pr_debug(" Signature 0x%x\n", sig); | ||
187 | pr_debug(" Type 0x%x Family 0x%x Model 0x%x Stepping 0x%x\n", | ||
188 | ((sig >> 12) & 0x3), | ||
189 | ((sig >> 8) & 0xf), | ||
190 | ((sig >> 4) & 0xf), | ||
191 | ((sig & 0xf))); | ||
192 | pr_debug(" Processor Flags 0x%x\n", pf); | ||
193 | pr_debug(" Checksum 0x%x\n", cksum); | ||
194 | |||
195 | if (mc_header->rev < uci->rev) { | ||
196 | printk(KERN_ERR "microcode: CPU%d not 'upgrading' to earlier revision" | ||
197 | " 0x%x (current=0x%x)\n", cpu_num, mc_header->rev, uci->rev); | ||
198 | goto out; | ||
199 | } else if (mc_header->rev == uci->rev) { | ||
200 | /* notify the caller of success on this cpu */ | ||
201 | uci->err = MC_SUCCESS; | ||
202 | printk(KERN_ERR "microcode: CPU%d already at revision" | ||
203 | " 0x%x (current=0x%x)\n", cpu_num, mc_header->rev, uci->rev); | ||
204 | goto out; | ||
205 | } | ||
206 | |||
207 | pr_debug("microcode: CPU%d found a matching microcode update with " | ||
208 | " revision 0x%x (current=0x%x)\n", cpu_num, mc_header->rev, uci->rev); | ||
209 | uci->cksum = cksum; | ||
210 | uci->pf = pf; /* keep the original mc pf for cksum calculation */ | ||
211 | uci->err = MC_MARKED; /* found the match */ | ||
212 | out: | ||
213 | return; | ||
214 | } | ||
215 | |||
216 | static int find_matching_ucodes (void) | ||
217 | { | ||
218 | int cursor = 0; | ||
219 | int error = 0; | ||
220 | |||
221 | while (cursor + MC_HEADER_SIZE < user_buffer_size) { | ||
222 | microcode_header_t mc_header; | ||
223 | void *newmc = NULL; | ||
224 | int i, sum, cpu_num, allocated_flag, total_size, data_size, ext_table_size; | ||
225 | |||
226 | if (copy_from_user(&mc_header, user_buffer + cursor, MC_HEADER_SIZE)) { | ||
227 | printk(KERN_ERR "microcode: error! Can not read user data\n"); | ||
228 | error = -EFAULT; | ||
229 | goto out; | ||
230 | } | ||
231 | |||
232 | total_size = get_totalsize(&mc_header); | ||
233 | if ((cursor + total_size > user_buffer_size) || (total_size < DEFAULT_UCODE_TOTALSIZE)) { | ||
234 | printk(KERN_ERR "microcode: error! Bad data in microcode data file\n"); | ||
235 | error = -EINVAL; | ||
236 | goto out; | ||
237 | } | ||
238 | |||
239 | data_size = get_datasize(&mc_header); | ||
240 | if ((data_size + MC_HEADER_SIZE > total_size) || (data_size < DEFAULT_UCODE_DATASIZE)) { | ||
241 | printk(KERN_ERR "microcode: error! Bad data in microcode data file\n"); | ||
242 | error = -EINVAL; | ||
243 | goto out; | ||
244 | } | ||
245 | |||
246 | if (mc_header.ldrver != 1 || mc_header.hdrver != 1) { | ||
247 | printk(KERN_ERR "microcode: error! Unknown microcode update format\n"); | ||
248 | error = -EINVAL; | ||
249 | goto out; | ||
250 | } | ||
251 | |||
252 | for (cpu_num = 0; cpu_num < num_online_cpus(); cpu_num++) { | ||
253 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; | ||
254 | if (uci->err != MC_NOTFOUND) /* already found a match or not an online cpu */ | ||
255 | continue; | ||
256 | |||
257 | if (sigmatch(mc_header.sig, uci->sig, mc_header.pf, uci->pf)) | ||
258 | mark_microcode_update(cpu_num, &mc_header, mc_header.sig, mc_header.pf, mc_header.cksum); | ||
259 | } | ||
260 | |||
261 | ext_table_size = total_size - (MC_HEADER_SIZE + data_size); | ||
262 | if (ext_table_size) { | ||
263 | struct extended_sigtable ext_header; | ||
264 | struct extended_signature ext_sig; | ||
265 | int ext_sigcount; | ||
266 | |||
267 | if ((ext_table_size < EXT_HEADER_SIZE) | ||
268 | || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { | ||
269 | printk(KERN_ERR "microcode: error! Bad data in microcode data file\n"); | ||
270 | error = -EINVAL; | ||
271 | goto out; | ||
272 | } | ||
273 | if (copy_from_user(&ext_header, user_buffer + cursor | ||
274 | + MC_HEADER_SIZE + data_size, EXT_HEADER_SIZE)) { | ||
275 | printk(KERN_ERR "microcode: error! Cannot read user data\n"); | ||
276 | error = -EFAULT; | ||
277 | goto out; | ||
278 | } | ||
279 | if (ext_table_size != exttable_size(&ext_header)) { | ||
280 | printk(KERN_ERR "microcode: error! Bad data in microcode data file\n"); | ||
281 | error = -EFAULT; | ||
282 | goto out; | ||
283 | } | ||
284 | |||
285 | ext_sigcount = ext_header.count; | ||
286 | |||
287 | for (i = 0; i < ext_sigcount; i++) { | ||
288 | if (copy_from_user(&ext_sig, user_buffer + cursor + MC_HEADER_SIZE + data_size + EXT_HEADER_SIZE | ||
289 | + EXT_SIGNATURE_SIZE * i, EXT_SIGNATURE_SIZE)) { | ||
290 | printk(KERN_ERR "microcode: error! Cannot read user data\n"); | ||
291 | error = -EFAULT; | ||
292 | goto out; | ||
293 | } | ||
294 | for (cpu_num = 0; cpu_num < num_online_cpus(); cpu_num++) { | ||
295 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; | ||
296 | if (uci->err != MC_NOTFOUND) /* already found a match or not an online cpu */ | ||
297 | continue; | ||
298 | if (sigmatch(ext_sig.sig, uci->sig, ext_sig.pf, uci->pf)) { | ||
299 | mark_microcode_update(cpu_num, &mc_header, ext_sig.sig, ext_sig.pf, ext_sig.cksum); | ||
300 | } | ||
301 | } | ||
302 | } | ||
303 | } | ||
304 | /* now check if any cpu has matched */ | ||
305 | for (cpu_num = 0, allocated_flag = 0, sum = 0; cpu_num < num_online_cpus(); cpu_num++) { | ||
306 | if (ucode_cpu_info[cpu_num].err == MC_MARKED) { | ||
307 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; | ||
308 | if (!allocated_flag) { | ||
309 | allocated_flag = 1; | ||
310 | newmc = vmalloc(total_size); | ||
311 | if (!newmc) { | ||
312 | printk(KERN_ERR "microcode: error! Can not allocate memory\n"); | ||
313 | error = -ENOMEM; | ||
314 | goto out; | ||
315 | } | ||
316 | if (copy_from_user(newmc + MC_HEADER_SIZE, | ||
317 | user_buffer + cursor + MC_HEADER_SIZE, | ||
318 | total_size - MC_HEADER_SIZE)) { | ||
319 | printk(KERN_ERR "microcode: error! Cannot read user data\n"); | ||
320 | vfree(newmc); | ||
321 | error = -EFAULT; | ||
322 | goto out; | ||
323 | } | ||
324 | memcpy(newmc, &mc_header, MC_HEADER_SIZE); | ||
325 | /* check extended table checksum */ | ||
326 | if (ext_table_size) { | ||
327 | int ext_table_sum = 0; | ||
328 | int * ext_tablep = (((void *) newmc) + MC_HEADER_SIZE + data_size); | ||
329 | i = ext_table_size / DWSIZE; | ||
330 | while (i--) ext_table_sum += ext_tablep[i]; | ||
331 | if (ext_table_sum) { | ||
332 | printk(KERN_WARNING "microcode: aborting, bad extended signature table checksum\n"); | ||
333 | vfree(newmc); | ||
334 | error = -EINVAL; | ||
335 | goto out; | ||
336 | } | ||
337 | } | ||
338 | |||
339 | /* calculate the checksum */ | ||
340 | i = (MC_HEADER_SIZE + data_size) / DWSIZE; | ||
341 | while (i--) sum += ((int *)newmc)[i]; | ||
342 | sum -= (mc_header.sig + mc_header.pf + mc_header.cksum); | ||
343 | } | ||
344 | ucode_cpu_info[cpu_num].mc = newmc; | ||
345 | ucode_cpu_info[cpu_num].err = MC_ALLOCATED; /* microcode image allocated for this cpu */ | ||
346 | if (sum + uci->sig + uci->pf + uci->cksum != 0) { | ||
347 | printk(KERN_ERR "microcode: CPU%d aborting, bad checksum\n", cpu_num); | ||
348 | error = -EINVAL; | ||
349 | goto out; | ||
350 | } | ||
351 | } | ||
352 | } | ||
353 | cursor += total_size; /* go to the next update patch */ | ||
354 | } /* end of while */ | ||
355 | out: | ||
356 | return error; | ||
357 | } | ||
358 | |||
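Both checksum loops above enforce the same invariant from the Intel microcode file format: a valid block must sum to zero when accumulated as 32-bit words. For the main body, the header's sig/pf/cksum triple is subtracted and the matched CPU's own triple is added back before the zero test. A minimal standalone sketch of the base rule (assumptions: u32 comes from <linux/types.h>, len covers header plus data and is a multiple of 4):

/* Sketch of the zero-sum rule applied in find_matching_ucodes(). */
static int ucode_sum_ok(const u32 *buf, unsigned int len)
{
	u32 sum = 0;
	unsigned int i;

	for (i = 0; i < len / 4; i++)	/* 4 == DWSIZE */
		sum += buf[i];
	return sum == 0;	/* any non-zero sum means a corrupt block */
}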
359 | static void do_update_one (void * unused) | ||
360 | { | ||
361 | unsigned long flags; | ||
362 | unsigned int val[2]; | ||
363 | int cpu_num = smp_processor_id(); | ||
364 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; | ||
365 | |||
366 | if (uci->mc == NULL) { | ||
367 | printk(KERN_INFO "microcode: No new microcode data for CPU%d\n", cpu_num); | ||
368 | return; | ||
369 | } | ||
370 | |||
371 | /* serialize access to the physical write to MSR 0x79 */ | ||
372 | spin_lock_irqsave(µcode_update_lock, flags); | ||
373 | |||
374 | /* write microcode address via MSR 0x79; ">> 16 >> 16" below extracts the high half without an undefined 32-bit-wide shift (it is simply 0 on i386) */ | ||
375 | wrmsr(MSR_IA32_UCODE_WRITE, | ||
376 | (unsigned long) uci->mc->bits, | ||
377 | (unsigned long) uci->mc->bits >> 16 >> 16); | ||
378 | wrmsr(MSR_IA32_UCODE_REV, 0, 0); | ||
379 | |||
380 | __asm__ __volatile__ ("cpuid" : : : "ax", "bx", "cx", "dx"); | ||
381 | /* get the current revision from MSR 0x8B */ | ||
382 | rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); | ||
383 | |||
384 | /* notify the caller of success on this cpu */ | ||
385 | uci->err = MC_SUCCESS; | ||
386 | spin_unlock_irqrestore(µcode_update_lock, flags); | ||
387 | printk(KERN_INFO "microcode: CPU%d updated from revision " | ||
388 | "0x%x to 0x%x, date = %08x \n", | ||
389 | cpu_num, uci->rev, val[1], uci->mc->hdr.date); | ||
390 | return; | ||
391 | } | ||
392 | |||
393 | static int do_microcode_update (void) | ||
394 | { | ||
395 | int i, error; | ||
396 | |||
397 | if (on_each_cpu(collect_cpu_info, NULL, 1, 1) != 0) { | ||
398 | printk(KERN_ERR "microcode: Error! Could not run on all processors\n"); | ||
399 | error = -EIO; | ||
400 | goto out; | ||
401 | } | ||
402 | |||
403 | if ((error = find_matching_ucodes())) { | ||
404 | printk(KERN_ERR "microcode: Error in the microcode data\n"); | ||
405 | goto out_free; | ||
406 | } | ||
407 | |||
408 | if (on_each_cpu(do_update_one, NULL, 1, 1) != 0) { | ||
409 | printk(KERN_ERR "microcode: Error! Could not run on all processors\n"); | ||
410 | error = -EIO; | ||
411 | } | ||
412 | |||
413 | out_free: | ||
414 | for (i = 0; i < num_online_cpus(); i++) { | ||
415 | if (ucode_cpu_info[i].mc) { | ||
416 | int j; | ||
417 | void *tmp = ucode_cpu_info[i].mc; | ||
418 | vfree(tmp); | ||
419 | for (j = i; j < num_online_cpus(); j++) { | ||
420 | if (ucode_cpu_info[j].mc == tmp) | ||
421 | ucode_cpu_info[j].mc = NULL; | ||
422 | } | ||
423 | } | ||
424 | } | ||
425 | out: | ||
426 | return error; | ||
427 | } | ||
428 | |||
429 | static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos) | ||
430 | { | ||
431 | ssize_t ret; | ||
432 | |||
433 | if (len < DEFAULT_UCODE_TOTALSIZE) { | ||
434 | printk(KERN_ERR "microcode: not enough data\n"); | ||
435 | return -EINVAL; | ||
436 | } | ||
437 | |||
438 | if ((len >> PAGE_SHIFT) > num_physpages) { | ||
439 | printk(KERN_ERR "microcode: too much data (max %lu pages)\n", num_physpages); | ||
440 | return -EINVAL; | ||
441 | } | ||
442 | |||
443 | down(µcode_sem); | ||
444 | |||
445 | user_buffer = (void __user *) buf; | ||
446 | user_buffer_size = (int) len; | ||
447 | |||
448 | ret = do_microcode_update(); | ||
449 | if (!ret) | ||
450 | ret = (ssize_t)len; | ||
451 | |||
452 | up(µcode_sem); | ||
453 | |||
454 | return ret; | ||
455 | } | ||
456 | |||
457 | static int microcode_ioctl (struct inode *inode, struct file *file, | ||
458 | unsigned int cmd, unsigned long arg) | ||
459 | { | ||
460 | switch (cmd) { | ||
461 | /* | ||
462 | * XXX: will be removed after microcode_ctl | ||
463 | * is updated to ignore failure of this ioctl() | ||
464 | */ | ||
465 | case MICROCODE_IOCFREE: | ||
466 | return 0; | ||
467 | default: | ||
468 | return -EINVAL; | ||
469 | } | ||
470 | return -EINVAL; | ||
471 | } | ||
472 | |||
473 | static struct file_operations microcode_fops = { | ||
474 | .owner = THIS_MODULE, | ||
475 | .write = microcode_write, | ||
476 | .ioctl = microcode_ioctl, | ||
477 | .open = microcode_open, | ||
478 | }; | ||
479 | |||
480 | static struct miscdevice microcode_dev = { | ||
481 | .minor = MICROCODE_MINOR, | ||
482 | .name = "microcode", | ||
483 | .devfs_name = "cpu/microcode", | ||
484 | .fops = µcode_fops, | ||
485 | }; | ||
486 | |||
487 | static int __init microcode_init (void) | ||
488 | { | ||
489 | int error; | ||
490 | |||
491 | error = misc_register(µcode_dev); | ||
492 | if (error) { | ||
493 | printk(KERN_ERR | ||
494 | "microcode: can't misc_register on minor=%d\n", | ||
495 | MICROCODE_MINOR); | ||
496 | return error; | ||
497 | } | ||
498 | |||
499 | printk(KERN_INFO | ||
500 | "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@veritas.com>\n"); | ||
501 | return 0; | ||
502 | } | ||
503 | |||
504 | static void __exit microcode_exit (void) | ||
505 | { | ||
506 | misc_deregister(µcode_dev); | ||
507 | printk(KERN_INFO "IA-32 Microcode Update Driver v" MICROCODE_VERSION " unregistered\n"); | ||
508 | } | ||
509 | |||
510 | module_init(microcode_init) | ||
511 | module_exit(microcode_exit) | ||
512 | MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); | ||
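For context, the write() entry point above is driven from userspace roughly as follows. This is a sketch, not the actual microcode_ctl tool: the input file name and buffer size are hypothetical, while /dev/cpu/microcode follows the devfs name registered by the driver.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	static char buf[256 * 1024];	/* hypothetical upper bound */
	ssize_t n;
	int in = open("microcode.dat", O_RDONLY);	/* hypothetical file */
	int dev = open("/dev/cpu/microcode", O_WRONLY);

	if (in < 0 || dev < 0) {
		perror("open");
		return 1;
	}
	n = read(in, buf, sizeof(buf));
	/* a single write of the whole image; the driver parses each block */
	if (n < 0 || write(dev, buf, n) != n) {
		perror("microcode update");
		return 1;
	}
	return 0;
}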
diff --git a/arch/i386/kernel/module.c b/arch/i386/kernel/module.c new file mode 100644 index 000000000000..5149c8a621f0 --- /dev/null +++ b/arch/i386/kernel/module.c | |||
@@ -0,0 +1,129 @@ | |||
1 | /* Kernel module help for i386. | ||
2 | Copyright (C) 2001 Rusty Russell. | ||
3 | |||
4 | This program is free software; you can redistribute it and/or modify | ||
5 | it under the terms of the GNU General Public License as published by | ||
6 | the Free Software Foundation; either version 2 of the License, or | ||
7 | (at your option) any later version. | ||
8 | |||
9 | This program is distributed in the hope that it will be useful, | ||
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | GNU General Public License for more details. | ||
13 | |||
14 | You should have received a copy of the GNU General Public License | ||
15 | along with this program; if not, write to the Free Software | ||
16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
17 | */ | ||
18 | #include <linux/moduleloader.h> | ||
19 | #include <linux/elf.h> | ||
20 | #include <linux/vmalloc.h> | ||
21 | #include <linux/fs.h> | ||
22 | #include <linux/string.h> | ||
23 | #include <linux/kernel.h> | ||
24 | |||
25 | #if 0 | ||
26 | #define DEBUGP printk | ||
27 | #else | ||
28 | #define DEBUGP(fmt...) | ||
29 | #endif | ||
30 | |||
31 | void *module_alloc(unsigned long size) | ||
32 | { | ||
33 | if (size == 0) | ||
34 | return NULL; | ||
35 | return vmalloc_exec(size); | ||
36 | } | ||
37 | |||
38 | |||
39 | /* Free memory returned from module_alloc */ | ||
40 | void module_free(struct module *mod, void *module_region) | ||
41 | { | ||
42 | vfree(module_region); | ||
43 | /* FIXME: If module_region == mod->init_region, trim exception | ||
44 | table entries. */ | ||
45 | } | ||
46 | |||
47 | /* We don't need anything special. */ | ||
48 | int module_frob_arch_sections(Elf_Ehdr *hdr, | ||
49 | Elf_Shdr *sechdrs, | ||
50 | char *secstrings, | ||
51 | struct module *mod) | ||
52 | { | ||
53 | return 0; | ||
54 | } | ||
55 | |||
56 | int apply_relocate(Elf32_Shdr *sechdrs, | ||
57 | const char *strtab, | ||
58 | unsigned int symindex, | ||
59 | unsigned int relsec, | ||
60 | struct module *me) | ||
61 | { | ||
62 | unsigned int i; | ||
63 | Elf32_Rel *rel = (void *)sechdrs[relsec].sh_addr; | ||
64 | Elf32_Sym *sym; | ||
65 | uint32_t *location; | ||
66 | |||
67 | DEBUGP("Applying relocate section %u to %u\n", relsec, | ||
68 | sechdrs[relsec].sh_info); | ||
69 | for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { | ||
70 | /* This is where to make the change */ | ||
71 | location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr | ||
72 | + rel[i].r_offset; | ||
73 | /* This is the symbol it is referring to. Note that all | ||
74 | undefined symbols have been resolved. */ | ||
75 | sym = (Elf32_Sym *)sechdrs[symindex].sh_addr | ||
76 | + ELF32_R_SYM(rel[i].r_info); | ||
77 | |||
78 | switch (ELF32_R_TYPE(rel[i].r_info)) { | ||
79 | case R_386_32: | ||
80 | /* We add the value into the location given */ | ||
81 | *location += sym->st_value; | ||
82 | break; | ||
83 | case R_386_PC32: | ||
84 | /* Add the value, subtract its position */ | ||
85 | *location += sym->st_value - (uint32_t)location; | ||
86 | break; | ||
87 | default: | ||
88 | printk(KERN_ERR "module %s: Unknown relocation: %u\n", | ||
89 | me->name, ELF32_R_TYPE(rel[i].r_info)); | ||
90 | return -ENOEXEC; | ||
91 | } | ||
92 | } | ||
93 | return 0; | ||
94 | } | ||
95 | |||
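The two cases above are the standard ELF relocation formulas: R_386_32 stores S + A, and R_386_PC32 stores S + A - P, where S is the resolved symbol value, A is the addend already sitting in the patched word (REL format), and P is the address of that word. A worked sketch with made-up addresses:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t S = 0xc0100000;	/* hypothetical sym->st_value */
	uint32_t A = 0x4;		/* addend found at *location */
	uint32_t P = 0xc8000010;	/* hypothetical (uint32_t)location */

	printf("R_386_32:   0x%08x\n", S + A);		/* absolute */
	printf("R_386_PC32: 0x%08x\n", S + A - P);	/* pc-relative */
	return 0;
}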
96 | int apply_relocate_add(Elf32_Shdr *sechdrs, | ||
97 | const char *strtab, | ||
98 | unsigned int symindex, | ||
99 | unsigned int relsec, | ||
100 | struct module *me) | ||
101 | { | ||
102 | printk(KERN_ERR "module %s: ADD RELOCATION unsupported\n", | ||
103 | me->name); | ||
104 | return -ENOEXEC; | ||
105 | } | ||
106 | |||
107 | extern void apply_alternatives(void *start, void *end); | ||
108 | |||
109 | int module_finalize(const Elf_Ehdr *hdr, | ||
110 | const Elf_Shdr *sechdrs, | ||
111 | struct module *me) | ||
112 | { | ||
113 | const Elf_Shdr *s; | ||
114 | char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; | ||
115 | |||
116 | /* look for .altinstructions to patch */ | ||
117 | for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { | ||
118 | void *seg; | ||
119 | if (strcmp(".altinstructions", secstrings + s->sh_name)) | ||
120 | continue; | ||
121 | seg = (void *)s->sh_addr; | ||
122 | apply_alternatives(seg, seg + s->sh_size); | ||
123 | } | ||
124 | return 0; | ||
125 | } | ||
126 | |||
127 | void module_arch_cleanup(struct module *mod) | ||
128 | { | ||
129 | } | ||
diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c new file mode 100644 index 000000000000..1347ab4939e7 --- /dev/null +++ b/arch/i386/kernel/mpparse.c | |||
@@ -0,0 +1,1109 @@ | |||
1 | /* | ||
2 | * Intel Multiprocessor Specification 1.1 and 1.4 | ||
3 | * compliant MP-table parsing routines. | ||
4 | * | ||
5 | * (c) 1995 Alan Cox, Building #3 <alan@redhat.com> | ||
6 | * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com> | ||
7 | * | ||
8 | * Fixes | ||
9 | * Erich Boleyn : MP v1.4 and additional changes. | ||
10 | * Alan Cox : Added EBDA scanning | ||
11 | * Ingo Molnar : various cleanups and rewrites | ||
12 | * Maciej W. Rozycki: Bits for default MP configurations | ||
13 | * Paul Diefenbaugh: Added full ACPI support | ||
14 | */ | ||
15 | |||
16 | #include <linux/mm.h> | ||
17 | #include <linux/irq.h> | ||
18 | #include <linux/init.h> | ||
19 | #include <linux/acpi.h> | ||
20 | #include <linux/delay.h> | ||
21 | #include <linux/config.h> | ||
22 | #include <linux/bootmem.h> | ||
23 | #include <linux/smp_lock.h> | ||
24 | #include <linux/kernel_stat.h> | ||
25 | #include <linux/mc146818rtc.h> | ||
26 | #include <linux/bitops.h> | ||
27 | |||
28 | #include <asm/smp.h> | ||
29 | #include <asm/acpi.h> | ||
30 | #include <asm/mtrr.h> | ||
31 | #include <asm/mpspec.h> | ||
32 | #include <asm/io_apic.h> | ||
33 | |||
34 | #include <mach_apic.h> | ||
35 | #include <mach_mpparse.h> | ||
36 | #include <bios_ebda.h> | ||
37 | |||
38 | /* Have we found an MP table */ | ||
39 | int smp_found_config; | ||
40 | unsigned int __initdata maxcpus = NR_CPUS; | ||
41 | |||
42 | /* | ||
43 | * Various Linux-internal data structures created from the | ||
44 | * MP-table. | ||
45 | */ | ||
46 | int apic_version [MAX_APICS]; | ||
47 | int mp_bus_id_to_type [MAX_MP_BUSSES]; | ||
48 | int mp_bus_id_to_node [MAX_MP_BUSSES]; | ||
49 | int mp_bus_id_to_local [MAX_MP_BUSSES]; | ||
50 | int quad_local_to_mp_bus_id [NR_CPUS/4][4]; | ||
51 | int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; | ||
52 | static int mp_current_pci_id; | ||
53 | |||
54 | /* I/O APIC entries */ | ||
55 | struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; | ||
56 | |||
57 | /* MP IRQ source entries */ | ||
58 | struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; | ||
59 | |||
60 | /* # of MP IRQ source entries */ | ||
61 | int mp_irq_entries; | ||
62 | |||
63 | int nr_ioapics; | ||
64 | |||
65 | int pic_mode; | ||
66 | unsigned long mp_lapic_addr; | ||
67 | |||
68 | /* Processor that is doing the boot up */ | ||
69 | unsigned int boot_cpu_physical_apicid = -1U; | ||
70 | unsigned int boot_cpu_logical_apicid = -1U; | ||
71 | /* Internal processor count */ | ||
72 | static unsigned int __initdata num_processors; | ||
73 | |||
74 | /* Bitmask of physically existing CPUs */ | ||
75 | physid_mask_t phys_cpu_present_map; | ||
76 | |||
77 | u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; | ||
78 | |||
79 | /* | ||
80 | * Intel MP BIOS table parsing routines: | ||
81 | */ | ||
82 | |||
83 | |||
84 | /* | ||
85 | * Checksum an MP configuration block. | ||
86 | */ | ||
87 | |||
88 | static int __init mpf_checksum(unsigned char *mp, int len) | ||
89 | { | ||
90 | int sum = 0; | ||
91 | |||
92 | while (len--) | ||
93 | sum += *mp++; | ||
94 | |||
95 | return sum & 0xFF; | ||
96 | } | ||
97 | |||
98 | /* | ||
99 | * Have to match translation table entries to main table entries by counter | ||
100 | * hence the mpc_record variable .... can't see a less disgusting way of | ||
101 | * doing this .... | ||
102 | */ | ||
103 | |||
104 | static int mpc_record; | ||
105 | static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __initdata; | ||
106 | |||
107 | #ifdef CONFIG_X86_NUMAQ | ||
108 | static int MP_valid_apicid(int apicid, int version) | ||
109 | { | ||
110 | return hweight_long(apicid & 0xf) == 1 && (apicid >> 4) != 0xf; | ||
111 | } | ||
112 | #else | ||
113 | static int MP_valid_apicid(int apicid, int version) | ||
114 | { | ||
115 | if (version >= 0x14) | ||
116 | return apicid < 0xff; | ||
117 | else | ||
118 | return apicid < 0xf; | ||
119 | } | ||
120 | #endif | ||
121 | |||
122 | static void __init MP_processor_info (struct mpc_config_processor *m) | ||
123 | { | ||
124 | int ver, apicid; | ||
125 | physid_mask_t tmp; | ||
126 | |||
127 | if (!(m->mpc_cpuflag & CPU_ENABLED)) | ||
128 | return; | ||
129 | |||
130 | apicid = mpc_apic_id(m, translation_table[mpc_record]); | ||
131 | |||
132 | if (m->mpc_featureflag&(1<<0)) | ||
133 | Dprintk(" Floating point unit present.\n"); | ||
134 | if (m->mpc_featureflag&(1<<7)) | ||
135 | Dprintk(" Machine Exception supported.\n"); | ||
136 | if (m->mpc_featureflag&(1<<8)) | ||
137 | Dprintk(" 64 bit compare & exchange supported.\n"); | ||
138 | if (m->mpc_featureflag&(1<<9)) | ||
139 | Dprintk(" Internal APIC present.\n"); | ||
140 | if (m->mpc_featureflag&(1<<11)) | ||
141 | Dprintk(" SEP present.\n"); | ||
142 | if (m->mpc_featureflag&(1<<12)) | ||
143 | Dprintk(" MTRR present.\n"); | ||
144 | if (m->mpc_featureflag&(1<<13)) | ||
145 | Dprintk(" PGE present.\n"); | ||
146 | if (m->mpc_featureflag&(1<<14)) | ||
147 | Dprintk(" MCA present.\n"); | ||
148 | if (m->mpc_featureflag&(1<<15)) | ||
149 | Dprintk(" CMOV present.\n"); | ||
150 | if (m->mpc_featureflag&(1<<16)) | ||
151 | Dprintk(" PAT present.\n"); | ||
152 | if (m->mpc_featureflag&(1<<17)) | ||
153 | Dprintk(" PSE present.\n"); | ||
154 | if (m->mpc_featureflag&(1<<18)) | ||
155 | Dprintk(" PSN present.\n"); | ||
156 | if (m->mpc_featureflag&(1<<19)) | ||
157 | Dprintk(" Cache Line Flush Instruction present.\n"); | ||
158 | /* 20 Reserved */ | ||
159 | if (m->mpc_featureflag&(1<<21)) | ||
160 | Dprintk(" Debug Trace and EMON Store present.\n"); | ||
161 | if (m->mpc_featureflag&(1<<22)) | ||
162 | Dprintk(" ACPI Thermal Throttle Registers present.\n"); | ||
163 | if (m->mpc_featureflag&(1<<23)) | ||
164 | Dprintk(" MMX present.\n"); | ||
165 | if (m->mpc_featureflag&(1<<24)) | ||
166 | Dprintk(" FXSR present.\n"); | ||
167 | if (m->mpc_featureflag&(1<<25)) | ||
168 | Dprintk(" XMM present.\n"); | ||
169 | if (m->mpc_featureflag&(1<<26)) | ||
170 | Dprintk(" Willamette New Instructions present.\n"); | ||
171 | if (m->mpc_featureflag&(1<<27)) | ||
172 | Dprintk(" Self Snoop present.\n"); | ||
173 | if (m->mpc_featureflag&(1<<28)) | ||
174 | Dprintk(" HT present.\n"); | ||
175 | if (m->mpc_featureflag&(1<<29)) | ||
176 | Dprintk(" Thermal Monitor present.\n"); | ||
177 | /* 30, 31 Reserved */ | ||
178 | |||
179 | |||
180 | if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { | ||
181 | Dprintk(" Bootup CPU\n"); | ||
182 | boot_cpu_physical_apicid = m->mpc_apicid; | ||
183 | boot_cpu_logical_apicid = apicid; | ||
184 | } | ||
185 | |||
186 | if (num_processors >= NR_CPUS) { | ||
187 | printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." | ||
188 | " Processor ignored.\n", NR_CPUS); | ||
189 | return; | ||
190 | } | ||
191 | |||
192 | if (num_processors >= maxcpus) { | ||
193 | printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." | ||
194 | " Processor ignored.\n", maxcpus); | ||
195 | return; | ||
196 | } | ||
197 | num_processors++; | ||
198 | ver = m->mpc_apicver; | ||
199 | |||
200 | if (!MP_valid_apicid(apicid, ver)) { | ||
201 | printk(KERN_WARNING "Processor #%d INVALID. (Max ID: %d).\n", | ||
202 | m->mpc_apicid, MAX_APICS); | ||
203 | --num_processors; | ||
204 | return; | ||
205 | } | ||
206 | |||
207 | tmp = apicid_to_cpu_present(apicid); | ||
208 | physids_or(phys_cpu_present_map, phys_cpu_present_map, tmp); | ||
209 | |||
210 | /* | ||
211 | * Validate version | ||
212 | */ | ||
213 | if (ver == 0x0) { | ||
214 | printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid); | ||
215 | ver = 0x10; | ||
216 | } | ||
217 | apic_version[m->mpc_apicid] = ver; | ||
218 | bios_cpu_apicid[num_processors - 1] = m->mpc_apicid; | ||
219 | } | ||
220 | |||
221 | static void __init MP_bus_info (struct mpc_config_bus *m) | ||
222 | { | ||
223 | char str[7]; | ||
224 | |||
225 | memcpy(str, m->mpc_bustype, 6); | ||
226 | str[6] = 0; | ||
227 | |||
228 | mpc_oem_bus_info(m, str, translation_table[mpc_record]); | ||
229 | |||
230 | if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) { | ||
231 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; | ||
232 | } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) { | ||
233 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; | ||
234 | } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) { | ||
235 | mpc_oem_pci_bus(m, translation_table[mpc_record]); | ||
236 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; | ||
237 | mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id; | ||
238 | mp_current_pci_id++; | ||
239 | } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) { | ||
240 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA; | ||
241 | } else if (strncmp(str, BUSTYPE_NEC98, sizeof(BUSTYPE_NEC98)-1) == 0) { | ||
242 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_NEC98; | ||
243 | } else { | ||
244 | printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); | ||
245 | } | ||
246 | } | ||
247 | |||
248 | static void __init MP_ioapic_info (struct mpc_config_ioapic *m) | ||
249 | { | ||
250 | if (!(m->mpc_flags & MPC_APIC_USABLE)) | ||
251 | return; | ||
252 | |||
253 | printk(KERN_INFO "I/O APIC #%d Version %d at 0x%lX.\n", | ||
254 | m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr); | ||
255 | if (nr_ioapics >= MAX_IO_APICS) { | ||
256 | printk(KERN_CRIT "Max # of I/O APICs (%d) exceeded (found %d).\n", | ||
257 | MAX_IO_APICS, nr_ioapics); | ||
258 | panic("Recompile kernel with bigger MAX_IO_APICS!\n"); | ||
259 | } | ||
260 | if (!m->mpc_apicaddr) { | ||
261 | printk(KERN_ERR "WARNING: bogus zero I/O APIC address" | ||
262 | " found in MP table, skipping!\n"); | ||
263 | return; | ||
264 | } | ||
265 | mp_ioapics[nr_ioapics] = *m; | ||
266 | nr_ioapics++; | ||
267 | } | ||
268 | |||
269 | static void __init MP_intsrc_info (struct mpc_config_intsrc *m) | ||
270 | { | ||
271 | mp_irqs [mp_irq_entries] = *m; | ||
272 | Dprintk("Int: type %d, pol %d, trig %d, bus %d," | ||
273 | " IRQ %02x, APIC ID %x, APIC INT %02x\n", | ||
274 | m->mpc_irqtype, m->mpc_irqflag & 3, | ||
275 | (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, | ||
276 | m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); | ||
277 | if (++mp_irq_entries == MAX_IRQ_SOURCES) | ||
278 | panic("Max # of irq sources exceeded!!\n"); | ||
279 | } | ||
280 | |||
281 | static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m) | ||
282 | { | ||
283 | Dprintk("Lint: type %d, pol %d, trig %d, bus %d," | ||
284 | " IRQ %02x, APIC ID %x, APIC LINT %02x\n", | ||
285 | m->mpc_irqtype, m->mpc_irqflag & 3, | ||
286 | (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid, | ||
287 | m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); | ||
288 | /* | ||
289 | * Well it seems all SMP boards in existence | ||
290 | * use ExtINT/LVT1 == LINT0 and | ||
291 | * NMI/LVT2 == LINT1 - the following check | ||
292 | * will show us if this assumption is false. | ||
293 | * Until then we do not have to add baggage. | ||
294 | */ | ||
295 | if ((m->mpc_irqtype == mp_ExtINT) && | ||
296 | (m->mpc_destapiclint != 0)) | ||
297 | BUG(); | ||
298 | if ((m->mpc_irqtype == mp_NMI) && | ||
299 | (m->mpc_destapiclint != 1)) | ||
300 | BUG(); | ||
301 | } | ||
302 | |||
303 | #ifdef CONFIG_X86_NUMAQ | ||
304 | static void __init MP_translation_info (struct mpc_config_translation *m) | ||
305 | { | ||
306 | printk(KERN_INFO "Translation: record %d, type %d, quad %d, global %d, local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global, m->trans_local); | ||
307 | |||
308 | if (mpc_record >= MAX_MPC_ENTRY) | ||
309 | printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n"); | ||
310 | else | ||
311 | translation_table[mpc_record] = m; /* stash this for later */ | ||
312 | if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad)) | ||
313 | node_set_online(m->trans_quad); | ||
314 | } | ||
315 | |||
316 | /* | ||
317 | * Read/parse the MPC oem tables | ||
318 | */ | ||
319 | |||
320 | static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, | ||
321 | unsigned short oemsize) | ||
322 | { | ||
323 | int count = sizeof (*oemtable); /* the header size */ | ||
324 | unsigned char *oemptr = ((unsigned char *)oemtable)+count; | ||
325 | |||
326 | mpc_record = 0; | ||
327 | printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", oemtable); | ||
328 | if (memcmp(oemtable->oem_signature,MPC_OEM_SIGNATURE,4)) | ||
329 | { | ||
330 | printk(KERN_WARNING "SMP mpc oemtable: bad signature [%c%c%c%c]!\n", | ||
331 | oemtable->oem_signature[0], | ||
332 | oemtable->oem_signature[1], | ||
333 | oemtable->oem_signature[2], | ||
334 | oemtable->oem_signature[3]); | ||
335 | return; | ||
336 | } | ||
337 | if (mpf_checksum((unsigned char *)oemtable,oemtable->oem_length)) | ||
338 | { | ||
339 | printk(KERN_WARNING "SMP oem mptable: checksum error!\n"); | ||
340 | return; | ||
341 | } | ||
342 | while (count < oemtable->oem_length) { | ||
343 | switch (*oemptr) { | ||
344 | case MP_TRANSLATION: | ||
345 | { | ||
346 | struct mpc_config_translation *m= | ||
347 | (struct mpc_config_translation *)oemptr; | ||
348 | MP_translation_info(m); | ||
349 | oemptr += sizeof(*m); | ||
350 | count += sizeof(*m); | ||
351 | ++mpc_record; | ||
352 | break; | ||
353 | } | ||
354 | default: | ||
355 | { | ||
356 | printk(KERN_WARNING "Unrecognised OEM table entry type! - %d\n", (int) *oemptr); | ||
357 | return; | ||
358 | } | ||
359 | } | ||
360 | } | ||
361 | } | ||
362 | |||
363 | static inline void mps_oem_check(struct mp_config_table *mpc, char *oem, | ||
364 | char *productid) | ||
365 | { | ||
366 | if (strncmp(oem, "IBM NUMA", 8)) | ||
367 | printk("Warning! May not be a NUMA-Q system!\n"); | ||
368 | if (mpc->mpc_oemptr) | ||
369 | smp_read_mpc_oem((struct mp_config_oemtable *) mpc->mpc_oemptr, | ||
370 | mpc->mpc_oemsize); | ||
371 | } | ||
372 | #endif /* CONFIG_X86_NUMAQ */ | ||
373 | |||
374 | /* | ||
375 | * Read/parse the MPC | ||
376 | */ | ||
377 | |||
378 | static int __init smp_read_mpc(struct mp_config_table *mpc) | ||
379 | { | ||
380 | char str[16]; | ||
381 | char oem[10]; | ||
382 | int count=sizeof(*mpc); | ||
383 | unsigned char *mpt=((unsigned char *)mpc)+count; | ||
384 | |||
385 | if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) { | ||
386 | printk(KERN_ERR "SMP mptable: bad signature [0x%x]!\n", | ||
387 | *(u32 *)mpc->mpc_signature); | ||
388 | return 0; | ||
389 | } | ||
390 | if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) { | ||
391 | printk(KERN_ERR "SMP mptable: checksum error!\n"); | ||
392 | return 0; | ||
393 | } | ||
394 | if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) { | ||
395 | printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n", | ||
396 | mpc->mpc_spec); | ||
397 | return 0; | ||
398 | } | ||
399 | if (!mpc->mpc_lapic) { | ||
400 | printk(KERN_ERR "SMP mptable: null local APIC address!\n"); | ||
401 | return 0; | ||
402 | } | ||
403 | memcpy(oem,mpc->mpc_oem,8); | ||
404 | oem[8]=0; | ||
405 | printk(KERN_INFO "OEM ID: %s ",oem); | ||
406 | |||
407 | memcpy(str,mpc->mpc_productid,12); | ||
408 | str[12]=0; | ||
409 | printk("Product ID: %s ",str); | ||
410 | |||
411 | mps_oem_check(mpc, oem, str); | ||
412 | |||
413 | printk("APIC at: 0x%lX\n",mpc->mpc_lapic); | ||
414 | |||
415 | /* | ||
416 | * Save the local APIC address (it might be non-default) -- but only | ||
417 | * if we're not using ACPI. | ||
418 | */ | ||
419 | if (!acpi_lapic) | ||
420 | mp_lapic_addr = mpc->mpc_lapic; | ||
421 | |||
422 | /* | ||
423 | * Now process the configuration blocks. | ||
424 | */ | ||
425 | mpc_record = 0; | ||
426 | while (count < mpc->mpc_length) { | ||
427 | switch(*mpt) { | ||
428 | case MP_PROCESSOR: | ||
429 | { | ||
430 | struct mpc_config_processor *m= | ||
431 | (struct mpc_config_processor *)mpt; | ||
432 | /* ACPI may have already provided this data */ | ||
433 | if (!acpi_lapic) | ||
434 | MP_processor_info(m); | ||
435 | mpt += sizeof(*m); | ||
436 | count += sizeof(*m); | ||
437 | break; | ||
438 | } | ||
439 | case MP_BUS: | ||
440 | { | ||
441 | struct mpc_config_bus *m= | ||
442 | (struct mpc_config_bus *)mpt; | ||
443 | MP_bus_info(m); | ||
444 | mpt += sizeof(*m); | ||
445 | count += sizeof(*m); | ||
446 | break; | ||
447 | } | ||
448 | case MP_IOAPIC: | ||
449 | { | ||
450 | struct mpc_config_ioapic *m= | ||
451 | (struct mpc_config_ioapic *)mpt; | ||
452 | MP_ioapic_info(m); | ||
453 | mpt+=sizeof(*m); | ||
454 | count+=sizeof(*m); | ||
455 | break; | ||
456 | } | ||
457 | case MP_INTSRC: | ||
458 | { | ||
459 | struct mpc_config_intsrc *m= | ||
460 | (struct mpc_config_intsrc *)mpt; | ||
461 | |||
462 | MP_intsrc_info(m); | ||
463 | mpt+=sizeof(*m); | ||
464 | count+=sizeof(*m); | ||
465 | break; | ||
466 | } | ||
467 | case MP_LINTSRC: | ||
468 | { | ||
469 | struct mpc_config_lintsrc *m= | ||
470 | (struct mpc_config_lintsrc *)mpt; | ||
471 | MP_lintsrc_info(m); | ||
472 | mpt+=sizeof(*m); | ||
473 | count+=sizeof(*m); | ||
474 | break; | ||
475 | } | ||
476 | default: | ||
477 | { | ||
478 | count = mpc->mpc_length; | ||
479 | break; | ||
480 | } | ||
481 | } | ||
482 | ++mpc_record; | ||
483 | } | ||
484 | clustered_apic_check(); | ||
485 | if (!num_processors) | ||
486 | printk(KERN_ERR "SMP mptable: no processors registered!\n"); | ||
487 | return num_processors; | ||
488 | } | ||
489 | |||
490 | static int __init ELCR_trigger(unsigned int irq) | ||
491 | { | ||
492 | unsigned int port; | ||
493 | |||
494 | port = 0x4d0 + (irq >> 3); | ||
495 | return (inb(port) >> (irq & 7)) & 1; | ||
496 | } | ||
497 | |||
498 | static void __init construct_default_ioirq_mptable(int mpc_default_type) | ||
499 | { | ||
500 | struct mpc_config_intsrc intsrc; | ||
501 | int i; | ||
502 | int ELCR_fallback = 0; | ||
503 | |||
504 | intsrc.mpc_type = MP_INTSRC; | ||
505 | intsrc.mpc_irqflag = 0; /* conforming */ | ||
506 | intsrc.mpc_srcbus = 0; | ||
507 | intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid; | ||
508 | |||
509 | intsrc.mpc_irqtype = mp_INT; | ||
510 | |||
511 | /* | ||
512 | * If true, we have an ISA/PCI system with no IRQ entries | ||
513 | * in the MP table. To prevent the PCI interrupts from being set up | ||
514 | * incorrectly, we try to use the ELCR. The sanity check to see if | ||
515 | * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can | ||
516 | * never be level sensitive, so we simply see if the ELCR agrees. | ||
517 | * If it does, we assume it's valid. | ||
518 | */ | ||
519 | if (mpc_default_type == 5) { | ||
520 | printk(KERN_INFO "ISA/PCI bus type with no IRQ information... falling back to ELCR\n"); | ||
521 | |||
522 | if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13)) | ||
523 | printk(KERN_WARNING "ELCR contains invalid data... not using ELCR\n"); | ||
524 | else { | ||
525 | printk(KERN_INFO "Using ELCR to identify PCI interrupts\n"); | ||
526 | ELCR_fallback = 1; | ||
527 | } | ||
528 | } | ||
529 | |||
530 | for (i = 0; i < 16; i++) { | ||
531 | switch (mpc_default_type) { | ||
532 | case 2: | ||
533 | if (i == 0 || i == 13) | ||
534 | continue; /* IRQ0 & IRQ13 not connected */ | ||
535 | /* fall through */ | ||
536 | default: | ||
537 | if (i == 2) | ||
538 | continue; /* IRQ2 is never connected */ | ||
539 | } | ||
540 | |||
541 | if (ELCR_fallback) { | ||
542 | /* | ||
543 | * If the ELCR indicates a level-sensitive interrupt, we | ||
544 | * copy that information over to the MP table in the | ||
545 | * irqflag field (level sensitive, active high polarity). | ||
546 | */ | ||
547 | if (ELCR_trigger(i)) | ||
548 | intsrc.mpc_irqflag = 13; | ||
549 | else | ||
550 | intsrc.mpc_irqflag = 0; | ||
551 | } | ||
552 | |||
553 | intsrc.mpc_srcbusirq = i; | ||
554 | intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */ | ||
555 | MP_intsrc_info(&intsrc); | ||
556 | } | ||
557 | |||
558 | intsrc.mpc_irqtype = mp_ExtINT; | ||
559 | intsrc.mpc_srcbusirq = 0; | ||
560 | intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */ | ||
561 | MP_intsrc_info(&intsrc); | ||
562 | } | ||
563 | |||
564 | static inline void __init construct_default_ISA_mptable(int mpc_default_type) | ||
565 | { | ||
566 | struct mpc_config_processor processor; | ||
567 | struct mpc_config_bus bus; | ||
568 | struct mpc_config_ioapic ioapic; | ||
569 | struct mpc_config_lintsrc lintsrc; | ||
570 | int linttypes[2] = { mp_ExtINT, mp_NMI }; | ||
571 | int i; | ||
572 | |||
573 | /* | ||
574 | * local APIC has default address | ||
575 | */ | ||
576 | mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; | ||
577 | |||
578 | /* | ||
579 | * 2 CPUs, numbered 0 & 1. | ||
580 | */ | ||
581 | processor.mpc_type = MP_PROCESSOR; | ||
582 | /* Either an integrated APIC or a discrete 82489DX. */ | ||
583 | processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; | ||
584 | processor.mpc_cpuflag = CPU_ENABLED; | ||
585 | processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | | ||
586 | (boot_cpu_data.x86_model << 4) | | ||
587 | boot_cpu_data.x86_mask; | ||
588 | processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; | ||
589 | processor.mpc_reserved[0] = 0; | ||
590 | processor.mpc_reserved[1] = 0; | ||
591 | for (i = 0; i < 2; i++) { | ||
592 | processor.mpc_apicid = i; | ||
593 | MP_processor_info(&processor); | ||
594 | } | ||
595 | |||
596 | bus.mpc_type = MP_BUS; | ||
597 | bus.mpc_busid = 0; | ||
598 | switch (mpc_default_type) { | ||
599 | default: | ||
600 | printk("???\n"); | ||
601 | printk(KERN_ERR "Unknown standard configuration %d\n", | ||
602 | mpc_default_type); | ||
603 | /* fall through */ | ||
604 | case 1: | ||
605 | case 5: | ||
606 | memcpy(bus.mpc_bustype, "ISA ", 6); | ||
607 | break; | ||
608 | case 2: | ||
609 | case 6: | ||
610 | case 3: | ||
611 | memcpy(bus.mpc_bustype, "EISA ", 6); | ||
612 | break; | ||
613 | case 4: | ||
614 | case 7: | ||
615 | memcpy(bus.mpc_bustype, "MCA ", 6); | ||
616 | } | ||
617 | MP_bus_info(&bus); | ||
618 | if (mpc_default_type > 4) { | ||
619 | bus.mpc_busid = 1; | ||
620 | memcpy(bus.mpc_bustype, "PCI ", 6); | ||
621 | MP_bus_info(&bus); | ||
622 | } | ||
623 | |||
624 | ioapic.mpc_type = MP_IOAPIC; | ||
625 | ioapic.mpc_apicid = 2; | ||
626 | ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; | ||
627 | ioapic.mpc_flags = MPC_APIC_USABLE; | ||
628 | ioapic.mpc_apicaddr = 0xFEC00000; | ||
629 | MP_ioapic_info(&ioapic); | ||
630 | |||
631 | /* | ||
632 | * We set up most of the low 16 IO-APIC pins according to MPS rules. | ||
633 | */ | ||
634 | construct_default_ioirq_mptable(mpc_default_type); | ||
635 | |||
636 | lintsrc.mpc_type = MP_LINTSRC; | ||
637 | lintsrc.mpc_irqflag = 0; /* conforming */ | ||
638 | lintsrc.mpc_srcbusid = 0; | ||
639 | lintsrc.mpc_srcbusirq = 0; | ||
640 | lintsrc.mpc_destapic = MP_APIC_ALL; | ||
641 | for (i = 0; i < 2; i++) { | ||
642 | lintsrc.mpc_irqtype = linttypes[i]; | ||
643 | lintsrc.mpc_destapiclint = i; | ||
644 | MP_lintsrc_info(&lintsrc); | ||
645 | } | ||
646 | } | ||
647 | |||
648 | static struct intel_mp_floating *mpf_found; | ||
649 | |||
650 | /* | ||
651 | * Scan the memory blocks for an SMP configuration block. | ||
652 | */ | ||
653 | void __init get_smp_config (void) | ||
654 | { | ||
655 | struct intel_mp_floating *mpf = mpf_found; | ||
656 | |||
657 | /* | ||
658 | * ACPI may be used to obtain the entire SMP configuration or just to | ||
659 | * enumerate/configure processors (CONFIG_ACPI_BOOT). Note that | ||
660 | * ACPI supports both logical (e.g. Hyper-Threading) and physical | ||
661 | * processors, where MPS only supports physical. | ||
662 | */ | ||
663 | if (acpi_lapic && acpi_ioapic) { | ||
664 | printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n"); | ||
665 | return; | ||
666 | } | ||
667 | else if (acpi_lapic) | ||
668 | printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n"); | ||
669 | |||
670 | printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification); | ||
671 | if (mpf->mpf_feature2 & (1<<7)) { | ||
672 | printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); | ||
673 | pic_mode = 1; | ||
674 | } else { | ||
675 | printk(KERN_INFO " Virtual Wire compatibility mode.\n"); | ||
676 | pic_mode = 0; | ||
677 | } | ||
678 | |||
679 | /* | ||
680 | * Now see if we need to read further. | ||
681 | */ | ||
682 | if (mpf->mpf_feature1 != 0) { | ||
683 | |||
684 | printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1); | ||
685 | construct_default_ISA_mptable(mpf->mpf_feature1); | ||
686 | |||
687 | } else if (mpf->mpf_physptr) { | ||
688 | |||
689 | /* | ||
690 | * Read the physical hardware table. Anything here will | ||
691 | * override the defaults. | ||
692 | */ | ||
693 | if (!smp_read_mpc((void *)mpf->mpf_physptr)) { | ||
694 | smp_found_config = 0; | ||
695 | printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); | ||
696 | printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n"); | ||
697 | return; | ||
698 | } | ||
699 | /* | ||
700 | * If there are no explicit MP IRQ entries, then we are | ||
701 | * broken. We set up most of the low 16 IO-APIC pins to | ||
702 | * ISA defaults and hope it will work. | ||
703 | */ | ||
704 | if (!mp_irq_entries) { | ||
705 | struct mpc_config_bus bus; | ||
706 | |||
707 | printk(KERN_ERR "BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n"); | ||
708 | |||
709 | bus.mpc_type = MP_BUS; | ||
710 | bus.mpc_busid = 0; | ||
711 | memcpy(bus.mpc_bustype, "ISA ", 6); | ||
712 | MP_bus_info(&bus); | ||
713 | |||
714 | construct_default_ioirq_mptable(0); | ||
715 | } | ||
716 | |||
717 | } else | ||
718 | BUG(); | ||
719 | |||
720 | printk(KERN_INFO "Processors: %d\n", num_processors); | ||
721 | /* | ||
722 | * Only use the first configuration found. | ||
723 | */ | ||
724 | } | ||
725 | |||
726 | static int __init smp_scan_config (unsigned long base, unsigned long length) | ||
727 | { | ||
728 | unsigned long *bp = phys_to_virt(base); | ||
729 | struct intel_mp_floating *mpf; | ||
730 | |||
731 | Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length); | ||
732 | if (sizeof(*mpf) != 16) | ||
733 | printk("Error: MPF size\n"); | ||
734 | |||
735 | while (length > 0) { | ||
736 | mpf = (struct intel_mp_floating *)bp; | ||
737 | if ((*bp == SMP_MAGIC_IDENT) && | ||
738 | (mpf->mpf_length == 1) && | ||
739 | !mpf_checksum((unsigned char *)bp, 16) && | ||
740 | ((mpf->mpf_specification == 1) | ||
741 | || (mpf->mpf_specification == 4)) ) { | ||
742 | |||
743 | smp_found_config = 1; | ||
744 | printk(KERN_INFO "found SMP MP-table at %08lx\n", | ||
745 | virt_to_phys(mpf)); | ||
746 | reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE); | ||
747 | if (mpf->mpf_physptr) { | ||
748 | /* | ||
749 | * We cannot access the MPC table to compute its | ||
750 | * size yet, as only a few megabytes from the | ||
751 | * bottom of memory are mapped at this point. | ||
752 | * The PC-9800's MPC table sits at the very end | ||
753 | * of physical memory, so blindly reserving | ||
754 | * PAGE_SIZE from mpf->mpf_physptr would trigger | ||
755 | * BUG() in reserve_bootmem. | ||
756 | */ | ||
757 | unsigned long size = PAGE_SIZE; | ||
758 | unsigned long end = max_low_pfn * PAGE_SIZE; | ||
759 | if (mpf->mpf_physptr + size > end) | ||
760 | size = end - mpf->mpf_physptr; | ||
761 | reserve_bootmem(mpf->mpf_physptr, size); | ||
762 | } | ||
763 | |||
764 | mpf_found = mpf; | ||
765 | return 1; | ||
766 | } | ||
767 | bp += 4; | ||
768 | length -= 16; | ||
769 | } | ||
770 | return 0; | ||
771 | } | ||
772 | |||
773 | void __init find_smp_config (void) | ||
774 | { | ||
775 | unsigned int address; | ||
776 | |||
777 | /* | ||
778 | * FIXME: Linux assumes you have 640K of base RAM.. | ||
779 | * this continues the error... | ||
780 | * | ||
781 | * 1) Scan the bottom 1K for a signature | ||
782 | * 2) Scan the top 1K of base RAM | ||
783 | * 3) Scan the 64K of BIOS | ||
784 | */ | ||
785 | if (smp_scan_config(0x0,0x400) || | ||
786 | smp_scan_config(639*0x400,0x400) || | ||
787 | smp_scan_config(0xF0000,0x10000)) | ||
788 | return; | ||
789 | /* | ||
790 | * If it is an SMP machine we should know now, unless the | ||
791 | * configuration is in an EISA/MCA bus machine with an | ||
792 | * extended bios data area. | ||
793 | * | ||
794 | * there is a real-mode segmented pointer pointing to the | ||
795 | * 4K EBDA area at 0x40E, calculate and scan it here. | ||
796 | * | ||
797 | * NOTE! There are Linux loaders that will corrupt the EBDA | ||
798 | * area, and as such this kind of SMP config may be less | ||
799 | * trustworthy, simply because the SMP table may have been | ||
800 | * stomped on during early boot. These loaders are buggy and | ||
801 | * should be fixed. | ||
802 | * | ||
803 | * MP1.4 SPEC states to only scan first 1K of 4K EBDA. | ||
804 | */ | ||
805 | |||
806 | address = get_bios_ebda(); | ||
807 | if (address) | ||
808 | smp_scan_config(address, 0x400); | ||
809 | } | ||
810 | |||
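get_bios_ebda() (from bios_ebda.h) is assumed here to resolve the real-mode segment word stored at physical 0x40E into a linear address; the conversion is just a shift by four bits. A sketch of that lookup under the same low-memory identity mapping smp_scan_config() relies on:

/* Sketch only: what get_bios_ebda() is assumed to do. */
static unsigned int ebda_address(void)
{
	/* the BIOS stores the EBDA real-mode segment at 0x40E */
	unsigned short segment = *(unsigned short *)phys_to_virt(0x40E);

	return (unsigned int)segment << 4;	/* e.g. 0x9fc0 -> 0x9fc00 */
}
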
811 | /* -------------------------------------------------------------------------- | ||
812 | ACPI-based MP Configuration | ||
813 | -------------------------------------------------------------------------- */ | ||
814 | |||
815 | #ifdef CONFIG_ACPI_BOOT | ||
816 | |||
817 | void __init mp_register_lapic_address ( | ||
818 | u64 address) | ||
819 | { | ||
820 | mp_lapic_addr = (unsigned long) address; | ||
821 | |||
822 | set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr); | ||
823 | |||
824 | if (boot_cpu_physical_apicid == -1U) | ||
825 | boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); | ||
826 | |||
827 | Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid); | ||
828 | } | ||
829 | |||
830 | |||
831 | void __init mp_register_lapic ( | ||
832 | u8 id, | ||
833 | u8 enabled) | ||
834 | { | ||
835 | struct mpc_config_processor processor; | ||
836 | int boot_cpu = 0; | ||
837 | |||
838 | if (MAX_APICS - id <= 0) { | ||
839 | printk(KERN_WARNING "Processor #%d invalid (max %d)\n", | ||
840 | id, MAX_APICS); | ||
841 | return; | ||
842 | } | ||
843 | |||
844 | if (id == boot_cpu_physical_apicid) | ||
845 | boot_cpu = 1; | ||
846 | |||
847 | processor.mpc_type = MP_PROCESSOR; | ||
848 | processor.mpc_apicid = id; | ||
849 | processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR)); | ||
850 | processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0); | ||
851 | processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0); | ||
852 | processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | | ||
853 | (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; | ||
854 | processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; | ||
855 | processor.mpc_reserved[0] = 0; | ||
856 | processor.mpc_reserved[1] = 0; | ||
857 | |||
858 | MP_processor_info(&processor); | ||
859 | } | ||
860 | |||
861 | #if defined(CONFIG_X86_IO_APIC) && (defined(CONFIG_ACPI_INTERPRETER) || defined(CONFIG_ACPI_BOOT)) | ||
862 | |||
863 | #define MP_ISA_BUS 0 | ||
864 | #define MP_MAX_IOAPIC_PIN 127 | ||
865 | |||
866 | static struct mp_ioapic_routing { | ||
867 | int apic_id; | ||
868 | int gsi_base; | ||
869 | int gsi_end; | ||
870 | u32 pin_programmed[4]; | ||
871 | } mp_ioapic_routing[MAX_IO_APICS]; | ||
872 | |||
873 | |||
874 | static int mp_find_ioapic ( | ||
875 | int gsi) | ||
876 | { | ||
877 | int i = 0; | ||
878 | |||
879 | /* Find the IOAPIC that manages this GSI. */ | ||
880 | for (i = 0; i < nr_ioapics; i++) { | ||
881 | if ((gsi >= mp_ioapic_routing[i].gsi_base) | ||
882 | && (gsi <= mp_ioapic_routing[i].gsi_end)) | ||
883 | return i; | ||
884 | } | ||
885 | |||
886 | printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); | ||
887 | |||
888 | return -1; | ||
889 | } | ||
890 | |||
891 | |||
892 | void __init mp_register_ioapic ( | ||
893 | u8 id, | ||
894 | u32 address, | ||
895 | u32 gsi_base) | ||
896 | { | ||
897 | int idx = 0; | ||
898 | |||
899 | if (nr_ioapics >= MAX_IO_APICS) { | ||
900 | printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " | ||
901 | "(found %d)\n", MAX_IO_APICS, nr_ioapics); | ||
902 | panic("Recompile kernel with bigger MAX_IO_APICS!\n"); | ||
903 | } | ||
904 | if (!address) { | ||
905 | printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address" | ||
906 | " found in MADT table, skipping!\n"); | ||
907 | return; | ||
908 | } | ||
909 | |||
910 | idx = nr_ioapics++; | ||
911 | |||
912 | mp_ioapics[idx].mpc_type = MP_IOAPIC; | ||
913 | mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE; | ||
914 | mp_ioapics[idx].mpc_apicaddr = address; | ||
915 | |||
916 | set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); | ||
917 | mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id); | ||
918 | mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); | ||
919 | |||
920 | /* | ||
921 | * Build basic GSI lookup table to facilitate gsi->io_apic lookups | ||
922 | * and to prevent reprogramming of IOAPIC pins (PCI GSIs). | ||
923 | */ | ||
924 | mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid; | ||
925 | mp_ioapic_routing[idx].gsi_base = gsi_base; | ||
926 | mp_ioapic_routing[idx].gsi_end = gsi_base + | ||
927 | io_apic_get_redir_entries(idx); | ||
928 | |||
929 | printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, " | ||
930 | "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, | ||
931 | mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, | ||
932 | mp_ioapic_routing[idx].gsi_base, | ||
933 | mp_ioapic_routing[idx].gsi_end); | ||
934 | |||
935 | return; | ||
936 | } | ||
937 | |||
938 | |||
939 | void __init mp_override_legacy_irq ( | ||
940 | u8 bus_irq, | ||
941 | u8 polarity, | ||
942 | u8 trigger, | ||
943 | u32 gsi) | ||
944 | { | ||
945 | struct mpc_config_intsrc intsrc; | ||
946 | int ioapic = -1; | ||
947 | int pin = -1; | ||
948 | |||
949 | /* | ||
950 | * Convert 'gsi' to 'ioapic.pin'. | ||
951 | */ | ||
952 | ioapic = mp_find_ioapic(gsi); | ||
953 | if (ioapic < 0) | ||
954 | return; | ||
955 | pin = gsi - mp_ioapic_routing[ioapic].gsi_base; | ||
956 | |||
957 | /* | ||
958 | * TBD: This check is for faulty timer entries, where the override | ||
959 | * erroneously sets the trigger to level, resulting in a HUGE | ||
960 | * increase of timer interrupts! | ||
961 | */ | ||
962 | if ((bus_irq == 0) && (trigger == 3)) | ||
963 | trigger = 1; | ||
964 | |||
965 | intsrc.mpc_type = MP_INTSRC; | ||
966 | intsrc.mpc_irqtype = mp_INT; | ||
967 | intsrc.mpc_irqflag = (trigger << 2) | polarity; | ||
968 | intsrc.mpc_srcbus = MP_ISA_BUS; | ||
969 | intsrc.mpc_srcbusirq = bus_irq; /* IRQ */ | ||
970 | intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */ | ||
971 | intsrc.mpc_dstirq = pin; /* INTIN# */ | ||
972 | |||
973 | Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n", | ||
974 | intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, | ||
975 | (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, | ||
976 | intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq); | ||
977 | |||
978 | mp_irqs[mp_irq_entries] = intsrc; | ||
979 | if (++mp_irq_entries == MAX_IRQ_SOURCES) | ||
980 | panic("Max # of irq sources exceeded!\n"); | ||
981 | |||
982 | return; | ||
983 | } | ||
984 | |||
985 | int es7000_plat; | ||
986 | |||
987 | void __init mp_config_acpi_legacy_irqs (void) | ||
988 | { | ||
989 | struct mpc_config_intsrc intsrc; | ||
990 | int i = 0; | ||
991 | int ioapic = -1; | ||
992 | |||
993 | /* | ||
994 | * Fabricate the legacy ISA bus (MP_ISA_BUS). | ||
995 | */ | ||
996 | mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; | ||
997 | Dprintk("Bus #%d is ISA\n", MP_ISA_BUS); | ||
998 | |||
999 | /* | ||
1000 | * Older generations of ES7000 have no legacy identity mappings | ||
1001 | */ | ||
1002 | if (es7000_plat == 1) | ||
1003 | return; | ||
1004 | |||
1005 | /* | ||
1006 | * Locate the IOAPIC that manages the ISA IRQs (0-15). | ||
1007 | */ | ||
1008 | ioapic = mp_find_ioapic(0); | ||
1009 | if (ioapic < 0) | ||
1010 | return; | ||
1011 | |||
1012 | intsrc.mpc_type = MP_INTSRC; | ||
1013 | intsrc.mpc_irqflag = 0; /* Conforming */ | ||
1014 | intsrc.mpc_srcbus = MP_ISA_BUS; | ||
1015 | intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; | ||
1016 | |||
1017 | /* | ||
1018 | * Use the default configuration for IRQs 0-15, unless | ||
1019 | * overridden by (MADT) interrupt source override entries. | ||
1020 | */ | ||
1021 | for (i = 0; i < 16; i++) { | ||
1022 | int idx; | ||
1023 | |||
1024 | for (idx = 0; idx < mp_irq_entries; idx++) { | ||
1025 | struct mpc_config_intsrc *irq = mp_irqs + idx; | ||
1026 | |||
1027 | /* Do we already have a mapping for this ISA IRQ? */ | ||
1028 | if (irq->mpc_srcbus == MP_ISA_BUS && irq->mpc_srcbusirq == i) | ||
1029 | break; | ||
1030 | |||
1031 | /* Do we already have a mapping for this IOAPIC pin */ | ||
1032 | if ((irq->mpc_dstapic == intsrc.mpc_dstapic) && | ||
1033 | (irq->mpc_dstirq == i)) | ||
1034 | break; | ||
1035 | } | ||
1036 | |||
1037 | if (idx != mp_irq_entries) { | ||
1038 | printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i); | ||
1039 | continue; /* IRQ already used */ | ||
1040 | } | ||
1041 | |||
1042 | intsrc.mpc_irqtype = mp_INT; | ||
1043 | intsrc.mpc_srcbusirq = i; /* Identity mapped */ | ||
1044 | intsrc.mpc_dstirq = i; | ||
1045 | |||
1046 | Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, " | ||
1047 | "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, | ||
1048 | (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, | ||
1049 | intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, | ||
1050 | intsrc.mpc_dstirq); | ||
1051 | |||
1052 | mp_irqs[mp_irq_entries] = intsrc; | ||
1053 | if (++mp_irq_entries == MAX_IRQ_SOURCES) | ||
1054 | panic("Max # of irq sources exceeded!\n"); | ||
1055 | } | ||
1056 | } | ||
1057 | |||
1058 | int mp_register_gsi (u32 gsi, int edge_level, int active_high_low) | ||
1059 | { | ||
1060 | int ioapic = -1; | ||
1061 | int ioapic_pin = 0; | ||
1062 | int idx, bit = 0; | ||
1063 | |||
1064 | #ifdef CONFIG_ACPI_BUS | ||
1065 | /* Don't set up the ACPI SCI because it's already set up */ | ||
1066 | if (acpi_fadt.sci_int == gsi) | ||
1067 | return gsi; | ||
1068 | #endif | ||
1069 | |||
1070 | ioapic = mp_find_ioapic(gsi); | ||
1071 | if (ioapic < 0) { | ||
1072 | printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi); | ||
1073 | return gsi; | ||
1074 | } | ||
1075 | |||
1076 | ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base; | ||
1077 | |||
1078 | if (ioapic_renumber_irq) | ||
1079 | gsi = ioapic_renumber_irq(ioapic, gsi); | ||
1080 | |||
1081 | /* | ||
1082 | * Avoid pin reprogramming. PRTs typically include entries | ||
1083 | * with redundant pin->gsi mappings (but unique PCI devices); | ||
1084 | * we only program the IOAPIC on the first. | ||
1085 | */ | ||
1086 | bit = ioapic_pin % 32; | ||
1087 | idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32); | ||
1088 | if (idx > 3) { | ||
1089 | printk(KERN_ERR "Invalid reference to IOAPIC pin " | ||
1090 | "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, | ||
1091 | ioapic_pin); | ||
1092 | return gsi; | ||
1093 | } | ||
1094 | if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) { | ||
1095 | Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", | ||
1096 | mp_ioapic_routing[ioapic].apic_id, ioapic_pin); | ||
1097 | return gsi; | ||
1098 | } | ||
1099 | |||
1100 | mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit); | ||
1101 | |||
1102 | io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, | ||
1103 | edge_level == ACPI_EDGE_SENSITIVE ? 0 : 1, | ||
1104 | active_high_low == ACPI_ACTIVE_HIGH ? 0 : 1); | ||
1105 | return gsi; | ||
1106 | } | ||
1107 | |||
1108 | #endif /*CONFIG_X86_IO_APIC && (CONFIG_ACPI_INTERPRETER || CONFIG_ACPI_BOOT)*/ | ||
1109 | #endif /*CONFIG_ACPI_BOOT*/ | ||
diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c new file mode 100644 index 000000000000..05d9f8f363a6 --- /dev/null +++ b/arch/i386/kernel/msr.c | |||
@@ -0,0 +1,346 @@ | |||
1 | /* ----------------------------------------------------------------------- * | ||
2 | * | ||
3 | * Copyright 2000 H. Peter Anvin - All Rights Reserved | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, | ||
8 | * USA; either version 2 of the License, or (at your option) any later | ||
9 | * version; incorporated herein by reference. | ||
10 | * | ||
11 | * ----------------------------------------------------------------------- */ | ||
12 | |||
13 | /* | ||
14 | * msr.c | ||
15 | * | ||
16 | * x86 MSR access device | ||
17 | * | ||
18 | * This device is accessed by lseek() to the appropriate register number | ||
19 | * and then read/write in chunks of 8 bytes. A larger size means multiple | ||
20 | * reads or writes of the same register. | ||
21 | * | ||
22 | * This driver uses /dev/cpu/%d/msr where %d is the minor number, and on | ||
23 | * an SMP box will direct the access to CPU %d. | ||
24 | */ | ||
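Seen from userspace, the access pattern described above looks like this. A sketch: MSR 0x10 (the time-stamp counter) is just an example register, and the device path follows the /dev/cpu/%d/msr scheme from the comment.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uint32_t data[2];	/* data[0] = low half (eax), data[1] = high (edx) */
	int fd = open("/dev/cpu/0/msr", O_RDONLY);	/* minor 0 -> CPU 0 */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	lseek(fd, 0x10, SEEK_SET);	/* register number = file position */
	if (read(fd, data, 8) != 8) {	/* one 8-byte chunk = one rdmsr */
		perror("rdmsr");
		return 1;
	}
	printf("TSC = 0x%08x%08x\n", data[1], data[0]);
	return 0;
}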
25 | |||
26 | #include <linux/module.h> | ||
27 | #include <linux/config.h> | ||
28 | |||
29 | #include <linux/types.h> | ||
30 | #include <linux/errno.h> | ||
31 | #include <linux/fcntl.h> | ||
32 | #include <linux/init.h> | ||
33 | #include <linux/poll.h> | ||
34 | #include <linux/smp.h> | ||
35 | #include <linux/smp_lock.h> | ||
36 | #include <linux/major.h> | ||
37 | #include <linux/fs.h> | ||
38 | #include <linux/device.h> | ||
39 | #include <linux/cpu.h> | ||
40 | #include <linux/notifier.h> | ||
41 | |||
42 | #include <asm/processor.h> | ||
43 | #include <asm/msr.h> | ||
44 | #include <asm/uaccess.h> | ||
45 | #include <asm/system.h> | ||
46 | |||
47 | static struct class_simple *msr_class; | ||
48 | |||
49 | /* Note: "err" is handled in a funny way below. Otherwise one version | ||
50 | of gcc or another breaks. */ | ||
51 | |||
52 | static inline int wrmsr_eio(u32 reg, u32 eax, u32 edx) | ||
53 | { | ||
54 | int err; | ||
55 | |||
56 | asm volatile ("1: wrmsr\n" | ||
57 | "2:\n" | ||
58 | ".section .fixup,\"ax\"\n" | ||
59 | "3: movl %4,%0\n" | ||
60 | " jmp 2b\n" | ||
61 | ".previous\n" | ||
62 | ".section __ex_table,\"a\"\n" | ||
63 | " .align 4\n" " .long 1b,3b\n" ".previous":"=&bDS" (err) | ||
64 | :"a"(eax), "d"(edx), "c"(reg), "i"(-EIO), "0"(0)); | ||
65 | |||
66 | return err; | ||
67 | } | ||
68 | |||
69 | static inline int rdmsr_eio(u32 reg, u32 *eax, u32 *edx) | ||
70 | { | ||
71 | int err; | ||
72 | |||
73 | asm volatile ("1: rdmsr\n" | ||
74 | "2:\n" | ||
75 | ".section .fixup,\"ax\"\n" | ||
76 | "3: movl %4,%0\n" | ||
77 | " jmp 2b\n" | ||
78 | ".previous\n" | ||
79 | ".section __ex_table,\"a\"\n" | ||
80 | " .align 4\n" | ||
81 | " .long 1b,3b\n" | ||
82 | ".previous":"=&bDS" (err), "=a"(*eax), "=d"(*edx) | ||
83 | :"c"(reg), "i"(-EIO), "0"(0)); | ||
84 | |||
85 | return err; | ||
86 | } | ||
87 | |||
88 | #ifdef CONFIG_SMP | ||
89 | |||
90 | struct msr_command { | ||
91 | int cpu; | ||
92 | int err; | ||
93 | u32 reg; | ||
94 | u32 data[2]; | ||
95 | }; | ||
96 | |||
97 | static void msr_smp_wrmsr(void *cmd_block) | ||
98 | { | ||
99 | struct msr_command *cmd = (struct msr_command *)cmd_block; | ||
100 | |||
101 | if (cmd->cpu == smp_processor_id()) | ||
102 | cmd->err = wrmsr_eio(cmd->reg, cmd->data[0], cmd->data[1]); | ||
103 | } | ||
104 | |||
105 | static void msr_smp_rdmsr(void *cmd_block) | ||
106 | { | ||
107 | struct msr_command *cmd = (struct msr_command *)cmd_block; | ||
108 | |||
109 | if (cmd->cpu == smp_processor_id()) | ||
110 | cmd->err = rdmsr_eio(cmd->reg, &cmd->data[0], &cmd->data[1]); | ||
111 | } | ||
112 | |||
113 | static inline int do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx) | ||
114 | { | ||
115 | struct msr_command cmd; | ||
116 | int ret; | ||
117 | |||
118 | preempt_disable(); | ||
119 | if (cpu == smp_processor_id()) { | ||
120 | ret = wrmsr_eio(reg, eax, edx); | ||
121 | } else { | ||
122 | cmd.cpu = cpu; | ||
123 | cmd.reg = reg; | ||
124 | cmd.data[0] = eax; | ||
125 | cmd.data[1] = edx; | ||
126 | |||
127 | smp_call_function(msr_smp_wrmsr, &cmd, 1, 1); | ||
128 | ret = cmd.err; | ||
129 | } | ||
130 | preempt_enable(); | ||
131 | return ret; | ||
132 | } | ||
133 | |||
134 | static inline int do_rdmsr(int cpu, u32 reg, u32 * eax, u32 * edx) | ||
135 | { | ||
136 | struct msr_command cmd; | ||
137 | int ret; | ||
138 | |||
139 | preempt_disable(); | ||
140 | if (cpu == smp_processor_id()) { | ||
141 | ret = rdmsr_eio(reg, eax, edx); | ||
142 | } else { | ||
143 | cmd.cpu = cpu; | ||
144 | cmd.reg = reg; | ||
145 | |||
146 | smp_call_function(msr_smp_rdmsr, &cmd, 1, 1); | ||
147 | |||
148 | *eax = cmd.data[0]; | ||
149 | *edx = cmd.data[1]; | ||
150 | |||
151 | ret = cmd.err; | ||
152 | } | ||
153 | preempt_enable(); | ||
154 | return ret; | ||
155 | } | ||
156 | |||
157 | #else /* ! CONFIG_SMP */ | ||
158 | |||
159 | static inline int do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx) | ||
160 | { | ||
161 | return wrmsr_eio(reg, eax, edx); | ||
162 | } | ||
163 | |||
164 | static inline int do_rdmsr(int cpu, u32 reg, u32 *eax, u32 *edx) | ||
165 | { | ||
166 | return rdmsr_eio(reg, eax, edx); | ||
167 | } | ||
168 | |||
169 | #endif /* ! CONFIG_SMP */ | ||
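In the SMP variant above, smp_call_function() broadcasts the command to every other CPU and only the addressed one acts on it; the current CPU takes the direct fast path. A hedged in-kernel caller sketch (the CPU number and MSR 0x1b, the local APIC base register, are illustrative assumptions):

/* Hypothetical caller of the do_rdmsr() helper defined above. */
static void example_read_apic_base(void)
{
	u32 lo, hi;

	if (!do_rdmsr(2, 0x1b, &lo, &hi))	/* CPU 2 is an assumption */
		printk(KERN_INFO "APIC base: %08x%08x\n", hi, lo);
}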
170 | |||
171 | static loff_t msr_seek(struct file *file, loff_t offset, int orig) | ||
172 | { | ||
173 | loff_t ret = -EINVAL; | ||
174 | |||
175 | lock_kernel(); | ||
176 | switch (orig) { | ||
177 | case 0: | ||
178 | file->f_pos = offset; | ||
179 | ret = file->f_pos; | ||
180 | break; | ||
181 | case 1: | ||
182 | file->f_pos += offset; | ||
183 | ret = file->f_pos; | ||
184 | } | ||
185 | unlock_kernel(); | ||
186 | return ret; | ||
187 | } | ||
188 | |||
189 | static ssize_t msr_read(struct file *file, char __user * buf, | ||
190 | size_t count, loff_t * ppos) | ||
191 | { | ||
192 | u32 __user *tmp = (u32 __user *) buf; | ||
193 | u32 data[2]; | ||
194 | size_t rv; | ||
195 | u32 reg = *ppos; | ||
196 | int cpu = iminor(file->f_dentry->d_inode); | ||
197 | int err; | ||
198 | |||
199 | if (count % 8) | ||
200 | return -EINVAL; /* Invalid chunk size */ | ||
201 | |||
202 | for (rv = 0; count; count -= 8) { | ||
203 | err = do_rdmsr(cpu, reg, &data[0], &data[1]); | ||
204 | if (err) | ||
205 | return err; | ||
206 | if (copy_to_user(tmp, &data, 8)) | ||
207 | return -EFAULT; | ||
208 | tmp += 2; | ||
209 | } | ||
210 | |||
211 | return ((char __user *)tmp) - buf; | ||
212 | } | ||
213 | |||
214 | static ssize_t msr_write(struct file *file, const char __user *buf, | ||
215 | size_t count, loff_t *ppos) | ||
216 | { | ||
217 | const u32 __user *tmp = (const u32 __user *)buf; | ||
218 | u32 data[2]; | ||
219 | size_t rv; | ||
220 | u32 reg = *ppos; | ||
221 | int cpu = iminor(file->f_dentry->d_inode); | ||
222 | int err; | ||
223 | |||
224 | if (count % 8) | ||
225 | return -EINVAL; /* Invalid chunk size */ | ||
226 | |||
227 | for (rv = 0; count; count -= 8) { | ||
228 | if (copy_from_user(&data, tmp, 8)) | ||
229 | return -EFAULT; | ||
230 | err = do_wrmsr(cpu, reg, data[0], data[1]); | ||
231 | if (err) | ||
232 | return err; | ||
233 | tmp += 2; | ||
234 | } | ||
235 | |||
236 | return ((char __user *)tmp) - buf; | ||
237 | } | ||
238 | |||
239 | static int msr_open(struct inode *inode, struct file *file) | ||
240 | { | ||
241 | unsigned int cpu = iminor(file->f_dentry->d_inode); | ||
242 | struct cpuinfo_x86 *c = &(cpu_data)[cpu]; | ||
243 | |||
244 | if (cpu >= NR_CPUS || !cpu_online(cpu)) | ||
245 | return -ENXIO; /* No such CPU */ | ||
246 | if (!cpu_has(c, X86_FEATURE_MSR)) | ||
247 | return -EIO; /* MSR not supported */ | ||
248 | |||
249 | return 0; | ||
250 | } | ||
251 | |||
252 | /* | ||
253 | * File operations we support | ||
254 | */ | ||
255 | static struct file_operations msr_fops = { | ||
256 | .owner = THIS_MODULE, | ||
257 | .llseek = msr_seek, | ||
258 | .read = msr_read, | ||
259 | .write = msr_write, | ||
260 | .open = msr_open, | ||
261 | }; | ||
262 | |||
263 | static int msr_class_simple_device_add(int i) | ||
264 | { | ||
265 | int err = 0; | ||
266 | struct class_device *class_err; | ||
267 | |||
268 | class_err = class_simple_device_add(msr_class, MKDEV(MSR_MAJOR, i), NULL, "msr%d",i); | ||
269 | if (IS_ERR(class_err)) | ||
270 | err = PTR_ERR(class_err); | ||
271 | return err; | ||
272 | } | ||
273 | |||
274 | static int __devinit msr_class_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | ||
275 | { | ||
276 | unsigned int cpu = (unsigned long)hcpu; | ||
277 | |||
278 | switch (action) { | ||
279 | case CPU_ONLINE: | ||
280 | msr_class_simple_device_add(cpu); | ||
281 | break; | ||
282 | case CPU_DEAD: | ||
283 | class_simple_device_remove(MKDEV(MSR_MAJOR, cpu)); | ||
284 | break; | ||
285 | } | ||
286 | return NOTIFY_OK; | ||
287 | } | ||
288 | |||
289 | static struct notifier_block msr_class_cpu_notifier = | ||
290 | { | ||
291 | .notifier_call = msr_class_cpu_callback, | ||
292 | }; | ||
293 | |||
294 | static int __init msr_init(void) | ||
295 | { | ||
296 | int i, err = 0; | ||
297 | i = 0; | ||
298 | |||
299 | if (register_chrdev(MSR_MAJOR, "cpu/msr", &msr_fops)) { | ||
300 | printk(KERN_ERR "msr: unable to get major %d for msr\n", | ||
301 | MSR_MAJOR); | ||
302 | err = -EBUSY; | ||
303 | goto out; | ||
304 | } | ||
305 | msr_class = class_simple_create(THIS_MODULE, "msr"); | ||
306 | if (IS_ERR(msr_class)) { | ||
307 | err = PTR_ERR(msr_class); | ||
308 | goto out_chrdev; | ||
309 | } | ||
310 | for_each_online_cpu(i) { | ||
311 | err = msr_class_simple_device_add(i); | ||
312 | if (err != 0) | ||
313 | goto out_class; | ||
314 | } | ||
315 | register_cpu_notifier(&msr_class_cpu_notifier); | ||
316 | |||
317 | err = 0; | ||
318 | goto out; | ||
319 | |||
320 | out_class: | ||
321 | i = 0; | ||
322 | for_each_online_cpu(i) | ||
323 | class_simple_device_remove(MKDEV(MSR_MAJOR, i)); | ||
324 | class_simple_destroy(msr_class); | ||
325 | out_chrdev: | ||
326 | unregister_chrdev(MSR_MAJOR, "cpu/msr"); | ||
327 | out: | ||
328 | return err; | ||
329 | } | ||
330 | |||
331 | static void __exit msr_exit(void) | ||
332 | { | ||
333 | int cpu = 0; | ||
334 | for_each_online_cpu(cpu) | ||
335 | class_simple_device_remove(MKDEV(MSR_MAJOR, cpu)); | ||
336 | class_simple_destroy(msr_class); | ||
337 | unregister_chrdev(MSR_MAJOR, "cpu/msr"); | ||
338 | unregister_cpu_notifier(&msr_class_cpu_notifier); | ||
339 | } | ||
340 | |||
341 | module_init(msr_init); | ||
342 | module_exit(msr_exit); | ||
343 | |||
344 | MODULE_AUTHOR("H. Peter Anvin <hpa@zytor.com>"); | ||
345 | MODULE_DESCRIPTION("x86 generic MSR driver"); | ||
346 | MODULE_LICENSE("GPL"); | ||
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c new file mode 100644 index 000000000000..f5b0c5081bd6 --- /dev/null +++ b/arch/i386/kernel/nmi.c | |||
@@ -0,0 +1,570 @@ | |||
1 | /* | ||
2 | * linux/arch/i386/kernel/nmi.c | ||
3 | * | ||
4 | * NMI watchdog support on APIC systems | ||
5 | * | ||
6 | * Started by Ingo Molnar <mingo@redhat.com> | ||
7 | * | ||
8 | * Fixes: | ||
9 | * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog. | ||
10 | * Mikael Pettersson : Power Management for local APIC NMI watchdog. | ||
11 | * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog. | ||
12 | * Pavel Machek and | ||
13 | * Mikael Pettersson : PM converted to driver model. Disable/enable API. | ||
14 | */ | ||
15 | |||
16 | #include <linux/config.h> | ||
17 | #include <linux/mm.h> | ||
18 | #include <linux/irq.h> | ||
19 | #include <linux/delay.h> | ||
20 | #include <linux/bootmem.h> | ||
21 | #include <linux/smp_lock.h> | ||
22 | #include <linux/interrupt.h> | ||
23 | #include <linux/mc146818rtc.h> | ||
24 | #include <linux/kernel_stat.h> | ||
25 | #include <linux/module.h> | ||
26 | #include <linux/nmi.h> | ||
27 | #include <linux/sysdev.h> | ||
28 | #include <linux/sysctl.h> | ||
29 | |||
30 | #include <asm/smp.h> | ||
31 | #include <asm/mtrr.h> | ||
32 | #include <asm/mpspec.h> | ||
33 | #include <asm/nmi.h> | ||
34 | |||
35 | #include "mach_traps.h" | ||
36 | |||
37 | unsigned int nmi_watchdog = NMI_NONE; | ||
38 | extern int unknown_nmi_panic; | ||
39 | static unsigned int nmi_hz = HZ; | ||
40 | static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ | ||
41 | static unsigned int nmi_p4_cccr_val; | ||
42 | extern void show_registers(struct pt_regs *regs); | ||
43 | |||
44 | /* | ||
45 | * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: | ||
46 | * - it may be reserved by some other driver, or not | ||
47 | * - when not reserved by some other driver, it may be used for | ||
48 | * the NMI watchdog, or not | ||
49 | * | ||
50 | * This is maintained separately from nmi_active because the NMI | ||
51 | * watchdog may also be driven from the I/O APIC timer. | ||
52 | */ | ||
53 | static DEFINE_SPINLOCK(lapic_nmi_owner_lock); | ||
54 | static unsigned int lapic_nmi_owner; | ||
55 | #define LAPIC_NMI_WATCHDOG (1<<0) | ||
56 | #define LAPIC_NMI_RESERVED (1<<1) | ||
57 | |||
58 | /* nmi_active: | ||
59 | * +1: the lapic NMI watchdog is active, but can be disabled | ||
60 | * 0: the lapic NMI watchdog has not been set up, and cannot | ||
61 | * be enabled | ||
62 | * -1: the lapic NMI watchdog is disabled, but can be enabled | ||
63 | */ | ||
64 | int nmi_active; | ||
65 | |||
66 | #define K7_EVNTSEL_ENABLE (1 << 22) | ||
67 | #define K7_EVNTSEL_INT (1 << 20) | ||
68 | #define K7_EVNTSEL_OS (1 << 17) | ||
69 | #define K7_EVNTSEL_USR (1 << 16) | ||
70 | #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 | ||
71 | #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING | ||
72 | |||
73 | #define P6_EVNTSEL0_ENABLE (1 << 22) | ||
74 | #define P6_EVNTSEL_INT (1 << 20) | ||
75 | #define P6_EVNTSEL_OS (1 << 17) | ||
76 | #define P6_EVNTSEL_USR (1 << 16) | ||
77 | #define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 | ||
78 | #define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED | ||
79 | |||
80 | #define MSR_P4_MISC_ENABLE 0x1A0 | ||
81 | #define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) | ||
82 | #define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12) | ||
83 | #define MSR_P4_PERFCTR0 0x300 | ||
84 | #define MSR_P4_CCCR0 0x360 | ||
85 | #define P4_ESCR_EVENT_SELECT(N) ((N)<<25) | ||
86 | #define P4_ESCR_OS (1<<3) | ||
87 | #define P4_ESCR_USR (1<<2) | ||
88 | #define P4_CCCR_OVF_PMI0 (1<<26) | ||
89 | #define P4_CCCR_OVF_PMI1 (1<<27) | ||
90 | #define P4_CCCR_THRESHOLD(N) ((N)<<20) | ||
91 | #define P4_CCCR_COMPLEMENT (1<<19) | ||
92 | #define P4_CCCR_COMPARE (1<<18) | ||
93 | #define P4_CCCR_REQUIRED (3<<16) | ||
94 | #define P4_CCCR_ESCR_SELECT(N) ((N)<<13) | ||
95 | #define P4_CCCR_ENABLE (1<<12) | ||
96 | /* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter | ||
97 | CRU_ESCR0 (with any non-null event selector) through a complemented | ||
98 | max threshold. [IA32-Vol3, Section 14.9.9] */ | ||
99 | #define MSR_P4_IQ_COUNTER0 0x30C | ||
100 | #define P4_NMI_CRU_ESCR0 (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR) | ||
101 | #define P4_NMI_IQ_CCCR0 \ | ||
102 | (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ | ||
103 | P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) | ||
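For concreteness, OR-ing the CCCR fields above gives the constant that is later written to IQ_CCCR0; a standalone sketch (plain arithmetic on the shift values, nothing kernel-specific) shows the composition:

/* (1<<26)|(15<<20)|(1<<19)|(1<<18)|(3<<16)|(4<<13)|(1<<12) == 0x04ff9000 */
#include <stdio.h>

int main(void)
{
	unsigned int v = (1u << 26) | (15u << 20) | (1u << 19) |
			 (1u << 18) | (3u << 16) | (4u << 13) | (1u << 12);

	printf("P4_NMI_IQ_CCCR0 = %#010x\n", v);	/* prints 0x04ff9000 */
	return 0;
}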
104 | |||
105 | int __init check_nmi_watchdog (void) | ||
106 | { | ||
107 | unsigned int prev_nmi_count[NR_CPUS]; | ||
108 | int cpu; | ||
109 | |||
110 | printk(KERN_INFO "testing NMI watchdog ... "); | ||
111 | |||
112 | for (cpu = 0; cpu < NR_CPUS; cpu++) | ||
113 | prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count; | ||
114 | local_irq_enable(); | ||
115 | mdelay((10*1000)/nmi_hz); // wait 10 ticks | ||
116 | |||
117 | /* FIXME: Only boot CPU is online at this stage. Check CPUs | ||
118 | as they come up. */ | ||
119 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | ||
120 | #ifdef CONFIG_SMP | ||
121 | /* Check cpu_callin_map here because that is set | ||
122 | after the timer is started. */ | ||
123 | if (!cpu_isset(cpu, cpu_callin_map)) | ||
124 | continue; | ||
125 | #endif | ||
126 | if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { | ||
127 | printk("CPU#%d: NMI appears to be stuck!\n", cpu); | ||
128 | nmi_active = 0; | ||
129 | lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG; | ||
130 | return -1; | ||
131 | } | ||
132 | } | ||
133 | printk("OK.\n"); | ||
134 | |||
135 | /* now that we know it works we can reduce NMI frequency to | ||
136 | something more reasonable; makes a difference in some configs */ | ||
137 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
138 | nmi_hz = 1; | ||
139 | |||
140 | return 0; | ||
141 | } | ||
142 | |||
143 | static int __init setup_nmi_watchdog(char *str) | ||
144 | { | ||
145 | int nmi; | ||
146 | |||
147 | get_option(&str, &nmi); | ||
148 | |||
149 | if (nmi >= NMI_INVALID) | ||
150 | return 0; | ||
151 | if (nmi == NMI_NONE) | ||
152 | nmi_watchdog = nmi; | ||
153 | /* | ||
154 | * If any other x86 CPU has a local APIC, then | ||
155 | * please test the NMI stuff there and send me the | ||
156 | * missing bits. Right now Intel P6/P4 and AMD K7 only. | ||
157 | */ | ||
158 | if ((nmi == NMI_LOCAL_APIC) && | ||
159 | (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && | ||
160 | (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15)) | ||
161 | nmi_watchdog = nmi; | ||
162 | if ((nmi == NMI_LOCAL_APIC) && | ||
163 | (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && | ||
164 | (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15)) | ||
165 | nmi_watchdog = nmi; | ||
166 | /* | ||
167 | * We can enable the IO-APIC watchdog | ||
168 | * unconditionally. | ||
169 | */ | ||
170 | if (nmi == NMI_IO_APIC) { | ||
171 | nmi_active = 1; | ||
172 | nmi_watchdog = nmi; | ||
173 | } | ||
174 | return 1; | ||
175 | } | ||
176 | |||
177 | __setup("nmi_watchdog=", setup_nmi_watchdog); | ||
178 | |||
179 | static void disable_lapic_nmi_watchdog(void) | ||
180 | { | ||
181 | if (nmi_active <= 0) | ||
182 | return; | ||
183 | switch (boot_cpu_data.x86_vendor) { | ||
184 | case X86_VENDOR_AMD: | ||
185 | wrmsr(MSR_K7_EVNTSEL0, 0, 0); | ||
186 | break; | ||
187 | case X86_VENDOR_INTEL: | ||
188 | switch (boot_cpu_data.x86) { | ||
189 | case 6: | ||
190 | if (boot_cpu_data.x86_model > 0xd) | ||
191 | break; | ||
192 | |||
193 | wrmsr(MSR_P6_EVNTSEL0, 0, 0); | ||
194 | break; | ||
195 | case 15: | ||
196 | if (boot_cpu_data.x86_model > 0x3) | ||
197 | break; | ||
198 | |||
199 | wrmsr(MSR_P4_IQ_CCCR0, 0, 0); | ||
200 | wrmsr(MSR_P4_CRU_ESCR0, 0, 0); | ||
201 | break; | ||
202 | } | ||
203 | break; | ||
204 | } | ||
205 | nmi_active = -1; | ||
206 | /* tell do_nmi() and others that we're not active any more */ | ||
207 | nmi_watchdog = 0; | ||
208 | } | ||
209 | |||
210 | static void enable_lapic_nmi_watchdog(void) | ||
211 | { | ||
212 | if (nmi_active < 0) { | ||
213 | nmi_watchdog = NMI_LOCAL_APIC; | ||
214 | setup_apic_nmi_watchdog(); | ||
215 | } | ||
216 | } | ||
217 | |||
218 | int reserve_lapic_nmi(void) | ||
219 | { | ||
220 | unsigned int old_owner; | ||
221 | |||
222 | spin_lock(&lapic_nmi_owner_lock); | ||
223 | old_owner = lapic_nmi_owner; | ||
224 | lapic_nmi_owner |= LAPIC_NMI_RESERVED; | ||
225 | spin_unlock(&lapic_nmi_owner_lock); | ||
226 | if (old_owner & LAPIC_NMI_RESERVED) | ||
227 | return -EBUSY; | ||
228 | if (old_owner & LAPIC_NMI_WATCHDOG) | ||
229 | disable_lapic_nmi_watchdog(); | ||
230 | return 0; | ||
231 | } | ||
232 | |||
233 | void release_lapic_nmi(void) | ||
234 | { | ||
235 | unsigned int new_owner; | ||
236 | |||
237 | spin_lock(&lapic_nmi_owner_lock); | ||
238 | new_owner = lapic_nmi_owner & ~LAPIC_NMI_RESERVED; | ||
239 | lapic_nmi_owner = new_owner; | ||
240 | spin_unlock(&lapic_nmi_owner_lock); | ||
241 | if (new_owner & LAPIC_NMI_WATCHDOG) | ||
242 | enable_lapic_nmi_watchdog(); | ||
243 | } | ||
244 | |||
245 | void disable_timer_nmi_watchdog(void) | ||
246 | { | ||
247 | if ((nmi_watchdog != NMI_IO_APIC) || (nmi_active <= 0)) | ||
248 | return; | ||
249 | |||
250 | unset_nmi_callback(); | ||
251 | nmi_active = -1; | ||
252 | nmi_watchdog = NMI_NONE; | ||
253 | } | ||
254 | |||
255 | void enable_timer_nmi_watchdog(void) | ||
256 | { | ||
257 | if (nmi_active < 0) { | ||
258 | nmi_watchdog = NMI_IO_APIC; | ||
259 | touch_nmi_watchdog(); | ||
260 | nmi_active = 1; | ||
261 | } | ||
262 | } | ||
263 | |||
264 | #ifdef CONFIG_PM | ||
265 | |||
266 | static int nmi_pm_active; /* nmi_active before suspend */ | ||
267 | |||
268 | static int lapic_nmi_suspend(struct sys_device *dev, u32 state) | ||
269 | { | ||
270 | nmi_pm_active = nmi_active; | ||
271 | disable_lapic_nmi_watchdog(); | ||
272 | return 0; | ||
273 | } | ||
274 | |||
275 | static int lapic_nmi_resume(struct sys_device *dev) | ||
276 | { | ||
277 | if (nmi_pm_active > 0) | ||
278 | enable_lapic_nmi_watchdog(); | ||
279 | return 0; | ||
280 | } | ||
281 | |||
282 | |||
283 | static struct sysdev_class nmi_sysclass = { | ||
284 | set_kset_name("lapic_nmi"), | ||
285 | .resume = lapic_nmi_resume, | ||
286 | .suspend = lapic_nmi_suspend, | ||
287 | }; | ||
288 | |||
289 | static struct sys_device device_lapic_nmi = { | ||
290 | .id = 0, | ||
291 | .cls = &nmi_sysclass, | ||
292 | }; | ||
293 | |||
294 | static int __init init_lapic_nmi_sysfs(void) | ||
295 | { | ||
296 | int error; | ||
297 | |||
298 | if (nmi_active == 0 || nmi_watchdog != NMI_LOCAL_APIC) | ||
299 | return 0; | ||
300 | |||
301 | error = sysdev_class_register(&nmi_sysclass); | ||
302 | if (!error) | ||
303 | error = sysdev_register(&device_lapic_nmi); | ||
304 | return error; | ||
305 | } | ||
306 | /* must come after the local APIC's device_initcall() */ | ||
307 | late_initcall(init_lapic_nmi_sysfs); | ||
308 | |||
309 | #endif /* CONFIG_PM */ | ||
310 | |||
311 | /* | ||
312 | * Activate the NMI watchdog via the local APIC. | ||
313 | * Original code written by Keith Owens. | ||
314 | */ | ||
315 | |||
316 | static void clear_msr_range(unsigned int base, unsigned int n) | ||
317 | { | ||
318 | unsigned int i; | ||
319 | |||
320 | for(i = 0; i < n; ++i) | ||
321 | wrmsr(base+i, 0, 0); | ||
322 | } | ||
323 | |||
324 | static void setup_k7_watchdog(void) | ||
325 | { | ||
326 | unsigned int evntsel; | ||
327 | |||
328 | nmi_perfctr_msr = MSR_K7_PERFCTR0; | ||
329 | |||
330 | clear_msr_range(MSR_K7_EVNTSEL0, 4); | ||
331 | clear_msr_range(MSR_K7_PERFCTR0, 4); | ||
332 | |||
333 | evntsel = K7_EVNTSEL_INT | ||
334 | | K7_EVNTSEL_OS | ||
335 | | K7_EVNTSEL_USR | ||
336 | | K7_NMI_EVENT; | ||
337 | |||
338 | wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); | ||
339 | Dprintk("setting K7_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000)); | ||
340 | wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1); | ||
341 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
342 | evntsel |= K7_EVNTSEL_ENABLE; | ||
343 | wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); | ||
344 | } | ||
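The wrmsr of -(cpu_khz/nmi_hz*1000) loads the counter that many cycles below overflow, so the overflow interrupt (routed as an NMI via LVTPC) fires once per watchdog period; the same trick recurs in the P6 and P4 setup below. A worked example with an assumed 2 GHz clock:

#include <stdio.h>

int main(void)
{
	/* Hypothetical 2 GHz CPU: cpu_khz == 2000000. With nmi_hz == 1000
	 * the counter starts 2,000,000 cycles below overflow, i.e. the
	 * NMI fires every millisecond -- nmi_hz times per second. */
	unsigned int cpu_khz = 2000000, nmi_hz = 1000;
	unsigned int cycles = cpu_khz / nmi_hz * 1000;

	printf("cycles per NMI period: %u\n", cycles);	/* 2000000 */
	printf("initial counter value: %#x\n", -cycles);	/* two's complement */
	return 0;
}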
345 | |||
346 | static void setup_p6_watchdog(void) | ||
347 | { | ||
348 | unsigned int evntsel; | ||
349 | |||
350 | nmi_perfctr_msr = MSR_P6_PERFCTR0; | ||
351 | |||
352 | clear_msr_range(MSR_P6_EVNTSEL0, 2); | ||
353 | clear_msr_range(MSR_P6_PERFCTR0, 2); | ||
354 | |||
355 | evntsel = P6_EVNTSEL_INT | ||
356 | | P6_EVNTSEL_OS | ||
357 | | P6_EVNTSEL_USR | ||
358 | | P6_NMI_EVENT; | ||
359 | |||
360 | wrmsr(MSR_P6_EVNTSEL0, evntsel, 0); | ||
361 | Dprintk("setting P6_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000)); | ||
362 | wrmsr(MSR_P6_PERFCTR0, -(cpu_khz/nmi_hz*1000), 0); | ||
363 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
364 | evntsel |= P6_EVNTSEL0_ENABLE; | ||
365 | wrmsr(MSR_P6_EVNTSEL0, evntsel, 0); | ||
366 | } | ||
367 | |||
368 | static int setup_p4_watchdog(void) | ||
369 | { | ||
370 | unsigned int misc_enable, dummy; | ||
371 | |||
372 | rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy); | ||
373 | if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) | ||
374 | return 0; | ||
375 | |||
376 | nmi_perfctr_msr = MSR_P4_IQ_COUNTER0; | ||
377 | nmi_p4_cccr_val = P4_NMI_IQ_CCCR0; | ||
378 | #ifdef CONFIG_SMP | ||
379 | if (smp_num_siblings == 2) | ||
380 | nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1; | ||
381 | #endif | ||
382 | |||
383 | if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL)) | ||
384 | clear_msr_range(0x3F1, 2); | ||
385 | /* MSR 0x3F0 seems to have a default value of 0xFC00, but current | ||
386 | docs don't fully define it, so leave it alone for now. */ | ||
387 | if (boot_cpu_data.x86_model >= 0x3) { | ||
388 | /* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */ | ||
389 | clear_msr_range(0x3A0, 26); | ||
390 | clear_msr_range(0x3BC, 3); | ||
391 | } else { | ||
392 | clear_msr_range(0x3A0, 31); | ||
393 | } | ||
394 | clear_msr_range(0x3C0, 6); | ||
395 | clear_msr_range(0x3C8, 6); | ||
396 | clear_msr_range(0x3E0, 2); | ||
397 | clear_msr_range(MSR_P4_CCCR0, 18); | ||
398 | clear_msr_range(MSR_P4_PERFCTR0, 18); | ||
399 | |||
400 | wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0); | ||
401 | wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0); | ||
402 | Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000)); | ||
403 | wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1); | ||
404 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
405 | wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); | ||
406 | return 1; | ||
407 | } | ||
408 | |||
409 | void setup_apic_nmi_watchdog (void) | ||
410 | { | ||
411 | switch (boot_cpu_data.x86_vendor) { | ||
412 | case X86_VENDOR_AMD: | ||
413 | if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15) | ||
414 | return; | ||
415 | setup_k7_watchdog(); | ||
416 | break; | ||
417 | case X86_VENDOR_INTEL: | ||
418 | switch (boot_cpu_data.x86) { | ||
419 | case 6: | ||
420 | if (boot_cpu_data.x86_model > 0xd) | ||
421 | return; | ||
422 | |||
423 | setup_p6_watchdog(); | ||
424 | break; | ||
425 | case 15: | ||
426 | if (boot_cpu_data.x86_model > 0x3) | ||
427 | return; | ||
428 | |||
429 | if (!setup_p4_watchdog()) | ||
430 | return; | ||
431 | break; | ||
432 | default: | ||
433 | return; | ||
434 | } | ||
435 | break; | ||
436 | default: | ||
437 | return; | ||
438 | } | ||
439 | lapic_nmi_owner = LAPIC_NMI_WATCHDOG; | ||
440 | nmi_active = 1; | ||
441 | } | ||
442 | |||
443 | /* | ||
444 | * the best way to detect whether a CPU has a 'hard lockup' problem | ||
445 | * is to check its local APIC timer IRQ counts. If they are not | ||
446 | * changing then that CPU has some problem. | ||
447 | * | ||
448 | * as these watchdog NMI IRQs are generated on every CPU, we only | ||
449 | * have to check the current processor. | ||
450 | * | ||
451 | * since NMIs don't listen to _any_ locks, we have to be extremely | ||
452 | * careful not to rely on unsafe variables. The printk might lock | ||
453 | * up though, so we have to break up any console locks first ... | ||
454 | * [when there will be more tty-related locks, break them up | ||
455 | * here too!] | ||
456 | */ | ||
457 | |||
458 | static unsigned int | ||
459 | last_irq_sums [NR_CPUS], | ||
460 | alert_counter [NR_CPUS]; | ||
461 | |||
462 | void touch_nmi_watchdog (void) | ||
463 | { | ||
464 | int i; | ||
465 | |||
466 | /* | ||
467 | * Just reset the alert counters, (other CPUs might be | ||
468 | * spinning on locks we hold): | ||
469 | */ | ||
470 | for (i = 0; i < NR_CPUS; i++) | ||
471 | alert_counter[i] = 0; | ||
472 | } | ||
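Because touch_nmi_watchdog() simply zeroes every CPU's alert counter, long-running code that legitimately stalls the timer interrupt can call it periodically to stay below the lockup threshold used in nmi_watchdog_tick() below. A hedged sketch (the loop body is invented):

/* Hypothetical long-running dump loop with interrupts disabled:
 * poke the watchdog so the stuck-CPU detector never trips. */
static void example_slow_dump(void)
{
	int i;

	for (i = 0; i < 100000; i++) {
		/* ... print one line of machine state ... */
		touch_nmi_watchdog();
	}
}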
473 | |||
474 | extern void die_nmi(struct pt_regs *, const char *msg); | ||
475 | |||
476 | void nmi_watchdog_tick (struct pt_regs * regs) | ||
477 | { | ||
478 | |||
479 | /* | ||
480 | * Since current_thread_info()-> is always on the stack, and we | ||
481 | * always switch the stack NMI-atomically, it's safe to use | ||
482 | * smp_processor_id(). | ||
483 | */ | ||
484 | int sum, cpu = smp_processor_id(); | ||
485 | |||
486 | sum = per_cpu(irq_stat, cpu).apic_timer_irqs; | ||
487 | |||
488 | if (last_irq_sums[cpu] == sum) { | ||
489 | /* | ||
490 | * Ayiee, looks like this CPU is stuck ... | ||
491 | * wait a few IRQs (5 seconds) before doing the oops ... | ||
492 | */ | ||
493 | alert_counter[cpu]++; | ||
494 | if (alert_counter[cpu] == 5*nmi_hz) | ||
495 | die_nmi(regs, "NMI Watchdog detected LOCKUP"); | ||
496 | } else { | ||
497 | last_irq_sums[cpu] = sum; | ||
498 | alert_counter[cpu] = 0; | ||
499 | } | ||
500 | if (nmi_perfctr_msr) { | ||
501 | if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) { | ||
502 | /* | ||
503 | * P4 quirks: | ||
504 | * - An overflown perfctr will assert its interrupt | ||
505 | * until the OVF flag in its CCCR is cleared. | ||
506 | * - LVTPC is masked on interrupt and must be | ||
507 | * unmasked by the LVTPC handler. | ||
508 | */ | ||
509 | wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); | ||
510 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
511 | } | ||
512 | else if (nmi_perfctr_msr == MSR_P6_PERFCTR0) { | ||
513 | /* Only P6-based Pentium M needs to re-unmask | ||
514 | * the APIC vector, but it doesn't hurt | ||
515 | * other P6 variants */ | ||
516 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
517 | } | ||
518 | wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1); | ||
519 | } | ||
520 | } | ||
521 | |||
522 | #ifdef CONFIG_SYSCTL | ||
523 | |||
524 | static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) | ||
525 | { | ||
526 | unsigned char reason = get_nmi_reason(); | ||
527 | char buf[64]; | ||
528 | |||
529 | if (!(reason & 0xc0)) { | ||
530 | sprintf(buf, "NMI received for unknown reason %02x\n", reason); | ||
531 | die_nmi(regs, buf); | ||
532 | } | ||
533 | return 0; | ||
534 | } | ||
535 | |||
536 | /* | ||
537 | * proc handler for /proc/sys/kernel/unknown_nmi_panic | ||
538 | */ | ||
539 | int proc_unknown_nmi_panic(ctl_table *table, int write, struct file *file, | ||
540 | void __user *buffer, size_t *length, loff_t *ppos) | ||
541 | { | ||
542 | int old_state; | ||
543 | |||
544 | old_state = unknown_nmi_panic; | ||
545 | proc_dointvec(table, write, file, buffer, length, ppos); | ||
546 | if (!!old_state == !!unknown_nmi_panic) | ||
547 | return 0; | ||
548 | |||
549 | if (unknown_nmi_panic) { | ||
550 | if (reserve_lapic_nmi() < 0) { | ||
551 | unknown_nmi_panic = 0; | ||
552 | return -EBUSY; | ||
553 | } else { | ||
554 | set_nmi_callback(unknown_nmi_panic_callback); | ||
555 | } | ||
556 | } else { | ||
557 | release_lapic_nmi(); | ||
558 | unset_nmi_callback(); | ||
559 | } | ||
560 | return 0; | ||
561 | } | ||
562 | |||
563 | #endif | ||
564 | |||
565 | EXPORT_SYMBOL(nmi_active); | ||
566 | EXPORT_SYMBOL(nmi_watchdog); | ||
567 | EXPORT_SYMBOL(reserve_lapic_nmi); | ||
568 | EXPORT_SYMBOL(release_lapic_nmi); | ||
569 | EXPORT_SYMBOL(disable_timer_nmi_watchdog); | ||
570 | EXPORT_SYMBOL(enable_timer_nmi_watchdog); | ||
diff --git a/arch/i386/kernel/numaq.c b/arch/i386/kernel/numaq.c new file mode 100644 index 000000000000..e51edf0a6564 --- /dev/null +++ b/arch/i386/kernel/numaq.c | |||
@@ -0,0 +1,79 @@ | |||
1 | /* | ||
2 | * Written by: Patricia Gaughen, IBM Corporation | ||
3 | * | ||
4 | * Copyright (C) 2002, IBM Corp. | ||
5 | * | ||
6 | * All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, but | ||
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
16 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
17 | * details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License | ||
20 | * along with this program; if not, write to the Free Software | ||
21 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
22 | * | ||
23 | * Send feedback to <gone@us.ibm.com> | ||
24 | */ | ||
25 | |||
26 | #include <linux/config.h> | ||
27 | #include <linux/mm.h> | ||
28 | #include <linux/bootmem.h> | ||
29 | #include <linux/mmzone.h> | ||
30 | #include <linux/module.h> | ||
31 | #include <linux/nodemask.h> | ||
32 | #include <asm/numaq.h> | ||
33 | #include <asm/topology.h> | ||
34 | |||
35 | #define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) | ||
36 | |||
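A quick sanity check on the macro above: with the usual i386 PAGE_SHIFT of 12, the shift is 20 - 12 = 8, so one megabyte is 256 pages. A standalone sketch (PAGE_SHIFT hard-coded here as an assumption):

#include <stdio.h>

#define PAGE_SHIFT 12	/* assumed i386 value */
#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))

int main(void)
{
	printf("%d\n", MB_TO_PAGES(64));	/* 64 MB -> 16384 pages */
	return 0;
}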
37 | /* | ||
38 | * Function: smp_dump_qct() | ||
39 | * | ||
40 | * Description: gets memory layout from the quad config table. This | ||
41 | * function also updates node_online_map with the nodes (quads) present. | ||
42 | */ | ||
43 | static void __init smp_dump_qct(void) | ||
44 | { | ||
45 | int node; | ||
46 | struct eachquadmem *eq; | ||
47 | struct sys_cfg_data *scd = | ||
48 | (struct sys_cfg_data *)__va(SYS_CFG_DATA_PRIV_ADDR); | ||
49 | |||
50 | nodes_clear(node_online_map); | ||
51 | for_each_node(node) { | ||
52 | if (scd->quads_present31_0 & (1 << node)) { | ||
53 | node_set_online(node); | ||
54 | eq = &scd->eq[node]; | ||
55 | /* Convert to pages */ | ||
56 | node_start_pfn[node] = MB_TO_PAGES( | ||
57 | eq->hi_shrd_mem_start - eq->priv_mem_size); | ||
58 | node_end_pfn[node] = MB_TO_PAGES( | ||
59 | eq->hi_shrd_mem_start + eq->hi_shrd_mem_size); | ||
60 | |||
61 | memory_present(node, | ||
62 | node_start_pfn[node], node_end_pfn[node]); | ||
63 | node_remap_size[node] = node_memmap_size_bytes(node, | ||
64 | node_start_pfn[node], | ||
65 | node_end_pfn[node]); | ||
66 | } | ||
67 | } | ||
68 | } | ||
69 | |||
70 | /* | ||
71 | * Unlike Summit, we don't really care to let the NUMA-Q | ||
72 | * fall back to flat mode. Don't compile for NUMA-Q | ||
73 | * unless you really need it! | ||
74 | */ | ||
75 | int __init get_memcfg_numaq(void) | ||
76 | { | ||
77 | smp_dump_qct(); | ||
78 | return 1; | ||
79 | } | ||
diff --git a/arch/i386/kernel/pci-dma.c b/arch/i386/kernel/pci-dma.c new file mode 100644 index 000000000000..4de2e03c7b45 --- /dev/null +++ b/arch/i386/kernel/pci-dma.c | |||
@@ -0,0 +1,147 @@ | |||
1 | /* | ||
2 | * Dynamic DMA mapping support. | ||
3 | * | ||
4 | * On i386 there is no hardware dynamic DMA address translation, | ||
5 | * so consistent alloc/free are merely page allocation/freeing. | ||
6 | * The rest of the dynamic DMA mapping interface is implemented | ||
7 | * in asm/pci.h. | ||
8 | */ | ||
9 | |||
10 | #include <linux/types.h> | ||
11 | #include <linux/mm.h> | ||
12 | #include <linux/string.h> | ||
13 | #include <linux/pci.h> | ||
14 | #include <asm/io.h> | ||
15 | |||
16 | struct dma_coherent_mem { | ||
17 | void *virt_base; | ||
18 | u32 device_base; | ||
19 | int size; | ||
20 | int flags; | ||
21 | unsigned long *bitmap; | ||
22 | }; | ||
23 | |||
24 | void *dma_alloc_coherent(struct device *dev, size_t size, | ||
25 | dma_addr_t *dma_handle, unsigned int __nocast gfp) | ||
26 | { | ||
27 | void *ret; | ||
28 | struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; | ||
29 | int order = get_order(size); | ||
30 | /* ignore region specifiers */ | ||
31 | gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); | ||
32 | |||
33 | if (mem) { | ||
34 | int page = bitmap_find_free_region(mem->bitmap, mem->size, | ||
35 | order); | ||
36 | if (page >= 0) { | ||
37 | *dma_handle = mem->device_base + (page << PAGE_SHIFT); | ||
38 | ret = mem->virt_base + (page << PAGE_SHIFT); | ||
39 | memset(ret, 0, size); | ||
40 | return ret; | ||
41 | } | ||
42 | if (mem->flags & DMA_MEMORY_EXCLUSIVE) | ||
43 | return NULL; | ||
44 | } | ||
45 | |||
46 | if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff)) | ||
47 | gfp |= GFP_DMA; | ||
48 | |||
49 | ret = (void *)__get_free_pages(gfp, order); | ||
50 | |||
51 | if (ret != NULL) { | ||
52 | memset(ret, 0, size); | ||
53 | *dma_handle = virt_to_phys(ret); | ||
54 | } | ||
55 | return ret; | ||
56 | } | ||
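A hedged usage sketch for the allocator above (the device, buffer size, and names are invented for illustration): drivers pair dma_alloc_coherent() with dma_free_coherent() and program the hardware with the returned bus address.

/* Hypothetical driver fragment using the i386 coherent allocator. */
#include <linux/pci.h>
#include <linux/dma-mapping.h>

static void *ring;
static dma_addr_t ring_dma;

static int example_setup(struct pci_dev *pdev)
{
	ring = dma_alloc_coherent(&pdev->dev, 4096, &ring_dma, GFP_KERNEL);
	if (!ring)
		return -ENOMEM;
	/* ring_dma is what the device's DMA engine is programmed with */
	return 0;
}

static void example_teardown(struct pci_dev *pdev)
{
	dma_free_coherent(&pdev->dev, 4096, ring, ring_dma);
}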
57 | |||
58 | void dma_free_coherent(struct device *dev, size_t size, | ||
59 | void *vaddr, dma_addr_t dma_handle) | ||
60 | { | ||
61 | struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; | ||
62 | int order = get_order(size); | ||
63 | |||
64 | if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) { | ||
65 | int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; | ||
66 | |||
67 | bitmap_release_region(mem->bitmap, page, order); | ||
68 | } else | ||
69 | free_pages((unsigned long)vaddr, order); | ||
70 | } | ||
71 | |||
72 | int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, | ||
73 | dma_addr_t device_addr, size_t size, int flags) | ||
74 | { | ||
75 | void __iomem *mem_base; | ||
76 | int pages = size >> PAGE_SHIFT; | ||
77 | int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); /* one bit per page */ | ||
78 | |||
79 | if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0) | ||
80 | goto out; | ||
81 | if (!size) | ||
82 | goto out; | ||
83 | if (dev->dma_mem) | ||
84 | goto out; | ||
85 | |||
86 | /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */ | ||
87 | |||
88 | mem_base = ioremap(bus_addr, size); | ||
89 | if (!mem_base) | ||
90 | goto out; | ||
91 | |||
92 | dev->dma_mem = kmalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); | ||
93 | if (!dev->dma_mem) | ||
94 | goto out; | ||
95 | memset(dev->dma_mem, 0, sizeof(struct dma_coherent_mem)); | ||
96 | dev->dma_mem->bitmap = kmalloc(bitmap_size, GFP_KERNEL); | ||
97 | if (!dev->dma_mem->bitmap) | ||
98 | goto free1_out; | ||
99 | memset(dev->dma_mem->bitmap, 0, bitmap_size); | ||
100 | |||
101 | dev->dma_mem->virt_base = mem_base; | ||
102 | dev->dma_mem->device_base = device_addr; | ||
103 | dev->dma_mem->size = pages; | ||
104 | dev->dma_mem->flags = flags; | ||
105 | |||
106 | if (flags & DMA_MEMORY_MAP) | ||
107 | return DMA_MEMORY_MAP; | ||
108 | |||
109 | return DMA_MEMORY_IO; | ||
110 | |||
111 | free1_out: | ||
112 | kfree(dev->dma_mem->bitmap); | ||
113 | out: | ||
114 | return 0; | ||
115 | } | ||
116 | EXPORT_SYMBOL(dma_declare_coherent_memory); | ||
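A hedged sketch of a platform caller for the export above; the bus address, device address, and size are invented. On success the function returns the granted access method (DMA_MEMORY_MAP here), and 0 on failure, hence the inverted error check.

/* Hypothetical caller: carve out 64 KB of device-visible memory so
 * that later dma_alloc_coherent() calls on this device are served
 * from it (see the mem-first path in dma_alloc_coherent() above). */
#include <linux/device.h>
#include <linux/dma-mapping.h>

static int example_declare(struct device *dev)
{
	int rc = dma_declare_coherent_memory(dev,
					     0xfe000000,	/* bus_addr, assumed */
					     0xfe000000,	/* device_addr, assumed */
					     0x10000,		/* 64 KB */
					     DMA_MEMORY_MAP);
	return rc ? 0 : -ENODEV;
}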
117 | |||
118 | void dma_release_declared_memory(struct device *dev) | ||
119 | { | ||
120 | struct dma_coherent_mem *mem = dev->dma_mem; | ||
121 | |||
122 | if(!mem) | ||
123 | return; | ||
124 | dev->dma_mem = NULL; | ||
125 | iounmap(mem->virt_base); | ||
126 | kfree(mem->bitmap); | ||
127 | kfree(mem); | ||
128 | } | ||
129 | EXPORT_SYMBOL(dma_release_declared_memory); | ||
130 | |||
131 | void *dma_mark_declared_memory_occupied(struct device *dev, | ||
132 | dma_addr_t device_addr, size_t size) | ||
133 | { | ||
134 | struct dma_coherent_mem *mem = dev->dma_mem; | ||
135 | int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
136 | int pos, err; | ||
137 | |||
138 | if (!mem) | ||
139 | return ERR_PTR(-EINVAL); | ||
140 | |||
141 | pos = (device_addr - mem->device_base) >> PAGE_SHIFT; | ||
142 | err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages << PAGE_SHIFT)); | ||
143 | if (err != 0) | ||
144 | return ERR_PTR(err); | ||
145 | return mem->virt_base + (pos << PAGE_SHIFT); | ||
146 | } | ||
147 | EXPORT_SYMBOL(dma_mark_declared_memory_occupied); | ||
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c new file mode 100644 index 000000000000..c36fedf40e95 --- /dev/null +++ b/arch/i386/kernel/process.c | |||
@@ -0,0 +1,848 @@ | |||
1 | /* | ||
2 | * linux/arch/i386/kernel/process.c | ||
3 | * | ||
4 | * Copyright (C) 1995 Linus Torvalds | ||
5 | * | ||
6 | * Pentium III FXSR, SSE support | ||
7 | * Gareth Hughes <gareth@valinux.com>, May 2000 | ||
8 | */ | ||
9 | |||
10 | /* | ||
11 | * This file handles the architecture-dependent parts of process handling.. | ||
12 | */ | ||
13 | |||
14 | #include <stdarg.h> | ||
15 | |||
16 | #include <linux/errno.h> | ||
17 | #include <linux/sched.h> | ||
18 | #include <linux/fs.h> | ||
19 | #include <linux/kernel.h> | ||
20 | #include <linux/mm.h> | ||
21 | #include <linux/elfcore.h> | ||
22 | #include <linux/smp.h> | ||
23 | #include <linux/smp_lock.h> | ||
24 | #include <linux/stddef.h> | ||
25 | #include <linux/slab.h> | ||
26 | #include <linux/vmalloc.h> | ||
27 | #include <linux/user.h> | ||
28 | #include <linux/a.out.h> | ||
29 | #include <linux/interrupt.h> | ||
30 | #include <linux/config.h> | ||
31 | #include <linux/utsname.h> | ||
32 | #include <linux/delay.h> | ||
33 | #include <linux/reboot.h> | ||
34 | #include <linux/init.h> | ||
35 | #include <linux/mc146818rtc.h> | ||
36 | #include <linux/module.h> | ||
37 | #include <linux/kallsyms.h> | ||
38 | #include <linux/ptrace.h> | ||
39 | #include <linux/random.h> | ||
40 | |||
41 | #include <asm/uaccess.h> | ||
42 | #include <asm/pgtable.h> | ||
43 | #include <asm/system.h> | ||
44 | #include <asm/io.h> | ||
45 | #include <asm/ldt.h> | ||
46 | #include <asm/processor.h> | ||
47 | #include <asm/i387.h> | ||
48 | #include <asm/irq.h> | ||
49 | #include <asm/desc.h> | ||
50 | #ifdef CONFIG_MATH_EMULATION | ||
51 | #include <asm/math_emu.h> | ||
52 | #endif | ||
53 | |||
54 | #include <linux/irq.h> | ||
55 | #include <linux/err.h> | ||
56 | |||
57 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | ||
58 | |||
59 | static int hlt_counter; | ||
60 | |||
61 | unsigned long boot_option_idle_override = 0; | ||
62 | EXPORT_SYMBOL(boot_option_idle_override); | ||
63 | |||
64 | /* | ||
65 | * Return saved PC of a blocked thread. | ||
66 | */ | ||
67 | unsigned long thread_saved_pc(struct task_struct *tsk) | ||
68 | { | ||
69 | return ((unsigned long *)tsk->thread.esp)[3]; | ||
70 | } | ||
71 | |||
72 | /* | ||
73 | * Power-management idle function, if any.. | ||
74 | */ | ||
75 | void (*pm_idle)(void); | ||
76 | static DEFINE_PER_CPU(unsigned int, cpu_idle_state); | ||
77 | |||
78 | void disable_hlt(void) | ||
79 | { | ||
80 | hlt_counter++; | ||
81 | } | ||
82 | |||
83 | EXPORT_SYMBOL(disable_hlt); | ||
84 | |||
85 | void enable_hlt(void) | ||
86 | { | ||
87 | hlt_counter--; | ||
88 | } | ||
89 | |||
90 | EXPORT_SYMBOL(enable_hlt); | ||
91 | |||
92 | /* | ||
93 | * We use this if we don't have any better | ||
94 | * idle routine.. | ||
95 | */ | ||
96 | void default_idle(void) | ||
97 | { | ||
98 | if (!hlt_counter && boot_cpu_data.hlt_works_ok) { | ||
99 | local_irq_disable(); | ||
100 | if (!need_resched()) | ||
101 | safe_halt(); | ||
102 | else | ||
103 | local_irq_enable(); | ||
104 | } else { | ||
105 | cpu_relax(); | ||
106 | } | ||
107 | } | ||
108 | |||
109 | /* | ||
110 | * On SMP it's slightly faster (but much more power-consuming!) | ||
111 | * to poll the ->work.need_resched flag instead of waiting for the | ||
112 | * cross-CPU IPI to arrive. Use this option with caution. | ||
113 | */ | ||
114 | static void poll_idle (void) | ||
115 | { | ||
116 | int oldval; | ||
117 | |||
118 | local_irq_enable(); | ||
119 | |||
120 | /* | ||
121 | * Deal with another CPU just having chosen a thread to | ||
122 | * run here: | ||
123 | */ | ||
124 | oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); | ||
125 | |||
126 | if (!oldval) { | ||
127 | set_thread_flag(TIF_POLLING_NRFLAG); | ||
128 | asm volatile( | ||
129 | "2:" | ||
130 | "testl %0, %1;" | ||
131 | "rep; nop;" | ||
132 | "je 2b;" | ||
133 | : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags)); | ||
134 | |||
135 | clear_thread_flag(TIF_POLLING_NRFLAG); | ||
136 | } else { | ||
137 | set_need_resched(); | ||
138 | } | ||
139 | } | ||
140 | |||
141 | /* | ||
142 | * The idle thread. There's no useful work to be | ||
143 | * done, so just try to conserve power and have a | ||
144 | * low exit latency (ie sit in a loop waiting for | ||
145 | * somebody to say that they'd like to reschedule) | ||
146 | */ | ||
147 | void cpu_idle (void) | ||
148 | { | ||
149 | /* endless idle loop with no priority at all */ | ||
150 | while (1) { | ||
151 | while (!need_resched()) { | ||
152 | void (*idle)(void); | ||
153 | |||
154 | if (__get_cpu_var(cpu_idle_state)) | ||
155 | __get_cpu_var(cpu_idle_state) = 0; | ||
156 | |||
157 | rmb(); | ||
158 | idle = pm_idle; | ||
159 | |||
160 | if (!idle) | ||
161 | idle = default_idle; | ||
162 | |||
163 | __get_cpu_var(irq_stat).idle_timestamp = jiffies; | ||
164 | idle(); | ||
165 | } | ||
166 | schedule(); | ||
167 | } | ||
168 | } | ||
169 | |||
170 | void cpu_idle_wait(void) | ||
171 | { | ||
172 | unsigned int cpu, this_cpu = get_cpu(); | ||
173 | cpumask_t map; | ||
174 | |||
175 | set_cpus_allowed(current, cpumask_of_cpu(this_cpu)); | ||
176 | put_cpu(); | ||
177 | |||
178 | cpus_clear(map); | ||
179 | for_each_online_cpu(cpu) { | ||
180 | per_cpu(cpu_idle_state, cpu) = 1; | ||
181 | cpu_set(cpu, map); | ||
182 | } | ||
183 | |||
184 | __get_cpu_var(cpu_idle_state) = 0; | ||
185 | |||
186 | wmb(); | ||
187 | do { | ||
188 | ssleep(1); | ||
189 | for_each_online_cpu(cpu) { | ||
190 | if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu)) | ||
191 | cpu_clear(cpu, map); | ||
192 | } | ||
193 | cpus_and(map, map, cpu_online_map); | ||
194 | } while (!cpus_empty(map)); | ||
195 | } | ||
196 | EXPORT_SYMBOL_GPL(cpu_idle_wait); | ||
197 | |||
198 | /* | ||
199 | * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, | ||
200 | * which can obviate IPI to trigger checking of need_resched. | ||
201 | * We execute MONITOR against need_resched and enter optimized wait state | ||
202 | * through MWAIT. Whenever someone changes need_resched, we would be woken | ||
203 | * up from MWAIT (without an IPI). | ||
204 | */ | ||
205 | static void mwait_idle(void) | ||
206 | { | ||
207 | local_irq_enable(); | ||
208 | |||
209 | if (!need_resched()) { | ||
210 | set_thread_flag(TIF_POLLING_NRFLAG); | ||
211 | do { | ||
212 | __monitor((void *)&current_thread_info()->flags, 0, 0); | ||
213 | if (need_resched()) | ||
214 | break; | ||
215 | __mwait(0, 0); | ||
216 | } while (!need_resched()); | ||
217 | clear_thread_flag(TIF_POLLING_NRFLAG); | ||
218 | } | ||
219 | } | ||
220 | |||
221 | void __init select_idle_routine(const struct cpuinfo_x86 *c) | ||
222 | { | ||
223 | if (cpu_has(c, X86_FEATURE_MWAIT)) { | ||
224 | printk("monitor/mwait feature present.\n"); | ||
225 | /* | ||
226 | * Skip, if setup has overridden idle. | ||
227 | * One CPU supports mwait => All CPUs support mwait | ||
228 | */ | ||
229 | if (!pm_idle) { | ||
230 | printk("using mwait in idle threads.\n"); | ||
231 | pm_idle = mwait_idle; | ||
232 | } | ||
233 | } | ||
234 | } | ||
235 | |||
236 | static int __init idle_setup (char *str) | ||
237 | { | ||
238 | if (!strncmp(str, "poll", 4)) { | ||
239 | printk("using polling idle threads.\n"); | ||
240 | pm_idle = poll_idle; | ||
241 | #ifdef CONFIG_X86_SMP | ||
242 | if (smp_num_siblings > 1) | ||
243 | printk("WARNING: polling idle and HT enabled, performance may degrade.\n"); | ||
244 | #endif | ||
245 | } else if (!strncmp(str, "halt", 4)) { | ||
246 | printk("using halt in idle threads.\n"); | ||
247 | pm_idle = default_idle; | ||
248 | } | ||
249 | |||
250 | boot_option_idle_override = 1; | ||
251 | return 1; | ||
252 | } | ||
253 | |||
254 | __setup("idle=", idle_setup); | ||
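For reference, the handler above consumes the kernel command line, so a boot entry like the following (image and root device names are placeholders) selects the polling loop; idle=halt likewise forces default_idle over any mwait autodetection:

kernel /boot/vmlinuz-2.6.12 root=/dev/hda1 idle=poll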
255 | |||
256 | void show_regs(struct pt_regs * regs) | ||
257 | { | ||
258 | unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; | ||
259 | |||
260 | printk("\n"); | ||
261 | printk("Pid: %d, comm: %20s\n", current->pid, current->comm); | ||
262 | printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id()); | ||
263 | print_symbol("EIP is at %s\n", regs->eip); | ||
264 | |||
265 | if (regs->xcs & 3) | ||
266 | printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); | ||
267 | printk(" EFLAGS: %08lx %s (%s)\n", | ||
268 | regs->eflags, print_tainted(), system_utsname.release); | ||
269 | printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", | ||
270 | regs->eax,regs->ebx,regs->ecx,regs->edx); | ||
271 | printk("ESI: %08lx EDI: %08lx EBP: %08lx", | ||
272 | regs->esi, regs->edi, regs->ebp); | ||
273 | printk(" DS: %04x ES: %04x\n", | ||
274 | 0xffff & regs->xds,0xffff & regs->xes); | ||
275 | |||
276 | __asm__("movl %%cr0, %0": "=r" (cr0)); | ||
277 | __asm__("movl %%cr2, %0": "=r" (cr2)); | ||
278 | __asm__("movl %%cr3, %0": "=r" (cr3)); | ||
279 | /* This could fault if %cr4 does not exist */ | ||
280 | __asm__("1: movl %%cr4, %0 \n" | ||
281 | "2: \n" | ||
282 | ".section __ex_table,\"a\" \n" | ||
283 | ".long 1b,2b \n" | ||
284 | ".previous \n" | ||
285 | : "=r" (cr4): "0" (0)); | ||
286 | printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); | ||
287 | show_trace(NULL, &regs->esp); | ||
288 | } | ||
289 | |||
290 | /* | ||
291 | * This gets run with %ebx containing the | ||
292 | * function to call, and %edx containing | ||
293 | * the "args". | ||
294 | */ | ||
295 | extern void kernel_thread_helper(void); | ||
296 | __asm__(".section .text\n" | ||
297 | ".align 4\n" | ||
298 | "kernel_thread_helper:\n\t" | ||
299 | "movl %edx,%eax\n\t" | ||
300 | "pushl %edx\n\t" | ||
301 | "call *%ebx\n\t" | ||
302 | "pushl %eax\n\t" | ||
303 | "call do_exit\n" | ||
304 | ".previous"); | ||
305 | |||
306 | /* | ||
307 | * Create a kernel thread | ||
308 | */ | ||
309 | int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) | ||
310 | { | ||
311 | struct pt_regs regs; | ||
312 | |||
313 | memset(&regs, 0, sizeof(regs)); | ||
314 | |||
315 | regs.ebx = (unsigned long) fn; | ||
316 | regs.edx = (unsigned long) arg; | ||
317 | |||
318 | regs.xds = __USER_DS; | ||
319 | regs.xes = __USER_DS; | ||
320 | regs.orig_eax = -1; | ||
321 | regs.eip = (unsigned long) kernel_thread_helper; | ||
322 | regs.xcs = __KERNEL_CS; | ||
323 | regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2; | ||
324 | |||
325 | /* Ok, create the new process.. */ | ||
326 | return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL); | ||
327 | } | ||
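A hedged sketch of an in-kernel caller (the worker function and flag choice are invented, though the flags mirror common 2.6-era users); kernel_thread() adds CLONE_VM itself, so the child shares the kernel address space and enters fn via kernel_thread_helper:

/* Hypothetical usage of kernel_thread() as defined above. */
static int worker_fn(void *arg)
{
	/* ... do work ... */
	return 0;	/* handed to do_exit() by the helper */
}

static void example_spawn_worker(void)
{
	int pid = kernel_thread(worker_fn, NULL,
				CLONE_FS | CLONE_FILES | CLONE_SIGHAND);

	if (pid < 0)
		printk(KERN_ERR "worker: kernel_thread failed: %d\n", pid);
}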
328 | |||
329 | /* | ||
330 | * Free current thread data structures etc.. | ||
331 | */ | ||
332 | void exit_thread(void) | ||
333 | { | ||
334 | struct task_struct *tsk = current; | ||
335 | struct thread_struct *t = &tsk->thread; | ||
336 | |||
337 | /* The process may have allocated an io port bitmap... nuke it. */ | ||
338 | if (unlikely(NULL != t->io_bitmap_ptr)) { | ||
339 | int cpu = get_cpu(); | ||
340 | struct tss_struct *tss = &per_cpu(init_tss, cpu); | ||
341 | |||
342 | kfree(t->io_bitmap_ptr); | ||
343 | t->io_bitmap_ptr = NULL; | ||
344 | /* | ||
345 | * Careful, clear this in the TSS too: | ||
346 | */ | ||
347 | memset(tss->io_bitmap, 0xff, tss->io_bitmap_max); | ||
348 | t->io_bitmap_max = 0; | ||
349 | tss->io_bitmap_owner = NULL; | ||
350 | tss->io_bitmap_max = 0; | ||
351 | tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; | ||
352 | put_cpu(); | ||
353 | } | ||
354 | } | ||
355 | |||
356 | void flush_thread(void) | ||
357 | { | ||
358 | struct task_struct *tsk = current; | ||
359 | |||
360 | memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); | ||
361 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); | ||
362 | /* | ||
363 | * Forget coprocessor state.. | ||
364 | */ | ||
365 | clear_fpu(tsk); | ||
366 | clear_used_math(); | ||
367 | } | ||
368 | |||
369 | void release_thread(struct task_struct *dead_task) | ||
370 | { | ||
371 | if (dead_task->mm) { | ||
372 | // temporary debugging check | ||
373 | if (dead_task->mm->context.size) { | ||
374 | printk("WARNING: dead process %8s still has LDT? <%p/%d>\n", | ||
375 | dead_task->comm, | ||
376 | dead_task->mm->context.ldt, | ||
377 | dead_task->mm->context.size); | ||
378 | BUG(); | ||
379 | } | ||
380 | } | ||
381 | |||
382 | release_vm86_irqs(dead_task); | ||
383 | } | ||
384 | |||
385 | /* | ||
386 | * This gets called before we allocate a new thread and copy | ||
387 | * the current task into it. | ||
388 | */ | ||
389 | void prepare_to_copy(struct task_struct *tsk) | ||
390 | { | ||
391 | unlazy_fpu(tsk); | ||
392 | } | ||
393 | |||
394 | int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, | ||
395 | unsigned long unused, | ||
396 | struct task_struct * p, struct pt_regs * regs) | ||
397 | { | ||
398 | struct pt_regs * childregs; | ||
399 | struct task_struct *tsk; | ||
400 | int err; | ||
401 | |||
402 | childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1; | ||
403 | *childregs = *regs; | ||
404 | childregs->eax = 0; | ||
405 | childregs->esp = esp; | ||
406 | |||
407 | p->thread.esp = (unsigned long) childregs; | ||
408 | p->thread.esp0 = (unsigned long) (childregs+1); | ||
409 | |||
410 | p->thread.eip = (unsigned long) ret_from_fork; | ||
411 | |||
412 | savesegment(fs,p->thread.fs); | ||
413 | savesegment(gs,p->thread.gs); | ||
414 | |||
415 | tsk = current; | ||
416 | if (unlikely(NULL != tsk->thread.io_bitmap_ptr)) { | ||
417 | p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); | ||
418 | if (!p->thread.io_bitmap_ptr) { | ||
419 | p->thread.io_bitmap_max = 0; | ||
420 | return -ENOMEM; | ||
421 | } | ||
422 | memcpy(p->thread.io_bitmap_ptr, tsk->thread.io_bitmap_ptr, | ||
423 | IO_BITMAP_BYTES); | ||
424 | } | ||
425 | |||
426 | /* | ||
427 | * Set a new TLS for the child thread? | ||
428 | */ | ||
429 | if (clone_flags & CLONE_SETTLS) { | ||
430 | struct desc_struct *desc; | ||
431 | struct user_desc info; | ||
432 | int idx; | ||
433 | |||
434 | err = -EFAULT; | ||
435 | if (copy_from_user(&info, (void __user *)childregs->esi, sizeof(info))) | ||
436 | goto out; | ||
437 | err = -EINVAL; | ||
438 | if (LDT_empty(&info)) | ||
439 | goto out; | ||
440 | |||
441 | idx = info.entry_number; | ||
442 | if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) | ||
443 | goto out; | ||
444 | |||
445 | desc = p->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; | ||
446 | desc->a = LDT_entry_a(&info); | ||
447 | desc->b = LDT_entry_b(&info); | ||
448 | } | ||
449 | |||
450 | err = 0; | ||
451 | out: | ||
452 | if (err && p->thread.io_bitmap_ptr) { | ||
453 | kfree(p->thread.io_bitmap_ptr); | ||
454 | p->thread.io_bitmap_max = 0; | ||
455 | } | ||
456 | return err; | ||
457 | } | ||
458 | |||
459 | /* | ||
460 | * fill in the user structure for a core dump.. | ||
461 | */ | ||
462 | void dump_thread(struct pt_regs * regs, struct user * dump) | ||
463 | { | ||
464 | int i; | ||
465 | |||
466 | /* changed the size calculations - should hopefully work better. lbt */ | ||
467 | dump->magic = CMAGIC; | ||
468 | dump->start_code = 0; | ||
469 | dump->start_stack = regs->esp & ~(PAGE_SIZE - 1); | ||
470 | dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT; | ||
471 | dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT; | ||
472 | dump->u_dsize -= dump->u_tsize; | ||
473 | dump->u_ssize = 0; | ||
474 | for (i = 0; i < 8; i++) | ||
475 | dump->u_debugreg[i] = current->thread.debugreg[i]; | ||
476 | |||
477 | if (dump->start_stack < TASK_SIZE) | ||
478 | dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT; | ||
479 | |||
480 | dump->regs.ebx = regs->ebx; | ||
481 | dump->regs.ecx = regs->ecx; | ||
482 | dump->regs.edx = regs->edx; | ||
483 | dump->regs.esi = regs->esi; | ||
484 | dump->regs.edi = regs->edi; | ||
485 | dump->regs.ebp = regs->ebp; | ||
486 | dump->regs.eax = regs->eax; | ||
487 | dump->regs.ds = regs->xds; | ||
488 | dump->regs.es = regs->xes; | ||
489 | savesegment(fs,dump->regs.fs); | ||
490 | savesegment(gs,dump->regs.gs); | ||
491 | dump->regs.orig_eax = regs->orig_eax; | ||
492 | dump->regs.eip = regs->eip; | ||
493 | dump->regs.cs = regs->xcs; | ||
494 | dump->regs.eflags = regs->eflags; | ||
495 | dump->regs.esp = regs->esp; | ||
496 | dump->regs.ss = regs->xss; | ||
497 | |||
498 | dump->u_fpvalid = dump_fpu (regs, &dump->i387); | ||
499 | } | ||
500 | |||
501 | /* | ||
502 | * Capture the user space registers if the task is not running (in user space) | ||
503 | */ | ||
504 | int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs) | ||
505 | { | ||
506 | struct pt_regs ptregs; | ||
507 | |||
508 | ptregs = *(struct pt_regs *) | ||
509 | ((unsigned long)tsk->thread_info+THREAD_SIZE - sizeof(ptregs)); | ||
510 | ptregs.xcs &= 0xffff; | ||
511 | ptregs.xds &= 0xffff; | ||
512 | ptregs.xes &= 0xffff; | ||
513 | ptregs.xss &= 0xffff; | ||
514 | |||
515 | elf_core_copy_regs(regs, &ptregs); | ||
516 | |||
517 | return 1; | ||
518 | } | ||
519 | |||
520 | static inline void | ||
521 | handle_io_bitmap(struct thread_struct *next, struct tss_struct *tss) | ||
522 | { | ||
523 | if (!next->io_bitmap_ptr) { | ||
524 | /* | ||
525 | * Disable the bitmap via an invalid offset. We still cache | ||
526 | * the previous bitmap owner and the IO bitmap contents: | ||
527 | */ | ||
528 | tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; | ||
529 | return; | ||
530 | } | ||
531 | if (likely(next == tss->io_bitmap_owner)) { | ||
532 | /* | ||
533 | * Previous owner of the bitmap (hence the bitmap content) | ||
534 | * matches the next task, we don't have to do anything but | ||
535 | * set a valid offset in the TSS: | ||
536 | */ | ||
537 | tss->io_bitmap_base = IO_BITMAP_OFFSET; | ||
538 | return; | ||
539 | } | ||
540 | /* | ||
541 | * Lazy TSS's I/O bitmap copy. We set an invalid offset here | ||
542 | * and we let the task get a GPF in case an I/O instruction | ||
543 | * is performed. The handler of the GPF will verify that the | ||
544 | * faulting task has a valid I/O bitmap and, if true, does the | ||
545 | * real copy and restarts the instruction. This will save us | ||
546 | * redundant copies when the currently switched task does not | ||
547 | * perform any I/O during its timeslice. | ||
548 | */ | ||
549 | tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY; | ||
550 | } | ||
551 | /* | ||
552 | * This special macro can be used to load a debugging register | ||
553 | */ | ||
554 | #define loaddebug(thread,register) \ | ||
555 | __asm__("movl %0,%%db" #register \ | ||
556 | : /* no output */ \ | ||
557 | :"r" (thread->debugreg[register])) | ||
558 | |||
559 | /* | ||
560 | * switch_to(x,y) should switch tasks from x to y. | ||
561 | * | ||
562 | * We fsave/fwait so that an exception goes off at the right time | ||
563 | * (as a call from the fsave or fwait in effect) rather than to | ||
564 | * the wrong process. Lazy FP saving no longer makes any sense | ||
565 | * with modern CPUs, and this simplifies a lot of things (SMP | ||
566 | * and UP become the same). | ||
567 | * | ||
568 | * NOTE! We used to use the x86 hardware context switching. The | ||
569 | * reason for not using it any more becomes apparent when you | ||
570 | * try to recover gracefully from saved state that is no longer | ||
571 | * valid (stale segment register values in particular). With the | ||
572 | * hardware task-switch, there is no way to fix up bad state in | ||
573 | * a reasonable manner. | ||
574 | * | ||
575 | * The fact that Intel documents the hardware task-switching to | ||
576 | * be slow is a fairly red herring - this code is not noticeably | ||
577 | * faster. However, there _is_ some room for improvement here, | ||
578 | * so the performance issues may eventually be a valid point. | ||
579 | * More important, however, is the fact that this allows us much | ||
580 | * more flexibility. | ||
581 | * | ||
582 | * The return value (in %eax) will be the "prev" task after | ||
583 | * the task-switch, and shows up in ret_from_fork in entry.S, | ||
584 | * for example. | ||
585 | */ | ||
586 | struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | ||
587 | { | ||
588 | struct thread_struct *prev = &prev_p->thread, | ||
589 | *next = &next_p->thread; | ||
590 | int cpu = smp_processor_id(); | ||
591 | struct tss_struct *tss = &per_cpu(init_tss, cpu); | ||
592 | |||
593 | /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ | ||
594 | |||
595 | __unlazy_fpu(prev_p); | ||
596 | |||
597 | /* | ||
598 | * Reload esp0, LDT and the page table pointer: | ||
599 | */ | ||
600 | load_esp0(tss, next); | ||
601 | |||
602 | /* | ||
603 | * Load the per-thread Thread-Local Storage descriptor. | ||
604 | */ | ||
605 | load_TLS(next, cpu); | ||
606 | |||
607 | /* | ||
608 | * Save away %fs and %gs. No need to save %es and %ds, as | ||
609 | * those are always kernel segments while inside the kernel. | ||
610 | */ | ||
611 | asm volatile("movl %%fs,%0":"=m" (*(int *)&prev->fs)); | ||
612 | asm volatile("movl %%gs,%0":"=m" (*(int *)&prev->gs)); | ||
613 | |||
614 | /* | ||
615 | * Restore %fs and %gs if needed. | ||
616 | */ | ||
617 | if (unlikely(prev->fs | prev->gs | next->fs | next->gs)) { | ||
618 | loadsegment(fs, next->fs); | ||
619 | loadsegment(gs, next->gs); | ||
620 | } | ||
621 | |||
622 | /* | ||
623 | * Now maybe reload the debug registers | ||
624 | */ | ||
625 | if (unlikely(next->debugreg[7])) { | ||
626 | loaddebug(next, 0); | ||
627 | loaddebug(next, 1); | ||
628 | loaddebug(next, 2); | ||
629 | loaddebug(next, 3); | ||
630 | /* no 4 and 5 */ | ||
631 | loaddebug(next, 6); | ||
632 | loaddebug(next, 7); | ||
633 | } | ||
634 | |||
635 | if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) | ||
636 | handle_io_bitmap(next, tss); | ||
637 | |||
638 | return prev_p; | ||
639 | } | ||
640 | |||
641 | asmlinkage int sys_fork(struct pt_regs regs) | ||
642 | { | ||
643 | return do_fork(SIGCHLD, regs.esp, ®s, 0, NULL, NULL); | ||
644 | } | ||
645 | |||
646 | asmlinkage int sys_clone(struct pt_regs regs) | ||
647 | { | ||
648 | unsigned long clone_flags; | ||
649 | unsigned long newsp; | ||
650 | int __user *parent_tidptr, *child_tidptr; | ||
651 | |||
652 | clone_flags = regs.ebx; | ||
653 | newsp = regs.ecx; | ||
654 | parent_tidptr = (int __user *)regs.edx; | ||
655 | child_tidptr = (int __user *)regs.edi; | ||
656 | if (!newsp) | ||
657 | newsp = regs.esp; | ||
658 | return do_fork(clone_flags, newsp, ®s, 0, parent_tidptr, child_tidptr); | ||
659 | } | ||
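For reference, the glibc clone() wrapper drives this entry point; on i386 the raw syscall takes the flags in %ebx and the new stack in %ecx, exactly as unpacked above. A hedged user-space sketch, with the stack size and flags chosen arbitrarily:

	#define _GNU_SOURCE
	#include <sched.h>
	#include <signal.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/wait.h>

	static int child_fn(void *arg)
	{
		printf("child running with arg \"%s\"\n", (char *)arg);
		return 0;
	}

	int main(void)
	{
		size_t sz = 64 * 1024;
		char *stack = malloc(sz);
		pid_t pid;

		if (!stack)
			return 1;
		/* the stack grows down on x86, so pass the top */
		pid = clone(child_fn, stack + sz, CLONE_VM | SIGCHLD, "hello");
		if (pid < 0) {
			perror("clone");
			return 1;
		}
		waitpid(pid, NULL, 0);
		return 0;
	}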
660 | |||
661 | /* | ||
662 | * This is trivial, and on the face of it looks like it | ||
663 | * could equally well be done in user mode. | ||
664 | * | ||
665 | * Not so, for quite unobvious reasons - register pressure. | ||
666 | * In user mode vfork() cannot have a stack frame, and if | ||
667 | * done by calling the "clone()" system call directly, you | ||
668 | * do not have enough call-clobbered registers to hold all | ||
669 | * the information you need. | ||
670 | */ | ||
671 | asmlinkage int sys_vfork(struct pt_regs regs) | ||
672 | { | ||
673 | return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, ®s, 0, NULL, NULL); | ||
674 | } | ||
675 | |||
676 | /* | ||
677 | * sys_execve() executes a new program. | ||
678 | */ | ||
679 | asmlinkage int sys_execve(struct pt_regs regs) | ||
680 | { | ||
681 | int error; | ||
682 | char * filename; | ||
683 | |||
684 | filename = getname((char __user *) regs.ebx); | ||
685 | error = PTR_ERR(filename); | ||
686 | if (IS_ERR(filename)) | ||
687 | goto out; | ||
688 | error = do_execve(filename, | ||
689 | (char __user * __user *) regs.ecx, | ||
690 | (char __user * __user *) regs.edx, | ||
691 | ®s); | ||
692 | if (error == 0) { | ||
693 | task_lock(current); | ||
694 | current->ptrace &= ~PT_DTRACE; | ||
695 | task_unlock(current); | ||
696 | /* Make sure we don't return using sysenter.. */ | ||
697 | set_thread_flag(TIF_IRET); | ||
698 | } | ||
699 | putname(filename); | ||
700 | out: | ||
701 | return error; | ||
702 | } | ||
703 | |||
704 | #define top_esp (THREAD_SIZE - sizeof(unsigned long)) | ||
705 | #define top_ebp (THREAD_SIZE - 2*sizeof(unsigned long)) | ||
706 | |||
707 | unsigned long get_wchan(struct task_struct *p) | ||
708 | { | ||
709 | unsigned long ebp, esp, eip; | ||
710 | unsigned long stack_page; | ||
711 | int count = 0; | ||
712 | if (!p || p == current || p->state == TASK_RUNNING) | ||
713 | return 0; | ||
714 | stack_page = (unsigned long)p->thread_info; | ||
715 | esp = p->thread.esp; | ||
716 | if (!stack_page || esp < stack_page || esp > top_esp+stack_page) | ||
717 | return 0; | ||
718 | /* include/asm-i386/system.h:switch_to() pushes ebp last. */ | ||
719 | ebp = *(unsigned long *) esp; | ||
720 | do { | ||
721 | if (ebp < stack_page || ebp > top_ebp+stack_page) | ||
722 | return 0; | ||
723 | eip = *(unsigned long *) (ebp+4); | ||
724 | if (!in_sched_functions(eip)) | ||
725 | return eip; | ||
726 | ebp = *(unsigned long *) ebp; | ||
727 | } while (count++ < 16); | ||
728 | return 0; | ||
729 | } | ||
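The same %ebp-chain convention can be walked from user space. A sketch, assuming i386 code built with frame pointers (e.g. -m32 -fno-omit-frame-pointer), since without them the chain does not exist:

	#include <stdio.h>

	/* each frame stores the caller's %ebp at [ebp] and the return
	 * address at [ebp+4], which is what get_wchan() relies on */
	static void walk_frames(void)
	{
		unsigned long *ebp = __builtin_frame_address(0);
		int count = 0;

		/* crt0 zeroes %ebp, so a NULL link ends the chain */
		while (ebp && count++ < 16) {
			printf("return address %#lx\n", ebp[1]);
			ebp = (unsigned long *)ebp[0];
		}
	}

	int main(void)
	{
		walk_frames();
		return 0;
	}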
730 | |||
731 | /* | ||
732 | * sys_alloc_thread_area: get a yet unused TLS descriptor index. | ||
733 | */ | ||
734 | static int get_free_idx(void) | ||
735 | { | ||
736 | struct thread_struct *t = ¤t->thread; | ||
737 | int idx; | ||
738 | |||
739 | for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++) | ||
740 | if (desc_empty(t->tls_array + idx)) | ||
741 | return idx + GDT_ENTRY_TLS_MIN; | ||
742 | return -ESRCH; | ||
743 | } | ||
744 | |||
745 | /* | ||
746 | * Set a given TLS descriptor: | ||
747 | */ | ||
748 | asmlinkage int sys_set_thread_area(struct user_desc __user *u_info) | ||
749 | { | ||
750 | struct thread_struct *t = ¤t->thread; | ||
751 | struct user_desc info; | ||
752 | struct desc_struct *desc; | ||
753 | int cpu, idx; | ||
754 | |||
755 | if (copy_from_user(&info, u_info, sizeof(info))) | ||
756 | return -EFAULT; | ||
757 | idx = info.entry_number; | ||
758 | |||
759 | /* | ||
760 | * index -1 means the kernel should try to find and | ||
761 | * allocate an empty descriptor: | ||
762 | */ | ||
763 | if (idx == -1) { | ||
764 | idx = get_free_idx(); | ||
765 | if (idx < 0) | ||
766 | return idx; | ||
767 | if (put_user(idx, &u_info->entry_number)) | ||
768 | return -EFAULT; | ||
769 | } | ||
770 | |||
771 | if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) | ||
772 | return -EINVAL; | ||
773 | |||
774 | desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN; | ||
775 | |||
776 | /* | ||
777 | * We must not get preempted while modifying the TLS. | ||
778 | */ | ||
779 | cpu = get_cpu(); | ||
780 | |||
781 | if (LDT_empty(&info)) { | ||
782 | desc->a = 0; | ||
783 | desc->b = 0; | ||
784 | } else { | ||
785 | desc->a = LDT_entry_a(&info); | ||
786 | desc->b = LDT_entry_b(&info); | ||
787 | } | ||
788 | load_TLS(t, cpu); | ||
789 | |||
790 | put_cpu(); | ||
791 | |||
792 | return 0; | ||
793 | } | ||
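glibc has no wrapper for this call, so exercising it takes a raw syscall. A sketch (i386-specific; the TLS block and descriptor fields are chosen for illustration):

	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <asm/ldt.h>		/* struct user_desc */

	int main(void)
	{
		static char tls_block[256];
		struct user_desc desc;

		memset(&desc, 0, sizeof(desc));
		desc.entry_number = -1;		/* "pick a free slot" */
		desc.base_addr = (unsigned long)tls_block;
		desc.limit = sizeof(tls_block) - 1;
		desc.seg_32bit = 1;
		desc.useable = 1;

		if (syscall(SYS_set_thread_area, &desc) < 0) {
			perror("set_thread_area");
			return 1;
		}
		printf("allocated GDT entry %u\n", desc.entry_number);
		return 0;
	}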
794 | |||
795 | /* | ||
796 | * Get the current Thread-Local Storage area: | ||
797 | */ | ||
798 | |||
799 | #define GET_BASE(desc) ( \ | ||
800 | (((desc)->a >> 16) & 0x0000ffff) | \ | ||
801 | (((desc)->b << 16) & 0x00ff0000) | \ | ||
802 | ( (desc)->b & 0xff000000) ) | ||
803 | |||
804 | #define GET_LIMIT(desc) ( \ | ||
805 | ((desc)->a & 0x0ffff) | \ | ||
806 | ((desc)->b & 0xf0000) ) | ||
807 | |||
808 | #define GET_32BIT(desc) (((desc)->b >> 22) & 1) | ||
809 | #define GET_CONTENTS(desc) (((desc)->b >> 10) & 3) | ||
810 | #define GET_WRITABLE(desc) (((desc)->b >> 9) & 1) | ||
811 | #define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1) | ||
812 | #define GET_PRESENT(desc) (((desc)->b >> 15) & 1) | ||
813 | #define GET_USEABLE(desc) (((desc)->b >> 20) & 1) | ||
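As a sanity check on the layout these macros decode (the base scattered across both descriptor words, the limit split 16+4 bits), a standalone round-trip test, assuming the standard i386 descriptor format:

	#include <assert.h>

	struct desc { unsigned int a, b; };

	#define GET_BASE(d)  ((((d)->a >> 16) & 0x0000ffff) | \
			      (((d)->b << 16) & 0x00ff0000) | \
			      ( (d)->b        & 0xff000000))
	#define GET_LIMIT(d) (((d)->a & 0x0ffff) | ((d)->b & 0xf0000))

	int main(void)
	{
		unsigned int base = 0x12345678, limit = 0xabcde;
		struct desc d;

		/* pack the fields the way the CPU expects them */
		d.a = (limit & 0xffff) | ((base & 0xffff) << 16);
		d.b = ((base >> 16) & 0xff) | (limit & 0xf0000) |
		      ((base >> 24) << 24);
		assert(GET_BASE(&d) == base);
		assert(GET_LIMIT(&d) == limit);
		return 0;
	}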
814 | |||
815 | asmlinkage int sys_get_thread_area(struct user_desc __user *u_info) | ||
816 | { | ||
817 | struct user_desc info; | ||
818 | struct desc_struct *desc; | ||
819 | int idx; | ||
820 | |||
821 | if (get_user(idx, &u_info->entry_number)) | ||
822 | return -EFAULT; | ||
823 | if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) | ||
824 | return -EINVAL; | ||
825 | |||
826 | desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; | ||
827 | |||
828 | info.entry_number = idx; | ||
829 | info.base_addr = GET_BASE(desc); | ||
830 | info.limit = GET_LIMIT(desc); | ||
831 | info.seg_32bit = GET_32BIT(desc); | ||
832 | info.contents = GET_CONTENTS(desc); | ||
833 | info.read_exec_only = !GET_WRITABLE(desc); | ||
834 | info.limit_in_pages = GET_LIMIT_PAGES(desc); | ||
835 | info.seg_not_present = !GET_PRESENT(desc); | ||
836 | info.useable = GET_USEABLE(desc); | ||
837 | |||
838 | if (copy_to_user(u_info, &info, sizeof(info))) | ||
839 | return -EFAULT; | ||
840 | return 0; | ||
841 | } | ||
842 | |||
843 | unsigned long arch_align_stack(unsigned long sp) | ||
844 | { | ||
845 | if (randomize_va_space) | ||
846 | sp -= get_random_int() % 8192; | ||
847 | return sp & ~0xf; | ||
848 | } | ||
diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c new file mode 100644 index 000000000000..b2f17640ceff --- /dev/null +++ b/arch/i386/kernel/ptrace.c | |||
@@ -0,0 +1,717 @@ | |||
1 | /* ptrace.c */ | ||
2 | /* By Ross Biro 1/23/92 */ | ||
3 | /* | ||
4 | * Pentium III FXSR, SSE support | ||
5 | * Gareth Hughes <gareth@valinux.com>, May 2000 | ||
6 | */ | ||
7 | |||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/sched.h> | ||
10 | #include <linux/mm.h> | ||
11 | #include <linux/smp.h> | ||
12 | #include <linux/smp_lock.h> | ||
13 | #include <linux/errno.h> | ||
14 | #include <linux/ptrace.h> | ||
15 | #include <linux/user.h> | ||
16 | #include <linux/security.h> | ||
17 | #include <linux/audit.h> | ||
18 | #include <linux/seccomp.h> | ||
19 | |||
20 | #include <asm/uaccess.h> | ||
21 | #include <asm/pgtable.h> | ||
22 | #include <asm/system.h> | ||
23 | #include <asm/processor.h> | ||
24 | #include <asm/i387.h> | ||
25 | #include <asm/debugreg.h> | ||
26 | #include <asm/ldt.h> | ||
27 | #include <asm/desc.h> | ||
28 | |||
29 | /* | ||
30 | * This code does not yet catch signals sent when the child dies; | ||
31 | * that should be handled in exit.c or in signal.c. | ||
32 | */ | ||
33 | |||
34 | /* determines which flags the user has access to. */ | ||
35 | /* 1 = access 0 = no access */ | ||
36 | #define FLAG_MASK 0x00044dd5 | ||
37 | |||
38 | /* sets the trap flag. */ | ||
39 | #define TRAP_FLAG 0x100 | ||
40 | |||
41 | /* | ||
42 | * Offset of eflags on child stack.. | ||
43 | */ | ||
44 | #define EFL_OFFSET ((EFL-2)*4-sizeof(struct pt_regs)) | ||
45 | |||
46 | static inline struct pt_regs *get_child_regs(struct task_struct *task) | ||
47 | { | ||
48 | void *stack_top = (void *)task->thread.esp0; | ||
49 | return stack_top - sizeof(struct pt_regs); | ||
50 | } | ||
51 | |||
52 | /* | ||
53 | * This routine gets a word off of the process's privileged stack. | ||
54 | * The offset is how far from the base addr as stored in the TSS. | ||
55 | * It assumes that all the privileged stacks are in our | ||
56 | * data space. | ||
57 | */ | ||
58 | static inline int get_stack_long(struct task_struct *task, int offset) | ||
59 | { | ||
60 | unsigned char *stack; | ||
61 | |||
62 | stack = (unsigned char *)task->thread.esp0; | ||
63 | stack += offset; | ||
64 | return (*((int *)stack)); | ||
65 | } | ||
66 | |||
67 | /* | ||
68 | * This routine puts a word on the process's privileged stack. | ||
69 | * The offset is how far from the base addr as stored in the TSS. | ||
70 | * It assumes that all the privileged stacks are in our | ||
71 | * data space. | ||
72 | */ | ||
73 | static inline int put_stack_long(struct task_struct *task, int offset, | ||
74 | unsigned long data) | ||
75 | { | ||
76 | unsigned char * stack; | ||
77 | |||
78 | stack = (unsigned char *) task->thread.esp0; | ||
79 | stack += offset; | ||
80 | *(unsigned long *) stack = data; | ||
81 | return 0; | ||
82 | } | ||
83 | |||
84 | static int putreg(struct task_struct *child, | ||
85 | unsigned long regno, unsigned long value) | ||
86 | { | ||
87 | switch (regno >> 2) { | ||
88 | case FS: | ||
89 | if (value && (value & 3) != 3) | ||
90 | return -EIO; | ||
91 | child->thread.fs = value; | ||
92 | return 0; | ||
93 | case GS: | ||
94 | if (value && (value & 3) != 3) | ||
95 | return -EIO; | ||
96 | child->thread.gs = value; | ||
97 | return 0; | ||
98 | case DS: | ||
99 | case ES: | ||
100 | if (value && (value & 3) != 3) | ||
101 | return -EIO; | ||
102 | value &= 0xffff; | ||
103 | break; | ||
104 | case SS: | ||
105 | case CS: | ||
106 | if ((value & 3) != 3) | ||
107 | return -EIO; | ||
108 | value &= 0xffff; | ||
109 | break; | ||
110 | case EFL: | ||
111 | value &= FLAG_MASK; | ||
112 | value |= get_stack_long(child, EFL_OFFSET) & ~FLAG_MASK; | ||
113 | break; | ||
114 | } | ||
115 | if (regno > GS*4) | ||
116 | regno -= 2*4; | ||
117 | put_stack_long(child, regno - sizeof(struct pt_regs), value); | ||
118 | return 0; | ||
119 | } | ||
120 | |||
121 | static unsigned long getreg(struct task_struct *child, | ||
122 | unsigned long regno) | ||
123 | { | ||
124 | unsigned long retval = ~0UL; | ||
125 | |||
126 | switch (regno >> 2) { | ||
127 | case FS: | ||
128 | retval = child->thread.fs; | ||
129 | break; | ||
130 | case GS: | ||
131 | retval = child->thread.gs; | ||
132 | break; | ||
133 | case DS: | ||
134 | case ES: | ||
135 | case SS: | ||
136 | case CS: | ||
137 | retval = 0xffff; | ||
138 | /* fall through */ | ||
139 | default: | ||
140 | if (regno > GS*4) | ||
141 | regno -= 2*4; | ||
142 | regno = regno - sizeof(struct pt_regs); | ||
143 | retval &= get_stack_long(child, regno); | ||
144 | } | ||
145 | return retval; | ||
146 | } | ||
147 | |||
148 | #define LDT_SEGMENT 4 | ||
149 | |||
150 | static unsigned long convert_eip_to_linear(struct task_struct *child, struct pt_regs *regs) | ||
151 | { | ||
152 | unsigned long addr, seg; | ||
153 | |||
154 | addr = regs->eip; | ||
155 | seg = regs->xcs & 0xffff; | ||
156 | if (regs->eflags & VM_MASK) { | ||
157 | addr = (addr & 0xffff) + (seg << 4); | ||
158 | return addr; | ||
159 | } | ||
160 | |||
161 | /* | ||
162 | * We'll assume that the code segments in the GDT | ||
163 | * are all zero-based. That is largely true: the | ||
164 | * TLS segments are used for data, and the PNPBIOS | ||
165 | * and APM BIOS ones we just ignore here. | ||
166 | */ | ||
167 | if (seg & LDT_SEGMENT) { | ||
168 | u32 *desc; | ||
169 | unsigned long base; | ||
170 | |||
171 | down(&child->mm->context.sem); | ||
172 | desc = child->mm->context.ldt + (seg & ~7); | ||
173 | base = (desc[0] >> 16) | ((desc[1] & 0xff) << 16) | (desc[1] & 0xff000000); | ||
174 | |||
175 | /* 16-bit code segment? */ | ||
176 | if (!((desc[1] >> 22) & 1)) | ||
177 | addr &= 0xffff; | ||
178 | addr += base; | ||
179 | up(&child->mm->context.sem); | ||
180 | } | ||
181 | return addr; | ||
182 | } | ||
183 | |||
184 | static inline int is_at_popf(struct task_struct *child, struct pt_regs *regs) | ||
185 | { | ||
186 | int i, copied; | ||
187 | unsigned char opcode[16]; | ||
188 | unsigned long addr = convert_eip_to_linear(child, regs); | ||
189 | |||
190 | copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0); | ||
191 | for (i = 0; i < copied; i++) { | ||
192 | switch (opcode[i]) { | ||
193 | /* popf */ | ||
194 | case 0x9d: | ||
195 | return 1; | ||
196 | /* opcode and address size prefixes */ | ||
197 | case 0x66: case 0x67: | ||
198 | continue; | ||
199 | /* irrelevant prefixes (segment overrides and repeats) */ | ||
200 | case 0x26: case 0x2e: | ||
201 | case 0x36: case 0x3e: | ||
202 | case 0x64: case 0x65: | ||
203 | case 0xf0: case 0xf2: case 0xf3: | ||
204 | continue; | ||
205 | |||
206 | /* | ||
207 | * pushf: NOTE! We should probably not let | ||
208 | * the user see the TF bit being set. But | ||
209 | * it's more pain than it's worth to avoid | ||
210 | * it, and a debugger could emulate this | ||
211 | * all in user space if it _really_ cares. | ||
212 | */ | ||
213 | case 0x9c: | ||
214 | default: | ||
215 | return 0; | ||
216 | } | ||
217 | } | ||
218 | return 0; | ||
219 | } | ||
220 | |||
221 | static void set_singlestep(struct task_struct *child) | ||
222 | { | ||
223 | struct pt_regs *regs = get_child_regs(child); | ||
224 | |||
225 | /* | ||
226 | * Always set TIF_SINGLESTEP - this guarantees that | ||
227 | * we single-step system calls etc.. This will also | ||
228 | * cause us to set TF when returning to user mode. | ||
229 | */ | ||
230 | set_tsk_thread_flag(child, TIF_SINGLESTEP); | ||
231 | |||
232 | /* | ||
233 | * If TF was already set, don't do anything else | ||
234 | */ | ||
235 | if (regs->eflags & TRAP_FLAG) | ||
236 | return; | ||
237 | |||
238 | /* Set TF on the kernel stack.. */ | ||
239 | regs->eflags |= TRAP_FLAG; | ||
240 | |||
241 | /* | ||
242 | * ..but if TF is changed by the instruction we will trace, | ||
243 | * don't mark it as being "us" that set it, so that we | ||
244 | * won't clear it by hand later. | ||
245 | */ | ||
246 | if (is_at_popf(child, regs)) | ||
247 | return; | ||
248 | |||
249 | child->ptrace |= PT_DTRACE; | ||
250 | } | ||
251 | |||
252 | static void clear_singlestep(struct task_struct *child) | ||
253 | { | ||
254 | /* Always clear TIF_SINGLESTEP... */ | ||
255 | clear_tsk_thread_flag(child, TIF_SINGLESTEP); | ||
256 | |||
257 | /* But touch TF only if it was set by us.. */ | ||
258 | if (child->ptrace & PT_DTRACE) { | ||
259 | struct pt_regs *regs = get_child_regs(child); | ||
260 | regs->eflags &= ~TRAP_FLAG; | ||
261 | child->ptrace &= ~PT_DTRACE; | ||
262 | } | ||
263 | } | ||
264 | |||
265 | /* | ||
266 | * Called by kernel/ptrace.c when detaching.. | ||
267 | * | ||
268 | * Make sure the single step bit is not set. | ||
269 | */ | ||
270 | void ptrace_disable(struct task_struct *child) | ||
271 | { | ||
272 | clear_singlestep(child); | ||
273 | } | ||
274 | |||
275 | /* | ||
276 | * Perform get_thread_area on behalf of the traced child. | ||
277 | */ | ||
278 | static int | ||
279 | ptrace_get_thread_area(struct task_struct *child, | ||
280 | int idx, struct user_desc __user *user_desc) | ||
281 | { | ||
282 | struct user_desc info; | ||
283 | struct desc_struct *desc; | ||
284 | |||
285 | /* | ||
286 | * Get the current Thread-Local Storage area: | ||
287 | */ | ||
288 | |||
289 | #define GET_BASE(desc) ( \ | ||
290 | (((desc)->a >> 16) & 0x0000ffff) | \ | ||
291 | (((desc)->b << 16) & 0x00ff0000) | \ | ||
292 | ( (desc)->b & 0xff000000) ) | ||
293 | |||
294 | #define GET_LIMIT(desc) ( \ | ||
295 | ((desc)->a & 0x0ffff) | \ | ||
296 | ((desc)->b & 0xf0000) ) | ||
297 | |||
298 | #define GET_32BIT(desc) (((desc)->b >> 22) & 1) | ||
299 | #define GET_CONTENTS(desc) (((desc)->b >> 10) & 3) | ||
300 | #define GET_WRITABLE(desc) (((desc)->b >> 9) & 1) | ||
301 | #define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1) | ||
302 | #define GET_PRESENT(desc) (((desc)->b >> 15) & 1) | ||
303 | #define GET_USEABLE(desc) (((desc)->b >> 20) & 1) | ||
304 | |||
305 | if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) | ||
306 | return -EINVAL; | ||
307 | |||
308 | desc = child->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; | ||
309 | |||
310 | info.entry_number = idx; | ||
311 | info.base_addr = GET_BASE(desc); | ||
312 | info.limit = GET_LIMIT(desc); | ||
313 | info.seg_32bit = GET_32BIT(desc); | ||
314 | info.contents = GET_CONTENTS(desc); | ||
315 | info.read_exec_only = !GET_WRITABLE(desc); | ||
316 | info.limit_in_pages = GET_LIMIT_PAGES(desc); | ||
317 | info.seg_not_present = !GET_PRESENT(desc); | ||
318 | info.useable = GET_USEABLE(desc); | ||
319 | |||
320 | if (copy_to_user(user_desc, &info, sizeof(info))) | ||
321 | return -EFAULT; | ||
322 | |||
323 | return 0; | ||
324 | } | ||
325 | |||
326 | /* | ||
327 | * Perform set_thread_area on behalf of the traced child. | ||
328 | */ | ||
329 | static int | ||
330 | ptrace_set_thread_area(struct task_struct *child, | ||
331 | int idx, struct user_desc __user *user_desc) | ||
332 | { | ||
333 | struct user_desc info; | ||
334 | struct desc_struct *desc; | ||
335 | |||
336 | if (copy_from_user(&info, user_desc, sizeof(info))) | ||
337 | return -EFAULT; | ||
338 | |||
339 | if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) | ||
340 | return -EINVAL; | ||
341 | |||
342 | desc = child->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; | ||
343 | if (LDT_empty(&info)) { | ||
344 | desc->a = 0; | ||
345 | desc->b = 0; | ||
346 | } else { | ||
347 | desc->a = LDT_entry_a(&info); | ||
348 | desc->b = LDT_entry_b(&info); | ||
349 | } | ||
350 | |||
351 | return 0; | ||
352 | } | ||
353 | |||
354 | asmlinkage int sys_ptrace(long request, long pid, long addr, long data) | ||
355 | { | ||
356 | struct task_struct *child; | ||
357 | struct user * dummy = NULL; | ||
358 | int i, ret; | ||
359 | unsigned long __user *datap = (unsigned long __user *)data; | ||
360 | |||
361 | lock_kernel(); | ||
362 | ret = -EPERM; | ||
363 | if (request == PTRACE_TRACEME) { | ||
364 | /* are we already being traced? */ | ||
365 | if (current->ptrace & PT_PTRACED) | ||
366 | goto out; | ||
367 | ret = security_ptrace(current->parent, current); | ||
368 | if (ret) | ||
369 | goto out; | ||
370 | /* set the ptrace bit in the process flags. */ | ||
371 | current->ptrace |= PT_PTRACED; | ||
372 | ret = 0; | ||
373 | goto out; | ||
374 | } | ||
375 | ret = -ESRCH; | ||
376 | read_lock(&tasklist_lock); | ||
377 | child = find_task_by_pid(pid); | ||
378 | if (child) | ||
379 | get_task_struct(child); | ||
380 | read_unlock(&tasklist_lock); | ||
381 | if (!child) | ||
382 | goto out; | ||
383 | |||
384 | ret = -EPERM; | ||
385 | if (pid == 1) /* you may not mess with init */ | ||
386 | goto out_tsk; | ||
387 | |||
388 | if (request == PTRACE_ATTACH) { | ||
389 | ret = ptrace_attach(child); | ||
390 | goto out_tsk; | ||
391 | } | ||
392 | |||
393 | ret = ptrace_check_attach(child, request == PTRACE_KILL); | ||
394 | if (ret < 0) | ||
395 | goto out_tsk; | ||
396 | |||
397 | switch (request) { | ||
398 | /* when I and D space are separate, these will need to be fixed. */ | ||
399 | case PTRACE_PEEKTEXT: /* read word at location addr. */ | ||
400 | case PTRACE_PEEKDATA: { | ||
401 | unsigned long tmp; | ||
402 | int copied; | ||
403 | |||
404 | copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); | ||
405 | ret = -EIO; | ||
406 | if (copied != sizeof(tmp)) | ||
407 | break; | ||
408 | ret = put_user(tmp, datap); | ||
409 | break; | ||
410 | } | ||
411 | |||
412 | /* read the word at location addr in the USER area. */ | ||
413 | case PTRACE_PEEKUSR: { | ||
414 | unsigned long tmp; | ||
415 | |||
416 | ret = -EIO; | ||
417 | if ((addr & 3) || addr < 0 || | ||
418 | addr > sizeof(struct user) - 3) | ||
419 | break; | ||
420 | |||
421 | tmp = 0; /* Default return condition */ | ||
422 | if(addr < FRAME_SIZE*sizeof(long)) | ||
423 | tmp = getreg(child, addr); | ||
424 | if(addr >= (long) &dummy->u_debugreg[0] && | ||
425 | addr <= (long) &dummy->u_debugreg[7]){ | ||
426 | addr -= (long) &dummy->u_debugreg[0]; | ||
427 | addr = addr >> 2; | ||
428 | tmp = child->thread.debugreg[addr]; | ||
429 | } | ||
430 | ret = put_user(tmp, datap); | ||
431 | break; | ||
432 | } | ||
433 | |||
434 | /* when I and D space are separate, this will have to be fixed. */ | ||
435 | case PTRACE_POKETEXT: /* write the word at location addr. */ | ||
436 | case PTRACE_POKEDATA: | ||
437 | ret = 0; | ||
438 | if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data)) | ||
439 | break; | ||
440 | ret = -EIO; | ||
441 | break; | ||
442 | |||
443 | case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ | ||
444 | ret = -EIO; | ||
445 | if ((addr & 3) || addr < 0 || | ||
446 | addr > sizeof(struct user) - 3) | ||
447 | break; | ||
448 | |||
449 | if (addr < FRAME_SIZE*sizeof(long)) { | ||
450 | ret = putreg(child, addr, data); | ||
451 | break; | ||
452 | } | ||
453 | /* We need to be very careful here. We implicitly | ||
454 | want to modify a portion of the task_struct, and we | ||
455 | have to be selective about what portions we allow someone | ||
456 | to modify. */ | ||
457 | |||
458 | ret = -EIO; | ||
459 | if(addr >= (long) &dummy->u_debugreg[0] && | ||
460 | addr <= (long) &dummy->u_debugreg[7]){ | ||
461 | |||
462 | if(addr == (long) &dummy->u_debugreg[4]) break; | ||
463 | if(addr == (long) &dummy->u_debugreg[5]) break; | ||
464 | if(addr < (long) &dummy->u_debugreg[4] && | ||
465 | ((unsigned long) data) >= TASK_SIZE-3) break; | ||
466 | |||
467 | /* Sanity-check data. Take one half-byte at once with | ||
468 | * check = (val >> (16 + 4*i)) & 0xf. It contains the | ||
469 | * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits | ||
470 | * 2 and 3 are LENi. Given a list of invalid values, | ||
471 | * we do mask |= 1 << invalid_value, so that | ||
472 | * (mask >> check) & 1 is a correct test for invalid | ||
473 | * values. | ||
474 | * | ||
475 | * R/Wi contains the type of the breakpoint / | ||
476 | * watchpoint, LENi contains the length of the watched | ||
477 | * data in the watchpoint case. | ||
478 | * | ||
479 | * The invalid values are: | ||
480 | * - LENi == 0x10 (undefined), so mask |= 0x0f00. | ||
481 | * - R/Wi == 0x10 (break on I/O reads or writes), so | ||
482 | * mask |= 0x4444. | ||
483 | * - R/Wi == 0x00 && LENi != 0x00, so we have mask |= | ||
484 | * 0x1110. | ||
485 | * | ||
486 | * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54. | ||
487 | * | ||
488 | * See the Intel Manual "System Programming Guide", | ||
489 | * 15.2.4 | ||
490 | * | ||
491 | * Note that LENi == 0x10 is defined on x86_64 in long | ||
492 | * mode (i.e. even for 32-bit userspace software, but | ||
493 | * 64-bit kernel), so the x86_64 mask value is 0x5454. | ||
494 | * See the AMD manual no. 24593 (AMD64 System | ||
495 | * Programming)*/ | ||
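			/* Worked example: 0x5f54 is binary
			 * 0101 1111 0101 0100, so check = 0x2
			 * (R/Wi = 10, break on I/O) hits a set bit
			 * and is rejected, while check = 0x3
			 * (R/Wi = 11, LENi = 00: a one-byte data
			 * watchpoint) hits a clear bit and is
			 * allowed. */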
496 | |||
497 | if(addr == (long) &dummy->u_debugreg[7]) { | ||
498 | data &= ~DR_CONTROL_RESERVED; | ||
499 | for(i=0; i<4; i++) | ||
500 | if ((0x5f54 >> ((data >> (16 + 4*i)) & 0xf)) & 1) | ||
501 | goto out_tsk; | ||
502 | } | ||
503 | |||
504 | addr -= (long) &dummy->u_debugreg; | ||
505 | addr = addr >> 2; | ||
506 | child->thread.debugreg[addr] = data; | ||
507 | ret = 0; | ||
508 | } | ||
509 | break; | ||
510 | |||
511 | case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ | ||
512 | case PTRACE_CONT: /* restart after signal. */ | ||
513 | ret = -EIO; | ||
514 | if ((unsigned long) data > _NSIG) | ||
515 | break; | ||
516 | if (request == PTRACE_SYSCALL) { | ||
517 | set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); | ||
518 | } | ||
519 | else { | ||
520 | clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); | ||
521 | } | ||
522 | child->exit_code = data; | ||
523 | /* make sure the single step bit is not set. */ | ||
524 | clear_singlestep(child); | ||
525 | wake_up_process(child); | ||
526 | ret = 0; | ||
527 | break; | ||
528 | |||
529 | /* | ||
530 | * Make the child exit. The best we can do is send it a SIGKILL; | ||
531 | * perhaps it should be put in the status that it wants to | ||
532 | * exit. | ||
533 | */ | ||
534 | case PTRACE_KILL: | ||
535 | ret = 0; | ||
536 | if (child->exit_state == EXIT_ZOMBIE) /* already dead */ | ||
537 | break; | ||
538 | child->exit_code = SIGKILL; | ||
539 | /* make sure the single step bit is not set. */ | ||
540 | clear_singlestep(child); | ||
541 | wake_up_process(child); | ||
542 | break; | ||
543 | |||
544 | case PTRACE_SINGLESTEP: /* set the trap flag. */ | ||
545 | ret = -EIO; | ||
546 | if ((unsigned long) data > _NSIG) | ||
547 | break; | ||
548 | clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); | ||
549 | set_singlestep(child); | ||
550 | child->exit_code = data; | ||
551 | /* give it a chance to run. */ | ||
552 | wake_up_process(child); | ||
553 | ret = 0; | ||
554 | break; | ||
555 | |||
556 | case PTRACE_DETACH: | ||
557 | /* detach a process that was attached. */ | ||
558 | ret = ptrace_detach(child, data); | ||
559 | break; | ||
560 | |||
561 | case PTRACE_GETREGS: { /* Get all gp regs from the child. */ | ||
562 | if (!access_ok(VERIFY_WRITE, datap, FRAME_SIZE*sizeof(long))) { | ||
563 | ret = -EIO; | ||
564 | break; | ||
565 | } | ||
566 | for ( i = 0; i < FRAME_SIZE*sizeof(long); i += sizeof(long) ) { | ||
567 | __put_user(getreg(child, i), datap); | ||
568 | datap++; | ||
569 | } | ||
570 | ret = 0; | ||
571 | break; | ||
572 | } | ||
573 | |||
574 | case PTRACE_SETREGS: { /* Set all gp regs in the child. */ | ||
575 | unsigned long tmp; | ||
576 | if (!access_ok(VERIFY_READ, datap, FRAME_SIZE*sizeof(long))) { | ||
577 | ret = -EIO; | ||
578 | break; | ||
579 | } | ||
580 | for ( i = 0; i < FRAME_SIZE*sizeof(long); i += sizeof(long) ) { | ||
581 | __get_user(tmp, datap); | ||
582 | putreg(child, i, tmp); | ||
583 | datap++; | ||
584 | } | ||
585 | ret = 0; | ||
586 | break; | ||
587 | } | ||
588 | |||
589 | case PTRACE_GETFPREGS: { /* Get the child FPU state. */ | ||
590 | if (!access_ok(VERIFY_WRITE, datap, | ||
591 | sizeof(struct user_i387_struct))) { | ||
592 | ret = -EIO; | ||
593 | break; | ||
594 | } | ||
595 | ret = 0; | ||
596 | if (!tsk_used_math(child)) | ||
597 | init_fpu(child); | ||
598 | get_fpregs((struct user_i387_struct __user *)data, child); | ||
599 | break; | ||
600 | } | ||
601 | |||
602 | case PTRACE_SETFPREGS: { /* Set the child FPU state. */ | ||
603 | if (!access_ok(VERIFY_READ, datap, | ||
604 | sizeof(struct user_i387_struct))) { | ||
605 | ret = -EIO; | ||
606 | break; | ||
607 | } | ||
608 | set_stopped_child_used_math(child); | ||
609 | set_fpregs(child, (struct user_i387_struct __user *)data); | ||
610 | ret = 0; | ||
611 | break; | ||
612 | } | ||
613 | |||
614 | case PTRACE_GETFPXREGS: { /* Get the child extended FPU state. */ | ||
615 | if (!access_ok(VERIFY_WRITE, datap, | ||
616 | sizeof(struct user_fxsr_struct))) { | ||
617 | ret = -EIO; | ||
618 | break; | ||
619 | } | ||
620 | if (!tsk_used_math(child)) | ||
621 | init_fpu(child); | ||
622 | ret = get_fpxregs((struct user_fxsr_struct __user *)data, child); | ||
623 | break; | ||
624 | } | ||
625 | |||
626 | case PTRACE_SETFPXREGS: { /* Set the child extended FPU state. */ | ||
627 | if (!access_ok(VERIFY_READ, datap, | ||
628 | sizeof(struct user_fxsr_struct))) { | ||
629 | ret = -EIO; | ||
630 | break; | ||
631 | } | ||
632 | set_stopped_child_used_math(child); | ||
633 | ret = set_fpxregs(child, (struct user_fxsr_struct __user *)data); | ||
634 | break; | ||
635 | } | ||
636 | |||
637 | case PTRACE_GET_THREAD_AREA: | ||
638 | ret = ptrace_get_thread_area(child, addr, | ||
639 | (struct user_desc __user *) data); | ||
640 | break; | ||
641 | |||
642 | case PTRACE_SET_THREAD_AREA: | ||
643 | ret = ptrace_set_thread_area(child, addr, | ||
644 | (struct user_desc __user *) data); | ||
645 | break; | ||
646 | |||
647 | default: | ||
648 | ret = ptrace_request(child, request, addr, data); | ||
649 | break; | ||
650 | } | ||
651 | out_tsk: | ||
652 | put_task_struct(child); | ||
653 | out: | ||
654 | unlock_kernel(); | ||
655 | return ret; | ||
656 | } | ||
657 | |||
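A user-space sketch of the u_debugreg path above: the tracer writes DR0 and DR7 through PTRACE_POKEUSR (spelled PTRACE_POKEUSER in glibc). The breakpoint address is illustrative; DR7 = 0x1 (local-enable slot 0, R/Wi = 00, LENi = 00) passes the 0x5f54 sanity check:

	#include <stdio.h>
	#include <stddef.h>
	#include <unistd.h>
	#include <sys/ptrace.h>
	#include <sys/types.h>
	#include <sys/user.h>
	#include <sys/wait.h>

	int main(int argc, char **argv)
	{
		pid_t pid;

		if (argc < 2)
			return 1;
		pid = fork();
		if (pid == 0) {
			ptrace(PTRACE_TRACEME, 0, NULL, NULL);
			execvp(argv[1], argv + 1);
			_exit(1);
		}
		waitpid(pid, NULL, 0);	/* child stops on exec */

		/* DR0 = breakpoint address (illustrative), DR7 = 0x1 */
		ptrace(PTRACE_POKEUSER, pid,
		       offsetof(struct user, u_debugreg[0]), (void *)0x8048000);
		ptrace(PTRACE_POKEUSER, pid,
		       offsetof(struct user, u_debugreg[7]), (void *)0x1);
		ptrace(PTRACE_CONT, pid, NULL, NULL);
		waitpid(pid, NULL, 0);
		return 0;
	}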
658 | void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) | ||
659 | { | ||
660 | struct siginfo info; | ||
661 | |||
662 | tsk->thread.trap_no = 1; | ||
663 | tsk->thread.error_code = error_code; | ||
664 | |||
665 | memset(&info, 0, sizeof(info)); | ||
666 | info.si_signo = SIGTRAP; | ||
667 | info.si_code = TRAP_BRKPT; | ||
668 | |||
669 | /* User-mode eip? */ | ||
670 | info.si_addr = user_mode(regs) ? (void __user *) regs->eip : NULL; | ||
671 | |||
672 | /* Send us the fakey SIGTRAP */ | ||
673 | force_sig_info(SIGTRAP, &info, tsk); | ||
674 | } | ||
675 | |||
676 | /* notification of system call entry/exit | ||
677 | * - triggered by current->work.syscall_trace | ||
678 | */ | ||
679 | __attribute__((regparm(3))) | ||
680 | void do_syscall_trace(struct pt_regs *regs, int entryexit) | ||
681 | { | ||
682 | /* do the secure computing check first */ | ||
683 | secure_computing(regs->orig_eax); | ||
684 | |||
685 | if (unlikely(current->audit_context)) { | ||
686 | if (!entryexit) | ||
687 | audit_syscall_entry(current, regs->orig_eax, | ||
688 | regs->ebx, regs->ecx, | ||
689 | regs->edx, regs->esi); | ||
690 | else | ||
691 | audit_syscall_exit(current, regs->eax); | ||
692 | } | ||
693 | |||
694 | if (!(current->ptrace & PT_PTRACED)) | ||
695 | return; | ||
696 | |||
697 | /* Fake a debug trap */ | ||
698 | if (test_thread_flag(TIF_SINGLESTEP)) | ||
699 | send_sigtrap(current, regs, 0); | ||
700 | |||
701 | if (!test_thread_flag(TIF_SYSCALL_TRACE)) | ||
702 | return; | ||
703 | |||
704 | /* the 0x80 provides a way for the tracing parent to distinguish | ||
705 | between a syscall stop and SIGTRAP delivery */ | ||
706 | ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80 : 0)); | ||
707 | |||
708 | /* | ||
709 | * this isn't the same as continuing with a signal, but it will do | ||
710 | * for normal use. strace only continues with a signal if the | ||
711 | * stopping signal is not SIGTRAP. -brl | ||
712 | */ | ||
713 | if (current->exit_code) { | ||
714 | send_sig(current->exit_code, current, 1); | ||
715 | current->exit_code = 0; | ||
716 | } | ||
717 | } | ||
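The SIGTRAP | 0x80 convention is easy to observe from a tracer. A sketch using PTRACE_O_TRACESYSGOOD, which sets PT_TRACESYSGOOD above; the traced command is arbitrary:

	#define _GNU_SOURCE
	#include <signal.h>
	#include <stdio.h>
	#include <unistd.h>
	#include <sys/ptrace.h>
	#include <sys/types.h>
	#include <sys/wait.h>

	int main(void)
	{
		int status;
		pid_t pid = fork();

		if (pid == 0) {
			ptrace(PTRACE_TRACEME, 0, NULL, NULL);
			execlp("true", "true", (char *)NULL);
			_exit(1);
		}
		waitpid(pid, &status, 0);	/* stop on exec */
		ptrace(PTRACE_SETOPTIONS, pid, NULL,
		       (void *)PTRACE_O_TRACESYSGOOD);

		while (ptrace(PTRACE_SYSCALL, pid, NULL, NULL) == 0 &&
		       waitpid(pid, &status, 0) > 0 && WIFSTOPPED(status)) {
			if (WSTOPSIG(status) == (SIGTRAP | 0x80))
				printf("syscall stop\n");	/* entry or exit */
			else
				break;
		}
		return 0;
	}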
diff --git a/arch/i386/kernel/quirks.c b/arch/i386/kernel/quirks.c new file mode 100644 index 000000000000..aaf89cb2bc51 --- /dev/null +++ b/arch/i386/kernel/quirks.c | |||
@@ -0,0 +1,52 @@ | |||
1 | /* | ||
2 | * This file contains work-arounds for x86 and x86_64 platform bugs. | ||
3 | */ | ||
4 | #include <linux/config.h> | ||
5 | #include <linux/pci.h> | ||
6 | #include <linux/irq.h> | ||
7 | |||
8 | #if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI) | ||
9 | |||
10 | static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) | ||
11 | { | ||
12 | u8 config, rev; | ||
13 | u32 word; | ||
14 | |||
15 | /* BIOS may enable hardware IRQ balancing for | ||
16 | * E7520/E7320/E7525 (revision ID 0x9 and below) | ||
17 | * based platforms. | ||
18 | * Disable SW irqbalance/affinity on those platforms. | ||
19 | */ | ||
20 | pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); | ||
21 | if (rev > 0x9) | ||
22 | return; | ||
23 | |||
24 | printk(KERN_INFO "Intel E7520/7320/7525 detected.\n"); | ||
25 | |||
26 | /* enable access to config space*/ | ||
27 | pci_read_config_byte(dev, 0xf4, &config); | ||
28 | config |= 0x2; | ||
29 | pci_write_config_byte(dev, 0xf4, config); | ||
30 | |||
31 | /* read xTPR register */ | ||
32 | raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word); | ||
33 | |||
34 | if (!(word & (1 << 13))) { | ||
35 | printk(KERN_INFO "Disabling irq balancing and affinity\n"); | ||
36 | #ifdef CONFIG_IRQBALANCE | ||
37 | irqbalance_disable(""); | ||
38 | #endif | ||
39 | noirqdebug_setup(""); | ||
40 | #ifdef CONFIG_PROC_FS | ||
41 | no_irq_affinity = 1; | ||
42 | #endif | ||
43 | } | ||
44 | |||
45 | config &= ~0x2; | ||
46 | /* disable access to config space*/ | ||
47 | pci_write_config_byte(dev, 0xf4, config); | ||
48 | } | ||
49 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance); | ||
50 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance); | ||
51 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance); | ||
52 | #endif | ||
diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c new file mode 100644 index 000000000000..3d7e994563df --- /dev/null +++ b/arch/i386/kernel/reboot.c | |||
@@ -0,0 +1,382 @@ | |||
1 | /* | ||
2 | * linux/arch/i386/kernel/reboot.c | ||
3 | */ | ||
4 | |||
5 | #include <linux/mm.h> | ||
6 | #include <linux/module.h> | ||
7 | #include <linux/delay.h> | ||
8 | #include <linux/init.h> | ||
9 | #include <linux/interrupt.h> | ||
10 | #include <linux/mc146818rtc.h> | ||
11 | #include <linux/efi.h> | ||
12 | #include <linux/dmi.h> | ||
13 | #include <asm/uaccess.h> | ||
14 | #include <asm/apic.h> | ||
15 | #include "mach_reboot.h" | ||
16 | |||
17 | /* | ||
18 | * Power off function, if any | ||
19 | */ | ||
20 | void (*pm_power_off)(void); | ||
21 | |||
22 | static int reboot_mode; | ||
23 | static int reboot_thru_bios; | ||
24 | |||
25 | #ifdef CONFIG_SMP | ||
26 | int reboot_smp = 0; | ||
27 | static int reboot_cpu = -1; | ||
28 | /* shamelessly grabbed from lib/vsprintf.c for readability */ | ||
29 | #define is_digit(c) ((c) >= '0' && (c) <= '9') | ||
30 | #endif | ||
31 | static int __init reboot_setup(char *str) | ||
32 | { | ||
33 | while(1) { | ||
34 | switch (*str) { | ||
35 | case 'w': /* "warm" reboot (no memory testing etc) */ | ||
36 | reboot_mode = 0x1234; | ||
37 | break; | ||
38 | case 'c': /* "cold" reboot (with memory testing etc) */ | ||
39 | reboot_mode = 0x0; | ||
40 | break; | ||
41 | case 'b': /* "bios" reboot by jumping through the BIOS */ | ||
42 | reboot_thru_bios = 1; | ||
43 | break; | ||
44 | case 'h': /* "hard" reboot by toggling RESET and/or crashing the CPU */ | ||
45 | reboot_thru_bios = 0; | ||
46 | break; | ||
47 | #ifdef CONFIG_SMP | ||
48 | case 's': /* "smp" reboot by executing reset on BSP or other CPU*/ | ||
49 | reboot_smp = 1; | ||
50 | if (is_digit(*(str+1))) { | ||
51 | reboot_cpu = (int) (*(str+1) - '0'); | ||
52 | if (is_digit(*(str+2))) | ||
53 | reboot_cpu = reboot_cpu*10 + (int)(*(str+2) - '0'); | ||
54 | } | ||
55 | /* we will leave sorting out the final value | ||
56 | until we are ready to reboot, since we might not | ||
57 | have set up boot_cpu_id or smp_num_cpu */ | ||
58 | break; | ||
59 | #endif | ||
60 | } | ||
61 | if((str = strchr(str,',')) != NULL) | ||
62 | str++; | ||
63 | else | ||
64 | break; | ||
65 | } | ||
66 | return 1; | ||
67 | } | ||
68 | |||
69 | __setup("reboot=", reboot_setup); | ||
70 | |||
71 | /* | ||
72 | * Reboot options and system auto-detection code provided by | ||
73 | * Dell Inc. so their systems "just work". :-) | ||
74 | */ | ||
75 | |||
76 | /* | ||
77 | * Some machines require the "reboot=b" command-line option; this quirk makes that automatic. | ||
78 | */ | ||
79 | static int __init set_bios_reboot(struct dmi_system_id *d) | ||
80 | { | ||
81 | if (!reboot_thru_bios) { | ||
82 | reboot_thru_bios = 1; | ||
83 | printk(KERN_INFO "%s series board detected. Selecting BIOS-method for reboots.\n", d->ident); | ||
84 | } | ||
85 | return 0; | ||
86 | } | ||
87 | |||
88 | /* | ||
89 | * Some machines require the "reboot=s" command-line option; this quirk makes that automatic. | ||
90 | */ | ||
91 | static int __init set_smp_reboot(struct dmi_system_id *d) | ||
92 | { | ||
93 | #ifdef CONFIG_SMP | ||
94 | if (!reboot_smp) { | ||
95 | reboot_smp = 1; | ||
96 | printk(KERN_INFO "%s series board detected. Selecting SMP-method for reboots.\n", d->ident); | ||
97 | } | ||
98 | #endif | ||
99 | return 0; | ||
100 | } | ||
101 | |||
102 | /* | ||
103 | * Some machines require the "reboot=b,s" command-line option; this quirk makes that automatic. | ||
104 | */ | ||
105 | static int __init set_smp_bios_reboot(struct dmi_system_id *d) | ||
106 | { | ||
107 | set_smp_reboot(d); | ||
108 | set_bios_reboot(d); | ||
109 | return 0; | ||
110 | } | ||
111 | |||
112 | static struct dmi_system_id __initdata reboot_dmi_table[] = { | ||
113 | { /* Handle problems with rebooting on Dell 1300's */ | ||
114 | .callback = set_smp_bios_reboot, | ||
115 | .ident = "Dell PowerEdge 1300", | ||
116 | .matches = { | ||
117 | DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), | ||
118 | DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 1300/"), | ||
119 | }, | ||
120 | }, | ||
121 | { /* Handle problems with rebooting on Dell 300's */ | ||
122 | .callback = set_bios_reboot, | ||
123 | .ident = "Dell PowerEdge 300", | ||
124 | .matches = { | ||
125 | DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), | ||
126 | DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 300/"), | ||
127 | }, | ||
128 | }, | ||
129 | { /* Handle problems with rebooting on Dell 2400's */ | ||
130 | .callback = set_bios_reboot, | ||
131 | .ident = "Dell PowerEdge 2400", | ||
132 | .matches = { | ||
133 | DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), | ||
134 | DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2400"), | ||
135 | }, | ||
136 | }, | ||
137 | { } | ||
138 | }; | ||
139 | |||
140 | static int __init reboot_init(void) | ||
141 | { | ||
142 | dmi_check_system(reboot_dmi_table); | ||
143 | return 0; | ||
144 | } | ||
145 | |||
146 | core_initcall(reboot_init); | ||
147 | |||
148 | /* The following code and data reboots the machine by switching to real | ||
149 | mode and jumping to the BIOS reset entry point, as if the CPU had | ||
150 | really been reset. The previous version asked the keyboard | ||
151 | controller to pulse the CPU reset line, which is more thorough, but | ||
152 | doesn't work with at least one type of 486 motherboard. It is easy | ||
153 | to stop this code working; hence the copious comments. */ | ||
154 | |||
155 | static unsigned long long | ||
156 | real_mode_gdt_entries [3] = | ||
157 | { | ||
158 | 0x0000000000000000ULL, /* Null descriptor */ | ||
159 | 0x00009a000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */ | ||
160 | 0x000092000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ | ||
161 | }; | ||
162 | |||
163 | static struct | ||
164 | { | ||
165 | unsigned short size __attribute__ ((packed)); | ||
166 | unsigned long long * base __attribute__ ((packed)); | ||
167 | } | ||
168 | real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, real_mode_gdt_entries }, | ||
169 | real_mode_idt = { 0x3ff, NULL }, | ||
170 | no_idt = { 0, NULL }; | ||
171 | |||
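Unpacking those 64-bit constants makes the layout visible. A hedged standalone decoder, assuming the standard descriptor format (access byte in bits 40-47):

	#include <stdio.h>

	static void decode(unsigned long long d)
	{
		unsigned int base = ((d >> 16) & 0xffffff) |
				    (((d >> 56) & 0xff) << 24);
		unsigned int limit = (d & 0xffff) | (((d >> 48) & 0xf) << 16);
		unsigned int access = (d >> 40) & 0xff;

		printf("base=%#010x limit=%#07x access=%#04x\n",
		       base, limit, access);
	}

	int main(void)
	{
		decode(0x00009a000000ffffULL);	/* code: base 0, 64k */
		decode(0x000092000100ffffULL);	/* data: base 0x100, 64k */
		return 0;
	}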
172 | |||
173 | /* This is 16-bit protected mode code to disable paging and the cache, | ||
174 | switch to real mode and jump to the BIOS reset code. | ||
175 | |||
176 | The instruction that switches to real mode by writing to CR0 must be | ||
177 | followed immediately by a far jump instruction, which sets CS to a | ||
178 | valid value for real mode, and flushes the prefetch queue to avoid | ||
179 | running instructions that have already been decoded in protected | ||
180 | mode. | ||
181 | |||
182 | Clears all the flags except ET, especially PG (paging), PE | ||
183 | (protected-mode enable) and TS (task switch for coprocessor state | ||
184 | save). Flushes the TLB after paging has been disabled. Sets CD and | ||
185 | NW, to disable the cache on a 486, and invalidates the cache. This | ||
186 | is more like the state of a 486 after reset. I don't know if | ||
187 | something else should be done for other chips. | ||
188 | |||
189 | More could be done here to set up the registers as if a CPU reset had | ||
190 | occurred; hopefully real BIOSs don't assume much. */ | ||
191 | |||
192 | static unsigned char real_mode_switch [] = | ||
193 | { | ||
194 | 0x66, 0x0f, 0x20, 0xc0, /* movl %cr0,%eax */ | ||
195 | 0x66, 0x83, 0xe0, 0x11, /* andl $0x00000011,%eax */ | ||
196 | 0x66, 0x0d, 0x00, 0x00, 0x00, 0x60, /* orl $0x60000000,%eax */ | ||
197 | 0x66, 0x0f, 0x22, 0xc0, /* movl %eax,%cr0 */ | ||
198 | 0x66, 0x0f, 0x22, 0xd8, /* movl %eax,%cr3 */ | ||
199 | 0x66, 0x0f, 0x20, 0xc3, /* movl %cr0,%ebx */ | ||
200 | 0x66, 0x81, 0xe3, 0x00, 0x00, 0x00, 0x60, /* andl $0x60000000,%ebx */ | ||
201 | 0x74, 0x02, /* jz f */ | ||
202 | 0x0f, 0x09, /* wbinvd */ | ||
203 | 0x24, 0x10, /* f: andb $0x10,al */ | ||
204 | 0x66, 0x0f, 0x22, 0xc0 /* movl %eax,%cr0 */ | ||
205 | }; | ||
206 | static unsigned char jump_to_bios [] = | ||
207 | { | ||
208 | 0xea, 0x00, 0x00, 0xff, 0xff /* ljmp $0xffff,$0x0000 */ | ||
209 | }; | ||
210 | |||
211 | /* | ||
212 | * Switch to real mode and then execute the code | ||
213 | * specified by the code and length parameters. | ||
214 | * We assume that length will always be less than 100! | ||
215 | */ | ||
216 | void machine_real_restart(unsigned char *code, int length) | ||
217 | { | ||
218 | unsigned long flags; | ||
219 | |||
220 | local_irq_disable(); | ||
221 | |||
222 | /* Write zero to CMOS register number 0x0f, which the BIOS POST | ||
223 | routine will recognize as telling it to do a proper reboot. (Well | ||
224 | that's what this book in front of me says -- it may only apply to | ||
225 | the Phoenix BIOS though, it's not clear). At the same time, | ||
226 | disable NMIs by setting the top bit in the CMOS address register, | ||
227 | as we're about to do peculiar things to the CPU. I'm not sure if | ||
228 | `outb_p' is needed instead of just `outb'. Use it to be on the | ||
229 | safe side. (Yes, CMOS_WRITE does outb_p's. - Paul G.) | ||
230 | */ | ||
231 | |||
232 | spin_lock_irqsave(&rtc_lock, flags); | ||
233 | CMOS_WRITE(0x00, 0x8f); | ||
234 | spin_unlock_irqrestore(&rtc_lock, flags); | ||
235 | |||
236 | /* Remap the kernel at virtual address zero, as well as offset zero | ||
237 | from the kernel segment. This assumes the kernel segment starts at | ||
238 | virtual address PAGE_OFFSET. */ | ||
239 | |||
240 | memcpy (swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, | ||
241 | sizeof (swapper_pg_dir [0]) * KERNEL_PGD_PTRS); | ||
242 | |||
243 | /* | ||
244 | * Use `swapper_pg_dir' as our page directory. | ||
245 | */ | ||
246 | load_cr3(swapper_pg_dir); | ||
247 | |||
248 | /* Write 0x1234 to absolute memory location 0x472. The BIOS reads | ||
249 | this on booting to tell it to "Bypass memory test (also warm | ||
250 | boot)". This seems like a fairly standard thing that gets set by | ||
251 | REBOOT.COM programs, and the previous reset routine did this | ||
252 | too. */ | ||
253 | |||
254 | *((unsigned short *)0x472) = reboot_mode; | ||
255 | |||
256 | /* For the switch to real mode, copy some code to low memory. It has | ||
257 | to be in the first 64k because it is running in 16-bit mode, and it | ||
258 | has to have the same physical and virtual address, because it turns | ||
259 | off paging. Copy it near the end of the first page, out of the way | ||
260 | of BIOS variables. */ | ||
261 | |||
262 | memcpy ((void *) (0x1000 - sizeof (real_mode_switch) - 100), | ||
263 | real_mode_switch, sizeof (real_mode_switch)); | ||
264 | memcpy ((void *) (0x1000 - 100), code, length); | ||
265 | |||
266 | /* Set up the IDT for real mode. */ | ||
267 | |||
268 | __asm__ __volatile__ ("lidt %0" : : "m" (real_mode_idt)); | ||
269 | |||
270 | /* Set up a GDT from which we can load segment descriptors for real | ||
271 | mode. The GDT is not used in real mode; it is just needed here to | ||
272 | prepare the descriptors. */ | ||
273 | |||
274 | __asm__ __volatile__ ("lgdt %0" : : "m" (real_mode_gdt)); | ||
275 | |||
276 | /* Load the data segment registers, and thus the descriptors ready for | ||
277 | real mode. The base address of each segment is 0x100, 16 times the | ||
278 | selector value being loaded here. This is so that the segment | ||
279 | registers don't have to be reloaded after switching to real mode: | ||
280 | the values are consistent for real mode operation already. */ | ||
281 | |||
282 | __asm__ __volatile__ ("movl $0x0010,%%eax\n" | ||
283 | "\tmovl %%eax,%%ds\n" | ||
284 | "\tmovl %%eax,%%es\n" | ||
285 | "\tmovl %%eax,%%fs\n" | ||
286 | "\tmovl %%eax,%%gs\n" | ||
287 | "\tmovl %%eax,%%ss" : : : "eax"); | ||
288 | |||
289 | /* Jump to the 16-bit code that we copied earlier. It disables paging | ||
290 | and the cache, switches to real mode, and jumps to the BIOS reset | ||
291 | entry point. */ | ||
292 | |||
293 | __asm__ __volatile__ ("ljmp $0x0008,%0" | ||
294 | : | ||
295 | : "i" ((void *) (0x1000 - sizeof (real_mode_switch) - 100))); | ||
296 | } | ||
297 | |||
298 | void machine_restart(char * __unused) | ||
299 | { | ||
300 | #ifdef CONFIG_SMP | ||
301 | int cpuid; | ||
302 | |||
303 | cpuid = GET_APIC_ID(apic_read(APIC_ID)); | ||
304 | |||
305 | if (reboot_smp) { | ||
306 | |||
307 | /* check to see if reboot_cpu is valid | ||
308 | if it's not, default to the BSP */ | ||
309 | if ((reboot_cpu == -1) || | ||
310 | (reboot_cpu > (NR_CPUS -1)) || | ||
311 | !physid_isset(cpuid, phys_cpu_present_map)) | ||
312 | reboot_cpu = boot_cpu_physical_apicid; | ||
313 | |||
314 | reboot_smp = 0; /* use this as a flag to only go through this once */ | ||
315 | /* re-run this function on the other CPUs | ||
316 | it will fall through this section since we have | ||
317 | cleared reboot_smp, and do the reboot if it is the | ||
318 | correct CPU, otherwise it halts. */ | ||
319 | if (reboot_cpu != cpuid) | ||
320 | smp_call_function((void *)machine_restart , NULL, 1, 0); | ||
321 | } | ||
322 | |||
323 | /* if reboot_cpu is still -1, then we want a traditional reboot, | ||
324 | and if we are not running on the reboot_cpu, halt */ | ||
325 | if ((reboot_cpu != -1) && (cpuid != reboot_cpu)) { | ||
326 | for (;;) | ||
327 | __asm__ __volatile__ ("hlt"); | ||
328 | } | ||
329 | /* | ||
330 | * Stop all CPUs and turn off local APICs and the IO-APIC, so | ||
331 | * other OSs see a clean IRQ state. | ||
332 | */ | ||
333 | smp_send_stop(); | ||
334 | #endif /* CONFIG_SMP */ | ||
335 | |||
336 | lapic_shutdown(); | ||
337 | |||
338 | #ifdef CONFIG_X86_IO_APIC | ||
339 | disable_IO_APIC(); | ||
340 | #endif | ||
341 | |||
342 | if (!reboot_thru_bios) { | ||
343 | if (efi_enabled) { | ||
344 | efi.reset_system(EFI_RESET_COLD, EFI_SUCCESS, 0, NULL); | ||
345 | __asm__ __volatile__("lidt %0": :"m" (no_idt)); | ||
346 | __asm__ __volatile__("int3"); | ||
347 | } | ||
348 | /* rebooting needs to touch the page at absolute addr 0 */ | ||
349 | *((unsigned short *)__va(0x472)) = reboot_mode; | ||
350 | for (;;) { | ||
351 | mach_reboot(); | ||
352 | /* That didn't work - force a triple fault.. */ | ||
353 | __asm__ __volatile__("lidt %0": :"m" (no_idt)); | ||
354 | __asm__ __volatile__("int3"); | ||
355 | } | ||
356 | } | ||
357 | if (efi_enabled) | ||
358 | efi.reset_system(EFI_RESET_WARM, EFI_SUCCESS, 0, NULL); | ||
359 | |||
360 | machine_real_restart(jump_to_bios, sizeof(jump_to_bios)); | ||
361 | } | ||
362 | |||
363 | EXPORT_SYMBOL(machine_restart); | ||
364 | |||
365 | void machine_halt(void) | ||
366 | { | ||
367 | } | ||
368 | |||
369 | EXPORT_SYMBOL(machine_halt); | ||
370 | |||
371 | void machine_power_off(void) | ||
372 | { | ||
373 | lapic_shutdown(); | ||
374 | |||
375 | if (efi_enabled) | ||
376 | efi.reset_system(EFI_RESET_SHUTDOWN, EFI_SUCCESS, 0, NULL); | ||
377 | if (pm_power_off) | ||
378 | pm_power_off(); | ||
379 | } | ||
380 | |||
381 | EXPORT_SYMBOL(machine_power_off); | ||
382 | |||
diff --git a/arch/i386/kernel/scx200.c b/arch/i386/kernel/scx200.c new file mode 100644 index 000000000000..69e203a0d330 --- /dev/null +++ b/arch/i386/kernel/scx200.c | |||
@@ -0,0 +1,167 @@ | |||
1 | /* linux/arch/i386/kernel/scx200.c | ||
2 | |||
3 | Copyright (c) 2001,2002 Christer Weinigel <wingel@nano-system.com> | ||
4 | |||
5 | National Semiconductor SCx200 support. */ | ||
6 | |||
7 | #include <linux/config.h> | ||
8 | #include <linux/module.h> | ||
9 | #include <linux/errno.h> | ||
10 | #include <linux/kernel.h> | ||
11 | #include <linux/init.h> | ||
12 | #include <linux/pci.h> | ||
13 | |||
14 | #include <linux/scx200.h> | ||
15 | |||
16 | /* Verify that the configuration block really is there */ | ||
17 | #define scx200_cb_probe(base) (inw((base) + SCx200_CBA) == (base)) | ||
18 | |||
19 | #define NAME "scx200" | ||
20 | |||
21 | MODULE_AUTHOR("Christer Weinigel <wingel@nano-system.com>"); | ||
22 | MODULE_DESCRIPTION("NatSemi SCx200 Driver"); | ||
23 | MODULE_LICENSE("GPL"); | ||
24 | |||
25 | unsigned scx200_gpio_base = 0; | ||
26 | long scx200_gpio_shadow[2]; | ||
27 | |||
28 | unsigned scx200_cb_base = 0; | ||
29 | |||
30 | static struct pci_device_id scx200_tbl[] = { | ||
31 | { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SCx200_BRIDGE) }, | ||
32 | { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE) }, | ||
33 | { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SCx200_XBUS) }, | ||
34 | { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_XBUS) }, | ||
35 | { }, | ||
36 | }; | ||
37 | MODULE_DEVICE_TABLE(pci,scx200_tbl); | ||
38 | |||
39 | static int __devinit scx200_probe(struct pci_dev *, const struct pci_device_id *); | ||
40 | |||
41 | static struct pci_driver scx200_pci_driver = { | ||
42 | .name = "scx200", | ||
43 | .id_table = scx200_tbl, | ||
44 | .probe = scx200_probe, | ||
45 | }; | ||
46 | |||
47 | static DEFINE_SPINLOCK(scx200_gpio_config_lock); | ||
48 | |||
49 | static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_id *ent) | ||
50 | { | ||
51 | int bank; | ||
52 | unsigned base; | ||
53 | |||
54 | if (pdev->device == PCI_DEVICE_ID_NS_SCx200_BRIDGE || | ||
55 | pdev->device == PCI_DEVICE_ID_NS_SC1100_BRIDGE) { | ||
56 | base = pci_resource_start(pdev, 0); | ||
57 | printk(KERN_INFO NAME ": GPIO base 0x%x\n", base); | ||
58 | |||
59 | if (request_region(base, SCx200_GPIO_SIZE, "NatSemi SCx200 GPIO") == 0) { | ||
60 | printk(KERN_ERR NAME ": can't allocate I/O for GPIOs\n"); | ||
61 | return -EBUSY; | ||
62 | } | ||
63 | |||
64 | scx200_gpio_base = base; | ||
65 | |||
66 | /* read the current values driven on the GPIO signals */ | ||
67 | for (bank = 0; bank < 2; ++bank) | ||
68 | scx200_gpio_shadow[bank] = inl(scx200_gpio_base + 0x10 * bank); | ||
69 | |||
70 | } else { | ||
71 | /* find the base of the Configuration Block */ | ||
72 | if (scx200_cb_probe(SCx200_CB_BASE_FIXED)) { | ||
73 | scx200_cb_base = SCx200_CB_BASE_FIXED; | ||
74 | } else { | ||
75 | pci_read_config_dword(pdev, SCx200_CBA_SCRATCH, &base); | ||
76 | if (scx200_cb_probe(base)) { | ||
77 | scx200_cb_base = base; | ||
78 | } else { | ||
79 | printk(KERN_WARNING NAME ": Configuration Block not found\n"); | ||
80 | return -ENODEV; | ||
81 | } | ||
82 | } | ||
83 | printk(KERN_INFO NAME ": Configuration Block base 0x%x\n", scx200_cb_base); | ||
84 | } | ||
85 | |||
86 | return 0; | ||
87 | } | ||
88 | |||
89 | u32 scx200_gpio_configure(int index, u32 mask, u32 bits) | ||
90 | { | ||
91 | u32 config, new_config; | ||
92 | unsigned long flags; | ||
93 | |||
94 | spin_lock_irqsave(&scx200_gpio_config_lock, flags); | ||
95 | |||
96 | outl(index, scx200_gpio_base + 0x20); | ||
97 | config = inl(scx200_gpio_base + 0x24); | ||
98 | |||
99 | new_config = (config & mask) | bits; | ||
100 | outl(new_config, scx200_gpio_base + 0x24); | ||
101 | |||
102 | spin_unlock_irqrestore(&scx200_gpio_config_lock, flags); | ||
103 | |||
104 | return config; | ||
105 | } | ||
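Since new_config = (config & mask) | bits, callers set bits with mask = ~0 and clear them by punching holes in mask. A hedged usage sketch (pin number illustrative; bit meanings as in the dump function below):

	/* drive GPIO 7 as a push-pull output: OE is bit 0, PP is bit 1 */
	scx200_gpio_configure(7, ~0, 0x03);

	/* turn its pull-up (PUE, bit 2) off without touching the rest */
	scx200_gpio_configure(7, ~0x04, 0);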
106 | |||
107 | #if 0 | ||
108 | void scx200_gpio_dump(unsigned index) | ||
109 | { | ||
110 | u32 config = scx200_gpio_configure(index, ~0, 0); | ||
111 | printk(KERN_DEBUG "GPIO%02u: 0x%08lx", index, (unsigned long)config); | ||
112 | |||
113 | if (config & 1) | ||
114 | printk(" OE"); /* output enabled */ | ||
115 | else | ||
116 | printk(" TS"); /* tristate */ | ||
117 | if (config & 2) | ||
118 | printk(" PP"); /* push pull */ | ||
119 | else | ||
120 | printk(" OD"); /* open drain */ | ||
121 | if (config & 4) | ||
122 | printk(" PUE"); /* pull up enabled */ | ||
123 | else | ||
124 | printk(" PUD"); /* pull up disabled */ | ||
125 | if (config & 8) | ||
126 | printk(" LOCKED"); /* locked */ | ||
127 | if (config & 16) | ||
128 | printk(" LEVEL"); /* level input */ | ||
129 | else | ||
130 | printk(" EDGE"); /* edge input */ | ||
131 | if (config & 32) | ||
132 | printk(" HI"); /* trigger on rising edge */ | ||
133 | else | ||
134 | printk(" LO"); /* trigger on falling edge */ | ||
135 | if (config & 64) | ||
136 | printk(" DEBOUNCE"); /* debounce */ | ||
137 | printk("\n"); | ||
138 | } | ||
139 | #endif /* 0 */ | ||
140 | |||
141 | static int __init scx200_init(void) | ||
142 | { | ||
143 | printk(KERN_INFO NAME ": NatSemi SCx200 Driver\n"); | ||
144 | |||
145 | return pci_module_init(&scx200_pci_driver); | ||
146 | } | ||
147 | |||
148 | static void __exit scx200_cleanup(void) | ||
149 | { | ||
150 | pci_unregister_driver(&scx200_pci_driver); | ||
151 | release_region(scx200_gpio_base, SCx200_GPIO_SIZE); | ||
152 | } | ||
153 | |||
154 | module_init(scx200_init); | ||
155 | module_exit(scx200_cleanup); | ||
156 | |||
157 | EXPORT_SYMBOL(scx200_gpio_base); | ||
158 | EXPORT_SYMBOL(scx200_gpio_shadow); | ||
159 | EXPORT_SYMBOL(scx200_gpio_configure); | ||
160 | EXPORT_SYMBOL(scx200_cb_base); | ||
161 | |||
162 | /* | ||
163 | Local variables: | ||
164 | compile-command: "make -k -C ../../.. SUBDIRS=arch/i386/kernel modules" | ||
165 | c-basic-offset: 8 | ||
166 | End: | ||
167 | */ | ||
diff --git a/arch/i386/kernel/semaphore.c b/arch/i386/kernel/semaphore.c new file mode 100644 index 000000000000..469f496e55c0 --- /dev/null +++ b/arch/i386/kernel/semaphore.c | |||
@@ -0,0 +1,297 @@ | |||
1 | /* | ||
2 | * i386 semaphore implementation. | ||
3 | * | ||
4 | * (C) Copyright 1999 Linus Torvalds | ||
5 | * | ||
6 | * Portions Copyright 1999 Red Hat, Inc. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License | ||
10 | * as published by the Free Software Foundation; either version | ||
11 | * 2 of the License, or (at your option) any later version. | ||
12 | * | ||
13 | * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org> | ||
14 | */ | ||
15 | #include <linux/config.h> | ||
16 | #include <linux/sched.h> | ||
17 | #include <linux/err.h> | ||
18 | #include <linux/init.h> | ||
19 | #include <asm/semaphore.h> | ||
20 | |||
21 | /* | ||
22 | * Semaphores are implemented using a two-way counter: | ||
23 | * The "count" variable is decremented for each process | ||
24 | * that tries to acquire the semaphore, while the "sleeping" | ||
25 | * variable counts the processes that went to sleep waiting for it. | ||
26 | * | ||
27 | * Notably, the inline "up()" and "down()" functions can | ||
28 | * efficiently test if they need to do any extra work (up | ||
29 | * needs to do something only if count was negative before | ||
30 | * the increment operation). | ||
31 | * | ||
32 | * "sleeping" and the contention routine ordering is protected | ||
33 | * by the spinlock in the semaphore's waitqueue head. | ||
34 | * | ||
35 | * Note that these functions are only called when there is | ||
36 | * contention on the lock, and as such all this is the | ||
37 | * "non-critical" part of the whole semaphore business. The | ||
38 | * critical part is the inline stuff in <asm/semaphore.h> | ||
39 | * where we want to avoid any extra jumps and calls. | ||
40 | */ | ||
41 | |||
42 | /* | ||
43 | * Logic: | ||
44 | * - only on a boundary condition do we need to care. When we go | ||
45 | * from a negative count to a non-negative, we wake people up. | ||
46 | * - when we go from a non-negative count to a negative do we | ||
47 | * (a) synchronize with the "sleeper" count and (b) make sure | ||
48 | * that we're on the wakeup list before we synchronize so that | ||
49 | * we cannot lose wakeup events. | ||
50 | */ | ||
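/*
 * Editor's note: a minimal user-space sketch of the count/sleepers
 * bookkeeping described above, under stated assumptions: plain ints
 * stand in for atomic_t and for the waitqueue spinlock, so only the
 * arithmetic is illustrated, not the real synchronization, and all
 * names below are illustrative rather than kernel API.
 */
#include <stdio.h>

static int count = 1;		/* 1 = free, <= 0 = held/contended */
static int sleepers = 0;	/* would-be holders that went to sleep */

static void sketch_down(void)
{
	if (--count >= 0)
		return;			/* fast path: acquired */
	sleepers++;
	/* __down(): fold "everybody else" back into the count */
	count += sleepers - 1;		/* atomic_add_negative() in the real code */
	if (count >= 0) {
		sleepers = 0;		/* got the semaphore after all */
		return;
	}
	sleepers = 1;			/* just us; the rest live in count */
	/* ...the real code would schedule() here and retry... */
}

int main(void)
{
	sketch_down();	/* acquires: count 1 -> 0 */
	sketch_down();	/* contends: count goes negative */
	printf("count=%d sleepers=%d\n", count, sleepers);
	return 0;
}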
51 | |||
52 | static fastcall void __attribute_used__ __up(struct semaphore *sem) | ||
53 | { | ||
54 | wake_up(&sem->wait); | ||
55 | } | ||
56 | |||
57 | static fastcall void __attribute_used__ __sched __down(struct semaphore * sem) | ||
58 | { | ||
59 | struct task_struct *tsk = current; | ||
60 | DECLARE_WAITQUEUE(wait, tsk); | ||
61 | unsigned long flags; | ||
62 | |||
63 | tsk->state = TASK_UNINTERRUPTIBLE; | ||
64 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
65 | add_wait_queue_exclusive_locked(&sem->wait, &wait); | ||
66 | |||
67 | sem->sleepers++; | ||
68 | for (;;) { | ||
69 | int sleepers = sem->sleepers; | ||
70 | |||
71 | /* | ||
72 | * Add "everybody else" into it. They aren't | ||
73 | * playing, because we own the spinlock in | ||
74 | * the wait_queue_head. | ||
75 | */ | ||
76 | if (!atomic_add_negative(sleepers - 1, &sem->count)) { | ||
77 | sem->sleepers = 0; | ||
78 | break; | ||
79 | } | ||
80 | sem->sleepers = 1; /* us - see -1 above */ | ||
81 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
82 | |||
83 | schedule(); | ||
84 | |||
85 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
86 | tsk->state = TASK_UNINTERRUPTIBLE; | ||
87 | } | ||
88 | remove_wait_queue_locked(&sem->wait, &wait); | ||
89 | wake_up_locked(&sem->wait); | ||
90 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
91 | tsk->state = TASK_RUNNING; | ||
92 | } | ||
93 | |||
94 | static fastcall int __attribute_used__ __sched __down_interruptible(struct semaphore * sem) | ||
95 | { | ||
96 | int retval = 0; | ||
97 | struct task_struct *tsk = current; | ||
98 | DECLARE_WAITQUEUE(wait, tsk); | ||
99 | unsigned long flags; | ||
100 | |||
101 | tsk->state = TASK_INTERRUPTIBLE; | ||
102 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
103 | add_wait_queue_exclusive_locked(&sem->wait, &wait); | ||
104 | |||
105 | sem->sleepers++; | ||
106 | for (;;) { | ||
107 | int sleepers = sem->sleepers; | ||
108 | |||
109 | /* | ||
110 | * With signals pending, this turns into | ||
111 | * the trylock failure case - we won't be | ||
112 | * sleeping, and we can't get the lock as | ||
113 | * it has contention. Just correct the count | ||
114 | * and exit. | ||
115 | */ | ||
116 | if (signal_pending(current)) { | ||
117 | retval = -EINTR; | ||
118 | sem->sleepers = 0; | ||
119 | atomic_add(sleepers, &sem->count); | ||
120 | break; | ||
121 | } | ||
122 | |||
123 | /* | ||
124 | * Add "everybody else" into it. They aren't | ||
125 | * playing, because we own the spinlock in | ||
126 | * wait_queue_head. The "-1" is because we're | ||
127 | * still hoping to get the semaphore. | ||
128 | */ | ||
129 | if (!atomic_add_negative(sleepers - 1, &sem->count)) { | ||
130 | sem->sleepers = 0; | ||
131 | break; | ||
132 | } | ||
133 | sem->sleepers = 1; /* us - see -1 above */ | ||
134 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
135 | |||
136 | schedule(); | ||
137 | |||
138 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
139 | tsk->state = TASK_INTERRUPTIBLE; | ||
140 | } | ||
141 | remove_wait_queue_locked(&sem->wait, &wait); | ||
142 | wake_up_locked(&sem->wait); | ||
143 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
144 | |||
145 | tsk->state = TASK_RUNNING; | ||
146 | return retval; | ||
147 | } | ||
148 | |||
149 | /* | ||
150 | * Trylock failed - make sure we correct for | ||
151 | * having decremented the count. | ||
152 | * | ||
153 | * We could have done the trylock with a | ||
154 | * single "cmpxchg" without failure cases, | ||
155 | * but then it wouldn't work on a 386. | ||
156 | */ | ||
157 | static fastcall int __attribute_used__ __down_trylock(struct semaphore * sem) | ||
158 | { | ||
159 | int sleepers; | ||
160 | unsigned long flags; | ||
161 | |||
162 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
163 | sleepers = sem->sleepers + 1; | ||
164 | sem->sleepers = 0; | ||
165 | |||
166 | /* | ||
167 | * Add "everybody else" and us into it. They aren't | ||
168 | * playing, because we own the spinlock in the | ||
169 | * wait_queue_head. | ||
170 | */ | ||
171 | if (!atomic_add_negative(sleepers, &sem->count)) { | ||
172 | wake_up_locked(&sem->wait); | ||
173 | } | ||
174 | |||
175 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
176 | return 1; | ||
177 | } | ||
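/*
 * Editor's note: the comment above mentions that this trylock could be
 * a single cmpxchg if 386 support were not required. A hedged sketch of
 * that alternative, using the GCC __sync builtin in place of the
 * kernel's cmpxchg (illustrative only, not what this file compiles):
 */
static int cmpxchg_down_trylock(int *count)
{
	int old = *count;

	while (old > 0) {
		int seen = __sync_val_compare_and_swap(count, old, old - 1);
		if (seen == old)
			return 0;	/* acquired */
		old = seen;		/* lost a race; re-examine the count */
	}
	return 1;			/* contended: report failure, like __down_trylock */
}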
178 | |||
179 | |||
180 | /* | ||
181 | * The semaphore operations have a special calling sequence that | ||
182 | * allows us to do a simpler in-line version of them. These routines | ||
183 | * need to convert that sequence back into the C sequence when | ||
184 | * there is contention on the semaphore. | ||
185 | * | ||
186 | * %eax contains the semaphore pointer on entry. Save the C-clobbered | ||
187 | * registers (%eax, %edx and %ecx) except %eax, which is either a return | ||
188 | * value or just clobbered. | ||
189 | */ | ||
190 | asm( | ||
191 | ".section .sched.text\n" | ||
192 | ".align 4\n" | ||
193 | ".globl __down_failed\n" | ||
194 | "__down_failed:\n\t" | ||
195 | #if defined(CONFIG_FRAME_POINTER) | ||
196 | "pushl %ebp\n\t" | ||
197 | "movl %esp,%ebp\n\t" | ||
198 | #endif | ||
199 | "pushl %edx\n\t" | ||
200 | "pushl %ecx\n\t" | ||
201 | "call __down\n\t" | ||
202 | "popl %ecx\n\t" | ||
203 | "popl %edx\n\t" | ||
204 | #if defined(CONFIG_FRAME_POINTER) | ||
205 | "movl %ebp,%esp\n\t" | ||
206 | "popl %ebp\n\t" | ||
207 | #endif | ||
208 | "ret" | ||
209 | ); | ||
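/*
 * Editor's note: the __down_failed stub above pairs with an inline fast
 * path in <asm/semaphore.h>. A hedged, simplified sketch of that fast
 * path (modelled on the i386 header of this era, not quoted verbatim):
 * a locked decrement of sem->count, branching to the out-of-line slow
 * path with the semaphore pointer in %eax only when it went negative.
 */
static inline void down_sketch(struct semaphore *sem)
{
	__asm__ __volatile__(
		LOCK "decl %0\n\t"	/* --sem->count */
		"js 2f\n"		/* went negative: contended */
		"1:\n"
		".subsection 1\n"	/* out-of-line slow path */
		"2:\tleal %0,%%eax\n\t"
		"call __down_failed\n\t"
		"jmp 1b\n"
		".subsection 0\n"
		: "=m" (sem->count)
		:
		: "memory", "ax");
}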
210 | |||
211 | asm( | ||
212 | ".section .sched.text\n" | ||
213 | ".align 4\n" | ||
214 | ".globl __down_failed_interruptible\n" | ||
215 | "__down_failed_interruptible:\n\t" | ||
216 | #if defined(CONFIG_FRAME_POINTER) | ||
217 | "pushl %ebp\n\t" | ||
218 | "movl %esp,%ebp\n\t" | ||
219 | #endif | ||
220 | "pushl %edx\n\t" | ||
221 | "pushl %ecx\n\t" | ||
222 | "call __down_interruptible\n\t" | ||
223 | "popl %ecx\n\t" | ||
224 | "popl %edx\n\t" | ||
225 | #if defined(CONFIG_FRAME_POINTER) | ||
226 | "movl %ebp,%esp\n\t" | ||
227 | "popl %ebp\n\t" | ||
228 | #endif | ||
229 | "ret" | ||
230 | ); | ||
231 | |||
232 | asm( | ||
233 | ".section .sched.text\n" | ||
234 | ".align 4\n" | ||
235 | ".globl __down_failed_trylock\n" | ||
236 | "__down_failed_trylock:\n\t" | ||
237 | #if defined(CONFIG_FRAME_POINTER) | ||
238 | "pushl %ebp\n\t" | ||
239 | "movl %esp,%ebp\n\t" | ||
240 | #endif | ||
241 | "pushl %edx\n\t" | ||
242 | "pushl %ecx\n\t" | ||
243 | "call __down_trylock\n\t" | ||
244 | "popl %ecx\n\t" | ||
245 | "popl %edx\n\t" | ||
246 | #if defined(CONFIG_FRAME_POINTER) | ||
247 | "movl %ebp,%esp\n\t" | ||
248 | "popl %ebp\n\t" | ||
249 | #endif | ||
250 | "ret" | ||
251 | ); | ||
252 | |||
253 | asm( | ||
254 | ".section .sched.text\n" | ||
255 | ".align 4\n" | ||
256 | ".globl __up_wakeup\n" | ||
257 | "__up_wakeup:\n\t" | ||
258 | "pushl %edx\n\t" | ||
259 | "pushl %ecx\n\t" | ||
260 | "call __up\n\t" | ||
261 | "popl %ecx\n\t" | ||
262 | "popl %edx\n\t" | ||
263 | "ret" | ||
264 | ); | ||
265 | |||
266 | /* | ||
267 | * rw spinlock fallbacks | ||
268 | */ | ||
269 | #if defined(CONFIG_SMP) | ||
270 | asm( | ||
271 | ".section .sched.text\n" | ||
272 | ".align 4\n" | ||
273 | ".globl __write_lock_failed\n" | ||
274 | "__write_lock_failed:\n\t" | ||
275 | LOCK "addl $" RW_LOCK_BIAS_STR ",(%eax)\n" | ||
276 | "1: rep; nop\n\t" | ||
277 | "cmpl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" | ||
278 | "jne 1b\n\t" | ||
279 | LOCK "subl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" | ||
280 | "jnz __write_lock_failed\n\t" | ||
281 | "ret" | ||
282 | ); | ||
283 | |||
284 | asm( | ||
285 | ".section .sched.text\n" | ||
286 | ".align 4\n" | ||
287 | ".globl __read_lock_failed\n" | ||
288 | "__read_lock_failed:\n\t" | ||
289 | LOCK "incl (%eax)\n" | ||
290 | "1: rep; nop\n\t" | ||
291 | "cmpl $1,(%eax)\n\t" | ||
292 | "js 1b\n\t" | ||
293 | LOCK "decl (%eax)\n\t" | ||
294 | "js __read_lock_failed\n\t" | ||
295 | "ret" | ||
296 | ); | ||
297 | #endif | ||
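/*
 * Editor's note: a small user-space sketch of the RW_LOCK_BIAS scheme
 * the fallbacks above spin on (bias value taken from the i386 headers
 * of this era; a plain int stands in for the real lock word): an idle
 * lock holds the full bias, each reader subtracts 1, and a writer
 * subtracts the whole bias, so the word equals the bias when idle and
 * goes negative when a writer is in (or wants in).
 */
#include <stdio.h>

#define RW_LOCK_BIAS 0x01000000

int main(void)
{
	int lock = RW_LOCK_BIAS;	/* unlocked */

	lock -= 1;			/* read_lock: one reader in */
	printf("reader in: %#x (positive: no writer)\n", lock);
	lock += 1;			/* read_unlock */

	lock -= RW_LOCK_BIAS;		/* write_lock: takes the whole bias */
	printf("writer in: %#x (zero only when no readers)\n", lock);
	lock += RW_LOCK_BIAS;		/* write_unlock */
	return 0;
}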
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c new file mode 100644 index 000000000000..945ec73163c8 --- /dev/null +++ b/arch/i386/kernel/setup.c | |||
@@ -0,0 +1,1535 @@ | |||
1 | /* | ||
2 | * linux/arch/i386/kernel/setup.c | ||
3 | * | ||
4 | * Copyright (C) 1995 Linus Torvalds | ||
5 | * | ||
6 | * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 | ||
7 | * | ||
8 | * Memory region support | ||
9 | * David Parsons <orc@pell.chi.il.us>, July-August 1999 | ||
10 | * | ||
11 | * Added E820 sanitization routine (removes overlapping memory regions); | ||
12 | * Brian Moyle <bmoyle@mvista.com>, February 2001 | ||
13 | * | ||
14 | * Moved CPU detection code to cpu/${cpu}.c | ||
15 | * Patrick Mochel <mochel@osdl.org>, March 2002 | ||
16 | * | ||
17 | * Provisions for empty E820 memory regions (reported by certain BIOSes). | ||
18 | * Alex Achenbach <xela@slit.de>, December 2002. | ||
19 | * | ||
20 | */ | ||
21 | |||
22 | /* | ||
23 | * This file handles the architecture-dependent parts of initialization | ||
24 | */ | ||
25 | |||
26 | #include <linux/sched.h> | ||
27 | #include <linux/mm.h> | ||
28 | #include <linux/tty.h> | ||
29 | #include <linux/ioport.h> | ||
30 | #include <linux/acpi.h> | ||
31 | #include <linux/apm_bios.h> | ||
32 | #include <linux/initrd.h> | ||
33 | #include <linux/bootmem.h> | ||
34 | #include <linux/seq_file.h> | ||
35 | #include <linux/console.h> | ||
36 | #include <linux/mca.h> | ||
37 | #include <linux/root_dev.h> | ||
38 | #include <linux/highmem.h> | ||
39 | #include <linux/module.h> | ||
40 | #include <linux/efi.h> | ||
41 | #include <linux/init.h> | ||
42 | #include <linux/edd.h> | ||
43 | #include <linux/nodemask.h> | ||
44 | #include <video/edid.h> | ||
45 | #include <asm/e820.h> | ||
46 | #include <asm/mpspec.h> | ||
47 | #include <asm/setup.h> | ||
48 | #include <asm/arch_hooks.h> | ||
49 | #include <asm/sections.h> | ||
50 | #include <asm/io_apic.h> | ||
51 | #include <asm/ist.h> | ||
52 | #include <asm/io.h> | ||
53 | #include "setup_arch_pre.h" | ||
54 | #include <bios_ebda.h> | ||
55 | |||
56 | /* This value is set up by the early boot code to point just past | ||
57 | the boot time page tables. It contains a *physical* | ||
58 | address, and must not be in the .bss segment! */ | ||
59 | unsigned long init_pg_tables_end __initdata = ~0UL; | ||
60 | |||
61 | int disable_pse __initdata = 0; | ||
62 | |||
63 | /* | ||
64 | * Machine setup.. | ||
65 | */ | ||
66 | |||
67 | #ifdef CONFIG_EFI | ||
68 | int efi_enabled = 0; | ||
69 | EXPORT_SYMBOL(efi_enabled); | ||
70 | #endif | ||
71 | |||
72 | /* cpu data as detected by the assembly code in head.S */ | ||
73 | struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; | ||
74 | /* common cpu data for all cpus */ | ||
75 | struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; | ||
76 | |||
77 | unsigned long mmu_cr4_features; | ||
78 | |||
79 | #ifdef CONFIG_ACPI_INTERPRETER | ||
80 | int acpi_disabled = 0; | ||
81 | #else | ||
82 | int acpi_disabled = 1; | ||
83 | #endif | ||
84 | EXPORT_SYMBOL(acpi_disabled); | ||
85 | |||
86 | #ifdef CONFIG_ACPI_BOOT | ||
87 | int __initdata acpi_force = 0; | ||
88 | extern acpi_interrupt_flags acpi_sci_flags; | ||
89 | #endif | ||
90 | |||
91 | /* for MCA, but anyone else can use it if they want */ | ||
92 | unsigned int machine_id; | ||
93 | unsigned int machine_submodel_id; | ||
94 | unsigned int BIOS_revision; | ||
95 | unsigned int mca_pentium_flag; | ||
96 | |||
97 | /* For PCI or other memory-mapped resources */ | ||
98 | unsigned long pci_mem_start = 0x10000000; | ||
99 | |||
100 | /* Boot loader ID as an integer, for the benefit of proc_dointvec */ | ||
101 | int bootloader_type; | ||
102 | |||
103 | /* user-defined highmem size */ | ||
104 | static unsigned int highmem_pages = -1; | ||
105 | |||
106 | /* | ||
107 | * Setup options | ||
108 | */ | ||
109 | struct drive_info_struct { char dummy[32]; } drive_info; | ||
110 | struct screen_info screen_info; | ||
111 | struct apm_info apm_info; | ||
112 | struct sys_desc_table_struct { | ||
113 | unsigned short length; | ||
114 | unsigned char table[0]; | ||
115 | }; | ||
116 | struct edid_info edid_info; | ||
117 | struct ist_info ist_info; | ||
118 | struct e820map e820; | ||
119 | |||
120 | extern void early_cpu_init(void); | ||
121 | extern void dmi_scan_machine(void); | ||
122 | extern void generic_apic_probe(char *); | ||
123 | extern int root_mountflags; | ||
124 | |||
125 | unsigned long saved_videomode; | ||
126 | |||
127 | #define RAMDISK_IMAGE_START_MASK 0x07FF | ||
128 | #define RAMDISK_PROMPT_FLAG 0x8000 | ||
129 | #define RAMDISK_LOAD_FLAG 0x4000 | ||
130 | |||
131 | static char command_line[COMMAND_LINE_SIZE]; | ||
132 | |||
133 | unsigned char __initdata boot_params[PARAM_SIZE]; | ||
134 | |||
135 | static struct resource data_resource = { | ||
136 | .name = "Kernel data", | ||
137 | .start = 0, | ||
138 | .end = 0, | ||
139 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM | ||
140 | }; | ||
141 | |||
142 | static struct resource code_resource = { | ||
143 | .name = "Kernel code", | ||
144 | .start = 0, | ||
145 | .end = 0, | ||
146 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM | ||
147 | }; | ||
148 | |||
149 | static struct resource system_rom_resource = { | ||
150 | .name = "System ROM", | ||
151 | .start = 0xf0000, | ||
152 | .end = 0xfffff, | ||
153 | .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
154 | }; | ||
155 | |||
156 | static struct resource extension_rom_resource = { | ||
157 | .name = "Extension ROM", | ||
158 | .start = 0xe0000, | ||
159 | .end = 0xeffff, | ||
160 | .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
161 | }; | ||
162 | |||
163 | static struct resource adapter_rom_resources[] = { { | ||
164 | .name = "Adapter ROM", | ||
165 | .start = 0xc8000, | ||
166 | .end = 0, | ||
167 | .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
168 | }, { | ||
169 | .name = "Adapter ROM", | ||
170 | .start = 0, | ||
171 | .end = 0, | ||
172 | .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
173 | }, { | ||
174 | .name = "Adapter ROM", | ||
175 | .start = 0, | ||
176 | .end = 0, | ||
177 | .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
178 | }, { | ||
179 | .name = "Adapter ROM", | ||
180 | .start = 0, | ||
181 | .end = 0, | ||
182 | .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
183 | }, { | ||
184 | .name = "Adapter ROM", | ||
185 | .start = 0, | ||
186 | .end = 0, | ||
187 | .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
188 | }, { | ||
189 | .name = "Adapter ROM", | ||
190 | .start = 0, | ||
191 | .end = 0, | ||
192 | .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
193 | } }; | ||
194 | |||
195 | #define ADAPTER_ROM_RESOURCES \ | ||
196 | (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0]) | ||
197 | |||
198 | static struct resource video_rom_resource = { | ||
199 | .name = "Video ROM", | ||
200 | .start = 0xc0000, | ||
201 | .end = 0xc7fff, | ||
202 | .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
203 | }; | ||
204 | |||
205 | static struct resource video_ram_resource = { | ||
206 | .name = "Video RAM area", | ||
207 | .start = 0xa0000, | ||
208 | .end = 0xbffff, | ||
209 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM | ||
210 | }; | ||
211 | |||
212 | static struct resource standard_io_resources[] = { { | ||
213 | .name = "dma1", | ||
214 | .start = 0x0000, | ||
215 | .end = 0x001f, | ||
216 | .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
217 | }, { | ||
218 | .name = "pic1", | ||
219 | .start = 0x0020, | ||
220 | .end = 0x0021, | ||
221 | .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
222 | }, { | ||
223 | .name = "timer0", | ||
224 | .start = 0x0040, | ||
225 | .end = 0x0043, | ||
226 | .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
227 | }, { | ||
228 | .name = "timer1", | ||
229 | .start = 0x0050, | ||
230 | .end = 0x0053, | ||
231 | .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
232 | }, { | ||
233 | .name = "keyboard", | ||
234 | .start = 0x0060, | ||
235 | .end = 0x006f, | ||
236 | .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
237 | }, { | ||
238 | .name = "dma page reg", | ||
239 | .start = 0x0080, | ||
240 | .end = 0x008f, | ||
241 | .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
242 | }, { | ||
243 | .name = "pic2", | ||
244 | .start = 0x00a0, | ||
245 | .end = 0x00a1, | ||
246 | .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
247 | }, { | ||
248 | .name = "dma2", | ||
249 | .start = 0x00c0, | ||
250 | .end = 0x00df, | ||
251 | .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
252 | }, { | ||
253 | .name = "fpu", | ||
254 | .start = 0x00f0, | ||
255 | .end = 0x00ff, | ||
256 | .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
257 | } }; | ||
258 | |||
259 | #define STANDARD_IO_RESOURCES \ | ||
260 | (sizeof standard_io_resources / sizeof standard_io_resources[0]) | ||
261 | |||
262 | #define romsignature(x) (*(unsigned short *)(x) == 0xaa55) | ||
263 | |||
264 | static int __init romchecksum(unsigned char *rom, unsigned long length) | ||
265 | { | ||
266 | unsigned char *p, sum = 0; | ||
267 | |||
268 | for (p = rom; p < rom + length; p++) | ||
269 | sum += *p; | ||
270 | return sum == 0; | ||
271 | } | ||
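/*
 * Editor's note: a tiny sketch of the option-ROM convention the two
 * helpers above rely on (the well-known PC BIOS layout): bytes 0-1
 * hold the 0xAA55 signature, byte 2 the image length in 512-byte
 * units, and all bytes of a valid image sum to zero modulo 256.
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
	unsigned char rom[512];
	unsigned int sum = 0, i;

	memset(rom, 0, sizeof(rom));
	rom[0] = 0x55; rom[1] = 0xAA;	/* signature (little-endian 0xAA55) */
	rom[2] = 1;			/* length: 1 * 512 bytes */
	for (i = 0; i < 511; i++)
		sum += rom[i];
	rom[511] = (unsigned char)(0x100 - (sum & 0xff));	/* fix up checksum */

	printf("checksum ok: %d\n", ((sum + rom[511]) & 0xff) == 0);
	return 0;
}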
272 | |||
273 | static void __init probe_roms(void) | ||
274 | { | ||
275 | unsigned long start, length, upper; | ||
276 | unsigned char *rom; | ||
277 | int i; | ||
278 | |||
279 | /* video rom */ | ||
280 | upper = adapter_rom_resources[0].start; | ||
281 | for (start = video_rom_resource.start; start < upper; start += 2048) { | ||
282 | rom = isa_bus_to_virt(start); | ||
283 | if (!romsignature(rom)) | ||
284 | continue; | ||
285 | |||
286 | video_rom_resource.start = start; | ||
287 | |||
288 | /* 0 < length <= 0x7f * 512, historically */ | ||
289 | length = rom[2] * 512; | ||
290 | |||
291 | /* if checksum okay, trust length byte */ | ||
292 | if (length && romchecksum(rom, length)) | ||
293 | video_rom_resource.end = start + length - 1; | ||
294 | |||
295 | request_resource(&iomem_resource, &video_rom_resource); | ||
296 | break; | ||
297 | } | ||
298 | |||
299 | start = (video_rom_resource.end + 1 + 2047) & ~2047UL; | ||
300 | if (start < upper) | ||
301 | start = upper; | ||
302 | |||
303 | /* system rom */ | ||
304 | request_resource(&iomem_resource, &system_rom_resource); | ||
305 | upper = system_rom_resource.start; | ||
306 | |||
307 | /* check for extension rom (ignore length byte!) */ | ||
308 | rom = isa_bus_to_virt(extension_rom_resource.start); | ||
309 | if (romsignature(rom)) { | ||
310 | length = extension_rom_resource.end - extension_rom_resource.start + 1; | ||
311 | if (romchecksum(rom, length)) { | ||
312 | request_resource(&iomem_resource, &extension_rom_resource); | ||
313 | upper = extension_rom_resource.start; | ||
314 | } | ||
315 | } | ||
316 | |||
317 | /* check for adapter roms on 2k boundaries */ | ||
318 | for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) { | ||
319 | rom = isa_bus_to_virt(start); | ||
320 | if (!romsignature(rom)) | ||
321 | continue; | ||
322 | |||
323 | /* 0 < length <= 0x7f * 512, historically */ | ||
324 | length = rom[2] * 512; | ||
325 | |||
326 | /* but accept any length that fits if checksum okay */ | ||
327 | if (!length || start + length > upper || !romchecksum(rom, length)) | ||
328 | continue; | ||
329 | |||
330 | adapter_rom_resources[i].start = start; | ||
331 | adapter_rom_resources[i].end = start + length - 1; | ||
332 | request_resource(&iomem_resource, &adapter_rom_resources[i]); | ||
333 | |||
334 | start = adapter_rom_resources[i++].end & ~2047UL; | ||
335 | } | ||
336 | } | ||
337 | |||
338 | static void __init limit_regions(unsigned long long size) | ||
339 | { | ||
340 | unsigned long long current_addr = 0; | ||
341 | int i; | ||
342 | |||
343 | if (efi_enabled) { | ||
344 | for (i = 0; i < memmap.nr_map; i++) { | ||
345 | current_addr = memmap.map[i].phys_addr + | ||
346 | (memmap.map[i].num_pages << 12); | ||
347 | if (memmap.map[i].type == EFI_CONVENTIONAL_MEMORY) { | ||
348 | if (current_addr >= size) { | ||
349 | memmap.map[i].num_pages -= | ||
350 | (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT); | ||
351 | memmap.nr_map = i + 1; | ||
352 | return; | ||
353 | } | ||
354 | } | ||
355 | } | ||
356 | } | ||
357 | for (i = 0; i < e820.nr_map; i++) { | ||
358 | if (e820.map[i].type == E820_RAM) { | ||
359 | current_addr = e820.map[i].addr + e820.map[i].size; | ||
360 | if (current_addr >= size) { | ||
361 | e820.map[i].size -= current_addr-size; | ||
362 | e820.nr_map = i + 1; | ||
363 | return; | ||
364 | } | ||
365 | } | ||
366 | } | ||
367 | } | ||
368 | |||
369 | static void __init add_memory_region(unsigned long long start, | ||
370 | unsigned long long size, int type) | ||
371 | { | ||
372 | int x; | ||
373 | |||
374 | if (!efi_enabled) { | ||
375 | x = e820.nr_map; | ||
376 | |||
377 | if (x == E820MAX) { | ||
378 | printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); | ||
379 | return; | ||
380 | } | ||
381 | |||
382 | e820.map[x].addr = start; | ||
383 | e820.map[x].size = size; | ||
384 | e820.map[x].type = type; | ||
385 | e820.nr_map++; | ||
386 | } | ||
387 | } /* add_memory_region */ | ||
388 | |||
389 | #define E820_DEBUG 1 | ||
390 | |||
391 | static void __init print_memory_map(char *who) | ||
392 | { | ||
393 | int i; | ||
394 | |||
395 | for (i = 0; i < e820.nr_map; i++) { | ||
396 | printk(" %s: %016Lx - %016Lx ", who, | ||
397 | e820.map[i].addr, | ||
398 | e820.map[i].addr + e820.map[i].size); | ||
399 | switch (e820.map[i].type) { | ||
400 | case E820_RAM: printk("(usable)\n"); | ||
401 | break; | ||
402 | case E820_RESERVED: | ||
403 | printk("(reserved)\n"); | ||
404 | break; | ||
405 | case E820_ACPI: | ||
406 | printk("(ACPI data)\n"); | ||
407 | break; | ||
408 | case E820_NVS: | ||
409 | printk("(ACPI NVS)\n"); | ||
410 | break; | ||
411 | default: printk("type %lu\n", e820.map[i].type); | ||
412 | break; | ||
413 | } | ||
414 | } | ||
415 | } | ||
416 | |||
417 | /* | ||
418 | * Sanitize the BIOS e820 map. | ||
419 | * | ||
420 | * Some e820 responses include overlapping entries. The following | ||
421 | * replaces the original e820 map with a new one, removing overlaps. | ||
422 | * | ||
423 | */ | ||
424 | struct change_member { | ||
425 | struct e820entry *pbios; /* pointer to original bios entry */ | ||
426 | unsigned long long addr; /* address for this change point */ | ||
427 | }; | ||
428 | static struct change_member change_point_list[2*E820MAX] __initdata; | ||
429 | static struct change_member *change_point[2*E820MAX] __initdata; | ||
430 | static struct e820entry *overlap_list[E820MAX] __initdata; | ||
431 | static struct e820entry new_bios[E820MAX] __initdata; | ||
432 | |||
433 | static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) | ||
434 | { | ||
435 | struct change_member *change_tmp; | ||
436 | unsigned long current_type, last_type; | ||
437 | unsigned long long last_addr; | ||
438 | int chgidx, still_changing; | ||
439 | int overlap_entries; | ||
440 | int new_bios_entry; | ||
441 | int old_nr, new_nr, chg_nr; | ||
442 | int i; | ||
443 | |||
444 | /* | ||
445 | Visually we're performing the following (1,2,3,4 = memory types)... | ||
446 | |||
447 | Sample memory map (w/overlaps): | ||
448 | ____22__________________ | ||
449 | ______________________4_ | ||
450 | ____1111________________ | ||
451 | _44_____________________ | ||
452 | 11111111________________ | ||
453 | ____________________33__ | ||
454 | ___________44___________ | ||
455 | __________33333_________ | ||
456 | ______________22________ | ||
457 | ___________________2222_ | ||
458 | _________111111111______ | ||
459 | _____________________11_ | ||
460 | _________________4______ | ||
461 | |||
462 | Sanitized equivalent (no overlap): | ||
463 | 1_______________________ | ||
464 | _44_____________________ | ||
465 | ___1____________________ | ||
466 | ____22__________________ | ||
467 | ______11________________ | ||
468 | _________1______________ | ||
469 | __________3_____________ | ||
470 | ___________44___________ | ||
471 | _____________33_________ | ||
472 | _______________2________ | ||
473 | ________________1_______ | ||
474 | _________________4______ | ||
475 | ___________________2____ | ||
476 | ____________________33__ | ||
477 | ______________________4_ | ||
478 | */ | ||
479 | |||
480 | /* if there's only one memory region, don't bother */ | ||
481 | if (*pnr_map < 2) | ||
482 | return -1; | ||
483 | |||
484 | old_nr = *pnr_map; | ||
485 | |||
486 | /* bail out if we find any unreasonable addresses in bios map */ | ||
487 | for (i=0; i<old_nr; i++) | ||
488 | if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) | ||
489 | return -1; | ||
490 | |||
491 | /* create pointers for initial change-point information (for sorting) */ | ||
492 | for (i=0; i < 2*old_nr; i++) | ||
493 | change_point[i] = &change_point_list[i]; | ||
494 | |||
495 | /* record all known change-points (starting and ending addresses), | ||
496 | omitting those that are for empty memory regions */ | ||
497 | chgidx = 0; | ||
498 | for (i=0; i < old_nr; i++) { | ||
499 | if (biosmap[i].size != 0) { | ||
500 | change_point[chgidx]->addr = biosmap[i].addr; | ||
501 | change_point[chgidx++]->pbios = &biosmap[i]; | ||
502 | change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size; | ||
503 | change_point[chgidx++]->pbios = &biosmap[i]; | ||
504 | } | ||
505 | } | ||
506 | chg_nr = chgidx; /* true number of change-points */ | ||
507 | |||
508 | /* sort change-point list by memory addresses (low -> high) */ | ||
509 | still_changing = 1; | ||
510 | while (still_changing) { | ||
511 | still_changing = 0; | ||
512 | for (i=1; i < chg_nr; i++) { | ||
513 | /* if <current_addr> > <last_addr>, swap */ | ||
514 | /* or, if current=<start_addr> & last=<end_addr>, swap */ | ||
515 | if ((change_point[i]->addr < change_point[i-1]->addr) || | ||
516 | ((change_point[i]->addr == change_point[i-1]->addr) && | ||
517 | (change_point[i]->addr == change_point[i]->pbios->addr) && | ||
518 | (change_point[i-1]->addr != change_point[i-1]->pbios->addr)) | ||
519 | ) | ||
520 | { | ||
521 | change_tmp = change_point[i]; | ||
522 | change_point[i] = change_point[i-1]; | ||
523 | change_point[i-1] = change_tmp; | ||
524 | still_changing=1; | ||
525 | } | ||
526 | } | ||
527 | } | ||
528 | |||
529 | /* create a new bios memory map, removing overlaps */ | ||
530 | overlap_entries=0; /* number of entries in the overlap table */ | ||
531 | new_bios_entry=0; /* index for creating new bios map entries */ | ||
532 | last_type = 0; /* start with undefined memory type */ | ||
533 | last_addr = 0; /* start with 0 as last starting address */ | ||
534 | /* loop through change-points, determining effect on the new bios map */ | ||
535 | for (chgidx=0; chgidx < chg_nr; chgidx++) | ||
536 | { | ||
537 | /* keep track of all overlapping bios entries */ | ||
538 | if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr) | ||
539 | { | ||
540 | /* add map entry to overlap list (> 1 entry implies an overlap) */ | ||
541 | overlap_list[overlap_entries++]=change_point[chgidx]->pbios; | ||
542 | } | ||
543 | else | ||
544 | { | ||
545 | /* remove entry from list (order independent, so swap with last) */ | ||
546 | for (i=0; i<overlap_entries; i++) | ||
547 | { | ||
548 | if (overlap_list[i] == change_point[chgidx]->pbios) | ||
549 | overlap_list[i] = overlap_list[overlap_entries-1]; | ||
550 | } | ||
551 | overlap_entries--; | ||
552 | } | ||
553 | /* if there are overlapping entries, decide which "type" to use */ | ||
554 | /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */ | ||
555 | current_type = 0; | ||
556 | for (i=0; i<overlap_entries; i++) | ||
557 | if (overlap_list[i]->type > current_type) | ||
558 | current_type = overlap_list[i]->type; | ||
559 | /* continue building up new bios map based on this information */ | ||
560 | if (current_type != last_type) { | ||
561 | if (last_type != 0) { | ||
562 | new_bios[new_bios_entry].size = | ||
563 | change_point[chgidx]->addr - last_addr; | ||
564 | /* move forward only if the new size was non-zero */ | ||
565 | if (new_bios[new_bios_entry].size != 0) | ||
566 | if (++new_bios_entry >= E820MAX) | ||
567 | break; /* no more space left for new bios entries */ | ||
568 | } | ||
569 | if (current_type != 0) { | ||
570 | new_bios[new_bios_entry].addr = change_point[chgidx]->addr; | ||
571 | new_bios[new_bios_entry].type = current_type; | ||
572 | last_addr=change_point[chgidx]->addr; | ||
573 | } | ||
574 | last_type = current_type; | ||
575 | } | ||
576 | } | ||
577 | new_nr = new_bios_entry; /* retain count for new bios entries */ | ||
578 | |||
579 | /* copy new bios mapping into original location */ | ||
580 | memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry)); | ||
581 | *pnr_map = new_nr; | ||
582 | |||
583 | return 0; | ||
584 | } | ||
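/*
 * Editor's note: a worked example of the sweep above (values are
 * illustrative only). Two overlapping BIOS entries:
 *
 *	[0x00000, 0xA0000) type 1 (usable)
 *	[0x90000, 0xB0000) type 2 (reserved)
 *
 * yield four change points: 0x00000 (start, 1), 0x90000 (start, 2),
 * 0xA0000 (end, 1), 0xB0000 (end, 2). Sweeping them with "largest
 * type wins" emits the non-overlapping map
 *
 *	[0x00000, 0x90000) type 1
 *	[0x90000, 0xB0000) type 2
 *
 * i.e. the reserved entry clips the usable one.
 */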
585 | |||
586 | /* | ||
587 | * Copy the BIOS e820 map into a safe place. | ||
588 | * | ||
589 | * Sanity-check it while we're at it.. | ||
590 | * | ||
591 | * If we're lucky and live on a modern system, the setup code | ||
592 | * will have given us a memory map that we can use to properly | ||
593 | * set up memory. If we aren't, we'll fake a memory map. | ||
594 | * | ||
595 | * We check to see that the memory map contains at least 2 elements | ||
596 | * before we'll use it, because the detection code in setup.S may | ||
597 | * not be perfect and almost every PC known to man has two memory | ||
598 | * regions: one from 0 to 640k, and one from 1mb up. (The IBM | ||
599 | * thinkpad 560x, for example, does not cooperate with the memory | ||
600 | * detection code.) | ||
601 | */ | ||
602 | static int __init copy_e820_map(struct e820entry * biosmap, int nr_map) | ||
603 | { | ||
604 | /* Only one memory region (or negative)? Ignore it */ | ||
605 | if (nr_map < 2) | ||
606 | return -1; | ||
607 | |||
608 | do { | ||
609 | unsigned long long start = biosmap->addr; | ||
610 | unsigned long long size = biosmap->size; | ||
611 | unsigned long long end = start + size; | ||
612 | unsigned long type = biosmap->type; | ||
613 | |||
614 | /* Overflow in 64 bits? Ignore the memory map. */ | ||
615 | if (start > end) | ||
616 | return -1; | ||
617 | |||
618 | /* | ||
619 | * Some BIOSes claim RAM in the 640k - 1M region. | ||
620 | * Not right. Fix it up. | ||
621 | */ | ||
622 | if (type == E820_RAM) { | ||
623 | if (start < 0x100000ULL && end > 0xA0000ULL) { | ||
624 | if (start < 0xA0000ULL) | ||
625 | add_memory_region(start, 0xA0000ULL-start, type); | ||
626 | if (end <= 0x100000ULL) | ||
627 | continue; | ||
628 | start = 0x100000ULL; | ||
629 | size = end - start; | ||
630 | } | ||
631 | } | ||
632 | add_memory_region(start, size, type); | ||
633 | } while (biosmap++, --nr_map); | ||
634 | return 0; | ||
635 | } | ||
636 | |||
637 | #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) | ||
638 | struct edd edd; | ||
639 | #ifdef CONFIG_EDD_MODULE | ||
640 | EXPORT_SYMBOL(edd); | ||
641 | #endif | ||
642 | /** | ||
643 | * copy_edd() - Copy the BIOS EDD information | ||
644 | * from boot_params into a safe place. | ||
645 | * | ||
646 | */ | ||
647 | static inline void copy_edd(void) | ||
648 | { | ||
649 | memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature)); | ||
650 | memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info)); | ||
651 | edd.mbr_signature_nr = EDD_MBR_SIG_NR; | ||
652 | edd.edd_info_nr = EDD_NR; | ||
653 | } | ||
654 | #else | ||
655 | static inline void copy_edd(void) | ||
656 | { | ||
657 | } | ||
658 | #endif | ||
659 | |||
660 | /* | ||
661 | * Do NOT EVER look at the BIOS memory size location. | ||
662 | * It does not work on many machines. | ||
663 | */ | ||
664 | #define LOWMEMSIZE() (0x9f000) | ||
665 | |||
666 | static void __init parse_cmdline_early (char ** cmdline_p) | ||
667 | { | ||
668 | char c = ' ', *to = command_line, *from = saved_command_line; | ||
669 | int len = 0; | ||
670 | int userdef = 0; | ||
671 | |||
672 | /* Save unparsed command line copy for /proc/cmdline */ | ||
673 | saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; | ||
674 | |||
675 | for (;;) { | ||
676 | if (c != ' ') | ||
677 | goto next_char; | ||
678 | /* | ||
679 | * "mem=nopentium" disables the 4MB page tables. | ||
680 | * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM | ||
681 | * to <mem>, overriding the bios size. | ||
682 | * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from | ||
683 | * <start> to <start>+<mem>, overriding the bios size. | ||
684 | * | ||
685 | * HPA tells me bootloaders need to parse mem=, so no new | ||
686 | * option should reuse the mem= prefix [also see Documentation/i386/boot.txt] | ||
687 | */ | ||
688 | if (!memcmp(from, "mem=", 4)) { | ||
689 | if (to != command_line) | ||
690 | to--; | ||
691 | if (!memcmp(from+4, "nopentium", 9)) { | ||
692 | from += 9+4; | ||
693 | clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); | ||
694 | disable_pse = 1; | ||
695 | } else { | ||
696 | /* If the user specifies memory size, we | ||
697 | * limit the BIOS-provided memory map to | ||
698 | * that size. exactmap can be used to specify | ||
699 | * the exact map. mem=number can be used to | ||
700 | * trim the existing memory map. | ||
701 | */ | ||
702 | unsigned long long mem_size; | ||
703 | |||
704 | mem_size = memparse(from+4, &from); | ||
705 | limit_regions(mem_size); | ||
706 | userdef=1; | ||
707 | } | ||
708 | } | ||
709 | |||
710 | else if (!memcmp(from, "memmap=", 7)) { | ||
711 | if (to != command_line) | ||
712 | to--; | ||
713 | if (!memcmp(from+7, "exactmap", 8)) { | ||
714 | from += 8+7; | ||
715 | e820.nr_map = 0; | ||
716 | userdef = 1; | ||
717 | } else { | ||
718 | /* If the user specifies memory size, we | ||
719 | * limit the BIOS-provided memory map to | ||
720 | * that size. exactmap can be used to specify | ||
721 | * the exact map. mem=number can be used to | ||
722 | * trim the existing memory map. | ||
723 | */ | ||
724 | unsigned long long start_at, mem_size; | ||
725 | |||
726 | mem_size = memparse(from+7, &from); | ||
727 | if (*from == '@') { | ||
728 | start_at = memparse(from+1, &from); | ||
729 | add_memory_region(start_at, mem_size, E820_RAM); | ||
730 | } else if (*from == '#') { | ||
731 | start_at = memparse(from+1, &from); | ||
732 | add_memory_region(start_at, mem_size, E820_ACPI); | ||
733 | } else if (*from == '$') { | ||
734 | start_at = memparse(from+1, &from); | ||
735 | add_memory_region(start_at, mem_size, E820_RESERVED); | ||
736 | } else { | ||
737 | limit_regions(mem_size); | ||
738 | userdef=1; | ||
739 | } | ||
740 | } | ||
741 | } | ||
742 | |||
743 | else if (!memcmp(from, "noexec=", 7)) | ||
744 | noexec_setup(from + 7); | ||
745 | |||
746 | |||
747 | #ifdef CONFIG_X86_SMP | ||
748 | /* | ||
749 | * If the BIOS enumerates physical processors before logical, | ||
750 | * maxcpus=N at enumeration-time can be used to disable HT. | ||
751 | */ | ||
752 | else if (!memcmp(from, "maxcpus=", 8)) { | ||
753 | extern unsigned int maxcpus; | ||
754 | |||
755 | maxcpus = simple_strtoul(from + 8, NULL, 0); | ||
756 | } | ||
757 | #endif | ||
758 | |||
759 | #ifdef CONFIG_ACPI_BOOT | ||
760 | /* "acpi=off" disables both ACPI table parsing and interpreter */ | ||
761 | else if (!memcmp(from, "acpi=off", 8)) { | ||
762 | disable_acpi(); | ||
763 | } | ||
764 | |||
765 | /* acpi=force to over-ride black-list */ | ||
766 | else if (!memcmp(from, "acpi=force", 10)) { | ||
767 | acpi_force = 1; | ||
768 | acpi_ht = 1; | ||
769 | acpi_disabled = 0; | ||
770 | } | ||
771 | |||
772 | /* acpi=strict disables out-of-spec workarounds */ | ||
773 | else if (!memcmp(from, "acpi=strict", 11)) { | ||
774 | acpi_strict = 1; | ||
775 | } | ||
776 | |||
777 | /* Limit ACPI just to boot-time to enable HT */ | ||
778 | else if (!memcmp(from, "acpi=ht", 7)) { | ||
779 | if (!acpi_force) | ||
780 | disable_acpi(); | ||
781 | acpi_ht = 1; | ||
782 | } | ||
783 | |||
784 | /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */ | ||
785 | else if (!memcmp(from, "pci=noacpi", 10)) { | ||
786 | acpi_disable_pci(); | ||
787 | } | ||
788 | /* "acpi=noirq" disables ACPI interrupt routing */ | ||
789 | else if (!memcmp(from, "acpi=noirq", 10)) { | ||
790 | acpi_noirq_set(); | ||
791 | } | ||
792 | |||
793 | else if (!memcmp(from, "acpi_sci=edge", 13)) | ||
794 | acpi_sci_flags.trigger = 1; | ||
795 | |||
796 | else if (!memcmp(from, "acpi_sci=level", 14)) | ||
797 | acpi_sci_flags.trigger = 3; | ||
798 | |||
799 | else if (!memcmp(from, "acpi_sci=high", 13)) | ||
800 | acpi_sci_flags.polarity = 1; | ||
801 | |||
802 | else if (!memcmp(from, "acpi_sci=low", 12)) | ||
803 | acpi_sci_flags.polarity = 3; | ||
804 | |||
805 | #ifdef CONFIG_X86_IO_APIC | ||
806 | else if (!memcmp(from, "acpi_skip_timer_override", 24)) | ||
807 | acpi_skip_timer_override = 1; | ||
808 | #endif | ||
809 | |||
810 | #ifdef CONFIG_X86_LOCAL_APIC | ||
811 | /* disable IO-APIC */ | ||
812 | else if (!memcmp(from, "noapic", 6)) | ||
813 | disable_ioapic_setup(); | ||
814 | #endif /* CONFIG_X86_LOCAL_APIC */ | ||
815 | #endif /* CONFIG_ACPI_BOOT */ | ||
816 | |||
817 | /* | ||
818 | * highmem=size forces highmem to be exactly 'size' bytes. | ||
819 | * This works even on boxes that have no highmem otherwise. | ||
820 | * This also works to reduce highmem size on bigger boxes. | ||
821 | */ | ||
822 | else if (!memcmp(from, "highmem=", 8)) | ||
823 | highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT; | ||
824 | |||
825 | /* | ||
826 | * vmalloc=size forces the vmalloc area to be exactly 'size' | ||
827 | * bytes. This can be used to increase (or decrease) the | ||
828 | * vmalloc area - the default is 128m. | ||
829 | */ | ||
830 | else if (!memcmp(from, "vmalloc=", 8)) | ||
831 | __VMALLOC_RESERVE = memparse(from+8, &from); | ||
832 | |||
833 | next_char: | ||
834 | c = *(from++); | ||
835 | if (!c) | ||
836 | break; | ||
837 | if (COMMAND_LINE_SIZE <= ++len) | ||
838 | break; | ||
839 | *(to++) = c; | ||
840 | } | ||
841 | *to = '\0'; | ||
842 | *cmdline_p = command_line; | ||
843 | if (userdef) { | ||
844 | printk(KERN_INFO "user-defined physical RAM map:\n"); | ||
845 | print_memory_map("user"); | ||
846 | } | ||
847 | } | ||
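/*
 * Editor's note: example command lines for the options parsed above
 * (values illustrative):
 *
 *	mem=512M			trim the BIOS map to 512 MB
 *	mem=nopentium			keep all RAM, disable 4MB pages
 *	memmap=exactmap memmap=640K@0 memmap=511M@1M
 *					discard the BIOS map, hand-build one
 *	memmap=64M#16M			mark 64 MB at 16 MB as ACPI data
 *	memmap=64M$16M			mark 64 MB at 16 MB as reserved
 */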
848 | |||
849 | /* | ||
850 | * Callback for efi_memory_walk. | ||
851 | */ | ||
852 | static int __init | ||
853 | efi_find_max_pfn(unsigned long start, unsigned long end, void *arg) | ||
854 | { | ||
855 | unsigned long *max_pfn = arg, pfn; | ||
856 | |||
857 | if (start < end) { | ||
858 | pfn = PFN_UP(end - 1); | ||
859 | if (pfn > *max_pfn) | ||
860 | *max_pfn = pfn; | ||
861 | } | ||
862 | return 0; | ||
863 | } | ||
864 | |||
865 | |||
866 | /* | ||
867 | * Find the highest page frame number we have available | ||
868 | */ | ||
869 | void __init find_max_pfn(void) | ||
870 | { | ||
871 | int i; | ||
872 | |||
873 | max_pfn = 0; | ||
874 | if (efi_enabled) { | ||
875 | efi_memmap_walk(efi_find_max_pfn, &max_pfn); | ||
876 | return; | ||
877 | } | ||
878 | |||
879 | for (i = 0; i < e820.nr_map; i++) { | ||
880 | unsigned long start, end; | ||
881 | /* RAM? */ | ||
882 | if (e820.map[i].type != E820_RAM) | ||
883 | continue; | ||
884 | start = PFN_UP(e820.map[i].addr); | ||
885 | end = PFN_DOWN(e820.map[i].addr + e820.map[i].size); | ||
886 | if (start >= end) | ||
887 | continue; | ||
888 | if (end > max_pfn) | ||
889 | max_pfn = end; | ||
890 | } | ||
891 | } | ||
892 | |||
893 | /* | ||
894 | * Determine low and high memory ranges: | ||
895 | */ | ||
896 | unsigned long __init find_max_low_pfn(void) | ||
897 | { | ||
898 | unsigned long max_low_pfn; | ||
899 | |||
900 | max_low_pfn = max_pfn; | ||
901 | if (max_low_pfn > MAXMEM_PFN) { | ||
902 | if (highmem_pages == -1) | ||
903 | highmem_pages = max_pfn - MAXMEM_PFN; | ||
904 | if (highmem_pages + MAXMEM_PFN < max_pfn) | ||
905 | max_pfn = MAXMEM_PFN + highmem_pages; | ||
906 | if (highmem_pages + MAXMEM_PFN > max_pfn) { | ||
907 | printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages)); | ||
908 | highmem_pages = 0; | ||
909 | } | ||
910 | max_low_pfn = MAXMEM_PFN; | ||
911 | #ifndef CONFIG_HIGHMEM | ||
912 | /* Maximum memory usable is what is directly addressable */ | ||
913 | printk(KERN_WARNING "Warning only %ldMB will be used.\n", | ||
914 | MAXMEM>>20); | ||
915 | if (max_pfn > MAX_NONPAE_PFN) | ||
916 | printk(KERN_WARNING "Use a PAE enabled kernel.\n"); | ||
917 | else | ||
918 | printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); | ||
919 | max_pfn = MAXMEM_PFN; | ||
920 | #else /* !CONFIG_HIGHMEM */ | ||
921 | #ifndef CONFIG_X86_PAE | ||
922 | if (max_pfn > MAX_NONPAE_PFN) { | ||
923 | max_pfn = MAX_NONPAE_PFN; | ||
924 | printk(KERN_WARNING "Warning only 4GB will be used.\n"); | ||
925 | printk(KERN_WARNING "Use a PAE enabled kernel.\n"); | ||
926 | } | ||
927 | #endif /* !CONFIG_X86_PAE */ | ||
928 | #endif /* !CONFIG_HIGHMEM */ | ||
929 | } else { | ||
930 | if (highmem_pages == -1) | ||
931 | highmem_pages = 0; | ||
932 | #ifdef CONFIG_HIGHMEM | ||
933 | if (highmem_pages >= max_pfn) { | ||
934 | printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn)); | ||
935 | highmem_pages = 0; | ||
936 | } | ||
937 | if (highmem_pages) { | ||
938 | if (max_low_pfn - highmem_pages < 64*1024*1024/PAGE_SIZE) { | ||
939 | printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages)); | ||
940 | highmem_pages = 0; | ||
941 | } | ||
942 | max_low_pfn -= highmem_pages; | ||
943 | } | ||
944 | #else | ||
945 | if (highmem_pages) | ||
946 | printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n"); | ||
947 | #endif | ||
948 | } | ||
949 | return max_low_pfn; | ||
950 | } | ||
951 | |||
952 | /* | ||
953 | * Free all available memory for boot time allocation. Used | ||
954 | * as a callback function by efi_memory_walk() | ||
955 | */ | ||
956 | |||
957 | static int __init | ||
958 | free_available_memory(unsigned long start, unsigned long end, void *arg) | ||
959 | { | ||
960 | /* check max_low_pfn */ | ||
961 | if (start >= ((max_low_pfn + 1) << PAGE_SHIFT)) | ||
962 | return 0; | ||
963 | if (end >= ((max_low_pfn + 1) << PAGE_SHIFT)) | ||
964 | end = (max_low_pfn + 1) << PAGE_SHIFT; | ||
965 | if (start < end) | ||
966 | free_bootmem(start, end - start); | ||
967 | |||
968 | return 0; | ||
969 | } | ||
970 | /* | ||
971 | * Register fully available low RAM pages with the bootmem allocator. | ||
972 | */ | ||
973 | static void __init register_bootmem_low_pages(unsigned long max_low_pfn) | ||
974 | { | ||
975 | int i; | ||
976 | |||
977 | if (efi_enabled) { | ||
978 | efi_memmap_walk(free_available_memory, NULL); | ||
979 | return; | ||
980 | } | ||
981 | for (i = 0; i < e820.nr_map; i++) { | ||
982 | unsigned long curr_pfn, last_pfn, size; | ||
983 | /* | ||
984 | * Reserve usable low memory | ||
985 | */ | ||
986 | if (e820.map[i].type != E820_RAM) | ||
987 | continue; | ||
988 | /* | ||
989 | * We are rounding up the start address of usable memory: | ||
990 | */ | ||
991 | curr_pfn = PFN_UP(e820.map[i].addr); | ||
992 | if (curr_pfn >= max_low_pfn) | ||
993 | continue; | ||
994 | /* | ||
995 | * ... and at the end of the usable range downwards: | ||
996 | */ | ||
997 | last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size); | ||
998 | |||
999 | if (last_pfn > max_low_pfn) | ||
1000 | last_pfn = max_low_pfn; | ||
1001 | |||
1002 | /* | ||
1003 | * .. finally, did all the rounding and playing | ||
1004 | * around just make the area go away? | ||
1005 | */ | ||
1006 | if (last_pfn <= curr_pfn) | ||
1007 | continue; | ||
1008 | |||
1009 | size = last_pfn - curr_pfn; | ||
1010 | free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size)); | ||
1011 | } | ||
1012 | } | ||
1013 | |||
1014 | /* | ||
1015 | * workaround for Dell systems that neglect to reserve EBDA | ||
1016 | */ | ||
1017 | static void __init reserve_ebda_region(void) | ||
1018 | { | ||
1019 | unsigned int addr; | ||
1020 | addr = get_bios_ebda(); | ||
1021 | if (addr) | ||
1022 | reserve_bootmem(addr, PAGE_SIZE); | ||
1023 | } | ||
1024 | |||
1025 | #ifndef CONFIG_DISCONTIGMEM | ||
1026 | void __init setup_bootmem_allocator(void); | ||
1027 | static unsigned long __init setup_memory(void) | ||
1028 | { | ||
1029 | /* | ||
1030 | * partially used pages are not usable - thus | ||
1031 | * we are rounding upwards: | ||
1032 | */ | ||
1033 | min_low_pfn = PFN_UP(init_pg_tables_end); | ||
1034 | |||
1035 | find_max_pfn(); | ||
1036 | |||
1037 | max_low_pfn = find_max_low_pfn(); | ||
1038 | |||
1039 | #ifdef CONFIG_HIGHMEM | ||
1040 | highstart_pfn = highend_pfn = max_pfn; | ||
1041 | if (max_pfn > max_low_pfn) { | ||
1042 | highstart_pfn = max_low_pfn; | ||
1043 | } | ||
1044 | printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", | ||
1045 | pages_to_mb(highend_pfn - highstart_pfn)); | ||
1046 | #endif | ||
1047 | printk(KERN_NOTICE "%ldMB LOWMEM available.\n", | ||
1048 | pages_to_mb(max_low_pfn)); | ||
1049 | |||
1050 | setup_bootmem_allocator(); | ||
1051 | |||
1052 | return max_low_pfn; | ||
1053 | } | ||
1054 | |||
1055 | void __init zone_sizes_init(void) | ||
1056 | { | ||
1057 | unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; | ||
1058 | unsigned int max_dma, low; | ||
1059 | |||
1060 | max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; | ||
1061 | low = max_low_pfn; | ||
1062 | |||
1063 | if (low < max_dma) | ||
1064 | zones_size[ZONE_DMA] = low; | ||
1065 | else { | ||
1066 | zones_size[ZONE_DMA] = max_dma; | ||
1067 | zones_size[ZONE_NORMAL] = low - max_dma; | ||
1068 | #ifdef CONFIG_HIGHMEM | ||
1069 | zones_size[ZONE_HIGHMEM] = highend_pfn - low; | ||
1070 | #endif | ||
1071 | } | ||
1072 | free_area_init(zones_size); | ||
1073 | } | ||
1074 | #else | ||
1075 | extern unsigned long setup_memory(void); | ||
1076 | extern void zone_sizes_init(void); | ||
1077 | #endif /* !CONFIG_DISCONTIGMEM */ | ||
1078 | |||
1079 | void __init setup_bootmem_allocator(void) | ||
1080 | { | ||
1081 | unsigned long bootmap_size; | ||
1082 | /* | ||
1083 | * Initialize the boot-time allocator (with low memory only): | ||
1084 | */ | ||
1085 | bootmap_size = init_bootmem(min_low_pfn, max_low_pfn); | ||
1086 | |||
1087 | register_bootmem_low_pages(max_low_pfn); | ||
1088 | |||
1089 | /* | ||
1090 | * Reserve the bootmem bitmap itself as well. We do this in two | ||
1091 | * steps (first step was init_bootmem()) because this catches | ||
1092 | * the (very unlikely) case of us accidentally initializing the | ||
1093 | * bootmem allocator with an invalid RAM area. | ||
1094 | */ | ||
1095 | reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(min_low_pfn) + | ||
1096 | bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY)); | ||
1097 | |||
1098 | /* | ||
1099 | * reserve physical page 0 - it's a special BIOS page on many boxes, | ||
1100 | * enabling clean reboots, SMP operation, laptop functions. | ||
1101 | */ | ||
1102 | reserve_bootmem(0, PAGE_SIZE); | ||
1103 | |||
1104 | /* reserve EBDA region, it's a 4K region */ | ||
1105 | reserve_ebda_region(); | ||
1106 | |||
1107 | /* could be an AMD 768MPX chipset. Reserve a page before VGA to prevent | ||
1108 | PCI prefetch into it (errata #56). Usually the page is reserved anyway, | ||
1109 | unless you have no PS/2 mouse plugged in. */ | ||
1110 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && | ||
1111 | boot_cpu_data.x86 == 6) | ||
1112 | reserve_bootmem(0xa0000 - 4096, 4096); | ||
1113 | |||
1114 | #ifdef CONFIG_SMP | ||
1115 | /* | ||
1116 | * But first pinch a few for the stack/trampoline stuff | ||
1117 | * FIXME: Don't need the extra page at 4K, but need to fix | ||
1118 | * trampoline before removing it. (see the GDT stuff) | ||
1119 | */ | ||
1120 | reserve_bootmem(PAGE_SIZE, PAGE_SIZE); | ||
1121 | #endif | ||
1122 | #ifdef CONFIG_ACPI_SLEEP | ||
1123 | /* | ||
1124 | * Reserve low memory region for sleep support. | ||
1125 | */ | ||
1126 | acpi_reserve_bootmem(); | ||
1127 | #endif | ||
1128 | #ifdef CONFIG_X86_FIND_SMP_CONFIG | ||
1129 | /* | ||
1130 | * Find and reserve possible boot-time SMP configuration: | ||
1131 | */ | ||
1132 | find_smp_config(); | ||
1133 | #endif | ||
1134 | |||
1135 | #ifdef CONFIG_BLK_DEV_INITRD | ||
1136 | if (LOADER_TYPE && INITRD_START) { | ||
1137 | if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) { | ||
1138 | reserve_bootmem(INITRD_START, INITRD_SIZE); | ||
1139 | initrd_start = | ||
1140 | INITRD_START ? INITRD_START + PAGE_OFFSET : 0; | ||
1141 | initrd_end = initrd_start+INITRD_SIZE; | ||
1142 | } | ||
1143 | else { | ||
1144 | printk(KERN_ERR "initrd extends beyond end of memory " | ||
1145 | "(0x%08lx > 0x%08lx)\ndisabling initrd\n", | ||
1146 | INITRD_START + INITRD_SIZE, | ||
1147 | max_low_pfn << PAGE_SHIFT); | ||
1148 | initrd_start = 0; | ||
1149 | } | ||
1150 | } | ||
1151 | #endif | ||
1152 | } | ||
1153 | |||
1154 | /* | ||
1155 | * The node 0 pgdat is initialized before all of these because | ||
1156 | * it's needed for bootmem. node>0 pgdats have their virtual | ||
1157 | * space allocated before the pagetables are in place to access | ||
1158 | * them, so they can't be cleared then. | ||
1159 | * | ||
1160 | * This should all compile down to nothing when NUMA is off. | ||
1161 | */ | ||
1162 | void __init remapped_pgdat_init(void) | ||
1163 | { | ||
1164 | int nid; | ||
1165 | |||
1166 | for_each_online_node(nid) { | ||
1167 | if (nid != 0) | ||
1168 | memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); | ||
1169 | } | ||
1170 | } | ||
1171 | |||
1172 | /* | ||
1173 | * Request address space for all standard RAM and ROM resources | ||
1174 | * and also for regions reported as reserved by the e820. | ||
1175 | */ | ||
1176 | static void __init | ||
1177 | legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource) | ||
1178 | { | ||
1179 | int i; | ||
1180 | |||
1181 | probe_roms(); | ||
1182 | for (i = 0; i < e820.nr_map; i++) { | ||
1183 | struct resource *res; | ||
1184 | if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL) | ||
1185 | continue; | ||
1186 | res = alloc_bootmem_low(sizeof(struct resource)); | ||
1187 | switch (e820.map[i].type) { | ||
1188 | case E820_RAM: res->name = "System RAM"; break; | ||
1189 | case E820_ACPI: res->name = "ACPI Tables"; break; | ||
1190 | case E820_NVS: res->name = "ACPI Non-volatile Storage"; break; | ||
1191 | default: res->name = "reserved"; | ||
1192 | } | ||
1193 | res->start = e820.map[i].addr; | ||
1194 | res->end = res->start + e820.map[i].size - 1; | ||
1195 | res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; | ||
1196 | request_resource(&iomem_resource, res); | ||
1197 | if (e820.map[i].type == E820_RAM) { | ||
1198 | /* | ||
1199 | * We don't know which RAM region contains kernel data, | ||
1200 | * so we try it repeatedly and let the resource manager | ||
1201 | * test it. | ||
1202 | */ | ||
1203 | request_resource(res, code_resource); | ||
1204 | request_resource(res, data_resource); | ||
1205 | } | ||
1206 | } | ||
1207 | } | ||
1208 | |||
1209 | /* | ||
1210 | * Request address space for all standard resources | ||
1211 | */ | ||
1212 | static void __init register_memory(void) | ||
1213 | { | ||
1214 | unsigned long gapstart, gapsize; | ||
1215 | unsigned long long last; | ||
1216 | int i; | ||
1217 | |||
1218 | if (efi_enabled) | ||
1219 | efi_initialize_iomem_resources(&code_resource, &data_resource); | ||
1220 | else | ||
1221 | legacy_init_iomem_resources(&code_resource, &data_resource); | ||
1222 | |||
1223 | /* EFI systems may still have VGA */ | ||
1224 | request_resource(&iomem_resource, &video_ram_resource); | ||
1225 | |||
1226 | /* request I/O space for devices used on all i[345]86 PCs */ | ||
1227 | for (i = 0; i < STANDARD_IO_RESOURCES; i++) | ||
1228 | request_resource(&ioport_resource, &standard_io_resources[i]); | ||
1229 | |||
1230 | /* | ||
1231 | * Search for the biggest gap in the low 32 bits of the e820 | ||
1232 | * memory space. | ||
1233 | */ | ||
1234 | last = 0x100000000ull; | ||
1235 | gapstart = 0x10000000; | ||
1236 | gapsize = 0x400000; | ||
1237 | i = e820.nr_map; | ||
1238 | while (--i >= 0) { | ||
1239 | unsigned long long start = e820.map[i].addr; | ||
1240 | unsigned long long end = start + e820.map[i].size; | ||
1241 | |||
1242 | /* | ||
1243 | * Since "last" is at most 4GB, we know we'll | ||
1244 | * fit in 32 bits if this condition is true | ||
1245 | */ | ||
1246 | if (last > end) { | ||
1247 | unsigned long gap = last - end; | ||
1248 | |||
1249 | if (gap > gapsize) { | ||
1250 | gapsize = gap; | ||
1251 | gapstart = end; | ||
1252 | } | ||
1253 | } | ||
1254 | if (start < last) | ||
1255 | last = start; | ||
1256 | } | ||
1257 | |||
1258 | /* | ||
1259 | * Start allocating dynamic PCI memory a bit into the gap, | ||
1260 | * aligned up to the nearest megabyte. | ||
1261 | * | ||
1262 | * Question: should we try to pad it up a bit (do something | ||
1263 | * like " + (gapsize >> 3)" in there too?). We now have the | ||
1264 | * technology. | ||
1265 | */ | ||
1266 | pci_mem_start = (gapstart + 0xfffff) & ~0xfffff; | ||
1267 | |||
1268 | printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n", | ||
1269 | pci_mem_start, gapstart, gapsize); | ||
1270 | } | ||
1271 | |||
1272 | /* Use inline assembly to define these because the nops are defined | ||
1273 | as inline assembly strings in the include files and we cannot | ||
1274 | easily get them into C arrays otherwise. */ | ||
1275 | asm("\t.data\nintelnops: " | ||
1276 | GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 | ||
1277 | GENERIC_NOP7 GENERIC_NOP8); | ||
1278 | asm("\t.data\nk8nops: " | ||
1279 | K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 | ||
1280 | K8_NOP7 K8_NOP8); | ||
1281 | asm("\t.data\nk7nops: " | ||
1282 | K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 | ||
1283 | K7_NOP7 K7_NOP8); | ||
1284 | |||
1285 | extern unsigned char intelnops[], k8nops[], k7nops[]; | ||
1286 | static unsigned char *intel_nops[ASM_NOP_MAX+1] = { | ||
1287 | NULL, | ||
1288 | intelnops, | ||
1289 | intelnops + 1, | ||
1290 | intelnops + 1 + 2, | ||
1291 | intelnops + 1 + 2 + 3, | ||
1292 | intelnops + 1 + 2 + 3 + 4, | ||
1293 | intelnops + 1 + 2 + 3 + 4 + 5, | ||
1294 | intelnops + 1 + 2 + 3 + 4 + 5 + 6, | ||
1295 | intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7, | ||
1296 | }; | ||
1297 | static unsigned char *k8_nops[ASM_NOP_MAX+1] = { | ||
1298 | NULL, | ||
1299 | k8nops, | ||
1300 | k8nops + 1, | ||
1301 | k8nops + 1 + 2, | ||
1302 | k8nops + 1 + 2 + 3, | ||
1303 | k8nops + 1 + 2 + 3 + 4, | ||
1304 | k8nops + 1 + 2 + 3 + 4 + 5, | ||
1305 | k8nops + 1 + 2 + 3 + 4 + 5 + 6, | ||
1306 | k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, | ||
1307 | }; | ||
1308 | static unsigned char *k7_nops[ASM_NOP_MAX+1] = { | ||
1309 | NULL, | ||
1310 | k7nops, | ||
1311 | k7nops + 1, | ||
1312 | k7nops + 1 + 2, | ||
1313 | k7nops + 1 + 2 + 3, | ||
1314 | k7nops + 1 + 2 + 3 + 4, | ||
1315 | k7nops + 1 + 2 + 3 + 4 + 5, | ||
1316 | k7nops + 1 + 2 + 3 + 4 + 5 + 6, | ||
1317 | k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, | ||
1318 | }; | ||
1319 | static struct nop { | ||
1320 | int cpuid; | ||
1321 | unsigned char **noptable; | ||
1322 | } noptypes[] = { | ||
1323 | { X86_FEATURE_K8, k8_nops }, | ||
1324 | { X86_FEATURE_K7, k7_nops }, | ||
1325 | { -1, NULL } | ||
1326 | }; | ||
1327 | |||
1328 | /* Replace instructions with better alternatives for this CPU type. | ||
1329 | |||
1330 | This runs before SMP is initialized to avoid SMP problems with | ||
1331 | self-modifying code. This implies that asymmetric systems where | ||
1332 | APs have fewer capabilities than the boot processor are not handled. | ||
1333 | In this case boot with "noreplacement". */ | ||
1334 | void apply_alternatives(void *start, void *end) | ||
1335 | { | ||
1336 | struct alt_instr *a; | ||
1337 | int diff, i, k; | ||
1338 | unsigned char **noptable = intel_nops; | ||
1339 | for (i = 0; noptypes[i].cpuid >= 0; i++) { | ||
1340 | if (boot_cpu_has(noptypes[i].cpuid)) { | ||
1341 | noptable = noptypes[i].noptable; | ||
1342 | break; | ||
1343 | } | ||
1344 | } | ||
1345 | for (a = start; (void *)a < end; a++) { | ||
1346 | if (!boot_cpu_has(a->cpuid)) | ||
1347 | continue; | ||
1348 | BUG_ON(a->replacementlen > a->instrlen); | ||
1349 | memcpy(a->instr, a->replacement, a->replacementlen); | ||
1350 | diff = a->instrlen - a->replacementlen; | ||
1351 | /* Pad the rest with nops */ | ||
1352 | for (i = a->replacementlen; diff > 0; diff -= k, i += k) { | ||
1353 | k = diff; | ||
1354 | if (k > ASM_NOP_MAX) | ||
1355 | k = ASM_NOP_MAX; | ||
1356 | memcpy(a->instr + i, noptable[k], k); | ||
1357 | } | ||
1358 | } | ||
1359 | } | ||
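/*
 * Illustrative sketch (not part of the original file): the padding loop
 * from apply_alternatives() on a plain byte buffer. Instead of the
 * CPU-specific multi-byte NOP tables above, it just emits 0x90 (the
 * single-byte NOP), chunked the same way with a hypothetical maximum
 * NOP length of 8.
 */
#include <assert.h>
#include <string.h>

#define NOP_MAX 8

static void pad_with_nops(unsigned char *instr, int instrlen, int replacementlen)
{
	int diff = instrlen - replacementlen;
	int i, k;

	/* same chunking as the kernel loop; real code copies noptable[k] */
	for (i = replacementlen; diff > 0; diff -= k, i += k) {
		k = diff > NOP_MAX ? NOP_MAX : diff;
		memset(instr + i, 0x90, k);
	}
}

int main(void)
{
	unsigned char buf[20] = { 0 };

	pad_with_nops(buf, 20, 3);	/* pads bytes 3..19 with NOPs */
	assert(buf[3] == 0x90 && buf[19] == 0x90);
	return 0;
}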
1360 | |||
1361 | static int no_replacement __initdata = 0; | ||
1362 | |||
1363 | void __init alternative_instructions(void) | ||
1364 | { | ||
1365 | extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; | ||
1366 | if (no_replacement) | ||
1367 | return; | ||
1368 | apply_alternatives(__alt_instructions, __alt_instructions_end); | ||
1369 | } | ||
1370 | |||
1371 | static int __init noreplacement_setup(char *s) | ||
1372 | { | ||
1373 | no_replacement = 1; | ||
1374 | return 0; | ||
1375 | } | ||
1376 | |||
1377 | __setup("noreplacement", noreplacement_setup); | ||
1378 | |||
1379 | static char * __init machine_specific_memory_setup(void); | ||
1380 | |||
1381 | #ifdef CONFIG_MCA | ||
1382 | static void set_mca_bus(int x) | ||
1383 | { | ||
1384 | MCA_bus = x; | ||
1385 | } | ||
1386 | #else | ||
1387 | static void set_mca_bus(int x) { } | ||
1388 | #endif | ||
1389 | |||
1390 | /* | ||
1391 | * Determine if we were loaded by an EFI loader. If so, then we have also been | ||
1392 | * passed the efi memmap, systab, etc., so we should use these data structures | ||
1393 | * for initialization. Note, the efi init code path is determined by the | ||
1394 | * global efi_enabled. This allows the same kernel image to be used on existing | ||
1395 | * systems (with a traditional BIOS) as well as on EFI systems. | ||
1396 | */ | ||
1397 | void __init setup_arch(char **cmdline_p) | ||
1398 | { | ||
1399 | unsigned long max_low_pfn; | ||
1400 | |||
1401 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); | ||
1402 | pre_setup_arch_hook(); | ||
1403 | early_cpu_init(); | ||
1404 | |||
1405 | /* | ||
1406 | * FIXME: This isn't an official loader_type right | ||
1407 | * now but does currently work with elilo. | ||
1408 | * If we were configured as an EFI kernel, check to make | ||
1409 | * sure that we were loaded correctly from elilo and that | ||
1410 | * the system table is valid. If not, then initialize normally. | ||
1411 | */ | ||
1412 | #ifdef CONFIG_EFI | ||
1413 | if ((LOADER_TYPE == 0x50) && EFI_SYSTAB) | ||
1414 | efi_enabled = 1; | ||
1415 | #endif | ||
1416 | |||
1417 | ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV); | ||
1418 | drive_info = DRIVE_INFO; | ||
1419 | screen_info = SCREEN_INFO; | ||
1420 | edid_info = EDID_INFO; | ||
1421 | apm_info.bios = APM_BIOS_INFO; | ||
1422 | ist_info = IST_INFO; | ||
1423 | saved_videomode = VIDEO_MODE; | ||
1424 | if (SYS_DESC_TABLE.length != 0) { | ||
1425 | set_mca_bus(SYS_DESC_TABLE.table[3] & 0x2); | ||
1426 | machine_id = SYS_DESC_TABLE.table[0]; | ||
1427 | machine_submodel_id = SYS_DESC_TABLE.table[1]; | ||
1428 | BIOS_revision = SYS_DESC_TABLE.table[2]; | ||
1429 | } | ||
1430 | bootloader_type = LOADER_TYPE; | ||
1431 | |||
1432 | #ifdef CONFIG_BLK_DEV_RAM | ||
1433 | rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK; | ||
1434 | rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0); | ||
1435 | rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0); | ||
1436 | #endif | ||
1437 | ARCH_SETUP | ||
1438 | if (efi_enabled) | ||
1439 | efi_init(); | ||
1440 | else { | ||
1441 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); | ||
1442 | print_memory_map(machine_specific_memory_setup()); | ||
1443 | } | ||
1444 | |||
1445 | copy_edd(); | ||
1446 | |||
1447 | if (!MOUNT_ROOT_RDONLY) | ||
1448 | root_mountflags &= ~MS_RDONLY; | ||
1449 | init_mm.start_code = (unsigned long) _text; | ||
1450 | init_mm.end_code = (unsigned long) _etext; | ||
1451 | init_mm.end_data = (unsigned long) _edata; | ||
1452 | init_mm.brk = init_pg_tables_end + PAGE_OFFSET; | ||
1453 | |||
1454 | code_resource.start = virt_to_phys(_text); | ||
1455 | code_resource.end = virt_to_phys(_etext)-1; | ||
1456 | data_resource.start = virt_to_phys(_etext); | ||
1457 | data_resource.end = virt_to_phys(_edata)-1; | ||
1458 | |||
1459 | parse_cmdline_early(cmdline_p); | ||
1460 | |||
1461 | max_low_pfn = setup_memory(); | ||
1462 | |||
1463 | /* | ||
1464 | * NOTE: before this point _nobody_ is allowed to allocate | ||
1465 | * any memory using the bootmem allocator. Although the | ||
1466 | * allocator is now initialised, only the first 8MB of the kernel | ||
1467 | * virtual address space has been mapped. All allocations before | ||
1468 | * paging_init() has completed must use the alloc_bootmem_low_pages() | ||
1469 | * variant (which allocates DMA'able memory) and care must be taken | ||
1470 | * not to exceed the 8MB limit. | ||
1471 | */ | ||
1472 | |||
1473 | #ifdef CONFIG_SMP | ||
1474 | smp_alloc_memory(); /* AP processor realmode stacks in low memory*/ | ||
1475 | #endif | ||
1476 | paging_init(); | ||
1477 | remapped_pgdat_init(); | ||
1478 | zone_sizes_init(); | ||
1479 | |||
1480 | /* | ||
1481 | * NOTE: at this point the bootmem allocator is fully available. | ||
1482 | */ | ||
1483 | |||
1484 | #ifdef CONFIG_EARLY_PRINTK | ||
1485 | { | ||
1486 | char *s = strstr(*cmdline_p, "earlyprintk="); | ||
1487 | if (s) { | ||
1488 | extern void setup_early_printk(char *); | ||
1489 | |||
1490 | setup_early_printk(s); | ||
1491 | printk("early console enabled\n"); | ||
1492 | } | ||
1493 | } | ||
1494 | #endif | ||
1495 | |||
1496 | |||
1497 | dmi_scan_machine(); | ||
1498 | |||
1499 | #ifdef CONFIG_X86_GENERICARCH | ||
1500 | generic_apic_probe(*cmdline_p); | ||
1501 | #endif | ||
1502 | if (efi_enabled) | ||
1503 | efi_map_memmap(); | ||
1504 | |||
1505 | /* | ||
1506 | * Parse the ACPI tables for possible boot-time SMP configuration. | ||
1507 | */ | ||
1508 | acpi_boot_table_init(); | ||
1509 | acpi_boot_init(); | ||
1510 | |||
1511 | #ifdef CONFIG_X86_LOCAL_APIC | ||
1512 | if (smp_found_config) | ||
1513 | get_smp_config(); | ||
1514 | #endif | ||
1515 | |||
1516 | register_memory(); | ||
1517 | |||
1518 | #ifdef CONFIG_VT | ||
1519 | #if defined(CONFIG_VGA_CONSOLE) | ||
1520 | if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY)) | ||
1521 | conswitchp = &vga_con; | ||
1522 | #elif defined(CONFIG_DUMMY_CONSOLE) | ||
1523 | conswitchp = &dummy_con; | ||
1524 | #endif | ||
1525 | #endif | ||
1526 | } | ||
1527 | |||
1528 | #include "setup_arch_post.h" | ||
1529 | /* | ||
1530 | * Local Variables: | ||
1531 | * mode:c | ||
1532 | * c-file-style:"k&r" | ||
1533 | * c-basic-offset:8 | ||
1534 | * End: | ||
1535 | */ | ||
diff --git a/arch/i386/kernel/sigframe.h b/arch/i386/kernel/sigframe.h new file mode 100644 index 000000000000..d21b14f5c25c --- /dev/null +++ b/arch/i386/kernel/sigframe.h | |||
@@ -0,0 +1,21 @@ | |||
1 | struct sigframe | ||
2 | { | ||
3 | char *pretcode; | ||
4 | int sig; | ||
5 | struct sigcontext sc; | ||
6 | struct _fpstate fpstate; | ||
7 | unsigned long extramask[_NSIG_WORDS-1]; | ||
8 | char retcode[8]; | ||
9 | }; | ||
10 | |||
11 | struct rt_sigframe | ||
12 | { | ||
13 | char *pretcode; | ||
14 | int sig; | ||
15 | struct siginfo *pinfo; | ||
16 | void *puc; | ||
17 | struct siginfo info; | ||
18 | struct ucontext uc; | ||
19 | struct _fpstate fpstate; | ||
20 | char retcode[8]; | ||
21 | }; | ||
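/*
 * Illustrative sketch (not part of the original file): a simplified
 * userspace mirror of struct sigframe showing why the layout works.
 * When the kernel points %esp at the frame, "pretcode" occupies the
 * return-address slot (so the handler's "ret" jumps to the sigreturn
 * trampoline) and "sig" lands where a cdecl handler expects its first
 * argument. The mirror members are stand-ins, not the real kernel
 * types.
 */
#include <stdio.h>
#include <stddef.h>

struct mirror_sigframe {
	char *pretcode;		/* return address: sigreturn trampoline */
	int sig;		/* handler's first argument */
	long sc_placeholder;	/* stands in for struct sigcontext etc. */
};

int main(void)
{
	printf("pretcode at +%zu, sig at +%zu\n",
	       offsetof(struct mirror_sigframe, pretcode),
	       offsetof(struct mirror_sigframe, sig));
	return 0;
}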
diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c new file mode 100644 index 000000000000..ef3602e1c052 --- /dev/null +++ b/arch/i386/kernel/signal.c | |||
@@ -0,0 +1,665 @@ | |||
1 | /* | ||
2 | * linux/arch/i386/kernel/signal.c | ||
3 | * | ||
4 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
5 | * | ||
6 | * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson | ||
7 | * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes | ||
8 | */ | ||
9 | |||
10 | #include <linux/sched.h> | ||
11 | #include <linux/mm.h> | ||
12 | #include <linux/smp.h> | ||
13 | #include <linux/smp_lock.h> | ||
14 | #include <linux/kernel.h> | ||
15 | #include <linux/signal.h> | ||
16 | #include <linux/errno.h> | ||
17 | #include <linux/wait.h> | ||
18 | #include <linux/unistd.h> | ||
19 | #include <linux/stddef.h> | ||
20 | #include <linux/personality.h> | ||
21 | #include <linux/suspend.h> | ||
22 | #include <linux/ptrace.h> | ||
23 | #include <linux/elf.h> | ||
24 | #include <asm/processor.h> | ||
25 | #include <asm/ucontext.h> | ||
26 | #include <asm/uaccess.h> | ||
27 | #include <asm/i387.h> | ||
28 | #include "sigframe.h" | ||
29 | |||
30 | #define DEBUG_SIG 0 | ||
31 | |||
32 | #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) | ||
33 | |||
34 | /* | ||
35 | * Atomically swap in the new signal mask, and wait for a signal. | ||
36 | */ | ||
37 | asmlinkage int | ||
38 | sys_sigsuspend(int history0, int history1, old_sigset_t mask) | ||
39 | { | ||
40 | struct pt_regs * regs = (struct pt_regs *) &history0; | ||
41 | sigset_t saveset; | ||
42 | |||
43 | mask &= _BLOCKABLE; | ||
44 | spin_lock_irq(¤t->sighand->siglock); | ||
45 | saveset = current->blocked; | ||
46 | siginitset(¤t->blocked, mask); | ||
47 | recalc_sigpending(); | ||
48 | spin_unlock_irq(¤t->sighand->siglock); | ||
49 | |||
50 | regs->eax = -EINTR; | ||
51 | while (1) { | ||
52 | current->state = TASK_INTERRUPTIBLE; | ||
53 | schedule(); | ||
54 | if (do_signal(regs, &saveset)) | ||
55 | return -EINTR; | ||
56 | } | ||
57 | } | ||
58 | |||
59 | asmlinkage int | ||
60 | sys_rt_sigsuspend(struct pt_regs regs) | ||
61 | { | ||
62 | sigset_t saveset, newset; | ||
63 | |||
64 | /* XXX: Don't preclude handling different sized sigset_t's. */ | ||
65 | if (regs.ecx != sizeof(sigset_t)) | ||
66 | return -EINVAL; | ||
67 | |||
68 | if (copy_from_user(&newset, (sigset_t __user *)regs.ebx, sizeof(newset))) | ||
69 | return -EFAULT; | ||
70 | sigdelsetmask(&newset, ~_BLOCKABLE); | ||
71 | |||
72 | spin_lock_irq(¤t->sighand->siglock); | ||
73 | saveset = current->blocked; | ||
74 | current->blocked = newset; | ||
75 | recalc_sigpending(); | ||
76 | spin_unlock_irq(¤t->sighand->siglock); | ||
77 | |||
78 | regs.eax = -EINTR; | ||
79 | while (1) { | ||
80 | current->state = TASK_INTERRUPTIBLE; | ||
81 | schedule(); | ||
82 | if (do_signal(®s, &saveset)) | ||
83 | return -EINTR; | ||
84 | } | ||
85 | } | ||
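/*
 * Illustrative sketch (not part of the original file): the userspace
 * pattern these syscalls exist for. Blocking the signal, testing a
 * flag, and atomically unblocking + sleeping with sigsuspend() closes
 * the wakeup race that separate sigprocmask()/pause() calls would
 * have. The raise() is only there so the demo terminates on its own.
 */
#include <signal.h>
#include <stdio.h>

static volatile sig_atomic_t got_usr1;

static void on_usr1(int sig) { (void)sig; got_usr1 = 1; }

int main(void)
{
	sigset_t block, old;
	struct sigaction sa;

	sa.sa_handler = on_usr1;
	sigemptyset(&sa.sa_mask);
	sa.sa_flags = 0;
	sigaction(SIGUSR1, &sa, NULL);

	sigemptyset(&block);
	sigaddset(&block, SIGUSR1);
	sigprocmask(SIG_BLOCK, &block, &old);	/* SIGUSR1 now held off */

	raise(SIGUSR1);			/* pends, since it is blocked */
	while (!got_usr1)
		sigsuspend(&old);	/* atomically unblock and wait */

	sigprocmask(SIG_SETMASK, &old, NULL);
	puts("got SIGUSR1");
	return 0;
}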
86 | |||
87 | asmlinkage int | ||
88 | sys_sigaction(int sig, const struct old_sigaction __user *act, | ||
89 | struct old_sigaction __user *oact) | ||
90 | { | ||
91 | struct k_sigaction new_ka, old_ka; | ||
92 | int ret; | ||
93 | |||
94 | if (act) { | ||
95 | old_sigset_t mask; | ||
96 | if (!access_ok(VERIFY_READ, act, sizeof(*act)) || | ||
97 | __get_user(new_ka.sa.sa_handler, &act->sa_handler) || | ||
98 | __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) | ||
99 | return -EFAULT; | ||
100 | __get_user(new_ka.sa.sa_flags, &act->sa_flags); | ||
101 | __get_user(mask, &act->sa_mask); | ||
102 | siginitset(&new_ka.sa.sa_mask, mask); | ||
103 | } | ||
104 | |||
105 | ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); | ||
106 | |||
107 | if (!ret && oact) { | ||
108 | if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || | ||
109 | __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || | ||
110 | __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) | ||
111 | return -EFAULT; | ||
112 | __put_user(old_ka.sa.sa_flags, &oact->sa_flags); | ||
113 | __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); | ||
114 | } | ||
115 | |||
116 | return ret; | ||
117 | } | ||
118 | |||
119 | asmlinkage int | ||
120 | sys_sigaltstack(unsigned long ebx) | ||
121 | { | ||
122 | /* This is needed to make gcc realize it doesn't own the "struct pt_regs" */ | ||
123 | struct pt_regs *regs = (struct pt_regs *)&ebx; | ||
124 | const stack_t __user *uss = (const stack_t __user *)ebx; | ||
125 | stack_t __user *uoss = (stack_t __user *)regs->ecx; | ||
126 | |||
127 | return do_sigaltstack(uss, uoss, regs->esp); | ||
128 | } | ||
129 | |||
130 | |||
131 | /* | ||
132 | * Do a signal return; undo the signal stack. | ||
133 | */ | ||
134 | |||
135 | static int | ||
136 | restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax) | ||
137 | { | ||
138 | unsigned int err = 0; | ||
139 | |||
140 | /* Always make any pending restarted system calls return -EINTR */ | ||
141 | current_thread_info()->restart_block.fn = do_no_restart_syscall; | ||
142 | |||
143 | #define COPY(x) err |= __get_user(regs->x, &sc->x) | ||
144 | |||
145 | #define COPY_SEG(seg) \ | ||
146 | { unsigned short tmp; \ | ||
147 | err |= __get_user(tmp, &sc->seg); \ | ||
148 | regs->x##seg = tmp; } | ||
149 | |||
150 | #define COPY_SEG_STRICT(seg) \ | ||
151 | { unsigned short tmp; \ | ||
152 | err |= __get_user(tmp, &sc->seg); \ | ||
153 | regs->x##seg = tmp|3; } | ||
154 | |||
155 | #define GET_SEG(seg) \ | ||
156 | { unsigned short tmp; \ | ||
157 | err |= __get_user(tmp, &sc->seg); \ | ||
158 | loadsegment(seg,tmp); } | ||
159 | |||
160 | #define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | X86_EFLAGS_DF | \ | ||
161 | X86_EFLAGS_TF | X86_EFLAGS_SF | X86_EFLAGS_ZF | \ | ||
162 | X86_EFLAGS_AF | X86_EFLAGS_PF | X86_EFLAGS_CF) | ||
163 | |||
164 | GET_SEG(gs); | ||
165 | GET_SEG(fs); | ||
166 | COPY_SEG(es); | ||
167 | COPY_SEG(ds); | ||
168 | COPY(edi); | ||
169 | COPY(esi); | ||
170 | COPY(ebp); | ||
171 | COPY(esp); | ||
172 | COPY(ebx); | ||
173 | COPY(edx); | ||
174 | COPY(ecx); | ||
175 | COPY(eip); | ||
176 | COPY_SEG_STRICT(cs); | ||
177 | COPY_SEG_STRICT(ss); | ||
178 | |||
179 | { | ||
180 | unsigned int tmpflags; | ||
181 | err |= __get_user(tmpflags, &sc->eflags); | ||
182 | regs->eflags = (regs->eflags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); | ||
183 | regs->orig_eax = -1; /* disable syscall checks */ | ||
184 | } | ||
185 | |||
186 | { | ||
187 | struct _fpstate __user * buf; | ||
188 | err |= __get_user(buf, &sc->fpstate); | ||
189 | if (buf) { | ||
190 | if (!access_ok(VERIFY_READ, buf, sizeof(*buf))) | ||
191 | goto badframe; | ||
192 | err |= restore_i387(buf); | ||
193 | } else { | ||
194 | struct task_struct *me = current; | ||
195 | if (used_math()) { | ||
196 | clear_fpu(me); | ||
197 | clear_used_math(); | ||
198 | } | ||
199 | } | ||
200 | } | ||
201 | |||
202 | err |= __get_user(*peax, &sc->eax); | ||
203 | return err; | ||
204 | |||
205 | badframe: | ||
206 | return 1; | ||
207 | } | ||
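/*
 * Illustrative sketch (not part of the original file): the FIX_EFLAGS
 * merge above in isolation. Only the arithmetic/control bits listed in
 * the mask may be taken from the (untrusted) signal frame; privileged
 * bits such as IOPL and IF keep their kernel-saved values. The mask
 * value is written out by hand from the flag list above rather than
 * taken from kernel headers.
 */
#include <stdio.h>

/* AC|OF|DF|TF|SF|ZF|AF|PF|CF, as listed in FIX_EFLAGS above */
#define FIX_EFLAGS_SKETCH 0x00040dd5ul

static unsigned long merge_eflags(unsigned long kernel, unsigned long user)
{
	return (kernel & ~FIX_EFLAGS_SKETCH) | (user & FIX_EFLAGS_SKETCH);
}

int main(void)
{
	/* user tries to set IOPL (bits 12-13); the merge discards it */
	unsigned long merged = merge_eflags(0x202, 0x3202 | 0x1 /* CF */);

	printf("%#lx\n", merged);	/* 0x203: IF kept, CF taken, IOPL dropped */
	return 0;
}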
208 | |||
209 | asmlinkage int sys_sigreturn(unsigned long __unused) | ||
210 | { | ||
211 | struct pt_regs *regs = (struct pt_regs *) &__unused; | ||
212 | struct sigframe __user *frame = (struct sigframe __user *)(regs->esp - 8); | ||
213 | sigset_t set; | ||
214 | int eax; | ||
215 | |||
216 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) | ||
217 | goto badframe; | ||
218 | if (__get_user(set.sig[0], &frame->sc.oldmask) | ||
219 | || (_NSIG_WORDS > 1 | ||
220 | && __copy_from_user(&set.sig[1], &frame->extramask, | ||
221 | sizeof(frame->extramask)))) | ||
222 | goto badframe; | ||
223 | |||
224 | sigdelsetmask(&set, ~_BLOCKABLE); | ||
225 | spin_lock_irq(¤t->sighand->siglock); | ||
226 | current->blocked = set; | ||
227 | recalc_sigpending(); | ||
228 | spin_unlock_irq(¤t->sighand->siglock); | ||
229 | |||
230 | if (restore_sigcontext(regs, &frame->sc, &eax)) | ||
231 | goto badframe; | ||
232 | return eax; | ||
233 | |||
234 | badframe: | ||
235 | force_sig(SIGSEGV, current); | ||
236 | return 0; | ||
237 | } | ||
238 | |||
239 | asmlinkage int sys_rt_sigreturn(unsigned long __unused) | ||
240 | { | ||
241 | struct pt_regs *regs = (struct pt_regs *) &__unused; | ||
242 | struct rt_sigframe __user *frame = (struct rt_sigframe __user *)(regs->esp - 4); | ||
243 | sigset_t set; | ||
244 | int eax; | ||
245 | |||
246 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) | ||
247 | goto badframe; | ||
248 | if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) | ||
249 | goto badframe; | ||
250 | |||
251 | sigdelsetmask(&set, ~_BLOCKABLE); | ||
252 | spin_lock_irq(¤t->sighand->siglock); | ||
253 | current->blocked = set; | ||
254 | recalc_sigpending(); | ||
255 | spin_unlock_irq(¤t->sighand->siglock); | ||
256 | |||
257 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax)) | ||
258 | goto badframe; | ||
259 | |||
260 | if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->esp) == -EFAULT) | ||
261 | goto badframe; | ||
262 | |||
263 | return eax; | ||
264 | |||
265 | badframe: | ||
266 | force_sig(SIGSEGV, current); | ||
267 | return 0; | ||
268 | } | ||
269 | |||
270 | /* | ||
271 | * Set up a signal frame. | ||
272 | */ | ||
273 | |||
274 | static int | ||
275 | setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, | ||
276 | struct pt_regs *regs, unsigned long mask) | ||
277 | { | ||
278 | int tmp, err = 0; | ||
279 | |||
280 | tmp = 0; | ||
281 | __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp)); | ||
282 | err |= __put_user(tmp, (unsigned int __user *)&sc->gs); | ||
283 | __asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp)); | ||
284 | err |= __put_user(tmp, (unsigned int __user *)&sc->fs); | ||
285 | |||
286 | err |= __put_user(regs->xes, (unsigned int __user *)&sc->es); | ||
287 | err |= __put_user(regs->xds, (unsigned int __user *)&sc->ds); | ||
288 | err |= __put_user(regs->edi, &sc->edi); | ||
289 | err |= __put_user(regs->esi, &sc->esi); | ||
290 | err |= __put_user(regs->ebp, &sc->ebp); | ||
291 | err |= __put_user(regs->esp, &sc->esp); | ||
292 | err |= __put_user(regs->ebx, &sc->ebx); | ||
293 | err |= __put_user(regs->edx, &sc->edx); | ||
294 | err |= __put_user(regs->ecx, &sc->ecx); | ||
295 | err |= __put_user(regs->eax, &sc->eax); | ||
296 | err |= __put_user(current->thread.trap_no, &sc->trapno); | ||
297 | err |= __put_user(current->thread.error_code, &sc->err); | ||
298 | err |= __put_user(regs->eip, &sc->eip); | ||
299 | err |= __put_user(regs->xcs, (unsigned int __user *)&sc->cs); | ||
300 | err |= __put_user(regs->eflags, &sc->eflags); | ||
301 | err |= __put_user(regs->esp, &sc->esp_at_signal); | ||
302 | err |= __put_user(regs->xss, (unsigned int __user *)&sc->ss); | ||
303 | |||
304 | tmp = save_i387(fpstate); | ||
305 | if (tmp < 0) | ||
306 | err = 1; | ||
307 | else | ||
308 | err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate); | ||
309 | |||
310 | /* non-iBCS2 extensions.. */ | ||
311 | err |= __put_user(mask, &sc->oldmask); | ||
312 | err |= __put_user(current->thread.cr2, &sc->cr2); | ||
313 | |||
314 | return err; | ||
315 | } | ||
316 | |||
317 | /* | ||
318 | * Determine which stack to use.. | ||
319 | */ | ||
320 | static inline void __user * | ||
321 | get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) | ||
322 | { | ||
323 | unsigned long esp; | ||
324 | |||
325 | /* Default to using normal stack */ | ||
326 | esp = regs->esp; | ||
327 | |||
328 | /* This is the X/Open sanctioned signal stack switching. */ | ||
329 | if (ka->sa.sa_flags & SA_ONSTACK) { | ||
330 | if (sas_ss_flags(esp) == 0) | ||
331 | esp = current->sas_ss_sp + current->sas_ss_size; | ||
332 | } | ||
333 | |||
334 | /* This is the legacy signal stack switching. */ | ||
335 | else if ((regs->xss & 0xffff) != __USER_DS && | ||
336 | !(ka->sa.sa_flags & SA_RESTORER) && | ||
337 | ka->sa.sa_restorer) { | ||
338 | esp = (unsigned long) ka->sa.sa_restorer; | ||
339 | } | ||
340 | |||
341 | return (void __user *)((esp - frame_size) & -8ul); | ||
342 | } | ||
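/*
 * Illustrative sketch (not part of the original file): the final
 * expression in get_sigframe(). Subtracting the frame size and then
 * AND-ing with -8 rounds the result down to an 8-byte boundary, so the
 * frame stays aligned and never overlaps the live stack data above it.
 * The esp value and frame size here are made-up examples.
 */
#include <stdio.h>

int main(void)
{
	unsigned long esp = 0xbffff63aul;		/* hypothetical user %esp */
	unsigned long frame = (esp - 220) & -8ul;	/* 220 ~ frame size */

	printf("%#lx -> frame at %#lx\n", esp, frame);	/* 0xbffff558 */
	return 0;
}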
343 | |||
344 | /* These symbols are defined with the addresses in the vsyscall page. | ||
345 | See vsyscall-sigreturn.S. */ | ||
346 | extern void __user __kernel_sigreturn; | ||
347 | extern void __user __kernel_rt_sigreturn; | ||
348 | |||
349 | static void setup_frame(int sig, struct k_sigaction *ka, | ||
350 | sigset_t *set, struct pt_regs * regs) | ||
351 | { | ||
352 | void __user *restorer; | ||
353 | struct sigframe __user *frame; | ||
354 | int err = 0; | ||
355 | int usig; | ||
356 | |||
357 | frame = get_sigframe(ka, regs, sizeof(*frame)); | ||
358 | |||
359 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) | ||
360 | goto give_sigsegv; | ||
361 | |||
362 | usig = current_thread_info()->exec_domain | ||
363 | && current_thread_info()->exec_domain->signal_invmap | ||
364 | && sig < 32 | ||
365 | ? current_thread_info()->exec_domain->signal_invmap[sig] | ||
366 | : sig; | ||
367 | |||
368 | err = __put_user(usig, &frame->sig); | ||
369 | if (err) | ||
370 | goto give_sigsegv; | ||
371 | |||
372 | err = setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]); | ||
373 | if (err) | ||
374 | goto give_sigsegv; | ||
375 | |||
376 | if (_NSIG_WORDS > 1) { | ||
377 | err = __copy_to_user(&frame->extramask, &set->sig[1], | ||
378 | sizeof(frame->extramask)); | ||
379 | if (err) | ||
380 | goto give_sigsegv; | ||
381 | } | ||
382 | |||
383 | restorer = &__kernel_sigreturn; | ||
384 | if (ka->sa.sa_flags & SA_RESTORER) | ||
385 | restorer = ka->sa.sa_restorer; | ||
386 | |||
387 | /* Set up to return from userspace. */ | ||
388 | err |= __put_user(restorer, &frame->pretcode); | ||
389 | |||
390 | /* | ||
391 | * This is popl %eax ; movl $,%eax ; int $0x80 | ||
392 | * | ||
393 | * WE DO NOT USE IT ANY MORE! It's only left here for historical | ||
394 | * reasons and because gdb uses it as a signature to notice | ||
395 | * signal handler stack frames. | ||
396 | */ | ||
397 | err |= __put_user(0xb858, (short __user *)(frame->retcode+0)); | ||
398 | err |= __put_user(__NR_sigreturn, (int __user *)(frame->retcode+2)); | ||
399 | err |= __put_user(0x80cd, (short __user *)(frame->retcode+6)); | ||
400 | |||
401 | if (err) | ||
402 | goto give_sigsegv; | ||
403 | |||
404 | /* Set up registers for signal handler */ | ||
405 | regs->esp = (unsigned long) frame; | ||
406 | regs->eip = (unsigned long) ka->sa.sa_handler; | ||
407 | regs->eax = (unsigned long) sig; | ||
408 | regs->edx = (unsigned long) 0; | ||
409 | regs->ecx = (unsigned long) 0; | ||
410 | |||
411 | set_fs(USER_DS); | ||
412 | regs->xds = __USER_DS; | ||
413 | regs->xes = __USER_DS; | ||
414 | regs->xss = __USER_DS; | ||
415 | regs->xcs = __USER_CS; | ||
416 | |||
417 | /* | ||
418 | * Clear TF when entering the signal handler, but | ||
419 | * notify any tracer that was single-stepping it. | ||
420 | * The tracer may want to single-step inside the | ||
421 | * handler too. | ||
422 | */ | ||
423 | regs->eflags &= ~TF_MASK; | ||
424 | if (test_thread_flag(TIF_SINGLESTEP)) | ||
425 | ptrace_notify(SIGTRAP); | ||
426 | |||
427 | #if DEBUG_SIG | ||
428 | printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", | ||
429 | current->comm, current->pid, frame, regs->eip, frame->pretcode); | ||
430 | #endif | ||
431 | |||
432 | return; | ||
433 | |||
434 | give_sigsegv: | ||
435 | force_sigsegv(sig, current); | ||
436 | } | ||
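/*
 * Illustrative sketch (not part of the original file): the bytes that
 * setup_frame() stores in retcode, decoded. 0x58 is "popl %eax", 0xb8
 * imm32 is "movl $imm,%eax", and 0xcd 0x80 is "int $0x80" -- the
 * historical sigreturn trampoline that gdb still pattern-matches on.
 * The byte-by-byte stores below reproduce the little-endian effect of
 * the 16-bit __put_user() calls above; 119 is assumed here to be
 * __NR_sigreturn on i386.
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
	unsigned char retcode[8];
	unsigned int nr_sigreturn = 119;	/* assumed __NR_sigreturn */

	retcode[0] = 0x58;			/* popl %eax        */
	retcode[1] = 0xb8;			/* movl $...,%eax   */
	memcpy(retcode + 2, &nr_sigreturn, 4);	/* ...the imm32     */
	retcode[6] = 0xcd;			/* int              */
	retcode[7] = 0x80;			/* $0x80            */

	for (int i = 0; i < 8; i++)
		printf("%02x ", retcode[i]);
	putchar('\n');
	return 0;
}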
437 | |||
438 | static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | ||
439 | sigset_t *set, struct pt_regs * regs) | ||
440 | { | ||
441 | void __user *restorer; | ||
442 | struct rt_sigframe __user *frame; | ||
443 | int err = 0; | ||
444 | int usig; | ||
445 | |||
446 | frame = get_sigframe(ka, regs, sizeof(*frame)); | ||
447 | |||
448 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) | ||
449 | goto give_sigsegv; | ||
450 | |||
451 | usig = current_thread_info()->exec_domain | ||
452 | && current_thread_info()->exec_domain->signal_invmap | ||
453 | && sig < 32 | ||
454 | ? current_thread_info()->exec_domain->signal_invmap[sig] | ||
455 | : sig; | ||
456 | |||
457 | err |= __put_user(usig, &frame->sig); | ||
458 | err |= __put_user(&frame->info, &frame->pinfo); | ||
459 | err |= __put_user(&frame->uc, &frame->puc); | ||
460 | err |= copy_siginfo_to_user(&frame->info, info); | ||
461 | if (err) | ||
462 | goto give_sigsegv; | ||
463 | |||
464 | /* Create the ucontext. */ | ||
465 | err |= __put_user(0, &frame->uc.uc_flags); | ||
466 | err |= __put_user(0, &frame->uc.uc_link); | ||
467 | err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); | ||
468 | err |= __put_user(sas_ss_flags(regs->esp), | ||
469 | &frame->uc.uc_stack.ss_flags); | ||
470 | err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); | ||
471 | err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, | ||
472 | regs, set->sig[0]); | ||
473 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); | ||
474 | if (err) | ||
475 | goto give_sigsegv; | ||
476 | |||
477 | /* Set up to return from userspace. */ | ||
478 | restorer = &__kernel_rt_sigreturn; | ||
479 | if (ka->sa.sa_flags & SA_RESTORER) | ||
480 | restorer = ka->sa.sa_restorer; | ||
481 | err |= __put_user(restorer, &frame->pretcode); | ||
482 | |||
483 | /* | ||
484 | * This is movl $,%eax ; int $0x80 | ||
485 | * | ||
486 | * WE DO NOT USE IT ANY MORE! It's only left here for historical | ||
487 | * reasons and because gdb uses it as a signature to notice | ||
488 | * signal handler stack frames. | ||
489 | */ | ||
490 | err |= __put_user(0xb8, (char __user *)(frame->retcode+0)); | ||
491 | err |= __put_user(__NR_rt_sigreturn, (int __user *)(frame->retcode+1)); | ||
492 | err |= __put_user(0x80cd, (short __user *)(frame->retcode+5)); | ||
493 | |||
494 | if (err) | ||
495 | goto give_sigsegv; | ||
496 | |||
497 | /* Set up registers for signal handler */ | ||
498 | regs->esp = (unsigned long) frame; | ||
499 | regs->eip = (unsigned long) ka->sa.sa_handler; | ||
500 | regs->eax = (unsigned long) usig; | ||
501 | regs->edx = (unsigned long) &frame->info; | ||
502 | regs->ecx = (unsigned long) &frame->uc; | ||
503 | |||
504 | set_fs(USER_DS); | ||
505 | regs->xds = __USER_DS; | ||
506 | regs->xes = __USER_DS; | ||
507 | regs->xss = __USER_DS; | ||
508 | regs->xcs = __USER_CS; | ||
509 | |||
510 | /* | ||
511 | * Clear TF when entering the signal handler, but | ||
512 | * notify any tracer that was single-stepping it. | ||
513 | * The tracer may want to single-step inside the | ||
514 | * handler too. | ||
515 | */ | ||
516 | regs->eflags &= ~TF_MASK; | ||
517 | if (test_thread_flag(TIF_SINGLESTEP)) | ||
518 | ptrace_notify(SIGTRAP); | ||
519 | |||
520 | #if DEBUG_SIG | ||
521 | printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", | ||
522 | current->comm, current->pid, frame, regs->eip, frame->pretcode); | ||
523 | #endif | ||
524 | |||
525 | return; | ||
526 | |||
527 | give_sigsegv: | ||
528 | force_sigsegv(sig, current); | ||
529 | } | ||
530 | |||
531 | /* | ||
532 | * OK, we're invoking a handler | ||
533 | */ | ||
534 | |||
535 | static void | ||
536 | handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | ||
537 | sigset_t *oldset, struct pt_regs * regs) | ||
538 | { | ||
539 | /* Are we from a system call? */ | ||
540 | if (regs->orig_eax >= 0) { | ||
541 | /* If so, check system call restarting.. */ | ||
542 | switch (regs->eax) { | ||
543 | case -ERESTART_RESTARTBLOCK: | ||
544 | case -ERESTARTNOHAND: | ||
545 | regs->eax = -EINTR; | ||
546 | break; | ||
547 | |||
548 | case -ERESTARTSYS: | ||
549 | if (!(ka->sa.sa_flags & SA_RESTART)) { | ||
550 | regs->eax = -EINTR; | ||
551 | break; | ||
552 | } | ||
553 | /* fallthrough */ | ||
554 | case -ERESTARTNOINTR: | ||
555 | regs->eax = regs->orig_eax; | ||
556 | regs->eip -= 2; | ||
557 | } | ||
558 | } | ||
559 | |||
560 | /* | ||
561 | * If TF is set due to a debugger (PT_DTRACE), clear the TF flag so | ||
562 | * that register information in the sigcontext is correct. | ||
563 | */ | ||
564 | if (unlikely(regs->eflags & TF_MASK) | ||
565 | && likely(current->ptrace & PT_DTRACE)) { | ||
566 | current->ptrace &= ~PT_DTRACE; | ||
567 | regs->eflags &= ~TF_MASK; | ||
568 | } | ||
569 | |||
570 | /* Set up the stack frame */ | ||
571 | if (ka->sa.sa_flags & SA_SIGINFO) | ||
572 | setup_rt_frame(sig, ka, info, oldset, regs); | ||
573 | else | ||
574 | setup_frame(sig, ka, oldset, regs); | ||
575 | |||
576 | if (!(ka->sa.sa_flags & SA_NODEFER)) { | ||
577 | spin_lock_irq(¤t->sighand->siglock); | ||
578 | sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); | ||
579 | sigaddset(¤t->blocked,sig); | ||
580 | recalc_sigpending(); | ||
581 | spin_unlock_irq(¤t->sighand->siglock); | ||
582 | } | ||
583 | } | ||
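/*
 * Illustrative sketch (not part of the original file): the restart
 * policy handle_signal() applies, as a pure function. "Rewind" means
 * the kernel backs %eip up by 2 bytes (the length of "int $0x80") and
 * restores %eax, so the same syscall re-executes on return to user
 * space. The ERESTART* values below are kernel-internal error codes,
 * reproduced here by hand as stand-ins.
 */
#include <assert.h>

#define ERESTARTSYS		512
#define ERESTARTNOINTR		513
#define ERESTARTNOHAND		514
#define ERESTART_RESTARTBLOCK	516

enum restart_action { NO_ACTION, RETURN_EINTR, REWIND_AND_RESTART };

static enum restart_action restart_policy(long eax, int sa_restart)
{
	switch (-eax) {
	case ERESTART_RESTARTBLOCK:
	case ERESTARTNOHAND:
		return RETURN_EINTR;		/* handler ran: give up */
	case ERESTARTSYS:
		if (!sa_restart)
			return RETURN_EINTR;	/* no SA_RESTART: give up */
		/* fallthrough */
	case ERESTARTNOINTR:
		return REWIND_AND_RESTART;
	}
	return NO_ACTION;	/* other values pass through untouched */
}

int main(void)
{
	assert(restart_policy(-ERESTARTSYS, 0) == RETURN_EINTR);
	assert(restart_policy(-ERESTARTSYS, 1) == REWIND_AND_RESTART);
	assert(restart_policy(-ERESTARTNOHAND, 1) == RETURN_EINTR);
	return 0;
}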
584 | |||
585 | /* | ||
586 | * Note that 'init' is a special process: it doesn't get signals it doesn't | ||
587 | * want to handle. Thus you cannot kill init even with a SIGKILL even by | ||
588 | * mistake. | ||
589 | */ | ||
590 | int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset) | ||
591 | { | ||
592 | siginfo_t info; | ||
593 | int signr; | ||
594 | struct k_sigaction ka; | ||
595 | |||
596 | /* | ||
597 | * We want the common case to go fast, which | ||
598 | * is why we may in certain cases get here from | ||
599 | * kernel mode. Just return without doing anything | ||
600 | * if so. | ||
601 | */ | ||
602 | if ((regs->xcs & 3) != 3) | ||
603 | return 1; | ||
604 | |||
605 | if (current->flags & PF_FREEZE) { | ||
606 | refrigerator(0); | ||
607 | goto no_signal; | ||
608 | } | ||
609 | |||
610 | if (!oldset) | ||
611 | oldset = ¤t->blocked; | ||
612 | |||
613 | signr = get_signal_to_deliver(&info, &ka, regs, NULL); | ||
614 | if (signr > 0) { | ||
615 | /* Reenable any watchpoints before delivering the | ||
616 | * signal to user space. The processor register will | ||
617 | * have been cleared if the watchpoint triggered | ||
618 | * inside the kernel. | ||
619 | */ | ||
620 | if (unlikely(current->thread.debugreg[7])) { | ||
621 | __asm__("movl %0,%%db7" : : "r" (current->thread.debugreg[7])); | ||
622 | } | ||
623 | |||
624 | /* Whee! Actually deliver the signal. */ | ||
625 | handle_signal(signr, &info, &ka, oldset, regs); | ||
626 | return 1; | ||
627 | } | ||
628 | |||
629 | no_signal: | ||
630 | /* Did we come from a system call? */ | ||
631 | if (regs->orig_eax >= 0) { | ||
632 | /* Restart the system call - no handlers present */ | ||
633 | if (regs->eax == -ERESTARTNOHAND || | ||
634 | regs->eax == -ERESTARTSYS || | ||
635 | regs->eax == -ERESTARTNOINTR) { | ||
636 | regs->eax = regs->orig_eax; | ||
637 | regs->eip -= 2; | ||
638 | } | ||
639 | if (regs->eax == -ERESTART_RESTARTBLOCK) { | ||
640 | regs->eax = __NR_restart_syscall; | ||
641 | regs->eip -= 2; | ||
642 | } | ||
643 | } | ||
644 | return 0; | ||
645 | } | ||
646 | |||
647 | /* | ||
648 | * notification of userspace execution resumption | ||
649 | * - triggered by current->work.notify_resume | ||
650 | */ | ||
651 | __attribute__((regparm(3))) | ||
652 | void do_notify_resume(struct pt_regs *regs, sigset_t *oldset, | ||
653 | __u32 thread_info_flags) | ||
654 | { | ||
655 | /* Pending single-step? */ | ||
656 | if (thread_info_flags & _TIF_SINGLESTEP) { | ||
657 | regs->eflags |= TF_MASK; | ||
658 | clear_thread_flag(TIF_SINGLESTEP); | ||
659 | } | ||
660 | /* deal with pending signal delivery */ | ||
661 | if (thread_info_flags & _TIF_SIGPENDING) | ||
662 | do_signal(regs,oldset); | ||
663 | |||
664 | clear_thread_flag(TIF_IRET); | ||
665 | } | ||
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c new file mode 100644 index 000000000000..6223c33ac91c --- /dev/null +++ b/arch/i386/kernel/smp.c | |||
@@ -0,0 +1,612 @@ | |||
1 | /* | ||
2 | * Intel SMP support routines. | ||
3 | * | ||
4 | * (c) 1995 Alan Cox, Building #3 <alan@redhat.com> | ||
5 | * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com> | ||
6 | * | ||
7 | * This code is released under the GNU General Public License version 2 or | ||
8 | * later. | ||
9 | */ | ||
10 | |||
11 | #include <linux/init.h> | ||
12 | |||
13 | #include <linux/mm.h> | ||
14 | #include <linux/irq.h> | ||
15 | #include <linux/delay.h> | ||
16 | #include <linux/spinlock.h> | ||
17 | #include <linux/smp_lock.h> | ||
18 | #include <linux/kernel_stat.h> | ||
19 | #include <linux/mc146818rtc.h> | ||
20 | #include <linux/cache.h> | ||
21 | #include <linux/interrupt.h> | ||
22 | |||
23 | #include <asm/mtrr.h> | ||
24 | #include <asm/tlbflush.h> | ||
25 | #include <mach_apic.h> | ||
26 | |||
27 | /* | ||
28 | * Some notes on x86 processor bugs affecting SMP operation: | ||
29 | * | ||
30 | * Pentium, Pentium Pro, II, III (and all CPUs) have bugs. | ||
31 | * The Linux implications for SMP are handled as follows: | ||
32 | * | ||
33 | * Pentium III / [Xeon] | ||
34 | * None of the E1AP-E3AP errata are visible to the user. | ||
35 | * | ||
36 | * E1AP. see PII A1AP | ||
37 | * E2AP. see PII A2AP | ||
38 | * E3AP. see PII A3AP | ||
39 | * | ||
40 | * Pentium II / [Xeon] | ||
41 | * None of the A1AP-A3AP errata are visible to the user. | ||
42 | * | ||
43 | * A1AP. see PPro 1AP | ||
44 | * A2AP. see PPro 2AP | ||
45 | * A3AP. see PPro 7AP | ||
46 | * | ||
47 | * Pentium Pro | ||
48 | * None of the 1AP-9AP errata are visible to the normal user, | ||
49 | * except occasional delivery of 'spurious interrupt' as trap #15. | ||
50 | * This is very rare and a non-problem. | ||
51 | * | ||
52 | * 1AP. Linux maps APIC as non-cacheable | ||
53 | * 2AP. worked around in hardware | ||
54 | * 3AP. fixed in C0 and above steppings microcode update. | ||
55 | * Linux does not use excessive STARTUP_IPIs. | ||
56 | * 4AP. worked around in hardware | ||
57 | * 5AP. symmetric IO mode (normal Linux operation) not affected. | ||
58 | * 'noapic' mode has vector 0xf filled out properly. | ||
59 | * 6AP. 'noapic' mode might be affected - fixed in later steppings | ||
60 | * 7AP. We do not assume writes to the LVT deassert IRQs | ||
61 | * 8AP. We do not enable low power mode (deep sleep) during MP bootup | ||
62 | * 9AP. We do not use mixed mode | ||
63 | * | ||
64 | * Pentium | ||
65 | * There is a marginal case where REP MOVS on 100MHz SMP | ||
66 | * machines with B stepping processors can fail. XXX should provide | ||
67 | * an L1cache=Writethrough or L1cache=off option. | ||
68 | * | ||
69 | * B stepping CPUs may hang. There are hardware workarounds | ||
70 | * for this. We warn about it in case your board doesn't have the | ||
71 | * workarounds. Basically that's so I can tell anyone with a B stepping | ||
72 | * CPU and SMP problems "tough". | ||
73 | * | ||
74 | * Specific items [From Pentium Processor Specification Update] | ||
75 | * | ||
76 | * 1AP. Linux doesn't use remote read | ||
77 | * 2AP. Linux doesn't trust APIC errors | ||
78 | * 3AP. We work around this | ||
79 | * 4AP. Linux never generated 3 interrupts of the same priority | ||
80 | * to cause a lost local interrupt. | ||
81 | * 5AP. Remote read is never used | ||
82 | * 6AP. not affected - worked around in hardware | ||
83 | * 7AP. not affected - worked around in hardware | ||
84 | * 8AP. worked around in hardware - we get explicit CS errors if not | ||
85 | * 9AP. only 'noapic' mode affected. Might generate spurious | ||
86 | * interrupts, we log only the first one and count the | ||
87 | * rest silently. | ||
88 | * 10AP. not affected - worked around in hardware | ||
89 | * 11AP. Linux reads the APIC between writes to avoid this, as per | ||
90 | * the documentation. Make sure you preserve this as it affects | ||
91 | * the C stepping chips too. | ||
92 | * 12AP. not affected - worked around in hardware | ||
93 | * 13AP. not affected - worked around in hardware | ||
94 | * 14AP. we always deassert INIT during bootup | ||
95 | * 15AP. not affected - worked around in hardware | ||
96 | * 16AP. not affected - worked around in hardware | ||
97 | * 17AP. not affected - worked around in hardware | ||
98 | * 18AP. not affected - worked around in hardware | ||
99 | * 19AP. not affected - worked around in BIOS | ||
100 | * | ||
101 | * If this sounds worrying, believe me: these bugs are either ___RARE___ | ||
102 | * or are signal timing bugs worked around in hardware, and there's | ||
103 | * almost nothing of note from the C stepping upwards. | ||
104 | */ | ||
105 | |||
106 | DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0, }; | ||
107 | |||
108 | /* | ||
109 | * the following functions deal with sending IPIs between CPUs. | ||
110 | * | ||
111 | * We use 'broadcast', CPU->CPU IPIs and self-IPIs too. | ||
112 | */ | ||
113 | |||
114 | static inline int __prepare_ICR (unsigned int shortcut, int vector) | ||
115 | { | ||
116 | return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL; | ||
117 | } | ||
118 | |||
119 | static inline int __prepare_ICR2 (unsigned int mask) | ||
120 | { | ||
121 | return SET_APIC_DEST_FIELD(mask); | ||
122 | } | ||
123 | |||
124 | void __send_IPI_shortcut(unsigned int shortcut, int vector) | ||
125 | { | ||
126 | /* | ||
127 | * Subtle. In the case of the 'never do double writes' workaround | ||
128 | * we have to lock out interrupts to be safe. As we don't care | ||
129 | * about the value read, we use an atomic rmw access to avoid costly | ||
130 | * cli/sti. Otherwise we use an even cheaper single atomic write | ||
131 | * to the APIC. | ||
132 | */ | ||
133 | unsigned int cfg; | ||
134 | |||
135 | /* | ||
136 | * Wait for idle. | ||
137 | */ | ||
138 | apic_wait_icr_idle(); | ||
139 | |||
140 | /* | ||
141 | * No need to touch the target chip field | ||
142 | */ | ||
143 | cfg = __prepare_ICR(shortcut, vector); | ||
144 | |||
145 | /* | ||
146 | * Send the IPI. The write to APIC_ICR fires this off. | ||
147 | */ | ||
148 | apic_write_around(APIC_ICR, cfg); | ||
149 | } | ||
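/*
 * Illustrative sketch (not part of the original file): what
 * __prepare_ICR()/__prepare_ICR2() compose, following the layout of
 * the local APIC's interrupt command register. The constants below are
 * written out by hand per the Intel SDM bit positions (an assumption),
 * not taken from kernel headers.
 */
#include <stdio.h>

#define DM_FIXED	0x00000u	/* delivery mode: fixed vector */
#define DEST_LOGICAL	0x00800u	/* logical destination mode    */
#define SHORTCUT_SELF	0x40000u	/* destination shorthand: self */

static unsigned int prepare_icr(unsigned int shortcut, int vector)
{
	return DM_FIXED | shortcut | vector | DEST_LOGICAL;
}

static unsigned int prepare_icr2(unsigned int mask)
{
	return mask << 24;		/* destination field lives in the top byte */
}

int main(void)
{
	printf("ICR  = %#x\n", prepare_icr(SHORTCUT_SELF, 0xfd));
	printf("ICR2 = %#x\n", prepare_icr2(0x3));
	return 0;
}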
150 | |||
151 | void fastcall send_IPI_self(int vector) | ||
152 | { | ||
153 | __send_IPI_shortcut(APIC_DEST_SELF, vector); | ||
154 | } | ||
155 | |||
156 | /* | ||
157 | * This is only used on smaller machines. | ||
158 | */ | ||
159 | void send_IPI_mask_bitmask(cpumask_t cpumask, int vector) | ||
160 | { | ||
161 | unsigned long mask = cpus_addr(cpumask)[0]; | ||
162 | unsigned long cfg; | ||
163 | unsigned long flags; | ||
164 | |||
165 | local_irq_save(flags); | ||
166 | |||
167 | /* | ||
168 | * Wait for idle. | ||
169 | */ | ||
170 | apic_wait_icr_idle(); | ||
171 | |||
172 | /* | ||
173 | * prepare target chip field | ||
174 | */ | ||
175 | cfg = __prepare_ICR2(mask); | ||
176 | apic_write_around(APIC_ICR2, cfg); | ||
177 | |||
178 | /* | ||
179 | * program the ICR | ||
180 | */ | ||
181 | cfg = __prepare_ICR(0, vector); | ||
182 | |||
183 | /* | ||
184 | * Send the IPI. The write to APIC_ICR fires this off. | ||
185 | */ | ||
186 | apic_write_around(APIC_ICR, cfg); | ||
187 | |||
188 | local_irq_restore(flags); | ||
189 | } | ||
190 | |||
191 | void send_IPI_mask_sequence(cpumask_t mask, int vector) | ||
192 | { | ||
193 | unsigned long cfg, flags; | ||
194 | unsigned int query_cpu; | ||
195 | |||
196 | /* | ||
197 | * Hack. The clustered APIC addressing mode doesn't allow us to send | ||
198 | * to an arbitrary mask, so I do a unicast to each CPU instead. This | ||
199 | * should be modified to do 1 message per cluster ID - mbligh | ||
200 | */ | ||
201 | |||
202 | local_irq_save(flags); | ||
203 | |||
204 | for (query_cpu = 0; query_cpu < NR_CPUS; ++query_cpu) { | ||
205 | if (cpu_isset(query_cpu, mask)) { | ||
206 | |||
207 | /* | ||
208 | * Wait for idle. | ||
209 | */ | ||
210 | apic_wait_icr_idle(); | ||
211 | |||
212 | /* | ||
213 | * prepare target chip field | ||
214 | */ | ||
215 | cfg = __prepare_ICR2(cpu_to_logical_apicid(query_cpu)); | ||
216 | apic_write_around(APIC_ICR2, cfg); | ||
217 | |||
218 | /* | ||
219 | * program the ICR | ||
220 | */ | ||
221 | cfg = __prepare_ICR(0, vector); | ||
222 | |||
223 | /* | ||
224 | * Send the IPI. The write to APIC_ICR fires this off. | ||
225 | */ | ||
226 | apic_write_around(APIC_ICR, cfg); | ||
227 | } | ||
228 | } | ||
229 | local_irq_restore(flags); | ||
230 | } | ||
231 | |||
232 | #include <mach_ipi.h> /* must come after the send_IPI functions above for inlining */ | ||
233 | |||
234 | /* | ||
235 | * Smarter SMP flushing macros. | ||
236 | * c/o Linus Torvalds. | ||
237 | * | ||
238 | * These mean you can really definitely utterly forget about | ||
239 | * writing to user space from interrupts. (It's not allowed anyway.) | ||
240 | * | ||
241 | * Optimizations Manfred Spraul <manfred@colorfullife.com> | ||
242 | */ | ||
243 | |||
244 | static cpumask_t flush_cpumask; | ||
245 | static struct mm_struct * flush_mm; | ||
246 | static unsigned long flush_va; | ||
247 | static DEFINE_SPINLOCK(tlbstate_lock); | ||
248 | #define FLUSH_ALL 0xffffffff | ||
249 | |||
250 | /* | ||
251 | * We cannot call mmdrop() because we are in interrupt context; | ||
252 | * instead we update mm->cpu_vm_mask. | ||
253 | * | ||
254 | * We need to reload %cr3 since the page tables may be going | ||
255 | * away from under us.. | ||
256 | */ | ||
257 | static inline void leave_mm (unsigned long cpu) | ||
258 | { | ||
259 | if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) | ||
260 | BUG(); | ||
261 | cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask); | ||
262 | load_cr3(swapper_pg_dir); | ||
263 | } | ||
264 | |||
265 | /* | ||
266 | * | ||
267 | * The flush IPI assumes that a thread switch happens in this order: | ||
268 | * [cpu0: the cpu that switches] | ||
269 | * 1) switch_mm() either 1a) or 1b) | ||
270 | * 1a) thread switch to a different mm | ||
271 | * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); | ||
272 | * Stop ipi delivery for the old mm. This is not synchronized with | ||
273 | * the other cpus, but smp_invalidate_interrupt ignores flush ipis | ||
274 | * for the wrong mm, and in the worst case we perform a superfluous | ||
275 | * tlb flush. | ||
276 | * 1a2) set cpu_tlbstate to TLBSTATE_OK | ||
277 | * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 | ||
278 | * was in lazy tlb mode. | ||
279 | * 1a3) update cpu_tlbstate[].active_mm | ||
280 | * Now cpu0 accepts tlb flushes for the new mm. | ||
281 | * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask); | ||
282 | * Now the other cpus will send tlb flush ipis. | ||
283 | * 1a5) change cr3. | ||
284 | * 1b) thread switch without mm change | ||
285 | * cpu_tlbstate[].active_mm is correct, cpu0 already handles | ||
286 | * flush ipis. | ||
287 | * 1b1) set cpu_tlbstate to TLBSTATE_OK | ||
288 | * 1b2) test_and_set the cpu bit in cpu_vm_mask. | ||
289 | * Atomically set the bit [other cpus will start sending flush ipis], | ||
290 | * and test the bit. | ||
291 | * 1b3) if the bit was 0: leave_mm was called, flush the tlb. | ||
292 | * 2) switch %%esp, ie current | ||
293 | * | ||
294 | * The interrupt must handle 2 special cases: | ||
295 | * - cr3 is changed before %%esp, i.e. it cannot use current->{active_,}mm. | ||
296 | * - the cpu performs speculative tlb reads, i.e. even if the cpu only | ||
297 | * runs in kernel space, the cpu could load tlb entries for user space | ||
298 | * pages. | ||
299 | * | ||
300 | * The good news is that cpu_tlbstate is local to each cpu, no | ||
301 | * write/read ordering problems. | ||
302 | */ | ||
303 | |||
304 | /* | ||
305 | * TLB flush IPI: | ||
306 | * | ||
307 | * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. | ||
308 | * 2) Leave the mm if we are in the lazy tlb mode. | ||
309 | */ | ||
310 | |||
311 | fastcall void smp_invalidate_interrupt(struct pt_regs *regs) | ||
312 | { | ||
313 | unsigned long cpu; | ||
314 | |||
315 | cpu = get_cpu(); | ||
316 | |||
317 | if (!cpu_isset(cpu, flush_cpumask)) | ||
318 | goto out; | ||
319 | /* | ||
320 | * This was a BUG() but until someone can quote me the | ||
321 | * line from the Intel manual that guarantees an IPI to | ||
322 | * multiple CPUs is retried _only_ on the erroring CPUs, | ||
323 | * it's staying as a return. | ||
324 | * | ||
325 | * BUG(); | ||
326 | */ | ||
327 | |||
328 | if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) { | ||
329 | if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) { | ||
330 | if (flush_va == FLUSH_ALL) | ||
331 | local_flush_tlb(); | ||
332 | else | ||
333 | __flush_tlb_one(flush_va); | ||
334 | } else | ||
335 | leave_mm(cpu); | ||
336 | } | ||
337 | ack_APIC_irq(); | ||
338 | smp_mb__before_clear_bit(); | ||
339 | cpu_clear(cpu, flush_cpumask); | ||
340 | smp_mb__after_clear_bit(); | ||
341 | out: | ||
342 | put_cpu_no_resched(); | ||
343 | } | ||
344 | |||
345 | static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, | ||
346 | unsigned long va) | ||
347 | { | ||
348 | cpumask_t tmp; | ||
349 | /* | ||
350 | * A couple of (to be removed) sanity checks: | ||
351 | * | ||
352 | * - we do not send IPIs to not-yet booted CPUs. | ||
353 | * - current CPU must not be in mask | ||
354 | * - mask must exist :) | ||
355 | */ | ||
356 | BUG_ON(cpus_empty(cpumask)); | ||
357 | |||
358 | cpus_and(tmp, cpumask, cpu_online_map); | ||
359 | BUG_ON(!cpus_equal(cpumask, tmp)); | ||
360 | BUG_ON(cpu_isset(smp_processor_id(), cpumask)); | ||
361 | BUG_ON(!mm); | ||
362 | |||
363 | /* | ||
364 | * I'm not happy about this global shared spinlock in the | ||
365 | * MM hot path, but we'll see how contended it is. | ||
366 | * Temporarily this turns IRQs off, so that lockups are | ||
367 | * detected by the NMI watchdog. | ||
368 | */ | ||
369 | spin_lock(&tlbstate_lock); | ||
370 | |||
371 | flush_mm = mm; | ||
372 | flush_va = va; | ||
373 | #if NR_CPUS <= BITS_PER_LONG | ||
374 | atomic_set_mask(cpumask, &flush_cpumask); | ||
375 | #else | ||
376 | { | ||
377 | int k; | ||
378 | unsigned long *flush_mask = (unsigned long *)&flush_cpumask; | ||
379 | unsigned long *cpu_mask = (unsigned long *)&cpumask; | ||
380 | for (k = 0; k < BITS_TO_LONGS(NR_CPUS); ++k) | ||
381 | atomic_set_mask(cpu_mask[k], &flush_mask[k]); | ||
382 | } | ||
383 | #endif | ||
384 | /* | ||
385 | * We have to send the IPI only to | ||
386 | * CPUs affected. | ||
387 | */ | ||
388 | send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR); | ||
389 | |||
390 | while (!cpus_empty(flush_cpumask)) | ||
391 | /* nothing. lockup detection does not belong here */ | ||
392 | mb(); | ||
393 | |||
394 | flush_mm = NULL; | ||
395 | flush_va = 0; | ||
396 | spin_unlock(&tlbstate_lock); | ||
397 | } | ||
398 | |||
399 | void flush_tlb_current_task(void) | ||
400 | { | ||
401 | struct mm_struct *mm = current->mm; | ||
402 | cpumask_t cpu_mask; | ||
403 | |||
404 | preempt_disable(); | ||
405 | cpu_mask = mm->cpu_vm_mask; | ||
406 | cpu_clear(smp_processor_id(), cpu_mask); | ||
407 | |||
408 | local_flush_tlb(); | ||
409 | if (!cpus_empty(cpu_mask)) | ||
410 | flush_tlb_others(cpu_mask, mm, FLUSH_ALL); | ||
411 | preempt_enable(); | ||
412 | } | ||
413 | |||
414 | void flush_tlb_mm (struct mm_struct * mm) | ||
415 | { | ||
416 | cpumask_t cpu_mask; | ||
417 | |||
418 | preempt_disable(); | ||
419 | cpu_mask = mm->cpu_vm_mask; | ||
420 | cpu_clear(smp_processor_id(), cpu_mask); | ||
421 | |||
422 | if (current->active_mm == mm) { | ||
423 | if (current->mm) | ||
424 | local_flush_tlb(); | ||
425 | else | ||
426 | leave_mm(smp_processor_id()); | ||
427 | } | ||
428 | if (!cpus_empty(cpu_mask)) | ||
429 | flush_tlb_others(cpu_mask, mm, FLUSH_ALL); | ||
430 | |||
431 | preempt_enable(); | ||
432 | } | ||
433 | |||
434 | void flush_tlb_page(struct vm_area_struct * vma, unsigned long va) | ||
435 | { | ||
436 | struct mm_struct *mm = vma->vm_mm; | ||
437 | cpumask_t cpu_mask; | ||
438 | |||
439 | preempt_disable(); | ||
440 | cpu_mask = mm->cpu_vm_mask; | ||
441 | cpu_clear(smp_processor_id(), cpu_mask); | ||
442 | |||
443 | if (current->active_mm == mm) { | ||
444 | if (current->mm) | ||
445 | __flush_tlb_one(va); | ||
446 | else | ||
447 | leave_mm(smp_processor_id()); | ||
448 | } | ||
449 | |||
450 | if (!cpus_empty(cpu_mask)) | ||
451 | flush_tlb_others(cpu_mask, mm, va); | ||
452 | |||
453 | preempt_enable(); | ||
454 | } | ||
455 | |||
456 | static void do_flush_tlb_all(void* info) | ||
457 | { | ||
458 | unsigned long cpu = smp_processor_id(); | ||
459 | |||
460 | __flush_tlb_all(); | ||
461 | if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY) | ||
462 | leave_mm(cpu); | ||
463 | } | ||
464 | |||
465 | void flush_tlb_all(void) | ||
466 | { | ||
467 | on_each_cpu(do_flush_tlb_all, NULL, 1, 1); | ||
468 | } | ||
469 | |||
470 | /* | ||
471 | * this function sends a 'reschedule' IPI to another CPU. | ||
472 | * it goes straight through and wastes no time serializing | ||
473 | * anything. Worst case is that we lose a reschedule ... | ||
474 | */ | ||
475 | void smp_send_reschedule(int cpu) | ||
476 | { | ||
477 | send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); | ||
478 | } | ||
479 | |||
480 | /* | ||
481 | * Structure and data for smp_call_function(). This is designed to minimise | ||
482 | * static memory requirements. It also looks cleaner. | ||
483 | */ | ||
484 | static DEFINE_SPINLOCK(call_lock); | ||
485 | |||
486 | struct call_data_struct { | ||
487 | void (*func) (void *info); | ||
488 | void *info; | ||
489 | atomic_t started; | ||
490 | atomic_t finished; | ||
491 | int wait; | ||
492 | }; | ||
493 | |||
494 | static struct call_data_struct * call_data; | ||
495 | |||
496 | /* | ||
497 | * this function sends a 'generic call function' IPI to all other CPUs | ||
498 | * in the system. | ||
499 | */ | ||
500 | |||
501 | int smp_call_function (void (*func) (void *info), void *info, int nonatomic, | ||
502 | int wait) | ||
503 | /* | ||
504 | * [SUMMARY] Run a function on all other CPUs. | ||
505 | * <func> The function to run. This must be fast and non-blocking. | ||
506 | * <info> An arbitrary pointer to pass to the function. | ||
507 | * <nonatomic> currently unused. | ||
508 | * <wait> If true, wait (atomically) until function has completed on other CPUs. | ||
509 | * [RETURNS] 0 on success, else a negative status code. Does not return until | ||
510 | * remote CPUs are nearly ready to execute <<func>> or have executed it. | ||
511 | * | ||
512 | * You must not call this function with disabled interrupts or from a | ||
513 | * hardware interrupt handler or from a bottom half handler. | ||
514 | */ | ||
515 | { | ||
516 | struct call_data_struct data; | ||
517 | int cpus = num_online_cpus()-1; | ||
518 | |||
519 | if (!cpus) | ||
520 | return 0; | ||
521 | |||
522 | /* Can deadlock when called with interrupts disabled */ | ||
523 | WARN_ON(irqs_disabled()); | ||
524 | |||
525 | data.func = func; | ||
526 | data.info = info; | ||
527 | atomic_set(&data.started, 0); | ||
528 | data.wait = wait; | ||
529 | if (wait) | ||
530 | atomic_set(&data.finished, 0); | ||
531 | |||
532 | spin_lock(&call_lock); | ||
533 | call_data = &data; | ||
534 | mb(); | ||
535 | |||
536 | /* Send a message to all other CPUs and wait for them to respond */ | ||
537 | send_IPI_allbutself(CALL_FUNCTION_VECTOR); | ||
538 | |||
539 | /* Wait for response */ | ||
540 | while (atomic_read(&data.started) != cpus) | ||
541 | cpu_relax(); | ||
542 | |||
543 | if (wait) | ||
544 | while (atomic_read(&data.finished) != cpus) | ||
545 | cpu_relax(); | ||
546 | spin_unlock(&call_lock); | ||
547 | |||
548 | return 0; | ||
549 | } | ||
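/*
 * Illustrative sketch (not part of the original file): the
 * started/finished rendezvous from smp_call_function(), modelled in
 * userspace with threads standing in for the IPI'd CPUs. The initiator
 * publishes the work, "sends the IPI" (starts the threads), then spins
 * until every responder has bumped the counters -- the same protocol
 * the atomic_inc()/atomic_read() pairs above implement. All names here
 * are invented for the demo.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NCPUS 3

static void (*volatile call_func)(void *);
static void *volatile call_info;
static atomic_int started, finished;

static void say_hello(void *info) { printf("cpu says: %s\n", (char *)info); }

static void *responder(void *arg)
{
	void (*func)(void *) = call_func;	/* grab the data ...    */
	void *info = call_info;

	(void)arg;
	atomic_fetch_add(&started, 1);		/* ... then acknowledge */
	func(info);
	atomic_fetch_add(&finished, 1);		/* initiator may return */
	return NULL;
}

int main(void)
{
	pthread_t t[NCPUS];

	call_func = say_hello;
	call_info = "hello";
	for (int i = 0; i < NCPUS; i++)
		pthread_create(&t[i], NULL, responder, NULL);

	while (atomic_load(&started) != NCPUS)
		;	/* spin; the kernel uses cpu_relax() here */
	while (atomic_load(&finished) != NCPUS)
		;	/* only needed when wait == 1 */

	for (int i = 0; i < NCPUS; i++)
		pthread_join(t[i], NULL);
	return 0;
}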
550 | |||
551 | static void stop_this_cpu (void * dummy) | ||
552 | { | ||
553 | /* | ||
554 | * Remove this CPU: | ||
555 | */ | ||
556 | cpu_clear(smp_processor_id(), cpu_online_map); | ||
557 | local_irq_disable(); | ||
558 | disable_local_APIC(); | ||
559 | if (cpu_data[smp_processor_id()].hlt_works_ok) | ||
560 | for(;;) __asm__("hlt"); | ||
561 | for (;;); | ||
562 | } | ||
563 | |||
564 | /* | ||
565 | * this function calls the 'stop' function on all other CPUs in the system. | ||
566 | */ | ||
567 | |||
568 | void smp_send_stop(void) | ||
569 | { | ||
570 | smp_call_function(stop_this_cpu, NULL, 1, 0); | ||
571 | |||
572 | local_irq_disable(); | ||
573 | disable_local_APIC(); | ||
574 | local_irq_enable(); | ||
575 | } | ||
576 | |||
577 | /* | ||
578 | * Reschedule call back. Nothing to do, | ||
579 | * all the work is done automatically when | ||
580 | * we return from the interrupt. | ||
581 | */ | ||
582 | fastcall void smp_reschedule_interrupt(struct pt_regs *regs) | ||
583 | { | ||
584 | ack_APIC_irq(); | ||
585 | } | ||
586 | |||
587 | fastcall void smp_call_function_interrupt(struct pt_regs *regs) | ||
588 | { | ||
589 | void (*func) (void *info) = call_data->func; | ||
590 | void *info = call_data->info; | ||
591 | int wait = call_data->wait; | ||
592 | |||
593 | ack_APIC_irq(); | ||
594 | /* | ||
595 | * Notify initiating CPU that I've grabbed the data and am | ||
596 | * about to execute the function | ||
597 | */ | ||
598 | mb(); | ||
599 | atomic_inc(&call_data->started); | ||
600 | /* | ||
601 | * At this point the info structure may be out of scope unless wait==1 | ||
602 | */ | ||
603 | irq_enter(); | ||
604 | (*func)(info); | ||
605 | irq_exit(); | ||
606 | |||
607 | if (wait) { | ||
608 | mb(); | ||
609 | atomic_inc(&call_data->finished); | ||
610 | } | ||
611 | } | ||
612 | |||
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c new file mode 100644 index 000000000000..332ee7a1d1a1 --- /dev/null +++ b/arch/i386/kernel/smpboot.c | |||
@@ -0,0 +1,1145 @@ | |||
1 | /* | ||
2 | * x86 SMP booting functions | ||
3 | * | ||
4 | * (c) 1995 Alan Cox, Building #3 <alan@redhat.com> | ||
5 | * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com> | ||
6 | * | ||
7 | * Much of the core SMP work is based on previous work by Thomas Radke, to | ||
8 | * whom a great many thanks are extended. | ||
9 | * | ||
10 | * Thanks to Intel for making available several different Pentium, | ||
11 | * Pentium Pro and Pentium-II/Xeon MP machines. | ||
12 | * Original development of Linux SMP code supported by Caldera. | ||
13 | * | ||
14 | * This code is released under the GNU General Public License version 2 or | ||
15 | * later. | ||
16 | * | ||
17 | * Fixes | ||
18 | * Felix Koop : NR_CPUS used properly | ||
19 | * Jose Renau : Handle single CPU case. | ||
20 | * Alan Cox : By repeated request 8) - Total BogoMIPS report. | ||
21 | * Greg Wright : Fix for kernel stacks panic. | ||
22 | * Erich Boleyn : MP v1.4 and additional changes. | ||
23 | * Matthias Sattler : Changes for 2.1 kernel map. | ||
24 | * Michel Lespinasse : Changes for 2.1 kernel map. | ||
25 | * Michael Chastain : Change trampoline.S to gnu as. | ||
26 | * Alan Cox : Dumb bug: 'B' step PPro's are fine | ||
27 | * Ingo Molnar : Added APIC timers, based on code | ||
28 | * from Jose Renau | ||
29 | * Ingo Molnar : various cleanups and rewrites | ||
30 | * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug. | ||
31 | * Maciej W. Rozycki : Bits for genuine 82489DX APICs | ||
32 | * Martin J. Bligh : Added support for multi-quad systems | ||
33 | * Dave Jones : Report invalid combinations of Athlon CPUs. | ||
34 | * Rusty Russell : Hacked into shape for new "hotplug" boot process. */ | ||
35 | |||
36 | #include <linux/module.h> | ||
37 | #include <linux/config.h> | ||
38 | #include <linux/init.h> | ||
39 | #include <linux/kernel.h> | ||
40 | |||
41 | #include <linux/mm.h> | ||
42 | #include <linux/sched.h> | ||
43 | #include <linux/kernel_stat.h> | ||
44 | #include <linux/smp_lock.h> | ||
45 | #include <linux/irq.h> | ||
46 | #include <linux/bootmem.h> | ||
47 | |||
48 | #include <linux/delay.h> | ||
49 | #include <linux/mc146818rtc.h> | ||
50 | #include <asm/tlbflush.h> | ||
51 | #include <asm/desc.h> | ||
52 | #include <asm/arch_hooks.h> | ||
53 | |||
54 | #include <mach_apic.h> | ||
55 | #include <mach_wakecpu.h> | ||
56 | #include <smpboot_hooks.h> | ||
57 | |||
58 | /* Set if we find a B stepping CPU */ | ||
59 | static int __initdata smp_b_stepping; | ||
60 | |||
61 | /* Number of siblings per CPU package */ | ||
62 | int smp_num_siblings = 1; | ||
63 | int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */ | ||
64 | EXPORT_SYMBOL(phys_proc_id); | ||
65 | |||
66 | /* bitmap of online cpus */ | ||
67 | cpumask_t cpu_online_map; | ||
68 | |||
69 | cpumask_t cpu_callin_map; | ||
70 | cpumask_t cpu_callout_map; | ||
71 | static cpumask_t smp_commenced_mask; | ||
72 | |||
73 | /* Per CPU bogomips and other parameters */ | ||
74 | struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; | ||
75 | |||
76 | u8 x86_cpu_to_apicid[NR_CPUS] = | ||
77 | { [0 ... NR_CPUS-1] = 0xff }; | ||
78 | EXPORT_SYMBOL(x86_cpu_to_apicid); | ||
79 | |||
80 | /* | ||
81 | * Trampoline 80x86 program as an array. | ||
82 | */ | ||
83 | |||
84 | extern unsigned char trampoline_data []; | ||
85 | extern unsigned char trampoline_end []; | ||
86 | static unsigned char *trampoline_base; | ||
87 | static int trampoline_exec; | ||
88 | |||
89 | static void map_cpu_to_logical_apicid(void); | ||
90 | |||
91 | /* | ||
92 | * Currently trivial. Write the real->protected mode | ||
93 | * bootstrap into the page concerned. The caller | ||
94 | * has made sure it's suitably aligned. | ||
95 | */ | ||
96 | |||
97 | static unsigned long __init setup_trampoline(void) | ||
98 | { | ||
99 | memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data); | ||
100 | return virt_to_phys(trampoline_base); | ||
101 | } | ||
102 | |||
103 | /* | ||
104 | * We are called very early to get the low memory for the | ||
105 | * SMP bootup trampoline page. | ||
106 | */ | ||
107 | void __init smp_alloc_memory(void) | ||
108 | { | ||
109 | trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE); | ||
110 | /* | ||
111 | * Has to be in very low memory so we can execute | ||
112 | * real-mode AP code. | ||
113 | */ | ||
114 | if (__pa(trampoline_base) >= 0x9F000) | ||
115 | BUG(); | ||
116 | /* | ||
117 | * Make the SMP trampoline executable: | ||
118 | */ | ||
119 | trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1); | ||
120 | } | ||
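The 0x9F000 bound above exists because the AP must run the trampoline in real mode: the page has to live below 640K, and in practice below the EBDA, which commonly sits near 0x9F000. A minimal user-space sketch of the constraint (the address is invented; this is not kernel code):

#include <stdio.h>

int main(void)
{
	unsigned long trampoline_phys = 0x9E000;	/* invented example address */

	/* Real-mode AP startup: the trampoline page must sit below the
	 * 640K boundary (and below the EBDA, commonly near 0x9F000). */
	if (trampoline_phys >= 0x9F000)
		printf("0x%lx: too high for a real-mode trampoline\n", trampoline_phys);
	else
		printf("0x%lx: usable as the SMP trampoline page\n", trampoline_phys);
	return 0;
}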
121 | |||
122 | /* | ||
123 | * The bootstrap kernel entry code has set these up. Save them for | ||
124 | * a given CPU. | ||
125 | */ | ||
126 | |||
127 | static void __init smp_store_cpu_info(int id) | ||
128 | { | ||
129 | struct cpuinfo_x86 *c = cpu_data + id; | ||
130 | |||
131 | *c = boot_cpu_data; | ||
132 | if (id!=0) | ||
133 | identify_cpu(c); | ||
134 | /* | ||
135 | * Mask B Pentiums, but not Pentium MMX | ||
136 | */ | ||
137 | if (c->x86_vendor == X86_VENDOR_INTEL && | ||
138 | c->x86 == 5 && | ||
139 | c->x86_mask >= 1 && c->x86_mask <= 4 && | ||
140 | c->x86_model <= 3) | ||
141 | /* | ||
142 | * Remember we have B step Pentia with bugs | ||
143 | */ | ||
144 | smp_b_stepping = 1; | ||
145 | |||
146 | /* | ||
147 | * Certain Athlons might work (for various values of 'work') in SMP | ||
148 | * but they are not certified as MP capable. | ||
149 | */ | ||
150 | if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) { | ||
151 | |||
152 | /* Athlon 660/661 is valid. */ | ||
153 | if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1))) | ||
154 | goto valid_k7; | ||
155 | |||
156 | /* Duron 670 is valid */ | ||
157 | if ((c->x86_model==7) && (c->x86_mask==0)) | ||
158 | goto valid_k7; | ||
159 | |||
160 | /* | ||
161 | * Athlon 662, Duron 671, and Athlon >model 7 have the MP capability bit. | ||
162 | * It's worth noting that the A5 stepping (662) of some Athlon XPs | ||
163 | * has the MP bit set. | ||
164 | * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for more. | ||
165 | */ | ||
166 | if (((c->x86_model==6) && (c->x86_mask>=2)) || | ||
167 | ((c->x86_model==7) && (c->x86_mask>=1)) || | ||
168 | (c->x86_model> 7)) | ||
169 | if (cpu_has_mp) | ||
170 | goto valid_k7; | ||
171 | |||
172 | /* If we get here, it's not a certified SMP capable AMD system. */ | ||
173 | tainted |= TAINT_UNSAFE_SMP; | ||
174 | } | ||
175 | |||
176 | valid_k7: | ||
177 | ; | ||
178 | } | ||
179 | |||
180 | /* | ||
181 | * TSC synchronization. | ||
182 | * | ||
183 | * We first check whether all CPUs have their TSC's synchronized, | ||
184 | * then we print a warning if not, and always resync. | ||
185 | */ | ||
186 | |||
187 | static atomic_t tsc_start_flag = ATOMIC_INIT(0); | ||
188 | static atomic_t tsc_count_start = ATOMIC_INIT(0); | ||
189 | static atomic_t tsc_count_stop = ATOMIC_INIT(0); | ||
190 | static unsigned long long tsc_values[NR_CPUS]; | ||
191 | |||
192 | #define NR_LOOPS 5 | ||
193 | |||
194 | static void __init synchronize_tsc_bp (void) | ||
195 | { | ||
196 | int i; | ||
197 | unsigned long long t0; | ||
198 | unsigned long long sum, avg; | ||
199 | long long delta; | ||
200 | unsigned long one_usec; | ||
201 | int buggy = 0; | ||
202 | |||
203 | printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus()); | ||
204 | |||
205 | /* convert from kcyc/sec to cyc/usec */ | ||
206 | one_usec = cpu_khz / 1000; | ||
207 | |||
208 | atomic_set(&tsc_start_flag, 1); | ||
209 | wmb(); | ||
210 | |||
211 | /* | ||
212 | * We loop a few times to get a primed instruction cache, | ||
213 | * then the last pass is more or less synchronized and | ||
214 | * the BP and APs set their cycle counters to zero all at | ||
215 | * once. This reduces the chance of having random offsets | ||
216 | * between the processors, and guarantees that the maximum | ||
217 | * delay between the cycle counters is never bigger than | ||
218 | * the latency of information-passing (cachelines) between | ||
219 | * two CPUs. | ||
220 | */ | ||
221 | for (i = 0; i < NR_LOOPS; i++) { | ||
222 | /* | ||
223 | * all APs synchronize but they loop on '== num_cpus' | ||
224 | */ | ||
225 | while (atomic_read(&tsc_count_start) != num_booting_cpus()-1) | ||
226 | mb(); | ||
227 | atomic_set(&tsc_count_stop, 0); | ||
228 | wmb(); | ||
229 | /* | ||
230 | * this lets the APs save their current TSC: | ||
231 | */ | ||
232 | atomic_inc(&tsc_count_start); | ||
233 | |||
234 | rdtscll(tsc_values[smp_processor_id()]); | ||
235 | /* | ||
236 | * We clear the TSC in the last loop: | ||
237 | */ | ||
238 | if (i == NR_LOOPS-1) | ||
239 | write_tsc(0, 0); | ||
240 | |||
241 | /* | ||
242 | * Wait for all APs to leave the synchronization point: | ||
243 | */ | ||
244 | while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1) | ||
245 | mb(); | ||
246 | atomic_set(&tsc_count_start, 0); | ||
247 | wmb(); | ||
248 | atomic_inc(&tsc_count_stop); | ||
249 | } | ||
250 | |||
251 | sum = 0; | ||
252 | for (i = 0; i < NR_CPUS; i++) { | ||
253 | if (cpu_isset(i, cpu_callout_map)) { | ||
254 | t0 = tsc_values[i]; | ||
255 | sum += t0; | ||
256 | } | ||
257 | } | ||
258 | avg = sum; | ||
259 | do_div(avg, num_booting_cpus()); | ||
260 | |||
261 | sum = 0; | ||
262 | for (i = 0; i < NR_CPUS; i++) { | ||
263 | if (!cpu_isset(i, cpu_callout_map)) | ||
264 | continue; | ||
265 | delta = tsc_values[i] - avg; | ||
266 | if (delta < 0) | ||
267 | delta = -delta; | ||
268 | /* | ||
269 | * We report clock differences bigger than 2 microseconds. | ||
270 | */ | ||
271 | if (delta > 2*one_usec) { | ||
272 | long realdelta; | ||
273 | if (!buggy) { | ||
274 | buggy = 1; | ||
275 | printk("\n"); | ||
276 | } | ||
277 | realdelta = delta; | ||
278 | do_div(realdelta, one_usec); | ||
279 | if (tsc_values[i] < avg) | ||
280 | realdelta = -realdelta; | ||
281 | |||
282 | printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta); | ||
283 | } | ||
284 | |||
285 | sum += delta; | ||
286 | } | ||
287 | if (!buggy) | ||
288 | printk("passed.\n"); | ||
289 | } | ||
290 | |||
291 | static void __init synchronize_tsc_ap (void) | ||
292 | { | ||
293 | int i; | ||
294 | |||
295 | /* | ||
296 | * Not every cpu is online at the time | ||
297 | * this gets called, so we first wait for the BP to | ||
298 | * finish SMP initialization: | ||
299 | */ | ||
300 | while (!atomic_read(&tsc_start_flag)) mb(); | ||
301 | |||
302 | for (i = 0; i < NR_LOOPS; i++) { | ||
303 | atomic_inc(&tsc_count_start); | ||
304 | while (atomic_read(&tsc_count_start) != num_booting_cpus()) | ||
305 | mb(); | ||
306 | |||
307 | rdtscll(tsc_values[smp_processor_id()]); | ||
308 | if (i == NR_LOOPS-1) | ||
309 | write_tsc(0, 0); | ||
310 | |||
311 | atomic_inc(&tsc_count_stop); | ||
312 | while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb(); | ||
313 | } | ||
314 | } | ||
315 | #undef NR_LOOPS | ||
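The two counters above implement a counting rendezvous: in each loop the BP waits for all APs to check in, releases them, and both sides sample (and finally zero) their TSCs inside the window. A user-space model of the same two-phase barrier, using C11 atomics and pthreads instead of the kernel's atomic_t/mb() (all identifiers invented for the sketch):

#include <stdatomic.h>
#include <pthread.h>
#include <stdio.h>

enum { NCPUS = 4, LOOPS = 5 };
static atomic_int count_start, count_stop;

static void *ap_thread(void *arg)		/* models synchronize_tsc_ap() */
{
	(void)arg;
	for (int i = 0; i < LOOPS; i++) {
		atomic_fetch_add(&count_start, 1);
		while (atomic_load(&count_start) != NCPUS)
			;			/* wait until the BP releases everyone */
		/* ... each CPU would sample its TSC here ... */
		atomic_fetch_add(&count_stop, 1);
		while (atomic_load(&count_stop) != NCPUS)
			;			/* wait for everyone to leave the window */
	}
	return NULL;
}

int main(void)					/* models synchronize_tsc_bp() */
{
	pthread_t t[NCPUS - 1];

	for (int i = 0; i < NCPUS - 1; i++)
		pthread_create(&t[i], NULL, ap_thread, NULL);
	for (int i = 0; i < LOOPS; i++) {
		while (atomic_load(&count_start) != NCPUS - 1)
			;			/* all APs have checked in */
		atomic_store(&count_stop, 0);	/* re-arm phase 2 before releasing */
		atomic_fetch_add(&count_start, 1);	/* release: count reaches NCPUS */
		/* ... BP samples its TSC here ... */
		while (atomic_load(&count_stop) != NCPUS - 1)
			;
		atomic_store(&count_start, 0);	/* re-arm phase 1 for the next loop */
		atomic_fetch_add(&count_stop, 1);
	}
	for (int i = 0; i < NCPUS - 1; i++)
		pthread_join(t[i], NULL);
	puts("rendezvous completed");
	return 0;
}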
316 | |||
317 | extern void calibrate_delay(void); | ||
318 | |||
319 | static atomic_t init_deasserted; | ||
320 | |||
321 | static void __init smp_callin(void) | ||
322 | { | ||
323 | int cpuid, phys_id; | ||
324 | unsigned long timeout; | ||
325 | |||
326 | /* | ||
327 | * If woken up by an INIT in an 82489DX configuration | ||
328 | * we may get here before an INIT-deassert IPI reaches | ||
329 | * our local APIC. We have to wait for the IPI or we'll | ||
330 | * lock up on an APIC access. | ||
331 | */ | ||
332 | wait_for_init_deassert(&init_deasserted); | ||
333 | |||
334 | /* | ||
335 | * (This works even if the APIC is not enabled.) | ||
336 | */ | ||
337 | phys_id = GET_APIC_ID(apic_read(APIC_ID)); | ||
338 | cpuid = smp_processor_id(); | ||
339 | if (cpu_isset(cpuid, cpu_callin_map)) { | ||
340 | printk("huh, phys CPU#%d, CPU#%d already present??\n", | ||
341 | phys_id, cpuid); | ||
342 | BUG(); | ||
343 | } | ||
344 | Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id); | ||
345 | |||
346 | /* | ||
347 | * STARTUP IPIs are fragile beasts as they might sometimes | ||
348 | * trigger some glue motherboard logic. We keep the APIC bus | ||
349 | * completely quiet for 1 second; this overestimates, by a | ||
350 | * factor of two, the time the boot CPU spends sending the up | ||
351 | * to 2 STARTUP IPIs. This should be enough. | ||
352 | */ | ||
353 | |||
354 | /* | ||
355 | * Waiting 2s total for startup (udelay is not yet working) | ||
356 | */ | ||
357 | timeout = jiffies + 2*HZ; | ||
358 | while (time_before(jiffies, timeout)) { | ||
359 | /* | ||
360 | * Has the boot CPU finished its STARTUP sequence? | ||
361 | */ | ||
362 | if (cpu_isset(cpuid, cpu_callout_map)) | ||
363 | break; | ||
364 | rep_nop(); | ||
365 | } | ||
366 | |||
367 | if (!time_before(jiffies, timeout)) { | ||
368 | printk("BUG: CPU%d started up but did not get a callout!\n", | ||
369 | cpuid); | ||
370 | BUG(); | ||
371 | } | ||
372 | |||
373 | /* | ||
374 | * the boot CPU has finished the init stage and is spinning | ||
375 | * on callin_map until we finish. We are free to set up this | ||
376 | * CPU, first the APIC. (this is probably redundant on most | ||
377 | * boards) | ||
378 | */ | ||
379 | |||
380 | Dprintk("CALLIN, before setup_local_APIC().\n"); | ||
381 | smp_callin_clear_local_apic(); | ||
382 | setup_local_APIC(); | ||
383 | map_cpu_to_logical_apicid(); | ||
384 | |||
385 | /* | ||
386 | * Get our bogomips. | ||
387 | */ | ||
388 | calibrate_delay(); | ||
389 | Dprintk("Stack at about %p\n",&cpuid); | ||
390 | |||
391 | /* | ||
392 | * Save our processor parameters | ||
393 | */ | ||
394 | smp_store_cpu_info(cpuid); | ||
395 | |||
396 | disable_APIC_timer(); | ||
397 | |||
398 | /* | ||
399 | * Allow the master to continue. | ||
400 | */ | ||
401 | cpu_set(cpuid, cpu_callin_map); | ||
402 | |||
403 | /* | ||
404 | * Synchronize the TSC with the BP | ||
405 | */ | ||
406 | if (cpu_has_tsc && cpu_khz) | ||
407 | synchronize_tsc_ap(); | ||
408 | } | ||
409 | |||
410 | static int cpucount; | ||
411 | |||
412 | /* | ||
413 | * Activate a secondary processor. | ||
414 | */ | ||
415 | static void __init start_secondary(void *unused) | ||
416 | { | ||
417 | /* | ||
418 | * Don't put anything before smp_callin(); SMP | ||
419 | * booting is so fragile that we want to limit the | ||
420 | * things done here to the bare minimum. | ||
421 | */ | ||
422 | cpu_init(); | ||
423 | smp_callin(); | ||
424 | while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) | ||
425 | rep_nop(); | ||
426 | setup_secondary_APIC_clock(); | ||
427 | if (nmi_watchdog == NMI_IO_APIC) { | ||
428 | disable_8259A_irq(0); | ||
429 | enable_NMI_through_LVT0(NULL); | ||
430 | enable_8259A_irq(0); | ||
431 | } | ||
432 | enable_APIC_timer(); | ||
433 | /* | ||
434 | * low-memory mappings have been cleared, flush them from | ||
435 | * the local TLBs too. | ||
436 | */ | ||
437 | local_flush_tlb(); | ||
438 | cpu_set(smp_processor_id(), cpu_online_map); | ||
439 | |||
440 | /* We can take interrupts now: we're officially "up". */ | ||
441 | local_irq_enable(); | ||
442 | |||
443 | wmb(); | ||
444 | cpu_idle(); | ||
445 | } | ||
446 | |||
447 | /* | ||
448 | * Everything has been set up for the secondary | ||
449 | * CPUs - they just need to reload everything | ||
450 | * from the task structure | ||
451 | * This function must not return. | ||
452 | */ | ||
453 | void __init initialize_secondary(void) | ||
454 | { | ||
455 | /* | ||
456 | * We don't actually need to load the full TSS, | ||
457 | * basically just the stack pointer and the eip. | ||
458 | */ | ||
459 | |||
460 | asm volatile( | ||
461 | "movl %0,%%esp\n\t" | ||
462 | "jmp *%1" | ||
463 | : | ||
464 | :"r" (current->thread.esp),"r" (current->thread.eip)); | ||
465 | } | ||
466 | |||
467 | extern struct { | ||
468 | void * esp; | ||
469 | unsigned short ss; | ||
470 | } stack_start; | ||
471 | |||
472 | #ifdef CONFIG_NUMA | ||
473 | |||
474 | /* which logical CPUs are on which nodes */ | ||
475 | cpumask_t node_2_cpu_mask[MAX_NUMNODES] = | ||
476 | { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE }; | ||
477 | /* which node each logical CPU is on */ | ||
478 | int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 }; | ||
479 | EXPORT_SYMBOL(cpu_2_node); | ||
480 | |||
481 | /* set up a mapping between cpu and node. */ | ||
482 | static inline void map_cpu_to_node(int cpu, int node) | ||
483 | { | ||
484 | printk("Mapping cpu %d to node %d\n", cpu, node); | ||
485 | cpu_set(cpu, node_2_cpu_mask[node]); | ||
486 | cpu_2_node[cpu] = node; | ||
487 | } | ||
488 | |||
489 | /* undo a mapping between cpu and node. */ | ||
490 | static inline void unmap_cpu_to_node(int cpu) | ||
491 | { | ||
492 | int node; | ||
493 | |||
494 | printk("Unmapping cpu %d from all nodes\n", cpu); | ||
495 | for (node = 0; node < MAX_NUMNODES; node ++) | ||
496 | cpu_clear(cpu, node_2_cpu_mask[node]); | ||
497 | cpu_2_node[cpu] = 0; | ||
498 | } | ||
499 | #else /* !CONFIG_NUMA */ | ||
500 | |||
501 | #define map_cpu_to_node(cpu, node) ({}) | ||
502 | #define unmap_cpu_to_node(cpu) ({}) | ||
503 | |||
504 | #endif /* CONFIG_NUMA */ | ||
505 | |||
506 | u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; | ||
507 | |||
508 | static void map_cpu_to_logical_apicid(void) | ||
509 | { | ||
510 | int cpu = smp_processor_id(); | ||
511 | int apicid = logical_smp_processor_id(); | ||
512 | |||
513 | cpu_2_logical_apicid[cpu] = apicid; | ||
514 | map_cpu_to_node(cpu, apicid_to_node(apicid)); | ||
515 | } | ||
516 | |||
517 | static void unmap_cpu_to_logical_apicid(int cpu) | ||
518 | { | ||
519 | cpu_2_logical_apicid[cpu] = BAD_APICID; | ||
520 | unmap_cpu_to_node(cpu); | ||
521 | } | ||
522 | |||
523 | #if APIC_DEBUG | ||
524 | static inline void __inquire_remote_apic(int apicid) | ||
525 | { | ||
526 | int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; | ||
527 | char *names[] = { "ID", "VERSION", "SPIV" }; | ||
528 | int timeout, status; | ||
529 | |||
530 | printk("Inquiring remote APIC #%d...\n", apicid); | ||
531 | |||
532 | for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) { | ||
533 | printk("... APIC #%d %s: ", apicid, names[i]); | ||
534 | |||
535 | /* | ||
536 | * Wait for idle. | ||
537 | */ | ||
538 | apic_wait_icr_idle(); | ||
539 | |||
540 | apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); | ||
541 | apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]); | ||
542 | |||
543 | timeout = 0; | ||
544 | do { | ||
545 | udelay(100); | ||
546 | status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK; | ||
547 | } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000); | ||
548 | |||
549 | switch (status) { | ||
550 | case APIC_ICR_RR_VALID: | ||
551 | status = apic_read(APIC_RRR); | ||
552 | printk("%08x\n", status); | ||
553 | break; | ||
554 | default: | ||
555 | printk("failed\n"); | ||
556 | } | ||
557 | } | ||
558 | } | ||
559 | #endif | ||
560 | |||
561 | #ifdef WAKE_SECONDARY_VIA_NMI | ||
562 | /* | ||
563 | * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal | ||
564 | * INIT, INIT, STARTUP sequence will reset the chip hard for us, whereas this | ||
565 | * won't, so remember to clear down the APIC, etc. later. | ||
566 | */ | ||
567 | static int __init | ||
568 | wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) | ||
569 | { | ||
570 | unsigned long send_status = 0, accept_status = 0; | ||
571 | int timeout, maxlvt; | ||
572 | |||
573 | /* Target chip */ | ||
574 | apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid)); | ||
575 | |||
576 | /* Boot on the stack */ | ||
577 | /* Kick the second */ | ||
578 | apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); | ||
579 | |||
580 | Dprintk("Waiting for send to finish...\n"); | ||
581 | timeout = 0; | ||
582 | do { | ||
583 | Dprintk("+"); | ||
584 | udelay(100); | ||
585 | send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; | ||
586 | } while (send_status && (timeout++ < 1000)); | ||
587 | |||
588 | /* | ||
589 | * Give the other CPU some time to accept the IPI. | ||
590 | */ | ||
591 | udelay(200); | ||
592 | /* | ||
593 | * Due to the Pentium erratum 3AP. | ||
594 | */ | ||
595 | maxlvt = get_maxlvt(); | ||
596 | if (maxlvt > 3) { | ||
597 | apic_read_around(APIC_SPIV); | ||
598 | apic_write(APIC_ESR, 0); | ||
599 | } | ||
600 | accept_status = (apic_read(APIC_ESR) & 0xEF); | ||
601 | Dprintk("NMI sent.\n"); | ||
602 | |||
603 | if (send_status) | ||
604 | printk("APIC never delivered???\n"); | ||
605 | if (accept_status) | ||
606 | printk("APIC delivery error (%lx).\n", accept_status); | ||
607 | |||
608 | return (send_status | accept_status); | ||
609 | } | ||
610 | #endif /* WAKE_SECONDARY_VIA_NMI */ | ||
611 | |||
612 | #ifdef WAKE_SECONDARY_VIA_INIT | ||
613 | static int __init | ||
614 | wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) | ||
615 | { | ||
616 | unsigned long send_status = 0, accept_status = 0; | ||
617 | int maxlvt, timeout, num_starts, j; | ||
618 | |||
619 | /* | ||
620 | * Be paranoid about clearing APIC errors. | ||
621 | */ | ||
622 | if (APIC_INTEGRATED(apic_version[phys_apicid])) { | ||
623 | apic_read_around(APIC_SPIV); | ||
624 | apic_write(APIC_ESR, 0); | ||
625 | apic_read(APIC_ESR); | ||
626 | } | ||
627 | |||
628 | Dprintk("Asserting INIT.\n"); | ||
629 | |||
630 | /* | ||
631 | * Turn INIT on target chip | ||
632 | */ | ||
633 | apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); | ||
634 | |||
635 | /* | ||
636 | * Send IPI | ||
637 | */ | ||
638 | apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT | ||
639 | | APIC_DM_INIT); | ||
640 | |||
641 | Dprintk("Waiting for send to finish...\n"); | ||
642 | timeout = 0; | ||
643 | do { | ||
644 | Dprintk("+"); | ||
645 | udelay(100); | ||
646 | send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; | ||
647 | } while (send_status && (timeout++ < 1000)); | ||
648 | |||
649 | mdelay(10); | ||
650 | |||
651 | Dprintk("Deasserting INIT.\n"); | ||
652 | |||
653 | /* Target chip */ | ||
654 | apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); | ||
655 | |||
656 | /* Send IPI */ | ||
657 | apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); | ||
658 | |||
659 | Dprintk("Waiting for send to finish...\n"); | ||
660 | timeout = 0; | ||
661 | do { | ||
662 | Dprintk("+"); | ||
663 | udelay(100); | ||
664 | send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; | ||
665 | } while (send_status && (timeout++ < 1000)); | ||
666 | |||
667 | atomic_set(&init_deasserted, 1); | ||
668 | |||
669 | /* | ||
670 | * Should we send STARTUP IPIs ? | ||
671 | * | ||
672 | * Determine this based on the APIC version. | ||
673 | * If we don't have an integrated APIC, don't send the STARTUP IPIs. | ||
674 | */ | ||
675 | if (APIC_INTEGRATED(apic_version[phys_apicid])) | ||
676 | num_starts = 2; | ||
677 | else | ||
678 | num_starts = 0; | ||
679 | |||
680 | /* | ||
681 | * Run STARTUP IPI loop. | ||
682 | */ | ||
683 | Dprintk("#startup loops: %d.\n", num_starts); | ||
684 | |||
685 | maxlvt = get_maxlvt(); | ||
686 | |||
687 | for (j = 1; j <= num_starts; j++) { | ||
688 | Dprintk("Sending STARTUP #%d.\n",j); | ||
689 | apic_read_around(APIC_SPIV); | ||
690 | apic_write(APIC_ESR, 0); | ||
691 | apic_read(APIC_ESR); | ||
692 | Dprintk("After apic_write.\n"); | ||
693 | |||
694 | /* | ||
695 | * STARTUP IPI | ||
696 | */ | ||
697 | |||
698 | /* Target chip */ | ||
699 | apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); | ||
700 | |||
701 | /* Boot on the stack */ | ||
702 | /* Kick the second */ | ||
703 | apic_write_around(APIC_ICR, APIC_DM_STARTUP | ||
704 | | (start_eip >> 12)); | ||
705 | |||
706 | /* | ||
707 | * Give the other CPU some time to accept the IPI. | ||
708 | */ | ||
709 | udelay(300); | ||
710 | |||
711 | Dprintk("Startup point 1.\n"); | ||
712 | |||
713 | Dprintk("Waiting for send to finish...\n"); | ||
714 | timeout = 0; | ||
715 | do { | ||
716 | Dprintk("+"); | ||
717 | udelay(100); | ||
718 | send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; | ||
719 | } while (send_status && (timeout++ < 1000)); | ||
720 | |||
721 | /* | ||
722 | * Give the other CPU some time to accept the IPI. | ||
723 | */ | ||
724 | udelay(200); | ||
725 | /* | ||
726 | * Due to the Pentium erratum 3AP. | ||
727 | */ | ||
728 | if (maxlvt > 3) { | ||
729 | apic_read_around(APIC_SPIV); | ||
730 | apic_write(APIC_ESR, 0); | ||
731 | } | ||
732 | accept_status = (apic_read(APIC_ESR) & 0xEF); | ||
733 | if (send_status || accept_status) | ||
734 | break; | ||
735 | } | ||
736 | Dprintk("After Startup.\n"); | ||
737 | |||
738 | if (send_status) | ||
739 | printk("APIC never delivered???\n"); | ||
740 | if (accept_status) | ||
741 | printk("APIC delivery error (%lx).\n", accept_status); | ||
742 | |||
743 | return (send_status | accept_status); | ||
744 | } | ||
745 | #endif /* WAKE_SECONDARY_VIA_INIT */ | ||
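Both wakeup paths hinge on the STARTUP IPI's 8-bit vector: per the MP specification the AP starts executing in real mode at CS = vector << 8, IP = 0, i.e. physical address vector * 4096, which is why start_eip must be page-aligned and below 1MB. A standalone illustration of that arithmetic (the example address is invented):

#include <stdio.h>
#include <assert.h>

int main(void)
{
	unsigned long start_eip = 0x9E000;	/* e.g. the low page from setup_trampoline() */

	assert((start_eip & 0xFFF) == 0);	/* must be page-aligned */
	assert(start_eip < 0x100000);		/* must be real-mode addressable */

	unsigned vector = start_eip >> 12;	/* the 8 bits carried by APIC_DM_STARTUP */
	printf("vector=0x%02x, AP starts at CS=0x%04x:0000, phys=0x%05lx\n",
	       vector, vector << 8, (unsigned long)vector << 12);
	return 0;
}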
746 | |||
747 | extern cpumask_t cpu_initialized; | ||
748 | |||
749 | static int __init do_boot_cpu(int apicid) | ||
750 | /* | ||
751 | * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad | ||
752 | * (i.e. clustered apic addressing mode), this is a LOGICAL apic ID. | ||
753 | * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu. | ||
754 | */ | ||
755 | { | ||
756 | struct task_struct *idle; | ||
757 | unsigned long boot_error; | ||
758 | int timeout, cpu; | ||
759 | unsigned long start_eip; | ||
760 | unsigned short nmi_high = 0, nmi_low = 0; | ||
761 | |||
762 | cpu = ++cpucount; | ||
763 | /* | ||
764 | * We can't use kernel_thread since we must avoid | ||
765 | * rescheduling the child. | ||
766 | */ | ||
767 | idle = fork_idle(cpu); | ||
768 | if (IS_ERR(idle)) | ||
769 | panic("failed fork for CPU %d", cpu); | ||
770 | idle->thread.eip = (unsigned long) start_secondary; | ||
771 | /* start_eip had better be page-aligned! */ | ||
772 | start_eip = setup_trampoline(); | ||
773 | |||
774 | /* So we see what's up */ | ||
775 | printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip); | ||
776 | /* Stack for startup_32 can be just as for start_secondary onwards */ | ||
777 | stack_start.esp = (void *) idle->thread.esp; | ||
778 | |||
779 | irq_ctx_init(cpu); | ||
780 | |||
781 | /* | ||
782 | * This grunge runs the startup process for | ||
783 | * the targeted processor. | ||
784 | */ | ||
785 | |||
786 | atomic_set(&init_deasserted, 0); | ||
787 | |||
788 | Dprintk("Setting warm reset code and vector.\n"); | ||
789 | |||
790 | store_NMI_vector(&nmi_high, &nmi_low); | ||
791 | |||
792 | smpboot_setup_warm_reset_vector(start_eip); | ||
793 | |||
794 | /* | ||
795 | * Starting actual IPI sequence... | ||
796 | */ | ||
797 | boot_error = wakeup_secondary_cpu(apicid, start_eip); | ||
798 | |||
799 | if (!boot_error) { | ||
800 | /* | ||
801 | * allow APs to start initializing. | ||
802 | */ | ||
803 | Dprintk("Before Callout %d.\n", cpu); | ||
804 | cpu_set(cpu, cpu_callout_map); | ||
805 | Dprintk("After Callout %d.\n", cpu); | ||
806 | |||
807 | /* | ||
808 | * Wait 5s total for a response | ||
809 | */ | ||
810 | for (timeout = 0; timeout < 50000; timeout++) { | ||
811 | if (cpu_isset(cpu, cpu_callin_map)) | ||
812 | break; /* It has booted */ | ||
813 | udelay(100); | ||
814 | } | ||
815 | |||
816 | if (cpu_isset(cpu, cpu_callin_map)) { | ||
817 | /* number CPUs logically, starting from 1 (BSP is 0) */ | ||
818 | Dprintk("OK.\n"); | ||
819 | printk("CPU%d: ", cpu); | ||
820 | print_cpu_info(&cpu_data[cpu]); | ||
821 | Dprintk("CPU has booted.\n"); | ||
822 | } else { | ||
823 | boot_error= 1; | ||
824 | if (*((volatile unsigned char *)trampoline_base) | ||
825 | == 0xA5) | ||
826 | /* trampoline started but...? */ | ||
827 | printk("Stuck ??\n"); | ||
828 | else | ||
829 | /* trampoline code not run */ | ||
830 | printk("Not responding.\n"); | ||
831 | inquire_remote_apic(apicid); | ||
832 | } | ||
833 | } | ||
834 | x86_cpu_to_apicid[cpu] = apicid; | ||
835 | if (boot_error) { | ||
836 | /* Try to put things back the way they were before ... */ | ||
837 | unmap_cpu_to_logical_apicid(cpu); | ||
838 | cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */ | ||
839 | cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ | ||
840 | cpucount--; | ||
841 | } | ||
842 | |||
843 | /* mark "stuck" area as not stuck */ | ||
844 | *((volatile unsigned long *)trampoline_base) = 0; | ||
845 | |||
846 | return boot_error; | ||
847 | } | ||
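The callin wait in do_boot_cpu() is simple polling arithmetic: 50000 iterations of udelay(100) give the AP up to 5 seconds to set its bit in cpu_callin_map. A plain user-space stand-in for that loop (names invented; usleep() substitutes for udelay()):

#include <stdio.h>
#include <stdbool.h>
#include <unistd.h>

static volatile int callin_flag;		/* stands in for the cpu_callin_map bit */

static bool wait_for_callin(void)
{
	/* 50000 polls x 100 us each = 5,000,000 us = 5 s worst case */
	for (int timeout = 0; timeout < 50000; timeout++) {
		if (callin_flag)
			return true;
		usleep(100);
	}
	return false;
}

int main(void)
{
	callin_flag = 1;			/* pretend the AP already checked in */
	printf("AP %s\n", wait_for_callin() ? "booted" : "not responding");
	return 0;
}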
848 | |||
849 | static void smp_tune_scheduling (void) | ||
850 | { | ||
851 | unsigned long cachesize; /* kB */ | ||
852 | unsigned long bandwidth = 350; /* MB/s */ | ||
853 | /* | ||
854 | * Rough estimation for SMP scheduling; this is the number of | ||
855 | * cycles it takes for a fully memory-limited process to flush | ||
856 | * the SMP-local cache. | ||
857 | * | ||
858 | * (For a P5 this pretty much means we will choose another idle | ||
859 | * CPU almost always at wakeup time (this is due to the small | ||
860 | * L1 cache), on PIIs it's around 50-100 usecs, depending on | ||
861 | * the cache size) | ||
862 | */ | ||
863 | |||
864 | if (!cpu_khz) { | ||
865 | /* | ||
866 | * this basically disables processor-affinity | ||
867 | * scheduling on SMP without a TSC. | ||
868 | */ | ||
869 | return; | ||
870 | } else { | ||
871 | cachesize = boot_cpu_data.x86_cache_size; | ||
872 | if (cachesize == -1) { | ||
873 | cachesize = 16; /* Pentiums, 2x8kB cache */ | ||
874 | bandwidth = 100; | ||
875 | } | ||
876 | } | ||
877 | } | ||
878 | |||
879 | /* | ||
880 | * Cycle through the processors sending APIC IPIs to boot each. | ||
881 | */ | ||
882 | |||
883 | static int boot_cpu_logical_apicid; | ||
884 | /* Where the IO area was mapped on multiquad, always 0 otherwise */ | ||
885 | void *xquad_portio; | ||
886 | |||
887 | cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; | ||
888 | |||
889 | static void __init smp_boot_cpus(unsigned int max_cpus) | ||
890 | { | ||
891 | int apicid, cpu, bit, kicked; | ||
892 | unsigned long bogosum = 0; | ||
893 | |||
894 | /* | ||
895 | * Setup boot CPU information | ||
896 | */ | ||
897 | smp_store_cpu_info(0); /* Final full version of the data */ | ||
898 | printk("CPU%d: ", 0); | ||
899 | print_cpu_info(&cpu_data[0]); | ||
900 | |||
901 | boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); | ||
902 | boot_cpu_logical_apicid = logical_smp_processor_id(); | ||
903 | x86_cpu_to_apicid[0] = boot_cpu_physical_apicid; | ||
904 | |||
905 | current_thread_info()->cpu = 0; | ||
906 | smp_tune_scheduling(); | ||
907 | cpus_clear(cpu_sibling_map[0]); | ||
908 | cpu_set(0, cpu_sibling_map[0]); | ||
909 | |||
910 | /* | ||
911 | * If we couldn't find an SMP configuration at boot time, | ||
912 | * get out of here now! | ||
913 | */ | ||
914 | if (!smp_found_config && !acpi_lapic) { | ||
915 | printk(KERN_NOTICE "SMP motherboard not detected.\n"); | ||
916 | smpboot_clear_io_apic_irqs(); | ||
917 | phys_cpu_present_map = physid_mask_of_physid(0); | ||
918 | if (APIC_init_uniprocessor()) | ||
919 | printk(KERN_NOTICE "Local APIC not detected." | ||
920 | " Using dummy APIC emulation.\n"); | ||
921 | map_cpu_to_logical_apicid(); | ||
922 | return; | ||
923 | } | ||
924 | |||
925 | /* | ||
926 | * Should not be necessary because the MP table should list the boot | ||
927 | * CPU too, but we do it for the sake of robustness anyway. | ||
928 | * Makes no sense to do this check in clustered apic mode, so skip it | ||
929 | */ | ||
930 | if (!check_phys_apicid_present(boot_cpu_physical_apicid)) { | ||
931 | printk("weird, boot CPU (#%d) not listed by the BIOS.\n", | ||
932 | boot_cpu_physical_apicid); | ||
933 | physid_set(hard_smp_processor_id(), phys_cpu_present_map); | ||
934 | } | ||
935 | |||
936 | /* | ||
937 | * If we couldn't find a local APIC, then get out of here now! | ||
938 | */ | ||
939 | if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) { | ||
940 | printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", | ||
941 | boot_cpu_physical_apicid); | ||
942 | printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n"); | ||
943 | smpboot_clear_io_apic_irqs(); | ||
944 | phys_cpu_present_map = physid_mask_of_physid(0); | ||
945 | return; | ||
946 | } | ||
947 | |||
948 | verify_local_APIC(); | ||
949 | |||
950 | /* | ||
951 | * If SMP should be disabled, then really disable it! | ||
952 | */ | ||
953 | if (!max_cpus) { | ||
954 | smp_found_config = 0; | ||
955 | printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n"); | ||
956 | smpboot_clear_io_apic_irqs(); | ||
957 | phys_cpu_present_map = physid_mask_of_physid(0); | ||
958 | return; | ||
959 | } | ||
960 | |||
961 | connect_bsp_APIC(); | ||
962 | setup_local_APIC(); | ||
963 | map_cpu_to_logical_apicid(); | ||
964 | |||
965 | |||
966 | setup_portio_remap(); | ||
967 | |||
968 | /* | ||
969 | * Scan the CPU present map and fire up the other CPUs via do_boot_cpu | ||
970 | * | ||
971 | * In clustered apic mode, phys_cpu_present_map is constructed thus: | ||
972 | * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the | ||
973 | * clustered apic ID. | ||
974 | */ | ||
975 | Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map)); | ||
976 | |||
977 | kicked = 1; | ||
978 | for (bit = 0; kicked < NR_CPUS && bit < MAX_APICS; bit++) { | ||
979 | apicid = cpu_present_to_apicid(bit); | ||
980 | /* | ||
981 | * Don't even attempt to start the boot CPU! | ||
982 | */ | ||
983 | if ((apicid == boot_cpu_apicid) || (apicid == BAD_APICID)) | ||
984 | continue; | ||
985 | |||
986 | if (!check_apicid_present(bit)) | ||
987 | continue; | ||
988 | if (max_cpus <= cpucount+1) | ||
989 | continue; | ||
990 | |||
991 | if (do_boot_cpu(apicid)) | ||
992 | printk("CPU #%d not responding - cannot use it.\n", | ||
993 | apicid); | ||
994 | else | ||
995 | ++kicked; | ||
996 | } | ||
997 | |||
998 | /* | ||
999 | * Cleanup possible dangling ends... | ||
1000 | */ | ||
1001 | smpboot_restore_warm_reset_vector(); | ||
1002 | |||
1003 | /* | ||
1004 | * Allow the user to impress friends. | ||
1005 | */ | ||
1006 | Dprintk("Before bogomips.\n"); | ||
1007 | for (cpu = 0; cpu < NR_CPUS; cpu++) | ||
1008 | if (cpu_isset(cpu, cpu_callout_map)) | ||
1009 | bogosum += cpu_data[cpu].loops_per_jiffy; | ||
1010 | printk(KERN_INFO | ||
1011 | "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", | ||
1012 | cpucount+1, | ||
1013 | bogosum/(500000/HZ), | ||
1014 | (bogosum/(5000/HZ))%100); | ||
1015 | |||
1016 | Dprintk("Before bogocount - setting activated=1.\n"); | ||
1017 | |||
1018 | if (smp_b_stepping) | ||
1019 | printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n"); | ||
1020 | |||
1021 | /* | ||
1022 | * Don't taint if we are running an SMP kernel on a single | ||
1023 | * non-MP-approved Athlon. | ||
1024 | */ | ||
1025 | if (tainted & TAINT_UNSAFE_SMP) { | ||
1026 | if (cpucount) | ||
1027 | printk (KERN_INFO "WARNING: This combination of AMD processors is not suitable for SMP.\n"); | ||
1028 | else | ||
1029 | tainted &= ~TAINT_UNSAFE_SMP; | ||
1030 | } | ||
1031 | |||
1032 | Dprintk("Boot done.\n"); | ||
1033 | |||
1034 | /* | ||
1035 | * construct cpu_sibling_map[], so that we can tell sibling CPUs | ||
1036 | * efficiently. | ||
1037 | */ | ||
1038 | for (cpu = 0; cpu < NR_CPUS; cpu++) | ||
1039 | cpus_clear(cpu_sibling_map[cpu]); | ||
1040 | |||
1041 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | ||
1042 | int siblings = 0; | ||
1043 | int i; | ||
1044 | if (!cpu_isset(cpu, cpu_callout_map)) | ||
1045 | continue; | ||
1046 | |||
1047 | if (smp_num_siblings > 1) { | ||
1048 | for (i = 0; i < NR_CPUS; i++) { | ||
1049 | if (!cpu_isset(i, cpu_callout_map)) | ||
1050 | continue; | ||
1051 | if (phys_proc_id[cpu] == phys_proc_id[i]) { | ||
1052 | siblings++; | ||
1053 | cpu_set(i, cpu_sibling_map[cpu]); | ||
1054 | } | ||
1055 | } | ||
1056 | } else { | ||
1057 | siblings++; | ||
1058 | cpu_set(cpu, cpu_sibling_map[cpu]); | ||
1059 | } | ||
1060 | |||
1061 | if (siblings != smp_num_siblings) | ||
1062 | printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings); | ||
1063 | } | ||
1064 | |||
1065 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
1066 | check_nmi_watchdog(); | ||
1067 | |||
1068 | smpboot_setup_io_apic(); | ||
1069 | |||
1070 | setup_boot_APIC_clock(); | ||
1071 | |||
1072 | /* | ||
1073 | * Synchronize the TSC with the AP | ||
1074 | */ | ||
1075 | if (cpu_has_tsc && cpucount && cpu_khz) | ||
1076 | synchronize_tsc_bp(); | ||
1077 | } | ||
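The BogoMIPS summary printed above avoids floating point: with BogoMIPS = loops_per_jiffy * HZ / 500000, dividing bogosum by 500000/HZ yields the integer part, and dividing by 5000/HZ modulo 100 yields the two fractional digits. A standalone check of that arithmetic with invented values (HZ = 1000, as was typical for i386 at the time):

#include <stdio.h>

int main(void)
{
	const unsigned long HZ = 1000;
	/* two CPUs, loops_per_jiffy = 2494500 each: 4989 BogoMIPS per CPU */
	unsigned long bogosum = 2 * 2494500UL;

	printf("Total of 2 processors activated (%lu.%02lu BogoMIPS).\n",
	       bogosum / (500000 / HZ),		/* integer part: 9978 */
	       (bogosum / (5000 / HZ)) % 100);	/* fractional digits: 00 */
	return 0;
}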
1078 | |||
1079 | /* These are wrappers to interface to the new boot process. Someone | ||
1080 | who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */ | ||
1081 | void __init smp_prepare_cpus(unsigned int max_cpus) | ||
1082 | { | ||
1083 | smp_boot_cpus(max_cpus); | ||
1084 | } | ||
1085 | |||
1086 | void __devinit smp_prepare_boot_cpu(void) | ||
1087 | { | ||
1088 | cpu_set(smp_processor_id(), cpu_online_map); | ||
1089 | cpu_set(smp_processor_id(), cpu_callout_map); | ||
1090 | } | ||
1091 | |||
1092 | int __devinit __cpu_up(unsigned int cpu) | ||
1093 | { | ||
1094 | /* This only works at boot for x86. See "rewrite" above. */ | ||
1095 | if (cpu_isset(cpu, smp_commenced_mask)) { | ||
1096 | local_irq_enable(); | ||
1097 | return -ENOSYS; | ||
1098 | } | ||
1099 | |||
1100 | /* In case one didn't come up */ | ||
1101 | if (!cpu_isset(cpu, cpu_callin_map)) { | ||
1102 | local_irq_enable(); | ||
1103 | return -EIO; | ||
1104 | } | ||
1105 | |||
1106 | local_irq_enable(); | ||
1107 | /* Unleash the CPU! */ | ||
1108 | cpu_set(cpu, smp_commenced_mask); | ||
1109 | while (!cpu_isset(cpu, cpu_online_map)) | ||
1110 | mb(); | ||
1111 | return 0; | ||
1112 | } | ||
1113 | |||
1114 | void __init smp_cpus_done(unsigned int max_cpus) | ||
1115 | { | ||
1116 | #ifdef CONFIG_X86_IO_APIC | ||
1117 | setup_ioapic_dest(); | ||
1118 | #endif | ||
1119 | zap_low_mappings(); | ||
1120 | /* | ||
1121 | * Disable executability of the SMP trampoline: | ||
1122 | */ | ||
1123 | set_kernel_exec((unsigned long)trampoline_base, trampoline_exec); | ||
1124 | } | ||
1125 | |||
1126 | void __init smp_intr_init(void) | ||
1127 | { | ||
1128 | /* | ||
1129 | * IRQ0 must be given a fixed assignment and initialized, | ||
1130 | * because it's used before the IO-APIC is set up. | ||
1131 | */ | ||
1132 | set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]); | ||
1133 | |||
1134 | /* | ||
1135 | * The reschedule interrupt is a CPU-to-CPU reschedule-helper | ||
1136 | * IPI, driven by wakeup. | ||
1137 | */ | ||
1138 | set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); | ||
1139 | |||
1140 | /* IPI for invalidation */ | ||
1141 | set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); | ||
1142 | |||
1143 | /* IPI for generic function call */ | ||
1144 | set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); | ||
1145 | } | ||
diff --git a/arch/i386/kernel/srat.c b/arch/i386/kernel/srat.c new file mode 100644 index 000000000000..7b3b27d64409 --- /dev/null +++ b/arch/i386/kernel/srat.c | |||
@@ -0,0 +1,456 @@ | |||
1 | /* | ||
2 | * Some of the code in this file has been gleaned from the 64 bit | ||
3 | * discontigmem support code base. | ||
4 | * | ||
5 | * Copyright (C) 2002, IBM Corp. | ||
6 | * | ||
7 | * All rights reserved. | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License as published by | ||
11 | * the Free Software Foundation; either version 2 of the License, or | ||
12 | * (at your option) any later version. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, but | ||
15 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
17 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
18 | * details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
23 | * | ||
24 | * Send feedback to Pat Gaughen <gone@us.ibm.com> | ||
25 | */ | ||
26 | #include <linux/config.h> | ||
27 | #include <linux/mm.h> | ||
28 | #include <linux/bootmem.h> | ||
29 | #include <linux/mmzone.h> | ||
30 | #include <linux/acpi.h> | ||
31 | #include <linux/nodemask.h> | ||
32 | #include <asm/srat.h> | ||
33 | #include <asm/topology.h> | ||
34 | |||
35 | /* | ||
36 | * proximity macros and definitions | ||
37 | */ | ||
38 | #define NODE_ARRAY_INDEX(x) ((x) / 8) /* 8 bits/char */ | ||
39 | #define NODE_ARRAY_OFFSET(x) ((x) % 8) /* 8 bits/char */ | ||
40 | #define BMAP_SET(bmap, bit) ((bmap)[NODE_ARRAY_INDEX(bit)] |= 1 << NODE_ARRAY_OFFSET(bit)) | ||
41 | #define BMAP_TEST(bmap, bit) ((bmap)[NODE_ARRAY_INDEX(bit)] & (1 << NODE_ARRAY_OFFSET(bit))) | ||
42 | #define MAX_PXM_DOMAINS 256 /* 1 byte and no promises about values */ | ||
43 | /* bitmap length; _PXM is at most 255 */ | ||
44 | #define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8) | ||
45 | static u8 pxm_bitmap[PXM_BITMAP_LEN]; /* bitmap of proximity domains */ | ||
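The macros above pack one bit per proximity domain into a 32-byte array. A quick standalone exercise of the same definitions (the test values are invented):

#include <stdio.h>

typedef unsigned char u8;
#define NODE_ARRAY_INDEX(x)	((x) / 8)
#define NODE_ARRAY_OFFSET(x)	((x) % 8)
#define BMAP_SET(bmap, bit)	((bmap)[NODE_ARRAY_INDEX(bit)] |= 1 << NODE_ARRAY_OFFSET(bit))
#define BMAP_TEST(bmap, bit)	((bmap)[NODE_ARRAY_INDEX(bit)] & (1 << NODE_ARRAY_OFFSET(bit)))

int main(void)
{
	u8 bitmap[256 / 8] = { 0 };		/* MAX_PXM_DOMAINS / 8 bytes */

	BMAP_SET(bitmap, 0);			/* proximity domain 0 seen */
	BMAP_SET(bitmap, 42);			/* proximity domain 42 seen */
	printf("pxm 42 %s, pxm 43 %s\n",
	       BMAP_TEST(bitmap, 42) ? "set" : "clear",
	       BMAP_TEST(bitmap, 43) ? "set" : "clear");
	return 0;
}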
46 | |||
47 | #define MAX_CHUNKS_PER_NODE 4 | ||
48 | #define MAXCHUNKS (MAX_CHUNKS_PER_NODE * MAX_NUMNODES) | ||
49 | struct node_memory_chunk_s { | ||
50 | unsigned long start_pfn; | ||
51 | unsigned long end_pfn; | ||
52 | u8 pxm; // proximity domain of node | ||
53 | u8 nid; // which cnode contains this chunk? | ||
54 | u8 bank; // which mem bank on this node | ||
55 | }; | ||
56 | static struct node_memory_chunk_s node_memory_chunk[MAXCHUNKS]; | ||
57 | |||
58 | static int num_memory_chunks; /* total number of memory chunks */ | ||
59 | static int zholes_size_init; | ||
60 | static unsigned long zholes_size[MAX_NUMNODES * MAX_NR_ZONES]; | ||
61 | |||
62 | extern void * boot_ioremap(unsigned long, unsigned long); | ||
63 | |||
64 | /* Identify CPU proximity domains */ | ||
65 | static void __init parse_cpu_affinity_structure(char *p) | ||
66 | { | ||
67 | struct acpi_table_processor_affinity *cpu_affinity = | ||
68 | (struct acpi_table_processor_affinity *) p; | ||
69 | |||
70 | if (!cpu_affinity->flags.enabled) | ||
71 | return; /* empty entry */ | ||
72 | |||
73 | /* mark this node as "seen" in node bitmap */ | ||
74 | BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain); | ||
75 | |||
76 | printk("CPU 0x%02X in proximity domain 0x%02X\n", | ||
77 | cpu_affinity->apic_id, cpu_affinity->proximity_domain); | ||
78 | } | ||
79 | |||
80 | /* | ||
81 | * Identify memory proximity domains and hot-remove capabilities. | ||
82 | * Fill node memory chunk list structure. | ||
83 | */ | ||
84 | static void __init parse_memory_affinity_structure (char *sratp) | ||
85 | { | ||
86 | unsigned long long paddr, size; | ||
87 | unsigned long start_pfn, end_pfn; | ||
88 | u8 pxm; | ||
89 | struct node_memory_chunk_s *p, *q, *pend; | ||
90 | struct acpi_table_memory_affinity *memory_affinity = | ||
91 | (struct acpi_table_memory_affinity *) sratp; | ||
92 | |||
93 | if (!memory_affinity->flags.enabled) | ||
94 | return; /* empty entry */ | ||
95 | |||
96 | /* mark this node as "seen" in node bitmap */ | ||
97 | BMAP_SET(pxm_bitmap, memory_affinity->proximity_domain); | ||
98 | |||
99 | /* calculate info for memory chunk structure */ | ||
100 | paddr = memory_affinity->base_addr_hi; | ||
101 | paddr = (paddr << 32) | memory_affinity->base_addr_lo; | ||
102 | size = memory_affinity->length_hi; | ||
103 | size = (size << 32) | memory_affinity->length_lo; | ||
104 | |||
105 | start_pfn = paddr >> PAGE_SHIFT; | ||
106 | end_pfn = (paddr + size) >> PAGE_SHIFT; | ||
107 | |||
108 | pxm = memory_affinity->proximity_domain; | ||
109 | |||
110 | if (num_memory_chunks >= MAXCHUNKS) { | ||
111 | printk("Too many mem chunks in SRAT. Ignoring %lld MBytes at %llx\n", | ||
112 | size/(1024*1024), paddr); | ||
113 | return; | ||
114 | } | ||
115 | |||
116 | /* Insertion sort based on base address */ | ||
117 | pend = &node_memory_chunk[num_memory_chunks]; | ||
118 | for (p = &node_memory_chunk[0]; p < pend; p++) { | ||
119 | if (start_pfn < p->start_pfn) | ||
120 | break; | ||
121 | } | ||
122 | if (p < pend) { | ||
123 | for (q = pend; q >= p; q--) | ||
124 | *(q + 1) = *q; | ||
125 | } | ||
126 | p->start_pfn = start_pfn; | ||
127 | p->end_pfn = end_pfn; | ||
128 | p->pxm = pxm; | ||
129 | |||
130 | num_memory_chunks++; | ||
131 | |||
132 | printk("Memory range 0x%lX to 0x%lX (type 0x%X) in proximity domain 0x%02X %s\n", | ||
133 | start_pfn, end_pfn, | ||
134 | memory_affinity->memory_type, | ||
135 | memory_affinity->proximity_domain, | ||
136 | (memory_affinity->flags.hot_pluggable ? | ||
137 | "enabled and removable" : "enabled" ) ); | ||
138 | } | ||
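The parser above reassembles each range from the SRAT's split 32-bit hi/lo words before converting to page frame numbers. A standalone rendering of that address math with invented values:

#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
	unsigned int base_hi = 0x1, base_lo = 0x40000000;	/* base 0x140000000 (5 GiB) */
	unsigned int len_hi = 0x0, len_lo = 0x40000000;		/* length 1 GiB */

	unsigned long long paddr = ((unsigned long long)base_hi << 32) | base_lo;
	unsigned long long size  = ((unsigned long long)len_hi  << 32) | len_lo;

	/* prints: Memory range 0x140000 to 0x180000 */
	printf("Memory range 0x%llX to 0x%llX\n",
	       paddr >> PAGE_SHIFT, (paddr + size) >> PAGE_SHIFT);
	return 0;
}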
139 | |||
140 | #if MAX_NR_ZONES != 3 | ||
141 | #error "MAX_NR_ZONES != 3, chunk_to_zone requires review" | ||
142 | #endif | ||
143 | /* Take a chunk of pages from page frame cstart to cend and count the number | ||
144 | * of pages in each zone, returned via zones[]. | ||
145 | */ | ||
146 | static __init void chunk_to_zones(unsigned long cstart, unsigned long cend, | ||
147 | unsigned long *zones) | ||
148 | { | ||
149 | unsigned long max_dma; | ||
150 | extern unsigned long max_low_pfn; | ||
151 | |||
152 | int z; | ||
153 | unsigned long rend; | ||
154 | |||
155 | /* FIXME: MAX_DMA_ADDRESS and max_low_pfn are trying to provide | ||
156 | * similarly scoped information and should be handled in a consistent | ||
157 | * manner. | ||
158 | */ | ||
159 | max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; | ||
160 | |||
161 | /* Split the hole into the zones in which it falls. Repeatedly | ||
162 | * take the segment in which the remaining hole starts, round it | ||
163 | * to the end of that zone. | ||
164 | */ | ||
165 | memset(zones, 0, MAX_NR_ZONES * sizeof(long)); | ||
166 | while (cstart < cend) { | ||
167 | if (cstart < max_dma) { | ||
168 | z = ZONE_DMA; | ||
169 | rend = (cend < max_dma)? cend : max_dma; | ||
170 | |||
171 | } else if (cstart < max_low_pfn) { | ||
172 | z = ZONE_NORMAL; | ||
173 | rend = (cend < max_low_pfn)? cend : max_low_pfn; | ||
174 | |||
175 | } else { | ||
176 | z = ZONE_HIGHMEM; | ||
177 | rend = cend; | ||
178 | } | ||
179 | zones[z] += rend - cstart; | ||
180 | cstart = rend; | ||
181 | } | ||
182 | } | ||
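A self-contained rendering of the splitting loop above, with invented i386-style boundaries (ZONE_DMA ends at pfn 4096, i.e. 16MB, and low memory at pfn 229376, i.e. 896MB), so a single hole can be seen landing in all three zones:

#include <stdio.h>

enum { ZONE_DMA, ZONE_NORMAL, ZONE_HIGHMEM, MAX_NR_ZONES };

static void chunk_to_zones(unsigned long cstart, unsigned long cend,
			   unsigned long *zones)
{
	const unsigned long max_dma = 4096, max_low_pfn = 229376;
	unsigned long rend;
	int z;

	for (z = 0; z < MAX_NR_ZONES; z++)
		zones[z] = 0;
	while (cstart < cend) {
		if (cstart < max_dma) {
			z = ZONE_DMA;
			rend = cend < max_dma ? cend : max_dma;
		} else if (cstart < max_low_pfn) {
			z = ZONE_NORMAL;
			rend = cend < max_low_pfn ? cend : max_low_pfn;
		} else {
			z = ZONE_HIGHMEM;
			rend = cend;
		}
		zones[z] += rend - cstart;	/* charge this segment to its zone */
		cstart = rend;			/* continue from the zone boundary */
	}
}

int main(void)
{
	unsigned long zones[MAX_NR_ZONES];

	/* a hole spanning all three zones: prints DMA=3072 NORMAL=225280 HIGHMEM=70624 */
	chunk_to_zones(1024, 300000, zones);
	printf("DMA=%lu NORMAL=%lu HIGHMEM=%lu pages\n",
	       zones[ZONE_DMA], zones[ZONE_NORMAL], zones[ZONE_HIGHMEM]);
	return 0;
}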
183 | |||
184 | /* | ||
185 | * The SRAT table always lists ascending addresses, so we can always | ||
186 | * assume that the first "start" address that you see is the real | ||
187 | * start of the node, and that the current "end" address is after | ||
188 | * the previous one. | ||
189 | */ | ||
190 | static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_chunk) | ||
191 | { | ||
192 | /* | ||
193 | * Only add present memory as told by the e820. | ||
194 | * There is no guarantee from the SRAT that the memory it | ||
195 | * enumerates is present at boot time because it represents | ||
196 | * *possible* memory hotplug areas the same as normal RAM. | ||
197 | */ | ||
198 | if (memory_chunk->start_pfn >= max_pfn) { | ||
199 | printk (KERN_INFO "Ignoring SRAT pfns: 0x%08lx -> %08lx\n", | ||
200 | memory_chunk->start_pfn, memory_chunk->end_pfn); | ||
201 | return; | ||
202 | } | ||
203 | if (memory_chunk->nid != nid) | ||
204 | return; | ||
205 | |||
206 | if (!node_has_online_mem(nid)) | ||
207 | node_start_pfn[nid] = memory_chunk->start_pfn; | ||
208 | |||
209 | if (node_start_pfn[nid] > memory_chunk->start_pfn) | ||
210 | node_start_pfn[nid] = memory_chunk->start_pfn; | ||
211 | |||
212 | if (node_end_pfn[nid] < memory_chunk->end_pfn) | ||
213 | node_end_pfn[nid] = memory_chunk->end_pfn; | ||
214 | } | ||
215 | |||
216 | /* Parse the ACPI Static Resource Affinity Table */ | ||
217 | static int __init acpi20_parse_srat(struct acpi_table_srat *sratp) | ||
218 | { | ||
219 | u8 *start, *end, *p; | ||
220 | int i, j, nid; | ||
221 | u8 pxm_to_nid_map[MAX_PXM_DOMAINS];/* _PXM to logical node ID map */ | ||
222 | u8 nid_to_pxm_map[MAX_NUMNODES];/* logical node ID to _PXM map */ | ||
223 | |||
224 | start = (u8 *)(&(sratp->reserved) + 1); /* skip header */ | ||
225 | p = start; | ||
226 | end = (u8 *)sratp + sratp->header.length; | ||
227 | |||
228 | memset(pxm_bitmap, 0, sizeof(pxm_bitmap)); /* init proximity domain bitmap */ | ||
229 | memset(node_memory_chunk, 0, sizeof(node_memory_chunk)); | ||
230 | memset(zholes_size, 0, sizeof(zholes_size)); | ||
231 | |||
232 | /* -1 in these maps means not available */ | ||
233 | memset(pxm_to_nid_map, -1, sizeof(pxm_to_nid_map)); | ||
234 | memset(nid_to_pxm_map, -1, sizeof(nid_to_pxm_map)); | ||
235 | |||
236 | num_memory_chunks = 0; | ||
237 | while (p < end) { | ||
238 | switch (*p) { | ||
239 | case ACPI_SRAT_PROCESSOR_AFFINITY: | ||
240 | parse_cpu_affinity_structure(p); | ||
241 | break; | ||
242 | case ACPI_SRAT_MEMORY_AFFINITY: | ||
243 | parse_memory_affinity_structure(p); | ||
244 | break; | ||
245 | default: | ||
246 | printk("ACPI 2.0 SRAT: unknown entry skipped: type=0x%02X, len=%d\n", p[0], p[1]); | ||
247 | break; | ||
248 | } | ||
249 | p += p[1]; | ||
250 | if (p[1] == 0) { | ||
251 | printk("acpi20_parse_srat: Entry length value is zero;" | ||
252 | " can't parse any further!\n"); | ||
253 | break; | ||
254 | } | ||
255 | } | ||
256 | |||
257 | if (num_memory_chunks == 0) { | ||
258 | printk("could not finy any ACPI SRAT memory areas.\n"); | ||
259 | goto out_fail; | ||
260 | } | ||
261 | |||
262 | /* Calculate total number of nodes in system from PXM bitmap and create | ||
263 | * a set of sequential node IDs starting at zero. (ACPI doesn't seem | ||
264 | * to specify the range of _PXM values.) | ||
265 | */ | ||
266 | /* | ||
267 | * MCD - we no longer HAVE to number nodes sequentially. PXM domain | ||
268 | * numbers could go as high as 256, and MAX_NUMNODES for i386 is typically | ||
269 | * 32, so we will continue numbering them in this manner until MAX_NUMNODES | ||
270 | * approaches MAX_PXM_DOMAINS for i386. | ||
271 | */ | ||
272 | nodes_clear(node_online_map); | ||
273 | for (i = 0; i < MAX_PXM_DOMAINS; i++) { | ||
274 | if (BMAP_TEST(pxm_bitmap, i)) { | ||
275 | nid = num_online_nodes(); | ||
276 | pxm_to_nid_map[i] = nid; | ||
277 | nid_to_pxm_map[nid] = i; | ||
278 | node_set_online(nid); | ||
279 | } | ||
280 | } | ||
281 | BUG_ON(num_online_nodes() == 0); | ||
282 | |||
283 | /* set cnode id in memory chunk structure */ | ||
284 | for (i = 0; i < num_memory_chunks; i++) | ||
285 | node_memory_chunk[i].nid = pxm_to_nid_map[node_memory_chunk[i].pxm]; | ||
286 | |||
287 | printk("pxm bitmap: "); | ||
288 | for (i = 0; i < sizeof(pxm_bitmap); i++) { | ||
289 | printk("%02X ", pxm_bitmap[i]); | ||
290 | } | ||
291 | printk("\n"); | ||
292 | printk("Number of logical nodes in system = %d\n", num_online_nodes()); | ||
293 | printk("Number of memory chunks in system = %d\n", num_memory_chunks); | ||
294 | |||
295 | for (j = 0; j < num_memory_chunks; j++){ | ||
296 | struct node_memory_chunk_s * chunk = &node_memory_chunk[j]; | ||
297 | printk("chunk %d nid %d start_pfn %08lx end_pfn %08lx\n", | ||
298 | j, chunk->nid, chunk->start_pfn, chunk->end_pfn); | ||
299 | node_read_chunk(chunk->nid, chunk); | ||
300 | } | ||
301 | |||
302 | for_each_online_node(nid) { | ||
303 | unsigned long start = node_start_pfn[nid]; | ||
304 | unsigned long end = node_end_pfn[nid]; | ||
305 | |||
306 | memory_present(nid, start, end); | ||
307 | node_remap_size[nid] = node_memmap_size_bytes(nid, start, end); | ||
308 | } | ||
309 | return 1; | ||
310 | out_fail: | ||
311 | return 0; | ||
312 | } | ||
313 | |||
314 | int __init get_memcfg_from_srat(void) | ||
315 | { | ||
316 | struct acpi_table_header *header = NULL; | ||
317 | struct acpi_table_rsdp *rsdp = NULL; | ||
318 | struct acpi_table_rsdt *rsdt = NULL; | ||
319 | struct acpi_pointer *rsdp_address = NULL; | ||
320 | struct acpi_table_rsdt saved_rsdt; | ||
321 | int tables = 0; | ||
322 | int i = 0; | ||
323 | |||
324 | acpi_find_root_pointer(ACPI_PHYSICAL_ADDRESSING, rsdp_address); | ||
325 | |||
326 | if (rsdp_address->pointer_type == ACPI_PHYSICAL_POINTER) { | ||
327 | printk("%s: assigning address to rsdp\n", __FUNCTION__); | ||
328 | rsdp = (struct acpi_table_rsdp *) | ||
329 | (u32)rsdp_address->pointer.physical; | ||
330 | } else { | ||
331 | printk("%s: rsdp_address is not a physical pointer\n", __FUNCTION__); | ||
332 | goto out_err; | ||
333 | } | ||
334 | if (!rsdp) { | ||
335 | printk("%s: Didn't find ACPI root!\n", __FUNCTION__); | ||
336 | goto out_err; | ||
337 | } | ||
338 | |||
339 | printk(KERN_INFO "%.8s v%d [%.6s]\n", rsdp->signature, rsdp->revision, | ||
340 | rsdp->oem_id); | ||
341 | |||
342 | if (strncmp(rsdp->signature, RSDP_SIG,strlen(RSDP_SIG))) { | ||
343 | printk(KERN_WARNING "%s: RSDP table signature incorrect\n", __FUNCTION__); | ||
344 | goto out_err; | ||
345 | } | ||
346 | |||
347 | rsdt = (struct acpi_table_rsdt *) | ||
348 | boot_ioremap(rsdp->rsdt_address, sizeof(struct acpi_table_rsdt)); | ||
349 | |||
350 | if (!rsdt) { | ||
351 | printk(KERN_WARNING | ||
352 | "%s: ACPI: Invalid root system description tables (RSDT)\n", | ||
353 | __FUNCTION__); | ||
354 | goto out_err; | ||
355 | } | ||
356 | |||
357 | header = & rsdt->header; | ||
358 | |||
359 | if (strncmp(header->signature, RSDT_SIG, strlen(RSDT_SIG))) { | ||
360 | printk(KERN_WARNING "ACPI: RSDT signature incorrect\n"); | ||
361 | goto out_err; | ||
362 | } | ||
363 | |||
364 | /* | ||
365 | * The number of tables is computed by taking the | ||
366 | * size of all entries (total RSDT length minus the | ||
367 | * header size) divided by the size of each entry | ||
368 | * (4-byte table pointers). | ||
369 | */ | ||
370 | tables = (header->length - sizeof(struct acpi_table_header)) / 4; | ||
371 | |||
372 | if (!tables) | ||
373 | goto out_err; | ||
374 | |||
375 | memcpy(&saved_rsdt, rsdt, sizeof(saved_rsdt)); | ||
376 | |||
377 | if (saved_rsdt.header.length > sizeof(saved_rsdt)) { | ||
378 | printk(KERN_WARNING "ACPI: Too big length in RSDT: %d\n", | ||
379 | saved_rsdt.header.length); | ||
380 | goto out_err; | ||
381 | } | ||
382 | |||
383 | printk("Begin SRAT table scan....\n"); | ||
384 | |||
385 | for (i = 0; i < tables; i++) { | ||
386 | /* Map in header, then map in full table length. */ | ||
387 | header = (struct acpi_table_header *) | ||
388 | boot_ioremap(saved_rsdt.entry[i], sizeof(struct acpi_table_header)); | ||
389 | if (!header) | ||
390 | break; | ||
391 | header = (struct acpi_table_header *) | ||
392 | boot_ioremap(saved_rsdt.entry[i], header->length); | ||
393 | if (!header) | ||
394 | break; | ||
395 | |||
396 | if (strncmp((char *) &header->signature, "SRAT", 4)) | ||
397 | continue; | ||
398 | |||
399 | /* We've found the SRAT table; no need to look at any more tables. */ | ||
400 | return acpi20_parse_srat((struct acpi_table_srat *)header); | ||
401 | } | ||
402 | out_err: | ||
403 | printk("failed to get NUMA memory information from SRAT table\n"); | ||
404 | return 0; | ||
405 | } | ||
406 | |||
407 | /* For each node run the memory list to determine whether there are | ||
408 | * any memory holes. For each hole determine which ZONEs it falls | ||
409 | * into. | ||
410 | * | ||
411 | * NOTE#1: this requires knowledge of the zone boundaries and so | ||
412 | * _cannot_ be performed before those are calculated in setup_memory. | ||
413 | * | ||
414 | * NOTE#2: we rely on the fact that the memory chunks are ordered by | ||
415 | * start pfn number during setup. | ||
416 | */ | ||
417 | static void __init get_zholes_init(void) | ||
418 | { | ||
419 | int nid; | ||
420 | int c; | ||
421 | int first; | ||
422 | unsigned long end = 0; | ||
423 | |||
424 | for_each_online_node(nid) { | ||
425 | first = 1; | ||
426 | for (c = 0; c < num_memory_chunks; c++){ | ||
427 | if (node_memory_chunk[c].nid == nid) { | ||
428 | if (first) { | ||
429 | end = node_memory_chunk[c].end_pfn; | ||
430 | first = 0; | ||
431 | |||
432 | } else { | ||
433 | /* Record any gap between this chunk | ||
434 | * and the previous chunk on this node | ||
435 | * against the zones it spans. | ||
436 | */ | ||
437 | chunk_to_zones(end, | ||
438 | node_memory_chunk[c].start_pfn, | ||
439 | &zholes_size[nid * MAX_NR_ZONES]); | ||
440 | } | ||
441 | } | ||
442 | } | ||
443 | } | ||
444 | } | ||
445 | |||
446 | unsigned long * __init get_zholes_size(int nid) | ||
447 | { | ||
448 | if (!zholes_size_init) { | ||
449 | zholes_size_init++; | ||
450 | get_zholes_init(); | ||
451 | } | ||
452 | if (nid >= MAX_NUMNODES || !node_online(nid)) | ||
453 | printk("%s: nid = %d is invalid/offline. num_online_nodes = %d", | ||
454 | __FUNCTION__, nid, num_online_nodes()); | ||
455 | return &zholes_size[nid * MAX_NR_ZONES]; | ||
456 | } | ||
diff --git a/arch/i386/kernel/summit.c b/arch/i386/kernel/summit.c new file mode 100644 index 000000000000..d0e01a3acf35 --- /dev/null +++ b/arch/i386/kernel/summit.c | |||
@@ -0,0 +1,180 @@ | |||
1 | /* | ||
2 | * arch/i386/kernel/summit.c - IBM Summit-Specific Code | ||
3 | * | ||
4 | * Written By: Matthew Dobson, IBM Corporation | ||
5 | * | ||
6 | * Copyright (c) 2003 IBM Corp. | ||
7 | * | ||
8 | * All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or (at | ||
13 | * your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but | ||
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
18 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
19 | * details. | ||
20 | * | ||
21 | * You should have received a copy of the GNU General Public License | ||
22 | * along with this program; if not, write to the Free Software | ||
23 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
24 | * | ||
25 | * Send feedback to <colpatch@us.ibm.com> | ||
26 | * | ||
27 | */ | ||
28 | |||
29 | #include <linux/mm.h> | ||
30 | #include <linux/init.h> | ||
31 | #include <asm/io.h> | ||
32 | #include <asm/mach-summit/mach_mpparse.h> | ||
33 | |||
34 | static struct rio_table_hdr *rio_table_hdr __initdata; | ||
35 | static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; | ||
36 | static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata; | ||
37 | |||
38 | static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) | ||
39 | { | ||
40 | int twister = 0, node = 0; | ||
41 | int i, bus, num_buses; | ||
42 | |||
43 | for(i = 0; i < rio_table_hdr->num_rio_dev; i++){ | ||
44 | if (rio_devs[i]->node_id == rio_devs[wpeg_num]->owner_id){ | ||
45 | twister = rio_devs[i]->owner_id; | ||
46 | break; | ||
47 | } | ||
48 | } | ||
49 | if (i == rio_table_hdr->num_rio_dev){ | ||
50 | printk(KERN_ERR "%s: Couldn't find owner Cyclone for Winnipeg!\n", __FUNCTION__); | ||
51 | return last_bus; | ||
52 | } | ||
53 | |||
54 | for(i = 0; i < rio_table_hdr->num_scal_dev; i++){ | ||
55 | if (scal_devs[i]->node_id == twister){ | ||
56 | node = scal_devs[i]->node_id; | ||
57 | break; | ||
58 | } | ||
59 | } | ||
60 | if (i == rio_table_hdr->num_scal_dev){ | ||
61 | printk(KERN_ERR "%s: Couldn't find owner Twister for Cyclone!\n", __FUNCTION__); | ||
62 | return last_bus; | ||
63 | } | ||
64 | |||
65 | switch (rio_devs[wpeg_num]->type){ | ||
66 | case CompatWPEG: | ||
67 | /* The Compatibility Winnipeg controls the 2 legacy buses, | ||
68 | * the 66MHz PCI bus [2 slots] and the 2 "extra" buses in case | ||
69 | * a PCI-PCI bridge card is used in either slot: total 5 buses. | ||
70 | */ | ||
71 | num_buses = 5; | ||
72 | break; | ||
73 | case AltWPEG: | ||
74 | /* The Alternate Winnipeg controls the 2 133MHz buses [1 slot | ||
75 | * each], their 2 "extra" buses, the 100MHz bus [2 slots] and | ||
76 | * the "extra" buses for each of those slots: total 7 buses. | ||
77 | */ | ||
78 | num_buses = 7; | ||
79 | break; | ||
80 | case LookOutAWPEG: | ||
81 | case LookOutBWPEG: | ||
82 | /* A Lookout Winnipeg controls 3 100MHz buses [2 slots each] | ||
83 | * & the "extra" buses for each of those slots: total 9 buses. | ||
84 | */ | ||
85 | num_buses = 9; | ||
86 | break; | ||
87 | default: | ||
88 | printk(KERN_INFO "%s: Unsupported Winnipeg type!\n", __FUNCTION__); | ||
89 | return last_bus; | ||
90 | } | ||
91 | |||
92 | for(bus = last_bus; bus < last_bus + num_buses; bus++) | ||
93 | mp_bus_id_to_node[bus] = node; | ||
94 | return bus; | ||
95 | } | ||
96 | |||
97 | static int __init build_detail_arrays(void) | ||
98 | { | ||
99 | unsigned long ptr; | ||
100 | int i, scal_detail_size, rio_detail_size; | ||
101 | |||
102 | if (rio_table_hdr->num_scal_dev > MAX_NUMNODES){ | ||
103 | printk(KERN_WARNING "%s: MAX_NUMNODES too low! Defined as %d, but system has %d nodes.\n", __FUNCTION__, MAX_NUMNODES, rio_table_hdr->num_scal_dev); | ||
104 | return 0; | ||
105 | } | ||
106 | |||
107 | switch (rio_table_hdr->version){ | ||
108 | default: | ||
109 | printk(KERN_WARNING "%s: Invalid Rio Grande Table Version: %d\n", __FUNCTION__, rio_table_hdr->version); | ||
110 | return 0; | ||
111 | case 2: | ||
112 | scal_detail_size = 11; | ||
113 | rio_detail_size = 13; | ||
114 | break; | ||
115 | case 3: | ||
116 | scal_detail_size = 12; | ||
117 | rio_detail_size = 15; | ||
118 | break; | ||
119 | } | ||
120 | |||
121 | ptr = (unsigned long)rio_table_hdr + 3; | ||
122 | for(i = 0; i < rio_table_hdr->num_scal_dev; i++, ptr += scal_detail_size) | ||
123 | scal_devs[i] = (struct scal_detail *)ptr; | ||
124 | |||
125 | for(i = 0; i < rio_table_hdr->num_rio_dev; i++, ptr += rio_detail_size) | ||
126 | rio_devs[i] = (struct rio_detail *)ptr; | ||
127 | |||
128 | return 1; | ||
129 | } | ||
130 | |||
131 | void __init setup_summit(void) | ||
132 | { | ||
133 | unsigned long ptr; | ||
134 | unsigned short offset; | ||
135 | int i, next_wpeg, next_bus = 0; | ||
136 | |||
137 | /* The pointer to the EBDA is stored in the word @ phys 0x40E(40:0E) */ | ||
138 | ptr = *(unsigned short *)phys_to_virt(0x40Eul); | ||
139 | ptr = (unsigned long)phys_to_virt(ptr << 4); | ||
140 | |||
141 | rio_table_hdr = NULL; | ||
142 | offset = 0x180; | ||
143 | while (offset){ | ||
144 | /* The block id is stored in the 2nd word */ | ||
145 | if (*((unsigned short *)(ptr + offset + 2)) == 0x4752){ | ||
146 | /* set the pointer past the offset & block id */ | ||
147 | rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4); | ||
148 | break; | ||
149 | } | ||
150 | /* The next offset is stored in the 1st word. 0 means no more */ | ||
151 | offset = *((unsigned short *)(ptr + offset)); | ||
152 | } | ||
153 | if (!rio_table_hdr){ | ||
154 | printk(KERN_ERR "%s: Unable to locate Rio Grande Table in EBDA - bailing!\n", __FUNCTION__); | ||
155 | return; | ||
156 | } | ||
157 | |||
158 | if (!build_detail_arrays()) | ||
159 | return; | ||
160 | |||
161 | /* The first Winnipeg we're looking for has an index of 0 */ | ||
162 | next_wpeg = 0; | ||
163 | do { | ||
164 | for(i = 0; i < rio_table_hdr->num_rio_dev; i++){ | ||
165 | if (is_WPEG(rio_devs[i]) && rio_devs[i]->WP_index == next_wpeg){ | ||
166 | /* It's the Winnipeg we're looking for! */ | ||
167 | next_bus = setup_pci_node_map_for_wpeg(i, next_bus); | ||
168 | next_wpeg++; | ||
169 | break; | ||
170 | } | ||
171 | } | ||
172 | /* | ||
173 | * If we go through all Rio devices and don't find one with | ||
174 | * the next index, it means we've found all the Winnipegs, | ||
175 | * and thus all the PCI buses. | ||
176 | */ | ||
177 | if (i == rio_table_hdr->num_rio_dev) | ||
178 | next_wpeg = 0; | ||
179 | } while (next_wpeg != 0); | ||
180 | } | ||
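setup_summit() locates the Rio Grande table by following a chain of blocks inside the EBDA: the first word of each block holds the offset of the next block (0 terminates the chain), the second word holds the block id, and 0x4752 marks the table. A small user-space sketch of the same walk over a fake buffer; the buffer layout and the 0x1111 dummy id are made up for illustration:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define TABLE_ID 0x4752		/* the Rio Grande table id from summit.c */

static const uint8_t *find_block(const uint8_t *ebda, uint16_t first_off)
{
	uint16_t off = first_off;

	while (off) {
		uint16_t next, id;

		memcpy(&next, ebda + off, 2);		/* word 0: next offset */
		memcpy(&id, ebda + off + 2, 2);		/* word 1: block id */
		if (id == TABLE_ID)
			return ebda + off + 4;		/* data follows the header */
		off = next;				/* 0 means no more blocks */
	}
	return NULL;
}

int main(void)
{
	uint8_t ebda[0x400] = {0};
	/* fake chain: block at 0x180 (dummy id) -> block at 0x200 (table) */
	uint16_t hdr1[2] = {0x200, 0x1111}, hdr2[2] = {0, TABLE_ID};

	memcpy(ebda + 0x180, hdr1, 4);
	memcpy(ebda + 0x200, hdr2, 4);
	printf("found data at offset %#lx\n",
	       (unsigned long)(find_block(ebda, 0x180) - ebda));
	return 0;
}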
diff --git a/arch/i386/kernel/sys_i386.c b/arch/i386/kernel/sys_i386.c new file mode 100644 index 000000000000..a4a61976ecb9 --- /dev/null +++ b/arch/i386/kernel/sys_i386.c | |||
@@ -0,0 +1,252 @@ | |||
1 | /* | ||
2 | * linux/arch/i386/kernel/sys_i386.c | ||
3 | * | ||
4 | * This file contains various random system calls that | ||
5 | * have a non-standard calling sequence on the Linux/i386 | ||
6 | * platform. | ||
7 | */ | ||
8 | |||
9 | #include <linux/errno.h> | ||
10 | #include <linux/sched.h> | ||
11 | #include <linux/mm.h> | ||
12 | #include <linux/smp.h> | ||
13 | #include <linux/smp_lock.h> | ||
14 | #include <linux/sem.h> | ||
15 | #include <linux/msg.h> | ||
16 | #include <linux/shm.h> | ||
17 | #include <linux/stat.h> | ||
18 | #include <linux/syscalls.h> | ||
19 | #include <linux/mman.h> | ||
20 | #include <linux/file.h> | ||
21 | #include <linux/utsname.h> | ||
22 | |||
23 | #include <asm/uaccess.h> | ||
24 | #include <asm/ipc.h> | ||
25 | |||
26 | /* | ||
27 | * sys_pipe() is the normal C calling standard for creating | ||
28 | * a pipe. It's not the way Unix traditionally does this, though. | ||
29 | */ | ||
30 | asmlinkage int sys_pipe(unsigned long __user * fildes) | ||
31 | { | ||
32 | int fd[2]; | ||
33 | int error; | ||
34 | |||
35 | error = do_pipe(fd); | ||
36 | if (!error) { | ||
37 | if (copy_to_user(fildes, fd, 2*sizeof(int))) | ||
38 | error = -EFAULT; | ||
39 | } | ||
40 | return error; | ||
41 | } | ||
42 | |||
43 | /* common code for old and new mmaps */ | ||
44 | static inline long do_mmap2( | ||
45 | unsigned long addr, unsigned long len, | ||
46 | unsigned long prot, unsigned long flags, | ||
47 | unsigned long fd, unsigned long pgoff) | ||
48 | { | ||
49 | int error = -EBADF; | ||
50 | struct file * file = NULL; | ||
51 | |||
52 | flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); | ||
53 | if (!(flags & MAP_ANONYMOUS)) { | ||
54 | file = fget(fd); | ||
55 | if (!file) | ||
56 | goto out; | ||
57 | } | ||
58 | |||
59 | down_write(¤t->mm->mmap_sem); | ||
60 | error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); | ||
61 | up_write(¤t->mm->mmap_sem); | ||
62 | |||
63 | if (file) | ||
64 | fput(file); | ||
65 | out: | ||
66 | return error; | ||
67 | } | ||
68 | |||
69 | asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, | ||
70 | unsigned long prot, unsigned long flags, | ||
71 | unsigned long fd, unsigned long pgoff) | ||
72 | { | ||
73 | return do_mmap2(addr, len, prot, flags, fd, pgoff); | ||
74 | } | ||
75 | |||
76 | /* | ||
77 | * Perform the select(nd, in, out, ex, tv) and mmap() system | ||
78 | * calls. Linux/i386 didn't use to be able to handle more than | ||
79 | * 4 system call parameters, so these system calls used a memory | ||
80 | * block for parameter passing.. | ||
81 | */ | ||
82 | |||
83 | struct mmap_arg_struct { | ||
84 | unsigned long addr; | ||
85 | unsigned long len; | ||
86 | unsigned long prot; | ||
87 | unsigned long flags; | ||
88 | unsigned long fd; | ||
89 | unsigned long offset; | ||
90 | }; | ||
91 | |||
92 | asmlinkage int old_mmap(struct mmap_arg_struct __user *arg) | ||
93 | { | ||
94 | struct mmap_arg_struct a; | ||
95 | int err = -EFAULT; | ||
96 | |||
97 | if (copy_from_user(&a, arg, sizeof(a))) | ||
98 | goto out; | ||
99 | |||
100 | err = -EINVAL; | ||
101 | if (a.offset & ~PAGE_MASK) | ||
102 | goto out; | ||
103 | |||
104 | err = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); | ||
105 | out: | ||
106 | return err; | ||
107 | } | ||
108 | |||
109 | |||
110 | struct sel_arg_struct { | ||
111 | unsigned long n; | ||
112 | fd_set __user *inp, *outp, *exp; | ||
113 | struct timeval __user *tvp; | ||
114 | }; | ||
115 | |||
116 | asmlinkage int old_select(struct sel_arg_struct __user *arg) | ||
117 | { | ||
118 | struct sel_arg_struct a; | ||
119 | |||
120 | if (copy_from_user(&a, arg, sizeof(a))) | ||
121 | return -EFAULT; | ||
122 | /* sys_select() does the appropriate kernel locking */ | ||
123 | return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp); | ||
124 | } | ||
125 | |||
126 | /* | ||
127 | * sys_ipc() is the de-multiplexer for the SysV IPC calls.. | ||
128 | * | ||
129 | * This is really horribly ugly. | ||
130 | */ | ||
131 | asmlinkage int sys_ipc (uint call, int first, int second, | ||
132 | int third, void __user *ptr, long fifth) | ||
133 | { | ||
134 | int version, ret; | ||
135 | |||
136 | version = call >> 16; /* hack for backward compatibility */ | ||
137 | call &= 0xffff; | ||
138 | |||
139 | switch (call) { | ||
140 | case SEMOP: | ||
141 | return sys_semtimedop (first, (struct sembuf __user *)ptr, second, NULL); | ||
142 | case SEMTIMEDOP: | ||
143 | return sys_semtimedop(first, (struct sembuf __user *)ptr, second, | ||
144 | (const struct timespec __user *)fifth); | ||
145 | |||
146 | case SEMGET: | ||
147 | return sys_semget (first, second, third); | ||
148 | case SEMCTL: { | ||
149 | union semun fourth; | ||
150 | if (!ptr) | ||
151 | return -EINVAL; | ||
152 | if (get_user(fourth.__pad, (void __user * __user *) ptr)) | ||
153 | return -EFAULT; | ||
154 | return sys_semctl (first, second, third, fourth); | ||
155 | } | ||
156 | |||
157 | case MSGSND: | ||
158 | return sys_msgsnd (first, (struct msgbuf __user *) ptr, | ||
159 | second, third); | ||
160 | case MSGRCV: | ||
161 | switch (version) { | ||
162 | case 0: { | ||
163 | struct ipc_kludge tmp; | ||
164 | if (!ptr) | ||
165 | return -EINVAL; | ||
166 | |||
167 | if (copy_from_user(&tmp, | ||
168 | (struct ipc_kludge __user *) ptr, | ||
169 | sizeof (tmp))) | ||
170 | return -EFAULT; | ||
171 | return sys_msgrcv (first, tmp.msgp, second, | ||
172 | tmp.msgtyp, third); | ||
173 | } | ||
174 | default: | ||
175 | return sys_msgrcv (first, | ||
176 | (struct msgbuf __user *) ptr, | ||
177 | second, fifth, third); | ||
178 | } | ||
179 | case MSGGET: | ||
180 | return sys_msgget ((key_t) first, second); | ||
181 | case MSGCTL: | ||
182 | return sys_msgctl (first, second, (struct msqid_ds __user *) ptr); | ||
183 | |||
184 | case SHMAT: | ||
185 | switch (version) { | ||
186 | default: { | ||
187 | ulong raddr; | ||
188 | ret = do_shmat (first, (char __user *) ptr, second, &raddr); | ||
189 | if (ret) | ||
190 | return ret; | ||
191 | return put_user (raddr, (ulong __user *) third); | ||
192 | } | ||
193 | case 1: /* iBCS2 emulator entry point */ | ||
194 | if (!segment_eq(get_fs(), get_ds())) | ||
195 | return -EINVAL; | ||
196 | /* The "(ulong *) third" is valid _only_ because of the kernel segment thing */ | ||
197 | return do_shmat (first, (char __user *) ptr, second, (ulong *) third); | ||
198 | } | ||
199 | case SHMDT: | ||
200 | return sys_shmdt ((char __user *)ptr); | ||
201 | case SHMGET: | ||
202 | return sys_shmget (first, second, third); | ||
203 | case SHMCTL: | ||
204 | return sys_shmctl (first, second, | ||
205 | (struct shmid_ds __user *) ptr); | ||
206 | default: | ||
207 | return -ENOSYS; | ||
208 | } | ||
209 | } | ||
210 | |||
211 | /* | ||
212 | * Old cruft | ||
213 | */ | ||
214 | asmlinkage int sys_uname(struct old_utsname __user * name) | ||
215 | { | ||
216 | int err; | ||
217 | if (!name) | ||
218 | return -EFAULT; | ||
219 | down_read(&uts_sem); | ||
220 | err=copy_to_user(name, &system_utsname, sizeof (*name)); | ||
221 | up_read(&uts_sem); | ||
222 | return err?-EFAULT:0; | ||
223 | } | ||
224 | |||
225 | asmlinkage int sys_olduname(struct oldold_utsname __user * name) | ||
226 | { | ||
227 | int error; | ||
228 | |||
229 | if (!name) | ||
230 | return -EFAULT; | ||
231 | if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname))) | ||
232 | return -EFAULT; | ||
233 | |||
234 | down_read(&uts_sem); | ||
235 | |||
236 | error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); | ||
237 | error |= __put_user(0,name->sysname+__OLD_UTS_LEN); | ||
238 | error |= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); | ||
239 | error |= __put_user(0,name->nodename+__OLD_UTS_LEN); | ||
240 | error |= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); | ||
241 | error |= __put_user(0,name->release+__OLD_UTS_LEN); | ||
242 | error |= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); | ||
243 | error |= __put_user(0,name->version+__OLD_UTS_LEN); | ||
244 | error |= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN); | ||
245 | error |= __put_user(0,name->machine+__OLD_UTS_LEN); | ||
246 | |||
247 | up_read(&uts_sem); | ||
248 | |||
249 | error = error ? -EFAULT : 0; | ||
250 | |||
251 | return error; | ||
252 | } | ||
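The mmap_arg_struct convention above can be exercised directly: user space packs all six arguments into one struct and hands the kernel a single pointer. A hedged sketch; syscall number 90 is the historical i386 slot for old mmap, and the call is guarded so the program still compiles as a no-op on other architectures:

#define _DEFAULT_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>

struct mmap_arg_struct {	/* mirrors the struct in sys_i386.c */
	unsigned long addr, len, prot, flags, fd, offset;
};

int main(void)
{
	struct mmap_arg_struct a = {
		.addr	= 0,
		.len	= 4096,
		.prot	= PROT_READ | PROT_WRITE,
		.flags	= MAP_PRIVATE | MAP_ANONYMOUS,
		.fd	= (unsigned long)-1,
		.offset	= 0,
	};
#ifdef __i386__
	/* one register carries the whole argument block */
	void *p = (void *)syscall(90 /* old mmap */, &a);
	printf("mapped at %p\n", p);
#else
	(void)a;
	puts("old_mmap is i386-only; see sys_mmap2/do_mmap2 above");
#endif
	return 0;
}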
diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c new file mode 100644 index 000000000000..960d8bd137d0 --- /dev/null +++ b/arch/i386/kernel/sysenter.c | |||
@@ -0,0 +1,65 @@ | |||
1 | /* | ||
2 | * linux/arch/i386/kernel/sysenter.c | ||
3 | * | ||
4 | * (C) Copyright 2002 Linus Torvalds | ||
5 | * | ||
6 | * This file contains the needed initializations to support sysenter. | ||
7 | */ | ||
8 | |||
9 | #include <linux/init.h> | ||
10 | #include <linux/smp.h> | ||
11 | #include <linux/thread_info.h> | ||
12 | #include <linux/sched.h> | ||
13 | #include <linux/gfp.h> | ||
14 | #include <linux/string.h> | ||
15 | #include <linux/elf.h> | ||
16 | |||
17 | #include <asm/cpufeature.h> | ||
18 | #include <asm/msr.h> | ||
19 | #include <asm/pgtable.h> | ||
20 | #include <asm/unistd.h> | ||
21 | |||
22 | extern asmlinkage void sysenter_entry(void); | ||
23 | |||
24 | void enable_sep_cpu(void *info) | ||
25 | { | ||
26 | int cpu = get_cpu(); | ||
27 | struct tss_struct *tss = &per_cpu(init_tss, cpu); | ||
28 | |||
29 | tss->ss1 = __KERNEL_CS; | ||
30 | tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss; | ||
31 | wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); | ||
32 | wrmsr(MSR_IA32_SYSENTER_ESP, tss->esp1, 0); | ||
33 | wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0); | ||
34 | put_cpu(); | ||
35 | } | ||
36 | |||
37 | /* | ||
38 | * These symbols are defined by vsyscall.o to mark the bounds | ||
39 | * of the ELF DSO images included therein. | ||
40 | */ | ||
41 | extern const char vsyscall_int80_start, vsyscall_int80_end; | ||
42 | extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; | ||
43 | |||
44 | static int __init sysenter_setup(void) | ||
45 | { | ||
46 | void *page = (void *)get_zeroed_page(GFP_ATOMIC); | ||
47 | |||
48 | __set_fixmap(FIX_VSYSCALL, __pa(page), PAGE_READONLY_EXEC); | ||
49 | |||
50 | if (!boot_cpu_has(X86_FEATURE_SEP)) { | ||
51 | memcpy(page, | ||
52 | &vsyscall_int80_start, | ||
53 | &vsyscall_int80_end - &vsyscall_int80_start); | ||
54 | return 0; | ||
55 | } | ||
56 | |||
57 | memcpy(page, | ||
58 | &vsyscall_sysenter_start, | ||
59 | &vsyscall_sysenter_end - &vsyscall_sysenter_start); | ||
60 | |||
61 | on_each_cpu(enable_sep_cpu, NULL, 1, 1); | ||
62 | return 0; | ||
63 | } | ||
64 | |||
65 | __initcall(sysenter_setup); | ||
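The page that sysenter_setup() installs is what user space reaches through the linux-gate.so.1 DSO: the kernel advertises its entry point in the AT_SYSINFO aux-vector entry, and libc jumps there instead of hard-coding int $0x80. A small sketch of reading that entry; getauxval() is a glibc helper, and AT_SYSINFO only appears for 32-bit x86 processes:

#include <stdio.h>
#include <elf.h>		/* AT_SYSINFO */
#include <sys/auxv.h>		/* getauxval() */

int main(void)
{
	unsigned long entry = getauxval(AT_SYSINFO);

	if (entry)
		printf("vsyscall entry (linux-gate.so.1) at %#lx\n", entry);
	else
		puts("no AT_SYSINFO: not a 32-bit x86 process");
	return 0;
}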
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c new file mode 100644 index 000000000000..9b55e30e4490 --- /dev/null +++ b/arch/i386/kernel/time.c | |||
@@ -0,0 +1,476 @@ | |||
1 | /* | ||
2 | * linux/arch/i386/kernel/time.c | ||
3 | * | ||
4 | * Copyright (C) 1991, 1992, 1995 Linus Torvalds | ||
5 | * | ||
6 | * This file contains the PC-specific time handling details: | ||
7 | * reading the RTC at bootup, etc.. | ||
8 | * 1994-07-02 Alan Modra | ||
9 | * fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime | ||
10 | * 1995-03-26 Markus Kuhn | ||
11 | * fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887 | ||
12 | * precision CMOS clock update | ||
13 | * 1996-05-03 Ingo Molnar | ||
14 | * fixed time warps in do_[slow|fast]_gettimeoffset() | ||
15 | * 1997-09-10 Updated NTP code according to technical memorandum Jan '96 | ||
16 | * "A Kernel Model for Precision Timekeeping" by Dave Mills | ||
17 | * 1998-09-05 (Various) | ||
18 | * More robust do_fast_gettimeoffset() algorithm implemented | ||
19 | * (works with APM, Cyrix 6x86MX and Centaur C6), | ||
20 | * monotonic gettimeofday() with fast_get_timeoffset(), | ||
21 | * drift-proof precision TSC calibration on boot | ||
22 | * (C. Scott Ananian <cananian@alumni.princeton.edu>, Andrew D. | ||
23 | * Balsa <andrebalsa@altern.org>, Philip Gladstone <philip@raptor.com>; | ||
24 | * ported from 2.0.35 Jumbo-9 by Michael Krause <m.krause@tu-harburg.de>). | ||
25 | * 1998-12-16 Andrea Arcangeli | ||
26 | * Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy | ||
27 | * because it was not accounting for lost_ticks. | ||
28 | * 1998-12-24 Copyright (C) 1998 Andrea Arcangeli | ||
29 | * Fixed a xtime SMP race (we need the xtime_lock rw spinlock to | ||
30 | * serialize accesses to xtime/lost_ticks). | ||
31 | */ | ||
32 | |||
33 | #include <linux/errno.h> | ||
34 | #include <linux/sched.h> | ||
35 | #include <linux/kernel.h> | ||
36 | #include <linux/param.h> | ||
37 | #include <linux/string.h> | ||
38 | #include <linux/mm.h> | ||
39 | #include <linux/interrupt.h> | ||
40 | #include <linux/time.h> | ||
41 | #include <linux/delay.h> | ||
42 | #include <linux/init.h> | ||
43 | #include <linux/smp.h> | ||
44 | #include <linux/module.h> | ||
45 | #include <linux/sysdev.h> | ||
46 | #include <linux/bcd.h> | ||
47 | #include <linux/efi.h> | ||
48 | #include <linux/mca.h> | ||
49 | |||
50 | #include <asm/io.h> | ||
51 | #include <asm/smp.h> | ||
52 | #include <asm/irq.h> | ||
53 | #include <asm/msr.h> | ||
54 | #include <asm/delay.h> | ||
55 | #include <asm/mpspec.h> | ||
56 | #include <asm/uaccess.h> | ||
57 | #include <asm/processor.h> | ||
58 | #include <asm/timer.h> | ||
59 | |||
60 | #include "mach_time.h" | ||
61 | |||
62 | #include <linux/timex.h> | ||
63 | #include <linux/config.h> | ||
64 | |||
65 | #include <asm/hpet.h> | ||
66 | |||
67 | #include <asm/arch_hooks.h> | ||
68 | |||
69 | #include "io_ports.h" | ||
70 | |||
71 | extern spinlock_t i8259A_lock; | ||
72 | int pit_latch_buggy; /* extern */ | ||
73 | |||
74 | #include "do_timer.h" | ||
75 | |||
76 | u64 jiffies_64 = INITIAL_JIFFIES; | ||
77 | |||
78 | EXPORT_SYMBOL(jiffies_64); | ||
79 | |||
80 | unsigned long cpu_khz; /* Detected as we calibrate the TSC */ | ||
81 | |||
82 | extern unsigned long wall_jiffies; | ||
83 | |||
84 | DEFINE_SPINLOCK(rtc_lock); | ||
85 | |||
86 | DEFINE_SPINLOCK(i8253_lock); | ||
87 | EXPORT_SYMBOL(i8253_lock); | ||
88 | |||
89 | struct timer_opts *cur_timer = &timer_none; | ||
90 | |||
91 | /* | ||
92 | * This is a special lock that is owned by the CPU and holds the index | ||
93 | * register we are working with. It is required for NMI access to the | ||
94 | * CMOS/RTC registers. See include/asm-i386/mc146818rtc.h for details. | ||
95 | */ | ||
96 | volatile unsigned long cmos_lock = 0; | ||
97 | EXPORT_SYMBOL(cmos_lock); | ||
98 | |||
99 | /* Routines for accessing the CMOS RAM/RTC. */ | ||
100 | unsigned char rtc_cmos_read(unsigned char addr) | ||
101 | { | ||
102 | unsigned char val; | ||
103 | lock_cmos_prefix(addr); | ||
104 | outb_p(addr, RTC_PORT(0)); | ||
105 | val = inb_p(RTC_PORT(1)); | ||
106 | lock_cmos_suffix(addr); | ||
107 | return val; | ||
108 | } | ||
109 | EXPORT_SYMBOL(rtc_cmos_read); | ||
110 | |||
111 | void rtc_cmos_write(unsigned char val, unsigned char addr) | ||
112 | { | ||
113 | lock_cmos_prefix(addr); | ||
114 | outb_p(addr, RTC_PORT(0)); | ||
115 | outb_p(val, RTC_PORT(1)); | ||
116 | lock_cmos_suffix(addr); | ||
117 | } | ||
118 | EXPORT_SYMBOL(rtc_cmos_write); | ||
119 | |||
120 | /* | ||
121 | * This version of gettimeofday has microsecond resolution | ||
122 | * and better than microsecond precision on fast x86 machines with TSC. | ||
123 | */ | ||
124 | void do_gettimeofday(struct timeval *tv) | ||
125 | { | ||
126 | unsigned long seq; | ||
127 | unsigned long usec, sec; | ||
128 | unsigned long max_ntp_tick; | ||
129 | |||
130 | do { | ||
131 | unsigned long lost; | ||
132 | |||
133 | seq = read_seqbegin(&xtime_lock); | ||
134 | |||
135 | usec = cur_timer->get_offset(); | ||
136 | lost = jiffies - wall_jiffies; | ||
137 | |||
138 | /* | ||
139 | * If time_adjust is negative then NTP is slowing the clock | ||
140 | * so make sure not to go into the next possible interval. | ||
141 | * Better to lose some accuracy than have time go backwards.. | ||
142 | */ | ||
143 | if (unlikely(time_adjust < 0)) { | ||
144 | max_ntp_tick = (USEC_PER_SEC / HZ) - tickadj; | ||
145 | usec = min(usec, max_ntp_tick); | ||
146 | |||
147 | if (lost) | ||
148 | usec += lost * max_ntp_tick; | ||
149 | } | ||
150 | else if (unlikely(lost)) | ||
151 | usec += lost * (USEC_PER_SEC / HZ); | ||
152 | |||
153 | sec = xtime.tv_sec; | ||
154 | usec += (xtime.tv_nsec / 1000); | ||
155 | } while (read_seqretry(&xtime_lock, seq)); | ||
156 | |||
157 | while (usec >= 1000000) { | ||
158 | usec -= 1000000; | ||
159 | sec++; | ||
160 | } | ||
161 | |||
162 | tv->tv_sec = sec; | ||
163 | tv->tv_usec = usec; | ||
164 | } | ||
165 | |||
166 | EXPORT_SYMBOL(do_gettimeofday); | ||
167 | |||
168 | int do_settimeofday(struct timespec *tv) | ||
169 | { | ||
170 | time_t wtm_sec, sec = tv->tv_sec; | ||
171 | long wtm_nsec, nsec = tv->tv_nsec; | ||
172 | |||
173 | if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) | ||
174 | return -EINVAL; | ||
175 | |||
176 | write_seqlock_irq(&xtime_lock); | ||
177 | /* | ||
178 | * This is revolting. We need to set "xtime" correctly. However, the | ||
179 | * value in this location is the value at the most recent update of | ||
180 | * wall time. Discover what correction gettimeofday() would have | ||
181 | * made, and then undo it! | ||
182 | */ | ||
183 | nsec -= cur_timer->get_offset() * NSEC_PER_USEC; | ||
184 | nsec -= (jiffies - wall_jiffies) * TICK_NSEC; | ||
185 | |||
186 | wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); | ||
187 | wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec); | ||
188 | |||
189 | set_normalized_timespec(&xtime, sec, nsec); | ||
190 | set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); | ||
191 | |||
192 | time_adjust = 0; /* stop active adjtime() */ | ||
193 | time_status |= STA_UNSYNC; | ||
194 | time_maxerror = NTP_PHASE_LIMIT; | ||
195 | time_esterror = NTP_PHASE_LIMIT; | ||
196 | write_sequnlock_irq(&xtime_lock); | ||
197 | clock_was_set(); | ||
198 | return 0; | ||
199 | } | ||
200 | |||
201 | EXPORT_SYMBOL(do_settimeofday); | ||
202 | |||
203 | static int set_rtc_mmss(unsigned long nowtime) | ||
204 | { | ||
205 | int retval; | ||
206 | |||
207 | WARN_ON(irqs_disabled()); | ||
208 | |||
209 | /* gets recalled with irq locally disabled */ | ||
210 | spin_lock_irq(&rtc_lock); | ||
211 | if (efi_enabled) | ||
212 | retval = efi_set_rtc_mmss(nowtime); | ||
213 | else | ||
214 | retval = mach_set_rtc_mmss(nowtime); | ||
215 | spin_unlock_irq(&rtc_lock); | ||
216 | |||
217 | return retval; | ||
218 | } | ||
219 | |||
220 | |||
221 | int timer_ack; | ||
222 | |||
223 | /* monotonic_clock(): returns # of nanoseconds passed since time_init() | ||
224 | * Note: This function is required to return accurate | ||
225 | * time even in the absence of multiple timer ticks. | ||
226 | */ | ||
227 | unsigned long long monotonic_clock(void) | ||
228 | { | ||
229 | return cur_timer->monotonic_clock(); | ||
230 | } | ||
231 | EXPORT_SYMBOL(monotonic_clock); | ||
232 | |||
233 | #if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER) | ||
234 | unsigned long profile_pc(struct pt_regs *regs) | ||
235 | { | ||
236 | unsigned long pc = instruction_pointer(regs); | ||
237 | |||
238 | if (in_lock_functions(pc)) | ||
239 | return *(unsigned long *)(regs->ebp + 4); | ||
240 | |||
241 | return pc; | ||
242 | } | ||
243 | EXPORT_SYMBOL(profile_pc); | ||
244 | #endif | ||
245 | |||
246 | /* | ||
247 | * timer_interrupt() needs to keep up the real-time clock, | ||
248 | * as well as call the "do_timer()" routine every clocktick | ||
249 | */ | ||
250 | static inline void do_timer_interrupt(int irq, void *dev_id, | ||
251 | struct pt_regs *regs) | ||
252 | { | ||
253 | #ifdef CONFIG_X86_IO_APIC | ||
254 | if (timer_ack) { | ||
255 | /* | ||
256 | * Subtle, when I/O APICs are used we have to ack timer IRQ | ||
257 | * manually to reset the IRR bit for do_slow_gettimeoffset(). | ||
258 | * This will also deassert NMI lines for the watchdog if run | ||
259 | * on an 82489DX-based system. | ||
260 | */ | ||
261 | spin_lock(&i8259A_lock); | ||
262 | outb(0x0c, PIC_MASTER_OCW3); | ||
263 | /* Ack the IRQ; AEOI will end it automatically. */ | ||
264 | inb(PIC_MASTER_POLL); | ||
265 | spin_unlock(&i8259A_lock); | ||
266 | } | ||
267 | #endif | ||
268 | |||
269 | do_timer_interrupt_hook(regs); | ||
270 | |||
271 | |||
272 | if (MCA_bus) { | ||
273 | /* The PS/2 uses level-triggered interrupts. You can't | ||
274 | turn them off, nor would you want to (any attempt to | ||
275 | enable edge-triggered interrupts usually gets intercepted by a | ||
276 | special hardware circuit). Hence we have to acknowledge | ||
277 | the timer interrupt. Through some incredibly stupid | ||
278 | design idea, the reset for IRQ 0 is done by setting the | ||
279 | high bit of the PPI port B (0x61). Note that some PS/2s, | ||
280 | notably the 55SX, work fine if this is removed. */ | ||
281 | |||
282 | irq = inb_p( 0x61 ); /* read the current state */ | ||
283 | outb_p( irq|0x80, 0x61 ); /* reset the IRQ */ | ||
284 | } | ||
285 | } | ||
286 | |||
287 | /* | ||
288 | * This is the same as the above, except we _also_ save the current | ||
289 | * Time Stamp Counter value at the time of the timer interrupt, so that | ||
290 | * we can later estimate the time of day more exactly. | ||
291 | */ | ||
292 | irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) | ||
293 | { | ||
294 | /* | ||
295 | * Here we are in the timer irq handler. We just have irqs locally | ||
296 | * disabled but we don't know if the timer_bh is running on the other | ||
297 | CPU. We need to avoid an SMP race with it. NOTE: we don't need | ||
298 | * the irq version of write_lock because as just said we have irq | ||
299 | * locally disabled. -arca | ||
300 | */ | ||
301 | write_seqlock(&xtime_lock); | ||
302 | |||
303 | cur_timer->mark_offset(); | ||
304 | |||
305 | do_timer_interrupt(irq, NULL, regs); | ||
306 | |||
307 | write_sequnlock(&xtime_lock); | ||
308 | return IRQ_HANDLED; | ||
309 | } | ||
310 | |||
311 | /* not static: needed by APM */ | ||
312 | unsigned long get_cmos_time(void) | ||
313 | { | ||
314 | unsigned long retval; | ||
315 | |||
316 | spin_lock(&rtc_lock); | ||
317 | |||
318 | if (efi_enabled) | ||
319 | retval = efi_get_time(); | ||
320 | else | ||
321 | retval = mach_get_cmos_time(); | ||
322 | |||
323 | spin_unlock(&rtc_lock); | ||
324 | |||
325 | return retval; | ||
326 | } | ||
327 | static void sync_cmos_clock(unsigned long dummy); | ||
328 | |||
329 | static struct timer_list sync_cmos_timer = | ||
330 | TIMER_INITIALIZER(sync_cmos_clock, 0, 0); | ||
331 | |||
332 | static void sync_cmos_clock(unsigned long dummy) | ||
333 | { | ||
334 | struct timeval now, next; | ||
335 | int fail = 1; | ||
336 | |||
337 | /* | ||
338 | * If we have an externally synchronized Linux clock, then update | ||
339 | * CMOS clock accordingly every ~11 minutes. set_rtc_mmss() has to be | ||
340 | * called as close as possible to 500 ms before the new second starts. | ||
341 | * This code is run on a timer. If the clock is set, that timer | ||
342 | * may not expire at the correct time. Thus, we adjust... | ||
343 | */ | ||
344 | if ((time_status & STA_UNSYNC) != 0) | ||
345 | /* | ||
346 | * Not synced, exit, do not restart a timer (if one is | ||
347 | * running, let it run out). | ||
348 | */ | ||
349 | return; | ||
350 | |||
351 | do_gettimeofday(&now); | ||
352 | if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 && | ||
353 | now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2) | ||
354 | fail = set_rtc_mmss(now.tv_sec); | ||
355 | |||
356 | next.tv_usec = USEC_AFTER - now.tv_usec; | ||
357 | if (next.tv_usec <= 0) | ||
358 | next.tv_usec += USEC_PER_SEC; | ||
359 | |||
360 | if (!fail) | ||
361 | next.tv_sec = 659; | ||
362 | else | ||
363 | next.tv_sec = 0; | ||
364 | |||
365 | if (next.tv_usec >= USEC_PER_SEC) { | ||
366 | next.tv_sec++; | ||
367 | next.tv_usec -= USEC_PER_SEC; | ||
368 | } | ||
369 | mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next)); | ||
370 | } | ||
371 | |||
372 | void notify_arch_cmos_timer(void) | ||
373 | { | ||
374 | mod_timer(&sync_cmos_timer, jiffies + 1); | ||
375 | } | ||
376 | |||
377 | static long clock_cmos_diff, sleep_start; | ||
378 | |||
379 | static int timer_suspend(struct sys_device *dev, u32 state) | ||
380 | { | ||
381 | /* | ||
382 | * Record the offset between system time and the CMOS clock so that resume can restore the wall clock | ||
383 | */ | ||
384 | clock_cmos_diff = -get_cmos_time(); | ||
385 | clock_cmos_diff += get_seconds(); | ||
386 | sleep_start = get_cmos_time(); | ||
387 | return 0; | ||
388 | } | ||
389 | |||
390 | static int timer_resume(struct sys_device *dev) | ||
391 | { | ||
392 | unsigned long flags; | ||
393 | unsigned long sec; | ||
394 | unsigned long sleep_length; | ||
395 | |||
396 | #ifdef CONFIG_HPET_TIMER | ||
397 | if (is_hpet_enabled()) | ||
398 | hpet_reenable(); | ||
399 | #endif | ||
400 | sec = get_cmos_time() + clock_cmos_diff; | ||
401 | sleep_length = (get_cmos_time() - sleep_start) * HZ; | ||
402 | write_seqlock_irqsave(&xtime_lock, flags); | ||
403 | xtime.tv_sec = sec; | ||
404 | xtime.tv_nsec = 0; | ||
405 | write_sequnlock_irqrestore(&xtime_lock, flags); | ||
406 | jiffies += sleep_length; | ||
407 | wall_jiffies += sleep_length; | ||
408 | return 0; | ||
409 | } | ||
410 | |||
411 | static struct sysdev_class timer_sysclass = { | ||
412 | .resume = timer_resume, | ||
413 | .suspend = timer_suspend, | ||
414 | set_kset_name("timer"), | ||
415 | }; | ||
416 | |||
417 | |||
418 | /* XXX this driverfs stuff should probably go elsewhere later -john */ | ||
419 | static struct sys_device device_timer = { | ||
420 | .id = 0, | ||
421 | .cls = &timer_sysclass, | ||
422 | }; | ||
423 | |||
424 | static int time_init_device(void) | ||
425 | { | ||
426 | int error = sysdev_class_register(&timer_sysclass); | ||
427 | if (!error) | ||
428 | error = sysdev_register(&device_timer); | ||
429 | return error; | ||
430 | } | ||
431 | |||
432 | device_initcall(time_init_device); | ||
433 | |||
434 | #ifdef CONFIG_HPET_TIMER | ||
435 | extern void (*late_time_init)(void); | ||
436 | /* Duplicate of time_init() below, with hpet_enable part added */ | ||
437 | static void __init hpet_time_init(void) | ||
438 | { | ||
439 | xtime.tv_sec = get_cmos_time(); | ||
440 | xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); | ||
441 | set_normalized_timespec(&wall_to_monotonic, | ||
442 | -xtime.tv_sec, -xtime.tv_nsec); | ||
443 | |||
444 | if (hpet_enable() >= 0) { | ||
445 | printk("Using HPET for base-timer\n"); | ||
446 | } | ||
447 | |||
448 | cur_timer = select_timer(); | ||
449 | printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name); | ||
450 | |||
451 | time_init_hook(); | ||
452 | } | ||
453 | #endif | ||
454 | |||
455 | void __init time_init(void) | ||
456 | { | ||
457 | #ifdef CONFIG_HPET_TIMER | ||
458 | if (is_hpet_capable()) { | ||
459 | /* | ||
460 | * HPET initialization needs to do memory-mapped io. So, let | ||
461 | * us do a late initialization after mem_init(). | ||
462 | */ | ||
463 | late_time_init = hpet_time_init; | ||
464 | return; | ||
465 | } | ||
466 | #endif | ||
467 | xtime.tv_sec = get_cmos_time(); | ||
468 | xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); | ||
469 | set_normalized_timespec(&wall_to_monotonic, | ||
470 | -xtime.tv_sec, -xtime.tv_nsec); | ||
471 | |||
472 | cur_timer = select_timer(); | ||
473 | printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name); | ||
474 | |||
475 | time_init_hook(); | ||
476 | } | ||
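do_gettimeofday() is a lock-free reader: it snapshots the time fields between read_seqbegin()/read_seqretry() and retries whenever timer_interrupt()'s write_seqlock() section raced with it. A stand-alone sketch of that sequence-counter pattern, using C11 atomics in place of the kernel's seqlock_t:

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint seq;
static unsigned long shared_sec, shared_usec;

static void writer_update(unsigned long s, unsigned long us)
{
	/* odd count = update in progress, even = stable */
	atomic_fetch_add_explicit(&seq, 1, memory_order_release);
	shared_sec = s;
	shared_usec = us;
	atomic_fetch_add_explicit(&seq, 1, memory_order_release);
}

static void reader_snapshot(unsigned long *s, unsigned long *us)
{
	unsigned start;

	do {
		start = atomic_load_explicit(&seq, memory_order_acquire);
		*s = shared_sec;
		*us = shared_usec;
		/* retry if a write was in flight or completed meanwhile */
	} while ((start & 1) ||
		 start != atomic_load_explicit(&seq, memory_order_acquire));
}

int main(void)
{
	unsigned long s, us;

	writer_update(1000, 500);
	reader_snapshot(&s, &us);
	printf("%lu.%06lu\n", s, us);
	return 0;
}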
diff --git a/arch/i386/kernel/time_hpet.c b/arch/i386/kernel/time_hpet.c new file mode 100644 index 000000000000..244a31b04be7 --- /dev/null +++ b/arch/i386/kernel/time_hpet.c | |||
@@ -0,0 +1,458 @@ | |||
1 | /* | ||
2 | * linux/arch/i386/kernel/time_hpet.c | ||
3 | * This code largely copied from arch/x86_64/kernel/time.c | ||
4 | * See that file for credits. | ||
5 | * | ||
6 | * 2003-06-30 Venkatesh Pallipadi - Additional changes for HPET support | ||
7 | */ | ||
8 | |||
9 | #include <linux/errno.h> | ||
10 | #include <linux/kernel.h> | ||
11 | #include <linux/param.h> | ||
12 | #include <linux/string.h> | ||
13 | #include <linux/init.h> | ||
14 | #include <linux/smp.h> | ||
15 | |||
16 | #include <asm/timer.h> | ||
17 | #include <asm/fixmap.h> | ||
18 | #include <asm/apic.h> | ||
19 | |||
20 | #include <linux/timex.h> | ||
21 | #include <linux/config.h> | ||
22 | |||
23 | #include <asm/hpet.h> | ||
24 | #include <linux/hpet.h> | ||
25 | |||
26 | static unsigned long hpet_period; /* fsecs / HPET clock */ | ||
27 | unsigned long hpet_tick; /* hpet clks count per tick */ | ||
28 | unsigned long hpet_address; /* hpet memory map physical address */ | ||
29 | |||
30 | static int use_hpet; /* can be used for runtime check of hpet */ | ||
31 | static int boot_hpet_disable; /* boottime override for HPET timer */ | ||
32 | static void __iomem * hpet_virt_address; /* hpet kernel virtual address */ | ||
33 | |||
34 | #define FSEC_TO_USEC (1000000000UL) /* femtoseconds per microsecond */ | ||
35 | |||
36 | int hpet_readl(unsigned long a) | ||
37 | { | ||
38 | return readl(hpet_virt_address + a); | ||
39 | } | ||
40 | |||
41 | static void hpet_writel(unsigned long d, unsigned long a) | ||
42 | { | ||
43 | writel(d, hpet_virt_address + a); | ||
44 | } | ||
45 | |||
46 | #ifdef CONFIG_X86_LOCAL_APIC | ||
47 | /* | ||
48 | * HPET counters don't wrap around on every tick. They just change the | ||
49 | * comparator value and continue. Next tick can be caught by checking | ||
50 | * for a change in the comparator value. Used in apic.c. | ||
51 | */ | ||
52 | static void __init wait_hpet_tick(void) | ||
53 | { | ||
54 | unsigned int start_cmp_val, end_cmp_val; | ||
55 | |||
56 | start_cmp_val = hpet_readl(HPET_T0_CMP); | ||
57 | do { | ||
58 | end_cmp_val = hpet_readl(HPET_T0_CMP); | ||
59 | } while (start_cmp_val == end_cmp_val); | ||
60 | } | ||
61 | #endif | ||
62 | |||
63 | static int hpet_timer_stop_set_go(unsigned long tick) | ||
64 | { | ||
65 | unsigned int cfg; | ||
66 | |||
67 | /* | ||
68 | * Stop the timers and reset the main counter. | ||
69 | */ | ||
70 | cfg = hpet_readl(HPET_CFG); | ||
71 | cfg &= ~HPET_CFG_ENABLE; | ||
72 | hpet_writel(cfg, HPET_CFG); | ||
73 | hpet_writel(0, HPET_COUNTER); | ||
74 | hpet_writel(0, HPET_COUNTER + 4); | ||
75 | |||
76 | /* | ||
77 | * Set up timer 0 as periodic, with the first interrupt at | ||
78 | * hpet_tick and a period of hpet_tick. | ||
79 | */ | ||
80 | cfg = hpet_readl(HPET_T0_CFG); | ||
81 | cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | | ||
82 | HPET_TN_SETVAL | HPET_TN_32BIT; | ||
83 | hpet_writel(cfg, HPET_T0_CFG); | ||
84 | |||
85 | /* | ||
86 | * The first write after writing TN_SETVAL to the config register sets | ||
87 | * the counter value, the second write sets the threshold. | ||
88 | */ | ||
89 | hpet_writel(tick, HPET_T0_CMP); | ||
90 | hpet_writel(tick, HPET_T0_CMP); | ||
91 | |||
92 | /* | ||
93 | * Go! | ||
94 | */ | ||
95 | cfg = hpet_readl(HPET_CFG); | ||
96 | cfg |= HPET_CFG_ENABLE | HPET_CFG_LEGACY; | ||
97 | hpet_writel(cfg, HPET_CFG); | ||
98 | |||
99 | return 0; | ||
100 | } | ||
101 | |||
102 | /* | ||
103 | * Check whether HPET was found by ACPI boot parse. If yes setup HPET | ||
104 | * counter 0 for kernel base timer. | ||
105 | */ | ||
106 | int __init hpet_enable(void) | ||
107 | { | ||
108 | unsigned int id; | ||
109 | unsigned long tick_fsec_low, tick_fsec_high; /* tick in femto sec */ | ||
110 | unsigned long hpet_tick_rem; | ||
111 | |||
112 | if (boot_hpet_disable) | ||
113 | return -1; | ||
114 | |||
115 | if (!hpet_address) { | ||
116 | return -1; | ||
117 | } | ||
118 | hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE); | ||
119 | /* | ||
120 | * Read the period, compute tick and quotient. | ||
121 | */ | ||
122 | id = hpet_readl(HPET_ID); | ||
123 | |||
124 | /* | ||
125 | * We check for a value of 1 or more in the number field if | ||
126 | * CONFIG_HPET_EMULATE_RTC is set, because we will need an | ||
127 | * additional timer for RTC emulation. | ||
128 | * Otherwise we can make do with one timer, using the | ||
129 | * single HPET timer for system time. | ||
130 | */ | ||
131 | if ( | ||
132 | #ifdef CONFIG_HPET_EMULATE_RTC | ||
133 | !(id & HPET_ID_NUMBER) || | ||
134 | #endif | ||
135 | !(id & HPET_ID_LEGSUP)) | ||
136 | return -1; | ||
137 | |||
138 | hpet_period = hpet_readl(HPET_PERIOD); | ||
139 | if ((hpet_period < HPET_MIN_PERIOD) || (hpet_period > HPET_MAX_PERIOD)) | ||
140 | return -1; | ||
141 | |||
142 | /* | ||
143 | * 64 bit math | ||
144 | * First changing tick into fsec | ||
145 | * Then 64 bit div to find number of hpet clk per tick | ||
146 | */ | ||
147 | ASM_MUL64_REG(tick_fsec_low, tick_fsec_high, | ||
148 | KERNEL_TICK_USEC, FSEC_TO_USEC); | ||
149 | ASM_DIV64_REG(hpet_tick, hpet_tick_rem, | ||
150 | hpet_period, tick_fsec_low, tick_fsec_high); | ||
151 | |||
152 | if (hpet_tick_rem > (hpet_period >> 1)) | ||
153 | hpet_tick++; /* rounding the result */ | ||
154 | |||
155 | if (hpet_timer_stop_set_go(hpet_tick)) | ||
156 | return -1; | ||
157 | |||
158 | use_hpet = 1; | ||
159 | |||
160 | #ifdef CONFIG_HPET | ||
161 | { | ||
162 | struct hpet_data hd; | ||
163 | unsigned int ntimer; | ||
164 | |||
165 | memset(&hd, 0, sizeof (hd)); | ||
166 | |||
167 | ntimer = hpet_readl(HPET_ID); | ||
168 | ntimer = (ntimer & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT; | ||
169 | ntimer++; | ||
170 | |||
171 | /* | ||
172 | * Register with driver. | ||
173 | * Timer0 and Timer1 are used by the platform. | ||
174 | */ | ||
175 | hd.hd_phys_address = hpet_address; | ||
176 | hd.hd_address = hpet_virt_address; | ||
177 | hd.hd_nirqs = ntimer; | ||
178 | hd.hd_flags = HPET_DATA_PLATFORM; | ||
179 | hpet_reserve_timer(&hd, 0); | ||
180 | #ifdef CONFIG_HPET_EMULATE_RTC | ||
181 | hpet_reserve_timer(&hd, 1); | ||
182 | #endif | ||
183 | hd.hd_irq[0] = HPET_LEGACY_8254; | ||
184 | hd.hd_irq[1] = HPET_LEGACY_RTC; | ||
185 | if (ntimer > 2) { | ||
186 | struct hpet __iomem *hpet; | ||
187 | struct hpet_timer __iomem *timer; | ||
188 | int i; | ||
189 | |||
190 | hpet = hpet_virt_address; | ||
191 | |||
192 | for (i = 2, timer = &hpet->hpet_timers[2]; i < ntimer; | ||
193 | timer++, i++) | ||
194 | hd.hd_irq[i] = (timer->hpet_config & | ||
195 | Tn_INT_ROUTE_CNF_MASK) >> | ||
196 | Tn_INT_ROUTE_CNF_SHIFT; | ||
197 | |||
198 | } | ||
199 | |||
200 | hpet_alloc(&hd); | ||
201 | } | ||
202 | #endif | ||
203 | |||
204 | #ifdef CONFIG_X86_LOCAL_APIC | ||
205 | wait_timer_tick = wait_hpet_tick; | ||
206 | #endif | ||
207 | return 0; | ||
208 | } | ||
209 | |||
210 | int hpet_reenable(void) | ||
211 | { | ||
212 | return hpet_timer_stop_set_go(hpet_tick); | ||
213 | } | ||
214 | |||
215 | int is_hpet_enabled(void) | ||
216 | { | ||
217 | return use_hpet; | ||
218 | } | ||
219 | |||
220 | int is_hpet_capable(void) | ||
221 | { | ||
222 | if (!boot_hpet_disable && hpet_address) | ||
223 | return 1; | ||
224 | return 0; | ||
225 | } | ||
226 | |||
227 | static int __init hpet_setup(char* str) | ||
228 | { | ||
229 | if (str) { | ||
230 | if (!strncmp("disable", str, 7)) | ||
231 | boot_hpet_disable = 1; | ||
232 | } | ||
233 | return 1; | ||
234 | } | ||
235 | |||
236 | __setup("hpet=", hpet_setup); | ||
237 | |||
238 | #ifdef CONFIG_HPET_EMULATE_RTC | ||
239 | /* HPET in LegacyReplacement Mode eats up the RTC interrupt line. When HPET | ||
240 | * is enabled, we support RTC interrupt functionality in software. | ||
241 | * RTC has 3 kinds of interrupts: | ||
242 | * 1) Update Interrupt - generate an interrupt, every sec, when RTC clock | ||
243 | * is updated | ||
244 | * 2) Alarm Interrupt - generate an interrupt at a specific time of day | ||
245 | * 3) Periodic Interrupt - generate periodic interrupt, with frequencies | ||
246 | * 2Hz-8192Hz (2Hz-64Hz for non-root user) (all freqs in powers of 2) | ||
247 | * (1) and (2) above are implemented using polling at a frequency of | ||
248 | * 64 Hz. The exact frequency is a tradeoff between accuracy and interrupt | ||
249 | * overhead. (DEFAULT_RTC_INT_FREQ) | ||
250 | * For (3), we use interrupts at 64Hz or user specified periodic | ||
251 | * frequency, whichever is higher. | ||
252 | */ | ||
253 | #include <linux/mc146818rtc.h> | ||
254 | #include <linux/rtc.h> | ||
255 | |||
256 | extern irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs); | ||
257 | |||
258 | #define DEFAULT_RTC_INT_FREQ 64 | ||
259 | #define RTC_NUM_INTS 1 | ||
260 | |||
261 | static unsigned long UIE_on; | ||
262 | static unsigned long prev_update_sec; | ||
263 | |||
264 | static unsigned long AIE_on; | ||
265 | static struct rtc_time alarm_time; | ||
266 | |||
267 | static unsigned long PIE_on; | ||
268 | static unsigned long PIE_freq = DEFAULT_RTC_INT_FREQ; | ||
269 | static unsigned long PIE_count; | ||
270 | |||
271 | static unsigned long hpet_rtc_int_freq; /* RTC interrupt frequency */ | ||
272 | |||
273 | /* | ||
274 | * Timer 1 for RTC, we do not use periodic interrupt feature, | ||
275 | * even if HPET supports periodic interrupts on Timer 1. | ||
276 | * The reason being, to set up a periodic interrupt in HPET, we need to | ||
277 | * stop the main counter. And if we did that every time someone disables/enables | ||
278 | * RTC, it would have an adverse effect on the main kernel timer running on Timer 0. | ||
279 | * So, for the time being, simulate the periodic interrupt in software. | ||
280 | * | ||
281 | * hpet_rtc_timer_init() is called the first time; on subsequent | ||
282 | * interrupts, reinit happens through hpet_rtc_timer_reinit(). | ||
283 | */ | ||
284 | int hpet_rtc_timer_init(void) | ||
285 | { | ||
286 | unsigned int cfg, cnt; | ||
287 | unsigned long flags; | ||
288 | |||
289 | if (!is_hpet_enabled()) | ||
290 | return 0; | ||
291 | /* | ||
292 | * Set the counter 1 and enable the interrupts. | ||
293 | */ | ||
294 | if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ)) | ||
295 | hpet_rtc_int_freq = PIE_freq; | ||
296 | else | ||
297 | hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; | ||
298 | |||
299 | local_irq_save(flags); | ||
300 | cnt = hpet_readl(HPET_COUNTER); | ||
301 | cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq); | ||
302 | hpet_writel(cnt, HPET_T1_CMP); | ||
303 | local_irq_restore(flags); | ||
304 | |||
305 | cfg = hpet_readl(HPET_T1_CFG); | ||
306 | cfg |= HPET_TN_ENABLE | HPET_TN_SETVAL | HPET_TN_32BIT; | ||
307 | hpet_writel(cfg, HPET_T1_CFG); | ||
308 | |||
309 | return 1; | ||
310 | } | ||
311 | |||
312 | static void hpet_rtc_timer_reinit(void) | ||
313 | { | ||
314 | unsigned int cfg, cnt; | ||
315 | |||
316 | if (!(PIE_on | AIE_on | UIE_on)) | ||
317 | return; | ||
318 | |||
319 | if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ)) | ||
320 | hpet_rtc_int_freq = PIE_freq; | ||
321 | else | ||
322 | hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; | ||
323 | |||
324 | /* It is more accurate to use the comparator value than the current count. */ | ||
325 | cnt = hpet_readl(HPET_T1_CMP); | ||
326 | cnt += hpet_tick*HZ/hpet_rtc_int_freq; | ||
327 | hpet_writel(cnt, HPET_T1_CMP); | ||
328 | |||
329 | cfg = hpet_readl(HPET_T1_CFG); | ||
330 | cfg |= HPET_TN_ENABLE | HPET_TN_SETVAL | HPET_TN_32BIT; | ||
331 | hpet_writel(cfg, HPET_T1_CFG); | ||
332 | |||
333 | return; | ||
334 | } | ||
335 | |||
336 | /* | ||
337 | * The functions below are called from rtc driver. | ||
338 | * Return 0 if HPET is not being used. | ||
339 | * Otherwise do the necessary changes and return 1. | ||
340 | */ | ||
341 | int hpet_mask_rtc_irq_bit(unsigned long bit_mask) | ||
342 | { | ||
343 | if (!is_hpet_enabled()) | ||
344 | return 0; | ||
345 | |||
346 | if (bit_mask & RTC_UIE) | ||
347 | UIE_on = 0; | ||
348 | if (bit_mask & RTC_PIE) | ||
349 | PIE_on = 0; | ||
350 | if (bit_mask & RTC_AIE) | ||
351 | AIE_on = 0; | ||
352 | |||
353 | return 1; | ||
354 | } | ||
355 | |||
356 | int hpet_set_rtc_irq_bit(unsigned long bit_mask) | ||
357 | { | ||
358 | int timer_init_reqd = 0; | ||
359 | |||
360 | if (!is_hpet_enabled()) | ||
361 | return 0; | ||
362 | |||
363 | if (!(PIE_on | AIE_on | UIE_on)) | ||
364 | timer_init_reqd = 1; | ||
365 | |||
366 | if (bit_mask & RTC_UIE) { | ||
367 | UIE_on = 1; | ||
368 | } | ||
369 | if (bit_mask & RTC_PIE) { | ||
370 | PIE_on = 1; | ||
371 | PIE_count = 0; | ||
372 | } | ||
373 | if (bit_mask & RTC_AIE) { | ||
374 | AIE_on = 1; | ||
375 | } | ||
376 | |||
377 | if (timer_init_reqd) | ||
378 | hpet_rtc_timer_init(); | ||
379 | |||
380 | return 1; | ||
381 | } | ||
382 | |||
383 | int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec) | ||
384 | { | ||
385 | if (!is_hpet_enabled()) | ||
386 | return 0; | ||
387 | |||
388 | alarm_time.tm_hour = hrs; | ||
389 | alarm_time.tm_min = min; | ||
390 | alarm_time.tm_sec = sec; | ||
391 | |||
392 | return 1; | ||
393 | } | ||
394 | |||
395 | int hpet_set_periodic_freq(unsigned long freq) | ||
396 | { | ||
397 | if (!is_hpet_enabled()) | ||
398 | return 0; | ||
399 | |||
400 | PIE_freq = freq; | ||
401 | PIE_count = 0; | ||
402 | |||
403 | return 1; | ||
404 | } | ||
405 | |||
406 | int hpet_rtc_dropped_irq(void) | ||
407 | { | ||
408 | if (!is_hpet_enabled()) | ||
409 | return 0; | ||
410 | |||
411 | return 1; | ||
412 | } | ||
413 | |||
414 | irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs) | ||
415 | { | ||
416 | struct rtc_time curr_time; | ||
417 | unsigned long rtc_int_flag = 0; | ||
418 | int call_rtc_interrupt = 0; | ||
419 | |||
420 | hpet_rtc_timer_reinit(); | ||
421 | |||
422 | if (UIE_on | AIE_on) { | ||
423 | rtc_get_rtc_time(&curr_time); | ||
424 | } | ||
425 | if (UIE_on) { | ||
426 | if (curr_time.tm_sec != prev_update_sec) { | ||
427 | /* Set update int info, call real rtc int routine */ | ||
428 | call_rtc_interrupt = 1; | ||
429 | rtc_int_flag = RTC_UF; | ||
430 | prev_update_sec = curr_time.tm_sec; | ||
431 | } | ||
432 | } | ||
433 | if (PIE_on) { | ||
434 | PIE_count++; | ||
435 | if (PIE_count >= hpet_rtc_int_freq/PIE_freq) { | ||
436 | /* Set periodic int info, call real rtc int routine */ | ||
437 | call_rtc_interrupt = 1; | ||
438 | rtc_int_flag |= RTC_PF; | ||
439 | PIE_count = 0; | ||
440 | } | ||
441 | } | ||
442 | if (AIE_on) { | ||
443 | if ((curr_time.tm_sec == alarm_time.tm_sec) && | ||
444 | (curr_time.tm_min == alarm_time.tm_min) && | ||
445 | (curr_time.tm_hour == alarm_time.tm_hour)) { | ||
446 | /* Set alarm int info, call real rtc int routine */ | ||
447 | call_rtc_interrupt = 1; | ||
448 | rtc_int_flag |= RTC_AF; | ||
449 | } | ||
450 | } | ||
451 | if (call_rtc_interrupt) { | ||
452 | rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8)); | ||
453 | rtc_interrupt(rtc_int_flag, dev_id, regs); | ||
454 | } | ||
455 | return IRQ_HANDLED; | ||
456 | } | ||
457 | #endif | ||
458 | |||
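The PIE_count logic in hpet_rtc_interrupt() is plain software frequency division: the hardware timer is programmed at hpet_rtc_int_freq, and every (hpet_rtc_int_freq / PIE_freq)-th interrupt is surfaced as a periodic RTC event. Since both frequencies are powers of two, the ratio is exact. A minimal sketch of the same decimation counter with illustrative rates:

#include <stdio.h>

static unsigned base_freq = 64;	/* like DEFAULT_RTC_INT_FREQ */
static unsigned want_freq = 16;	/* user-requested periodic rate */
static unsigned count;		/* like PIE_count */

static int tick(void)		/* returns 1 when a periodic event is due */
{
	if (++count >= base_freq / want_freq) {
		count = 0;
		return 1;
	}
	return 0;
}

int main(void)
{
	int delivered = 0;

	for (int i = 0; i < 64; i++)	/* one simulated second of ticks */
		delivered += tick();
	printf("%d events at %u Hz from a %u Hz base\n",
	       delivered, want_freq, base_freq);	/* 16 events */
	return 0;
}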
diff --git a/arch/i386/kernel/timers/Makefile b/arch/i386/kernel/timers/Makefile new file mode 100644 index 000000000000..8fa12be658dd --- /dev/null +++ b/arch/i386/kernel/timers/Makefile | |||
@@ -0,0 +1,9 @@ | |||
1 | # | ||
2 | # Makefile for x86 timers | ||
3 | # | ||
4 | |||
5 | obj-y := timer.o timer_none.o timer_tsc.o timer_pit.o common.o | ||
6 | |||
7 | obj-$(CONFIG_X86_CYCLONE_TIMER) += timer_cyclone.o | ||
8 | obj-$(CONFIG_HPET_TIMER) += timer_hpet.o | ||
9 | obj-$(CONFIG_X86_PM_TIMER) += timer_pm.o | ||
diff --git a/arch/i386/kernel/timers/common.c b/arch/i386/kernel/timers/common.c new file mode 100644 index 000000000000..f7f90005e22e --- /dev/null +++ b/arch/i386/kernel/timers/common.c | |||
@@ -0,0 +1,160 @@ | |||
1 | /* | ||
2 | * Common functions used across the timers go here | ||
3 | */ | ||
4 | |||
5 | #include <linux/init.h> | ||
6 | #include <linux/timex.h> | ||
7 | #include <linux/errno.h> | ||
8 | #include <linux/jiffies.h> | ||
9 | |||
10 | #include <asm/io.h> | ||
11 | #include <asm/timer.h> | ||
12 | #include <asm/hpet.h> | ||
13 | |||
14 | #include "mach_timer.h" | ||
15 | |||
16 | /* ------ Calibrate the TSC ------- | ||
17 | * Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset(). | ||
18 | * Too much 64-bit arithmetic here to do this cleanly in C, and for | ||
19 | * accuracy's sake we want to keep the overhead on the CTC speaker (channel 2) | ||
20 | * output busy loop as low as possible. We avoid reading the CTC registers | ||
21 | * directly because of the awkward 8-bit access mechanism of the 82C54 | ||
22 | * device. | ||
23 | */ | ||
24 | |||
25 | #define CALIBRATE_TIME (5 * 1000020/HZ) | ||
26 | |||
27 | unsigned long __init calibrate_tsc(void) | ||
28 | { | ||
29 | mach_prepare_counter(); | ||
30 | |||
31 | { | ||
32 | unsigned long startlow, starthigh; | ||
33 | unsigned long endlow, endhigh; | ||
34 | unsigned long count; | ||
35 | |||
36 | rdtsc(startlow,starthigh); | ||
37 | mach_countup(&count); | ||
38 | rdtsc(endlow,endhigh); | ||
39 | |||
40 | |||
41 | /* Error: ECTCNEVERSET */ | ||
42 | if (count <= 1) | ||
43 | goto bad_ctc; | ||
44 | |||
45 | /* 64-bit subtract - gcc just messes up with long longs */ | ||
46 | __asm__("subl %2,%0\n\t" | ||
47 | "sbbl %3,%1" | ||
48 | :"=a" (endlow), "=d" (endhigh) | ||
49 | :"g" (startlow), "g" (starthigh), | ||
50 | "0" (endlow), "1" (endhigh)); | ||
51 | |||
52 | /* Error: ECPUTOOFAST */ | ||
53 | if (endhigh) | ||
54 | goto bad_ctc; | ||
55 | |||
56 | /* Error: ECPUTOOSLOW */ | ||
57 | if (endlow <= CALIBRATE_TIME) | ||
58 | goto bad_ctc; | ||
59 | |||
60 | __asm__("divl %2" | ||
61 | :"=a" (endlow), "=d" (endhigh) | ||
62 | :"r" (endlow), "0" (0), "1" (CALIBRATE_TIME)); | ||
63 | |||
64 | return endlow; | ||
65 | } | ||
66 | |||
67 | /* | ||
68 | * The CTC wasn't reliable: we got a hit on the very first read, | ||
69 | * or the CPU was so fast/slow that the quotient wouldn't fit in | ||
70 | * 32 bits.. | ||
71 | */ | ||
72 | bad_ctc: | ||
73 | return 0; | ||
74 | } | ||
75 | |||
76 | #ifdef CONFIG_HPET_TIMER | ||
77 | /* ------ Calibrate the TSC using HPET ------- | ||
78 | * Return 2^32 * (1 / (TSC clocks per usec)) for getting the CPU freq. | ||
79 | * The second output, stored through parameter 1 (when non-NULL), | ||
80 | * is 2^32 * (1 / (TSC clocks per HPET clk)) for delay_hpet(). | ||
81 | * calibrate_tsc() calibrates the processor TSC by comparing | ||
82 | * it to the HPET timer of known frequency. | ||
83 | * Too much 64-bit arithmetic here to do this cleanly in C | ||
84 | */ | ||
85 | #define CALIBRATE_CNT_HPET (5 * hpet_tick) | ||
86 | #define CALIBRATE_TIME_HPET (5 * KERNEL_TICK_USEC) | ||
87 | |||
88 | unsigned long __init calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr) | ||
89 | { | ||
90 | unsigned long tsc_startlow, tsc_starthigh; | ||
91 | unsigned long tsc_endlow, tsc_endhigh; | ||
92 | unsigned long hpet_start, hpet_end; | ||
93 | unsigned long result, remain; | ||
94 | |||
95 | hpet_start = hpet_readl(HPET_COUNTER); | ||
96 | rdtsc(tsc_startlow, tsc_starthigh); | ||
97 | do { | ||
98 | hpet_end = hpet_readl(HPET_COUNTER); | ||
99 | } while ((hpet_end - hpet_start) < CALIBRATE_CNT_HPET); | ||
100 | rdtsc(tsc_endlow, tsc_endhigh); | ||
101 | |||
102 | /* 64-bit subtract - gcc just messes up with long longs */ | ||
103 | __asm__("subl %2,%0\n\t" | ||
104 | "sbbl %3,%1" | ||
105 | :"=a" (tsc_endlow), "=d" (tsc_endhigh) | ||
106 | :"g" (tsc_startlow), "g" (tsc_starthigh), | ||
107 | "0" (tsc_endlow), "1" (tsc_endhigh)); | ||
108 | |||
109 | /* Error: ECPUTOOFAST */ | ||
110 | if (tsc_endhigh) | ||
111 | goto bad_calibration; | ||
112 | |||
113 | /* Error: ECPUTOOSLOW */ | ||
114 | if (tsc_endlow <= CALIBRATE_TIME_HPET) | ||
115 | goto bad_calibration; | ||
116 | |||
117 | ASM_DIV64_REG(result, remain, tsc_endlow, 0, CALIBRATE_TIME_HPET); | ||
118 | if (remain > (tsc_endlow >> 1)) | ||
119 | result++; /* rounding the result */ | ||
120 | |||
121 | if (tsc_hpet_quotient_ptr) { | ||
122 | unsigned long tsc_hpet_quotient; | ||
123 | |||
124 | ASM_DIV64_REG(tsc_hpet_quotient, remain, tsc_endlow, 0, | ||
125 | CALIBRATE_CNT_HPET); | ||
126 | if (remain > (tsc_endlow >> 1)) | ||
127 | tsc_hpet_quotient++; /* rounding the result */ | ||
128 | *tsc_hpet_quotient_ptr = tsc_hpet_quotient; | ||
129 | } | ||
130 | |||
131 | return result; | ||
132 | bad_calibration: | ||
133 | /* | ||
134 | * the CPU was so fast/slow that the quotient wouldn't fit in | ||
135 | * 32 bits.. | ||
136 | */ | ||
137 | return 0; | ||
138 | } | ||
139 | #endif | ||
140 | |||
141 | /* calculate cpu_khz */ | ||
142 | void __init init_cpu_khz(void) | ||
143 | { | ||
144 | if (cpu_has_tsc) { | ||
145 | unsigned long tsc_quotient = calibrate_tsc(); | ||
146 | if (tsc_quotient) { | ||
147 | /* report CPU clock rate in kHz. | ||
148 | * The formula is (10^3 * 2^32) / (2^32 / (clocks/us)) = | ||
149 | * clocks/ms = kHz. Our precision is about 100 ppm. | ||
150 | */ | ||
151 | { unsigned long eax=0, edx=1000; | ||
152 | __asm__("divl %2" | ||
153 | :"=a" (cpu_khz), "=d" (edx) | ||
154 | :"r" (tsc_quotient), | ||
155 | "0" (eax), "1" (edx)); | ||
156 | printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); | ||
157 | } | ||
158 | } | ||
159 | } | ||
160 | } | ||
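The quotient that calibrate_tsc() returns is a 32.32 fixed-point value, 2^32 / (TSC clocks per usec), so the divl in init_cpu_khz() computes (1000 * 2^32) / quotient = clocks per millisecond = kHz. A stand-alone sketch of the same arithmetic in portable 64-bit C, with a made-up 2394 MHz example:

#include <stdio.h>
#include <stdint.h>

static unsigned long khz_from_quotient(uint32_t tsc_quotient)
{
	/* the asm's dividend is edx:eax = 1000:0, i.e. 1000 << 32 */
	return (unsigned long)((1000ULL << 32) / tsc_quotient);
}

int main(void)
{
	/* a hypothetical 2394 MHz CPU: 2394 TSC clocks per usec */
	uint32_t quotient = (uint32_t)((1ULL << 32) / 2394);
	unsigned long khz = khz_from_quotient(quotient);

	printf("Detected %lu.%03lu MHz processor.\n",
	       khz / 1000, khz % 1000);
	return 0;
}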
diff --git a/arch/i386/kernel/timers/timer.c b/arch/i386/kernel/timers/timer.c new file mode 100644 index 000000000000..a3d6a288088b --- /dev/null +++ b/arch/i386/kernel/timers/timer.c | |||
@@ -0,0 +1,66 @@ | |||
1 | #include <linux/init.h> | ||
2 | #include <linux/kernel.h> | ||
3 | #include <linux/string.h> | ||
4 | #include <asm/timer.h> | ||
5 | |||
6 | #ifdef CONFIG_HPET_TIMER | ||
7 | /* | ||
8 | * An HPET memory read is slower than a TSC read, but it is more dependable, | ||
9 | * as it always runs at a constant frequency and avoids the complexity that | ||
10 | * cpufreq brings. So we prefer the HPET timer to the TSC-based one. Also, | ||
11 | * timer_pit cannot be used when HPET is active, so timer_tsc is the default. | ||
12 | */ | ||
13 | #endif | ||
14 | /* list of timers, ordered by preference, NULL terminated */ | ||
15 | static struct init_timer_opts* __initdata timers[] = { | ||
16 | #ifdef CONFIG_X86_CYCLONE_TIMER | ||
17 | &timer_cyclone_init, | ||
18 | #endif | ||
19 | #ifdef CONFIG_HPET_TIMER | ||
20 | &timer_hpet_init, | ||
21 | #endif | ||
22 | #ifdef CONFIG_X86_PM_TIMER | ||
23 | &timer_pmtmr_init, | ||
24 | #endif | ||
25 | &timer_tsc_init, | ||
26 | &timer_pit_init, | ||
27 | NULL, | ||
28 | }; | ||
29 | |||
30 | static char clock_override[10] __initdata; | ||
31 | |||
32 | static int __init clock_setup(char* str) | ||
33 | { | ||
34 | if (str) | ||
35 | strlcpy(clock_override, str, sizeof(clock_override)); | ||
36 | return 1; | ||
37 | } | ||
38 | __setup("clock=", clock_setup); | ||
39 | |||
40 | |||
41 | /* The chosen timesource has been found to be bad. | ||
42 | * Fall back to a known good timesource (the PIT) | ||
43 | */ | ||
44 | void clock_fallback(void) | ||
45 | { | ||
46 | cur_timer = &timer_pit; | ||
47 | } | ||
48 | |||
49 | /* iterates through the list of timers, returning the first | ||
50 | * one that initializes successfully. | ||
51 | */ | ||
52 | struct timer_opts* __init select_timer(void) | ||
53 | { | ||
54 | int i = 0; | ||
55 | |||
56 | /* find most preferred working timer */ | ||
57 | while (timers[i]) { | ||
58 | if (timers[i]->init) | ||
59 | if (timers[i]->init(clock_override) == 0) | ||
60 | return timers[i]->opts; | ||
61 | ++i; | ||
62 | } | ||
63 | |||
64 | panic("select_timer: Cannot find a suitable timer\n"); | ||
65 | return NULL; | ||
66 | } | ||
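select_timer() is an instance of a common probe-in-preference-order pattern: walk a NULL-terminated table of candidates and keep the first whose init() succeeds. A minimal sketch with illustrative sources; the names are not the kernel's:

#include <stdio.h>

struct source {
	const char *name;
	int (*init)(void);	/* 0 on success, like init_timer_opts */
};

static int fancy_init(void) { return -1; }	/* pretend probe fails */
static int plain_init(void) { return 0; }	/* always available */

static struct source fancy = { "fancy", fancy_init };
static struct source plain = { "plain", plain_init };

/* ordered by preference, NULL terminated, as in timers[] */
static struct source *candidates[] = { &fancy, &plain, NULL };

int main(void)
{
	for (int i = 0; candidates[i]; i++) {
		if (candidates[i]->init() == 0) {
			printf("using %s\n", candidates[i]->name);
			return 0;
		}
	}
	puts("no suitable source");	/* select_timer() panics here */
	return 1;
}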
diff --git a/arch/i386/kernel/timers/timer_cyclone.c b/arch/i386/kernel/timers/timer_cyclone.c new file mode 100644 index 000000000000..f6f1206a11bb --- /dev/null +++ b/arch/i386/kernel/timers/timer_cyclone.c | |||
@@ -0,0 +1,259 @@ | |||
1 | /* Cyclone-timer: | ||
2 | * This code implements timer_ops for the cyclone counter found | ||
3 | * on IBM x440, x360, and other Summit based systems. | ||
4 | * | ||
5 | * Copyright (C) 2002 IBM, John Stultz (johnstul@us.ibm.com) | ||
6 | */ | ||
7 | |||
8 | |||
9 | #include <linux/spinlock.h> | ||
10 | #include <linux/init.h> | ||
11 | #include <linux/timex.h> | ||
12 | #include <linux/errno.h> | ||
13 | #include <linux/string.h> | ||
14 | #include <linux/jiffies.h> | ||
15 | |||
16 | #include <asm/timer.h> | ||
17 | #include <asm/io.h> | ||
18 | #include <asm/pgtable.h> | ||
19 | #include <asm/fixmap.h> | ||
20 | #include "io_ports.h" | ||
21 | |||
22 | extern spinlock_t i8253_lock; | ||
23 | |||
24 | /* Number of usecs that the last interrupt was delayed */ | ||
25 | static int delay_at_last_interrupt; | ||
26 | |||
27 | #define CYCLONE_CBAR_ADDR 0xFEB00CD0 | ||
28 | #define CYCLONE_PMCC_OFFSET 0x51A0 | ||
29 | #define CYCLONE_MPMC_OFFSET 0x51D0 | ||
30 | #define CYCLONE_MPCS_OFFSET 0x51A8 | ||
31 | #define CYCLONE_TIMER_FREQ 100000000 | ||
32 | #define CYCLONE_TIMER_MASK (((u64)1<<40)-1) /* 40 bit mask */ | ||
33 | int use_cyclone = 0; | ||
34 | |||
35 | static u32* volatile cyclone_timer; /* Cyclone MPMC0 register */ | ||
36 | static u32 last_cyclone_low; | ||
37 | static u32 last_cyclone_high; | ||
38 | static unsigned long long monotonic_base; | ||
39 | static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; | ||
40 | |||
41 | /* helper macro to atomically read both cyclone counter registers */ | ||
42 | #define read_cyclone_counter(low,high) \ | ||
43 | do{ \ | ||
44 | high = cyclone_timer[1]; low = cyclone_timer[0]; \ | ||
45 | } while (high != cyclone_timer[1]); | ||
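The macro above is the standard tear-free way to sample a 64-bit counter exposed as two 32-bit words: read high, read low, and retry if the high word changed in between (i.e. the low word wrapped mid-read). A standalone sketch of the same pattern, with an ordinary volatile array standing in for the MMIO registers:

	#include <stdint.h>

	static volatile uint32_t ctr[2];	/* ctr[0] = low word, ctr[1] = high word */

	uint64_t read_counter64(void)
	{
		uint32_t lo, hi;

		do {
			hi = ctr[1];		/* sample the high word first */
			lo = ctr[0];		/* then the low word */
		} while (hi != ctr[1]);		/* retry if low wrapped into high */

		return ((uint64_t)hi << 32) | lo;
	}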
46 | |||
47 | |||
48 | static void mark_offset_cyclone(void) | ||
49 | { | ||
50 | unsigned long lost, delay; | ||
51 | unsigned long delta = last_cyclone_low; | ||
52 | int count; | ||
53 | unsigned long long this_offset, last_offset; | ||
54 | |||
55 | write_seqlock(&monotonic_lock); | ||
56 | last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low; | ||
57 | |||
58 | spin_lock(&i8253_lock); | ||
59 | read_cyclone_counter(last_cyclone_low,last_cyclone_high); | ||
60 | |||
61 | /* read values for delay_at_last_interrupt */ | ||
62 | outb_p(0x00, 0x43); /* latch the count ASAP */ | ||
63 | |||
64 | count = inb_p(0x40); /* read the latched count */ | ||
65 | count |= inb(0x40) << 8; | ||
66 | |||
67 | /* | ||
68 | * VIA686a test code... reset the latch if count > max + 1 | ||
69 | * from timer_pit.c - cjb | ||
70 | */ | ||
71 | if (count > LATCH) { | ||
72 | outb_p(0x34, PIT_MODE); | ||
73 | outb_p(LATCH & 0xff, PIT_CH0); | ||
74 | outb(LATCH >> 8, PIT_CH0); | ||
75 | count = LATCH - 1; | ||
76 | } | ||
77 | spin_unlock(&i8253_lock); | ||
78 | |||
79 | /* lost tick compensation */ | ||
80 | delta = last_cyclone_low - delta; | ||
81 | delta /= (CYCLONE_TIMER_FREQ/1000000); | ||
82 | delta += delay_at_last_interrupt; | ||
83 | lost = delta/(1000000/HZ); | ||
84 | delay = delta%(1000000/HZ); | ||
85 | if (lost >= 2) | ||
86 | jiffies_64 += lost-1; | ||
87 | |||
88 | /* update the monotonic base value */ | ||
89 | this_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low; | ||
90 | monotonic_base += (this_offset - last_offset) & CYCLONE_TIMER_MASK; | ||
91 | write_sequnlock(&monotonic_lock); | ||
92 | |||
93 | /* calculate delay_at_last_interrupt */ | ||
94 | count = ((LATCH-1) - count) * TICK_SIZE; | ||
95 | delay_at_last_interrupt = (count + LATCH/2) / LATCH; | ||
96 | |||
97 | |||
98 | /* catch corner case where tick rollover occurred | ||
99 | * between cyclone and pit reads (as noted when | ||
100 | * usec delta is > 90% # of usecs/tick) | ||
101 | */ | ||
102 | if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ)) | ||
103 | jiffies_64++; | ||
104 | } | ||
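The lost-tick compensation above converts the cyclone delta to microseconds, then splits it into whole tick periods (lost) and a remainder (delay); jiffies_64 is advanced by lost - 1 because the current interrupt accounts for one tick itself. A worked example of that arithmetic, assuming HZ = 1000 and a made-up 3450-usec delta:

	#include <stdio.h>

	int main(void)
	{
		unsigned long hz = 1000;			/* assumed tick rate */
		unsigned long usecs_per_tick = 1000000 / hz;
		unsigned long delta = 3450;			/* usecs since last tick (made up) */
		unsigned long lost  = delta / usecs_per_tick;	/* 3 tick periods elapsed */
		unsigned long delay = delta % usecs_per_tick;	/* 450 usecs left over */

		if (lost >= 2)
			printf("advance jiffies_64 by %lu\n", lost - 1);	/* 2 */
		printf("residual delay: %lu usecs\n", delay);
		return 0;
	}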
105 | |||
106 | static unsigned long get_offset_cyclone(void) | ||
107 | { | ||
108 | u32 offset; | ||
109 | |||
110 | if(!cyclone_timer) | ||
111 | return delay_at_last_interrupt; | ||
112 | |||
113 | /* Read the cyclone timer */ | ||
114 | offset = cyclone_timer[0]; | ||
115 | |||
116 | /* .. relative to previous jiffy */ | ||
117 | offset = offset - last_cyclone_low; | ||
118 | |||
119 | /* convert cyclone ticks to microseconds */ | ||
120 | /* XXX slow, can we speed this up? */ | ||
121 | offset = offset/(CYCLONE_TIMER_FREQ/1000000); | ||
122 | |||
123 | /* our adjusted time offset in microseconds */ | ||
124 | return delay_at_last_interrupt + offset; | ||
125 | } | ||
126 | |||
127 | static unsigned long long monotonic_clock_cyclone(void) | ||
128 | { | ||
129 | u32 now_low, now_high; | ||
130 | unsigned long long last_offset, this_offset, base; | ||
131 | unsigned long long ret; | ||
132 | unsigned seq; | ||
133 | |||
134 | /* atomically read monotonic base & last_offset */ | ||
135 | do { | ||
136 | seq = read_seqbegin(&monotonic_lock); | ||
137 | last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low; | ||
138 | base = monotonic_base; | ||
139 | } while (read_seqretry(&monotonic_lock, seq)); | ||
140 | |||
141 | |||
142 | /* Read the cyclone counter */ | ||
143 | read_cyclone_counter(now_low,now_high); | ||
144 | this_offset = ((unsigned long long)now_high<<32)|now_low; | ||
145 | |||
146 | /* convert to nanoseconds */ | ||
147 | ret = base + ((this_offset - last_offset)&CYCLONE_TIMER_MASK); | ||
148 | return ret * (1000000000 / CYCLONE_TIMER_FREQ); | ||
149 | } | ||
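The read_seqbegin/read_seqretry loop above gives the reader a consistent {base, last_offset} snapshot without ever blocking the writer. A minimal single-writer seqcount sketch of that pattern (illustrative only; the kernel's seqlock additionally handles SMP memory barriers):

	#include <stdint.h>

	static volatile unsigned seq;	/* even = stable, odd = write in progress */
	static uint64_t base, last;

	void writer_update(uint64_t b, uint64_t l)
	{
		seq++;				/* mark update in progress (odd) */
		base = b;
		last = l;
		seq++;				/* mark update complete (even) */
	}

	void reader_snapshot(uint64_t *b, uint64_t *l)
	{
		unsigned s;

		do {
			while ((s = seq) & 1)
				;		/* spin while an update is in flight */
			*b = base;
			*l = last;
		} while (seq != s);		/* retry if the writer got in */
	}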
150 | |||
151 | static int __init init_cyclone(char* override) | ||
152 | { | ||
153 | u32* reg; | ||
154 | u32 base; /* saved cyclone base address */ | ||
155 | u32 pageaddr; /* page that contains cyclone_timer register */ | ||
156 | u32 offset; /* offset from pageaddr to cyclone_timer register */ | ||
157 | int i; | ||
158 | |||
159 | /* check clock override */ | ||
160 | if (override[0] && strncmp(override,"cyclone",7)) | ||
161 | return -ENODEV; | ||
162 | |||
163 | /* make sure we're on a Summit box */ | ||
164 | if(!use_cyclone) return -ENODEV; | ||
165 | |||
166 | printk(KERN_INFO "Summit chipset: Starting Cyclone Counter.\n"); | ||
167 | |||
168 | /* find base address */ | ||
169 | pageaddr = (CYCLONE_CBAR_ADDR)&PAGE_MASK; | ||
170 | offset = (CYCLONE_CBAR_ADDR)&(~PAGE_MASK); | ||
171 | set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr); | ||
172 | reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset); | ||
173 | if(!reg){ | ||
174 | printk(KERN_ERR "Summit chipset: Could not find valid CBAR register.\n"); | ||
175 | return -ENODEV; | ||
176 | } | ||
177 | base = *reg; | ||
178 | if(!base){ | ||
179 | printk(KERN_ERR "Summit chipset: Could not find valid CBAR value.\n"); | ||
180 | return -ENODEV; | ||
181 | } | ||
182 | |||
183 | /* setup PMCC */ | ||
184 | pageaddr = (base + CYCLONE_PMCC_OFFSET)&PAGE_MASK; | ||
185 | offset = (base + CYCLONE_PMCC_OFFSET)&(~PAGE_MASK); | ||
186 | set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr); | ||
187 | reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset); | ||
188 | if(!reg){ | ||
189 | printk(KERN_ERR "Summit chipset: Could not find valid PMCC register.\n"); | ||
190 | return -ENODEV; | ||
191 | } | ||
192 | reg[0] = 0x00000001; | ||
193 | |||
194 | /* setup MPCS */ | ||
195 | pageaddr = (base + CYCLONE_MPCS_OFFSET)&PAGE_MASK; | ||
196 | offset = (base + CYCLONE_MPCS_OFFSET)&(~PAGE_MASK); | ||
197 | set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr); | ||
198 | reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset); | ||
199 | if(!reg){ | ||
200 | printk(KERN_ERR "Summit chipset: Could not find valid MPCS register.\n"); | ||
201 | return -ENODEV; | ||
202 | } | ||
203 | reg[0] = 0x00000001; | ||
204 | |||
205 | /* map in cyclone_timer */ | ||
206 | pageaddr = (base + CYCLONE_MPMC_OFFSET)&PAGE_MASK; | ||
207 | offset = (base + CYCLONE_MPMC_OFFSET)&(~PAGE_MASK); | ||
208 | set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr); | ||
209 | cyclone_timer = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset); | ||
210 | if(!cyclone_timer){ | ||
211 | printk(KERN_ERR "Summit chipset: Could not find valid MPMC register.\n"); | ||
212 | return -ENODEV; | ||
213 | } | ||
214 | |||
215 | /* quick test to make sure it's ticking */ | ||
216 | for(i=0; i<3; i++){ | ||
217 | u32 old = cyclone_timer[0]; | ||
218 | int stall = 100; | ||
219 | while(stall--) barrier(); | ||
220 | if(cyclone_timer[0] == old){ | ||
221 | printk(KERN_ERR "Summit chipset: Counter not counting! DISABLED\n"); | ||
222 | cyclone_timer = NULL; | ||
223 | return -ENODEV; | ||
224 | } | ||
225 | } | ||
226 | |||
227 | init_cpu_khz(); | ||
228 | |||
229 | /* Everything looks good! */ | ||
230 | return 0; | ||
231 | } | ||
232 | |||
233 | |||
234 | static void delay_cyclone(unsigned long loops) | ||
235 | { | ||
236 | unsigned long bclock, now; | ||
237 | if(!cyclone_timer) | ||
238 | return; | ||
239 | bclock = cyclone_timer[0]; | ||
240 | do { | ||
241 | rep_nop(); | ||
242 | now = cyclone_timer[0]; | ||
243 | } while ((now-bclock) < loops); | ||
244 | } | ||
245 | /************************************************************/ | ||
246 | |||
247 | /* cyclone timer_opts struct */ | ||
248 | static struct timer_opts timer_cyclone = { | ||
249 | .name = "cyclone", | ||
250 | .mark_offset = mark_offset_cyclone, | ||
251 | .get_offset = get_offset_cyclone, | ||
252 | .monotonic_clock = monotonic_clock_cyclone, | ||
253 | .delay = delay_cyclone, | ||
254 | }; | ||
255 | |||
256 | struct init_timer_opts __initdata timer_cyclone_init = { | ||
257 | .init = init_cyclone, | ||
258 | .opts = &timer_cyclone, | ||
259 | }; | ||
diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c new file mode 100644 index 000000000000..713134e71844 --- /dev/null +++ b/arch/i386/kernel/timers/timer_hpet.c | |||
@@ -0,0 +1,191 @@ | |||
1 | /* | ||
2 | * This code largely moved from arch/i386/kernel/time.c. | ||
3 | * See comments there for proper credits. | ||
4 | */ | ||
5 | |||
6 | #include <linux/spinlock.h> | ||
7 | #include <linux/init.h> | ||
8 | #include <linux/timex.h> | ||
9 | #include <linux/errno.h> | ||
10 | #include <linux/string.h> | ||
11 | #include <linux/jiffies.h> | ||
12 | |||
13 | #include <asm/timer.h> | ||
14 | #include <asm/io.h> | ||
15 | #include <asm/processor.h> | ||
16 | |||
17 | #include "io_ports.h" | ||
18 | #include "mach_timer.h" | ||
19 | #include <asm/hpet.h> | ||
20 | |||
21 | static unsigned long hpet_usec_quotient; /* convert hpet clks to usec */ | ||
22 | static unsigned long tsc_hpet_quotient; /* convert tsc to hpet clks */ | ||
23 | static unsigned long hpet_last; /* hpet counter value at last tick*/ | ||
24 | static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */ | ||
25 | static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */ | ||
26 | static unsigned long long monotonic_base; | ||
27 | static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; | ||
28 | |||
29 | /* convert from cycles(64bits) => nanoseconds (64bits) | ||
30 | * basic equation: | ||
31 | * ns = cycles / (freq / ns_per_sec) | ||
32 | * ns = cycles * (ns_per_sec / freq) | ||
33 | * ns = cycles * (10^9 / (cpu_mhz * 10^6)) | ||
34 | * ns = cycles * (10^3 / cpu_mhz) | ||
35 | * | ||
36 | * Then we use scaling math (suggested by george@mvista.com) to get: | ||
37 | * ns = cycles * (10^3 * SC / cpu_mhz) / SC | ||
38 | * ns = cycles * cyc2ns_scale / SC | ||
39 | * | ||
40 | * And since SC is a constant power of two, we can convert the div | ||
41 | * into a shift. | ||
42 | * -johnstul@us.ibm.com "math is hard, lets go shopping!" | ||
43 | */ | ||
44 | static unsigned long cyc2ns_scale; | ||
45 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ | ||
46 | |||
47 | static inline void set_cyc2ns_scale(unsigned long cpu_mhz) | ||
48 | { | ||
49 | cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz; | ||
50 | } | ||
51 | |||
52 | static inline unsigned long long cycles_2_ns(unsigned long long cyc) | ||
53 | { | ||
54 | return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; | ||
55 | } | ||
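A quick numeric check of the scaling math above, as a standalone sketch assuming a hypothetical 800 MHz CPU (where each cycle should map to 1.25 ns):

	#include <stdio.h>

	#define CYC2NS_SCALE_FACTOR 10

	int main(void)
	{
		unsigned long cpu_mhz = 800;	/* assumed clock */
		unsigned long scale = (1000 << CYC2NS_SCALE_FACTOR) / cpu_mhz;	/* 1280 */
		unsigned long long cyc = 4000000;	/* 4M cycles = 5 ms at 800 MHz */

		printf("%llu cycles -> %llu ns\n", cyc,
		       (cyc * scale) >> CYC2NS_SCALE_FACTOR);	/* prints 5000000 */
		return 0;
	}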
56 | |||
57 | static unsigned long long monotonic_clock_hpet(void) | ||
58 | { | ||
59 | unsigned long long last_offset, this_offset, base; | ||
60 | unsigned seq; | ||
61 | |||
62 | /* atomically read monotonic base & last_offset */ | ||
63 | do { | ||
64 | seq = read_seqbegin(&monotonic_lock); | ||
65 | last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; | ||
66 | base = monotonic_base; | ||
67 | } while (read_seqretry(&monotonic_lock, seq)); | ||
68 | |||
69 | /* Read the Time Stamp Counter */ | ||
70 | rdtscll(this_offset); | ||
71 | |||
72 | /* return the value in ns */ | ||
73 | return base + cycles_2_ns(this_offset - last_offset); | ||
74 | } | ||
75 | |||
76 | static unsigned long get_offset_hpet(void) | ||
77 | { | ||
78 | register unsigned long eax, edx; | ||
79 | |||
80 | eax = hpet_readl(HPET_COUNTER); | ||
81 | eax -= hpet_last; /* hpet delta */ | ||
82 | |||
83 | /* | ||
84 | * Time offset = (hpet delta) * ( usecs per HPET clock ) | ||
85 | * = (hpet delta) * ( usecs per tick / HPET clocks per tick) | ||
86 | * = (hpet delta) * ( hpet_usec_quotient ) / (2^32) | ||
87 | * | ||
88 | * Where, | ||
89 | * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick | ||
90 | * | ||
91 | * Using a mull instead of a divl saves some cycles in critical path. | ||
92 | */ | ||
93 | ASM_MUL64_REG(eax, edx, hpet_usec_quotient, eax); | ||
94 | |||
95 | /* our adjusted time offset in microseconds */ | ||
96 | return edx; | ||
97 | } | ||
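The mull above is a 32x32->64 multiply that keeps only the high word: with hpet_usec_quotient acting as a 32.32 fixed-point usecs-per-HPET-clock factor, the result is (delta * quotient) >> 32. A portable C sketch of that one step:

	#include <stdint.h>

	/* multiply a 32-bit delta by a 32.32 fixed-point quotient and keep
	 * the high 32 bits: the elapsed time in usecs */
	uint32_t scale_32_32(uint32_t delta, uint32_t quotient)
	{
		return (uint32_t)(((uint64_t)delta * quotient) >> 32);
	}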
98 | |||
99 | static void mark_offset_hpet(void) | ||
100 | { | ||
101 | unsigned long long this_offset, last_offset; | ||
102 | unsigned long offset; | ||
103 | |||
104 | write_seqlock(&monotonic_lock); | ||
105 | last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; | ||
106 | rdtsc(last_tsc_low, last_tsc_high); | ||
107 | |||
108 | offset = hpet_readl(HPET_T0_CMP) - hpet_tick; | ||
109 | if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))) { | ||
110 | int lost_ticks = (offset - hpet_last) / hpet_tick; | ||
111 | jiffies_64 += lost_ticks; | ||
112 | } | ||
113 | hpet_last = offset; | ||
114 | |||
115 | /* update the monotonic base value */ | ||
116 | this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; | ||
117 | monotonic_base += cycles_2_ns(this_offset - last_offset); | ||
118 | write_sequnlock(&monotonic_lock); | ||
119 | } | ||
120 | |||
121 | static void delay_hpet(unsigned long loops) | ||
122 | { | ||
123 | unsigned long hpet_start, hpet_end; | ||
124 | unsigned long eax; | ||
125 | |||
126 | /* loops is the number of cpu cycles. Convert it to hpet clocks */ | ||
127 | ASM_MUL64_REG(eax, loops, tsc_hpet_quotient, loops); | ||
128 | |||
129 | hpet_start = hpet_readl(HPET_COUNTER); | ||
130 | do { | ||
131 | rep_nop(); | ||
132 | hpet_end = hpet_readl(HPET_COUNTER); | ||
133 | } while ((hpet_end - hpet_start) < (loops)); | ||
134 | } | ||
135 | |||
136 | static int __init init_hpet(char* override) | ||
137 | { | ||
138 | unsigned long result, remain; | ||
139 | |||
140 | /* check clock override */ | ||
141 | if (override[0] && strncmp(override,"hpet",4)) | ||
142 | return -ENODEV; | ||
143 | |||
144 | if (!is_hpet_enabled()) | ||
145 | return -ENODEV; | ||
146 | |||
147 | printk("Using HPET for gettimeofday\n"); | ||
148 | if (cpu_has_tsc) { | ||
149 | unsigned long tsc_quotient = calibrate_tsc_hpet(&tsc_hpet_quotient); | ||
150 | if (tsc_quotient) { | ||
151 | /* report CPU clock rate in Hz. | ||
152 | * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) = | ||
153 | * clock/second. Our precision is about 100 ppm. | ||
154 | */ | ||
155 | { unsigned long eax=0, edx=1000; | ||
156 | ASM_DIV64_REG(cpu_khz, edx, tsc_quotient, | ||
157 | eax, edx); | ||
158 | printk("Detected %lu.%03lu MHz processor.\n", | ||
159 | cpu_khz / 1000, cpu_khz % 1000); | ||
160 | } | ||
161 | set_cyc2ns_scale(cpu_khz/1000); | ||
162 | } | ||
163 | } | ||
164 | |||
165 | /* | ||
166 | * Math to calculate hpet to usec multiplier | ||
167 | * Look for the comments at get_offset_hpet() | ||
168 | */ | ||
169 | ASM_DIV64_REG(result, remain, hpet_tick, 0, KERNEL_TICK_USEC); | ||
170 | if (remain > (hpet_tick >> 1)) | ||
171 | result++; /* rounding the result */ | ||
172 | hpet_usec_quotient = result; | ||
173 | |||
174 | return 0; | ||
175 | } | ||
176 | |||
177 | /************************************************************/ | ||
178 | |||
179 | /* hpet timer_opts struct */ | ||
180 | static struct timer_opts timer_hpet = { | ||
181 | .name = "hpet", | ||
182 | .mark_offset = mark_offset_hpet, | ||
183 | .get_offset = get_offset_hpet, | ||
184 | .monotonic_clock = monotonic_clock_hpet, | ||
185 | .delay = delay_hpet, | ||
186 | }; | ||
187 | |||
188 | struct init_timer_opts __initdata timer_hpet_init = { | ||
189 | .init = init_hpet, | ||
190 | .opts = &timer_hpet, | ||
191 | }; | ||
diff --git a/arch/i386/kernel/timers/timer_none.c b/arch/i386/kernel/timers/timer_none.c new file mode 100644 index 000000000000..4ea2f414dbbd --- /dev/null +++ b/arch/i386/kernel/timers/timer_none.c | |||
@@ -0,0 +1,39 @@ | |||
1 | #include <linux/init.h> | ||
2 | #include <asm/timer.h> | ||
3 | |||
4 | static void mark_offset_none(void) | ||
5 | { | ||
6 | /* nothing needed */ | ||
7 | } | ||
8 | |||
9 | static unsigned long get_offset_none(void) | ||
10 | { | ||
11 | return 0; | ||
12 | } | ||
13 | |||
14 | static unsigned long long monotonic_clock_none(void) | ||
15 | { | ||
16 | return 0; | ||
17 | } | ||
18 | |||
19 | static void delay_none(unsigned long loops) | ||
20 | { | ||
21 | int d0; | ||
22 | __asm__ __volatile__( | ||
23 | "\tjmp 1f\n" | ||
24 | ".align 16\n" | ||
25 | "1:\tjmp 2f\n" | ||
26 | ".align 16\n" | ||
27 | "2:\tdecl %0\n\tjns 2b" | ||
28 | :"=&a" (d0) | ||
29 | :"0" (loops)); | ||
30 | } | ||
31 | |||
32 | /* none timer_opts struct */ | ||
33 | struct timer_opts timer_none = { | ||
34 | .name = "none", | ||
35 | .mark_offset = mark_offset_none, | ||
36 | .get_offset = get_offset_none, | ||
37 | .monotonic_clock = monotonic_clock_none, | ||
38 | .delay = delay_none, | ||
39 | }; | ||
diff --git a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c new file mode 100644 index 000000000000..967d5453cd0e --- /dev/null +++ b/arch/i386/kernel/timers/timer_pit.c | |||
@@ -0,0 +1,206 @@ | |||
1 | /* | ||
2 | * This code largely moved from arch/i386/kernel/time.c. | ||
3 | * See comments there for proper credits. | ||
4 | */ | ||
5 | |||
6 | #include <linux/spinlock.h> | ||
7 | #include <linux/module.h> | ||
8 | #include <linux/device.h> | ||
9 | #include <linux/irq.h> | ||
10 | #include <linux/sysdev.h> | ||
11 | #include <linux/timex.h> | ||
12 | #include <asm/delay.h> | ||
13 | #include <asm/mpspec.h> | ||
14 | #include <asm/timer.h> | ||
15 | #include <asm/smp.h> | ||
16 | #include <asm/io.h> | ||
17 | #include <asm/arch_hooks.h> | ||
18 | |||
19 | extern spinlock_t i8259A_lock; | ||
20 | extern spinlock_t i8253_lock; | ||
21 | #include "do_timer.h" | ||
22 | #include "io_ports.h" | ||
23 | |||
24 | static int count_p; /* counter in get_offset_pit() */ | ||
25 | |||
26 | static int __init init_pit(char* override) | ||
27 | { | ||
28 | /* check clock override */ | ||
29 | if (override[0] && strncmp(override,"pit",3)) | ||
30 | printk(KERN_ERR "Warning: clock= override failed. Defaulting to PIT\n"); | ||
31 | |||
32 | count_p = LATCH; | ||
33 | return 0; | ||
34 | } | ||
35 | |||
36 | static void mark_offset_pit(void) | ||
37 | { | ||
38 | /* nothing needed */ | ||
39 | } | ||
40 | |||
41 | static unsigned long long monotonic_clock_pit(void) | ||
42 | { | ||
43 | return 0; | ||
44 | } | ||
45 | |||
46 | static void delay_pit(unsigned long loops) | ||
47 | { | ||
48 | int d0; | ||
49 | __asm__ __volatile__( | ||
50 | "\tjmp 1f\n" | ||
51 | ".align 16\n" | ||
52 | "1:\tjmp 2f\n" | ||
53 | ".align 16\n" | ||
54 | "2:\tdecl %0\n\tjns 2b" | ||
55 | :"=&a" (d0) | ||
56 | :"0" (loops)); | ||
57 | } | ||
58 | |||
59 | |||
60 | /* This function must be called with xtime_lock held. | ||
61 | * It was inspired by Steve McCanne's microtime-i386 for BSD. -- jrs | ||
62 | * | ||
63 | * However, the pc-audio speaker driver changes the divisor so that | ||
64 | * it gets interrupted rather more often - it loads 64 into the | ||
65 | * counter rather than 11932! This has an adverse impact on | ||
66 | * do_gettimeoffset() -- it stops working! What is also not | ||
67 | * good is that the interval that our timer function gets called | ||
68 | * is no longer 10.0002 ms, but 9.9767 ms. To get around this | ||
69 | * would require using a different timing source. Maybe someone | ||
70 | * could use the RTC - I know that this can interrupt at frequencies | ||
71 | * ranging from 8192Hz to 2Hz. If I had the energy, I'd somehow fix | ||
72 | * it so that at startup, the timer code in sched.c would select | ||
73 | * using either the RTC or the 8253 timer. The decision would be | ||
74 | * based on whether there was any other device around that needed | ||
75 | * to trample on the 8253. I'd set up the RTC to interrupt at 1024 Hz, | ||
76 | * and then do some jiggery to have a version of do_timer that | ||
77 | * advanced the clock by 1/1024 s. Every time that reached over 1/100 | ||
78 | * of a second, then do all the old code. If the time was kept correct | ||
79 | * then do_gettimeoffset could just return 0 - there is no low order | ||
80 | * divider that can be accessed. | ||
81 | * | ||
82 | * Ideally, you would be able to use the RTC for the speaker driver, | ||
83 | * but it appears that the speaker driver really needs interrupts more | ||
84 | * often than every 120 us or so. | ||
85 | * | ||
86 | * Anyway, this needs more thought.... pjsg (1993-08-28) | ||
87 | * | ||
88 | * If you are really that interested, you should be reading | ||
89 | * comp.protocols.time.ntp! | ||
90 | */ | ||
91 | |||
92 | static unsigned long get_offset_pit(void) | ||
93 | { | ||
94 | int count; | ||
95 | unsigned long flags; | ||
96 | static unsigned long jiffies_p = 0; | ||
97 | |||
98 | /* | ||
99 | * cache volatile jiffies temporarily; we have xtime_lock. | ||
100 | */ | ||
101 | unsigned long jiffies_t; | ||
102 | |||
103 | spin_lock_irqsave(&i8253_lock, flags); | ||
104 | /* timer count may underflow right here */ | ||
105 | outb_p(0x00, PIT_MODE); /* latch the count ASAP */ | ||
106 | |||
107 | count = inb_p(PIT_CH0); /* read the latched count */ | ||
108 | |||
109 | /* | ||
110 | * We do this guaranteed double memory access instead of a _p | ||
111 | * postfix in the previous port access. Wheee, hackady hack | ||
112 | */ | ||
113 | jiffies_t = jiffies; | ||
114 | |||
115 | count |= inb_p(PIT_CH0) << 8; | ||
116 | |||
117 | /* VIA686a test code... reset the latch if count > max + 1 */ | ||
118 | if (count > LATCH) { | ||
119 | outb_p(0x34, PIT_MODE); | ||
120 | outb_p(LATCH & 0xff, PIT_CH0); | ||
121 | outb(LATCH >> 8, PIT_CH0); | ||
122 | count = LATCH - 1; | ||
123 | } | ||
124 | |||
125 | /* | ||
126 | * avoiding timer inconsistencies (they are rare, but they happen)... | ||
127 | * there are two kinds of problems that must be avoided here: | ||
128 | * 1. the timer counter underflows | ||
129 | * 2. hardware problem with the timer, not giving us continuous time, | ||
130 | * the counter does small "jumps" upwards on some Pentium systems, | ||
131 | * (see c't 95/10 page 335 for Neptun bug.) | ||
132 | */ | ||
133 | |||
134 | if( jiffies_t == jiffies_p ) { | ||
135 | if( count > count_p ) { | ||
136 | /* the nutcase */ | ||
137 | count = do_timer_overflow(count); | ||
138 | } | ||
139 | } else | ||
140 | jiffies_p = jiffies_t; | ||
141 | |||
142 | count_p = count; | ||
143 | |||
144 | spin_unlock_irqrestore(&i8253_lock, flags); | ||
145 | |||
146 | count = ((LATCH-1) - count) * TICK_SIZE; | ||
147 | count = (count + LATCH/2) / LATCH; | ||
148 | |||
149 | return count; | ||
150 | } | ||
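The final two lines above turn the remaining PIT count into elapsed microseconds within the current tick, adding LATCH/2 before the division so the result rounds rather than truncates. A worked example assuming HZ = 100 (LATCH = 11932, TICK_SIZE = 10000 usecs):

	#include <stdio.h>

	int main(void)
	{
		int latch = 11932, tick_size = 10000;	/* HZ = 100 values */
		int count = 5966;	/* latched PIT value: ~half a tick remaining */
		int usecs;

		usecs = ((latch - 1) - count) * tick_size;
		usecs = (usecs + latch / 2) / latch;	/* divide with rounding */
		printf("%d usecs into the tick\n", usecs);	/* 4999, ~half a tick */
		return 0;
	}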
151 | |||
152 | |||
153 | /* pit timer_opts struct */ | ||
154 | struct timer_opts timer_pit = { | ||
155 | .name = "pit", | ||
156 | .mark_offset = mark_offset_pit, | ||
157 | .get_offset = get_offset_pit, | ||
158 | .monotonic_clock = monotonic_clock_pit, | ||
159 | .delay = delay_pit, | ||
160 | }; | ||
161 | |||
162 | struct init_timer_opts __initdata timer_pit_init = { | ||
163 | .init = init_pit, | ||
164 | .opts = &timer_pit, | ||
165 | }; | ||
166 | |||
167 | void setup_pit_timer(void) | ||
168 | { | ||
169 | extern spinlock_t i8253_lock; | ||
170 | unsigned long flags; | ||
171 | |||
172 | spin_lock_irqsave(&i8253_lock, flags); | ||
173 | outb_p(0x34,PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */ | ||
174 | udelay(10); | ||
175 | outb_p(LATCH & 0xff , PIT_CH0); /* LSB */ | ||
176 | udelay(10); | ||
177 | outb(LATCH >> 8 , PIT_CH0); /* MSB */ | ||
178 | spin_unlock_irqrestore(&i8253_lock, flags); | ||
179 | } | ||
180 | |||
181 | static int timer_resume(struct sys_device *dev) | ||
182 | { | ||
183 | setup_pit_timer(); | ||
184 | return 0; | ||
185 | } | ||
186 | |||
187 | static struct sysdev_class timer_sysclass = { | ||
188 | set_kset_name("timer_pit"), | ||
189 | .resume = timer_resume, | ||
190 | }; | ||
191 | |||
192 | static struct sys_device device_timer = { | ||
193 | .id = 0, | ||
194 | .cls = &timer_sysclass, | ||
195 | }; | ||
196 | |||
197 | static int __init init_timer_sysfs(void) | ||
198 | { | ||
199 | int error = sysdev_class_register(&timer_sysclass); | ||
200 | if (!error) | ||
201 | error = sysdev_register(&device_timer); | ||
202 | return error; | ||
203 | } | ||
204 | |||
205 | device_initcall(init_timer_sysfs); | ||
206 | |||
diff --git a/arch/i386/kernel/timers/timer_pm.c b/arch/i386/kernel/timers/timer_pm.c new file mode 100644 index 000000000000..d77f22030fe6 --- /dev/null +++ b/arch/i386/kernel/timers/timer_pm.c | |||
@@ -0,0 +1,258 @@ | |||
1 | /* | ||
2 | * (C) Dominik Brodowski <linux@brodo.de> 2003 | ||
3 | * | ||
4 | * Driver to use the Power Management Timer (PMTMR) available in some | ||
5 | * southbridges as primary timing source for the Linux kernel. | ||
6 | * | ||
7 | * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c, | ||
8 | * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4. | ||
9 | * | ||
10 | * This file is licensed under the GPL v2. | ||
11 | */ | ||
12 | |||
13 | |||
14 | #include <linux/kernel.h> | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/device.h> | ||
17 | #include <linux/init.h> | ||
18 | #include <asm/types.h> | ||
19 | #include <asm/timer.h> | ||
20 | #include <asm/smp.h> | ||
21 | #include <asm/io.h> | ||
22 | #include <asm/arch_hooks.h> | ||
23 | |||
24 | #include <linux/timex.h> | ||
25 | #include "mach_timer.h" | ||
26 | |||
27 | /* Number of PMTMR ticks expected during calibration run */ | ||
28 | #define PMTMR_TICKS_PER_SEC 3579545 | ||
29 | #define PMTMR_EXPECTED_RATE \ | ||
30 | ((CALIBRATE_LATCH * (PMTMR_TICKS_PER_SEC >> 10)) / (CLOCK_TICK_RATE>>10)) | ||
31 | |||
32 | |||
33 | /* The I/O port the PMTMR resides at. | ||
34 | * The location is detected during setup_arch(), | ||
35 | * in arch/i386/acpi/boot.c */ | ||
36 | u32 pmtmr_ioport = 0; | ||
37 | |||
38 | |||
39 | /* value of the Power timer at last timer interrupt */ | ||
40 | static u32 offset_tick; | ||
41 | static u32 offset_delay; | ||
42 | |||
43 | static unsigned long long monotonic_base; | ||
44 | static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; | ||
45 | |||
46 | #define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ | ||
47 | |||
48 | /*helper function to safely read acpi pm timesource*/ | ||
49 | static inline u32 read_pmtmr(void) | ||
50 | { | ||
51 | u32 v1=0,v2=0,v3=0; | ||
52 | /* It has been reported that on various broken chipsets | ||
53 | * (ICH4, PIIX4 and PIIX4E) the ACPI PM time source is not | ||
54 | * latched, so it must be read multiple times to ensure that | ||
55 | * a safe value is read. | ||
56 | */ | ||
57 | do { | ||
58 | v1 = inl(pmtmr_ioport); | ||
59 | v2 = inl(pmtmr_ioport); | ||
60 | v3 = inl(pmtmr_ioport); | ||
61 | } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1) | ||
62 | || (v3 > v1 && v3 < v2)); | ||
63 | |||
64 | /* mask the output to 24 bits */ | ||
65 | return v2 & ACPI_PM_MASK; | ||
66 | } | ||
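The triple read above accepts the middle value only once the three samples are consistently ordered, which filters out a corrupted read from an unlatched counter. A standalone sketch of the same filter, with a hypothetical read_raw() stub standing in for the port read:

	#include <stdio.h>
	#include <stdint.h>

	#define PM_MASK 0xFFFFFF		/* 24-bit counter, as above */

	static uint32_t fake_counter;
	static uint32_t read_raw(void)		/* hypothetical unlatched read */
	{
		return fake_counter++ & PM_MASK;
	}

	static uint32_t read_stable(void)
	{
		uint32_t v1, v2, v3;

		do {
			v1 = read_raw();
			v2 = read_raw();
			v3 = read_raw();
			/* accept v2 only when the three reads are ordered (mod wrap) */
		} while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1)
			 || (v3 > v1 && v3 < v2));

		return v2 & PM_MASK;
	}

	int main(void)
	{
		printf("stable read: %u\n", read_stable());
		return 0;
	}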
67 | |||
68 | |||
69 | /* | ||
70 | * Some boards have the PMTMR running way too fast. We check | ||
71 | * the PMTMR rate against PIT channel 2 to catch these cases. | ||
72 | */ | ||
73 | static int verify_pmtmr_rate(void) | ||
74 | { | ||
75 | u32 value1, value2; | ||
76 | unsigned long count, delta; | ||
77 | |||
78 | mach_prepare_counter(); | ||
79 | value1 = read_pmtmr(); | ||
80 | mach_countup(&count); | ||
81 | value2 = read_pmtmr(); | ||
82 | delta = (value2 - value1) & ACPI_PM_MASK; | ||
83 | |||
84 | /* Check that the PMTMR delta is within 5% of what we expect */ | ||
85 | if (delta < (PMTMR_EXPECTED_RATE * 19) / 20 || | ||
86 | delta > (PMTMR_EXPECTED_RATE * 21) / 20) { | ||
87 | printk(KERN_INFO "PM-Timer running at invalid rate: %lu%% of normal - aborting.\n", 100UL * delta / PMTMR_EXPECTED_RATE); | ||
88 | return -1; | ||
89 | } | ||
90 | |||
91 | return 0; | ||
92 | } | ||
93 | |||
94 | |||
95 | static int init_pmtmr(char* override) | ||
96 | { | ||
97 | u32 value1, value2; | ||
98 | unsigned int i; | ||
99 | |||
100 | if (override[0] && strncmp(override,"pmtmr",5)) | ||
101 | return -ENODEV; | ||
102 | |||
103 | if (!pmtmr_ioport) | ||
104 | return -ENODEV; | ||
105 | |||
106 | /* we use the TSC for delay_pmtmr, so make sure it exists */ | ||
107 | if (!cpu_has_tsc) | ||
108 | return -ENODEV; | ||
109 | |||
110 | /* "verify" this timing source */ | ||
111 | value1 = read_pmtmr(); | ||
112 | for (i = 0; i < 10000; i++) { | ||
113 | value2 = read_pmtmr(); | ||
114 | if (value2 == value1) | ||
115 | continue; | ||
116 | if (value2 > value1) | ||
117 | goto pm_good; | ||
118 | if ((value2 < value1) && ((value2) < 0xFFF)) | ||
119 | goto pm_good; | ||
120 | printk(KERN_INFO "PM-Timer had inconsistent results: 0x%#x, 0x%#x - aborting.\n", value1, value2); | ||
121 | return -EINVAL; | ||
122 | } | ||
123 | printk(KERN_INFO "PM-Timer had no reasonable result: 0x%#x - aborting.\n", value1); | ||
124 | return -ENODEV; | ||
125 | |||
126 | pm_good: | ||
127 | if (verify_pmtmr_rate() != 0) | ||
128 | return -ENODEV; | ||
129 | |||
130 | init_cpu_khz(); | ||
131 | return 0; | ||
132 | } | ||
133 | |||
134 | static inline u32 cyc2us(u32 cycles) | ||
135 | { | ||
136 | /* The Power Management Timer ticks at 3.579545 ticks per microsecond. | ||
137 | * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%] | ||
138 | * | ||
139 | * Even with HZ = 100, delta is at maximum 35796 ticks, so it can | ||
140 | * easily be multiplied with 286 (=0x11E) without having to fear | ||
141 | * u32 overflows. | ||
142 | */ | ||
143 | cycles *= 286; | ||
144 | return (cycles >> 10); | ||
145 | } | ||
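A quick check of the 286/1024 approximation above: it should sit within ~0.024% of 1/3.579545, and the worst-case product quoted in the comment (35796 ticks at HZ = 100) should stay comfortably inside a u32:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		double exact = 1.0 / 3.579545;		/* usecs per PM timer tick */
		double approx = 286.0 / 1024.0;
		uint32_t max_delta = 35796;		/* ~1 tick at HZ = 100 */

		printf("relative error: %.4f%%\n",
		       100.0 * (approx - exact) / exact);	/* ~-0.024% */
		printf("worst-case product: %u (fits in u32)\n",
		       max_delta * 286u);			/* 10237656 */
		return 0;
	}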
146 | |||
147 | /* | ||
148 | * this gets called during each timer interrupt | ||
149 | * - Called while holding the writer xtime_lock | ||
150 | */ | ||
151 | static void mark_offset_pmtmr(void) | ||
152 | { | ||
153 | u32 lost, delta, last_offset; | ||
154 | static int first_run = 1; | ||
155 | last_offset = offset_tick; | ||
156 | |||
157 | write_seqlock(&monotonic_lock); | ||
158 | |||
159 | offset_tick = read_pmtmr(); | ||
160 | |||
161 | /* calculate tick interval */ | ||
162 | delta = (offset_tick - last_offset) & ACPI_PM_MASK; | ||
163 | |||
164 | /* convert to usecs */ | ||
165 | delta = cyc2us(delta); | ||
166 | |||
167 | /* update the monotonic base value */ | ||
168 | monotonic_base += delta * NSEC_PER_USEC; | ||
169 | write_sequnlock(&monotonic_lock); | ||
170 | |||
171 | /* convert to ticks */ | ||
172 | delta += offset_delay; | ||
173 | lost = delta / (USEC_PER_SEC / HZ); | ||
174 | offset_delay = delta % (USEC_PER_SEC / HZ); | ||
175 | |||
176 | |||
177 | /* compensate for lost ticks */ | ||
178 | if (lost >= 2) | ||
179 | jiffies_64 += lost - 1; | ||
180 | |||
181 | /* don't calculate delay for first run, | ||
182 | or if we've got less than a tick */ | ||
183 | if (first_run || (lost < 1)) { | ||
184 | first_run = 0; | ||
185 | offset_delay = 0; | ||
186 | } | ||
187 | } | ||
188 | |||
189 | |||
190 | static unsigned long long monotonic_clock_pmtmr(void) | ||
191 | { | ||
192 | u32 last_offset, this_offset; | ||
193 | unsigned long long base, ret; | ||
194 | unsigned seq; | ||
195 | |||
196 | |||
197 | /* atomically read monotonic base & last_offset */ | ||
198 | do { | ||
199 | seq = read_seqbegin(&monotonic_lock); | ||
200 | last_offset = offset_tick; | ||
201 | base = monotonic_base; | ||
202 | } while (read_seqretry(&monotonic_lock, seq)); | ||
203 | |||
204 | /* Read the pmtmr */ | ||
205 | this_offset = read_pmtmr(); | ||
206 | |||
207 | /* convert to nanoseconds */ | ||
208 | ret = (this_offset - last_offset) & ACPI_PM_MASK; | ||
209 | ret = base + (cyc2us(ret) * NSEC_PER_USEC); | ||
210 | return ret; | ||
211 | } | ||
212 | |||
213 | static void delay_pmtmr(unsigned long loops) | ||
214 | { | ||
215 | unsigned long bclock, now; | ||
216 | |||
217 | rdtscl(bclock); | ||
218 | do | ||
219 | { | ||
220 | rep_nop(); | ||
221 | rdtscl(now); | ||
222 | } while ((now-bclock) < loops); | ||
223 | } | ||
224 | |||
225 | |||
226 | /* | ||
227 | * get the offset (in microseconds) from the last call to mark_offset() | ||
228 | * - Called holding a reader xtime_lock | ||
229 | */ | ||
230 | static unsigned long get_offset_pmtmr(void) | ||
231 | { | ||
232 | u32 now, offset, delta = 0; | ||
233 | |||
234 | offset = offset_tick; | ||
235 | now = read_pmtmr(); | ||
236 | delta = (now - offset)&ACPI_PM_MASK; | ||
237 | |||
238 | return (unsigned long) offset_delay + cyc2us(delta); | ||
239 | } | ||
240 | |||
241 | |||
242 | /* acpi timer_opts struct */ | ||
243 | static struct timer_opts timer_pmtmr = { | ||
244 | .name = "pmtmr", | ||
245 | .mark_offset = mark_offset_pmtmr, | ||
246 | .get_offset = get_offset_pmtmr, | ||
247 | .monotonic_clock = monotonic_clock_pmtmr, | ||
248 | .delay = delay_pmtmr, | ||
249 | }; | ||
250 | |||
251 | struct init_timer_opts __initdata timer_pmtmr_init = { | ||
252 | .init = init_pmtmr, | ||
253 | .opts = &timer_pmtmr, | ||
254 | }; | ||
255 | |||
256 | MODULE_LICENSE("GPL"); | ||
257 | MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>"); | ||
258 | MODULE_DESCRIPTION("Power Management Timer (PMTMR) as primary timing source for x86"); | ||
diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c new file mode 100644 index 000000000000..a685994e5c8e --- /dev/null +++ b/arch/i386/kernel/timers/timer_tsc.c | |||
@@ -0,0 +1,560 @@ | |||
1 | /* | ||
2 | * This code largely moved from arch/i386/kernel/time.c. | ||
3 | * See comments there for proper credits. | ||
4 | * | ||
5 | * 2004-06-25 Jesper Juhl | ||
6 | * moved mark_offset_tsc below cpufreq_delayed_get to avoid gcc 3.4 | ||
7 | * failing to inline. | ||
8 | */ | ||
9 | |||
10 | #include <linux/spinlock.h> | ||
11 | #include <linux/init.h> | ||
12 | #include <linux/timex.h> | ||
13 | #include <linux/errno.h> | ||
14 | #include <linux/cpufreq.h> | ||
15 | #include <linux/string.h> | ||
16 | #include <linux/jiffies.h> | ||
17 | |||
18 | #include <asm/timer.h> | ||
19 | #include <asm/io.h> | ||
20 | /* processor.h for the tsc_disable flag */ | ||
21 | #include <asm/processor.h> | ||
22 | |||
23 | #include "io_ports.h" | ||
24 | #include "mach_timer.h" | ||
25 | |||
26 | #include <asm/hpet.h> | ||
27 | |||
28 | #ifdef CONFIG_HPET_TIMER | ||
29 | static unsigned long hpet_usec_quotient; | ||
30 | static unsigned long hpet_last; | ||
31 | static struct timer_opts timer_tsc; | ||
32 | #endif | ||
33 | |||
34 | static inline void cpufreq_delayed_get(void); | ||
35 | |||
36 | int tsc_disable __initdata = 0; | ||
37 | |||
38 | extern spinlock_t i8253_lock; | ||
39 | |||
40 | static int use_tsc; | ||
41 | /* Number of usecs that the last interrupt was delayed */ | ||
42 | static int delay_at_last_interrupt; | ||
43 | |||
44 | static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */ | ||
45 | static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */ | ||
46 | static unsigned long long monotonic_base; | ||
47 | static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; | ||
48 | |||
49 | /* convert from cycles(64bits) => nanoseconds (64bits) | ||
50 | * basic equation: | ||
51 | * ns = cycles / (freq / ns_per_sec) | ||
52 | * ns = cycles * (ns_per_sec / freq) | ||
53 | * ns = cycles * (10^9 / (cpu_mhz * 10^6)) | ||
54 | * ns = cycles * (10^3 / cpu_mhz) | ||
55 | * | ||
56 | * Then we use scaling math (suggested by george@mvista.com) to get: | ||
57 | * ns = cycles * (10^3 * SC / cpu_mhz) / SC | ||
58 | * ns = cycles * cyc2ns_scale / SC | ||
59 | * | ||
60 | * And since SC is a constant power of two, we can convert the div | ||
61 | * into a shift. | ||
62 | * -johnstul@us.ibm.com "math is hard, lets go shopping!" | ||
63 | */ | ||
64 | static unsigned long cyc2ns_scale; | ||
65 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ | ||
66 | |||
67 | static inline void set_cyc2ns_scale(unsigned long cpu_mhz) | ||
68 | { | ||
69 | cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz; | ||
70 | } | ||
71 | |||
72 | static inline unsigned long long cycles_2_ns(unsigned long long cyc) | ||
73 | { | ||
74 | return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; | ||
75 | } | ||
76 | |||
77 | static int count2; /* counter for mark_offset_tsc() */ | ||
78 | |||
79 | /* Cached *multiplier* to convert TSC counts to microseconds. | ||
80 | * (see the equation below). | ||
81 | * Equal to 2^32 * (1 / (clocks per usec) ). | ||
82 | * Initialized in time_init. | ||
83 | */ | ||
84 | static unsigned long fast_gettimeoffset_quotient; | ||
85 | |||
86 | static unsigned long get_offset_tsc(void) | ||
87 | { | ||
88 | register unsigned long eax, edx; | ||
89 | |||
90 | /* Read the Time Stamp Counter */ | ||
91 | |||
92 | rdtsc(eax,edx); | ||
93 | |||
94 | /* .. relative to previous jiffy (32 bits is enough) */ | ||
95 | eax -= last_tsc_low; /* tsc_low delta */ | ||
96 | |||
97 | /* | ||
98 | * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient | ||
99 | * = (tsc_low delta) * (usecs_per_clock) | ||
100 | * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy) | ||
101 | * | ||
102 | * Using a mull instead of a divl saves up to 31 clock cycles | ||
103 | * in the critical path. | ||
104 | */ | ||
105 | |||
106 | __asm__("mull %2" | ||
107 | :"=a" (eax), "=d" (edx) | ||
108 | :"rm" (fast_gettimeoffset_quotient), | ||
109 | "0" (eax)); | ||
110 | |||
111 | /* our adjusted time offset in microseconds */ | ||
112 | return delay_at_last_interrupt + edx; | ||
113 | } | ||
114 | |||
115 | static unsigned long long monotonic_clock_tsc(void) | ||
116 | { | ||
117 | unsigned long long last_offset, this_offset, base; | ||
118 | unsigned seq; | ||
119 | |||
120 | /* atomically read monotonic base & last_offset */ | ||
121 | do { | ||
122 | seq = read_seqbegin(&monotonic_lock); | ||
123 | last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; | ||
124 | base = monotonic_base; | ||
125 | } while (read_seqretry(&monotonic_lock, seq)); | ||
126 | |||
127 | /* Read the Time Stamp Counter */ | ||
128 | rdtscll(this_offset); | ||
129 | |||
130 | /* return the value in ns */ | ||
131 | return base + cycles_2_ns(this_offset - last_offset); | ||
132 | } | ||
133 | |||
134 | /* | ||
135 | * Scheduler clock - returns current time in nanosec units. | ||
136 | */ | ||
137 | unsigned long long sched_clock(void) | ||
138 | { | ||
139 | unsigned long long this_offset; | ||
140 | |||
141 | /* | ||
142 | * In the NUMA case we don't use the TSC, as the TSCs are not | ||
143 | * synchronized across all CPUs. | ||
144 | */ | ||
145 | #ifndef CONFIG_NUMA | ||
146 | if (!use_tsc) | ||
147 | #endif | ||
148 | /* no locking but a rare wrong value is not a big deal */ | ||
149 | return jiffies_64 * (1000000000 / HZ); | ||
150 | |||
151 | /* Read the Time Stamp Counter */ | ||
152 | rdtscll(this_offset); | ||
153 | |||
154 | /* return the value in ns */ | ||
155 | return cycles_2_ns(this_offset); | ||
156 | } | ||
157 | |||
158 | static void delay_tsc(unsigned long loops) | ||
159 | { | ||
160 | unsigned long bclock, now; | ||
161 | |||
162 | rdtscl(bclock); | ||
163 | do | ||
164 | { | ||
165 | rep_nop(); | ||
166 | rdtscl(now); | ||
167 | } while ((now-bclock) < loops); | ||
168 | } | ||
169 | |||
170 | #ifdef CONFIG_HPET_TIMER | ||
171 | static void mark_offset_tsc_hpet(void) | ||
172 | { | ||
173 | unsigned long long this_offset, last_offset; | ||
174 | unsigned long offset, temp, hpet_current; | ||
175 | |||
176 | write_seqlock(&monotonic_lock); | ||
177 | last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; | ||
178 | /* | ||
179 | * It is important that these two operations happen almost at | ||
180 | * the same time. We do the RDTSC stuff first, since it's | ||
181 | * faster. To avoid any inconsistencies, we need interrupts | ||
182 | * disabled locally. | ||
183 | */ | ||
184 | /* | ||
185 | * Interrupts are just disabled locally since the timer irq | ||
186 | * has the SA_INTERRUPT flag set. -arca | ||
187 | */ | ||
188 | /* read Pentium cycle counter */ | ||
189 | |||
190 | hpet_current = hpet_readl(HPET_COUNTER); | ||
191 | rdtsc(last_tsc_low, last_tsc_high); | ||
192 | |||
193 | /* lost tick compensation */ | ||
194 | offset = hpet_readl(HPET_T0_CMP) - hpet_tick; | ||
195 | if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))) { | ||
196 | int lost_ticks = (offset - hpet_last) / hpet_tick; | ||
197 | jiffies_64 += lost_ticks; | ||
198 | } | ||
199 | hpet_last = hpet_current; | ||
200 | |||
201 | /* update the monotonic base value */ | ||
202 | this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; | ||
203 | monotonic_base += cycles_2_ns(this_offset - last_offset); | ||
204 | write_sequnlock(&monotonic_lock); | ||
205 | |||
206 | /* calculate delay_at_last_interrupt */ | ||
207 | /* | ||
208 | * Time offset = (hpet delta) * ( usecs per HPET clock ) | ||
209 | * = (hpet delta) * ( usecs per tick / HPET clocks per tick) | ||
210 | * = (hpet delta) * ( hpet_usec_quotient ) / (2^32) | ||
211 | * Where, | ||
212 | * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick | ||
213 | */ | ||
214 | delay_at_last_interrupt = hpet_current - offset; | ||
215 | ASM_MUL64_REG(temp, delay_at_last_interrupt, | ||
216 | hpet_usec_quotient, delay_at_last_interrupt); | ||
217 | } | ||
218 | #endif | ||
219 | |||
220 | |||
221 | #ifdef CONFIG_CPU_FREQ | ||
222 | #include <linux/workqueue.h> | ||
223 | |||
224 | static unsigned int cpufreq_delayed_issched = 0; | ||
225 | static unsigned int cpufreq_init = 0; | ||
226 | static struct work_struct cpufreq_delayed_get_work; | ||
227 | |||
228 | static void handle_cpufreq_delayed_get(void *v) | ||
229 | { | ||
230 | unsigned int cpu; | ||
231 | for_each_online_cpu(cpu) { | ||
232 | cpufreq_get(cpu); | ||
233 | } | ||
234 | cpufreq_delayed_issched = 0; | ||
235 | } | ||
236 | |||
237 | /* If we notice lost ticks, schedule a call to cpufreq_get(), which | ||
238 | * verifies that the CPU frequency the timing core thinks the CPU is | ||
239 | * running at is still correct. | ||
240 | */ | ||
241 | static inline void cpufreq_delayed_get(void) | ||
242 | { | ||
243 | if (cpufreq_init && !cpufreq_delayed_issched) { | ||
244 | cpufreq_delayed_issched = 1; | ||
245 | printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n"); | ||
246 | schedule_work(&cpufreq_delayed_get_work); | ||
247 | } | ||
248 | } | ||
249 | |||
250 | /* If the CPU frequency is scaled, TSC-based delays will need a different | ||
251 | * loops_per_jiffy value to function properly. | ||
252 | */ | ||
253 | |||
254 | static unsigned int ref_freq = 0; | ||
255 | static unsigned long loops_per_jiffy_ref = 0; | ||
256 | |||
257 | #ifndef CONFIG_SMP | ||
258 | static unsigned long fast_gettimeoffset_ref = 0; | ||
259 | static unsigned long cpu_khz_ref = 0; | ||
260 | #endif | ||
261 | |||
262 | static int | ||
263 | time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, | ||
264 | void *data) | ||
265 | { | ||
266 | struct cpufreq_freqs *freq = data; | ||
267 | |||
268 | if (val != CPUFREQ_RESUMECHANGE) | ||
269 | write_seqlock_irq(&xtime_lock); | ||
270 | if (!ref_freq) { | ||
271 | ref_freq = freq->old; | ||
272 | loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy; | ||
273 | #ifndef CONFIG_SMP | ||
274 | fast_gettimeoffset_ref = fast_gettimeoffset_quotient; | ||
275 | cpu_khz_ref = cpu_khz; | ||
276 | #endif | ||
277 | } | ||
278 | |||
279 | if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || | ||
280 | (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || | ||
281 | (val == CPUFREQ_RESUMECHANGE)) { | ||
282 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) | ||
283 | cpu_data[freq->cpu].loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); | ||
284 | #ifndef CONFIG_SMP | ||
285 | if (cpu_khz) | ||
286 | cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new); | ||
287 | if (use_tsc) { | ||
288 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) { | ||
289 | fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq); | ||
290 | set_cyc2ns_scale(cpu_khz/1000); | ||
291 | } | ||
292 | } | ||
293 | #endif | ||
294 | } | ||
295 | |||
296 | if (val != CPUFREQ_RESUMECHANGE) | ||
297 | write_sequnlock_irq(&xtime_lock); | ||
298 | |||
299 | return 0; | ||
300 | } | ||
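The notifier above rescales loops_per_jiffy in proportion to the new frequency, while the usec-per-cycle quotient is rescaled with the arguments swapped, since it varies inversely with clock speed. A sketch of that proportional scaling (cpufreq_scale() is roughly old * mult / div, ignoring the kernel's rounding details):

	#include <stdio.h>

	static unsigned long scale(unsigned long old, unsigned div, unsigned mult)
	{
		return (unsigned long)((unsigned long long)old * mult / div);
	}

	int main(void)
	{
		unsigned ref_khz = 1000000, new_khz = 500000;	/* halve the clock */
		unsigned long lpj_ref = 2000000, quot_ref = 4295;	/* made-up values */

		/* loops_per_jiffy scales with frequency... */
		printf("loops_per_jiffy: %lu\n", scale(lpj_ref, ref_khz, new_khz));
		/* ...the usec quotient scales against it (arguments swapped) */
		printf("usec quotient:   %lu\n", scale(quot_ref, new_khz, ref_khz));
		return 0;
	}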
301 | |||
302 | static struct notifier_block time_cpufreq_notifier_block = { | ||
303 | .notifier_call = time_cpufreq_notifier | ||
304 | }; | ||
305 | |||
306 | |||
307 | static int __init cpufreq_tsc(void) | ||
308 | { | ||
309 | int ret; | ||
310 | INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL); | ||
311 | ret = cpufreq_register_notifier(&time_cpufreq_notifier_block, | ||
312 | CPUFREQ_TRANSITION_NOTIFIER); | ||
313 | if (!ret) | ||
314 | cpufreq_init = 1; | ||
315 | return ret; | ||
316 | } | ||
317 | core_initcall(cpufreq_tsc); | ||
318 | |||
319 | #else /* CONFIG_CPU_FREQ */ | ||
320 | static inline void cpufreq_delayed_get(void) { return; } | ||
321 | #endif | ||
322 | |||
323 | static void mark_offset_tsc(void) | ||
324 | { | ||
325 | unsigned long lost,delay; | ||
326 | unsigned long delta = last_tsc_low; | ||
327 | int count; | ||
328 | int countmp; | ||
329 | static int count1 = 0; | ||
330 | unsigned long long this_offset, last_offset; | ||
331 | static int lost_count = 0; | ||
332 | |||
333 | write_seqlock(&monotonic_lock); | ||
334 | last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; | ||
335 | /* | ||
336 | * It is important that these two operations happen almost at | ||
337 | * the same time. We do the RDTSC stuff first, since it's | ||
338 | * faster. To avoid any inconsistencies, we need interrupts | ||
339 | * disabled locally. | ||
340 | */ | ||
341 | |||
342 | /* | ||
343 | * Interrupts are just disabled locally since the timer irq | ||
344 | * has the SA_INTERRUPT flag set. -arca | ||
345 | */ | ||
346 | |||
347 | /* read Pentium cycle counter */ | ||
348 | |||
349 | rdtsc(last_tsc_low, last_tsc_high); | ||
350 | |||
351 | spin_lock(&i8253_lock); | ||
352 | outb_p(0x00, PIT_MODE); /* latch the count ASAP */ | ||
353 | |||
354 | count = inb_p(PIT_CH0); /* read the latched count */ | ||
355 | count |= inb(PIT_CH0) << 8; | ||
356 | |||
357 | /* | ||
358 | * VIA686a test code... reset the latch if count > max + 1 | ||
359 | * from timer_pit.c - cjb | ||
360 | */ | ||
361 | if (count > LATCH) { | ||
362 | outb_p(0x34, PIT_MODE); | ||
363 | outb_p(LATCH & 0xff, PIT_CH0); | ||
364 | outb(LATCH >> 8, PIT_CH0); | ||
365 | count = LATCH - 1; | ||
366 | } | ||
367 | |||
368 | spin_unlock(&i8253_lock); | ||
369 | |||
370 | if (pit_latch_buggy) { | ||
371 | /* get the median of the last 3 latched counts */ | ||
372 | if ((count2 >= count && count >= count1) | ||
373 | || (count1 >= count && count >= count2)) { | ||
374 | count2 = count1; count1 = count; | ||
375 | } else if ((count1 >= count2 && count2 >= count) | ||
376 | || (count >= count2 && count2 >= count1)) { | ||
377 | countmp = count;count = count2; | ||
378 | count2 = count1;count1 = countmp; | ||
379 | } else { | ||
380 | count2 = count1; count1 = count; count = count1; | ||
381 | } | ||
382 | } | ||
383 | |||
384 | /* lost tick compensation */ | ||
385 | delta = last_tsc_low - delta; | ||
386 | { | ||
387 | register unsigned long eax, edx; | ||
388 | eax = delta; | ||
389 | __asm__("mull %2" | ||
390 | :"=a" (eax), "=d" (edx) | ||
391 | :"rm" (fast_gettimeoffset_quotient), | ||
392 | "0" (eax)); | ||
393 | delta = edx; | ||
394 | } | ||
395 | delta += delay_at_last_interrupt; | ||
396 | lost = delta/(1000000/HZ); | ||
397 | delay = delta%(1000000/HZ); | ||
398 | if (lost >= 2) { | ||
399 | jiffies_64 += lost-1; | ||
400 | |||
401 | /* sanity check to ensure we're not always losing ticks */ | ||
402 | if (lost_count++ > 100) { | ||
403 | printk(KERN_WARNING "Losing too many ticks!\n"); | ||
404 | printk(KERN_WARNING "TSC cannot be used as a timesource. \n"); | ||
405 | printk(KERN_WARNING "Possible reasons for this are:\n"); | ||
406 | printk(KERN_WARNING " You're running with Speedstep,\n"); | ||
407 | printk(KERN_WARNING " You don't have DMA enabled for your hard disk (see hdparm),\n"); | ||
408 | printk(KERN_WARNING " Incorrect TSC synchronization on an SMP system (see dmesg).\n"); | ||
409 | printk(KERN_WARNING "Falling back to a sane timesource now.\n"); | ||
410 | |||
411 | clock_fallback(); | ||
412 | } | ||
413 | /* ... but give the TSC a fair chance */ | ||
414 | if (lost_count > 25) | ||
415 | cpufreq_delayed_get(); | ||
416 | } else | ||
417 | lost_count = 0; | ||
418 | /* update the monotonic base value */ | ||
419 | this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; | ||
420 | monotonic_base += cycles_2_ns(this_offset - last_offset); | ||
421 | write_sequnlock(&monotonic_lock); | ||
422 | |||
423 | /* calculate delay_at_last_interrupt */ | ||
424 | count = ((LATCH-1) - count) * TICK_SIZE; | ||
425 | delay_at_last_interrupt = (count + LATCH/2) / LATCH; | ||
426 | |||
427 | /* catch corner case where tick rollover occurred | ||
428 | * between tsc and pit reads (as noted when | ||
429 | * usec delta is > 90% # of usecs/tick) | ||
430 | */ | ||
431 | if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ)) | ||
432 | jiffies_64++; | ||
433 | } | ||
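The pit_latch_buggy branch above keeps the median of the last three latched counts, so a single corrupted latch cannot skew the offset. A standalone median-of-three equivalent to that shuffle:

	#include <stdio.h>

	/* median-of-three, equivalent to the pit_latch_buggy filter above */
	static int median3(int a, int b, int c)
	{
		if ((a >= b && b >= c) || (c >= b && b >= a))
			return b;
		if ((b >= a && a >= c) || (c >= a && a >= b))
			return a;
		return c;
	}

	int main(void)
	{
		printf("%d\n", median3(11900, 11932, 11700));	/* prints 11900 */
		return 0;
	}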
434 | |||
435 | static int __init init_tsc(char* override) | ||
436 | { | ||
437 | |||
438 | /* check clock override */ | ||
439 | if (override[0] && strncmp(override,"tsc",3)) { | ||
440 | #ifdef CONFIG_HPET_TIMER | ||
441 | if (is_hpet_enabled()) { | ||
442 | printk(KERN_ERR "Warning: clock= override failed. Defaulting to tsc\n"); | ||
443 | } else | ||
444 | #endif | ||
445 | { | ||
446 | return -ENODEV; | ||
447 | } | ||
448 | } | ||
449 | |||
450 | /* | ||
451 | * If we have APM enabled or the CPU clock speed is variable | ||
452 | * (CPU stops clock on HLT or slows clock to save power) | ||
453 | * then the TSC timestamps may diverge by up to 1 jiffy from | ||
454 | * 'real time' but nothing will break. | ||
455 | * The most frequent case is that the CPU is "woken" from a halt | ||
456 | * state by the timer interrupt itself, so we get 0 error. In the | ||
457 | * rare cases where a driver would "wake" the CPU and request a | ||
458 | * timestamp, the maximum error is < 1 jiffy. But timestamps are | ||
459 | * still perfectly ordered. | ||
460 | * Note that the TSC counter will be reset if APM suspends | ||
461 | * to disk; this won't break the kernel, though, 'cuz we're | ||
462 | * smart. See arch/i386/kernel/apm.c. | ||
463 | */ | ||
464 | /* | ||
465 | * Firstly we have to do a CPU check for chips with | ||
466 | * a potentially buggy TSC. At this point we haven't run | ||
467 | * the ident/bugs checks so we must run this hook as it | ||
468 | * may turn off the TSC flag. | ||
469 | * | ||
470 | * NOTE: this doesn't yet handle SMP 486 machines where only | ||
471 | * some CPUs have a TSC. That's never worked and nobody has | ||
472 | * moaned if you have the only one in the world - you fix it! | ||
473 | */ | ||
474 | |||
475 | count2 = LATCH; /* initialize counter for mark_offset_tsc() */ | ||
476 | |||
477 | if (cpu_has_tsc) { | ||
478 | unsigned long tsc_quotient; | ||
479 | #ifdef CONFIG_HPET_TIMER | ||
480 | if (is_hpet_enabled()){ | ||
481 | unsigned long result, remain; | ||
482 | printk("Using TSC for gettimeofday\n"); | ||
483 | tsc_quotient = calibrate_tsc_hpet(NULL); | ||
484 | timer_tsc.mark_offset = &mark_offset_tsc_hpet; | ||
485 | /* | ||
486 | * Math to calculate hpet to usec multiplier | ||
487 | * Look for the comments at get_offset_tsc_hpet() | ||
488 | */ | ||
489 | ASM_DIV64_REG(result, remain, hpet_tick, | ||
490 | 0, KERNEL_TICK_USEC); | ||
491 | if (remain > (hpet_tick >> 1)) | ||
492 | result++; /* rounding the result */ | ||
493 | |||
494 | hpet_usec_quotient = result; | ||
495 | } else | ||
496 | #endif | ||
497 | { | ||
498 | tsc_quotient = calibrate_tsc(); | ||
499 | } | ||
500 | |||
501 | if (tsc_quotient) { | ||
502 | fast_gettimeoffset_quotient = tsc_quotient; | ||
503 | use_tsc = 1; | ||
504 | /* | ||
505 | * We could be more selective here I suspect | ||
506 | * and just enable this for the next intel chips ? | ||
507 | */ | ||
508 | /* report CPU clock rate in Hz. | ||
509 | * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) = | ||
510 | * clock/second. Our precision is about 100 ppm. | ||
511 | */ | ||
512 | { unsigned long eax=0, edx=1000; | ||
513 | __asm__("divl %2" | ||
514 | :"=a" (cpu_khz), "=d" (edx) | ||
515 | :"r" (tsc_quotient), | ||
516 | "0" (eax), "1" (edx)); | ||
517 | printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); | ||
518 | } | ||
519 | set_cyc2ns_scale(cpu_khz/1000); | ||
520 | return 0; | ||
521 | } | ||
522 | } | ||
523 | return -ENODEV; | ||
524 | } | ||
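The divl in init_tsc() divides the 64-bit value (1000 << 32) by tsc_quotient; since tsc_quotient is 2^32 / clocks-per-usec, the result comes out as clocks per millisecond, i.e. cpu_khz. A portable check of that arithmetic with a hypothetical 2 GHz CPU:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t clocks_per_usec = 2000;	/* assumed 2 GHz CPU */
		uint32_t tsc_quotient =
			(uint32_t)((1ULL << 32) / clocks_per_usec);
		uint32_t cpu_khz =
			(uint32_t)((1000ULL << 32) / tsc_quotient);

		printf("cpu_khz = %u\n", cpu_khz);	/* prints 2000000 */
		return 0;
	}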
525 | |||
526 | #ifndef CONFIG_X86_TSC | ||
527 | /* disable flag for tsc. Takes effect by clearing the TSC cpu flag | ||
528 | * in cpu/common.c */ | ||
529 | static int __init tsc_setup(char *str) | ||
530 | { | ||
531 | tsc_disable = 1; | ||
532 | return 1; | ||
533 | } | ||
534 | #else | ||
535 | static int __init tsc_setup(char *str) | ||
536 | { | ||
537 | printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " | ||
538 | "cannot disable TSC.\n"); | ||
539 | return 1; | ||
540 | } | ||
541 | #endif | ||
542 | __setup("notsc", tsc_setup); | ||
543 | |||
544 | |||
545 | |||
546 | /************************************************************/ | ||
547 | |||
548 | /* tsc timer_opts struct */ | ||
549 | static struct timer_opts timer_tsc = { | ||
550 | .name = "tsc", | ||
551 | .mark_offset = mark_offset_tsc, | ||
552 | .get_offset = get_offset_tsc, | ||
553 | .monotonic_clock = monotonic_clock_tsc, | ||
554 | .delay = delay_tsc, | ||
555 | }; | ||
556 | |||
557 | struct init_timer_opts __initdata timer_tsc_init = { | ||
558 | .init = init_tsc, | ||
559 | .opts = &timer_tsc, | ||
560 | }; | ||
diff --git a/arch/i386/kernel/trampoline.S b/arch/i386/kernel/trampoline.S new file mode 100644 index 000000000000..fcce0e61b0e7 --- /dev/null +++ b/arch/i386/kernel/trampoline.S | |||
@@ -0,0 +1,80 @@ | |||
1 | /* | ||
2 | * | ||
3 | * Trampoline.S Derived from Setup.S by Linus Torvalds | ||
4 | * | ||
5 | * 4 Jan 1997 Michael Chastain: changed to gnu as. | ||
6 | * | ||
7 | * This is only used for booting secondary CPUs in an SMP machine | ||
8 | * | ||
9 | * Entry: CS:IP point to the start of our code, we are | ||
10 | * in real mode with no stack, but the rest of the | ||
11 | * trampoline page to make our stack and everything else | ||
12 | * is a mystery. | ||
13 | * | ||
14 | * In fact we don't actually need a stack so we don't | ||
15 | * set one up. | ||
16 | * | ||
17 | * We jump into the boot/compressed/head.S code. So you'd | ||
18 | * better be running a compressed kernel image or you | ||
19 | * won't get very far. | ||
20 | * | ||
21 | * On entry to trampoline_data, the processor is in real mode | ||
22 | * with 16-bit addressing and 16-bit data. CS has some value | ||
23 | * and IP is zero. Thus, data addresses need to be absolute | ||
24 | * (no relocation) and are taken with regard to r_base. | ||
25 | * | ||
26 | * If you work on this file, check the object module with | ||
27 | * objdump --reloc to make sure there are no relocation | ||
28 | * entries except for: | ||
29 | * | ||
30 | * TYPE VALUE | ||
31 | * R_386_32 startup_32_smp | ||
32 | * R_386_32 boot_gdt_table | ||
33 | */ | ||
34 | |||
35 | #include <linux/linkage.h> | ||
36 | #include <asm/segment.h> | ||
37 | #include <asm/page.h> | ||
38 | |||
39 | .data | ||
40 | |||
41 | .code16 | ||
42 | |||
43 | ENTRY(trampoline_data) | ||
44 | r_base = . | ||
45 | wbinvd # Needed for NUMA-Q; should be harmless for others | ||
46 | mov %cs, %ax # Code and data in the same place | ||
47 | mov %ax, %ds | ||
48 | |||
49 | cli # We should be safe anyway | ||
50 | |||
51 | movl $0xA5A5A5A5, trampoline_data - r_base | ||
52 | # write marker so the master knows we're running | ||
53 | |||
54 | /* The GDT tables may be in a non-default location and the kernel | ||
55 | * can be beyond 16MB, so lgdt will not be able to load the address, | ||
56 | * as the default operand size in real mode is 16 bit. Use lgdtl | ||
57 | * instead to force the operand size to 32 bit. | ||
58 | */ | ||
59 | |||
60 | lidtl boot_idt - r_base # load idt with 0, 0 | ||
61 | lgdtl boot_gdt - r_base # load gdt with whatever is appropriate | ||
62 | |||
63 | xor %ax, %ax | ||
64 | inc %ax # protected mode (PE) bit | ||
65 | lmsw %ax # into protected mode | ||
66 | # flush prefetch and jump to startup_32_smp in arch/i386/kernel/head.S | ||
67 | ljmpl $__BOOT_CS, $(startup_32_smp-__PAGE_OFFSET) | ||
68 | |||
69 | # These need to be in the same 64K segment as the above; | ||
70 | # hence we don't use the boot_gdt_descr defined in head.S | ||
71 | boot_gdt: | ||
72 | .word __BOOT_DS + 7 # gdt limit | ||
73 | .long boot_gdt_table-__PAGE_OFFSET # gdt base | ||
74 | |||
75 | boot_idt: | ||
76 | .word 0 # idt limit = 0 | ||
77 | .long 0 # idt base = 0L | ||
78 | |||
79 | .globl trampoline_end | ||
80 | trampoline_end: | ||
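A reminder of why the trampoline insists on r_base-relative data references: in real mode every access goes through a segment, and the physical address is segment * 16 + offset, so absolute addresses fixed at link time would be wrong. A tiny illustration of the arithmetic; the 0x9f00 segment value is made up for the example.

#include <stdio.h>
#include <stdint.h>

/* Real-mode physical address: 16-byte-granular segment base + offset. */
static uint32_t real_mode_addr(uint16_t seg, uint16_t off)
{
	return ((uint32_t)seg << 4) + off;
}

int main(void)
{
	/* A trampoline copied to (hypothetical) physical 0x9f000 runs with
	 * CS = 0x9f00, IP = 0, so a label at r_base + 0x10 is 0x9f010. */
	printf("%#x\n", (unsigned)real_mode_addr(0x9f00, 0x0010));
	return 0;
}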
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c new file mode 100644 index 000000000000..6c0e383915b6 --- /dev/null +++ b/arch/i386/kernel/traps.c | |||
@@ -0,0 +1,1084 @@ | |||
1 | /* | ||
2 | * linux/arch/i386/kernel/traps.c | ||
3 | * | ||
4 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
5 | * | ||
6 | * Pentium III FXSR, SSE support | ||
7 | * Gareth Hughes <gareth@valinux.com>, May 2000 | ||
8 | */ | ||
9 | |||
10 | /* | ||
11 | * 'Traps.c' handles hardware traps and faults after we have saved some | ||
12 | * state in 'entry.S'. | ||
13 | */ | ||
14 | #include <linux/config.h> | ||
15 | #include <linux/sched.h> | ||
16 | #include <linux/kernel.h> | ||
17 | #include <linux/string.h> | ||
18 | #include <linux/errno.h> | ||
19 | #include <linux/timer.h> | ||
20 | #include <linux/mm.h> | ||
21 | #include <linux/init.h> | ||
22 | #include <linux/delay.h> | ||
23 | #include <linux/spinlock.h> | ||
24 | #include <linux/interrupt.h> | ||
25 | #include <linux/highmem.h> | ||
26 | #include <linux/kallsyms.h> | ||
27 | #include <linux/ptrace.h> | ||
28 | #include <linux/utsname.h> | ||
29 | #include <linux/kprobes.h> | ||
30 | |||
31 | #ifdef CONFIG_EISA | ||
32 | #include <linux/ioport.h> | ||
33 | #include <linux/eisa.h> | ||
34 | #endif | ||
35 | |||
36 | #ifdef CONFIG_MCA | ||
37 | #include <linux/mca.h> | ||
38 | #endif | ||
39 | |||
40 | #include <asm/processor.h> | ||
41 | #include <asm/system.h> | ||
42 | #include <asm/uaccess.h> | ||
43 | #include <asm/io.h> | ||
44 | #include <asm/atomic.h> | ||
45 | #include <asm/debugreg.h> | ||
46 | #include <asm/desc.h> | ||
47 | #include <asm/i387.h> | ||
48 | #include <asm/nmi.h> | ||
49 | |||
50 | #include <asm/smp.h> | ||
51 | #include <asm/arch_hooks.h> | ||
52 | #include <asm/kdebug.h> | ||
53 | |||
54 | #include <linux/irq.h> | ||
55 | #include <linux/module.h> | ||
56 | |||
57 | #include "mach_traps.h" | ||
58 | |||
59 | asmlinkage int system_call(void); | ||
60 | |||
61 | struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 }, | ||
62 | { 0, 0 }, { 0, 0 } }; | ||
63 | |||
64 | /* Do we ignore FPU interrupts? */ | ||
65 | char ignore_fpu_irq = 0; | ||
66 | |||
67 | /* | ||
68 | * The IDT has to be page-aligned to simplify the Pentium | ||
69 | * F0 0F bug workaround. We have a special link segment | ||
70 | * for this. | ||
71 | */ | ||
72 | struct desc_struct idt_table[256] __attribute__((__section__(".data.idt"))) = { {0, 0}, }; | ||
73 | |||
74 | asmlinkage void divide_error(void); | ||
75 | asmlinkage void debug(void); | ||
76 | asmlinkage void nmi(void); | ||
77 | asmlinkage void int3(void); | ||
78 | asmlinkage void overflow(void); | ||
79 | asmlinkage void bounds(void); | ||
80 | asmlinkage void invalid_op(void); | ||
81 | asmlinkage void device_not_available(void); | ||
82 | asmlinkage void coprocessor_segment_overrun(void); | ||
83 | asmlinkage void invalid_TSS(void); | ||
84 | asmlinkage void segment_not_present(void); | ||
85 | asmlinkage void stack_segment(void); | ||
86 | asmlinkage void general_protection(void); | ||
87 | asmlinkage void page_fault(void); | ||
88 | asmlinkage void coprocessor_error(void); | ||
89 | asmlinkage void simd_coprocessor_error(void); | ||
90 | asmlinkage void alignment_check(void); | ||
91 | asmlinkage void spurious_interrupt_bug(void); | ||
92 | asmlinkage void machine_check(void); | ||
93 | |||
94 | static int kstack_depth_to_print = 24; | ||
95 | struct notifier_block *i386die_chain; | ||
96 | static DEFINE_SPINLOCK(die_notifier_lock); | ||
97 | |||
98 | int register_die_notifier(struct notifier_block *nb) | ||
99 | { | ||
100 | int err = 0; | ||
101 | unsigned long flags; | ||
102 | spin_lock_irqsave(&die_notifier_lock, flags); | ||
103 | err = notifier_chain_register(&i386die_chain, nb); | ||
104 | spin_unlock_irqrestore(&die_notifier_lock, flags); | ||
105 | return err; | ||
106 | } | ||
107 | |||
108 | static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) | ||
109 | { | ||
110 | return p > (void *)tinfo && | ||
111 | p < (void *)tinfo + THREAD_SIZE - 3; | ||
112 | } | ||
113 | |||
114 | static inline unsigned long print_context_stack(struct thread_info *tinfo, | ||
115 | unsigned long *stack, unsigned long ebp) | ||
116 | { | ||
117 | unsigned long addr; | ||
118 | |||
119 | #ifdef CONFIG_FRAME_POINTER | ||
120 | while (valid_stack_ptr(tinfo, (void *)ebp)) { | ||
121 | addr = *(unsigned long *)(ebp + 4); | ||
122 | printk(" [<%08lx>] ", addr); | ||
123 | print_symbol("%s", addr); | ||
124 | printk("\n"); | ||
125 | ebp = *(unsigned long *)ebp; | ||
126 | } | ||
127 | #else | ||
128 | while (valid_stack_ptr(tinfo, stack)) { | ||
129 | addr = *stack++; | ||
130 | if (__kernel_text_address(addr)) { | ||
131 | printk(" [<%08lx>]", addr); | ||
132 | print_symbol(" %s", addr); | ||
133 | printk("\n"); | ||
134 | } | ||
135 | } | ||
136 | #endif | ||
137 | return ebp; | ||
138 | } | ||
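In the CONFIG_FRAME_POINTER branch above, the unwinder leans on the i386 frame layout: *ebp holds the caller's saved ebp and *(ebp + 4) the return address. The same chain walk can be sketched in user-space C, assuming the compiler keeps frame pointers (e.g. -fno-omit-frame-pointer) and the conventional one-word-per-slot frame; illustrative only.

#include <stdio.h>
#include <stdint.h>

/* Walk the saved-frame-pointer chain: fp[0] is the caller's frame
 * pointer, fp[1] the return address one word above it, exactly the
 * layout print_context_stack() depends on. */
static void backtrace_fp(void)
{
	uintptr_t *fp = __builtin_frame_address(0);
	int depth = 0;

	/* The chain conventionally ends with a zeroed frame pointer;
	 * the depth cap guards against odd startup frames. */
	while (fp && fp[1] && depth++ < 16) {
		printf(" [<%08lx>]\n", (unsigned long)fp[1]);
		fp = (uintptr_t *)fp[0];	/* up one frame */
	}
}

static void __attribute__((noinline)) leaf(void)  { backtrace_fp(); }
static void __attribute__((noinline)) inner(void) { leaf(); }

int main(void)
{
	inner();
	return 0;
}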
139 | |||
140 | void show_trace(struct task_struct *task, unsigned long * stack) | ||
141 | { | ||
142 | unsigned long ebp; | ||
143 | |||
144 | if (!task) | ||
145 | task = current; | ||
146 | |||
147 | if (task == current) { | ||
148 | /* Grab ebp right from our regs */ | ||
149 | asm ("movl %%ebp, %0" : "=r" (ebp) : ); | ||
150 | } else { | ||
151 | /* ebp is the last reg pushed by switch_to */ | ||
152 | ebp = *(unsigned long *) task->thread.esp; | ||
153 | } | ||
154 | |||
155 | while (1) { | ||
156 | struct thread_info *context; | ||
157 | context = (struct thread_info *) | ||
158 | ((unsigned long)stack & (~(THREAD_SIZE - 1))); | ||
159 | ebp = print_context_stack(context, stack, ebp); | ||
160 | stack = (unsigned long*)context->previous_esp; | ||
161 | if (!stack) | ||
162 | break; | ||
163 | printk(" =======================\n"); | ||
164 | } | ||
165 | } | ||
166 | |||
167 | void show_stack(struct task_struct *task, unsigned long *esp) | ||
168 | { | ||
169 | unsigned long *stack; | ||
170 | int i; | ||
171 | |||
172 | if (esp == NULL) { | ||
173 | if (task) | ||
174 | esp = (unsigned long*)task->thread.esp; | ||
175 | else | ||
176 | esp = (unsigned long *)&esp; | ||
177 | } | ||
178 | |||
179 | stack = esp; | ||
180 | for(i = 0; i < kstack_depth_to_print; i++) { | ||
181 | if (kstack_end(stack)) | ||
182 | break; | ||
183 | if (i && ((i % 8) == 0)) | ||
184 | printk("\n "); | ||
185 | printk("%08lx ", *stack++); | ||
186 | } | ||
187 | printk("\nCall Trace:\n"); | ||
188 | show_trace(task, esp); | ||
189 | } | ||
190 | |||
191 | /* | ||
192 | * The architecture-independent dump_stack generator | ||
193 | */ | ||
194 | void dump_stack(void) | ||
195 | { | ||
196 | unsigned long stack; | ||
197 | |||
198 | show_trace(current, &stack); | ||
199 | } | ||
200 | |||
201 | EXPORT_SYMBOL(dump_stack); | ||
202 | |||
203 | void show_registers(struct pt_regs *regs) | ||
204 | { | ||
205 | int i; | ||
206 | int in_kernel = 1; | ||
207 | unsigned long esp; | ||
208 | unsigned short ss; | ||
209 | |||
210 | esp = (unsigned long) (®s->esp); | ||
211 | ss = __KERNEL_DS; | ||
212 | if (regs->xcs & 3) { | ||
213 | in_kernel = 0; | ||
214 | esp = regs->esp; | ||
215 | ss = regs->xss & 0xffff; | ||
216 | } | ||
217 | print_modules(); | ||
218 | printk("CPU: %d\nEIP: %04x:[<%08lx>] %s VLI\nEFLAGS: %08lx" | ||
219 | " (%s) \n", | ||
220 | smp_processor_id(), 0xffff & regs->xcs, regs->eip, | ||
221 | print_tainted(), regs->eflags, system_utsname.release); | ||
222 | print_symbol("EIP is at %s\n", regs->eip); | ||
223 | printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", | ||
224 | regs->eax, regs->ebx, regs->ecx, regs->edx); | ||
225 | printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", | ||
226 | regs->esi, regs->edi, regs->ebp, esp); | ||
227 | printk("ds: %04x es: %04x ss: %04x\n", | ||
228 | regs->xds & 0xffff, regs->xes & 0xffff, ss); | ||
229 | printk("Process %s (pid: %d, threadinfo=%p task=%p)", | ||
230 | current->comm, current->pid, current_thread_info(), current); | ||
231 | /* | ||
232 | * When in-kernel, we also print out the stack and code at the | ||
233 | * time of the fault. | ||
234 | */ | ||
235 | if (in_kernel) { | ||
236 | u8 *eip; | ||
237 | |||
238 | printk("\nStack: "); | ||
239 | show_stack(NULL, (unsigned long*)esp); | ||
240 | |||
241 | printk("Code: "); | ||
242 | |||
243 | eip = (u8 *)regs->eip - 43; | ||
244 | for (i = 0; i < 64; i++, eip++) { | ||
245 | unsigned char c; | ||
246 | |||
247 | if (eip < (u8 *)PAGE_OFFSET || __get_user(c, eip)) { | ||
248 | printk(" Bad EIP value."); | ||
249 | break; | ||
250 | } | ||
251 | if (eip == (u8 *)regs->eip) | ||
252 | printk("<%02x> ", c); | ||
253 | else | ||
254 | printk("%02x ", c); | ||
255 | } | ||
256 | } | ||
257 | printk("\n"); | ||
258 | } | ||
259 | |||
260 | static void handle_BUG(struct pt_regs *regs) | ||
261 | { | ||
262 | unsigned short ud2; | ||
263 | unsigned short line; | ||
264 | char *file; | ||
265 | char c; | ||
266 | unsigned long eip; | ||
267 | |||
268 | if (regs->xcs & 3) | ||
269 | goto no_bug; /* Not in kernel */ | ||
270 | |||
271 | eip = regs->eip; | ||
272 | |||
273 | if (eip < PAGE_OFFSET) | ||
274 | goto no_bug; | ||
275 | if (__get_user(ud2, (unsigned short *)eip)) | ||
276 | goto no_bug; | ||
277 | if (ud2 != 0x0b0f) | ||
278 | goto no_bug; | ||
279 | if (__get_user(line, (unsigned short *)(eip + 2))) | ||
280 | goto bug; | ||
281 | if (__get_user(file, (char **)(eip + 4)) || | ||
282 | (unsigned long)file < PAGE_OFFSET || __get_user(c, file)) | ||
283 | file = "<bad filename>"; | ||
284 | |||
285 | printk("------------[ cut here ]------------\n"); | ||
286 | printk(KERN_ALERT "kernel BUG at %s:%d!\n", file, line); | ||
287 | |||
288 | no_bug: | ||
289 | return; | ||
290 | |||
291 | /* Here we know it was a BUG but the file and line are unavailable */ | ||
292 | bug: | ||
293 | printk("Kernel BUG\n"); | ||
294 | } | ||
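handle_BUG() works because the i386 BUG() macro of this era emits a fixed byte pattern: the ud2 opcode 0x0f 0x0b (read back as the little-endian halfword 0x0b0f), then a 16-bit line number, then a 32-bit pointer to the file name. A hedged user-space decoder over a byte image, standing in for the __get_user() probes above; the sample bytes are invented.

#include <stdio.h>
#include <stdint.h>
#include <string.h>

/* Decode an i386-style verbose BUG record from a byte image:
 * u16 0x0b0f (ud2), u16 line, u32 file-name pointer.
 * Assumes a little-endian host, like the kernel code it mirrors. */
static int decode_bug(const uint8_t *p, uint16_t *line, uint32_t *file)
{
	uint16_t ud2;

	memcpy(&ud2, p, 2);
	if (ud2 != 0x0b0f)
		return -1;		/* not a BUG site */
	memcpy(line, p + 2, 2);
	memcpy(file, p + 4, 4);
	return 0;
}

int main(void)
{
	const uint8_t img[] = { 0x0f, 0x0b, 0x2a, 0x01, 0x78, 0x56, 0x34, 0x12 };
	uint16_t line;
	uint32_t file;

	if (decode_bug(img, &line, &file) == 0)
		printf("kernel BUG at <%08x>:%u!\n", file, line);
	return 0;
}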
295 | |||
296 | void die(const char * str, struct pt_regs * regs, long err) | ||
297 | { | ||
298 | static struct { | ||
299 | spinlock_t lock; | ||
300 | u32 lock_owner; | ||
301 | int lock_owner_depth; | ||
302 | } die = { | ||
303 | .lock = SPIN_LOCK_UNLOCKED, | ||
304 | .lock_owner = -1, | ||
305 | .lock_owner_depth = 0 | ||
306 | }; | ||
307 | static int die_counter; | ||
308 | |||
309 | if (die.lock_owner != _smp_processor_id()) { | ||
310 | console_verbose(); | ||
311 | spin_lock_irq(&die.lock); | ||
312 | die.lock_owner = smp_processor_id(); | ||
313 | die.lock_owner_depth = 0; | ||
314 | bust_spinlocks(1); | ||
315 | } | ||
316 | |||
317 | if (++die.lock_owner_depth < 3) { | ||
318 | int nl = 0; | ||
319 | handle_BUG(regs); | ||
320 | printk(KERN_ALERT "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); | ||
321 | #ifdef CONFIG_PREEMPT | ||
322 | printk("PREEMPT "); | ||
323 | nl = 1; | ||
324 | #endif | ||
325 | #ifdef CONFIG_SMP | ||
326 | printk("SMP "); | ||
327 | nl = 1; | ||
328 | #endif | ||
329 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
330 | printk("DEBUG_PAGEALLOC"); | ||
331 | nl = 1; | ||
332 | #endif | ||
333 | if (nl) | ||
334 | printk("\n"); | ||
335 | notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV); | ||
336 | show_registers(regs); | ||
337 | } else | ||
338 | printk(KERN_ERR "Recursive die() failure, output suppressed\n"); | ||
339 | |||
340 | bust_spinlocks(0); | ||
341 | die.lock_owner = -1; | ||
342 | spin_unlock_irq(&die.lock); | ||
343 | if (in_interrupt()) | ||
344 | panic("Fatal exception in interrupt"); | ||
345 | |||
346 | if (panic_on_oops) { | ||
347 | printk(KERN_EMERG "Fatal exception: panic in 5 seconds\n"); | ||
348 | ssleep(5); | ||
349 | panic("Fatal exception"); | ||
350 | } | ||
351 | do_exit(SIGSEGV); | ||
352 | } | ||
353 | |||
354 | static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err) | ||
355 | { | ||
356 | if (!(regs->eflags & VM_MASK) && !(3 & regs->xcs)) | ||
357 | die(str, regs, err); | ||
358 | } | ||
359 | |||
360 | static void do_trap(int trapnr, int signr, char *str, int vm86, | ||
361 | struct pt_regs * regs, long error_code, siginfo_t *info) | ||
362 | { | ||
363 | if (regs->eflags & VM_MASK) { | ||
364 | if (vm86) | ||
365 | goto vm86_trap; | ||
366 | goto trap_signal; | ||
367 | } | ||
368 | |||
369 | if (!(regs->xcs & 3)) | ||
370 | goto kernel_trap; | ||
371 | |||
372 | trap_signal: { | ||
373 | struct task_struct *tsk = current; | ||
374 | tsk->thread.error_code = error_code; | ||
375 | tsk->thread.trap_no = trapnr; | ||
376 | if (info) | ||
377 | force_sig_info(signr, info, tsk); | ||
378 | else | ||
379 | force_sig(signr, tsk); | ||
380 | return; | ||
381 | } | ||
382 | |||
383 | kernel_trap: { | ||
384 | if (!fixup_exception(regs)) | ||
385 | die(str, regs, error_code); | ||
386 | return; | ||
387 | } | ||
388 | |||
389 | vm86_trap: { | ||
390 | int ret = handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, trapnr); | ||
391 | if (ret) goto trap_signal; | ||
392 | return; | ||
393 | } | ||
394 | } | ||
395 | |||
396 | #define DO_ERROR(trapnr, signr, str, name) \ | ||
397 | fastcall void do_##name(struct pt_regs * regs, long error_code) \ | ||
398 | { \ | ||
399 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | ||
400 | == NOTIFY_STOP) \ | ||
401 | return; \ | ||
402 | do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \ | ||
403 | } | ||
404 | |||
405 | #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ | ||
406 | fastcall void do_##name(struct pt_regs * regs, long error_code) \ | ||
407 | { \ | ||
408 | siginfo_t info; \ | ||
409 | info.si_signo = signr; \ | ||
410 | info.si_errno = 0; \ | ||
411 | info.si_code = sicode; \ | ||
412 | info.si_addr = (void __user *)siaddr; \ | ||
413 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | ||
414 | == NOTIFY_STOP) \ | ||
415 | return; \ | ||
416 | do_trap(trapnr, signr, str, 0, regs, error_code, &info); \ | ||
417 | } | ||
418 | |||
419 | #define DO_VM86_ERROR(trapnr, signr, str, name) \ | ||
420 | fastcall void do_##name(struct pt_regs * regs, long error_code) \ | ||
421 | { \ | ||
422 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | ||
423 | == NOTIFY_STOP) \ | ||
424 | return; \ | ||
425 | do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \ | ||
426 | } | ||
427 | |||
428 | #define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ | ||
429 | fastcall void do_##name(struct pt_regs * regs, long error_code) \ | ||
430 | { \ | ||
431 | siginfo_t info; \ | ||
432 | info.si_signo = signr; \ | ||
433 | info.si_errno = 0; \ | ||
434 | info.si_code = sicode; \ | ||
435 | info.si_addr = (void __user *)siaddr; \ | ||
436 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | ||
437 | == NOTIFY_STOP) \ | ||
438 | return; \ | ||
439 | do_trap(trapnr, signr, str, 1, regs, error_code, &info); \ | ||
440 | } | ||
441 | |||
442 | DO_VM86_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->eip) | ||
443 | #ifndef CONFIG_KPROBES | ||
444 | DO_VM86_ERROR( 3, SIGTRAP, "int3", int3) | ||
445 | #endif | ||
446 | DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow) | ||
447 | DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds) | ||
448 | DO_ERROR_INFO( 6, SIGILL, "invalid operand", invalid_op, ILL_ILLOPN, regs->eip) | ||
449 | DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) | ||
450 | DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) | ||
451 | DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) | ||
452 | DO_ERROR(12, SIGBUS, "stack segment", stack_segment) | ||
453 | DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) | ||
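Each DO_ERROR* line above stamps out a complete trap handler. For reference, DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) expands to roughly:

fastcall void do_invalid_TSS(struct pt_regs * regs, long error_code)
{
	if (notify_die(DIE_TRAP, "invalid TSS", regs, error_code, 10, SIGSEGV)
						== NOTIFY_STOP)
		return;
	do_trap(10, SIGSEGV, "invalid TSS", 0, regs, error_code, NULL);
}

The _INFO variants additionally fill in a siginfo_t before calling do_trap(), and the VM86 variants pass vm86 = 1 so the trap can be reflected into vm86 mode.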
454 | |||
455 | fastcall void do_general_protection(struct pt_regs * regs, long error_code) | ||
456 | { | ||
457 | int cpu = get_cpu(); | ||
458 | struct tss_struct *tss = &per_cpu(init_tss, cpu); | ||
459 | struct thread_struct *thread = ¤t->thread; | ||
460 | |||
461 | /* | ||
462 | * Perform the lazy TSS I/O bitmap copy: if the TSS has the | ||
463 | * invalid LAZY offset set and the faulting thread has a valid | ||
464 | * I/O bitmap pointer, copy the I/O bitmap into the TSS, set | ||
465 | * the offset field correctly, and let the CPU restart the | ||
466 | * faulting instruction. | ||
467 | */ | ||
468 | if (tss->io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY && | ||
469 | thread->io_bitmap_ptr) { | ||
470 | memcpy(tss->io_bitmap, thread->io_bitmap_ptr, | ||
471 | thread->io_bitmap_max); | ||
472 | /* | ||
473 | * If the previously set map extended to higher ports | ||
474 | * than the current one, pad extra space with 0xff (no access). | ||
475 | */ | ||
476 | if (thread->io_bitmap_max < tss->io_bitmap_max) | ||
477 | memset((char *) tss->io_bitmap + | ||
478 | thread->io_bitmap_max, 0xff, | ||
479 | tss->io_bitmap_max - thread->io_bitmap_max); | ||
480 | tss->io_bitmap_max = thread->io_bitmap_max; | ||
481 | tss->io_bitmap_base = IO_BITMAP_OFFSET; | ||
482 | put_cpu(); | ||
483 | return; | ||
484 | } | ||
485 | put_cpu(); | ||
486 | |||
487 | if (regs->eflags & VM_MASK) | ||
488 | goto gp_in_vm86; | ||
489 | |||
490 | if (!(regs->xcs & 3)) | ||
491 | goto gp_in_kernel; | ||
492 | |||
493 | current->thread.error_code = error_code; | ||
494 | current->thread.trap_no = 13; | ||
495 | force_sig(SIGSEGV, current); | ||
496 | return; | ||
497 | |||
498 | gp_in_vm86: | ||
499 | local_irq_enable(); | ||
500 | handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); | ||
501 | return; | ||
502 | |||
503 | gp_in_kernel: | ||
504 | if (!fixup_exception(regs)) { | ||
505 | if (notify_die(DIE_GPF, "general protection fault", regs, | ||
506 | error_code, 13, SIGSEGV) == NOTIFY_STOP) | ||
507 | return; | ||
508 | die("general protection fault", regs, error_code); | ||
509 | } | ||
510 | } | ||
511 | |||
512 | static void mem_parity_error(unsigned char reason, struct pt_regs * regs) | ||
513 | { | ||
514 | printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n"); | ||
515 | printk("You probably have a hardware problem with your RAM chips\n"); | ||
516 | |||
517 | /* Clear and disable the memory parity error line. */ | ||
518 | clear_mem_error(reason); | ||
519 | } | ||
520 | |||
521 | static void io_check_error(unsigned char reason, struct pt_regs * regs) | ||
522 | { | ||
523 | unsigned long i; | ||
524 | |||
525 | printk("NMI: IOCK error (debug interrupt?)\n"); | ||
526 | show_registers(regs); | ||
527 | |||
528 | /* Re-enable the IOCK line, wait for a few seconds */ | ||
529 | reason = (reason & 0xf) | 8; | ||
530 | outb(reason, 0x61); | ||
531 | i = 2000; | ||
532 | while (--i) udelay(1000); | ||
533 | reason &= ~8; | ||
534 | outb(reason, 0x61); | ||
535 | } | ||
536 | |||
537 | static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) | ||
538 | { | ||
539 | #ifdef CONFIG_MCA | ||
540 | /* Might actually be able to figure out what the guilty party | ||
541 | * is. */ | ||
542 | if( MCA_bus ) { | ||
543 | mca_handle_nmi(); | ||
544 | return; | ||
545 | } | ||
546 | #endif | ||
547 | printk("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", | ||
548 | reason, smp_processor_id()); | ||
549 | printk("Dazed and confused, but trying to continue\n"); | ||
550 | printk("Do you have a strange power saving mode enabled?\n"); | ||
551 | } | ||
552 | |||
553 | static DEFINE_SPINLOCK(nmi_print_lock); | ||
554 | |||
555 | void die_nmi (struct pt_regs *regs, const char *msg) | ||
556 | { | ||
557 | spin_lock(&nmi_print_lock); | ||
558 | /* | ||
559 | * We are in trouble anyway, let's at least try | ||
560 | * to get a message out. | ||
561 | */ | ||
562 | bust_spinlocks(1); | ||
563 | printk(msg); | ||
564 | printk(" on CPU%d, eip %08lx, registers:\n", | ||
565 | smp_processor_id(), regs->eip); | ||
566 | show_registers(regs); | ||
567 | printk("console shuts up ...\n"); | ||
568 | console_silent(); | ||
569 | spin_unlock(&nmi_print_lock); | ||
570 | bust_spinlocks(0); | ||
571 | do_exit(SIGSEGV); | ||
572 | } | ||
573 | |||
574 | static void default_do_nmi(struct pt_regs * regs) | ||
575 | { | ||
576 | unsigned char reason = 0; | ||
577 | |||
578 | /* Only the BSP gets external NMIs from the system. */ | ||
579 | if (!smp_processor_id()) | ||
580 | reason = get_nmi_reason(); | ||
581 | |||
582 | if (!(reason & 0xc0)) { | ||
583 | if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 0, SIGINT) | ||
584 | == NOTIFY_STOP) | ||
585 | return; | ||
586 | #ifdef CONFIG_X86_LOCAL_APIC | ||
587 | /* | ||
588 | * Ok, so this is none of the documented NMI sources, | ||
589 | * so it must be the NMI watchdog. | ||
590 | */ | ||
591 | if (nmi_watchdog) { | ||
592 | nmi_watchdog_tick(regs); | ||
593 | return; | ||
594 | } | ||
595 | #endif | ||
596 | unknown_nmi_error(reason, regs); | ||
597 | return; | ||
598 | } | ||
599 | if (notify_die(DIE_NMI, "nmi", regs, reason, 0, SIGINT) == NOTIFY_STOP) | ||
600 | return; | ||
601 | if (reason & 0x80) | ||
602 | mem_parity_error(reason, regs); | ||
603 | if (reason & 0x40) | ||
604 | io_check_error(reason, regs); | ||
605 | /* | ||
606 | * Reassert NMI in case it became active meanwhile | ||
607 | * as it's edge-triggered. | ||
608 | */ | ||
609 | reassert_nmi(); | ||
610 | } | ||
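default_do_nmi() routes on two bits of the NMI reason byte it reads from the system port: bit 7 flags a memory parity error, bit 6 an I/O check (IOCK) error, and anything with neither bit set falls through to the watchdog/unknown path. A tiny decoder over the same bits, values taken straight from the code above; note both bits can be set at once, which is why the kernel tests them independently.

#include <stdio.h>

#define NMI_REASON_MEM_PARITY	0x80	/* bit 7: RAM parity error */
#define NMI_REASON_IOCHK	0x40	/* bit 6: I/O check error */

static void nmi_report(unsigned char reason)
{
	if (reason & NMI_REASON_MEM_PARITY)
		puts("memory parity error");
	if (reason & NMI_REASON_IOCHK)
		puts("I/O check error");
	if (!(reason & 0xc0))
		puts("unknown source (watchdog or undocumented)");
}

int main(void)
{
	nmi_report(0x80);
	nmi_report(0x00);
	return 0;
}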
611 | |||
612 | static int dummy_nmi_callback(struct pt_regs * regs, int cpu) | ||
613 | { | ||
614 | return 0; | ||
615 | } | ||
616 | |||
617 | static nmi_callback_t nmi_callback = dummy_nmi_callback; | ||
618 | |||
619 | fastcall void do_nmi(struct pt_regs * regs, long error_code) | ||
620 | { | ||
621 | int cpu; | ||
622 | |||
623 | nmi_enter(); | ||
624 | |||
625 | cpu = smp_processor_id(); | ||
626 | ++nmi_count(cpu); | ||
627 | |||
628 | if (!nmi_callback(regs, cpu)) | ||
629 | default_do_nmi(regs); | ||
630 | |||
631 | nmi_exit(); | ||
632 | } | ||
633 | |||
634 | void set_nmi_callback(nmi_callback_t callback) | ||
635 | { | ||
636 | nmi_callback = callback; | ||
637 | } | ||
638 | |||
639 | void unset_nmi_callback(void) | ||
640 | { | ||
641 | nmi_callback = dummy_nmi_callback; | ||
642 | } | ||
643 | |||
644 | #ifdef CONFIG_KPROBES | ||
645 | fastcall int do_int3(struct pt_regs *regs, long error_code) | ||
646 | { | ||
647 | if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) | ||
648 | == NOTIFY_STOP) | ||
649 | return 1; | ||
650 | /* This is an interrupt gate, because kprobes wants interrupts | ||
651 | disabled. Normal trap handlers don't. */ | ||
652 | restore_interrupts(regs); | ||
653 | do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL); | ||
654 | return 0; | ||
655 | } | ||
656 | #endif | ||
657 | |||
658 | /* | ||
659 | * Our handling of the processor debug registers is non-trivial. | ||
660 | * We do not clear them on entry and exit from the kernel. Therefore | ||
661 | * it is possible to get a watchpoint trap here from inside the kernel. | ||
662 | * However, the code in ./ptrace.c has ensured that the user can | ||
663 | * only set watchpoints on userspace addresses. Therefore the in-kernel | ||
664 | * watchpoint trap can only occur in code which is reading/writing | ||
665 | * from user space. Such code must not hold kernel locks (since it | ||
666 | * can equally take a page fault), therefore it is safe to call | ||
667 | * force_sig_info even though that claims and releases locks. | ||
668 | * | ||
669 | * Code in ./signal.c ensures that the debug control register | ||
670 | * is restored before we deliver any signal, and therefore that | ||
671 | * user code runs with the correct debug control register even though | ||
672 | * we clear it here. | ||
673 | * | ||
674 | * Being careful here means that we don't have to be as careful in a | ||
675 | * lot of more complicated places (task switching can be a bit lazy | ||
676 | * about restoring all the debug state, and ptrace doesn't have to | ||
677 | * find every occurrence of the TF bit that could be saved away even | ||
678 | * by user code) | ||
679 | */ | ||
680 | fastcall void do_debug(struct pt_regs * regs, long error_code) | ||
681 | { | ||
682 | unsigned int condition; | ||
683 | struct task_struct *tsk = current; | ||
684 | |||
685 | __asm__ __volatile__("movl %%db6,%0" : "=r" (condition)); | ||
686 | |||
687 | if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, | ||
688 | SIGTRAP) == NOTIFY_STOP) | ||
689 | return; | ||
690 | /* It's safe to allow irq's after DR6 has been saved */ | ||
691 | if (regs->eflags & X86_EFLAGS_IF) | ||
692 | local_irq_enable(); | ||
693 | |||
694 | /* Mask out spurious debug traps due to lazy DR7 setting */ | ||
695 | if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { | ||
696 | if (!tsk->thread.debugreg[7]) | ||
697 | goto clear_dr7; | ||
698 | } | ||
699 | |||
700 | if (regs->eflags & VM_MASK) | ||
701 | goto debug_vm86; | ||
702 | |||
703 | /* Save debug status register where ptrace can see it */ | ||
704 | tsk->thread.debugreg[6] = condition; | ||
705 | |||
706 | /* | ||
707 | * Single-stepping through TF: make sure we ignore any events in | ||
708 | * kernel space (but re-enable TF when returning to user mode). | ||
709 | */ | ||
710 | if (condition & DR_STEP) { | ||
711 | /* | ||
712 | * We already checked v86 mode above, so we can | ||
713 | * check for kernel mode by just checking the CPL | ||
714 | * of CS. | ||
715 | */ | ||
716 | if ((regs->xcs & 3) == 0) | ||
717 | goto clear_TF_reenable; | ||
718 | } | ||
719 | |||
720 | /* Ok, finally something we can handle */ | ||
721 | send_sigtrap(tsk, regs, error_code); | ||
722 | |||
723 | /* Disable additional traps. They'll be re-enabled when | ||
724 | * the signal is delivered. | ||
725 | */ | ||
726 | clear_dr7: | ||
727 | __asm__("movl %0,%%db7" | ||
728 | : /* no output */ | ||
729 | : "r" (0)); | ||
730 | return; | ||
731 | |||
732 | debug_vm86: | ||
733 | handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); | ||
734 | return; | ||
735 | |||
736 | clear_TF_reenable: | ||
737 | set_tsk_thread_flag(tsk, TIF_SINGLESTEP); | ||
738 | regs->eflags &= ~TF_MASK; | ||
739 | return; | ||
740 | } | ||
741 | |||
742 | /* | ||
743 | * Note that we play around with the 'TS' bit in an attempt to get | ||
744 | * the correct behaviour even in the presence of the asynchronous | ||
745 | * IRQ13 behaviour | ||
746 | */ | ||
747 | void math_error(void __user *eip) | ||
748 | { | ||
749 | struct task_struct * task; | ||
750 | siginfo_t info; | ||
751 | unsigned short cwd, swd; | ||
752 | |||
753 | /* | ||
754 | * Save the info for the exception handler and clear the error. | ||
755 | */ | ||
756 | task = current; | ||
757 | save_init_fpu(task); | ||
758 | task->thread.trap_no = 16; | ||
759 | task->thread.error_code = 0; | ||
760 | info.si_signo = SIGFPE; | ||
761 | info.si_errno = 0; | ||
762 | info.si_code = __SI_FAULT; | ||
763 | info.si_addr = eip; | ||
764 | /* | ||
765 | * (~cwd & swd) will mask out exceptions that are not set to unmasked | ||
766 | * status. 0x3f is the exception bits in these regs, 0x200 is the | ||
767 | * C1 reg you need in case of a stack fault, 0x040 is the stack | ||
768 | * fault bit. We should only be taking one exception at a time, | ||
769 | * so if this combination doesn't produce any single exception, | ||
770 | * then we have a bad program that isn't synchronizing its FPU usage | ||
771 | * and it will suffer the consequences since we won't be able to | ||
772 | * fully reproduce the context of the exception. | ||
773 | */ | ||
774 | cwd = get_fpu_cwd(task); | ||
775 | swd = get_fpu_swd(task); | ||
776 | switch (((~cwd) & swd & 0x3f) | (swd & 0x240)) { | ||
777 | case 0x000: | ||
778 | default: | ||
779 | break; | ||
780 | case 0x001: /* Invalid Op */ | ||
781 | case 0x041: /* Stack Fault */ | ||
782 | case 0x241: /* Stack Fault | Direction */ | ||
783 | info.si_code = FPE_FLTINV; | ||
784 | /* Should we clear the SF or let user space do it ???? */ | ||
785 | break; | ||
786 | case 0x002: /* Denormalize */ | ||
787 | case 0x010: /* Underflow */ | ||
788 | info.si_code = FPE_FLTUND; | ||
789 | break; | ||
790 | case 0x004: /* Zero Divide */ | ||
791 | info.si_code = FPE_FLTDIV; | ||
792 | break; | ||
793 | case 0x008: /* Overflow */ | ||
794 | info.si_code = FPE_FLTOVF; | ||
795 | break; | ||
796 | case 0x020: /* Precision */ | ||
797 | info.si_code = FPE_FLTRES; | ||
798 | break; | ||
799 | } | ||
800 | force_sig_info(SIGFPE, &info, task); | ||
801 | } | ||
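The switch above keys on ((~cwd) & swd & 0x3f) | (swd & 0x240): the control word's low six bits are mask bits, so ~cwd & swd keeps only exceptions that are both raised and unmasked, while bits 0x200 (C1) and 0x040 (stack fault) are folded in to distinguish a stack fault and its direction. A user-space rendering of that classification, with the case values copied from the code above:

#include <stdio.h>

/* Classify an x87 exception from the control and status words,
 * using the same key as math_error() above. */
static const char *fpu_exception(unsigned short cwd, unsigned short swd)
{
	switch (((~cwd) & swd & 0x3f) | (swd & 0x240)) {
	case 0x001:			/* invalid op */
	case 0x041:			/* stack fault */
	case 0x241:			/* stack fault | direction */
		return "FPE_FLTINV";
	case 0x002:			/* denormalize */
	case 0x010:			/* underflow */
		return "FPE_FLTUND";
	case 0x004: return "FPE_FLTDIV";
	case 0x008: return "FPE_FLTOVF";
	case 0x020: return "FPE_FLTRES";
	default:    return "none/ambiguous";
	}
}

int main(void)
{
	/* Default control word 0x037f with the zero-divide mask cleared
	 * (0x037b), and the zero-divide flag raised in the status word. */
	printf("%s\n", fpu_exception(0x037b, 0x0004));
	return 0;
}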
802 | |||
803 | fastcall void do_coprocessor_error(struct pt_regs * regs, long error_code) | ||
804 | { | ||
805 | ignore_fpu_irq = 1; | ||
806 | math_error((void __user *)regs->eip); | ||
807 | } | ||
808 | |||
809 | static void simd_math_error(void __user *eip) | ||
810 | { | ||
811 | struct task_struct * task; | ||
812 | siginfo_t info; | ||
813 | unsigned short mxcsr; | ||
814 | |||
815 | /* | ||
816 | * Save the info for the exception handler and clear the error. | ||
817 | */ | ||
818 | task = current; | ||
819 | save_init_fpu(task); | ||
820 | task->thread.trap_no = 19; | ||
821 | task->thread.error_code = 0; | ||
822 | info.si_signo = SIGFPE; | ||
823 | info.si_errno = 0; | ||
824 | info.si_code = __SI_FAULT; | ||
825 | info.si_addr = eip; | ||
826 | /* | ||
827 | * The SIMD FPU exceptions are handled a little differently, as there | ||
828 | * is only a single status/control register. Thus, to determine which | ||
829 | * unmasked exception was caught we must mask the exception mask bits | ||
830 | * at 0x1f80, and then use these to mask the exception bits at 0x3f. | ||
831 | */ | ||
832 | mxcsr = get_fpu_mxcsr(task); | ||
833 | switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) { | ||
834 | case 0x000: | ||
835 | default: | ||
836 | break; | ||
837 | case 0x001: /* Invalid Op */ | ||
838 | info.si_code = FPE_FLTINV; | ||
839 | break; | ||
840 | case 0x002: /* Denormalize */ | ||
841 | case 0x010: /* Underflow */ | ||
842 | info.si_code = FPE_FLTUND; | ||
843 | break; | ||
844 | case 0x004: /* Zero Divide */ | ||
845 | info.si_code = FPE_FLTDIV; | ||
846 | break; | ||
847 | case 0x008: /* Overflow */ | ||
848 | info.si_code = FPE_FLTOVF; | ||
849 | break; | ||
850 | case 0x020: /* Precision */ | ||
851 | info.si_code = FPE_FLTRES; | ||
852 | break; | ||
853 | } | ||
854 | force_sig_info(SIGFPE, &info, task); | ||
855 | } | ||
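For SSE the mask and flag bits live in one register: MXCSR keeps the exception flags in bits 0-5 and the corresponding mask bits in 7-12 (0x1f80), so shifting the masks down by seven and inverting yields the unmasked-and-raised set. The same expression, lifted out for clarity (illustrative):

#include <stdio.h>

/* Unmasked, pending SIMD exceptions, as in simd_math_error() above. */
static unsigned int mxcsr_unmasked(unsigned int mxcsr)
{
	unsigned int masks = (mxcsr & 0x1f80) >> 7;	/* align masks w/ flags */
	return ~masks & (mxcsr & 0x3f);
}

int main(void)
{
	/* Default MXCSR 0x1f80 masks everything, so nothing is reported
	 * even with all six flag bits raised. */
	printf("%#x\n", mxcsr_unmasked(0x1f80 | 0x3f));
	/* Unmask zero-divide (clear bit 9) and raise its flag (bit 2). */
	printf("%#x\n", mxcsr_unmasked((0x1f80 & ~0x200) | 0x04));
	return 0;
}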
856 | |||
857 | fastcall void do_simd_coprocessor_error(struct pt_regs * regs, | ||
858 | long error_code) | ||
859 | { | ||
860 | if (cpu_has_xmm) { | ||
861 | /* Handle SIMD FPU exceptions on PIII+ processors. */ | ||
862 | ignore_fpu_irq = 1; | ||
863 | simd_math_error((void __user *)regs->eip); | ||
864 | } else { | ||
865 | /* | ||
866 | * Handle the strange cache-flush-from-user-space exception | ||
867 | * in all other cases. This is undocumented behaviour. | ||
868 | */ | ||
869 | if (regs->eflags & VM_MASK) { | ||
870 | handle_vm86_fault((struct kernel_vm86_regs *)regs, | ||
871 | error_code); | ||
872 | return; | ||
873 | } | ||
874 | die_if_kernel("cache flush denied", regs, error_code); | ||
875 | current->thread.trap_no = 19; | ||
876 | current->thread.error_code = error_code; | ||
877 | force_sig(SIGSEGV, current); | ||
878 | } | ||
879 | } | ||
880 | |||
881 | fastcall void do_spurious_interrupt_bug(struct pt_regs * regs, | ||
882 | long error_code) | ||
883 | { | ||
884 | #if 0 | ||
885 | /* No need to warn about this any longer. */ | ||
886 | printk("Ignoring P6 Local APIC Spurious Interrupt Bug...\n"); | ||
887 | #endif | ||
888 | } | ||
889 | |||
890 | fastcall void setup_x86_bogus_stack(unsigned char * stk) | ||
891 | { | ||
892 | unsigned long *switch16_ptr, *switch32_ptr; | ||
893 | struct pt_regs *regs; | ||
894 | unsigned long stack_top, stack_bot; | ||
895 | unsigned short iret_frame16_off; | ||
896 | int cpu = smp_processor_id(); | ||
897 | /* reserve the space on 32bit stack for the magic switch16 pointer */ | ||
898 | memmove(stk, stk + 8, sizeof(struct pt_regs)); | ||
899 | switch16_ptr = (unsigned long *)(stk + sizeof(struct pt_regs)); | ||
900 | regs = (struct pt_regs *)stk; | ||
901 | /* now the switch32 on 16bit stack */ | ||
902 | stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu); | ||
903 | stack_top = stack_bot + CPU_16BIT_STACK_SIZE; | ||
904 | switch32_ptr = (unsigned long *)(stack_top - 8); | ||
905 | iret_frame16_off = CPU_16BIT_STACK_SIZE - 8 - 20; | ||
906 | /* copy iret frame on 16bit stack */ | ||
907 | memcpy((void *)(stack_bot + iret_frame16_off), ®s->eip, 20); | ||
908 | /* fill in the switch pointers */ | ||
909 | switch16_ptr[0] = (regs->esp & 0xffff0000) | iret_frame16_off; | ||
910 | switch16_ptr[1] = __ESPFIX_SS; | ||
911 | switch32_ptr[0] = (unsigned long)stk + sizeof(struct pt_regs) + | ||
912 | 8 - CPU_16BIT_STACK_SIZE; | ||
913 | switch32_ptr[1] = __KERNEL_DS; | ||
914 | } | ||
915 | |||
916 | fastcall unsigned char * fixup_x86_bogus_stack(unsigned short sp) | ||
917 | { | ||
918 | unsigned long *switch32_ptr; | ||
919 | unsigned char *stack16, *stack32; | ||
920 | unsigned long stack_top, stack_bot; | ||
921 | int len; | ||
922 | int cpu = smp_processor_id(); | ||
923 | stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu); | ||
924 | stack_top = stack_bot + CPU_16BIT_STACK_SIZE; | ||
925 | switch32_ptr = (unsigned long *)(stack_top - 8); | ||
926 | /* copy the data from 16bit stack to 32bit stack */ | ||
927 | len = CPU_16BIT_STACK_SIZE - 8 - sp; | ||
928 | stack16 = (unsigned char *)(stack_bot + sp); | ||
929 | stack32 = (unsigned char *) | ||
930 | (switch32_ptr[0] + CPU_16BIT_STACK_SIZE - 8 - len); | ||
931 | memcpy(stack32, stack16, len); | ||
932 | return stack32; | ||
933 | } | ||
934 | |||
935 | /* | ||
936 | * 'math_state_restore()' restores the FPU state of the current | ||
937 | * task, initializing it first if the task has never used the FPU. | ||
938 | * | ||
939 | * Careful.. There are problems with IBM-designed IRQ13 behaviour. | ||
940 | * Don't touch unless you *really* know how it works. | ||
941 | * | ||
942 | * Must be called with kernel preemption disabled (in this case, | ||
943 | * local interrupts are disabled at the call-site in entry.S). | ||
944 | */ | ||
945 | asmlinkage void math_state_restore(struct pt_regs regs) | ||
946 | { | ||
947 | struct thread_info *thread = current_thread_info(); | ||
948 | struct task_struct *tsk = thread->task; | ||
949 | |||
950 | clts(); /* Allow maths ops (or we recurse) */ | ||
951 | if (!tsk_used_math(tsk)) | ||
952 | init_fpu(tsk); | ||
953 | restore_fpu(tsk); | ||
954 | thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ | ||
955 | } | ||
956 | |||
957 | #ifndef CONFIG_MATH_EMULATION | ||
958 | |||
959 | asmlinkage void math_emulate(long arg) | ||
960 | { | ||
961 | printk("math-emulation not enabled and no coprocessor found.\n"); | ||
962 | printk("killing %s.\n",current->comm); | ||
963 | force_sig(SIGFPE,current); | ||
964 | schedule(); | ||
965 | } | ||
966 | |||
967 | #endif /* CONFIG_MATH_EMULATION */ | ||
968 | |||
969 | #ifdef CONFIG_X86_F00F_BUG | ||
970 | void __init trap_init_f00f_bug(void) | ||
971 | { | ||
972 | __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); | ||
973 | |||
974 | /* | ||
975 | * Update the IDT descriptor and reload the IDT so that | ||
976 | * it uses the read-only mapped virtual address. | ||
977 | */ | ||
978 | idt_descr.address = fix_to_virt(FIX_F00F_IDT); | ||
979 | __asm__ __volatile__("lidt %0" : : "m" (idt_descr)); | ||
980 | } | ||
981 | #endif | ||
982 | |||
983 | #define _set_gate(gate_addr,type,dpl,addr,seg) \ | ||
984 | do { \ | ||
985 | int __d0, __d1; \ | ||
986 | __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \ | ||
987 | "movw %4,%%dx\n\t" \ | ||
988 | "movl %%eax,%0\n\t" \ | ||
989 | "movl %%edx,%1" \ | ||
990 | :"=m" (*((long *) (gate_addr))), \ | ||
991 | "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \ | ||
992 | :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \ | ||
993 | "3" ((char *) (addr)),"2" ((seg) << 16)); \ | ||
994 | } while (0) | ||
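_set_gate() assembles an 8-byte i386 gate descriptor in two 32-bit halves: the low half is selector(16) | offset[15:0], the high half is offset[31:16] | P=1 (0x8000) | DPL<<13 | type<<8, which is exactly the 0x8000+(dpl<<13)+(type<<8) constant in the asm. A plain-C equivalent; types 14, 15, and 5 are interrupt, trap, and task gates, as used by the setters below, and the handler address and selector here are example values only.

#include <stdio.h>
#include <stdint.h>

struct gate { uint32_t a, b; };	/* two 32-bit halves of a descriptor */

static struct gate make_gate(uint32_t addr, uint16_t seg,
			     unsigned type, unsigned dpl)
{
	struct gate g;

	g.a = ((uint32_t)seg << 16) | (addr & 0xffff);
	g.b = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);
	return g;
}

int main(void)
{
	/* An interrupt gate (type 14) at DPL 0 for a made-up handler
	 * address, with 0x60 standing in for a kernel code selector. */
	struct gate g = make_gate(0xc0101234, 0x60, 14, 0);

	printf("%08x %08x\n", (unsigned)g.b, (unsigned)g.a);
	return 0;
}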
995 | |||
996 | |||
997 | /* | ||
998 | * This needs to use 'idt_table' rather than 'idt', and | ||
999 | * thus use the _nonmapped_ version of the IDT, as the | ||
1000 | * Pentium F0 0F bugfix can have resulted in the mapped | ||
1001 | * IDT being write-protected. | ||
1002 | */ | ||
1003 | void set_intr_gate(unsigned int n, void *addr) | ||
1004 | { | ||
1005 | _set_gate(idt_table+n,14,0,addr,__KERNEL_CS); | ||
1006 | } | ||
1007 | |||
1008 | /* | ||
1009 | * This routine sets up an interrupt gate at descriptor privilege level 3. | ||
1010 | */ | ||
1011 | static inline void set_system_intr_gate(unsigned int n, void *addr) | ||
1012 | { | ||
1013 | _set_gate(idt_table+n, 14, 3, addr, __KERNEL_CS); | ||
1014 | } | ||
1015 | |||
1016 | static void __init set_trap_gate(unsigned int n, void *addr) | ||
1017 | { | ||
1018 | _set_gate(idt_table+n,15,0,addr,__KERNEL_CS); | ||
1019 | } | ||
1020 | |||
1021 | static void __init set_system_gate(unsigned int n, void *addr) | ||
1022 | { | ||
1023 | _set_gate(idt_table+n,15,3,addr,__KERNEL_CS); | ||
1024 | } | ||
1025 | |||
1026 | static void __init set_task_gate(unsigned int n, unsigned int gdt_entry) | ||
1027 | { | ||
1028 | _set_gate(idt_table+n,5,0,0,(gdt_entry<<3)); | ||
1029 | } | ||
1030 | |||
1031 | |||
1032 | void __init trap_init(void) | ||
1033 | { | ||
1034 | #ifdef CONFIG_EISA | ||
1035 | void __iomem *p = ioremap(0x0FFFD9, 4); | ||
1036 | if (readl(p) == 'E'+('I'<<8)+('S'<<16)+('A'<<24)) { | ||
1037 | EISA_bus = 1; | ||
1038 | } | ||
1039 | iounmap(p); | ||
1040 | #endif | ||
1041 | |||
1042 | #ifdef CONFIG_X86_LOCAL_APIC | ||
1043 | init_apic_mappings(); | ||
1044 | #endif | ||
1045 | |||
1046 | set_trap_gate(0,÷_error); | ||
1047 | set_intr_gate(1,&debug); | ||
1048 | set_intr_gate(2,&nmi); | ||
1049 | set_system_intr_gate(3, &int3); /* int3-5 can be called from all */ | ||
1050 | set_system_gate(4,&overflow); | ||
1051 | set_system_gate(5,&bounds); | ||
1052 | set_trap_gate(6,&invalid_op); | ||
1053 | set_trap_gate(7,&device_not_available); | ||
1054 | set_task_gate(8,GDT_ENTRY_DOUBLEFAULT_TSS); | ||
1055 | set_trap_gate(9,&coprocessor_segment_overrun); | ||
1056 | set_trap_gate(10,&invalid_TSS); | ||
1057 | set_trap_gate(11,&segment_not_present); | ||
1058 | set_trap_gate(12,&stack_segment); | ||
1059 | set_trap_gate(13,&general_protection); | ||
1060 | set_intr_gate(14,&page_fault); | ||
1061 | set_trap_gate(15,&spurious_interrupt_bug); | ||
1062 | set_trap_gate(16,&coprocessor_error); | ||
1063 | set_trap_gate(17,&alignment_check); | ||
1064 | #ifdef CONFIG_X86_MCE | ||
1065 | set_trap_gate(18,&machine_check); | ||
1066 | #endif | ||
1067 | set_trap_gate(19,&simd_coprocessor_error); | ||
1068 | |||
1069 | set_system_gate(SYSCALL_VECTOR,&system_call); | ||
1070 | |||
1071 | /* | ||
1072 | * Should be a barrier for any external CPU state. | ||
1073 | */ | ||
1074 | cpu_init(); | ||
1075 | |||
1076 | trap_init_hook(); | ||
1077 | } | ||
1078 | |||
1079 | static int __init kstack_setup(char *s) | ||
1080 | { | ||
1081 | kstack_depth_to_print = simple_strtoul(s, NULL, 0); | ||
1082 | return 0; | ||
1083 | } | ||
1084 | __setup("kstack=", kstack_setup); | ||
diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c new file mode 100644 index 000000000000..2f3d52dacff7 --- /dev/null +++ b/arch/i386/kernel/vm86.c | |||
@@ -0,0 +1,804 @@ | |||
1 | /* | ||
2 | * linux/arch/i386/kernel/vm86.c | ||
3 | * | ||
4 | * Copyright (C) 1994 Linus Torvalds | ||
5 | * | ||
6 | * 29 dec 2001 - Fixed oopses caused by unchecked access to the vm86 | ||
7 | * stack - Manfred Spraul <manfreds@colorfullife.com> | ||
8 | * | ||
9 | * 22 mar 2002 - Manfred detected the stackfaults, but didn't handle | ||
10 | * them correctly. Now the emulation will be in a | ||
11 | * consistent state after stackfaults - Kasper Dupont | ||
12 | * <kasperd@daimi.au.dk> | ||
13 | * | ||
14 | * 22 mar 2002 - Added missing clear_IF in set_vflags_* Kasper Dupont | ||
15 | * <kasperd@daimi.au.dk> | ||
16 | * | ||
17 | * ?? ??? 2002 - Fixed premature returns from handle_vm86_fault | ||
18 | * caused by Kasper Dupont's changes - Stas Sergeev | ||
19 | * | ||
20 | * 4 apr 2002 - Fixed CHECK_IF_IN_TRAP broken by Stas' changes. | ||
21 | * Kasper Dupont <kasperd@daimi.au.dk> | ||
22 | * | ||
23 | * 9 apr 2002 - Changed syntax of macros in handle_vm86_fault. | ||
24 | * Kasper Dupont <kasperd@daimi.au.dk> | ||
25 | * | ||
26 | * 9 apr 2002 - Changed stack access macros to jump to a label | ||
27 | * instead of returning to userspace. This simplifies | ||
28 | * do_int, and is needed by handle_vm86_fault. Kasper | ||
29 | * Dupont <kasperd@daimi.au.dk> | ||
30 | * | ||
31 | */ | ||
32 | |||
33 | #include <linux/config.h> | ||
34 | #include <linux/errno.h> | ||
35 | #include <linux/interrupt.h> | ||
36 | #include <linux/sched.h> | ||
37 | #include <linux/kernel.h> | ||
38 | #include <linux/signal.h> | ||
39 | #include <linux/string.h> | ||
40 | #include <linux/mm.h> | ||
41 | #include <linux/smp.h> | ||
42 | #include <linux/smp_lock.h> | ||
43 | #include <linux/highmem.h> | ||
44 | #include <linux/ptrace.h> | ||
45 | |||
46 | #include <asm/uaccess.h> | ||
47 | #include <asm/io.h> | ||
48 | #include <asm/tlbflush.h> | ||
49 | #include <asm/irq.h> | ||
50 | |||
51 | /* | ||
52 | * Known problems: | ||
53 | * | ||
54 | * Interrupt handling is not guaranteed: | ||
55 | * - a real x86 will disable all interrupts for one instruction | ||
56 | * after a "mov ss,xx" to make stack handling atomic even without | ||
57 | * the 'lss' instruction. We can't guarantee this in v86 mode, | ||
58 | * as the next instruction might result in a page fault or similar. | ||
59 | * - a real x86 will have interrupts disabled for one instruction | ||
60 | * past the 'sti' that enables them. We don't bother with all the | ||
61 | * details yet. | ||
62 | * | ||
63 | * Let's hope these problems do not actually matter for anything. | ||
64 | */ | ||
65 | |||
66 | |||
67 | #define KVM86 ((struct kernel_vm86_struct *)regs) | ||
68 | #define VMPI KVM86->vm86plus | ||
69 | |||
70 | |||
71 | /* | ||
72 | * 8- and 16-bit register defines. | ||
73 | */ | ||
74 | #define AL(regs) (((unsigned char *)&((regs)->eax))[0]) | ||
75 | #define AH(regs) (((unsigned char *)&((regs)->eax))[1]) | ||
76 | #define IP(regs) (*(unsigned short *)&((regs)->eip)) | ||
77 | #define SP(regs) (*(unsigned short *)&((regs)->esp)) | ||
78 | |||
79 | /* | ||
80 | * virtual flags (16 and 32-bit versions) | ||
81 | */ | ||
82 | #define VFLAGS (*(unsigned short *)&(current->thread.v86flags)) | ||
83 | #define VEFLAGS (current->thread.v86flags) | ||
84 | |||
85 | #define set_flags(X,new,mask) \ | ||
86 | ((X) = ((X) & ~(mask)) | ((new) & (mask))) | ||
87 | |||
88 | #define SAFE_MASK (0xDD5) | ||
89 | #define RETURN_MASK (0xDFF) | ||
90 | |||
91 | #define VM86_REGS_PART2 orig_eax | ||
92 | #define VM86_REGS_SIZE1 \ | ||
93 | ( (unsigned)( & (((struct kernel_vm86_regs *)0)->VM86_REGS_PART2) ) ) | ||
94 | #define VM86_REGS_SIZE2 (sizeof(struct kernel_vm86_regs) - VM86_REGS_SIZE1) | ||
95 | |||
96 | struct pt_regs * FASTCALL(save_v86_state(struct kernel_vm86_regs * regs)); | ||
97 | struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs) | ||
98 | { | ||
99 | struct tss_struct *tss; | ||
100 | struct pt_regs *ret; | ||
101 | unsigned long tmp; | ||
102 | |||
103 | /* | ||
104 | * This gets called from entry.S with interrupts disabled, but | ||
105 | * from process context. Enable interrupts here, before trying | ||
106 | * to access user space. | ||
107 | */ | ||
108 | local_irq_enable(); | ||
109 | |||
110 | if (!current->thread.vm86_info) { | ||
111 | printk("no vm86_info: BAD\n"); | ||
112 | do_exit(SIGSEGV); | ||
113 | } | ||
114 | set_flags(regs->eflags, VEFLAGS, VIF_MASK | current->thread.v86mask); | ||
115 | tmp = copy_to_user(¤t->thread.vm86_info->regs,regs, VM86_REGS_SIZE1); | ||
116 | tmp += copy_to_user(¤t->thread.vm86_info->regs.VM86_REGS_PART2, | ||
117 | ®s->VM86_REGS_PART2, VM86_REGS_SIZE2); | ||
118 | tmp += put_user(current->thread.screen_bitmap,¤t->thread.vm86_info->screen_bitmap); | ||
119 | if (tmp) { | ||
120 | printk("vm86: could not access userspace vm86_info\n"); | ||
121 | do_exit(SIGSEGV); | ||
122 | } | ||
123 | |||
124 | tss = &per_cpu(init_tss, get_cpu()); | ||
125 | current->thread.esp0 = current->thread.saved_esp0; | ||
126 | current->thread.sysenter_cs = __KERNEL_CS; | ||
127 | load_esp0(tss, ¤t->thread); | ||
128 | current->thread.saved_esp0 = 0; | ||
129 | put_cpu(); | ||
130 | |||
131 | loadsegment(fs, current->thread.saved_fs); | ||
132 | loadsegment(gs, current->thread.saved_gs); | ||
133 | ret = KVM86->regs32; | ||
134 | return ret; | ||
135 | } | ||
136 | |||
137 | static void mark_screen_rdonly(struct task_struct * tsk) | ||
138 | { | ||
139 | pgd_t *pgd; | ||
140 | pud_t *pud; | ||
141 | pmd_t *pmd; | ||
142 | pte_t *pte, *mapped; | ||
143 | int i; | ||
144 | |||
145 | preempt_disable(); | ||
146 | spin_lock(&tsk->mm->page_table_lock); | ||
147 | pgd = pgd_offset(tsk->mm, 0xA0000); | ||
148 | if (pgd_none_or_clear_bad(pgd)) | ||
149 | goto out; | ||
150 | pud = pud_offset(pgd, 0xA0000); | ||
151 | if (pud_none_or_clear_bad(pud)) | ||
152 | goto out; | ||
153 | pmd = pmd_offset(pud, 0xA0000); | ||
154 | if (pmd_none_or_clear_bad(pmd)) | ||
155 | goto out; | ||
156 | pte = mapped = pte_offset_map(pmd, 0xA0000); | ||
157 | for (i = 0; i < 32; i++) { | ||
158 | if (pte_present(*pte)) | ||
159 | set_pte(pte, pte_wrprotect(*pte)); | ||
160 | pte++; | ||
161 | } | ||
162 | pte_unmap(mapped); | ||
163 | out: | ||
164 | spin_unlock(&tsk->mm->page_table_lock); | ||
165 | preempt_enable(); | ||
166 | flush_tlb(); | ||
167 | } | ||
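mark_screen_rdonly() write-protects the 32 pages backing the VGA window at 0xA0000-0xBFFFF by walking pgd, pud, pmd, and pte for that address; on non-PAE i386 the pud/pmd levels are folded away, leaving the classic two-level scheme. A sketch of the index arithmetic under that non-PAE assumption:

#include <stdio.h>
#include <stdint.h>

/* Index arithmetic for classic non-PAE i386 two-level paging:
 * top 10 bits select the page directory entry, next 10 the PTE. */
int main(void)
{
	uint32_t addr = 0xA0000;	/* start of the VGA window */

	printf("pgd index %u, pte index %u\n",
	       (unsigned)(addr >> 22), (unsigned)((addr >> 12) & 0x3ff));
	printf("the 32 write-protected pages span %#x-%#x\n",
	       (unsigned)addr, (unsigned)(addr + 32 * 4096 - 1));
	return 0;
}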
168 | |||
169 | |||
170 | |||
171 | static int do_vm86_irq_handling(int subfunction, int irqnumber); | ||
172 | static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); | ||
173 | |||
174 | asmlinkage int sys_vm86old(struct pt_regs regs) | ||
175 | { | ||
176 | struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs.ebx; | ||
177 | struct kernel_vm86_struct info; /* declare this _on top_, | ||
178 | * this avoids wasting stack space. | ||
179 | * This remains on the stack until we | ||
180 | * return to 32 bit user space. | ||
181 | */ | ||
182 | struct task_struct *tsk; | ||
183 | int tmp, ret = -EPERM; | ||
184 | |||
185 | tsk = current; | ||
186 | if (tsk->thread.saved_esp0) | ||
187 | goto out; | ||
188 | tmp = copy_from_user(&info, v86, VM86_REGS_SIZE1); | ||
189 | tmp += copy_from_user(&info.regs.VM86_REGS_PART2, &v86->regs.VM86_REGS_PART2, | ||
190 | (long)&info.vm86plus - (long)&info.regs.VM86_REGS_PART2); | ||
191 | ret = -EFAULT; | ||
192 | if (tmp) | ||
193 | goto out; | ||
194 | memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus); | ||
195 | info.regs32 = ®s; | ||
196 | tsk->thread.vm86_info = v86; | ||
197 | do_sys_vm86(&info, tsk); | ||
198 | ret = 0; /* we never return here */ | ||
199 | out: | ||
200 | return ret; | ||
201 | } | ||
202 | |||
203 | |||
204 | asmlinkage int sys_vm86(struct pt_regs regs) | ||
205 | { | ||
206 | struct kernel_vm86_struct info; /* declare this _on top_, | ||
207 | * this avoids wasting stack space. | ||
208 | * This remains on the stack until we | ||
209 | * return to 32 bit user space. | ||
210 | */ | ||
211 | struct task_struct *tsk; | ||
212 | int tmp, ret; | ||
213 | struct vm86plus_struct __user *v86; | ||
214 | |||
215 | tsk = current; | ||
216 | switch (regs.ebx) { | ||
217 | case VM86_REQUEST_IRQ: | ||
218 | case VM86_FREE_IRQ: | ||
219 | case VM86_GET_IRQ_BITS: | ||
220 | case VM86_GET_AND_RESET_IRQ: | ||
221 | ret = do_vm86_irq_handling(regs.ebx, (int)regs.ecx); | ||
222 | goto out; | ||
223 | case VM86_PLUS_INSTALL_CHECK: | ||
224 | /* NOTE: on old vm86 stuff this will return the error | ||
225 | from verify_area(), because the subfunction is | ||
226 | interpreted as an (invalid) address of a vm86_struct. | ||
227 | So the installation check works. | ||
228 | */ | ||
229 | ret = 0; | ||
230 | goto out; | ||
231 | } | ||
232 | |||
233 | /* we come here only for functions VM86_ENTER, VM86_ENTER_NO_BYPASS */ | ||
234 | ret = -EPERM; | ||
235 | if (tsk->thread.saved_esp0) | ||
236 | goto out; | ||
237 | v86 = (struct vm86plus_struct __user *)regs.ecx; | ||
238 | tmp = copy_from_user(&info, v86, VM86_REGS_SIZE1); | ||
239 | tmp += copy_from_user(&info.regs.VM86_REGS_PART2, &v86->regs.VM86_REGS_PART2, | ||
240 | (long)&info.regs32 - (long)&info.regs.VM86_REGS_PART2); | ||
241 | ret = -EFAULT; | ||
242 | if (tmp) | ||
243 | goto out; | ||
244 | info.regs32 = ®s; | ||
245 | info.vm86plus.is_vm86pus = 1; | ||
246 | tsk->thread.vm86_info = (struct vm86_struct __user *)v86; | ||
247 | do_sys_vm86(&info, tsk); | ||
248 | ret = 0; /* we never return here */ | ||
249 | out: | ||
250 | return ret; | ||
251 | } | ||
252 | |||
253 | |||
254 | static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk) | ||
255 | { | ||
256 | struct tss_struct *tss; | ||
257 | /* | ||
258 | * make sure the vm86() system call doesn't try to do anything silly | ||
259 | */ | ||
260 | info->regs.__null_ds = 0; | ||
261 | info->regs.__null_es = 0; | ||
262 | |||
263 | /* we are clearing fs,gs later just before "jmp resume_userspace", | ||
264 | * because starting with Linux 2.1.x they are no longer saved/restored | ||
265 | */ | ||
266 | |||
267 | /* | ||
268 | * The eflags register is also special: we cannot trust that the user | ||
269 | * has set it up safely, so this makes sure interrupt etc flags are | ||
270 | * inherited from protected mode. | ||
271 | */ | ||
272 | VEFLAGS = info->regs.eflags; | ||
273 | info->regs.eflags &= SAFE_MASK; | ||
274 | info->regs.eflags |= info->regs32->eflags & ~SAFE_MASK; | ||
275 | info->regs.eflags |= VM_MASK; | ||
276 | |||
277 | switch (info->cpu_type) { | ||
278 | case CPU_286: | ||
279 | tsk->thread.v86mask = 0; | ||
280 | break; | ||
281 | case CPU_386: | ||
282 | tsk->thread.v86mask = NT_MASK | IOPL_MASK; | ||
283 | break; | ||
284 | case CPU_486: | ||
285 | tsk->thread.v86mask = AC_MASK | NT_MASK | IOPL_MASK; | ||
286 | break; | ||
287 | default: | ||
288 | tsk->thread.v86mask = ID_MASK | AC_MASK | NT_MASK | IOPL_MASK; | ||
289 | break; | ||
290 | } | ||
291 | |||
292 | /* | ||
293 | * Save old state, set default return value (%eax) to 0 | ||
294 | */ | ||
295 | info->regs32->eax = 0; | ||
296 | tsk->thread.saved_esp0 = tsk->thread.esp0; | ||
297 | asm volatile("movl %%fs,%0":"=m" (tsk->thread.saved_fs)); | ||
298 | asm volatile("movl %%gs,%0":"=m" (tsk->thread.saved_gs)); | ||
299 | |||
300 | tss = &per_cpu(init_tss, get_cpu()); | ||
301 | tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0; | ||
302 | if (cpu_has_sep) | ||
303 | tsk->thread.sysenter_cs = 0; | ||
304 | load_esp0(tss, &tsk->thread); | ||
305 | put_cpu(); | ||
306 | |||
307 | tsk->thread.screen_bitmap = info->screen_bitmap; | ||
308 | if (info->flags & VM86_SCREEN_BITMAP) | ||
309 | mark_screen_rdonly(tsk); | ||
310 | __asm__ __volatile__( | ||
311 | "xorl %%eax,%%eax; movl %%eax,%%fs; movl %%eax,%%gs\n\t" | ||
312 | "movl %0,%%esp\n\t" | ||
313 | "movl %1,%%ebp\n\t" | ||
314 | "jmp resume_userspace" | ||
315 | : /* no outputs */ | ||
316 | :"r" (&info->regs), "r" (tsk->thread_info) : "ax"); | ||
317 | /* we never return here */ | ||
318 | } | ||
319 | |||
320 | static inline void return_to_32bit(struct kernel_vm86_regs * regs16, int retval) | ||
321 | { | ||
322 | struct pt_regs * regs32; | ||
323 | |||
324 | regs32 = save_v86_state(regs16); | ||
325 | regs32->eax = retval; | ||
326 | __asm__ __volatile__("movl %0,%%esp\n\t" | ||
327 | "movl %1,%%ebp\n\t" | ||
328 | "jmp resume_userspace" | ||
329 | : : "r" (regs32), "r" (current_thread_info())); | ||
330 | } | ||
331 | |||
332 | static inline void set_IF(struct kernel_vm86_regs * regs) | ||
333 | { | ||
334 | VEFLAGS |= VIF_MASK; | ||
335 | if (VEFLAGS & VIP_MASK) | ||
336 | return_to_32bit(regs, VM86_STI); | ||
337 | } | ||
338 | |||
339 | static inline void clear_IF(struct kernel_vm86_regs * regs) | ||
340 | { | ||
341 | VEFLAGS &= ~VIF_MASK; | ||
342 | } | ||
343 | |||
344 | static inline void clear_TF(struct kernel_vm86_regs * regs) | ||
345 | { | ||
346 | regs->eflags &= ~TF_MASK; | ||
347 | } | ||
348 | |||
349 | static inline void clear_AC(struct kernel_vm86_regs * regs) | ||
350 | { | ||
351 | regs->eflags &= ~AC_MASK; | ||
352 | } | ||
353 | |||
354 | /* It is correct to call set_IF(regs) from the set_vflags_* | ||
355 | * functions. However someone forgot to call clear_IF(regs) | ||
356 | * in the opposite case. | ||
357 | * After the command sequence CLI PUSHF STI POPF you should | ||
358 | * end up with interrups disabled, but you ended up with | ||
359 | * interrupts enabled. | ||
360 | * ( I was testing my own changes, but the only bug I | ||
361 | * could find was in a function I had not changed. ) | ||
362 | * [KD] | ||
363 | */ | ||
364 | |||
365 | static inline void set_vflags_long(unsigned long eflags, struct kernel_vm86_regs * regs) | ||
366 | { | ||
367 | set_flags(VEFLAGS, eflags, current->thread.v86mask); | ||
368 | set_flags(regs->eflags, eflags, SAFE_MASK); | ||
369 | if (eflags & IF_MASK) | ||
370 | set_IF(regs); | ||
371 | else | ||
372 | clear_IF(regs); | ||
373 | } | ||
374 | |||
375 | static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs * regs) | ||
376 | { | ||
377 | set_flags(VFLAGS, flags, current->thread.v86mask); | ||
378 | set_flags(regs->eflags, flags, SAFE_MASK); | ||
379 | if (flags & IF_MASK) | ||
380 | set_IF(regs); | ||
381 | else | ||
382 | clear_IF(regs); | ||
383 | } | ||
384 | |||
385 | static inline unsigned long get_vflags(struct kernel_vm86_regs * regs) | ||
386 | { | ||
387 | unsigned long flags = regs->eflags & RETURN_MASK; | ||
388 | |||
389 | if (VEFLAGS & VIF_MASK) | ||
390 | flags |= IF_MASK; | ||
391 | flags |= IOPL_MASK; | ||
392 | return flags | (VEFLAGS & current->thread.v86mask); | ||
393 | } | ||
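vm86 keeps IF virtual: the guest's notion of the interrupt flag lives in VEFLAGS (VIF), while the hardware eflags only ever takes bits inside SAFE_MASK. set_flags() is a masked merge, (X & ~mask) | (new & mask), and get_vflags() reassembles the value the guest should see. A minimal sketch of the masked-merge idiom; the constants are copied from the defines above, the scenario is invented.

#include <stdio.h>

/* Masked merge, as in set_flags() above: only `mask` bits of `new`
 * land in `x`. */
#define SET_FLAGS(x, new, mask) ((x) = ((x) & ~(mask)) | ((new) & (mask)))

#define IF_MASK   0x00000200UL
#define SAFE_MASK 0x00000DD5UL	/* bits a vm86 guest may really change */

int main(void)
{
	unsigned long real  = 0x246;		/* host eflags, IF set */
	unsigned long guest = real & ~IF_MASK;	/* guest executed cli */

	SET_FLAGS(real, guest, SAFE_MASK);	/* what reaches hardware */
	printf("real eflags %#lx: IF %s\n", real,
	       (real & IF_MASK) ? "still set (cli stayed virtual)" : "clear");
	return 0;
}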
394 | |||
395 | static inline int is_revectored(int nr, struct revectored_struct * bitmap) | ||
396 | { | ||
397 | __asm__ __volatile__("btl %2,%1\n\tsbbl %0,%0" | ||
398 | :"=r" (nr) | ||
399 | :"m" (*bitmap),"r" (nr)); | ||
400 | return nr; | ||
401 | } | ||
402 | |||
403 | #define val_byte(val, n) (((__u8 *)&val)[n]) | ||
404 | |||
405 | #define pushb(base, ptr, val, err_label) \ | ||
406 | do { \ | ||
407 | __u8 __val = val; \ | ||
408 | ptr--; \ | ||
409 | if (put_user(__val, base + ptr) < 0) \ | ||
410 | goto err_label; \ | ||
411 | } while(0) | ||
412 | |||
413 | #define pushw(base, ptr, val, err_label) \ | ||
414 | do { \ | ||
415 | __u16 __val = val; \ | ||
416 | ptr--; \ | ||
417 | if (put_user(val_byte(__val, 1), base + ptr) < 0) \ | ||
418 | goto err_label; \ | ||
419 | ptr--; \ | ||
420 | if (put_user(val_byte(__val, 0), base + ptr) < 0) \ | ||
421 | goto err_label; \ | ||
422 | } while(0) | ||
423 | |||
424 | #define pushl(base, ptr, val, err_label) \ | ||
425 | do { \ | ||
426 | __u32 __val = val; \ | ||
427 | ptr--; \ | ||
428 | if (put_user(val_byte(__val, 3), base + ptr) < 0) \ | ||
429 | goto err_label; \ | ||
430 | ptr--; \ | ||
431 | if (put_user(val_byte(__val, 2), base + ptr) < 0) \ | ||
432 | goto err_label; \ | ||
433 | ptr--; \ | ||
434 | if (put_user(val_byte(__val, 1), base + ptr) < 0) \ | ||
435 | goto err_label; \ | ||
436 | ptr--; \ | ||
437 | if (put_user(val_byte(__val, 0), base + ptr) < 0) \ | ||
438 | goto err_label; \ | ||
439 | } while(0) | ||
440 | |||
441 | #define popb(base, ptr, err_label) \ | ||
442 | ({ \ | ||
443 | __u8 __res; \ | ||
444 | if (get_user(__res, base + ptr) < 0) \ | ||
445 | goto err_label; \ | ||
446 | ptr++; \ | ||
447 | __res; \ | ||
448 | }) | ||
449 | |||
450 | #define popw(base, ptr, err_label) \ | ||
451 | ({ \ | ||
452 | __u16 __res; \ | ||
453 | if (get_user(val_byte(__res, 0), base + ptr) < 0) \ | ||
454 | goto err_label; \ | ||
455 | ptr++; \ | ||
456 | if (get_user(val_byte(__res, 1), base + ptr) < 0) \ | ||
457 | goto err_label; \ | ||
458 | ptr++; \ | ||
459 | __res; \ | ||
460 | }) | ||
461 | |||
462 | #define popl(base, ptr, err_label) \ | ||
463 | ({ \ | ||
464 | __u32 __res; \ | ||
465 | if (get_user(val_byte(__res, 0), base + ptr) < 0) \ | ||
466 | goto err_label; \ | ||
467 | ptr++; \ | ||
468 | if (get_user(val_byte(__res, 1), base + ptr) < 0) \ | ||
469 | goto err_label; \ | ||
470 | ptr++; \ | ||
471 | if (get_user(val_byte(__res, 2), base + ptr) < 0) \ | ||
472 | goto err_label; \ | ||
473 | ptr++; \ | ||
474 | if (get_user(val_byte(__res, 3), base + ptr) < 0) \ | ||
475 | goto err_label; \ | ||
476 | ptr++; \ | ||
477 | __res; \ | ||
478 | }) | ||
479 | |||
480 | /* There are so many possible reasons for this function to return | ||
481 | * VM86_INTx, so adding another doesn't bother me. We can expect | ||
482 | * userspace programs to be able to handle it. (Getting a problem | ||
483 | * in userspace is always better than an Oops anyway.) [KD] | ||
484 | */ | ||
485 | static void do_int(struct kernel_vm86_regs *regs, int i, | ||
486 | unsigned char __user * ssp, unsigned short sp) | ||
487 | { | ||
488 | unsigned long __user *intr_ptr; | ||
489 | unsigned long segoffs; | ||
490 | |||
491 | if (regs->cs == BIOSSEG) | ||
492 | goto cannot_handle; | ||
493 | if (is_revectored(i, &KVM86->int_revectored)) | ||
494 | goto cannot_handle; | ||
495 | if (i==0x21 && is_revectored(AH(regs),&KVM86->int21_revectored)) | ||
496 | goto cannot_handle; | ||
497 | intr_ptr = (unsigned long __user *) (i << 2); | ||
498 | if (get_user(segoffs, intr_ptr)) | ||
499 | goto cannot_handle; | ||
500 | if ((segoffs >> 16) == BIOSSEG) | ||
501 | goto cannot_handle; | ||
502 | pushw(ssp, sp, get_vflags(regs), cannot_handle); | ||
503 | pushw(ssp, sp, regs->cs, cannot_handle); | ||
504 | pushw(ssp, sp, IP(regs), cannot_handle); | ||
505 | regs->cs = segoffs >> 16; | ||
506 | SP(regs) -= 6; | ||
507 | IP(regs) = segoffs & 0xffff; | ||
508 | clear_TF(regs); | ||
509 | clear_IF(regs); | ||
510 | clear_AC(regs); | ||
511 | return; | ||
512 | |||
513 | cannot_handle: | ||
514 | return_to_32bit(regs, VM86_INTx + (i << 8)); | ||
515 | } | ||
516 | |||
517 | int handle_vm86_trap(struct kernel_vm86_regs * regs, long error_code, int trapno) | ||
518 | { | ||
519 | if (VMPI.is_vm86pus) { | ||
520 | if ( (trapno==3) || (trapno==1) ) | ||
521 | return_to_32bit(regs, VM86_TRAP + (trapno << 8)); | ||
522 | do_int(regs, trapno, (unsigned char __user *) (regs->ss << 4), SP(regs)); | ||
523 | return 0; | ||
524 | } | ||
525 | if (trapno != 1) | ||
526 | return 1; /* we let this be handled by the calling routine */ | ||
527 | if (current->ptrace & PT_PTRACED) { | ||
528 | unsigned long flags; | ||
529 | spin_lock_irqsave(¤t->sighand->siglock, flags); | ||
530 | sigdelset(¤t->blocked, SIGTRAP); | ||
531 | recalc_sigpending(); | ||
532 | spin_unlock_irqrestore(¤t->sighand->siglock, flags); | ||
533 | } | ||
534 | send_sig(SIGTRAP, current, 1); | ||
535 | current->thread.trap_no = trapno; | ||
536 | current->thread.error_code = error_code; | ||
537 | return 0; | ||
538 | } | ||
539 | |||
540 | void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code) | ||
541 | { | ||
542 | unsigned char opcode; | ||
543 | unsigned char __user *csp; | ||
544 | unsigned char __user *ssp; | ||
545 | unsigned short ip, sp; | ||
546 | int data32, pref_done; | ||
547 | |||
548 | #define CHECK_IF_IN_TRAP \ | ||
549 | if (VMPI.vm86dbg_active && VMPI.vm86dbg_TFpendig) \ | ||
550 | newflags |= TF_MASK | ||
551 | #define VM86_FAULT_RETURN do { \ | ||
552 | if (VMPI.force_return_for_pic && (VEFLAGS & (IF_MASK | VIF_MASK))) \ | ||
553 | return_to_32bit(regs, VM86_PICRETURN); \ | ||
554 | return; } while (0) | ||
555 | |||
556 | csp = (unsigned char __user *) (regs->cs << 4); | ||
557 | ssp = (unsigned char __user *) (regs->ss << 4); | ||
558 | sp = SP(regs); | ||
559 | ip = IP(regs); | ||
560 | |||
561 | data32 = 0; | ||
562 | pref_done = 0; | ||
563 | do { | ||
564 | switch (opcode = popb(csp, ip, simulate_sigsegv)) { | ||
565 | case 0x66: /* 32-bit data */ data32=1; break; | ||
566 | case 0x67: /* 32-bit address */ break; | ||
567 | case 0x2e: /* CS */ break; | ||
568 | case 0x3e: /* DS */ break; | ||
569 | case 0x26: /* ES */ break; | ||
570 | case 0x36: /* SS */ break; | ||
571 | case 0x65: /* GS */ break; | ||
572 | case 0x64: /* FS */ break; | ||
573 | case 0xf2: /* repnz */ break; | ||
574 | case 0xf3: /* rep */ break; | ||
575 | default: pref_done = 1; | ||
576 | } | ||
577 | } while (!pref_done); | ||
578 | |||
579 | switch (opcode) { | ||
580 | |||
581 | /* pushf */ | ||
582 | case 0x9c: | ||
583 | if (data32) { | ||
584 | pushl(ssp, sp, get_vflags(regs), simulate_sigsegv); | ||
585 | SP(regs) -= 4; | ||
586 | } else { | ||
587 | pushw(ssp, sp, get_vflags(regs), simulate_sigsegv); | ||
588 | SP(regs) -= 2; | ||
589 | } | ||
590 | IP(regs) = ip; | ||
591 | VM86_FAULT_RETURN; | ||
592 | |||
593 | /* popf */ | ||
594 | case 0x9d: | ||
595 | { | ||
596 | unsigned long newflags; | ||
597 | if (data32) { | ||
598 | newflags=popl(ssp, sp, simulate_sigsegv); | ||
599 | SP(regs) += 4; | ||
600 | } else { | ||
601 | newflags = popw(ssp, sp, simulate_sigsegv); | ||
602 | SP(regs) += 2; | ||
603 | } | ||
604 | IP(regs) = ip; | ||
605 | CHECK_IF_IN_TRAP; | ||
606 | if (data32) { | ||
607 | set_vflags_long(newflags, regs); | ||
608 | } else { | ||
609 | set_vflags_short(newflags, regs); | ||
610 | } | ||
611 | VM86_FAULT_RETURN; | ||
612 | } | ||
613 | |||
614 | /* int xx */ | ||
615 | case 0xcd: { | ||
616 | int intno=popb(csp, ip, simulate_sigsegv); | ||
617 | IP(regs) = ip; | ||
618 | if (VMPI.vm86dbg_active) { | ||
619 | if ( (1 << (intno &7)) & VMPI.vm86dbg_intxxtab[intno >> 3] ) | ||
620 | return_to_32bit(regs, VM86_INTx + (intno << 8)); | ||
621 | } | ||
622 | do_int(regs, intno, ssp, sp); | ||
623 | return; | ||
624 | } | ||
625 | |||
626 | /* iret */ | ||
627 | case 0xcf: | ||
628 | { | ||
629 | unsigned long newip; | ||
630 | unsigned long newcs; | ||
631 | unsigned long newflags; | ||
632 | if (data32) { | ||
633 | newip=popl(ssp, sp, simulate_sigsegv); | ||
634 | newcs=popl(ssp, sp, simulate_sigsegv); | ||
635 | newflags=popl(ssp, sp, simulate_sigsegv); | ||
636 | SP(regs) += 12; | ||
637 | } else { | ||
638 | newip = popw(ssp, sp, simulate_sigsegv); | ||
639 | newcs = popw(ssp, sp, simulate_sigsegv); | ||
640 | newflags = popw(ssp, sp, simulate_sigsegv); | ||
641 | SP(regs) += 6; | ||
642 | } | ||
643 | IP(regs) = newip; | ||
644 | regs->cs = newcs; | ||
645 | CHECK_IF_IN_TRAP; | ||
646 | if (data32) { | ||
647 | set_vflags_long(newflags, regs); | ||
648 | } else { | ||
649 | set_vflags_short(newflags, regs); | ||
650 | } | ||
651 | VM86_FAULT_RETURN; | ||
652 | } | ||
653 | |||
654 | /* cli */ | ||
655 | case 0xfa: | ||
656 | IP(regs) = ip; | ||
657 | clear_IF(regs); | ||
658 | VM86_FAULT_RETURN; | ||
659 | |||
660 | /* sti */ | ||
661 | /* | ||
662 | * Damn. This is incorrect: the 'sti' instruction should actually | ||
663 | * enable interrupts after the /next/ instruction. Not good. | ||
664 | * | ||
665 | * Probably needs some horsing around with the TF flag. Aiee.. | ||
666 | */ | ||
667 | case 0xfb: | ||
668 | IP(regs) = ip; | ||
669 | set_IF(regs); | ||
670 | VM86_FAULT_RETURN; | ||
671 | |||
672 | default: | ||
673 | return_to_32bit(regs, VM86_UNKNOWN); | ||
674 | } | ||
675 | |||
676 | return; | ||
677 | |||
678 | simulate_sigsegv: | ||
679 | /* FIXME: After a long discussion with Stas we finally | ||
680 | * agreed that this is wrong. Here we should | ||
681 | * really send a SIGSEGV to the user program. | ||
682 | * But how do we create the correct context? We | ||
683 | * are inside a general protection fault handler | ||
684 | * and have just returned from a page fault handler. | ||
685 | * The correct context for the signal handler | ||
686 | * should be a mixture of the two, but how do we | ||
687 | * get the information? [KD] | ||
688 | */ | ||
689 | return_to_32bit(regs, VM86_UNKNOWN); | ||
690 | } | ||
691 | |||
692 | /* ---------------- vm86 special IRQ passing stuff ----------------- */ | ||
693 | |||
694 | #define VM86_IRQNAME "vm86irq" | ||
695 | |||
696 | static struct vm86_irqs { | ||
697 | struct task_struct *tsk; | ||
698 | int sig; | ||
699 | } vm86_irqs[16]; | ||
700 | |||
701 | static DEFINE_SPINLOCK(irqbits_lock); | ||
702 | static int irqbits; | ||
703 | |||
704 | #define ALLOWED_SIGS ( 1 /* 0 = don't send a signal */ \ | ||
705 | | (1 << SIGUSR1) | (1 << SIGUSR2) | (1 << SIGIO) | (1 << SIGURG) \ | ||
706 | | (1 << SIGUNUSED) ) | ||
707 | |||
708 | static irqreturn_t irq_handler(int intno, void *dev_id, struct pt_regs * regs) | ||
709 | { | ||
710 | int irq_bit; | ||
711 | unsigned long flags; | ||
712 | |||
713 | spin_lock_irqsave(&irqbits_lock, flags); | ||
714 | irq_bit = 1 << intno; | ||
715 | if ((irqbits & irq_bit) || ! vm86_irqs[intno].tsk) | ||
716 | goto out; | ||
717 | irqbits |= irq_bit; | ||
718 | if (vm86_irqs[intno].sig) | ||
719 | send_sig(vm86_irqs[intno].sig, vm86_irqs[intno].tsk, 1); | ||
720 | spin_unlock_irqrestore(&irqbits_lock, flags); | ||
721 | /* | ||
722 | * IRQ will be re-enabled when the user asks for the irq (whether | ||
723 | * polling or as a result of the signal) | ||
724 | */ | ||
725 | disable_irq(intno); | ||
726 | return IRQ_HANDLED; | ||
727 | |||
728 | out: | ||
729 | spin_unlock_irqrestore(&irqbits_lock, flags); | ||
730 | return IRQ_NONE; | ||
731 | } | ||
732 | |||
733 | static inline void free_vm86_irq(int irqnumber) | ||
734 | { | ||
735 | unsigned long flags; | ||
736 | |||
737 | free_irq(irqnumber, NULL); | ||
738 | vm86_irqs[irqnumber].tsk = NULL; | ||
739 | |||
740 | spin_lock_irqsave(&irqbits_lock, flags); | ||
741 | irqbits &= ~(1 << irqnumber); | ||
742 | spin_unlock_irqrestore(&irqbits_lock, flags); | ||
743 | } | ||
744 | |||
745 | void release_vm86_irqs(struct task_struct *task) | ||
746 | { | ||
747 | int i; | ||
748 | for (i = FIRST_VM86_IRQ ; i <= LAST_VM86_IRQ; i++) | ||
749 | if (vm86_irqs[i].tsk == task) | ||
750 | free_vm86_irq(i); | ||
751 | } | ||
752 | |||
753 | static inline int get_and_reset_irq(int irqnumber) | ||
754 | { | ||
755 | int bit; | ||
756 | unsigned long flags; | ||
757 | |||
758 | if (invalid_vm86_irq(irqnumber)) return 0; | ||
759 | if (vm86_irqs[irqnumber].tsk != current) return 0; | ||
760 | spin_lock_irqsave(&irqbits_lock, flags); | ||
761 | bit = irqbits & (1 << irqnumber); | ||
762 | irqbits &= ~bit; | ||
763 | spin_unlock_irqrestore(&irqbits_lock, flags); | ||
764 | if (!bit) | ||
765 | return 0; | ||
766 | enable_irq(irqnumber); | ||
767 | return 1; | ||
768 | } | ||
769 | |||
770 | |||
771 | static int do_vm86_irq_handling(int subfunction, int irqnumber) | ||
772 | { | ||
773 | int ret; | ||
774 | switch (subfunction) { | ||
775 | case VM86_GET_AND_RESET_IRQ: { | ||
776 | return get_and_reset_irq(irqnumber); | ||
777 | } | ||
778 | case VM86_GET_IRQ_BITS: { | ||
779 | return irqbits; | ||
780 | } | ||
781 | case VM86_REQUEST_IRQ: { | ||
782 | int sig = irqnumber >> 8; | ||
783 | int irq = irqnumber & 255; | ||
784 | if (!capable(CAP_SYS_ADMIN)) return -EPERM; | ||
785 | if (!((1 << sig) & ALLOWED_SIGS)) return -EPERM; | ||
786 | if (invalid_vm86_irq(irq)) return -EPERM; | ||
787 | if (vm86_irqs[irq].tsk) return -EPERM; | ||
788 | ret = request_irq(irq, &irq_handler, 0, VM86_IRQNAME, NULL); | ||
789 | if (ret) return ret; | ||
790 | vm86_irqs[irq].sig = sig; | ||
791 | vm86_irqs[irq].tsk = current; | ||
792 | return irq; | ||
793 | } | ||
794 | case VM86_FREE_IRQ: { | ||
795 | if (invalid_vm86_irq(irqnumber)) return -EPERM; | ||
796 | if (!vm86_irqs[irqnumber].tsk) return 0; | ||
797 | if (vm86_irqs[irqnumber].tsk != current) return -EPERM; | ||
798 | free_vm86_irq(irqnumber); | ||
799 | return 0; | ||
800 | } | ||
801 | } | ||
802 | return -EINVAL; | ||
803 | } | ||
804 | |||
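The VM86_* codes that return_to_32bit() hands back above are consumed by the monitor process in userspace, which either re-enters v86 mode or emulates the event itself. A minimal sketch of that consumer, assuming a vm86() wrapper for the system call; VM86_ENTER, VM86_TYPE/VM86_ARG, and struct vm86plus_struct come from asm/vm86.h:

#include <asm/vm86.h>           /* VM86_*, struct vm86plus_struct */
#include <stdio.h>

/* Assumed wrapper around the vm86 system call (e.g. via syscall(2)). */
extern int vm86(unsigned long fn, struct vm86plus_struct *v86);

/* Hypothetical monitor loop: enter v86 mode, then dispatch on the
 * reason handle_vm86_fault()/do_int() bounced execution back here. */
static void monitor(struct vm86plus_struct *v86)
{
        for (;;) {
                int ret = vm86(VM86_ENTER, v86);

                switch (VM86_TYPE(ret)) {
                case VM86_SIGNAL:       /* a signal arrived; just re-enter */
                        continue;
                case VM86_INTx:         /* do_int() punted INT n to us */
                        fprintf(stderr, "INT 0x%02x\n", VM86_ARG(ret));
                        return;
                case VM86_UNKNOWN:      /* opcode handle_vm86_fault() rejected */
                default:
                        return;
                }
        }
}

A real monitor (a DOS emulator, say) handles more cases in the same switch -- VM86_STI, VM86_PICRETURN, and VM86_TRAP among them.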
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S new file mode 100644 index 000000000000..e0512cc8bea7 --- /dev/null +++ b/arch/i386/kernel/vmlinux.lds.S | |||
@@ -0,0 +1,134 @@ | |||
1 | /* ld script to make i386 Linux kernel | ||
2 | * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>; | ||
3 | */ | ||
4 | |||
5 | #include <asm-generic/vmlinux.lds.h> | ||
6 | #include <asm/thread_info.h> | ||
7 | #include <asm/page.h> | ||
8 | |||
9 | OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") | ||
10 | OUTPUT_ARCH(i386) | ||
11 | ENTRY(startup_32) | ||
12 | jiffies = jiffies_64; | ||
13 | SECTIONS | ||
14 | { | ||
15 | . = __PAGE_OFFSET + 0x100000; | ||
16 | /* read-only */ | ||
17 | _text = .; /* Text and read-only data */ | ||
18 | .text : { | ||
19 | *(.text) | ||
20 | SCHED_TEXT | ||
21 | LOCK_TEXT | ||
22 | *(.fixup) | ||
23 | *(.gnu.warning) | ||
24 | } = 0x9090 | ||
25 | |||
26 | _etext = .; /* End of text section */ | ||
27 | |||
28 | . = ALIGN(16); /* Exception table */ | ||
29 | __start___ex_table = .; | ||
30 | __ex_table : { *(__ex_table) } | ||
31 | __stop___ex_table = .; | ||
32 | |||
33 | RODATA | ||
34 | |||
35 | /* writeable */ | ||
36 | .data : { /* Data */ | ||
37 | *(.data) | ||
38 | CONSTRUCTORS | ||
39 | } | ||
40 | |||
41 | . = ALIGN(4096); | ||
42 | __nosave_begin = .; | ||
43 | .data_nosave : { *(.data.nosave) } | ||
44 | . = ALIGN(4096); | ||
45 | __nosave_end = .; | ||
46 | |||
47 | . = ALIGN(4096); | ||
48 | .data.page_aligned : { *(.data.idt) } | ||
49 | |||
50 | . = ALIGN(32); | ||
51 | .data.cacheline_aligned : { *(.data.cacheline_aligned) } | ||
52 | |||
53 | _edata = .; /* End of data section */ | ||
54 | |||
55 | . = ALIGN(THREAD_SIZE); /* init_task */ | ||
56 | .data.init_task : { *(.data.init_task) } | ||
57 | |||
58 | /* will be freed after init */ | ||
59 | . = ALIGN(4096); /* Init code and data */ | ||
60 | __init_begin = .; | ||
61 | .init.text : { | ||
62 | _sinittext = .; | ||
63 | *(.init.text) | ||
64 | _einittext = .; | ||
65 | } | ||
66 | .init.data : { *(.init.data) } | ||
67 | . = ALIGN(16); | ||
68 | __setup_start = .; | ||
69 | .init.setup : { *(.init.setup) } | ||
70 | __setup_end = .; | ||
71 | __initcall_start = .; | ||
72 | .initcall.init : { | ||
73 | *(.initcall1.init) | ||
74 | *(.initcall2.init) | ||
75 | *(.initcall3.init) | ||
76 | *(.initcall4.init) | ||
77 | *(.initcall5.init) | ||
78 | *(.initcall6.init) | ||
79 | *(.initcall7.init) | ||
80 | } | ||
81 | __initcall_end = .; | ||
82 | __con_initcall_start = .; | ||
83 | .con_initcall.init : { *(.con_initcall.init) } | ||
84 | __con_initcall_end = .; | ||
85 | SECURITY_INIT | ||
86 | . = ALIGN(4); | ||
87 | __alt_instructions = .; | ||
88 | .altinstructions : { *(.altinstructions) } | ||
89 | __alt_instructions_end = .; | ||
90 | .altinstr_replacement : { *(.altinstr_replacement) } | ||
91 | /* .exit.text is discarded at runtime, not link time, to deal with references | ||
92 | from .altinstructions and .eh_frame */ | ||
93 | .exit.text : { *(.exit.text) } | ||
94 | .exit.data : { *(.exit.data) } | ||
95 | . = ALIGN(4096); | ||
96 | __initramfs_start = .; | ||
97 | .init.ramfs : { *(.init.ramfs) } | ||
98 | __initramfs_end = .; | ||
99 | . = ALIGN(32); | ||
100 | __per_cpu_start = .; | ||
101 | .data.percpu : { *(.data.percpu) } | ||
102 | __per_cpu_end = .; | ||
103 | . = ALIGN(4096); | ||
104 | __init_end = .; | ||
105 | /* freed after init ends here */ | ||
106 | |||
107 | __bss_start = .; /* BSS */ | ||
108 | .bss : { | ||
109 | *(.bss.page_aligned) | ||
110 | *(.bss) | ||
111 | } | ||
112 | . = ALIGN(4); | ||
113 | __bss_stop = .; | ||
114 | |||
115 | _end = . ; | ||
116 | |||
117 | /* This is where the kernel creates the early boot page tables */ | ||
118 | . = ALIGN(4096); | ||
119 | pg0 = .; | ||
120 | |||
121 | /* Sections to be discarded */ | ||
122 | /DISCARD/ : { | ||
123 | *(.exitcall.exit) | ||
124 | } | ||
125 | |||
126 | /* Stabs debugging sections. */ | ||
127 | .stab 0 : { *(.stab) } | ||
128 | .stabstr 0 : { *(.stabstr) } | ||
129 | .stab.excl 0 : { *(.stab.excl) } | ||
130 | .stab.exclstr 0 : { *(.stab.exclstr) } | ||
131 | .stab.index 0 : { *(.stab.index) } | ||
132 | .stab.indexstr 0 : { *(.stab.indexstr) } | ||
133 | .comment 0 : { *(.comment) } | ||
134 | } | ||
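Paired symbols like __initcall_start/__initcall_end above exist so that C code can treat a linker-assembled section as an array. A simplified sketch of the consumer for .initcall.init (the real loop lives in init/main.c and adds error reporting):

typedef int (*initcall_t)(void);

/* Bounds of the .initcall.init section, defined only by the linker
 * script above -- there is no C-level definition anywhere. */
extern initcall_t __initcall_start[], __initcall_end[];

static void do_initcalls(void)
{
        initcall_t *call;

        for (call = __initcall_start; call < __initcall_end; call++)
                (*call)();      /* ordering comes from .initcall1..7 */
}

Collecting the seven .initcallN.init sections in numeric order is what lets core subsystems initialize before the drivers that depend on them.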
diff --git a/arch/i386/kernel/vsyscall-int80.S b/arch/i386/kernel/vsyscall-int80.S new file mode 100644 index 000000000000..530d0525e5e2 --- /dev/null +++ b/arch/i386/kernel/vsyscall-int80.S | |||
@@ -0,0 +1,53 @@ | |||
1 | /* | ||
2 | * Code for the vsyscall page. This version uses the old int $0x80 method. | ||
3 | * | ||
4 | * NOTE: | ||
5 | * 1) __kernel_vsyscall _must_ be first in this page. | ||
6 | * 2) there are alignment constraints on this stub, see vsyscall-sigreturn.S | ||
7 | * for details. | ||
8 | */ | ||
9 | |||
10 | .text | ||
11 | .globl __kernel_vsyscall | ||
12 | .type __kernel_vsyscall,@function | ||
13 | __kernel_vsyscall: | ||
14 | .LSTART_vsyscall: | ||
15 | int $0x80 | ||
16 | ret | ||
17 | .LEND_vsyscall: | ||
18 | .size __kernel_vsyscall,.-.LSTART_vsyscall | ||
19 | .previous | ||
20 | |||
21 | .section .eh_frame,"a",@progbits | ||
22 | .LSTARTFRAMEDLSI: | ||
23 | .long .LENDCIEDLSI-.LSTARTCIEDLSI | ||
24 | .LSTARTCIEDLSI: | ||
25 | .long 0 /* CIE ID */ | ||
26 | .byte 1 /* Version number */ | ||
27 | .string "zR" /* NUL-terminated augmentation string */ | ||
28 | .uleb128 1 /* Code alignment factor */ | ||
29 | .sleb128 -4 /* Data alignment factor */ | ||
30 | .byte 8 /* Return address register column */ | ||
31 | .uleb128 1 /* Augmentation value length */ | ||
32 | .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */ | ||
33 | .byte 0x0c /* DW_CFA_def_cfa */ | ||
34 | .uleb128 4 | ||
35 | .uleb128 4 | ||
36 | .byte 0x88 /* DW_CFA_offset, column 0x8 */ | ||
37 | .uleb128 1 | ||
38 | .align 4 | ||
39 | .LENDCIEDLSI: | ||
40 | .long .LENDFDEDLSI-.LSTARTFDEDLSI /* Length FDE */ | ||
41 | .LSTARTFDEDLSI: | ||
42 | .long .LSTARTFDEDLSI-.LSTARTFRAMEDLSI /* CIE pointer */ | ||
43 | .long .LSTART_vsyscall-. /* PC-relative start address */ | ||
44 | .long .LEND_vsyscall-.LSTART_vsyscall | ||
45 | .uleb128 0 | ||
46 | .align 4 | ||
47 | .LENDFDEDLSI: | ||
48 | .previous | ||
49 | |||
50 | /* | ||
51 | * Get the common code for the sigreturn entry points. | ||
52 | */ | ||
53 | #include "vsyscall-sigreturn.S" | ||
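Userspace never hardcodes the address of __kernel_vsyscall; the kernel advertises it through the AT_SYSINFO auxiliary-vector entry, and the C library picks it up at startup. A hedged sketch of that lookup, assuming the auxv is reached by walking past envp on the initial stack (Elf32_auxv_t and AT_SYSINFO are from <elf.h>):

#include <elf.h>
#include <stddef.h>

/* Walk past the environment block to the aux vector and pull out the
 * vsyscall entry point, or NULL on kernels that don't provide one
 * (callers then fall back to a literal "int $0x80"). */
static void *find_vsyscall(char **envp)
{
        Elf32_auxv_t *av;

        while (*envp)           /* auxv starts after envp's NULL */
                envp++;
        for (av = (Elf32_auxv_t *)(envp + 1); av->a_type != AT_NULL; av++)
                if (av->a_type == AT_SYSINFO)
                        return (void *)av->a_un.a_val;
        return NULL;
}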
diff --git a/arch/i386/kernel/vsyscall-sigreturn.S b/arch/i386/kernel/vsyscall-sigreturn.S new file mode 100644 index 000000000000..c8fcf75b9be3 --- /dev/null +++ b/arch/i386/kernel/vsyscall-sigreturn.S | |||
@@ -0,0 +1,142 @@ | |||
1 | /* | ||
2 | * Common code for the sigreturn entry points on the vsyscall page. | ||
3 | * So far this code is the same for both int80 and sysenter versions. | ||
4 | * This file is #include'd by vsyscall-*.S to define them after the | ||
5 | * vsyscall entry point. The kernel assumes that the addresses of these | ||
6 | * routines are constant for all vsyscall implementations. | ||
7 | */ | ||
8 | |||
9 | #include <asm/unistd.h> | ||
10 | #include <asm/asm_offsets.h> | ||
11 | |||
12 | |||
13 | /* XXX | ||
14 | Should these be named "_sigtramp" or something? | ||
15 | */ | ||
16 | |||
17 | .text | ||
18 | .org __kernel_vsyscall+32 | ||
19 | .globl __kernel_sigreturn | ||
20 | .type __kernel_sigreturn,@function | ||
21 | __kernel_sigreturn: | ||
22 | .LSTART_sigreturn: | ||
23 | popl %eax /* XXX does this mean it needs unwind info? */ | ||
24 | movl $__NR_sigreturn, %eax | ||
25 | int $0x80 | ||
26 | .LEND_sigreturn: | ||
27 | .size __kernel_sigreturn,.-.LSTART_sigreturn | ||
28 | |||
29 | .balign 32 | ||
30 | .globl __kernel_rt_sigreturn | ||
31 | .type __kernel_rt_sigreturn,@function | ||
32 | __kernel_rt_sigreturn: | ||
33 | .LSTART_rt_sigreturn: | ||
34 | movl $__NR_rt_sigreturn, %eax | ||
35 | int $0x80 | ||
36 | .LEND_rt_sigreturn: | ||
37 | .size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn | ||
38 | .previous | ||
39 | |||
40 | .section .eh_frame,"a",@progbits | ||
41 | .LSTARTFRAMEDLSI1: | ||
42 | .long .LENDCIEDLSI1-.LSTARTCIEDLSI1 | ||
43 | .LSTARTCIEDLSI1: | ||
44 | .long 0 /* CIE ID */ | ||
45 | .byte 1 /* Version number */ | ||
46 | .string "zR" /* NUL-terminated augmentation string */ | ||
47 | .uleb128 1 /* Code alignment factor */ | ||
48 | .sleb128 -4 /* Data alignment factor */ | ||
49 | .byte 8 /* Return address register column */ | ||
50 | .uleb128 1 /* Augmentation value length */ | ||
51 | .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */ | ||
52 | .byte 0 /* DW_CFA_nop */ | ||
53 | .align 4 | ||
54 | .LENDCIEDLSI1: | ||
55 | .long .LENDFDEDLSI1-.LSTARTFDEDLSI1 /* Length FDE */ | ||
56 | .LSTARTFDEDLSI1: | ||
57 | .long .LSTARTFDEDLSI1-.LSTARTFRAMEDLSI1 /* CIE pointer */ | ||
58 | /* HACK: The dwarf2 unwind routines will subtract 1 from the | ||
59 | return address to get an address in the middle of the | ||
60 | presumed call instruction. Since we didn't get here via | ||
61 | a call, we need to include the nop before the real start | ||
62 | to make up for it. */ | ||
63 | .long .LSTART_sigreturn-1-. /* PC-relative start address */ | ||
64 | .long .LEND_sigreturn-.LSTART_sigreturn+1 | ||
65 | .uleb128 0 /* Augmentation */ | ||
66 | /* What follows are the instructions for the table generation. | ||
67 | We record the locations of each register saved. This is | ||
68 | complicated by the fact that the "CFA" is always assumed to | ||
69 | be the value of the stack pointer in the caller. This means | ||
70 | that we must define the CFA of this body of code to be the | ||
71 | saved value of the stack pointer in the sigcontext. Which | ||
72 | also means that there is no fixed relation to the other | ||
73 | saved registers, which means that we must use DW_CFA_expression | ||
74 | to compute their addresses. It also means that when we | ||
75 | adjust the stack with the popl, we have to do it all over again. */ | ||
76 | |||
77 | #define do_cfa_expr(offset) \ | ||
78 | .byte 0x0f; /* DW_CFA_def_cfa_expression */ \ | ||
79 | .uleb128 1f-0f; /* length */ \ | ||
80 | 0: .byte 0x74; /* DW_OP_breg4 */ \ | ||
81 | .sleb128 offset; /* offset */ \ | ||
82 | .byte 0x06; /* DW_OP_deref */ \ | ||
83 | 1: | ||
84 | |||
85 | #define do_expr(regno, offset) \ | ||
86 | .byte 0x10; /* DW_CFA_expression */ \ | ||
87 | .uleb128 regno; /* regno */ \ | ||
88 | .uleb128 1f-0f; /* length */ \ | ||
89 | 0: .byte 0x74; /* DW_OP_breg4 */ \ | ||
90 | .sleb128 offset; /* offset */ \ | ||
91 | 1: | ||
92 | |||
93 | do_cfa_expr(SIGCONTEXT_esp+4) | ||
94 | do_expr(0, SIGCONTEXT_eax+4) | ||
95 | do_expr(1, SIGCONTEXT_ecx+4) | ||
96 | do_expr(2, SIGCONTEXT_edx+4) | ||
97 | do_expr(3, SIGCONTEXT_ebx+4) | ||
98 | do_expr(5, SIGCONTEXT_ebp+4) | ||
99 | do_expr(6, SIGCONTEXT_esi+4) | ||
100 | do_expr(7, SIGCONTEXT_edi+4) | ||
101 | do_expr(8, SIGCONTEXT_eip+4) | ||
102 | |||
103 | .byte 0x42 /* DW_CFA_advance_loc 2 -- nop; popl eax. */ | ||
104 | |||
105 | do_cfa_expr(SIGCONTEXT_esp) | ||
106 | do_expr(0, SIGCONTEXT_eax) | ||
107 | do_expr(1, SIGCONTEXT_ecx) | ||
108 | do_expr(2, SIGCONTEXT_edx) | ||
109 | do_expr(3, SIGCONTEXT_ebx) | ||
110 | do_expr(5, SIGCONTEXT_ebp) | ||
111 | do_expr(6, SIGCONTEXT_esi) | ||
112 | do_expr(7, SIGCONTEXT_edi) | ||
113 | do_expr(8, SIGCONTEXT_eip) | ||
114 | |||
115 | .align 4 | ||
116 | .LENDFDEDLSI1: | ||
117 | |||
118 | .long .LENDFDEDLSI2-.LSTARTFDEDLSI2 /* Length FDE */ | ||
119 | .LSTARTFDEDLSI2: | ||
120 | .long .LSTARTFDEDLSI2-.LSTARTFRAMEDLSI1 /* CIE pointer */ | ||
121 | /* HACK: See above wrt unwind library assumptions. */ | ||
122 | .long .LSTART_rt_sigreturn-1-. /* PC-relative start address */ | ||
123 | .long .LEND_rt_sigreturn-.LSTART_rt_sigreturn+1 | ||
124 | .uleb128 0 /* Augmentation */ | ||
125 | /* What follows are the instructions for the table generation. | ||
126 | We record the locations of each register saved. This is | ||
127 | slightly less complicated than the above, since we don't | ||
128 | modify the stack pointer in the process. */ | ||
129 | |||
130 | do_cfa_expr(RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_esp) | ||
131 | do_expr(0, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_eax) | ||
132 | do_expr(1, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_ecx) | ||
133 | do_expr(2, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_edx) | ||
134 | do_expr(3, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_ebx) | ||
135 | do_expr(5, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_ebp) | ||
136 | do_expr(6, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_esi) | ||
137 | do_expr(7, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_edi) | ||
138 | do_expr(8, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_eip) | ||
139 | |||
140 | .align 4 | ||
141 | .LENDFDEDLSI2: | ||
142 | .previous | ||
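The "-1" adjustments in the FDEs above mirror what DWARF consumers do on lookup: an unwinder searches by return_address - 1 so that a call ending exactly at a function boundary still resolves to the caller's FDE. Because these trampolines are entered by a jump rather than a call, their FDEs start one byte early to stay in range. A schematic of the consumer side (fde_for_pc is a hypothetical lookup helper):

#include <stdint.h>

struct fde;                                     /* opaque unwind-table entry */
extern const struct fde *fde_for_pc(uintptr_t pc);  /* hypothetical lookup */

/* Standard unwinder convention: back up one byte so a return address
 * pointing just past a call still lands inside the caller's range. */
static const struct fde *fde_for_return(uintptr_t ret_addr)
{
        return fde_for_pc(ret_addr - 1);
}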
diff --git a/arch/i386/kernel/vsyscall-sysenter.S b/arch/i386/kernel/vsyscall-sysenter.S new file mode 100644 index 000000000000..4daefb2ec1b2 --- /dev/null +++ b/arch/i386/kernel/vsyscall-sysenter.S | |||
@@ -0,0 +1,104 @@ | |||
1 | /* | ||
2 | * Code for the vsyscall page. This version uses the sysenter instruction. | ||
3 | * | ||
4 | * NOTE: | ||
5 | * 1) __kernel_vsyscall _must_ be first in this page. | ||
6 | * 2) there are alignment constraints on this stub, see vsyscall-sigreturn.S | ||
7 | * for details. | ||
8 | */ | ||
9 | |||
10 | .text | ||
11 | .globl __kernel_vsyscall | ||
12 | .type __kernel_vsyscall,@function | ||
13 | __kernel_vsyscall: | ||
14 | .LSTART_vsyscall: | ||
15 | push %ecx | ||
16 | .Lpush_ecx: | ||
17 | push %edx | ||
18 | .Lpush_edx: | ||
19 | push %ebp | ||
20 | .Lenter_kernel: | ||
21 | movl %esp,%ebp | ||
22 | sysenter | ||
23 | |||
24 | /* 7: align return point with nops to make disassembly easier */ | ||
25 | .space 7,0x90 | ||
26 | |||
27 | /* 14: System call restart point is here! (SYSENTER_RETURN - 2) */ | ||
28 | jmp .Lenter_kernel | ||
29 | /* 16: System call normal return point is here! */ | ||
30 | .globl SYSENTER_RETURN /* Symbol used by entry.S. */ | ||
31 | SYSENTER_RETURN: | ||
32 | pop %ebp | ||
33 | .Lpop_ebp: | ||
34 | pop %edx | ||
35 | .Lpop_edx: | ||
36 | pop %ecx | ||
37 | .Lpop_ecx: | ||
38 | ret | ||
39 | .LEND_vsyscall: | ||
40 | .size __kernel_vsyscall,.-.LSTART_vsyscall | ||
41 | .previous | ||
42 | |||
43 | .section .eh_frame,"a",@progbits | ||
44 | .LSTARTFRAMEDLSI: | ||
45 | .long .LENDCIEDLSI-.LSTARTCIEDLSI | ||
46 | .LSTARTCIEDLSI: | ||
47 | .long 0 /* CIE ID */ | ||
48 | .byte 1 /* Version number */ | ||
49 | .string "zR" /* NUL-terminated augmentation string */ | ||
50 | .uleb128 1 /* Code alignment factor */ | ||
51 | .sleb128 -4 /* Data alignment factor */ | ||
52 | .byte 8 /* Return address register column */ | ||
53 | .uleb128 1 /* Augmentation value length */ | ||
54 | .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */ | ||
55 | .byte 0x0c /* DW_CFA_def_cfa */ | ||
56 | .uleb128 4 | ||
57 | .uleb128 4 | ||
58 | .byte 0x88 /* DW_CFA_offset, column 0x8 */ | ||
59 | .uleb128 1 | ||
60 | .align 4 | ||
61 | .LENDCIEDLSI: | ||
62 | .long .LENDFDEDLSI-.LSTARTFDEDLSI /* Length FDE */ | ||
63 | .LSTARTFDEDLSI: | ||
64 | .long .LSTARTFDEDLSI-.LSTARTFRAMEDLSI /* CIE pointer */ | ||
65 | .long .LSTART_vsyscall-. /* PC-relative start address */ | ||
66 | .long .LEND_vsyscall-.LSTART_vsyscall | ||
67 | .uleb128 0 | ||
68 | /* What follows are the instructions for the table generation. | ||
69 | We have to record all changes of the stack pointer. */ | ||
70 | .byte 0x04 /* DW_CFA_advance_loc4 */ | ||
71 | .long .Lpush_ecx-.LSTART_vsyscall | ||
72 | .byte 0x0e /* DW_CFA_def_cfa_offset */ | ||
73 | .byte 0x08 /* RA at offset 8 now */ | ||
74 | .byte 0x04 /* DW_CFA_advance_loc4 */ | ||
75 | .long .Lpush_edx-.Lpush_ecx | ||
76 | .byte 0x0e /* DW_CFA_def_cfa_offset */ | ||
77 | .byte 0x0c /* RA at offset 12 now */ | ||
78 | .byte 0x04 /* DW_CFA_advance_loc4 */ | ||
79 | .long .Lenter_kernel-.Lpush_edx | ||
80 | .byte 0x0e /* DW_CFA_def_cfa_offset */ | ||
81 | .byte 0x10 /* RA at offset 16 now */ | ||
82 | .byte 0x85, 0x04 /* DW_CFA_offset %ebp -16 */ | ||
83 | /* Finally the epilogue. */ | ||
84 | .byte 0x04 /* DW_CFA_advance_loc4 */ | ||
85 | .long .Lpop_ebp-.Lenter_kernel | ||
86 | .byte 0x0e /* DW_CFA_def_cfa_offset */ | ||
87 | .byte 0x0c /* RA at offset 12 now */ | ||
88 | .byte 0xc5 /* DW_CFA_restore %ebp */ | ||
89 | .byte 0x04 /* DW_CFA_advance_loc4 */ | ||
90 | .long .Lpop_edx-.Lpop_ebp | ||
91 | .byte 0x0e /* DW_CFA_def_cfa_offset */ | ||
92 | .byte 0x08 /* RA at offset 8 now */ | ||
93 | .byte 0x04 /* DW_CFA_advance_loc4 */ | ||
94 | .long .Lpop_ecx-.Lpop_edx | ||
95 | .byte 0x0e /* DW_CFA_def_cfa_offset */ | ||
96 | .byte 0x04 /* RA at offset 4 now */ | ||
97 | .align 4 | ||
98 | .LENDFDEDLSI: | ||
99 | .previous | ||
100 | |||
101 | /* | ||
102 | * Get the common code for the sigreturn entry points. | ||
103 | */ | ||
104 | #include "vsyscall-sigreturn.S" | ||
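The register shuffling around sysenter above is what lets ordinary code treat this stub as a plain function: sysenter/sysexit clobber %ecx and %edx, and %ebp carries the user stack pointer into the kernel, so all three are pushed first and restored at SYSENTER_RETURN. A hedged sketch of a caller, assuming the stub address came from AT_SYSINFO (20 is __NR_getpid on i386):

/* Issue getpid through the vsyscall stub.  The stub itself preserves
 * %ecx/%edx/%ebp, so to the caller this behaves exactly like
 * "int $0x80": syscall number in %eax, result back in %eax. */
static int vsyscall_getpid(void *vsyscall)
{
        int ret;

        __asm__ volatile ("call *%2"
                          : "=a" (ret)
                          : "0" (20 /* __NR_getpid */), "r" (vsyscall)
                          : "memory");
        return ret;
}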
diff --git a/arch/i386/kernel/vsyscall.S b/arch/i386/kernel/vsyscall.S new file mode 100644 index 000000000000..b403890fe39b --- /dev/null +++ b/arch/i386/kernel/vsyscall.S | |||
@@ -0,0 +1,15 @@ | |||
1 | #include <linux/init.h> | ||
2 | |||
3 | __INITDATA | ||
4 | |||
5 | .globl vsyscall_int80_start, vsyscall_int80_end | ||
6 | vsyscall_int80_start: | ||
7 | .incbin "arch/i386/kernel/vsyscall-int80.so" | ||
8 | vsyscall_int80_end: | ||
9 | |||
10 | .globl vsyscall_sysenter_start, vsyscall_sysenter_end | ||
11 | vsyscall_sysenter_start: | ||
12 | .incbin "arch/i386/kernel/vsyscall-sysenter.so" | ||
13 | vsyscall_sysenter_end: | ||
14 | |||
15 | __FINIT | ||
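These markers delimit the two prebuilt DSO images embedded in the kernel; at boot, exactly one of them is copied into the single page that every process sees. A sketch of that selection, modeled loosely on sysenter_setup() (page allocation and the fixmap mapping are elided; have_sep stands for the CPU's SEP feature bit):

#include <string.h>

extern const char vsyscall_int80_start[], vsyscall_int80_end[];
extern const char vsyscall_sysenter_start[], vsyscall_sysenter_end[];

/* Copy whichever DSO matches the CPU into 'page', which is later
 * mapped read-only at the fixmap address userspace sees. */
static void install_vsyscall_page(void *page, int have_sep)
{
        if (have_sep)
                memcpy(page, vsyscall_sysenter_start,
                       vsyscall_sysenter_end - vsyscall_sysenter_start);
        else
                memcpy(page, vsyscall_int80_start,
                       vsyscall_int80_end - vsyscall_int80_start);
}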
diff --git a/arch/i386/kernel/vsyscall.lds.S b/arch/i386/kernel/vsyscall.lds.S new file mode 100644 index 000000000000..3a8329d6536e --- /dev/null +++ b/arch/i386/kernel/vsyscall.lds.S | |||
@@ -0,0 +1,65 @@ | |||
1 | /* | ||
2 | * Linker script for vsyscall DSO. The vsyscall page is an ELF shared | ||
3 | * object prelinked to its virtual address, and with only one read-only | ||
4 | * segment (that fits in one page). This script controls its layout. | ||
5 | */ | ||
6 | #include <asm/asm_offsets.h> | ||
7 | |||
8 | SECTIONS | ||
9 | { | ||
10 | . = VSYSCALL_BASE + SIZEOF_HEADERS; | ||
11 | |||
12 | .hash : { *(.hash) } :text | ||
13 | .dynsym : { *(.dynsym) } | ||
14 | .dynstr : { *(.dynstr) } | ||
15 | .gnu.version : { *(.gnu.version) } | ||
16 | .gnu.version_d : { *(.gnu.version_d) } | ||
17 | .gnu.version_r : { *(.gnu.version_r) } | ||
18 | |||
19 | /* This linker script is used both with -r and with -shared. | ||
20 | For the layouts to match, we need to skip more than enough | ||
21 | space for the dynamic symbol table et al. If this amount | ||
22 | is insufficient, ld -shared will barf. Just increase it here. */ | ||
23 | . = VSYSCALL_BASE + 0x400; | ||
24 | |||
25 | .text : { *(.text) } :text =0x90909090 | ||
26 | |||
27 | .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr | ||
28 | .eh_frame : { KEEP (*(.eh_frame)) } :text | ||
29 | .dynamic : { *(.dynamic) } :text :dynamic | ||
30 | .useless : { | ||
31 | *(.got.plt) *(.got) | ||
32 | *(.data .data.* .gnu.linkonce.d.*) | ||
33 | *(.dynbss) | ||
34 | *(.bss .bss.* .gnu.linkonce.b.*) | ||
35 | } :text | ||
36 | } | ||
37 | |||
38 | /* | ||
39 | * We must supply the ELF program headers explicitly to get just one | ||
40 | * PT_LOAD segment, and set the flags explicitly to make segments read-only. | ||
41 | */ | ||
42 | PHDRS | ||
43 | { | ||
44 | text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ | ||
45 | dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ | ||
46 | eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */ | ||
47 | } | ||
48 | |||
49 | /* | ||
50 | * This controls what symbols we export from the DSO. | ||
51 | */ | ||
52 | VERSION | ||
53 | { | ||
54 | LINUX_2.5 { | ||
55 | global: | ||
56 | __kernel_vsyscall; | ||
57 | __kernel_sigreturn; | ||
58 | __kernel_rt_sigreturn; | ||
59 | |||
60 | local: *; | ||
61 | }; | ||
62 | } | ||
63 | |||
64 | /* The ELF entry point can be used to set the AT_SYSINFO value. */ | ||
65 | ENTRY(__kernel_vsyscall); | ||
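Since the DSO's ELF header sits at the very start of the page, the entry point named by ENTRY() above stays recoverable at runtime from e_entry -- which is how the AT_SYSINFO value can be derived without hardcoding an offset, as the comment notes. A minimal sketch:

#include <elf.h>

/* ENTRY(__kernel_vsyscall) in the script above becomes e_entry in the
 * DSO's ELF header, which occupies the first bytes of the page. */
static unsigned long vsyscall_entry(const void *page)
{
        const Elf32_Ehdr *ehdr = page;

        return ehdr->e_entry;
}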