aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig42
-rw-r--r--arch/x86/Kconfig.debug24
-rw-r--r--arch/x86/boot/Makefile1
-rw-r--r--arch/x86/boot/a20.c6
-rw-r--r--arch/x86/boot/boot.h3
-rw-r--r--arch/x86/boot/compressed/head_32.S8
-rw-r--r--arch/x86/boot/compressed/head_64.S10
-rw-r--r--arch/x86/boot/copy.S40
-rw-r--r--arch/x86/boot/header.S2
-rw-r--r--arch/x86/boot/main.c5
-rw-r--r--arch/x86/boot/pmjump.S16
-rw-r--r--arch/x86/boot/voyager.c40
-rw-r--r--arch/x86/configs/i386_defconfig7
-rw-r--r--arch/x86/configs/x86_64_defconfig7
-rw-r--r--arch/x86/ia32/ia32_signal.c70
-rw-r--r--arch/x86/include/asm/apic.h2
-rw-r--r--arch/x86/include/asm/arch_hooks.h26
-rw-r--r--arch/x86/include/asm/i8259.h4
-rw-r--r--arch/x86/include/asm/io.h11
-rw-r--r--arch/x86/include/asm/iomap.h6
-rw-r--r--arch/x86/include/asm/irq_vectors.h2
-rw-r--r--arch/x86/include/asm/kvm.h7
-rw-r--r--arch/x86/include/asm/linkage.h64
-rw-r--r--arch/x86/include/asm/mach-voyager/do_timer.h17
-rw-r--r--arch/x86/include/asm/mach-voyager/entry_arch.h26
-rw-r--r--arch/x86/include/asm/mach-voyager/setup_arch.h12
-rw-r--r--arch/x86/include/asm/mmzone_32.h2
-rw-r--r--arch/x86/include/asm/mmzone_64.h2
-rw-r--r--arch/x86/include/asm/page_32_types.h2
-rw-r--r--arch/x86/include/asm/page_64_types.h2
-rw-r--r--arch/x86/include/asm/page_types.h6
-rw-r--r--arch/x86/include/asm/pat.h3
-rw-r--r--arch/x86/include/asm/pgtable-2level_types.h2
-rw-r--r--arch/x86/include/asm/pgtable-3level_types.h2
-rw-r--r--arch/x86/include/asm/pgtable_64_types.h1
-rw-r--r--arch/x86/include/asm/pgtable_types.h6
-rw-r--r--arch/x86/include/asm/processor.h10
-rw-r--r--arch/x86/include/asm/setup.h9
-rw-r--r--arch/x86/include/asm/syscalls.h2
-rw-r--r--arch/x86/include/asm/timer.h2
-rw-r--r--arch/x86/include/asm/uaccess_32.h4
-rw-r--r--arch/x86/include/asm/uaccess_64.h21
-rw-r--r--arch/x86/include/asm/vic.h61
-rw-r--r--arch/x86/include/asm/voyager.h571
-rw-r--r--arch/x86/kernel/acpi/realmode/wakeup.S4
-rw-r--r--arch/x86/kernel/acpi/wakeup_32.S2
-rw-r--r--arch/x86/kernel/acpi/wakeup_64.S34
-rw-r--r--arch/x86/kernel/alternative.c6
-rw-r--r--arch/x86/kernel/apic/apic.c12
-rw-r--r--arch/x86/kernel/apic/probe_32.c1
-rw-r--r--arch/x86/kernel/apic/probe_64.c13
-rw-r--r--arch/x86/kernel/apic/summit_32.c57
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c5
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c10
-rw-r--r--arch/x86/kernel/apm_32.c4
-rw-r--r--arch/x86/kernel/cpu/cpufreq/e_powersaver.c6
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c12
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c6
-rw-r--r--arch/x86/kernel/cpu/intel.c4
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_64.c7
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd_64.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel_64.c10
-rw-r--r--arch/x86/kernel/cpu/mcheck/p4.c4
-rw-r--r--arch/x86/kernel/e820.c3
-rw-r--r--arch/x86/kernel/efi_stub_32.S3
-rw-r--r--arch/x86/kernel/efi_stub_64.S7
-rw-r--r--arch/x86/kernel/entry_32.S4
-rw-r--r--arch/x86/kernel/entry_64.S25
-rw-r--r--arch/x86/kernel/head_32.S4
-rw-r--r--arch/x86/kernel/head_64.S4
-rw-r--r--arch/x86/kernel/i8259.c1
-rw-r--r--arch/x86/kernel/irqinit_32.c13
-rw-r--r--arch/x86/kernel/kvmclock.c1
-rw-r--r--arch/x86/kernel/machine_kexec_32.c2
-rw-r--r--arch/x86/kernel/mca_32.c5
-rw-r--r--arch/x86/kernel/mpparse.c15
-rw-r--r--arch/x86/kernel/paravirt.c1
-rw-r--r--arch/x86/kernel/process_32.c3
-rw-r--r--arch/x86/kernel/ptrace.c2
-rw-r--r--arch/x86/kernel/relocate_kernel_32.S2
-rw-r--r--arch/x86/kernel/relocate_kernel_64.S4
-rw-r--r--arch/x86/kernel/setup.c59
-rw-r--r--arch/x86/kernel/time_32.c6
-rw-r--r--arch/x86/kernel/time_64.c2
-rw-r--r--arch/x86/kernel/trampoline_32.S2
-rw-r--r--arch/x86/kernel/trampoline_64.S4
-rw-r--r--arch/x86/kernel/traps.c6
-rw-r--r--arch/x86/kernel/visws_quirks.c1
-rw-r--r--arch/x86/kernel/vmiclock_32.c7
-rw-r--r--arch/x86/kernel/vmlinux_32.lds.S2
-rw-r--r--arch/x86/kernel/vmlinux_64.lds.S2
-rw-r--r--arch/x86/kvm/i8254.c2
-rw-r--r--arch/x86/kvm/irq.c7
-rw-r--r--arch/x86/kvm/irq.h1
-rw-r--r--arch/x86/kvm/lapic.c66
-rw-r--r--arch/x86/kvm/lapic.h2
-rw-r--r--arch/x86/kvm/mmu.c9
-rw-r--r--arch/x86/kvm/svm.c1
-rw-r--r--arch/x86/kvm/vmx.c5
-rw-r--r--arch/x86/kvm/x86.c10
-rw-r--r--arch/x86/lguest/Kconfig1
-rw-r--r--arch/x86/lib/getuser.S2
-rw-r--r--arch/x86/mach-voyager/Makefile8
-rw-r--r--arch/x86/mach-voyager/setup.c119
-rw-r--r--arch/x86/mach-voyager/voyager_basic.c317
-rw-r--r--arch/x86/mach-voyager/voyager_cat.c1197
-rw-r--r--arch/x86/mach-voyager/voyager_smp.c1805
-rw-r--r--arch/x86/mach-voyager/voyager_thread.c128
-rw-r--r--arch/x86/mm/fault.c1078
-rw-r--r--arch/x86/mm/iomap_32.c58
-rw-r--r--arch/x86/mm/memtest.c156
-rw-r--r--arch/x86/mm/numa_32.c2
-rw-r--r--arch/x86/mm/numa_64.c2
-rw-r--r--arch/x86/mm/pageattr.c22
-rw-r--r--arch/x86/mm/pat.c46
-rw-r--r--arch/x86/power/hibernate_asm_32.S2
-rw-r--r--arch/x86/power/hibernate_asm_64.S2
-rw-r--r--arch/x86/vdso/vma.c4
-rw-r--r--arch/x86/xen/Kconfig2
-rw-r--r--arch/x86/xen/enlighten.c3
-rw-r--r--arch/x86/xen/xen-head.S2
121 files changed, 1167 insertions, 5447 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8780ffde6ef..2535427020b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -285,6 +285,7 @@ config X86_BIGSMP
285 ---help--- 285 ---help---
286 This option is needed for the systems that have more than 8 CPUs 286 This option is needed for the systems that have more than 8 CPUs
287 287
288if X86_32
288config X86_EXTENDED_PLATFORM 289config X86_EXTENDED_PLATFORM
289 bool "Support for extended (non-PC) x86 platforms" 290 bool "Support for extended (non-PC) x86 platforms"
290 default y 291 default y
@@ -293,12 +294,36 @@ config X86_EXTENDED_PLATFORM
293 standard PC platforms. (which covers the vast majority of 294 standard PC platforms. (which covers the vast majority of
294 systems out there.) 295 systems out there.)
295 296
296 If you enable this option then you'll be able to select a number 297 If you enable this option then you'll be able to select support
297 of non-PC x86 platforms. 298 for the following (non-PC) 32 bit x86 platforms:
299 AMD Elan
300 NUMAQ (IBM/Sequent)
301 RDC R-321x SoC
302 SGI 320/540 (Visual Workstation)
303 Summit/EXA (IBM x440)
304 Unisys ES7000 IA32 series
298 305
299 If you have one of these systems, or if you want to build a 306 If you have one of these systems, or if you want to build a
300 generic distribution kernel, say Y here - otherwise say N. 307 generic distribution kernel, say Y here - otherwise say N.
308endif
309
310if X86_64
311config X86_EXTENDED_PLATFORM
312 bool "Support for extended (non-PC) x86 platforms"
313 default y
314 ---help---
315 If you disable this option then the kernel will only support
316 standard PC platforms. (which covers the vast majority of
317 systems out there.)
301 318
319 If you enable this option then you'll be able to select support
320 for the following (non-PC) 64 bit x86 platforms:
321 ScaleMP vSMP
322 SGI Ultraviolet
323
324 If you have one of these systems, or if you want to build a
325 generic distribution kernel, say Y here - otherwise say N.
326endif
302# This is an alphabetically sorted list of 64 bit extended platforms 327# This is an alphabetically sorted list of 64 bit extended platforms
303# Please maintain the alphabetic order if and when there are additions 328# Please maintain the alphabetic order if and when there are additions
304 329
@@ -397,19 +422,6 @@ config X86_ES7000
397 Support for Unisys ES7000 systems. Say 'Y' here if this kernel is 422 Support for Unisys ES7000 systems. Say 'Y' here if this kernel is
398 supposed to run on an IA32-based Unisys ES7000 system. 423 supposed to run on an IA32-based Unisys ES7000 system.
399 424
400config X86_VOYAGER
401 bool "Voyager (NCR)"
402 depends on SMP && !PCI && BROKEN
403 depends on X86_32_NON_STANDARD
404 ---help---
405 Voyager is an MCA-based 32-way capable SMP architecture proprietary
406 to NCR Corp. Machine classes 345x/35xx/4100/51xx are Voyager-based.
407
408 *** WARNING ***
409
410 If you do not specifically know you have a Voyager based machine,
411 say N here, otherwise the kernel you build will not be bootable.
412
413config SCHED_OMIT_FRAME_POINTER 425config SCHED_OMIT_FRAME_POINTER
414 def_bool y 426 def_bool y
415 prompt "Single-depth WCHAN output" 427 prompt "Single-depth WCHAN output"
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index ba4781b9389..fdb45df608b 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -175,28 +175,8 @@ config IOMMU_LEAK
175 Add a simple leak tracer to the IOMMU code. This is useful when you 175 Add a simple leak tracer to the IOMMU code. This is useful when you
176 are debugging a buggy device driver that leaks IOMMU mappings. 176 are debugging a buggy device driver that leaks IOMMU mappings.
177 177
178config MMIOTRACE 178config HAVE_MMIOTRACE_SUPPORT
179 bool "Memory mapped IO tracing" 179 def_bool y
180 depends on DEBUG_KERNEL && PCI
181 select TRACING
182 help
183 Mmiotrace traces Memory Mapped I/O access and is meant for
184 debugging and reverse engineering. It is called from the ioremap
185 implementation and works via page faults. Tracing is disabled by
186 default and can be enabled at run-time.
187
188 See Documentation/tracers/mmiotrace.txt.
189 If you are not helping to develop drivers, say N.
190
191config MMIOTRACE_TEST
192 tristate "Test module for mmiotrace"
193 depends on MMIOTRACE && m
194 help
195 This is a dumb module for testing mmiotrace. It is very dangerous
196 as it will write garbage to IO memory starting at a given address.
197 However, it should be safe to use on e.g. unused portion of VRAM.
198
199 Say N, unless you absolutely know what you are doing.
200 180
201# 181#
202# IO delay types: 182# IO delay types:
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index cd48c721001..c70eff69a1f 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -32,7 +32,6 @@ setup-y += a20.o cmdline.o copy.o cpu.o cpucheck.o edd.o
32setup-y += header.o main.o mca.o memory.o pm.o pmjump.o 32setup-y += header.o main.o mca.o memory.o pm.o pmjump.o
33setup-y += printf.o string.o tty.o video.o video-mode.o version.o 33setup-y += printf.o string.o tty.o video.o video-mode.o version.o
34setup-$(CONFIG_X86_APM_BOOT) += apm.o 34setup-$(CONFIG_X86_APM_BOOT) += apm.o
35setup-$(CONFIG_X86_VOYAGER) += voyager.o
36 35
37# The link order of the video-*.o modules can matter. In particular, 36# The link order of the video-*.o modules can matter. In particular,
38# video-vga.o *must* be listed first, followed by video-vesa.o. 37# video-vga.o *must* be listed first, followed by video-vesa.o.
diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c
index fba8e9c6a50..7c19ce8c244 100644
--- a/arch/x86/boot/a20.c
+++ b/arch/x86/boot/a20.c
@@ -126,11 +126,6 @@ static void enable_a20_fast(void)
126 126
127int enable_a20(void) 127int enable_a20(void)
128{ 128{
129#ifdef CONFIG_X86_VOYAGER
130 /* On Voyager, a20_test() is unsafe? */
131 enable_a20_kbc();
132 return 0;
133#else
134 int loops = A20_ENABLE_LOOPS; 129 int loops = A20_ENABLE_LOOPS;
135 int kbc_err; 130 int kbc_err;
136 131
@@ -164,5 +159,4 @@ int enable_a20(void)
164 } 159 }
165 160
166 return -1; 161 return -1;
167#endif
168} 162}
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index cc0ef13fba7..7b2692e897e 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -302,9 +302,6 @@ void probe_cards(int unsafe);
302/* video-vesa.c */ 302/* video-vesa.c */
303void vesa_store_edid(void); 303void vesa_store_edid(void);
304 304
305/* voyager.c */
306int query_voyager(void);
307
308#endif /* __ASSEMBLY__ */ 305#endif /* __ASSEMBLY__ */
309 306
310#endif /* BOOT_BOOT_H */ 307#endif /* BOOT_BOOT_H */
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 29c5fbf0839..3a8a866fb2e 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -25,14 +25,12 @@
25 25
26#include <linux/linkage.h> 26#include <linux/linkage.h>
27#include <asm/segment.h> 27#include <asm/segment.h>
28#include <asm/page.h> 28#include <asm/page_types.h>
29#include <asm/boot.h> 29#include <asm/boot.h>
30#include <asm/asm-offsets.h> 30#include <asm/asm-offsets.h>
31 31
32.section ".text.head","ax",@progbits 32.section ".text.head","ax",@progbits
33 .globl startup_32 33ENTRY(startup_32)
34
35startup_32:
36 cld 34 cld
37 /* test KEEP_SEGMENTS flag to see if the bootloader is asking 35 /* test KEEP_SEGMENTS flag to see if the bootloader is asking
38 * us to not reload segments */ 36 * us to not reload segments */
@@ -113,6 +111,8 @@ startup_32:
113 */ 111 */
114 leal relocated(%ebx), %eax 112 leal relocated(%ebx), %eax
115 jmp *%eax 113 jmp *%eax
114ENDPROC(startup_32)
115
116.section ".text" 116.section ".text"
117relocated: 117relocated:
118 118
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 1d5dff4123e..ed4a8294800 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -26,8 +26,8 @@
26 26
27#include <linux/linkage.h> 27#include <linux/linkage.h>
28#include <asm/segment.h> 28#include <asm/segment.h>
29#include <asm/pgtable.h> 29#include <asm/pgtable_types.h>
30#include <asm/page.h> 30#include <asm/page_types.h>
31#include <asm/boot.h> 31#include <asm/boot.h>
32#include <asm/msr.h> 32#include <asm/msr.h>
33#include <asm/processor-flags.h> 33#include <asm/processor-flags.h>
@@ -35,9 +35,7 @@
35 35
36.section ".text.head" 36.section ".text.head"
37 .code32 37 .code32
38 .globl startup_32 38ENTRY(startup_32)
39
40startup_32:
41 cld 39 cld
42 /* test KEEP_SEGMENTS flag to see if the bootloader is asking 40 /* test KEEP_SEGMENTS flag to see if the bootloader is asking
43 * us to not reload segments */ 41 * us to not reload segments */
@@ -176,6 +174,7 @@ startup_32:
176 174
177 /* Jump from 32bit compatibility mode into 64bit mode. */ 175 /* Jump from 32bit compatibility mode into 64bit mode. */
178 lret 176 lret
177ENDPROC(startup_32)
179 178
180no_longmode: 179no_longmode:
181 /* This isn't an x86-64 CPU so hang */ 180 /* This isn't an x86-64 CPU so hang */
@@ -295,7 +294,6 @@ relocated:
295 call decompress_kernel 294 call decompress_kernel
296 popq %rsi 295 popq %rsi
297 296
298
299/* 297/*
300 * Jump to the decompressed kernel. 298 * Jump to the decompressed kernel.
301 */ 299 */
diff --git a/arch/x86/boot/copy.S b/arch/x86/boot/copy.S
index ef50c84e8b4..11f272c6f5e 100644
--- a/arch/x86/boot/copy.S
+++ b/arch/x86/boot/copy.S
@@ -8,6 +8,8 @@
8 * 8 *
9 * ----------------------------------------------------------------------- */ 9 * ----------------------------------------------------------------------- */
10 10
11#include <linux/linkage.h>
12
11/* 13/*
12 * Memory copy routines 14 * Memory copy routines
13 */ 15 */
@@ -15,9 +17,7 @@
15 .code16gcc 17 .code16gcc
16 .text 18 .text
17 19
18 .globl memcpy 20GLOBAL(memcpy)
19 .type memcpy, @function
20memcpy:
21 pushw %si 21 pushw %si
22 pushw %di 22 pushw %di
23 movw %ax, %di 23 movw %ax, %di
@@ -31,11 +31,9 @@ memcpy:
31 popw %di 31 popw %di
32 popw %si 32 popw %si
33 ret 33 ret
34 .size memcpy, .-memcpy 34ENDPROC(memcpy)
35 35
36 .globl memset 36GLOBAL(memset)
37 .type memset, @function
38memset:
39 pushw %di 37 pushw %di
40 movw %ax, %di 38 movw %ax, %di
41 movzbl %dl, %eax 39 movzbl %dl, %eax
@@ -48,52 +46,42 @@ memset:
48 rep; stosb 46 rep; stosb
49 popw %di 47 popw %di
50 ret 48 ret
51 .size memset, .-memset 49ENDPROC(memset)
52 50
53 .globl copy_from_fs 51GLOBAL(copy_from_fs)
54 .type copy_from_fs, @function
55copy_from_fs:
56 pushw %ds 52 pushw %ds
57 pushw %fs 53 pushw %fs
58 popw %ds 54 popw %ds
59 call memcpy 55 call memcpy
60 popw %ds 56 popw %ds
61 ret 57 ret
62 .size copy_from_fs, .-copy_from_fs 58ENDPROC(copy_from_fs)
63 59
64 .globl copy_to_fs 60GLOBAL(copy_to_fs)
65 .type copy_to_fs, @function
66copy_to_fs:
67 pushw %es 61 pushw %es
68 pushw %fs 62 pushw %fs
69 popw %es 63 popw %es
70 call memcpy 64 call memcpy
71 popw %es 65 popw %es
72 ret 66 ret
73 .size copy_to_fs, .-copy_to_fs 67ENDPROC(copy_to_fs)
74 68
75#if 0 /* Not currently used, but can be enabled as needed */ 69#if 0 /* Not currently used, but can be enabled as needed */
76 70GLOBAL(copy_from_gs)
77 .globl copy_from_gs
78 .type copy_from_gs, @function
79copy_from_gs:
80 pushw %ds 71 pushw %ds
81 pushw %gs 72 pushw %gs
82 popw %ds 73 popw %ds
83 call memcpy 74 call memcpy
84 popw %ds 75 popw %ds
85 ret 76 ret
86 .size copy_from_gs, .-copy_from_gs 77ENDPROC(copy_from_gs)
87 .globl copy_to_gs
88 78
89 .type copy_to_gs, @function 79GLOBAL(copy_to_gs)
90copy_to_gs:
91 pushw %es 80 pushw %es
92 pushw %gs 81 pushw %gs
93 popw %es 82 popw %es
94 call memcpy 83 call memcpy
95 popw %es 84 popw %es
96 ret 85 ret
97 .size copy_to_gs, .-copy_to_gs 86ENDPROC(copy_to_gs)
98
99#endif 87#endif
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index b993062e9a5..7ccff4884a2 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -19,7 +19,7 @@
19#include <linux/utsrelease.h> 19#include <linux/utsrelease.h>
20#include <asm/boot.h> 20#include <asm/boot.h>
21#include <asm/e820.h> 21#include <asm/e820.h>
22#include <asm/page.h> 22#include <asm/page_types.h>
23#include <asm/setup.h> 23#include <asm/setup.h>
24#include "boot.h" 24#include "boot.h"
25#include "offsets.h" 25#include "offsets.h"
diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c
index 197421db1af..58f0415d3ae 100644
--- a/arch/x86/boot/main.c
+++ b/arch/x86/boot/main.c
@@ -149,11 +149,6 @@ void main(void)
149 /* Query MCA information */ 149 /* Query MCA information */
150 query_mca(); 150 query_mca();
151 151
152 /* Voyager */
153#ifdef CONFIG_X86_VOYAGER
154 query_voyager();
155#endif
156
157 /* Query Intel SpeedStep (IST) information */ 152 /* Query Intel SpeedStep (IST) information */
158 query_ist(); 153 query_ist();
159 154
diff --git a/arch/x86/boot/pmjump.S b/arch/x86/boot/pmjump.S
index 141b6e20ed3..019c17a7585 100644
--- a/arch/x86/boot/pmjump.S
+++ b/arch/x86/boot/pmjump.S
@@ -15,18 +15,15 @@
15#include <asm/boot.h> 15#include <asm/boot.h>
16#include <asm/processor-flags.h> 16#include <asm/processor-flags.h>
17#include <asm/segment.h> 17#include <asm/segment.h>
18#include <linux/linkage.h>
18 19
19 .text 20 .text
20
21 .globl protected_mode_jump
22 .type protected_mode_jump, @function
23
24 .code16 21 .code16
25 22
26/* 23/*
27 * void protected_mode_jump(u32 entrypoint, u32 bootparams); 24 * void protected_mode_jump(u32 entrypoint, u32 bootparams);
28 */ 25 */
29protected_mode_jump: 26GLOBAL(protected_mode_jump)
30 movl %edx, %esi # Pointer to boot_params table 27 movl %edx, %esi # Pointer to boot_params table
31 28
32 xorl %ebx, %ebx 29 xorl %ebx, %ebx
@@ -47,12 +44,10 @@ protected_mode_jump:
47 .byte 0x66, 0xea # ljmpl opcode 44 .byte 0x66, 0xea # ljmpl opcode
482: .long in_pm32 # offset 452: .long in_pm32 # offset
49 .word __BOOT_CS # segment 46 .word __BOOT_CS # segment
50 47ENDPROC(protected_mode_jump)
51 .size protected_mode_jump, .-protected_mode_jump
52 48
53 .code32 49 .code32
54 .type in_pm32, @function 50GLOBAL(in_pm32)
55in_pm32:
56 # Set up data segments for flat 32-bit mode 51 # Set up data segments for flat 32-bit mode
57 movl %ecx, %ds 52 movl %ecx, %ds
58 movl %ecx, %es 53 movl %ecx, %es
@@ -78,5 +73,4 @@ in_pm32:
78 lldt %cx 73 lldt %cx
79 74
80 jmpl *%eax # Jump to the 32-bit entrypoint 75 jmpl *%eax # Jump to the 32-bit entrypoint
81 76ENDPROC(in_pm32)
82 .size in_pm32, .-in_pm32
diff --git a/arch/x86/boot/voyager.c b/arch/x86/boot/voyager.c
deleted file mode 100644
index 433909d61e5..00000000000
--- a/arch/x86/boot/voyager.c
+++ /dev/null
@@ -1,40 +0,0 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright (C) 1991, 1992 Linus Torvalds
4 * Copyright 2007 rPath, Inc. - All Rights Reserved
5 *
6 * This file is part of the Linux kernel, and is made available under
7 * the terms of the GNU General Public License version 2.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * Get the Voyager config information
13 */
14
15#include "boot.h"
16
17int query_voyager(void)
18{
19 u8 err;
20 u16 es, di;
21 /* Abuse the apm_bios_info area for this */
22 u8 *data_ptr = (u8 *)&boot_params.apm_bios_info;
23
24 data_ptr[0] = 0xff; /* Flag on config not found(?) */
25
26 asm("pushw %%es ; "
27 "int $0x15 ; "
28 "setc %0 ; "
29 "movw %%es, %1 ; "
30 "popw %%es"
31 : "=q" (err), "=r" (es), "=D" (di)
32 : "a" (0xffc0));
33
34 if (err)
35 return -1; /* Not Voyager */
36
37 set_fs(es);
38 copy_from_fs(data_ptr, di, 7); /* Table is 7 bytes apparently */
39 return 0;
40}
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 096dd5359cd..235b81d0f6f 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -1,7 +1,7 @@
1# 1#
2# Automatically generated make config: don't edit 2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.29-rc4 3# Linux kernel version: 2.6.29-rc4
4# Thu Feb 12 12:57:57 2009 4# Tue Feb 24 15:50:58 2009
5# 5#
6# CONFIG_64BIT is not set 6# CONFIG_64BIT is not set
7CONFIG_X86_32=y 7CONFIG_X86_32=y
@@ -197,7 +197,6 @@ CONFIG_SPARSE_IRQ=y
197CONFIG_X86_FIND_SMP_CONFIG=y 197CONFIG_X86_FIND_SMP_CONFIG=y
198CONFIG_X86_MPPARSE=y 198CONFIG_X86_MPPARSE=y
199# CONFIG_X86_ELAN is not set 199# CONFIG_X86_ELAN is not set
200# CONFIG_X86_VOYAGER is not set
201# CONFIG_X86_GENERICARCH is not set 200# CONFIG_X86_GENERICARCH is not set
202# CONFIG_X86_VSMP is not set 201# CONFIG_X86_VSMP is not set
203# CONFIG_X86_RDC321X is not set 202# CONFIG_X86_RDC321X is not set
@@ -267,7 +266,9 @@ CONFIG_PREEMPT_VOLUNTARY=y
267CONFIG_X86_LOCAL_APIC=y 266CONFIG_X86_LOCAL_APIC=y
268CONFIG_X86_IO_APIC=y 267CONFIG_X86_IO_APIC=y
269CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y 268CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y
270# CONFIG_X86_MCE is not set 269CONFIG_X86_MCE=y
270CONFIG_X86_MCE_NONFATAL=y
271CONFIG_X86_MCE_P4THERMAL=y
271CONFIG_VM86=y 272CONFIG_VM86=y
272# CONFIG_TOSHIBA is not set 273# CONFIG_TOSHIBA is not set
273# CONFIG_I8K is not set 274# CONFIG_I8K is not set
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 2efb5d5063f..9fe5d212ab4 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -1,7 +1,7 @@
1# 1#
2# Automatically generated make config: don't edit 2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.29-rc4 3# Linux kernel version: 2.6.29-rc4
4# Thu Feb 12 12:57:29 2009 4# Tue Feb 24 15:44:16 2009
5# 5#
6CONFIG_64BIT=y 6CONFIG_64BIT=y
7# CONFIG_X86_32 is not set 7# CONFIG_X86_32 is not set
@@ -199,7 +199,6 @@ CONFIG_SPARSE_IRQ=y
199CONFIG_X86_FIND_SMP_CONFIG=y 199CONFIG_X86_FIND_SMP_CONFIG=y
200CONFIG_X86_MPPARSE=y 200CONFIG_X86_MPPARSE=y
201# CONFIG_X86_ELAN is not set 201# CONFIG_X86_ELAN is not set
202# CONFIG_X86_VOYAGER is not set
203# CONFIG_X86_GENERICARCH is not set 202# CONFIG_X86_GENERICARCH is not set
204# CONFIG_X86_VSMP is not set 203# CONFIG_X86_VSMP is not set
205CONFIG_SCHED_OMIT_FRAME_POINTER=y 204CONFIG_SCHED_OMIT_FRAME_POINTER=y
@@ -267,7 +266,9 @@ CONFIG_PREEMPT_VOLUNTARY=y
267CONFIG_X86_LOCAL_APIC=y 266CONFIG_X86_LOCAL_APIC=y
268CONFIG_X86_IO_APIC=y 267CONFIG_X86_IO_APIC=y
269CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y 268CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y
270# CONFIG_X86_MCE is not set 269CONFIG_X86_MCE=y
270CONFIG_X86_MCE_INTEL=y
271CONFIG_X86_MCE_AMD=y
271# CONFIG_I8K is not set 272# CONFIG_I8K is not set
272CONFIG_MICROCODE=y 273CONFIG_MICROCODE=y
273CONFIG_MICROCODE_INTEL=y 274CONFIG_MICROCODE_INTEL=y
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index dd77ac0cac4..588a7aa937e 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -33,8 +33,6 @@
33#include <asm/sigframe.h> 33#include <asm/sigframe.h>
34#include <asm/sys_ia32.h> 34#include <asm/sys_ia32.h>
35 35
36#define DEBUG_SIG 0
37
38#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) 36#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
39 37
40#define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \ 38#define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \
@@ -190,42 +188,47 @@ asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr,
190/* 188/*
191 * Do a signal return; undo the signal stack. 189 * Do a signal return; undo the signal stack.
192 */ 190 */
191#define loadsegment_gs(v) load_gs_index(v)
192#define loadsegment_fs(v) loadsegment(fs, v)
193#define loadsegment_ds(v) loadsegment(ds, v)
194#define loadsegment_es(v) loadsegment(es, v)
195
196#define get_user_seg(seg) ({ unsigned int v; savesegment(seg, v); v; })
197#define set_user_seg(seg, v) loadsegment_##seg(v)
198
193#define COPY(x) { \ 199#define COPY(x) { \
194 get_user_ex(regs->x, &sc->x); \ 200 get_user_ex(regs->x, &sc->x); \
195} 201}
196 202
197#define COPY_SEG_CPL3(seg) { \ 203#define GET_SEG(seg) ({ \
198 unsigned short tmp; \ 204 unsigned short tmp; \
199 get_user_ex(tmp, &sc->seg); \ 205 get_user_ex(tmp, &sc->seg); \
200 regs->seg = tmp | 3; \ 206 tmp; \
201} 207})
208
209#define COPY_SEG_CPL3(seg) do { \
210 regs->seg = GET_SEG(seg) | 3; \
211} while (0)
202 212
203#define RELOAD_SEG(seg) { \ 213#define RELOAD_SEG(seg) { \
204 unsigned int cur, pre; \ 214 unsigned int pre = GET_SEG(seg); \
205 get_user_ex(pre, &sc->seg); \ 215 unsigned int cur = get_user_seg(seg); \
206 savesegment(seg, cur); \
207 pre |= 3; \ 216 pre |= 3; \
208 if (pre != cur) \ 217 if (pre != cur) \
209 loadsegment(seg, pre); \ 218 set_user_seg(seg, pre); \
210} 219}
211 220
212static int ia32_restore_sigcontext(struct pt_regs *regs, 221static int ia32_restore_sigcontext(struct pt_regs *regs,
213 struct sigcontext_ia32 __user *sc, 222 struct sigcontext_ia32 __user *sc,
214 unsigned int *pax) 223 unsigned int *pax)
215{ 224{
216 unsigned int tmpflags, gs, oldgs, err = 0; 225 unsigned int tmpflags, err = 0;
217 void __user *buf; 226 void __user *buf;
218 u32 tmp; 227 u32 tmp;
219 228
220 /* Always make any pending restarted system calls return -EINTR */ 229 /* Always make any pending restarted system calls return -EINTR */
221 current_thread_info()->restart_block.fn = do_no_restart_syscall; 230 current_thread_info()->restart_block.fn = do_no_restart_syscall;
222 231
223#if DEBUG_SIG
224 printk(KERN_DEBUG "SIG restore_sigcontext: "
225 "sc=%p err(%x) eip(%x) cs(%x) flg(%x)\n",
226 sc, sc->err, sc->ip, sc->cs, sc->flags);
227#endif
228
229 get_user_try { 232 get_user_try {
230 /* 233 /*
231 * Reload fs and gs if they have changed in the signal 234 * Reload fs and gs if they have changed in the signal
@@ -233,12 +236,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
233 * the handler, but does not clobber them at least in the 236 * the handler, but does not clobber them at least in the
234 * normal case. 237 * normal case.
235 */ 238 */
236 get_user_ex(gs, &sc->gs); 239 RELOAD_SEG(gs);
237 gs |= 3;
238 savesegment(gs, oldgs);
239 if (gs != oldgs)
240 load_gs_index(gs);
241
242 RELOAD_SEG(fs); 240 RELOAD_SEG(fs);
243 RELOAD_SEG(ds); 241 RELOAD_SEG(ds);
244 RELOAD_SEG(es); 242 RELOAD_SEG(es);
@@ -337,17 +335,13 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
337 void __user *fpstate, 335 void __user *fpstate,
338 struct pt_regs *regs, unsigned int mask) 336 struct pt_regs *regs, unsigned int mask)
339{ 337{
340 int tmp, err = 0; 338 int err = 0;
341 339
342 put_user_try { 340 put_user_try {
343 savesegment(gs, tmp); 341 put_user_ex(get_user_seg(gs), (unsigned int __user *)&sc->gs);
344 put_user_ex(tmp, (unsigned int __user *)&sc->gs); 342 put_user_ex(get_user_seg(fs), (unsigned int __user *)&sc->fs);
345 savesegment(fs, tmp); 343 put_user_ex(get_user_seg(ds), (unsigned int __user *)&sc->ds);
346 put_user_ex(tmp, (unsigned int __user *)&sc->fs); 344 put_user_ex(get_user_seg(es), (unsigned int __user *)&sc->es);
347 savesegment(ds, tmp);
348 put_user_ex(tmp, (unsigned int __user *)&sc->ds);
349 savesegment(es, tmp);
350 put_user_ex(tmp, (unsigned int __user *)&sc->es);
351 345
352 put_user_ex(regs->di, &sc->di); 346 put_user_ex(regs->di, &sc->di);
353 put_user_ex(regs->si, &sc->si); 347 put_user_ex(regs->si, &sc->si);
@@ -488,11 +482,6 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
488 regs->cs = __USER32_CS; 482 regs->cs = __USER32_CS;
489 regs->ss = __USER32_DS; 483 regs->ss = __USER32_DS;
490 484
491#if DEBUG_SIG
492 printk(KERN_DEBUG "SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n",
493 current->comm, current->pid, frame, regs->ip, frame->pretcode);
494#endif
495
496 return 0; 485 return 0;
497} 486}
498 487
@@ -574,10 +563,5 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
574 regs->cs = __USER32_CS; 563 regs->cs = __USER32_CS;
575 regs->ss = __USER32_DS; 564 regs->ss = __USER32_DS;
576 565
577#if DEBUG_SIG
578 printk(KERN_DEBUG "SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n",
579 current->comm, current->pid, frame, regs->ip, frame->pretcode);
580#endif
581
582 return 0; 566 return 0;
583} 567}
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index dce1bf696cc..a6208dc7463 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -146,7 +146,7 @@ static inline u64 native_x2apic_icr_read(void)
146 return val; 146 return val;
147} 147}
148 148
149extern int x2apic; 149extern int x2apic, x2apic_phys;
150extern void check_x2apic(void); 150extern void check_x2apic(void);
151extern void enable_x2apic(void); 151extern void enable_x2apic(void);
152extern void enable_IR_x2apic(void); 152extern void enable_IR_x2apic(void);
diff --git a/arch/x86/include/asm/arch_hooks.h b/arch/x86/include/asm/arch_hooks.h
deleted file mode 100644
index cbd4957838a..00000000000
--- a/arch/x86/include/asm/arch_hooks.h
+++ /dev/null
@@ -1,26 +0,0 @@
1#ifndef _ASM_X86_ARCH_HOOKS_H
2#define _ASM_X86_ARCH_HOOKS_H
3
4#include <linux/interrupt.h>
5
6/*
7 * linux/include/asm/arch_hooks.h
8 *
9 * define the architecture specific hooks
10 */
11
12/* these aren't arch hooks, they are generic routines
13 * that can be used by the hooks */
14extern void init_ISA_irqs(void);
15extern irqreturn_t timer_interrupt(int irq, void *dev_id);
16
17/* these are the defined hooks */
18extern void intr_init_hook(void);
19extern void pre_intr_init_hook(void);
20extern void pre_setup_arch_hook(void);
21extern void trap_init_hook(void);
22extern void pre_time_init_hook(void);
23extern void time_init_hook(void);
24extern void mca_nmi_hook(void);
25
26#endif /* _ASM_X86_ARCH_HOOKS_H */
diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h
index 58d7091eeb1..1a99e6c092a 100644
--- a/arch/x86/include/asm/i8259.h
+++ b/arch/x86/include/asm/i8259.h
@@ -60,4 +60,8 @@ extern struct irq_chip i8259A_chip;
60extern void mask_8259A(void); 60extern void mask_8259A(void);
61extern void unmask_8259A(void); 61extern void unmask_8259A(void);
62 62
63#ifdef CONFIG_X86_32
64extern void init_ISA_irqs(void);
65#endif
66
63#endif /* _ASM_X86_I8259_H */ 67#endif /* _ASM_X86_I8259_H */
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 4f8e820cf38..683d0b4c00f 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -124,10 +124,15 @@ static inline void *phys_to_virt(phys_addr_t address)
124 124
125/* 125/*
126 * ISA I/O bus memory addresses are 1:1 with the physical address. 126 * ISA I/O bus memory addresses are 1:1 with the physical address.
127 * However, we truncate the address to unsigned int to avoid undesirable
128 * promitions in legacy drivers.
127 */ 129 */
128#define isa_virt_to_bus (unsigned long)virt_to_phys 130static inline unsigned int isa_virt_to_bus(volatile void *address)
129#define isa_page_to_bus page_to_phys 131{
130#define isa_bus_to_virt phys_to_virt 132 return (unsigned int)virt_to_phys(address);
133}
134#define isa_page_to_bus(page) ((unsigned int)page_to_phys(page))
135#define isa_bus_to_virt phys_to_virt
131 136
132/* 137/*
133 * However PCI ones are not necessarily 1:1 and therefore these interfaces 138 * However PCI ones are not necessarily 1:1 and therefore these interfaces
diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h
index c1f06289b14..bd46495ff7d 100644
--- a/arch/x86/include/asm/iomap.h
+++ b/arch/x86/include/asm/iomap.h
@@ -23,6 +23,12 @@
23#include <asm/pgtable.h> 23#include <asm/pgtable.h>
24#include <asm/tlbflush.h> 24#include <asm/tlbflush.h>
25 25
26int
27reserve_io_memtype_wc(u64 base, unsigned long size, pgprot_t *prot);
28
29void
30free_io_memtype(u64 base, unsigned long size);
31
26void * 32void *
27iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); 33iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
28 34
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index b07278c55e9..8a285f356f8 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -128,7 +128,7 @@
128#ifndef __ASSEMBLY__ 128#ifndef __ASSEMBLY__
129static inline int invalid_vm86_irq(int irq) 129static inline int invalid_vm86_irq(int irq)
130{ 130{
131 return irq < 3 || irq > 15; 131 return irq < FIRST_VM86_IRQ || irq > LAST_VM86_IRQ;
132} 132}
133#endif 133#endif
134 134
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index d2e3bf3608a..886c9402ec4 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -9,6 +9,13 @@
9#include <linux/types.h> 9#include <linux/types.h>
10#include <linux/ioctl.h> 10#include <linux/ioctl.h>
11 11
12/* Select x86 specific features in <linux/kvm.h> */
13#define __KVM_HAVE_PIT
14#define __KVM_HAVE_IOAPIC
15#define __KVM_HAVE_DEVICE_ASSIGNMENT
16#define __KVM_HAVE_MSI
17#define __KVM_HAVE_USER_NMI
18
12/* Architectural interrupt line count. */ 19/* Architectural interrupt line count. */
13#define KVM_NR_INTERRUPTS 256 20#define KVM_NR_INTERRUPTS 256
14 21
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index 5d98d0b68ff..9320e2a8a26 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -52,70 +52,14 @@
52 52
53#endif 53#endif
54 54
55#define GLOBAL(name) \
56 .globl name; \
57 name:
58
55#ifdef CONFIG_X86_ALIGNMENT_16 59#ifdef CONFIG_X86_ALIGNMENT_16
56#define __ALIGN .align 16,0x90 60#define __ALIGN .align 16,0x90
57#define __ALIGN_STR ".align 16,0x90" 61#define __ALIGN_STR ".align 16,0x90"
58#endif 62#endif
59 63
60/*
61 * to check ENTRY_X86/END_X86 and
62 * KPROBE_ENTRY_X86/KPROBE_END_X86
63 * unbalanced-missed-mixed appearance
64 */
65#define __set_entry_x86 .set ENTRY_X86_IN, 0
66#define __unset_entry_x86 .set ENTRY_X86_IN, 1
67#define __set_kprobe_x86 .set KPROBE_X86_IN, 0
68#define __unset_kprobe_x86 .set KPROBE_X86_IN, 1
69
70#define __macro_err_x86 .error "ENTRY_X86/KPROBE_X86 unbalanced,missed,mixed"
71
72#define __check_entry_x86 \
73 .ifdef ENTRY_X86_IN; \
74 .ifeq ENTRY_X86_IN; \
75 __macro_err_x86; \
76 .abort; \
77 .endif; \
78 .endif
79
80#define __check_kprobe_x86 \
81 .ifdef KPROBE_X86_IN; \
82 .ifeq KPROBE_X86_IN; \
83 __macro_err_x86; \
84 .abort; \
85 .endif; \
86 .endif
87
88#define __check_entry_kprobe_x86 \
89 __check_entry_x86; \
90 __check_kprobe_x86
91
92#define ENTRY_KPROBE_FINAL_X86 __check_entry_kprobe_x86
93
94#define ENTRY_X86(name) \
95 __check_entry_kprobe_x86; \
96 __set_entry_x86; \
97 .globl name; \
98 __ALIGN; \
99 name:
100
101#define END_X86(name) \
102 __unset_entry_x86; \
103 __check_entry_kprobe_x86; \
104 .size name, .-name
105
106#define KPROBE_ENTRY_X86(name) \
107 __check_entry_kprobe_x86; \
108 __set_kprobe_x86; \
109 .pushsection .kprobes.text, "ax"; \
110 .globl name; \
111 __ALIGN; \
112 name:
113
114#define KPROBE_END_X86(name) \
115 __unset_kprobe_x86; \
116 __check_entry_kprobe_x86; \
117 .size name, .-name; \
118 .popsection
119
120#endif /* _ASM_X86_LINKAGE_H */ 64#endif /* _ASM_X86_LINKAGE_H */
121 65
diff --git a/arch/x86/include/asm/mach-voyager/do_timer.h b/arch/x86/include/asm/mach-voyager/do_timer.h
deleted file mode 100644
index 9e5a459fd15..00000000000
--- a/arch/x86/include/asm/mach-voyager/do_timer.h
+++ /dev/null
@@ -1,17 +0,0 @@
1/* defines for inline arch setup functions */
2#include <linux/clockchips.h>
3
4#include <asm/voyager.h>
5#include <asm/i8253.h>
6
7/**
8 * do_timer_interrupt_hook - hook into timer tick
9 *
10 * Call the pit clock event handler. see asm/i8253.h
11 **/
12static inline void do_timer_interrupt_hook(void)
13{
14 global_clock_event->event_handler(global_clock_event);
15 voyager_timer_interrupt();
16}
17
diff --git a/arch/x86/include/asm/mach-voyager/entry_arch.h b/arch/x86/include/asm/mach-voyager/entry_arch.h
deleted file mode 100644
index ae52624b593..00000000000
--- a/arch/x86/include/asm/mach-voyager/entry_arch.h
+++ /dev/null
@@ -1,26 +0,0 @@
1/* -*- mode: c; c-basic-offset: 8 -*- */
2
3/* Copyright (C) 2002
4 *
5 * Author: James.Bottomley@HansenPartnership.com
6 *
7 * linux/arch/i386/voyager/entry_arch.h
8 *
9 * This file builds the VIC and QIC CPI gates
10 */
11
12/* initialise the voyager interrupt gates
13 *
14 * This uses the macros in irq.h to set up assembly jump gates. The
15 * calls are then redirected to the same routine with smp_ prefixed */
16BUILD_INTERRUPT(vic_sys_interrupt, VIC_SYS_INT)
17BUILD_INTERRUPT(vic_cmn_interrupt, VIC_CMN_INT)
18BUILD_INTERRUPT(vic_cpi_interrupt, VIC_CPI_LEVEL0);
19
20/* do all the QIC interrupts */
21BUILD_INTERRUPT(qic_timer_interrupt, QIC_TIMER_CPI);
22BUILD_INTERRUPT(qic_invalidate_interrupt, QIC_INVALIDATE_CPI);
23BUILD_INTERRUPT(qic_reschedule_interrupt, QIC_RESCHEDULE_CPI);
24BUILD_INTERRUPT(qic_enable_irq_interrupt, QIC_ENABLE_IRQ_CPI);
25BUILD_INTERRUPT(qic_call_function_interrupt, QIC_CALL_FUNCTION_CPI);
26BUILD_INTERRUPT(qic_call_function_single_interrupt, QIC_CALL_FUNCTION_SINGLE_CPI);
diff --git a/arch/x86/include/asm/mach-voyager/setup_arch.h b/arch/x86/include/asm/mach-voyager/setup_arch.h
deleted file mode 100644
index 71729ca05cd..00000000000
--- a/arch/x86/include/asm/mach-voyager/setup_arch.h
+++ /dev/null
@@ -1,12 +0,0 @@
1#include <asm/voyager.h>
2#include <asm/setup.h>
3#define VOYAGER_BIOS_INFO ((struct voyager_bios_info *) \
4 (&boot_params.apm_bios_info))
5
6/* Hook to call BIOS initialisation function */
7
8/* for voyager, pass the voyager BIOS/SUS info area to the detection
9 * routines */
10
11#define ARCH_SETUP voyager_detect(VOYAGER_BIOS_INFO);
12
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
index 07f1af494ca..105fb90a063 100644
--- a/arch/x86/include/asm/mmzone_32.h
+++ b/arch/x86/include/asm/mmzone_32.h
@@ -32,8 +32,6 @@ static inline void get_memcfg_numa(void)
32 get_memcfg_numa_flat(); 32 get_memcfg_numa_flat();
33} 33}
34 34
35extern int early_pfn_to_nid(unsigned long pfn);
36
37extern void resume_map_numa_kva(pgd_t *pgd); 35extern void resume_map_numa_kva(pgd_t *pgd);
38 36
39#else /* !CONFIG_NUMA */ 37#else /* !CONFIG_NUMA */
diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h
index a5b3817d4b9..a29f48c2a32 100644
--- a/arch/x86/include/asm/mmzone_64.h
+++ b/arch/x86/include/asm/mmzone_64.h
@@ -40,8 +40,6 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr)
40#define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ 40#define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \
41 NODE_DATA(nid)->node_spanned_pages) 41 NODE_DATA(nid)->node_spanned_pages)
42 42
43extern int early_pfn_to_nid(unsigned long pfn);
44
45#ifdef CONFIG_NUMA_EMU 43#ifdef CONFIG_NUMA_EMU
46#define FAKE_NODE_MIN_SIZE (64 * 1024 * 1024) 44#define FAKE_NODE_MIN_SIZE (64 * 1024 * 1024)
47#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL)) 45#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL))
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
index b5486aaf36e..f1e4a79a6e4 100644
--- a/arch/x86/include/asm/page_32_types.h
+++ b/arch/x86/include/asm/page_32_types.h
@@ -33,12 +33,10 @@
33/* 44=32+12, the limit we can fit into an unsigned long pfn */ 33/* 44=32+12, the limit we can fit into an unsigned long pfn */
34#define __PHYSICAL_MASK_SHIFT 44 34#define __PHYSICAL_MASK_SHIFT 44
35#define __VIRTUAL_MASK_SHIFT 32 35#define __VIRTUAL_MASK_SHIFT 32
36#define PAGETABLE_LEVELS 3
37 36
38#else /* !CONFIG_X86_PAE */ 37#else /* !CONFIG_X86_PAE */
39#define __PHYSICAL_MASK_SHIFT 32 38#define __PHYSICAL_MASK_SHIFT 32
40#define __VIRTUAL_MASK_SHIFT 32 39#define __VIRTUAL_MASK_SHIFT 32
41#define PAGETABLE_LEVELS 2
42#endif /* CONFIG_X86_PAE */ 40#endif /* CONFIG_X86_PAE */
43 41
44#ifndef __ASSEMBLY__ 42#ifndef __ASSEMBLY__
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index bc73af3eda9..d38c91b7024 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -1,8 +1,6 @@
1#ifndef _ASM_X86_PAGE_64_DEFS_H 1#ifndef _ASM_X86_PAGE_64_DEFS_H
2#define _ASM_X86_PAGE_64_DEFS_H 2#define _ASM_X86_PAGE_64_DEFS_H
3 3
4#define PAGETABLE_LEVELS 4
5
6#define THREAD_ORDER 1 4#define THREAD_ORDER 1
7#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) 5#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER)
8#define CURRENT_MASK (~(THREAD_SIZE - 1)) 6#define CURRENT_MASK (~(THREAD_SIZE - 1))
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index 2c52ff76758..2d625da6603 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -16,12 +16,6 @@
16 (ie, 32-bit PAE). */ 16 (ie, 32-bit PAE). */
17#define PHYSICAL_PAGE_MASK (((signed long)PAGE_MASK) & __PHYSICAL_MASK) 17#define PHYSICAL_PAGE_MASK (((signed long)PAGE_MASK) & __PHYSICAL_MASK)
18 18
19/* PTE_PFN_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */
20#define PTE_PFN_MASK ((pteval_t)PHYSICAL_PAGE_MASK)
21
22/* PTE_FLAGS_MASK extracts the flags from a (pte|pmd|pud|pgd)val_t */
23#define PTE_FLAGS_MASK (~PTE_PFN_MASK)
24
25#define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT) 19#define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT)
26#define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1)) 20#define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1))
27 21
diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h
index 9709fdff661..b0e70056838 100644
--- a/arch/x86/include/asm/pat.h
+++ b/arch/x86/include/asm/pat.h
@@ -15,4 +15,7 @@ extern int reserve_memtype(u64 start, u64 end,
15 unsigned long req_type, unsigned long *ret_type); 15 unsigned long req_type, unsigned long *ret_type);
16extern int free_memtype(u64 start, u64 end); 16extern int free_memtype(u64 start, u64 end);
17 17
18extern int kernel_map_sync_memtype(u64 base, unsigned long size,
19 unsigned long flag);
20
18#endif /* _ASM_X86_PAT_H */ 21#endif /* _ASM_X86_PAT_H */
diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h
index 09ae67efceb..daacc23e3fb 100644
--- a/arch/x86/include/asm/pgtable-2level_types.h
+++ b/arch/x86/include/asm/pgtable-2level_types.h
@@ -17,6 +17,7 @@ typedef union {
17#endif /* !__ASSEMBLY__ */ 17#endif /* !__ASSEMBLY__ */
18 18
19#define SHARED_KERNEL_PMD 0 19#define SHARED_KERNEL_PMD 0
20#define PAGETABLE_LEVELS 2
20 21
21/* 22/*
22 * traditional i386 two-level paging structure: 23 * traditional i386 two-level paging structure:
@@ -25,6 +26,7 @@ typedef union {
25#define PGDIR_SHIFT 22 26#define PGDIR_SHIFT 22
26#define PTRS_PER_PGD 1024 27#define PTRS_PER_PGD 1024
27 28
29
28/* 30/*
29 * the i386 is two-level, so we don't really have any 31 * the i386 is two-level, so we don't really have any
30 * PMD directory physically. 32 * PMD directory physically.
diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h
index bcc89625ebe..1bd5876c864 100644
--- a/arch/x86/include/asm/pgtable-3level_types.h
+++ b/arch/x86/include/asm/pgtable-3level_types.h
@@ -24,6 +24,8 @@ typedef union {
24#define SHARED_KERNEL_PMD 1 24#define SHARED_KERNEL_PMD 1
25#endif 25#endif
26 26
27#define PAGETABLE_LEVELS 3
28
27/* 29/*
28 * PGDIR_SHIFT determines what a top-level page table entry can map 30 * PGDIR_SHIFT determines what a top-level page table entry can map
29 */ 31 */
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 2f59135c6f2..fbf42b8e038 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -18,6 +18,7 @@ typedef struct { pteval_t pte; } pte_t;
18#endif /* !__ASSEMBLY__ */ 18#endif /* !__ASSEMBLY__ */
19 19
20#define SHARED_KERNEL_PMD 0 20#define SHARED_KERNEL_PMD 0
21#define PAGETABLE_LEVELS 4
21 22
22/* 23/*
23 * PGDIR_SHIFT determines what a top-level page table entry can map 24 * PGDIR_SHIFT determines what a top-level page table entry can map
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 9dafe87be2d..4d258ad76a0 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -173,6 +173,12 @@
173 173
174#include <linux/types.h> 174#include <linux/types.h>
175 175
176/* PTE_PFN_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */
177#define PTE_PFN_MASK ((pteval_t)PHYSICAL_PAGE_MASK)
178
179/* PTE_FLAGS_MASK extracts the flags from a (pte|pmd|pud|pgd)val_t */
180#define PTE_FLAGS_MASK (~PTE_PFN_MASK)
181
176typedef struct pgprot { pgprotval_t pgprot; } pgprot_t; 182typedef struct pgprot { pgprotval_t pgprot; } pgprot_t;
177 183
178typedef struct { pgdval_t pgd; } pgd_t; 184typedef struct { pgdval_t pgd; } pgd_t;
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index dabab1a19dd..c7a98f73821 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -403,7 +403,6 @@ DECLARE_PER_CPU(unsigned long, stack_canary);
403#endif 403#endif
404#endif /* X86_64 */ 404#endif /* X86_64 */
405 405
406extern void print_cpu_info(struct cpuinfo_x86 *);
407extern unsigned int xstate_size; 406extern unsigned int xstate_size;
408extern void free_thread_xstate(struct task_struct *); 407extern void free_thread_xstate(struct task_struct *);
409extern struct kmem_cache *task_xstate_cachep; 408extern struct kmem_cache *task_xstate_cachep;
@@ -862,6 +861,7 @@ static inline void spin_lock_prefetch(const void *x)
862 * User space process size: 3GB (default). 861 * User space process size: 3GB (default).
863 */ 862 */
864#define TASK_SIZE PAGE_OFFSET 863#define TASK_SIZE PAGE_OFFSET
864#define TASK_SIZE_MAX TASK_SIZE
865#define STACK_TOP TASK_SIZE 865#define STACK_TOP TASK_SIZE
866#define STACK_TOP_MAX STACK_TOP 866#define STACK_TOP_MAX STACK_TOP
867 867
@@ -921,7 +921,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
921/* 921/*
922 * User space process size. 47bits minus one guard page. 922 * User space process size. 47bits minus one guard page.
923 */ 923 */
924#define TASK_SIZE64 ((1UL << 47) - PAGE_SIZE) 924#define TASK_SIZE_MAX ((1UL << 47) - PAGE_SIZE)
925 925
926/* This decides where the kernel will search for a free chunk of vm 926/* This decides where the kernel will search for a free chunk of vm
927 * space during mmap's. 927 * space during mmap's.
@@ -930,12 +930,12 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
930 0xc0000000 : 0xFFFFe000) 930 0xc0000000 : 0xFFFFe000)
931 931
932#define TASK_SIZE (test_thread_flag(TIF_IA32) ? \ 932#define TASK_SIZE (test_thread_flag(TIF_IA32) ? \
933 IA32_PAGE_OFFSET : TASK_SIZE64) 933 IA32_PAGE_OFFSET : TASK_SIZE_MAX)
934#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_IA32)) ? \ 934#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_IA32)) ? \
935 IA32_PAGE_OFFSET : TASK_SIZE64) 935 IA32_PAGE_OFFSET : TASK_SIZE_MAX)
936 936
937#define STACK_TOP TASK_SIZE 937#define STACK_TOP TASK_SIZE
938#define STACK_TOP_MAX TASK_SIZE64 938#define STACK_TOP_MAX TASK_SIZE_MAX
939 939
940#define INIT_THREAD { \ 940#define INIT_THREAD { \
941 .sp0 = (unsigned long)&init_stack + sizeof(init_stack) \ 941 .sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 8029369cd6f..66801cb72f6 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -13,6 +13,7 @@
13struct mpc_cpu; 13struct mpc_cpu;
14struct mpc_bus; 14struct mpc_bus;
15struct mpc_oemtable; 15struct mpc_oemtable;
16
16struct x86_quirks { 17struct x86_quirks {
17 int (*arch_pre_time_init)(void); 18 int (*arch_pre_time_init)(void);
18 int (*arch_time_init)(void); 19 int (*arch_time_init)(void);
@@ -33,6 +34,14 @@ struct x86_quirks {
33 int (*update_apic)(void); 34 int (*update_apic)(void);
34}; 35};
35 36
37extern void x86_quirk_pre_intr_init(void);
38extern void x86_quirk_intr_init(void);
39
40extern void x86_quirk_trap_init(void);
41
42extern void x86_quirk_pre_time_init(void);
43extern void x86_quirk_time_init(void);
44
36#endif /* __ASSEMBLY__ */ 45#endif /* __ASSEMBLY__ */
37 46
38#ifdef __i386__ 47#ifdef __i386__
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 258ef730aaa..7043408f690 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -82,7 +82,7 @@ asmlinkage long sys_iopl(unsigned int, struct pt_regs *);
82/* kernel/signal_64.c */ 82/* kernel/signal_64.c */
83asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *, 83asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *,
84 struct pt_regs *); 84 struct pt_regs *);
85asmlinkage long sys_rt_sigreturn(struct pt_regs *); 85long sys_rt_sigreturn(struct pt_regs *);
86 86
87/* kernel/sys_x86_64.c */ 87/* kernel/sys_x86_64.c */
88asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long, 88asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long,
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index 2bb6a835c45..a81195eaa2b 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -3,6 +3,7 @@
3#include <linux/init.h> 3#include <linux/init.h>
4#include <linux/pm.h> 4#include <linux/pm.h>
5#include <linux/percpu.h> 5#include <linux/percpu.h>
6#include <linux/interrupt.h>
6 7
7#define TICK_SIZE (tick_nsec / 1000) 8#define TICK_SIZE (tick_nsec / 1000)
8 9
@@ -12,6 +13,7 @@ unsigned long native_calibrate_tsc(void);
12#ifdef CONFIG_X86_32 13#ifdef CONFIG_X86_32
13extern int timer_ack; 14extern int timer_ack;
14extern int recalibrate_cpu_khz(void); 15extern int recalibrate_cpu_khz(void);
16extern irqreturn_t timer_interrupt(int irq, void *dev_id);
15#endif /* CONFIG_X86_32 */ 17#endif /* CONFIG_X86_32 */
16 18
17extern int no_timer_check; 19extern int no_timer_check;
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 5e06259e90e..a0ba6138697 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -157,7 +157,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
157} 157}
158 158
159static __always_inline unsigned long __copy_from_user_nocache(void *to, 159static __always_inline unsigned long __copy_from_user_nocache(void *to,
160 const void __user *from, unsigned long n) 160 const void __user *from, unsigned long n, unsigned long total)
161{ 161{
162 might_fault(); 162 might_fault();
163 if (__builtin_constant_p(n)) { 163 if (__builtin_constant_p(n)) {
@@ -180,7 +180,7 @@ static __always_inline unsigned long __copy_from_user_nocache(void *to,
180 180
181static __always_inline unsigned long 181static __always_inline unsigned long
182__copy_from_user_inatomic_nocache(void *to, const void __user *from, 182__copy_from_user_inatomic_nocache(void *to, const void __user *from,
183 unsigned long n) 183 unsigned long n, unsigned long total)
184{ 184{
185 return __copy_from_user_ll_nocache_nozero(to, from, n); 185 return __copy_from_user_ll_nocache_nozero(to, from, n);
186} 186}
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 84210c479fc..dcaa0404cf7 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -189,17 +189,28 @@ extern long __copy_user_nocache(void *dst, const void __user *src,
189 unsigned size, int zerorest); 189 unsigned size, int zerorest);
190 190
191static inline int __copy_from_user_nocache(void *dst, const void __user *src, 191static inline int __copy_from_user_nocache(void *dst, const void __user *src,
192 unsigned size) 192 unsigned size, unsigned long total)
193{ 193{
194 might_sleep(); 194 might_sleep();
195 return __copy_user_nocache(dst, src, size, 1); 195 /*
196 * In practice this limit means that large file write()s
197 * which get chunked to 4K copies get handled via
198 * non-temporal stores here. Smaller writes get handled
199 * via regular __copy_from_user():
200 */
201 if (likely(total >= PAGE_SIZE))
202 return __copy_user_nocache(dst, src, size, 1);
203 else
204 return __copy_from_user(dst, src, size);
196} 205}
197 206
198static inline int __copy_from_user_inatomic_nocache(void *dst, 207static inline int __copy_from_user_inatomic_nocache(void *dst,
199 const void __user *src, 208 const void __user *src, unsigned size, unsigned total)
200 unsigned size)
201{ 209{
202 return __copy_user_nocache(dst, src, size, 0); 210 if (likely(total >= PAGE_SIZE))
211 return __copy_user_nocache(dst, src, size, 0);
212 else
213 return __copy_from_user_inatomic(dst, src, size);
203} 214}
204 215
205unsigned long 216unsigned long
diff --git a/arch/x86/include/asm/vic.h b/arch/x86/include/asm/vic.h
deleted file mode 100644
index 53100f35361..00000000000
--- a/arch/x86/include/asm/vic.h
+++ /dev/null
@@ -1,61 +0,0 @@
1/* Copyright (C) 1999,2001
2 *
3 * Author: J.E.J.Bottomley@HansenPartnership.com
4 *
5 * Standard include definitions for the NCR Voyager Interrupt Controller */
6
7/* The eight CPI vectors. To activate a CPI, you write a bit mask
8 * corresponding to the processor set to be interrupted into the
9 * relevant register. That set of CPUs will then be interrupted with
10 * the CPI */
11static const int VIC_CPI_Registers[] =
12 {0xFC00, 0xFC01, 0xFC08, 0xFC09,
13 0xFC10, 0xFC11, 0xFC18, 0xFC19 };
14
15#define VIC_PROC_WHO_AM_I 0xfc29
16# define QUAD_IDENTIFIER 0xC0
17# define EIGHT_SLOT_IDENTIFIER 0xE0
18#define QIC_EXTENDED_PROCESSOR_SELECT 0xFC72
19#define VIC_CPI_BASE_REGISTER 0xFC41
20#define VIC_PROCESSOR_ID 0xFC21
21# define VIC_CPU_MASQUERADE_ENABLE 0x8
22
23#define VIC_CLAIM_REGISTER_0 0xFC38
24#define VIC_CLAIM_REGISTER_1 0xFC39
25#define VIC_REDIRECT_REGISTER_0 0xFC60
26#define VIC_REDIRECT_REGISTER_1 0xFC61
27#define VIC_PRIORITY_REGISTER 0xFC20
28
29#define VIC_PRIMARY_MC_BASE 0xFC48
30#define VIC_SECONDARY_MC_BASE 0xFC49
31
32#define QIC_PROCESSOR_ID 0xFC71
33# define QIC_CPUID_ENABLE 0x08
34
35#define QIC_VIC_CPI_BASE_REGISTER 0xFC79
36#define QIC_CPI_BASE_REGISTER 0xFC7A
37
38#define QIC_MASK_REGISTER0 0xFC80
39/* NOTE: these are masked high, enabled low */
40# define QIC_PERF_TIMER 0x01
41# define QIC_LPE 0x02
42# define QIC_SYS_INT 0x04
43# define QIC_CMN_INT 0x08
44/* at the moment, just enable CMN_INT, disable SYS_INT */
45# define QIC_DEFAULT_MASK0 (~(QIC_CMN_INT /* | VIC_SYS_INT */))
46#define QIC_MASK_REGISTER1 0xFC81
47# define QIC_BOOT_CPI_MASK 0xFE
48/* Enable CPI's 1-6 inclusive */
49# define QIC_CPI_ENABLE 0x81
50
51#define QIC_INTERRUPT_CLEAR0 0xFC8A
52#define QIC_INTERRUPT_CLEAR1 0xFC8B
53
54/* this is where we place the CPI vectors */
55#define VIC_DEFAULT_CPI_BASE 0xC0
56/* this is where we place the QIC CPI vectors */
57#define QIC_DEFAULT_CPI_BASE 0xD0
58
59#define VIC_BOOT_INTERRUPT_MASK 0xfe
60
61extern void smp_vic_timer_interrupt(void);
diff --git a/arch/x86/include/asm/voyager.h b/arch/x86/include/asm/voyager.h
deleted file mode 100644
index c1635d43616..00000000000
--- a/arch/x86/include/asm/voyager.h
+++ /dev/null
@@ -1,571 +0,0 @@
1/* Copyright (C) 1999,2001
2 *
3 * Author: J.E.J.Bottomley@HansenPartnership.com
4 *
5 * Standard include definitions for the NCR Voyager system */
6
7#undef VOYAGER_DEBUG
8#undef VOYAGER_CAT_DEBUG
9
10#ifdef VOYAGER_DEBUG
11#define VDEBUG(x) printk x
12#else
13#define VDEBUG(x)
14#endif
15
16/* There are three levels of voyager machine: 3,4 and 5. The rule is
17 * if it's less than 3435 it's a Level 3 except for a 3360 which is
18 * a level 4. A 3435 or above is a Level 5 */
19#define VOYAGER_LEVEL5_AND_ABOVE 0x3435
20#define VOYAGER_LEVEL4 0x3360
21
22/* The L4 DINO ASIC */
23#define VOYAGER_DINO 0x43
24
25/* voyager ports in standard I/O space */
26#define VOYAGER_MC_SETUP 0x96
27
28
29#define VOYAGER_CAT_CONFIG_PORT 0x97
30# define VOYAGER_CAT_DESELECT 0xff
31#define VOYAGER_SSPB_RELOCATION_PORT 0x98
32
33/* Valid CAT controller commands */
34/* start instruction register cycle */
35#define VOYAGER_CAT_IRCYC 0x01
36/* start data register cycle */
37#define VOYAGER_CAT_DRCYC 0x02
38/* move to execute state */
39#define VOYAGER_CAT_RUN 0x0F
40/* end operation */
41#define VOYAGER_CAT_END 0x80
42/* hold in idle state */
43#define VOYAGER_CAT_HOLD 0x90
44/* single step an "intest" vector */
45#define VOYAGER_CAT_STEP 0xE0
46/* return cat controller to CLEMSON mode */
47#define VOYAGER_CAT_CLEMSON 0xFF
48
49/* the default cat command header */
50#define VOYAGER_CAT_HEADER 0x7F
51
52/* the range of possible CAT module ids in the system */
53#define VOYAGER_MIN_MODULE 0x10
54#define VOYAGER_MAX_MODULE 0x1f
55
56/* The voyager registers per asic */
57#define VOYAGER_ASIC_ID_REG 0x00
58#define VOYAGER_ASIC_TYPE_REG 0x01
59/* the sub address registers can be made auto incrementing on reads */
60#define VOYAGER_AUTO_INC_REG 0x02
61# define VOYAGER_AUTO_INC 0x04
62# define VOYAGER_NO_AUTO_INC 0xfb
63#define VOYAGER_SUBADDRDATA 0x03
64#define VOYAGER_SCANPATH 0x05
65# define VOYAGER_CONNECT_ASIC 0x01
66# define VOYAGER_DISCONNECT_ASIC 0xfe
67#define VOYAGER_SUBADDRLO 0x06
68#define VOYAGER_SUBADDRHI 0x07
69#define VOYAGER_SUBMODSELECT 0x08
70#define VOYAGER_SUBMODPRESENT 0x09
71
72#define VOYAGER_SUBADDR_LO 0xff
73#define VOYAGER_SUBADDR_HI 0xffff
74
75/* the maximum size of a scan path -- used to form instructions */
76#define VOYAGER_MAX_SCAN_PATH 0x100
77/* the biggest possible register size (in bytes) */
78#define VOYAGER_MAX_REG_SIZE 4
79
80/* Total number of possible modules (including submodules) */
81#define VOYAGER_MAX_MODULES 16
82/* Largest number of asics per module */
83#define VOYAGER_MAX_ASICS_PER_MODULE 7
84
85/* the CAT asic of each module is always the first one */
86#define VOYAGER_CAT_ID 0
87#define VOYAGER_PSI 0x1a
88
89/* voyager instruction operations and registers */
90#define VOYAGER_READ_CONFIG 0x1
91#define VOYAGER_WRITE_CONFIG 0x2
92#define VOYAGER_BYPASS 0xff
93
94typedef struct voyager_asic {
95 __u8 asic_addr; /* ASIC address; Level 4 */
96 __u8 asic_type; /* ASIC type */
97 __u8 asic_id; /* ASIC id */
98 __u8 jtag_id[4]; /* JTAG id */
99 __u8 asic_location; /* Location within scan path; start w/ 0 */
100 __u8 bit_location; /* Location within bit stream; start w/ 0 */
101 __u8 ireg_length; /* Instruction register length */
102 __u16 subaddr; /* Amount of sub address space */
103 struct voyager_asic *next; /* Next asic in linked list */
104} voyager_asic_t;
105
106typedef struct voyager_module {
107 __u8 module_addr; /* Module address */
108 __u8 scan_path_connected; /* Scan path connected */
109 __u16 ee_size; /* Size of the EEPROM */
110 __u16 num_asics; /* Number of Asics */
111 __u16 inst_bits; /* Instruction bits in the scan path */
112 __u16 largest_reg; /* Largest register in the scan path */
113 __u16 smallest_reg; /* Smallest register in the scan path */
114 voyager_asic_t *asic; /* First ASIC in scan path (CAT_I) */
115 struct voyager_module *submodule; /* Submodule pointer */
116 struct voyager_module *next; /* Next module in linked list */
117} voyager_module_t;
118
119typedef struct voyager_eeprom_hdr {
120 __u8 module_id[4];
121 __u8 version_id;
122 __u8 config_id;
123 __u16 boundry_id; /* boundary scan id */
124 __u16 ee_size; /* size of EEPROM */
125 __u8 assembly[11]; /* assembly # */
126 __u8 assembly_rev; /* assembly rev */
127 __u8 tracer[4]; /* tracer number */
128 __u16 assembly_cksum; /* asm checksum */
129 __u16 power_consump; /* pwr requirements */
130 __u16 num_asics; /* number of asics */
131 __u16 bist_time; /* min. bist time */
132 __u16 err_log_offset; /* error log offset */
133 __u16 scan_path_offset;/* scan path offset */
134 __u16 cct_offset;
135 __u16 log_length; /* length of err log */
136 __u16 xsum_end; /* offset to end of
137 checksum */
138 __u8 reserved[4];
139 __u8 sflag; /* starting sentinal */
140 __u8 part_number[13]; /* prom part number */
141 __u8 version[10]; /* version number */
142 __u8 signature[8];
143 __u16 eeprom_chksum;
144 __u32 data_stamp_offset;
145 __u8 eflag ; /* ending sentinal */
146} __attribute__((packed)) voyager_eprom_hdr_t;
147
148
149
150#define VOYAGER_EPROM_SIZE_OFFSET \
151 ((__u16)(&(((voyager_eprom_hdr_t *)0)->ee_size)))
152#define VOYAGER_XSUM_END_OFFSET 0x2a
153
154/* the following three definitions are for internal table layouts
155 * in the module EPROMs. We really only care about the IDs and
156 * offsets */
157typedef struct voyager_sp_table {
158 __u8 asic_id;
159 __u8 bypass_flag;
160 __u16 asic_data_offset;
161 __u16 config_data_offset;
162} __attribute__((packed)) voyager_sp_table_t;
163
164typedef struct voyager_jtag_table {
165 __u8 icode[4];
166 __u8 runbist[4];
167 __u8 intest[4];
168 __u8 samp_preld[4];
169 __u8 ireg_len;
170} __attribute__((packed)) voyager_jtt_t;
171
172typedef struct voyager_asic_data_table {
173 __u8 jtag_id[4];
174 __u16 length_bsr;
175 __u16 length_bist_reg;
176 __u32 bist_clk;
177 __u16 subaddr_bits;
178 __u16 seed_bits;
179 __u16 sig_bits;
180 __u16 jtag_offset;
181} __attribute__((packed)) voyager_at_t;
182
183/* Voyager Interrupt Controller (VIC) registers */
184
185/* Base to add to Cross Processor Interrupts (CPIs) when triggering
186 * the CPU IRQ line */
187/* register defines for the WCBICs (one per processor) */
188#define VOYAGER_WCBIC0 0x41 /* bus A node P1 processor 0 */
189#define VOYAGER_WCBIC1 0x49 /* bus A node P1 processor 1 */
190#define VOYAGER_WCBIC2 0x51 /* bus A node P2 processor 0 */
191#define VOYAGER_WCBIC3 0x59 /* bus A node P2 processor 1 */
192#define VOYAGER_WCBIC4 0x61 /* bus B node P1 processor 0 */
193#define VOYAGER_WCBIC5 0x69 /* bus B node P1 processor 1 */
194#define VOYAGER_WCBIC6 0x71 /* bus B node P2 processor 0 */
195#define VOYAGER_WCBIC7 0x79 /* bus B node P2 processor 1 */
196
197
198/* top of memory registers */
199#define VOYAGER_WCBIC_TOM_L 0x4
200#define VOYAGER_WCBIC_TOM_H 0x5
201
202/* register defines for Voyager Memory Contol (VMC)
203 * these are present on L4 machines only */
204#define VOYAGER_VMC1 0x81
205#define VOYAGER_VMC2 0x91
206#define VOYAGER_VMC3 0xa1
207#define VOYAGER_VMC4 0xb1
208
209/* VMC Ports */
210#define VOYAGER_VMC_MEMORY_SETUP 0x9
211# define VMC_Interleaving 0x01
212# define VMC_4Way 0x02
213# define VMC_EvenCacheLines 0x04
214# define VMC_HighLine 0x08
215# define VMC_Start0_Enable 0x20
216# define VMC_Start1_Enable 0x40
217# define VMC_Vremap 0x80
218#define VOYAGER_VMC_BANK_DENSITY 0xa
219# define VMC_BANK_EMPTY 0
220# define VMC_BANK_4MB 1
221# define VMC_BANK_16MB 2
222# define VMC_BANK_64MB 3
223# define VMC_BANK0_MASK 0x03
224# define VMC_BANK1_MASK 0x0C
225# define VMC_BANK2_MASK 0x30
226# define VMC_BANK3_MASK 0xC0
227
228/* Magellan Memory Controller (MMC) defines - present on L5 */
229#define VOYAGER_MMC_ASIC_ID 1
230/* the two memory modules corresponding to memory cards in the system */
231#define VOYAGER_MMC_MEMORY0_MODULE 0x14
232#define VOYAGER_MMC_MEMORY1_MODULE 0x15
233/* the Magellan Memory Address (MMA) defines */
234#define VOYAGER_MMA_ASIC_ID 2
235
236/* Submodule number for the Quad Baseboard */
237#define VOYAGER_QUAD_BASEBOARD 1
238
239/* ASIC defines for the Quad Baseboard */
240#define VOYAGER_QUAD_QDATA0 1
241#define VOYAGER_QUAD_QDATA1 2
242#define VOYAGER_QUAD_QABC 3
243
244/* Useful areas in extended CMOS */
245#define VOYAGER_PROCESSOR_PRESENT_MASK 0x88a
246#define VOYAGER_MEMORY_CLICKMAP 0xa23
247#define VOYAGER_DUMP_LOCATION 0xb1a
248
249/* SUS In Control bit - used to tell SUS that we don't need to be
250 * babysat anymore */
251#define VOYAGER_SUS_IN_CONTROL_PORT 0x3ff
252# define VOYAGER_IN_CONTROL_FLAG 0x80
253
254/* Voyager PSI defines */
255#define VOYAGER_PSI_STATUS_REG 0x08
256# define PSI_DC_FAIL 0x01
257# define PSI_MON 0x02
258# define PSI_FAULT 0x04
259# define PSI_ALARM 0x08
260# define PSI_CURRENT 0x10
261# define PSI_DVM 0x20
262# define PSI_PSCFAULT 0x40
263# define PSI_STAT_CHG 0x80
264
265#define VOYAGER_PSI_SUPPLY_REG 0x8000
266 /* read */
267# define PSI_FAIL_DC 0x01
268# define PSI_FAIL_AC 0x02
269# define PSI_MON_INT 0x04
270# define PSI_SWITCH_OFF 0x08
271# define PSI_HX_OFF 0x10
272# define PSI_SECURITY 0x20
273# define PSI_CMOS_BATT_LOW 0x40
274# define PSI_CMOS_BATT_FAIL 0x80
275 /* write */
276# define PSI_CLR_SWITCH_OFF 0x13
277# define PSI_CLR_HX_OFF 0x14
278# define PSI_CLR_CMOS_BATT_FAIL 0x17
279
280#define VOYAGER_PSI_MASK 0x8001
281# define PSI_MASK_MASK 0x10
282
283#define VOYAGER_PSI_AC_FAIL_REG 0x8004
284#define AC_FAIL_STAT_CHANGE 0x80
285
286#define VOYAGER_PSI_GENERAL_REG 0x8007
287 /* read */
288# define PSI_SWITCH_ON 0x01
289# define PSI_SWITCH_ENABLED 0x02
290# define PSI_ALARM_ENABLED 0x08
291# define PSI_SECURE_ENABLED 0x10
292# define PSI_COLD_RESET 0x20
293# define PSI_COLD_START 0x80
294 /* write */
295# define PSI_POWER_DOWN 0x10
296# define PSI_SWITCH_DISABLE 0x01
297# define PSI_SWITCH_ENABLE 0x11
298# define PSI_CLEAR 0x12
299# define PSI_ALARM_DISABLE 0x03
300# define PSI_ALARM_ENABLE 0x13
301# define PSI_CLEAR_COLD_RESET 0x05
302# define PSI_SET_COLD_RESET 0x15
303# define PSI_CLEAR_COLD_START 0x07
304# define PSI_SET_COLD_START 0x17
305
306
307
308struct voyager_bios_info {
309 __u8 len;
310 __u8 major;
311 __u8 minor;
312 __u8 debug;
313 __u8 num_classes;
314 __u8 class_1;
315 __u8 class_2;
316};
317
318/* The following structures and definitions are for the Kernel/SUS
319 * interface these are needed to find out how SUS initialised any Quad
320 * boards in the system */
321
322#define NUMBER_OF_MC_BUSSES 2
323#define SLOTS_PER_MC_BUS 8
324#define MAX_CPUS 16 /* 16 way CPU system */
325#define MAX_PROCESSOR_BOARDS 4 /* 4 processor slot system */
326#define MAX_CACHE_LEVELS 4 /* # of cache levels supported */
327#define MAX_SHARED_CPUS 4 /* # of CPUs that can share a LARC */
328#define NUMBER_OF_POS_REGS 8
329
330typedef struct {
331 __u8 MC_Slot;
332 __u8 POS_Values[NUMBER_OF_POS_REGS];
333} __attribute__((packed)) MC_SlotInformation_t;
334
335struct QuadDescription {
336 __u8 Type; /* for type 0 (DYADIC or MONADIC) all fields
337 * will be zero except for slot */
338 __u8 StructureVersion;
339 __u32 CPI_BaseAddress;
340 __u32 LARC_BankSize;
341 __u32 LocalMemoryStateBits;
342 __u8 Slot; /* Processor slots 1 - 4 */
343} __attribute__((packed));
344
345struct ProcBoardInfo {
346 __u8 Type;
347 __u8 StructureVersion;
348 __u8 NumberOfBoards;
349 struct QuadDescription QuadData[MAX_PROCESSOR_BOARDS];
350} __attribute__((packed));
351
352struct CacheDescription {
353 __u8 Level;
354 __u32 TotalSize;
355 __u16 LineSize;
356 __u8 Associativity;
357 __u8 CacheType;
358 __u8 WriteType;
359 __u8 Number_CPUs_SharedBy;
360 __u8 Shared_CPUs_Hardware_IDs[MAX_SHARED_CPUS];
361
362} __attribute__((packed));
363
364struct CPU_Description {
365 __u8 CPU_HardwareId;
366 char *FRU_String;
367 __u8 NumberOfCacheLevels;
368 struct CacheDescription CacheLevelData[MAX_CACHE_LEVELS];
369} __attribute__((packed));
370
371struct CPU_Info {
372 __u8 Type;
373 __u8 StructureVersion;
374 __u8 NumberOf_CPUs;
375 struct CPU_Description CPU_Data[MAX_CPUS];
376} __attribute__((packed));
377
378
379/*
380 * This structure will be used by SUS and the OS.
381 * The assumption about this structure is that no blank space is
382 * packed in it by our friend the compiler.
383 */
384typedef struct {
385 __u8 Mailbox_SUS; /* Written to by SUS to give
386 commands/response to the OS */
387 __u8 Mailbox_OS; /* Written to by the OS to give
388 commands/response to SUS */
389 __u8 SUS_MailboxVersion; /* Tells the OS which iteration of the
390 interface SUS supports */
391 __u8 OS_MailboxVersion; /* Tells SUS which iteration of the
392 interface the OS supports */
393 __u32 OS_Flags; /* Flags set by the OS as info for
394 SUS */
395 __u32 SUS_Flags; /* Flags set by SUS as info
396 for the OS */
397 __u32 WatchDogPeriod; /* Watchdog period (in seconds) which
398 the DP uses to see if the OS
399 is dead */
400 __u32 WatchDogCount; /* Updated by the OS on every tic. */
401 __u32 MemoryFor_SUS_ErrorLog; /* Flat 32 bit address which tells SUS
402 where to stuff the SUS error log
403 on a dump */
404 MC_SlotInformation_t MC_SlotInfo[NUMBER_OF_MC_BUSSES*SLOTS_PER_MC_BUS];
405 /* Storage for MCA POS data */
406 /* All new SECOND_PASS_INTERFACE fields added from this point */
407 struct ProcBoardInfo *BoardData;
408 struct CPU_Info *CPU_Data;
409 /* All new fields must be added from this point */
410} Voyager_KernelSUS_Mbox_t;
411
412/* structure for finding the right memory address to send a QIC CPI to */
413struct voyager_qic_cpi {
414 /* Each cache line (32 bytes) can trigger a cpi. The cpi
415 * read/write may occur anywhere in the cache line---pick the
416 * middle to be safe */
417 struct {
418 __u32 pad1[3];
419 __u32 cpi;
420 __u32 pad2[4];
421 } qic_cpi[8];
422};
423
424struct voyager_status {
425 __u32 power_fail:1;
426 __u32 switch_off:1;
427 __u32 request_from_kernel:1;
428};
429
430struct voyager_psi_regs {
431 __u8 cat_id;
432 __u8 cat_dev;
433 __u8 cat_control;
434 __u8 subaddr;
435 __u8 dummy4;
436 __u8 checkbit;
437 __u8 subaddr_low;
438 __u8 subaddr_high;
439 __u8 intstatus;
440 __u8 stat1;
441 __u8 stat3;
442 __u8 fault;
443 __u8 tms;
444 __u8 gen;
445 __u8 sysconf;
446 __u8 dummy15;
447};
448
449struct voyager_psi_subregs {
450 __u8 supply;
451 __u8 mask;
452 __u8 present;
453 __u8 DCfail;
454 __u8 ACfail;
455 __u8 fail;
456 __u8 UPSfail;
457 __u8 genstatus;
458};
459
460struct voyager_psi {
461 struct voyager_psi_regs regs;
462 struct voyager_psi_subregs subregs;
463};
464
465struct voyager_SUS {
466#define VOYAGER_DUMP_BUTTON_NMI 0x1
467#define VOYAGER_SUS_VALID 0x2
468#define VOYAGER_SYSINT_COMPLETE 0x3
469 __u8 SUS_mbox;
470#define VOYAGER_NO_COMMAND 0x0
471#define VOYAGER_IGNORE_DUMP 0x1
472#define VOYAGER_DO_DUMP 0x2
473#define VOYAGER_SYSINT_HANDSHAKE 0x3
474#define VOYAGER_DO_MEM_DUMP 0x4
475#define VOYAGER_SYSINT_WAS_RECOVERED 0x5
476 __u8 kernel_mbox;
477#define VOYAGER_MAILBOX_VERSION 0x10
478 __u8 SUS_version;
479 __u8 kernel_version;
480#define VOYAGER_OS_HAS_SYSINT 0x1
481#define VOYAGER_OS_IN_PROGRESS 0x2
482#define VOYAGER_UPDATING_WDPERIOD 0x4
483 __u32 kernel_flags;
484#define VOYAGER_SUS_BOOTING 0x1
485#define VOYAGER_SUS_IN_PROGRESS 0x2
486 __u32 SUS_flags;
487 __u32 watchdog_period;
488 __u32 watchdog_count;
489 __u32 SUS_errorlog;
490 /* lots of system configuration stuff under here */
491};
492
493/* Variables exported by voyager_smp */
494extern __u32 voyager_extended_vic_processors;
495extern __u32 voyager_allowed_boot_processors;
496extern __u32 voyager_quad_processors;
497extern struct voyager_qic_cpi *voyager_quad_cpi_addr[NR_CPUS];
498extern struct voyager_SUS *voyager_SUS;
499
500/* variables exported always */
501extern struct task_struct *voyager_thread;
502extern int voyager_level;
503extern struct voyager_status voyager_status;
504
505/* functions exported by the voyager and voyager_smp modules */
506extern int voyager_cat_readb(__u8 module, __u8 asic, int reg);
507extern void voyager_cat_init(void);
508extern void voyager_detect(struct voyager_bios_info *);
509extern void voyager_trap_init(void);
510extern void voyager_setup_irqs(void);
511extern int voyager_memory_detect(int region, __u32 *addr, __u32 *length);
512extern void voyager_smp_intr_init(void);
513extern __u8 voyager_extended_cmos_read(__u16 cmos_address);
514extern void voyager_smp_dump(void);
515extern void voyager_timer_interrupt(void);
516extern void smp_local_timer_interrupt(void);
517extern void voyager_power_off(void);
518extern void smp_voyager_power_off(void *dummy);
519extern void voyager_restart(void);
520extern void voyager_cat_power_off(void);
521extern void voyager_cat_do_common_interrupt(void);
522extern void voyager_handle_nmi(void);
523extern void voyager_smp_intr_init(void);
524/* Commands for the following are */
525#define VOYAGER_PSI_READ 0
526#define VOYAGER_PSI_WRITE 1
527#define VOYAGER_PSI_SUBREAD 2
528#define VOYAGER_PSI_SUBWRITE 3
529extern void voyager_cat_psi(__u8, __u16, __u8 *);
530
531/* These define the CPIs we use in linux */
532#define VIC_CPI_LEVEL0 0
533#define VIC_CPI_LEVEL1 1
534/* now the fake CPIs */
535#define VIC_TIMER_CPI 2
536#define VIC_INVALIDATE_CPI 3
537#define VIC_RESCHEDULE_CPI 4
538#define VIC_ENABLE_IRQ_CPI 5
539#define VIC_CALL_FUNCTION_CPI 6
540#define VIC_CALL_FUNCTION_SINGLE_CPI 7
541
542/* Now the QIC CPIs: Since we don't need the two initial levels,
543 * these are 2 less than the VIC CPIs */
544#define QIC_CPI_OFFSET 1
545#define QIC_TIMER_CPI (VIC_TIMER_CPI - QIC_CPI_OFFSET)
546#define QIC_INVALIDATE_CPI (VIC_INVALIDATE_CPI - QIC_CPI_OFFSET)
547#define QIC_RESCHEDULE_CPI (VIC_RESCHEDULE_CPI - QIC_CPI_OFFSET)
548#define QIC_ENABLE_IRQ_CPI (VIC_ENABLE_IRQ_CPI - QIC_CPI_OFFSET)
549#define QIC_CALL_FUNCTION_CPI (VIC_CALL_FUNCTION_CPI - QIC_CPI_OFFSET)
550#define QIC_CALL_FUNCTION_SINGLE_CPI (VIC_CALL_FUNCTION_SINGLE_CPI - QIC_CPI_OFFSET)
551
552#define VIC_START_FAKE_CPI VIC_TIMER_CPI
553#define VIC_END_FAKE_CPI VIC_CALL_FUNCTION_SINGLE_CPI
554
555/* this is the SYS_INT CPI. */
556#define VIC_SYS_INT 8
557#define VIC_CMN_INT 15
558
559/* This is the boot CPI for alternate processors. It gets overwritten
560 * by the above once the system has activated all available processors */
561#define VIC_CPU_BOOT_CPI VIC_CPI_LEVEL0
562#define VIC_CPU_BOOT_ERRATA_CPI (VIC_CPI_LEVEL0 + 8)
563
564extern asmlinkage void vic_cpi_interrupt(void);
565extern asmlinkage void vic_sys_interrupt(void);
566extern asmlinkage void vic_cmn_interrupt(void);
567extern asmlinkage void qic_timer_interrupt(void);
568extern asmlinkage void qic_invalidate_interrupt(void);
569extern asmlinkage void qic_reschedule_interrupt(void);
570extern asmlinkage void qic_enable_irq_interrupt(void);
571extern asmlinkage void qic_call_function_interrupt(void);
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.S b/arch/x86/kernel/acpi/realmode/wakeup.S
index 3355973b12a..580b4e29601 100644
--- a/arch/x86/kernel/acpi/realmode/wakeup.S
+++ b/arch/x86/kernel/acpi/realmode/wakeup.S
@@ -3,8 +3,8 @@
3 */ 3 */
4#include <asm/segment.h> 4#include <asm/segment.h>
5#include <asm/msr-index.h> 5#include <asm/msr-index.h>
6#include <asm/page.h> 6#include <asm/page_types.h>
7#include <asm/pgtable.h> 7#include <asm/pgtable_types.h>
8#include <asm/processor-flags.h> 8#include <asm/processor-flags.h>
9 9
10 .code16 10 .code16
diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S
index a12e6a9fb65..8ded418b059 100644
--- a/arch/x86/kernel/acpi/wakeup_32.S
+++ b/arch/x86/kernel/acpi/wakeup_32.S
@@ -1,7 +1,7 @@
1 .section .text.page_aligned 1 .section .text.page_aligned
2#include <linux/linkage.h> 2#include <linux/linkage.h>
3#include <asm/segment.h> 3#include <asm/segment.h>
4#include <asm/page.h> 4#include <asm/page_types.h>
5 5
6# Copyright 2003, 2008 Pavel Machek <pavel@suse.cz>, distribute under GPLv2 6# Copyright 2003, 2008 Pavel Machek <pavel@suse.cz>, distribute under GPLv2
7 7
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index bcc293423a7..8ea5164cbd0 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -1,8 +1,8 @@
1.text 1.text
2#include <linux/linkage.h> 2#include <linux/linkage.h>
3#include <asm/segment.h> 3#include <asm/segment.h>
4#include <asm/pgtable.h> 4#include <asm/pgtable_types.h>
5#include <asm/page.h> 5#include <asm/page_types.h>
6#include <asm/msr.h> 6#include <asm/msr.h>
7#include <asm/asm-offsets.h> 7#include <asm/asm-offsets.h>
8 8
@@ -13,7 +13,6 @@
13 * Hooray, we are in Long 64-bit mode (but still running in low memory) 13 * Hooray, we are in Long 64-bit mode (but still running in low memory)
14 */ 14 */
15ENTRY(wakeup_long64) 15ENTRY(wakeup_long64)
16wakeup_long64:
17 movq saved_magic, %rax 16 movq saved_magic, %rax
18 movq $0x123456789abcdef0, %rdx 17 movq $0x123456789abcdef0, %rdx
19 cmpq %rdx, %rax 18 cmpq %rdx, %rax
@@ -34,16 +33,12 @@ wakeup_long64:
34 33
35 movq saved_rip, %rax 34 movq saved_rip, %rax
36 jmp *%rax 35 jmp *%rax
36ENDPROC(wakeup_long64)
37 37
38bogus_64_magic: 38bogus_64_magic:
39 jmp bogus_64_magic 39 jmp bogus_64_magic
40 40
41 .align 2 41ENTRY(do_suspend_lowlevel)
42 .p2align 4,,15
43.globl do_suspend_lowlevel
44 .type do_suspend_lowlevel,@function
45do_suspend_lowlevel:
46.LFB5:
47 subq $8, %rsp 42 subq $8, %rsp
48 xorl %eax, %eax 43 xorl %eax, %eax
49 call save_processor_state 44 call save_processor_state
@@ -67,7 +62,7 @@ do_suspend_lowlevel:
67 pushfq 62 pushfq
68 popq pt_regs_flags(%rax) 63 popq pt_regs_flags(%rax)
69 64
70 movq $.L97, saved_rip(%rip) 65 movq $resume_point, saved_rip(%rip)
71 66
72 movq %rsp, saved_rsp 67 movq %rsp, saved_rsp
73 movq %rbp, saved_rbp 68 movq %rbp, saved_rbp
@@ -78,14 +73,12 @@ do_suspend_lowlevel:
78 addq $8, %rsp 73 addq $8, %rsp
79 movl $3, %edi 74 movl $3, %edi
80 xorl %eax, %eax 75 xorl %eax, %eax
81 jmp acpi_enter_sleep_state 76 call acpi_enter_sleep_state
82.L97: 77 /* in case something went wrong, restore the machine status and go on */
83 .p2align 4,,7 78 jmp resume_point
84.L99:
85 .align 4
86 movl $24, %eax
87 movw %ax, %ds
88 79
80 .align 4
81resume_point:
89 /* We don't restore %rax, it must be 0 anyway */ 82 /* We don't restore %rax, it must be 0 anyway */
90 movq $saved_context, %rax 83 movq $saved_context, %rax
91 movq saved_context_cr4(%rax), %rbx 84 movq saved_context_cr4(%rax), %rbx
@@ -117,12 +110,9 @@ do_suspend_lowlevel:
117 xorl %eax, %eax 110 xorl %eax, %eax
118 addq $8, %rsp 111 addq $8, %rsp
119 jmp restore_processor_state 112 jmp restore_processor_state
120.LFE5: 113ENDPROC(do_suspend_lowlevel)
121.Lfe5: 114
122 .size do_suspend_lowlevel, .Lfe5-do_suspend_lowlevel
123
124.data 115.data
125ALIGN
126ENTRY(saved_rbp) .quad 0 116ENTRY(saved_rbp) .quad 0
127ENTRY(saved_rsi) .quad 0 117ENTRY(saved_rsi) .quad 0
128ENTRY(saved_rdi) .quad 0 118ENTRY(saved_rdi) .quad 0
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index a84ac7b570e..6907b8e85d5 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -498,12 +498,12 @@ void *text_poke_early(void *addr, const void *opcode, size_t len)
498 */ 498 */
499void *__kprobes text_poke(void *addr, const void *opcode, size_t len) 499void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
500{ 500{
501 unsigned long flags;
502 char *vaddr; 501 char *vaddr;
503 int nr_pages = 2; 502 int nr_pages = 2;
504 struct page *pages[2]; 503 struct page *pages[2];
505 int i; 504 int i;
506 505
506 might_sleep();
507 if (!core_kernel_text((unsigned long)addr)) { 507 if (!core_kernel_text((unsigned long)addr)) {
508 pages[0] = vmalloc_to_page(addr); 508 pages[0] = vmalloc_to_page(addr);
509 pages[1] = vmalloc_to_page(addr + PAGE_SIZE); 509 pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
@@ -517,9 +517,9 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
517 nr_pages = 1; 517 nr_pages = 1;
518 vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); 518 vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
519 BUG_ON(!vaddr); 519 BUG_ON(!vaddr);
520 local_irq_save(flags); 520 local_irq_disable();
521 memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len); 521 memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
522 local_irq_restore(flags); 522 local_irq_enable();
523 vunmap(vaddr); 523 vunmap(vaddr);
524 sync_core(); 524 sync_core();
525 /* Could also do a CLFLUSH here to speed up CPU recovery; but 525 /* Could also do a CLFLUSH here to speed up CPU recovery; but
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index d1bf032ba26..4732768c534 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -35,7 +35,6 @@
35#include <linux/mm.h> 35#include <linux/mm.h>
36 36
37#include <asm/perf_counter.h> 37#include <asm/perf_counter.h>
38#include <asm/arch_hooks.h>
39#include <asm/pgalloc.h> 38#include <asm/pgalloc.h>
40#include <asm/atomic.h> 39#include <asm/atomic.h>
41#include <asm/mpspec.h> 40#include <asm/mpspec.h>
@@ -840,7 +839,7 @@ void clear_local_APIC(void)
840 } 839 }
841 840
842 /* lets not touch this if we didn't frob it */ 841 /* lets not touch this if we didn't frob it */
843#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL) 842#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
844 if (maxlvt >= 5) { 843 if (maxlvt >= 5) {
845 v = apic_read(APIC_LVTTHMR); 844 v = apic_read(APIC_LVTTHMR);
846 apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); 845 apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
@@ -1269,14 +1268,7 @@ void __cpuinit end_local_APIC_setup(void)
1269#ifdef CONFIG_X86_X2APIC 1268#ifdef CONFIG_X86_X2APIC
1270void check_x2apic(void) 1269void check_x2apic(void)
1271{ 1270{
1272 int msr, msr2; 1271 if (x2apic_enabled()) {
1273
1274 if (!cpu_has_x2apic)
1275 return;
1276
1277 rdmsr(MSR_IA32_APICBASE, msr, msr2);
1278
1279 if (msr & X2APIC_ENABLE) {
1280 pr_info("x2apic enabled by BIOS, switching to x2apic ops\n"); 1272 pr_info("x2apic enabled by BIOS, switching to x2apic ops\n");
1281 x2apic_preenabled = x2apic = 1; 1273 x2apic_preenabled = x2apic = 1;
1282 } 1274 }
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index c9ec90742e9..3a730fa574b 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -35,7 +35,6 @@
35#include <linux/init.h> 35#include <linux/init.h>
36#include <linux/interrupt.h> 36#include <linux/interrupt.h>
37#include <asm/acpi.h> 37#include <asm/acpi.h>
38#include <asm/arch_hooks.h>
39#include <asm/e820.h> 38#include <asm/e820.h>
40#include <asm/setup.h> 39#include <asm/setup.h>
41 40
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index 70935dd904d..e7c163661c7 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -50,9 +50,16 @@ static struct apic *apic_probe[] __initdata = {
50void __init default_setup_apic_routing(void) 50void __init default_setup_apic_routing(void)
51{ 51{
52#ifdef CONFIG_X86_X2APIC 52#ifdef CONFIG_X86_X2APIC
53 if (apic == &apic_x2apic_phys || apic == &apic_x2apic_cluster) { 53 if (x2apic && (apic != &apic_x2apic_phys &&
54 if (!intr_remapping_enabled) 54#ifdef CONFIG_X86_UV
55 apic = &apic_flat; 55 apic != &apic_x2apic_uv_x &&
56#endif
57 apic != &apic_x2apic_cluster)) {
58 if (x2apic_phys)
59 apic = &apic_x2apic_phys;
60 else
61 apic = &apic_x2apic_cluster;
62 printk(KERN_INFO "Setting APIC routing to %s\n", apic->name);
56 } 63 }
57#endif 64#endif
58 65
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index cfe7b09015d..32838b57a94 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -48,7 +48,7 @@
48#include <linux/gfp.h> 48#include <linux/gfp.h>
49#include <linux/smp.h> 49#include <linux/smp.h>
50 50
51static inline unsigned summit_get_apic_id(unsigned long x) 51static unsigned summit_get_apic_id(unsigned long x)
52{ 52{
53 return (x >> 24) & 0xFF; 53 return (x >> 24) & 0xFF;
54} 54}
@@ -58,7 +58,7 @@ static inline void summit_send_IPI_mask(const cpumask_t *mask, int vector)
58 default_send_IPI_mask_sequence_logical(mask, vector); 58 default_send_IPI_mask_sequence_logical(mask, vector);
59} 59}
60 60
61static inline void summit_send_IPI_allbutself(int vector) 61static void summit_send_IPI_allbutself(int vector)
62{ 62{
63 cpumask_t mask = cpu_online_map; 63 cpumask_t mask = cpu_online_map;
64 cpu_clear(smp_processor_id(), mask); 64 cpu_clear(smp_processor_id(), mask);
@@ -67,7 +67,7 @@ static inline void summit_send_IPI_allbutself(int vector)
67 summit_send_IPI_mask(&mask, vector); 67 summit_send_IPI_mask(&mask, vector);
68} 68}
69 69
70static inline void summit_send_IPI_all(int vector) 70static void summit_send_IPI_all(int vector)
71{ 71{
72 summit_send_IPI_mask(&cpu_online_map, vector); 72 summit_send_IPI_mask(&cpu_online_map, vector);
73} 73}
@@ -82,8 +82,8 @@ extern void setup_summit(void);
82#define setup_summit() {} 82#define setup_summit() {}
83#endif 83#endif
84 84
85static inline int 85static int summit_mps_oem_check(struct mpc_table *mpc, char *oem,
86summit_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) 86 char *productid)
87{ 87{
88 if (!strncmp(oem, "IBM ENSW", 8) && 88 if (!strncmp(oem, "IBM ENSW", 8) &&
89 (!strncmp(productid, "VIGIL SMP", 9) 89 (!strncmp(productid, "VIGIL SMP", 9)
@@ -98,7 +98,7 @@ summit_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
98} 98}
99 99
100/* Hook from generic ACPI tables.c */ 100/* Hook from generic ACPI tables.c */
101static inline int summit_acpi_madt_oem_check(char *oem_id, char *oem_table_id) 101static int summit_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
102{ 102{
103 if (!strncmp(oem_id, "IBM", 3) && 103 if (!strncmp(oem_id, "IBM", 3) &&
104 (!strncmp(oem_table_id, "SERVIGIL", 8) 104 (!strncmp(oem_table_id, "SERVIGIL", 8)
@@ -186,7 +186,7 @@ static inline int is_WPEG(struct rio_detail *rio){
186 186
187#define SUMMIT_APIC_DFR_VALUE (APIC_DFR_CLUSTER) 187#define SUMMIT_APIC_DFR_VALUE (APIC_DFR_CLUSTER)
188 188
189static inline const cpumask_t *summit_target_cpus(void) 189static const cpumask_t *summit_target_cpus(void)
190{ 190{
191 /* CPU_MASK_ALL (0xff) has undefined behaviour with 191 /* CPU_MASK_ALL (0xff) has undefined behaviour with
192 * dest_LowestPrio mode logical clustered apic interrupt routing 192 * dest_LowestPrio mode logical clustered apic interrupt routing
@@ -195,19 +195,18 @@ static inline const cpumask_t *summit_target_cpus(void)
195 return &cpumask_of_cpu(0); 195 return &cpumask_of_cpu(0);
196} 196}
197 197
198static inline unsigned long 198static unsigned long summit_check_apicid_used(physid_mask_t bitmap, int apicid)
199summit_check_apicid_used(physid_mask_t bitmap, int apicid)
200{ 199{
201 return 0; 200 return 0;
202} 201}
203 202
204/* we don't use the phys_cpu_present_map to indicate apicid presence */ 203/* we don't use the phys_cpu_present_map to indicate apicid presence */
205static inline unsigned long summit_check_apicid_present(int bit) 204static unsigned long summit_check_apicid_present(int bit)
206{ 205{
207 return 1; 206 return 1;
208} 207}
209 208
210static inline void summit_init_apic_ldr(void) 209static void summit_init_apic_ldr(void)
211{ 210{
212 unsigned long val, id; 211 unsigned long val, id;
213 int count = 0; 212 int count = 0;
@@ -234,18 +233,18 @@ static inline void summit_init_apic_ldr(void)
234 apic_write(APIC_LDR, val); 233 apic_write(APIC_LDR, val);
235} 234}
236 235
237static inline int summit_apic_id_registered(void) 236static int summit_apic_id_registered(void)
238{ 237{
239 return 1; 238 return 1;
240} 239}
241 240
242static inline void summit_setup_apic_routing(void) 241static void summit_setup_apic_routing(void)
243{ 242{
244 printk("Enabling APIC mode: Summit. Using %d I/O APICs\n", 243 printk("Enabling APIC mode: Summit. Using %d I/O APICs\n",
245 nr_ioapics); 244 nr_ioapics);
246} 245}
247 246
248static inline int summit_apicid_to_node(int logical_apicid) 247static int summit_apicid_to_node(int logical_apicid)
249{ 248{
250#ifdef CONFIG_SMP 249#ifdef CONFIG_SMP
251 return apicid_2_node[hard_smp_processor_id()]; 250 return apicid_2_node[hard_smp_processor_id()];
@@ -266,7 +265,7 @@ static inline int summit_cpu_to_logical_apicid(int cpu)
266#endif 265#endif
267} 266}
268 267
269static inline int summit_cpu_present_to_apicid(int mps_cpu) 268static int summit_cpu_present_to_apicid(int mps_cpu)
270{ 269{
271 if (mps_cpu < nr_cpu_ids) 270 if (mps_cpu < nr_cpu_ids)
272 return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); 271 return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
@@ -274,28 +273,23 @@ static inline int summit_cpu_present_to_apicid(int mps_cpu)
274 return BAD_APICID; 273 return BAD_APICID;
275} 274}
276 275
277static inline physid_mask_t 276static physid_mask_t summit_ioapic_phys_id_map(physid_mask_t phys_id_map)
278summit_ioapic_phys_id_map(physid_mask_t phys_id_map)
279{ 277{
280 /* For clustered we don't have a good way to do this yet - hack */ 278 /* For clustered we don't have a good way to do this yet - hack */
281 return physids_promote(0x0F); 279 return physids_promote(0x0F);
282} 280}
283 281
284static inline physid_mask_t summit_apicid_to_cpu_present(int apicid) 282static physid_mask_t summit_apicid_to_cpu_present(int apicid)
285{ 283{
286 return physid_mask_of_physid(0); 284 return physid_mask_of_physid(0);
287} 285}
288 286
289static inline void summit_setup_portio_remap(void) 287static int summit_check_phys_apicid_present(int boot_cpu_physical_apicid)
290{
291}
292
293static inline int summit_check_phys_apicid_present(int boot_cpu_physical_apicid)
294{ 288{
295 return 1; 289 return 1;
296} 290}
297 291
298static inline unsigned int summit_cpu_mask_to_apicid(const cpumask_t *cpumask) 292static unsigned int summit_cpu_mask_to_apicid(const cpumask_t *cpumask)
299{ 293{
300 int cpus_found = 0; 294 int cpus_found = 0;
301 int num_bits_set; 295 int num_bits_set;
@@ -303,12 +297,10 @@ static inline unsigned int summit_cpu_mask_to_apicid(const cpumask_t *cpumask)
303 int cpu; 297 int cpu;
304 298
305 num_bits_set = cpus_weight(*cpumask); 299 num_bits_set = cpus_weight(*cpumask);
306 /* Return id to all */
307 if (num_bits_set >= nr_cpu_ids) 300 if (num_bits_set >= nr_cpu_ids)
308 return 0xFF; 301 return BAD_APICID;
309 /* 302 /*
310 * The cpus in the mask must all be on the apic cluster. If are not 303 * The cpus in the mask must all be on the apic cluster.
311 * on the same apicid cluster return default value of target_cpus():
312 */ 304 */
313 cpu = first_cpu(*cpumask); 305 cpu = first_cpu(*cpumask);
314 apicid = summit_cpu_to_logical_apicid(cpu); 306 apicid = summit_cpu_to_logical_apicid(cpu);
@@ -318,9 +310,9 @@ static inline unsigned int summit_cpu_mask_to_apicid(const cpumask_t *cpumask)
318 int new_apicid = summit_cpu_to_logical_apicid(cpu); 310 int new_apicid = summit_cpu_to_logical_apicid(cpu);
319 311
320 if (APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { 312 if (APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
321 printk ("%s: Not a valid mask!\n", __func__); 313 printk("%s: Not a valid mask!\n", __func__);
322 314
323 return 0xFF; 315 return BAD_APICID;
324 } 316 }
325 apicid = apicid | new_apicid; 317 apicid = apicid | new_apicid;
326 cpus_found++; 318 cpus_found++;
@@ -330,8 +322,7 @@ static inline unsigned int summit_cpu_mask_to_apicid(const cpumask_t *cpumask)
330 return apicid; 322 return apicid;
331} 323}
332 324
333static inline unsigned int 325static unsigned int summit_cpu_mask_to_apicid_and(const struct cpumask *inmask,
334summit_cpu_mask_to_apicid_and(const struct cpumask *inmask,
335 const struct cpumask *andmask) 326 const struct cpumask *andmask)
336{ 327{
337 int apicid = summit_cpu_to_logical_apicid(0); 328 int apicid = summit_cpu_to_logical_apicid(0);
@@ -356,7 +347,7 @@ summit_cpu_mask_to_apicid_and(const struct cpumask *inmask,
356 * 347 *
357 * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID. 348 * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID.
358 */ 349 */
359static inline int summit_phys_pkg_id(int cpuid_apic, int index_msb) 350static int summit_phys_pkg_id(int cpuid_apic, int index_msb)
360{ 351{
361 return hard_smp_processor_id() >> index_msb; 352 return hard_smp_processor_id() >> index_msb;
362} 353}
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 4e39d9ad4d5..354b9c45601 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -14,10 +14,7 @@ DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
14 14
15static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) 15static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
16{ 16{
17 if (cpu_has_x2apic) 17 return x2apic_enabled();
18 return 1;
19
20 return 0;
21} 18}
22 19
23/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ 20/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index d2d52eb9f7e..5bcb174409b 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -10,7 +10,7 @@
10#include <asm/apic.h> 10#include <asm/apic.h>
11#include <asm/ipi.h> 11#include <asm/ipi.h>
12 12
13static int x2apic_phys; 13int x2apic_phys;
14 14
15static int set_x2apic_phys_mode(char *arg) 15static int set_x2apic_phys_mode(char *arg)
16{ 16{
@@ -21,10 +21,10 @@ early_param("x2apic_phys", set_x2apic_phys_mode);
21 21
22static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) 22static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
23{ 23{
24 if (cpu_has_x2apic && x2apic_phys) 24 if (x2apic_phys)
25 return 1; 25 return x2apic_enabled();
26 26 else
27 return 0; 27 return 0;
28} 28}
29 29
30/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ 30/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 37ba5f85b71..10033fe718e 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1192,6 +1192,7 @@ static int suspend(int vetoable)
1192 device_suspend(PMSG_SUSPEND); 1192 device_suspend(PMSG_SUSPEND);
1193 local_irq_disable(); 1193 local_irq_disable();
1194 device_power_down(PMSG_SUSPEND); 1194 device_power_down(PMSG_SUSPEND);
1195 sysdev_suspend(PMSG_SUSPEND);
1195 1196
1196 local_irq_enable(); 1197 local_irq_enable();
1197 1198
@@ -1208,6 +1209,7 @@ static int suspend(int vetoable)
1208 if (err != APM_SUCCESS) 1209 if (err != APM_SUCCESS)
1209 apm_error("suspend", err); 1210 apm_error("suspend", err);
1210 err = (err == APM_SUCCESS) ? 0 : -EIO; 1211 err = (err == APM_SUCCESS) ? 0 : -EIO;
1212 sysdev_resume();
1211 device_power_up(PMSG_RESUME); 1213 device_power_up(PMSG_RESUME);
1212 local_irq_enable(); 1214 local_irq_enable();
1213 device_resume(PMSG_RESUME); 1215 device_resume(PMSG_RESUME);
@@ -1228,6 +1230,7 @@ static void standby(void)
1228 1230
1229 local_irq_disable(); 1231 local_irq_disable();
1230 device_power_down(PMSG_SUSPEND); 1232 device_power_down(PMSG_SUSPEND);
1233 sysdev_suspend(PMSG_SUSPEND);
1231 local_irq_enable(); 1234 local_irq_enable();
1232 1235
1233 err = set_system_power_state(APM_STATE_STANDBY); 1236 err = set_system_power_state(APM_STATE_STANDBY);
@@ -1235,6 +1238,7 @@ static void standby(void)
1235 apm_error("standby", err); 1238 apm_error("standby", err);
1236 1239
1237 local_irq_disable(); 1240 local_irq_disable();
1241 sysdev_resume();
1238 device_power_up(PMSG_RESUME); 1242 device_power_up(PMSG_RESUME);
1239 local_irq_enable(); 1243 local_irq_enable();
1240} 1244}
diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
index c2f930d8664..41ab3f064cb 100644
--- a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
+++ b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
@@ -204,12 +204,12 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
204 } 204 }
205 /* Enable Enhanced PowerSaver */ 205 /* Enable Enhanced PowerSaver */
206 rdmsrl(MSR_IA32_MISC_ENABLE, val); 206 rdmsrl(MSR_IA32_MISC_ENABLE, val);
207 if (!(val & 1 << 16)) { 207 if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
208 val |= 1 << 16; 208 val |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP;
209 wrmsrl(MSR_IA32_MISC_ENABLE, val); 209 wrmsrl(MSR_IA32_MISC_ENABLE, val);
210 /* Can be locked at 0 */ 210 /* Can be locked at 0 */
211 rdmsrl(MSR_IA32_MISC_ENABLE, val); 211 rdmsrl(MSR_IA32_MISC_ENABLE, val);
212 if (!(val & 1 << 16)) { 212 if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
213 printk(KERN_INFO "eps: Can't enable Enhanced PowerSaver\n"); 213 printk(KERN_INFO "eps: Can't enable Enhanced PowerSaver\n");
214 return -ENODEV; 214 return -ENODEV;
215 } 215 }
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index fb039cd345d..6428aa17b40 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1157,8 +1157,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1157 data->cpu = pol->cpu; 1157 data->cpu = pol->cpu;
1158 data->currpstate = HW_PSTATE_INVALID; 1158 data->currpstate = HW_PSTATE_INVALID;
1159 1159
1160 rc = powernow_k8_cpu_init_acpi(data); 1160 if (powernow_k8_cpu_init_acpi(data)) {
1161 if (rc) {
1162 /* 1161 /*
1163 * Use the PSB BIOS structure. This is only availabe on 1162 * Use the PSB BIOS structure. This is only availabe on
1164 * an UP version, and is deprecated by AMD. 1163 * an UP version, and is deprecated by AMD.
@@ -1176,17 +1175,20 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1176 "ACPI maintainers and complain to your BIOS " 1175 "ACPI maintainers and complain to your BIOS "
1177 "vendor.\n"); 1176 "vendor.\n");
1178#endif 1177#endif
1179 goto err_out; 1178 kfree(data);
1179 return -ENODEV;
1180 } 1180 }
1181 if (pol->cpu != 0) { 1181 if (pol->cpu != 0) {
1182 printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for " 1182 printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for "
1183 "CPU other than CPU0. Complain to your BIOS " 1183 "CPU other than CPU0. Complain to your BIOS "
1184 "vendor.\n"); 1184 "vendor.\n");
1185 goto err_out; 1185 kfree(data);
1186 return -ENODEV;
1186 } 1187 }
1187 rc = find_psb_table(data); 1188 rc = find_psb_table(data);
1188 if (rc) { 1189 if (rc) {
1189 goto err_out; 1190 kfree(data);
1191 return -ENODEV;
1190 } 1192 }
1191 /* Take a crude guess here. 1193 /* Take a crude guess here.
1192 * That guess was in microseconds, so multiply with 1000 */ 1194 * That guess was in microseconds, so multiply with 1000 */
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index f08998278a3..c9f1fdc0283 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -390,14 +390,14 @@ static int centrino_cpu_init(struct cpufreq_policy *policy)
390 enable it if not. */ 390 enable it if not. */
391 rdmsr(MSR_IA32_MISC_ENABLE, l, h); 391 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
392 392
393 if (!(l & (1<<16))) { 393 if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
394 l |= (1<<16); 394 l |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP;
395 dprintk("trying to enable Enhanced SpeedStep (%x)\n", l); 395 dprintk("trying to enable Enhanced SpeedStep (%x)\n", l);
396 wrmsr(MSR_IA32_MISC_ENABLE, l, h); 396 wrmsr(MSR_IA32_MISC_ENABLE, l, h);
397 397
398 /* check to see if it stuck */ 398 /* check to see if it stuck */
399 rdmsr(MSR_IA32_MISC_ENABLE, l, h); 399 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
400 if (!(l & (1<<16))) { 400 if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
401 printk(KERN_INFO PFX 401 printk(KERN_INFO PFX
402 "couldn't enable Enhanced SpeedStep\n"); 402 "couldn't enable Enhanced SpeedStep\n");
403 return -ENODEV; 403 return -ENODEV;
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 7aeef1d327b..25c559ba8d5 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -146,10 +146,10 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
146 */ 146 */
147 if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { 147 if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) {
148 rdmsr(MSR_IA32_MISC_ENABLE, lo, hi); 148 rdmsr(MSR_IA32_MISC_ENABLE, lo, hi);
149 if ((lo & (1<<9)) == 0) { 149 if ((lo & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE) == 0) {
150 printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); 150 printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n");
151 printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); 151 printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n");
152 lo |= (1<<9); /* Disable hw prefetching */ 152 lo |= MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE;
153 wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); 153 wrmsr (MSR_IA32_MISC_ENABLE, lo, hi);
154 } 154 }
155 } 155 }
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 1c838032fd3..fe79985ce0f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -295,11 +295,11 @@ void do_machine_check(struct pt_regs * regs, long error_code)
295 * If we know that the error was in user space, send a 295 * If we know that the error was in user space, send a
296 * SIGBUS. Otherwise, panic if tolerance is low. 296 * SIGBUS. Otherwise, panic if tolerance is low.
297 * 297 *
298 * do_exit() takes an awful lot of locks and has a slight 298 * force_sig() takes an awful lot of locks and has a slight
299 * risk of deadlocking. 299 * risk of deadlocking.
300 */ 300 */
301 if (user_space) { 301 if (user_space) {
302 do_exit(SIGBUS); 302 force_sig(SIGBUS, current);
303 } else if (panic_on_oops || tolerant < 2) { 303 } else if (panic_on_oops || tolerant < 2) {
304 mce_panic("Uncorrected machine check", 304 mce_panic("Uncorrected machine check",
305 &panicm, mcestart); 305 &panicm, mcestart);
@@ -490,7 +490,7 @@ static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
490 490
491} 491}
492 492
493static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c) 493static void mce_cpu_features(struct cpuinfo_x86 *c)
494{ 494{
495 switch (c->x86_vendor) { 495 switch (c->x86_vendor) {
496 case X86_VENDOR_INTEL: 496 case X86_VENDOR_INTEL:
@@ -734,6 +734,7 @@ __setup("mce=", mcheck_enable);
734static int mce_resume(struct sys_device *dev) 734static int mce_resume(struct sys_device *dev)
735{ 735{
736 mce_init(NULL); 736 mce_init(NULL);
737 mce_cpu_features(&current_cpu_data);
737 return 0; 738 return 0;
738} 739}
739 740
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 4772e91e824..9817506dd46 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -121,7 +121,7 @@ static long threshold_restart_bank(void *_tr)
121} 121}
122 122
123/* cpu init entry point, called from mce.c with preempt off */ 123/* cpu init entry point, called from mce.c with preempt off */
124void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) 124void mce_amd_feature_init(struct cpuinfo_x86 *c)
125{ 125{
126 unsigned int bank, block; 126 unsigned int bank, block;
127 unsigned int cpu = smp_processor_id(); 127 unsigned int cpu = smp_processor_id();
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index 5e8c79e748a..aa5e287c98e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -31,7 +31,7 @@ asmlinkage void smp_thermal_interrupt(void)
31 irq_exit(); 31 irq_exit();
32} 32}
33 33
34static void __cpuinit intel_init_thermal(struct cpuinfo_x86 *c) 34static void intel_init_thermal(struct cpuinfo_x86 *c)
35{ 35{
36 u32 l, h; 36 u32 l, h;
37 int tm2 = 0; 37 int tm2 = 0;
@@ -49,13 +49,13 @@ static void __cpuinit intel_init_thermal(struct cpuinfo_x86 *c)
49 */ 49 */
50 rdmsr(MSR_IA32_MISC_ENABLE, l, h); 50 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
51 h = apic_read(APIC_LVTTHMR); 51 h = apic_read(APIC_LVTTHMR);
52 if ((l & (1 << 3)) && (h & APIC_DM_SMI)) { 52 if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
53 printk(KERN_DEBUG 53 printk(KERN_DEBUG
54 "CPU%d: Thermal monitoring handled by SMI\n", cpu); 54 "CPU%d: Thermal monitoring handled by SMI\n", cpu);
55 return; 55 return;
56 } 56 }
57 57
58 if (cpu_has(c, X86_FEATURE_TM2) && (l & (1 << 13))) 58 if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
59 tm2 = 1; 59 tm2 = 1;
60 60
61 if (h & APIC_VECTOR_MASK) { 61 if (h & APIC_VECTOR_MASK) {
@@ -73,7 +73,7 @@ static void __cpuinit intel_init_thermal(struct cpuinfo_x86 *c)
73 wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h); 73 wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h);
74 74
75 rdmsr(MSR_IA32_MISC_ENABLE, l, h); 75 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
76 wrmsr(MSR_IA32_MISC_ENABLE, l | (1 << 3), h); 76 wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
77 77
78 l = apic_read(APIC_LVTTHMR); 78 l = apic_read(APIC_LVTTHMR);
79 apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); 79 apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
@@ -85,7 +85,7 @@ static void __cpuinit intel_init_thermal(struct cpuinfo_x86 *c)
85 return; 85 return;
86} 86}
87 87
88void __cpuinit mce_intel_feature_init(struct cpuinfo_x86 *c) 88void mce_intel_feature_init(struct cpuinfo_x86 *c)
89{ 89{
90 intel_init_thermal(c); 90 intel_init_thermal(c);
91} 91}
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
index 9b60fce09f7..f53bdcbaf38 100644
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ b/arch/x86/kernel/cpu/mcheck/p4.c
@@ -85,7 +85,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
85 */ 85 */
86 rdmsr(MSR_IA32_MISC_ENABLE, l, h); 86 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
87 h = apic_read(APIC_LVTTHMR); 87 h = apic_read(APIC_LVTTHMR);
88 if ((l & (1<<3)) && (h & APIC_DM_SMI)) { 88 if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
89 printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", 89 printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",
90 cpu); 90 cpu);
91 return; /* -EBUSY */ 91 return; /* -EBUSY */
@@ -111,7 +111,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
111 vendor_thermal_interrupt = intel_thermal_interrupt; 111 vendor_thermal_interrupt = intel_thermal_interrupt;
112 112
113 rdmsr(MSR_IA32_MISC_ENABLE, l, h); 113 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
114 wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h); 114 wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
115 115
116 l = apic_read(APIC_LVTTHMR); 116 l = apic_read(APIC_LVTTHMR);
117 apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); 117 apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index e85826829cf..508bec1cee2 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -858,6 +858,9 @@ void __init reserve_early_overlap_ok(u64 start, u64 end, char *name)
858 */ 858 */
859void __init reserve_early(u64 start, u64 end, char *name) 859void __init reserve_early(u64 start, u64 end, char *name)
860{ 860{
861 if (start >= end)
862 return;
863
861 drop_overlaps_that_are_ok(start, end); 864 drop_overlaps_that_are_ok(start, end);
862 __reserve_early(start, end, name, 0); 865 __reserve_early(start, end, name, 0);
863} 866}
diff --git a/arch/x86/kernel/efi_stub_32.S b/arch/x86/kernel/efi_stub_32.S
index ef00bb77d7e..fbe66e626c0 100644
--- a/arch/x86/kernel/efi_stub_32.S
+++ b/arch/x86/kernel/efi_stub_32.S
@@ -6,7 +6,7 @@
6 */ 6 */
7 7
8#include <linux/linkage.h> 8#include <linux/linkage.h>
9#include <asm/page.h> 9#include <asm/page_types.h>
10 10
11/* 11/*
12 * efi_call_phys(void *, ...) is a function with variable parameters. 12 * efi_call_phys(void *, ...) is a function with variable parameters.
@@ -113,6 +113,7 @@ ENTRY(efi_call_phys)
113 movl (%edx), %ecx 113 movl (%edx), %ecx
114 pushl %ecx 114 pushl %ecx
115 ret 115 ret
116ENDPROC(efi_call_phys)
116.previous 117.previous
117 118
118.data 119.data
diff --git a/arch/x86/kernel/efi_stub_64.S b/arch/x86/kernel/efi_stub_64.S
index 99b47d48c9f..4c07ccab814 100644
--- a/arch/x86/kernel/efi_stub_64.S
+++ b/arch/x86/kernel/efi_stub_64.S
@@ -41,6 +41,7 @@ ENTRY(efi_call0)
41 addq $32, %rsp 41 addq $32, %rsp
42 RESTORE_XMM 42 RESTORE_XMM
43 ret 43 ret
44ENDPROC(efi_call0)
44 45
45ENTRY(efi_call1) 46ENTRY(efi_call1)
46 SAVE_XMM 47 SAVE_XMM
@@ -50,6 +51,7 @@ ENTRY(efi_call1)
50 addq $32, %rsp 51 addq $32, %rsp
51 RESTORE_XMM 52 RESTORE_XMM
52 ret 53 ret
54ENDPROC(efi_call1)
53 55
54ENTRY(efi_call2) 56ENTRY(efi_call2)
55 SAVE_XMM 57 SAVE_XMM
@@ -59,6 +61,7 @@ ENTRY(efi_call2)
59 addq $32, %rsp 61 addq $32, %rsp
60 RESTORE_XMM 62 RESTORE_XMM
61 ret 63 ret
64ENDPROC(efi_call2)
62 65
63ENTRY(efi_call3) 66ENTRY(efi_call3)
64 SAVE_XMM 67 SAVE_XMM
@@ -69,6 +72,7 @@ ENTRY(efi_call3)
69 addq $32, %rsp 72 addq $32, %rsp
70 RESTORE_XMM 73 RESTORE_XMM
71 ret 74 ret
75ENDPROC(efi_call3)
72 76
73ENTRY(efi_call4) 77ENTRY(efi_call4)
74 SAVE_XMM 78 SAVE_XMM
@@ -80,6 +84,7 @@ ENTRY(efi_call4)
80 addq $32, %rsp 84 addq $32, %rsp
81 RESTORE_XMM 85 RESTORE_XMM
82 ret 86 ret
87ENDPROC(efi_call4)
83 88
84ENTRY(efi_call5) 89ENTRY(efi_call5)
85 SAVE_XMM 90 SAVE_XMM
@@ -92,6 +97,7 @@ ENTRY(efi_call5)
92 addq $48, %rsp 97 addq $48, %rsp
93 RESTORE_XMM 98 RESTORE_XMM
94 ret 99 ret
100ENDPROC(efi_call5)
95 101
96ENTRY(efi_call6) 102ENTRY(efi_call6)
97 SAVE_XMM 103 SAVE_XMM
@@ -107,3 +113,4 @@ ENTRY(efi_call6)
107 addq $48, %rsp 113 addq $48, %rsp
108 RESTORE_XMM 114 RESTORE_XMM
109 ret 115 ret
116ENDPROC(efi_call6)
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index e9920683145..899e8938e79 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -47,7 +47,7 @@
47#include <asm/errno.h> 47#include <asm/errno.h>
48#include <asm/segment.h> 48#include <asm/segment.h>
49#include <asm/smp.h> 49#include <asm/smp.h>
50#include <asm/page.h> 50#include <asm/page_types.h>
51#include <asm/desc.h> 51#include <asm/desc.h>
52#include <asm/percpu.h> 52#include <asm/percpu.h>
53#include <asm/dwarf2.h> 53#include <asm/dwarf2.h>
@@ -1359,7 +1359,7 @@ nmi_espfix_stack:
1359 CFI_ADJUST_CFA_OFFSET 4 1359 CFI_ADJUST_CFA_OFFSET 4
1360 pushl %esp 1360 pushl %esp
1361 CFI_ADJUST_CFA_OFFSET 4 1361 CFI_ADJUST_CFA_OFFSET 4
1362 addw $4, (%esp) 1362 addl $4, (%esp)
1363 /* copy the iret frame of 12 bytes */ 1363 /* copy the iret frame of 12 bytes */
1364 .rept 3 1364 .rept 3
1365 pushl 16(%esp) 1365 pushl 16(%esp)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 860afce9660..24c7031e23c 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -48,7 +48,7 @@
48#include <asm/unistd.h> 48#include <asm/unistd.h>
49#include <asm/thread_info.h> 49#include <asm/thread_info.h>
50#include <asm/hw_irq.h> 50#include <asm/hw_irq.h>
51#include <asm/page.h> 51#include <asm/page_types.h>
52#include <asm/irqflags.h> 52#include <asm/irqflags.h>
53#include <asm/paravirt.h> 53#include <asm/paravirt.h>
54#include <asm/ftrace.h> 54#include <asm/ftrace.h>
@@ -77,20 +77,17 @@ ENTRY(ftrace_caller)
77 movq 8(%rbp), %rsi 77 movq 8(%rbp), %rsi
78 subq $MCOUNT_INSN_SIZE, %rdi 78 subq $MCOUNT_INSN_SIZE, %rdi
79 79
80.globl ftrace_call 80GLOBAL(ftrace_call)
81ftrace_call:
82 call ftrace_stub 81 call ftrace_stub
83 82
84 MCOUNT_RESTORE_FRAME 83 MCOUNT_RESTORE_FRAME
85 84
86#ifdef CONFIG_FUNCTION_GRAPH_TRACER 85#ifdef CONFIG_FUNCTION_GRAPH_TRACER
87.globl ftrace_graph_call 86GLOBAL(ftrace_graph_call)
88ftrace_graph_call:
89 jmp ftrace_stub 87 jmp ftrace_stub
90#endif 88#endif
91 89
92.globl ftrace_stub 90GLOBAL(ftrace_stub)
93ftrace_stub:
94 retq 91 retq
95END(ftrace_caller) 92END(ftrace_caller)
96 93
@@ -110,8 +107,7 @@ ENTRY(mcount)
110 jnz ftrace_graph_caller 107 jnz ftrace_graph_caller
111#endif 108#endif
112 109
113.globl ftrace_stub 110GLOBAL(ftrace_stub)
114ftrace_stub:
115 retq 111 retq
116 112
117trace: 113trace:
@@ -148,9 +144,7 @@ ENTRY(ftrace_graph_caller)
148 retq 144 retq
149END(ftrace_graph_caller) 145END(ftrace_graph_caller)
150 146
151 147GLOBAL(return_to_handler)
152.globl return_to_handler
153return_to_handler:
154 subq $80, %rsp 148 subq $80, %rsp
155 149
156 movq %rax, (%rsp) 150 movq %rax, (%rsp)
@@ -188,6 +182,7 @@ return_to_handler:
188ENTRY(native_usergs_sysret64) 182ENTRY(native_usergs_sysret64)
189 swapgs 183 swapgs
190 sysretq 184 sysretq
185ENDPROC(native_usergs_sysret64)
191#endif /* CONFIG_PARAVIRT */ 186#endif /* CONFIG_PARAVIRT */
192 187
193 188
@@ -633,16 +628,14 @@ tracesys:
633 * Syscall return path ending with IRET. 628 * Syscall return path ending with IRET.
634 * Has correct top of stack, but partial stack frame. 629 * Has correct top of stack, but partial stack frame.
635 */ 630 */
636 .globl int_ret_from_sys_call 631GLOBAL(int_ret_from_sys_call)
637 .globl int_with_check
638int_ret_from_sys_call:
639 DISABLE_INTERRUPTS(CLBR_NONE) 632 DISABLE_INTERRUPTS(CLBR_NONE)
640 TRACE_IRQS_OFF 633 TRACE_IRQS_OFF
641 testl $3,CS-ARGOFFSET(%rsp) 634 testl $3,CS-ARGOFFSET(%rsp)
642 je retint_restore_args 635 je retint_restore_args
643 movl $_TIF_ALLWORK_MASK,%edi 636 movl $_TIF_ALLWORK_MASK,%edi
644 /* edi: mask to check */ 637 /* edi: mask to check */
645int_with_check: 638GLOBAL(int_with_check)
646 LOCKDEP_SYS_EXIT_IRQ 639 LOCKDEP_SYS_EXIT_IRQ
647 GET_THREAD_INFO(%rcx) 640 GET_THREAD_INFO(%rcx)
648 movl TI_flags(%rcx),%edx 641 movl TI_flags(%rcx),%edx
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 2a0aad7718d..c32ca19d591 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -11,8 +11,8 @@
11#include <linux/init.h> 11#include <linux/init.h>
12#include <linux/linkage.h> 12#include <linux/linkage.h>
13#include <asm/segment.h> 13#include <asm/segment.h>
14#include <asm/page.h> 14#include <asm/page_types.h>
15#include <asm/pgtable.h> 15#include <asm/pgtable_types.h>
16#include <asm/desc.h> 16#include <asm/desc.h>
17#include <asm/cache.h> 17#include <asm/cache.h>
18#include <asm/thread_info.h> 18#include <asm/thread_info.h>
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 2e648e3a5ea..54b29bb24e7 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -329,8 +329,6 @@ early_idt_ripmsg:
329#endif /* CONFIG_EARLY_PRINTK */ 329#endif /* CONFIG_EARLY_PRINTK */
330 .previous 330 .previous
331 331
332.balign PAGE_SIZE
333
334#define NEXT_PAGE(name) \ 332#define NEXT_PAGE(name) \
335 .balign PAGE_SIZE; \ 333 .balign PAGE_SIZE; \
336ENTRY(name) 334ENTRY(name)
@@ -419,7 +417,7 @@ ENTRY(phys_base)
419 .section .bss, "aw", @nobits 417 .section .bss, "aw", @nobits
420 .align L1_CACHE_BYTES 418 .align L1_CACHE_BYTES
421ENTRY(idt_table) 419ENTRY(idt_table)
422 .skip 256 * 16 420 .skip IDT_ENTRIES * 16
423 421
424 .section .bss.page_aligned, "aw", @nobits 422 .section .bss.page_aligned, "aw", @nobits
425 .align PAGE_SIZE 423 .align PAGE_SIZE
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 11d5093eb28..df89102bef8 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -22,7 +22,6 @@
22#include <asm/pgtable.h> 22#include <asm/pgtable.h>
23#include <asm/desc.h> 23#include <asm/desc.h>
24#include <asm/apic.h> 24#include <asm/apic.h>
25#include <asm/arch_hooks.h>
26#include <asm/i8259.h> 25#include <asm/i8259.h>
27 26
28/* 27/*
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 520e6c1c5d2..f3e11cb295c 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -18,7 +18,7 @@
18#include <asm/pgtable.h> 18#include <asm/pgtable.h>
19#include <asm/desc.h> 19#include <asm/desc.h>
20#include <asm/apic.h> 20#include <asm/apic.h>
21#include <asm/arch_hooks.h> 21#include <asm/setup.h>
22#include <asm/i8259.h> 22#include <asm/i8259.h>
23#include <asm/traps.h> 23#include <asm/traps.h>
24 24
@@ -181,8 +181,8 @@ void __init native_init_IRQ(void)
181{ 181{
182 int i; 182 int i;
183 183
184 /* all the set up before the call gates are initialised */ 184 /* Execute any quirks before the call gates are initialised: */
185 pre_intr_init_hook(); 185 x86_quirk_pre_intr_init();
186 186
187 apic_intr_init(); 187 apic_intr_init();
188 188
@@ -201,10 +201,11 @@ void __init native_init_IRQ(void)
201 if (!acpi_ioapic) 201 if (!acpi_ioapic)
202 setup_irq(2, &irq2); 202 setup_irq(2, &irq2);
203 203
204 /* setup after call gates are initialised (usually add in 204 /*
205 * the architecture specific gates) 205 * Call quirks after call gates are initialised (usually add in
206 * the architecture specific gates):
206 */ 207 */
207 intr_init_hook(); 208 x86_quirk_intr_init();
208 209
209 /* 210 /*
210 * External FPU? Set up irq13 if so, for 211 * External FPU? Set up irq13 if so, for
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 652fce6d2cc..137f2e8132d 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -19,7 +19,6 @@
19#include <linux/clocksource.h> 19#include <linux/clocksource.h>
20#include <linux/kvm_para.h> 20#include <linux/kvm_para.h>
21#include <asm/pvclock.h> 21#include <asm/pvclock.h>
22#include <asm/arch_hooks.h>
23#include <asm/msr.h> 22#include <asm/msr.h>
24#include <asm/apic.h> 23#include <asm/apic.h>
25#include <linux/percpu.h> 24#include <linux/percpu.h>
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 37f420018a4..f5fc8c781a6 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -121,7 +121,7 @@ static void machine_kexec_page_table_set_one(
121static void machine_kexec_prepare_page_tables(struct kimage *image) 121static void machine_kexec_prepare_page_tables(struct kimage *image)
122{ 122{
123 void *control_page; 123 void *control_page;
124 pmd_t *pmd = 0; 124 pmd_t *pmd = NULL;
125 125
126 control_page = page_address(image->control_code_page); 126 control_page = page_address(image->control_code_page);
127#ifdef CONFIG_X86_PAE 127#ifdef CONFIG_X86_PAE
diff --git a/arch/x86/kernel/mca_32.c b/arch/x86/kernel/mca_32.c
index 2dc183758be..845d80ce1ef 100644
--- a/arch/x86/kernel/mca_32.c
+++ b/arch/x86/kernel/mca_32.c
@@ -51,7 +51,6 @@
51#include <linux/ioport.h> 51#include <linux/ioport.h>
52#include <asm/uaccess.h> 52#include <asm/uaccess.h>
53#include <linux/init.h> 53#include <linux/init.h>
54#include <asm/arch_hooks.h>
55 54
56static unsigned char which_scsi; 55static unsigned char which_scsi;
57 56
@@ -474,6 +473,4 @@ void __kprobes mca_handle_nmi(void)
474 * adapter was responsible for the error. 473 * adapter was responsible for the error.
475 */ 474 */
476 bus_for_each_dev(&mca_bus_type, NULL, NULL, mca_handle_nmi_callback); 475 bus_for_each_dev(&mca_bus_type, NULL, NULL, mca_handle_nmi_callback);
477 476}
478 mca_nmi_hook();
479} /* mca_handle_nmi */
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 7f4d2586972..37cb1bda1ba 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -710,13 +710,22 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
710 * of physical memory; so that simply reserving 710 * of physical memory; so that simply reserving
711 * PAGE_SIZE from mpf->physptr yields BUG() 711 * PAGE_SIZE from mpf->physptr yields BUG()
712 * in reserve_bootmem. 712 * in reserve_bootmem.
713 * also need to make sure physptr is below than
714 * max_low_pfn
715 * we don't need reserve the area above max_low_pfn
713 */ 716 */
714 unsigned long end = max_low_pfn * PAGE_SIZE; 717 unsigned long end = max_low_pfn * PAGE_SIZE;
715 if (mpf->physptr + size > end) 718
716 size = end - mpf->physptr; 719 if (mpf->physptr < end) {
717#endif 720 if (mpf->physptr + size > end)
721 size = end - mpf->physptr;
722 reserve_bootmem_generic(mpf->physptr, size,
723 BOOTMEM_DEFAULT);
724 }
725#else
718 reserve_bootmem_generic(mpf->physptr, size, 726 reserve_bootmem_generic(mpf->physptr, size,
719 BOOTMEM_DEFAULT); 727 BOOTMEM_DEFAULT);
728#endif
720 } 729 }
721 730
722 return 1; 731 return 1;
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 6dc4dca255e..63dd358d8ee 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -28,7 +28,6 @@
28#include <asm/paravirt.h> 28#include <asm/paravirt.h>
29#include <asm/desc.h> 29#include <asm/desc.h>
30#include <asm/setup.h> 30#include <asm/setup.h>
31#include <asm/arch_hooks.h>
32#include <asm/pgtable.h> 31#include <asm/pgtable.h>
33#include <asm/time.h> 32#include <asm/time.h>
34#include <asm/pgalloc.h> 33#include <asm/pgalloc.h>
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index fec79ad85dc..646da41a620 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -111,9 +111,6 @@ void cpu_idle(void)
111 check_pgt_cache(); 111 check_pgt_cache();
112 rmb(); 112 rmb();
113 113
114 if (rcu_pending(cpu))
115 rcu_check_callbacks(cpu, 0);
116
117 if (cpu_is_offline(cpu)) 114 if (cpu_is_offline(cpu))
118 play_dead(); 115 play_dead();
119 116
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index d2f7cd5b2c8..fb2159a5c81 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -268,7 +268,7 @@ static unsigned long debugreg_addr_limit(struct task_struct *task)
268 if (test_tsk_thread_flag(task, TIF_IA32)) 268 if (test_tsk_thread_flag(task, TIF_IA32))
269 return IA32_PAGE_OFFSET - 3; 269 return IA32_PAGE_OFFSET - 3;
270#endif 270#endif
271 return TASK_SIZE64 - 7; 271 return TASK_SIZE_MAX - 7;
272} 272}
273 273
274#endif /* CONFIG_X86_32 */ 274#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index a160f311972..2064d0aa8d2 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -7,7 +7,7 @@
7 */ 7 */
8 8
9#include <linux/linkage.h> 9#include <linux/linkage.h>
10#include <asm/page.h> 10#include <asm/page_types.h>
11#include <asm/kexec.h> 11#include <asm/kexec.h>
12#include <asm/processor-flags.h> 12#include <asm/processor-flags.h>
13 13
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index b0bbdd4829c..d32cfb27a47 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -7,10 +7,10 @@
7 */ 7 */
8 8
9#include <linux/linkage.h> 9#include <linux/linkage.h>
10#include <asm/page.h> 10#include <asm/page_types.h>
11#include <asm/kexec.h> 11#include <asm/kexec.h>
12#include <asm/processor-flags.h> 12#include <asm/processor-flags.h>
13#include <asm/pgtable.h> 13#include <asm/pgtable_types.h>
14 14
15/* 15/*
16 * Must be relocatable PIC code callable as a C function 16 * Must be relocatable PIC code callable as a C function
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ebef8005579..5b85759e797 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -74,8 +74,9 @@
74#include <asm/e820.h> 74#include <asm/e820.h>
75#include <asm/mpspec.h> 75#include <asm/mpspec.h>
76#include <asm/setup.h> 76#include <asm/setup.h>
77#include <asm/arch_hooks.h>
78#include <asm/efi.h> 77#include <asm/efi.h>
78#include <asm/timer.h>
79#include <asm/i8259.h>
79#include <asm/sections.h> 80#include <asm/sections.h>
80#include <asm/dmi.h> 81#include <asm/dmi.h>
81#include <asm/io_apic.h> 82#include <asm/io_apic.h>
@@ -668,7 +669,6 @@ void __init setup_arch(char **cmdline_p)
668#ifdef CONFIG_X86_32 669#ifdef CONFIG_X86_32
669 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); 670 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
670 visws_early_detect(); 671 visws_early_detect();
671 pre_setup_arch_hook();
672#else 672#else
673 printk(KERN_INFO "Command line: %s\n", boot_command_line); 673 printk(KERN_INFO "Command line: %s\n", boot_command_line);
674#endif 674#endif
@@ -988,7 +988,7 @@ void __init setup_arch(char **cmdline_p)
988#ifdef CONFIG_X86_32 988#ifdef CONFIG_X86_32
989 989
990/** 990/**
991 * pre_intr_init_hook - initialisation prior to setting up interrupt vectors 991 * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors
992 * 992 *
993 * Description: 993 * Description:
994 * Perform any necessary interrupt initialisation prior to setting up 994 * Perform any necessary interrupt initialisation prior to setting up
@@ -996,7 +996,7 @@ void __init setup_arch(char **cmdline_p)
996 * interrupts should be initialised here if the machine emulates a PC 996 * interrupts should be initialised here if the machine emulates a PC
997 * in any way. 997 * in any way.
998 **/ 998 **/
999void __init pre_intr_init_hook(void) 999void __init x86_quirk_pre_intr_init(void)
1000{ 1000{
1001 if (x86_quirks->arch_pre_intr_init) { 1001 if (x86_quirks->arch_pre_intr_init) {
1002 if (x86_quirks->arch_pre_intr_init()) 1002 if (x86_quirks->arch_pre_intr_init())
@@ -1006,7 +1006,7 @@ void __init pre_intr_init_hook(void)
1006} 1006}
1007 1007
1008/** 1008/**
1009 * intr_init_hook - post gate setup interrupt initialisation 1009 * x86_quirk_intr_init - post gate setup interrupt initialisation
1010 * 1010 *
1011 * Description: 1011 * Description:
1012 * Fill in any interrupts that may have been left out by the general 1012 * Fill in any interrupts that may have been left out by the general
@@ -1014,7 +1014,7 @@ void __init pre_intr_init_hook(void)
1014 * than the devices on the I/O bus (like APIC interrupts in intel MP 1014 * than the devices on the I/O bus (like APIC interrupts in intel MP
1015 * systems) are started here. 1015 * systems) are started here.
1016 **/ 1016 **/
1017void __init intr_init_hook(void) 1017void __init x86_quirk_intr_init(void)
1018{ 1018{
1019 if (x86_quirks->arch_intr_init) { 1019 if (x86_quirks->arch_intr_init) {
1020 if (x86_quirks->arch_intr_init()) 1020 if (x86_quirks->arch_intr_init())
@@ -1023,25 +1023,13 @@ void __init intr_init_hook(void)
1023} 1023}
1024 1024
1025/** 1025/**
1026 * pre_setup_arch_hook - hook called prior to any setup_arch() execution 1026 * x86_quirk_trap_init - initialise system specific traps
1027 *
1028 * Description:
1029 * generally used to activate any machine specific identification
1030 * routines that may be needed before setup_arch() runs. On Voyager
1031 * this is used to get the board revision and type.
1032 **/
1033void __init pre_setup_arch_hook(void)
1034{
1035}
1036
1037/**
1038 * trap_init_hook - initialise system specific traps
1039 * 1027 *
1040 * Description: 1028 * Description:
1041 * Called as the final act of trap_init(). Used in VISWS to initialise 1029 * Called as the final act of trap_init(). Used in VISWS to initialise
1042 * the various board specific APIC traps. 1030 * the various board specific APIC traps.
1043 **/ 1031 **/
1044void __init trap_init_hook(void) 1032void __init x86_quirk_trap_init(void)
1045{ 1033{
1046 if (x86_quirks->arch_trap_init) { 1034 if (x86_quirks->arch_trap_init) {
1047 if (x86_quirks->arch_trap_init()) 1035 if (x86_quirks->arch_trap_init())
@@ -1051,29 +1039,29 @@ void __init trap_init_hook(void)
1051 1039
1052static struct irqaction irq0 = { 1040static struct irqaction irq0 = {
1053 .handler = timer_interrupt, 1041 .handler = timer_interrupt,
1054 .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL, 1042 .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER,
1055 .mask = CPU_MASK_NONE, 1043 .mask = CPU_MASK_NONE,
1056 .name = "timer" 1044 .name = "timer"
1057}; 1045};
1058 1046
1059/** 1047/**
1060 * pre_time_init_hook - do any specific initialisations before. 1048 * x86_quirk_pre_time_init - do any specific initialisations before.
1061 * 1049 *
1062 **/ 1050 **/
1063void __init pre_time_init_hook(void) 1051void __init x86_quirk_pre_time_init(void)
1064{ 1052{
1065 if (x86_quirks->arch_pre_time_init) 1053 if (x86_quirks->arch_pre_time_init)
1066 x86_quirks->arch_pre_time_init(); 1054 x86_quirks->arch_pre_time_init();
1067} 1055}
1068 1056
1069/** 1057/**
1070 * time_init_hook - do any specific initialisations for the system timer. 1058 * x86_quirk_time_init - do any specific initialisations for the system timer.
1071 * 1059 *
1072 * Description: 1060 * Description:
1073 * Must plug the system timer interrupt source at HZ into the IRQ listed 1061 * Must plug the system timer interrupt source at HZ into the IRQ listed
1074 * in irq_vectors.h:TIMER_IRQ 1062 * in irq_vectors.h:TIMER_IRQ
1075 **/ 1063 **/
1076void __init time_init_hook(void) 1064void __init x86_quirk_time_init(void)
1077{ 1065{
1078 if (x86_quirks->arch_time_init) { 1066 if (x86_quirks->arch_time_init) {
1079 /* 1067 /*
@@ -1088,25 +1076,4 @@ void __init time_init_hook(void)
1088 irq0.mask = cpumask_of_cpu(0); 1076 irq0.mask = cpumask_of_cpu(0);
1089 setup_irq(0, &irq0); 1077 setup_irq(0, &irq0);
1090} 1078}
1091
1092#ifdef CONFIG_MCA
1093/**
1094 * mca_nmi_hook - hook into MCA specific NMI chain
1095 *
1096 * Description:
1097 * The MCA (Microchannel Architecture) has an NMI chain for NMI sources
1098 * along the MCA bus. Use this to hook into that chain if you will need
1099 * it.
1100 **/
1101void mca_nmi_hook(void)
1102{
1103 /*
1104 * If I recall correctly, there's a whole bunch of other things that
1105 * we can do to check for NMI problems, but that's all I know about
1106 * at the moment.
1107 */
1108 pr_warning("NMI generated from unknown source!\n");
1109}
1110#endif /* CONFIG_MCA */
1111
1112#endif /* CONFIG_X86_32 */ 1079#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
index 764c74e871f..5c5d87f0b2e 100644
--- a/arch/x86/kernel/time_32.c
+++ b/arch/x86/kernel/time_32.c
@@ -33,7 +33,7 @@
33#include <linux/time.h> 33#include <linux/time.h>
34#include <linux/mca.h> 34#include <linux/mca.h>
35 35
36#include <asm/arch_hooks.h> 36#include <asm/setup.h>
37#include <asm/hpet.h> 37#include <asm/hpet.h>
38#include <asm/time.h> 38#include <asm/time.h>
39#include <asm/timer.h> 39#include <asm/timer.h>
@@ -118,7 +118,7 @@ void __init hpet_time_init(void)
118{ 118{
119 if (!hpet_enable()) 119 if (!hpet_enable())
120 setup_pit_timer(); 120 setup_pit_timer();
121 time_init_hook(); 121 x86_quirk_time_init();
122} 122}
123 123
124/* 124/*
@@ -131,7 +131,7 @@ void __init hpet_time_init(void)
131 */ 131 */
132void __init time_init(void) 132void __init time_init(void)
133{ 133{
134 pre_time_init_hook(); 134 x86_quirk_pre_time_init();
135 tsc_init(); 135 tsc_init();
136 late_time_init = choose_time_init(); 136 late_time_init = choose_time_init();
137} 137}
diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
index e6e695acd72..241ec3923f6 100644
--- a/arch/x86/kernel/time_64.c
+++ b/arch/x86/kernel/time_64.c
@@ -115,7 +115,7 @@ unsigned long __init calibrate_cpu(void)
115 115
116static struct irqaction irq0 = { 116static struct irqaction irq0 = {
117 .handler = timer_interrupt, 117 .handler = timer_interrupt,
118 .flags = IRQF_DISABLED | IRQF_IRQPOLL | IRQF_NOBALANCING, 118 .flags = IRQF_DISABLED | IRQF_IRQPOLL | IRQF_NOBALANCING | IRQF_TIMER,
119 .mask = CPU_MASK_NONE, 119 .mask = CPU_MASK_NONE,
120 .name = "timer" 120 .name = "timer"
121}; 121};
diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S
index d8ccc3c6552..66d874e5404 100644
--- a/arch/x86/kernel/trampoline_32.S
+++ b/arch/x86/kernel/trampoline_32.S
@@ -29,7 +29,7 @@
29 29
30#include <linux/linkage.h> 30#include <linux/linkage.h>
31#include <asm/segment.h> 31#include <asm/segment.h>
32#include <asm/page.h> 32#include <asm/page_types.h>
33 33
34/* We can free up trampoline after bootup if cpu hotplug is not supported. */ 34/* We can free up trampoline after bootup if cpu hotplug is not supported. */
35#ifndef CONFIG_HOTPLUG_CPU 35#ifndef CONFIG_HOTPLUG_CPU
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S
index 95a012a4664..cddfb8d386b 100644
--- a/arch/x86/kernel/trampoline_64.S
+++ b/arch/x86/kernel/trampoline_64.S
@@ -25,8 +25,8 @@
25 */ 25 */
26 26
27#include <linux/linkage.h> 27#include <linux/linkage.h>
28#include <asm/pgtable.h> 28#include <asm/pgtable_types.h>
29#include <asm/page.h> 29#include <asm/page_types.h>
30#include <asm/msr.h> 30#include <asm/msr.h>
31#include <asm/segment.h> 31#include <asm/segment.h>
32#include <asm/processor-flags.h> 32#include <asm/processor-flags.h>
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index c85a86cb7fb..1dba866967e 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -61,7 +61,7 @@
61#include <asm/proto.h> 61#include <asm/proto.h>
62#else 62#else
63#include <asm/processor-flags.h> 63#include <asm/processor-flags.h>
64#include <asm/arch_hooks.h> 64#include <asm/setup.h>
65#include <asm/traps.h> 65#include <asm/traps.h>
66 66
67#include "cpu/mcheck/mce.h" 67#include "cpu/mcheck/mce.h"
@@ -942,7 +942,7 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
942 info.si_signo = SIGILL; 942 info.si_signo = SIGILL;
943 info.si_errno = 0; 943 info.si_errno = 0;
944 info.si_code = ILL_BADSTK; 944 info.si_code = ILL_BADSTK;
945 info.si_addr = 0; 945 info.si_addr = NULL;
946 if (notify_die(DIE_TRAP, "iret exception", 946 if (notify_die(DIE_TRAP, "iret exception",
947 regs, error_code, 32, SIGILL) == NOTIFY_STOP) 947 regs, error_code, 32, SIGILL) == NOTIFY_STOP)
948 return; 948 return;
@@ -1023,6 +1023,6 @@ void __init trap_init(void)
1023 cpu_init(); 1023 cpu_init();
1024 1024
1025#ifdef CONFIG_X86_32 1025#ifdef CONFIG_X86_32
1026 trap_init_hook(); 1026 x86_quirk_trap_init();
1027#endif 1027#endif
1028} 1028}
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index 34199d30ff4..191a876e9e8 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -24,7 +24,6 @@
24 24
25#include <asm/visws/cobalt.h> 25#include <asm/visws/cobalt.h>
26#include <asm/visws/piix4.h> 26#include <asm/visws/piix4.h>
27#include <asm/arch_hooks.h>
28#include <asm/io_apic.h> 27#include <asm/io_apic.h>
29#include <asm/fixmap.h> 28#include <asm/fixmap.h>
30#include <asm/reboot.h> 29#include <asm/reboot.h>
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index a4791ef412d..33a788d5879 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -28,7 +28,6 @@
28 28
29#include <asm/vmi.h> 29#include <asm/vmi.h>
30#include <asm/vmi_time.h> 30#include <asm/vmi_time.h>
31#include <asm/arch_hooks.h>
32#include <asm/apicdef.h> 31#include <asm/apicdef.h>
33#include <asm/apic.h> 32#include <asm/apic.h>
34#include <asm/timer.h> 33#include <asm/timer.h>
@@ -202,7 +201,7 @@ static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id)
202static struct irqaction vmi_clock_action = { 201static struct irqaction vmi_clock_action = {
203 .name = "vmi-timer", 202 .name = "vmi-timer",
204 .handler = vmi_timer_interrupt, 203 .handler = vmi_timer_interrupt,
205 .flags = IRQF_DISABLED | IRQF_NOBALANCING, 204 .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TIMER,
206 .mask = CPU_MASK_ALL, 205 .mask = CPU_MASK_ALL,
207}; 206};
208 207
@@ -283,10 +282,12 @@ void __devinit vmi_time_ap_init(void)
283#endif 282#endif
284 283
285/** vmi clocksource */ 284/** vmi clocksource */
285static struct clocksource clocksource_vmi;
286 286
287static cycle_t read_real_cycles(void) 287static cycle_t read_real_cycles(void)
288{ 288{
289 return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL); 289 cycle_t ret = (cycle_t)vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL);
290 return max(ret, clocksource_vmi.cycle_last);
290} 291}
291 292
292static struct clocksource clocksource_vmi = { 293static struct clocksource clocksource_vmi = {
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 3eba7f7bac0..0d860963f26 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -12,7 +12,7 @@
12 12
13#include <asm-generic/vmlinux.lds.h> 13#include <asm-generic/vmlinux.lds.h>
14#include <asm/thread_info.h> 14#include <asm/thread_info.h>
15#include <asm/page.h> 15#include <asm/page_types.h>
16#include <asm/cache.h> 16#include <asm/cache.h>
17#include <asm/boot.h> 17#include <asm/boot.h>
18 18
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 087a7f2c639..fbfced6f680 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -6,7 +6,7 @@
6 6
7#include <asm-generic/vmlinux.lds.h> 7#include <asm-generic/vmlinux.lds.h>
8#include <asm/asm-offsets.h> 8#include <asm/asm-offsets.h>
9#include <asm/page.h> 9#include <asm/page_types.h>
10 10
11#undef i386 /* in case the preprocessor is a 32bit one */ 11#undef i386 /* in case the preprocessor is a 32bit one */
12 12
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index e665d1c623c..72bd275a9b5 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -207,7 +207,7 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps)
207 hrtimer_add_expires_ns(&pt->timer, pt->period); 207 hrtimer_add_expires_ns(&pt->timer, pt->period);
208 pt->scheduled = hrtimer_get_expires_ns(&pt->timer); 208 pt->scheduled = hrtimer_get_expires_ns(&pt->timer);
209 if (pt->period) 209 if (pt->period)
210 ps->channels[0].count_load_time = hrtimer_get_expires(&pt->timer); 210 ps->channels[0].count_load_time = ktime_get();
211 211
212 return (pt->period == 0 ? 0 : 1); 212 return (pt->period == 0 ? 0 : 1);
213} 213}
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index c019b8edcdb..cf17ed52f6f 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -87,13 +87,6 @@ void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
87} 87}
88EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs); 88EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
89 89
90void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
91{
92 kvm_apic_timer_intr_post(vcpu, vec);
93 /* TODO: PIT, RTC etc. */
94}
95EXPORT_SYMBOL_GPL(kvm_timer_intr_post);
96
97void __kvm_migrate_timers(struct kvm_vcpu *vcpu) 90void __kvm_migrate_timers(struct kvm_vcpu *vcpu)
98{ 91{
99 __kvm_migrate_apic_timer(vcpu); 92 __kvm_migrate_apic_timer(vcpu);
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 2bf32a03cee..82579ee538d 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -89,7 +89,6 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
89 89
90void kvm_pic_reset(struct kvm_kpic_state *s); 90void kvm_pic_reset(struct kvm_kpic_state *s);
91 91
92void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
93void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); 92void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
94void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); 93void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
95void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu); 94void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index afac68c0815..f0b67f2cdd6 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -35,6 +35,12 @@
35#include "kvm_cache_regs.h" 35#include "kvm_cache_regs.h"
36#include "irq.h" 36#include "irq.h"
37 37
38#ifndef CONFIG_X86_64
39#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
40#else
41#define mod_64(x, y) ((x) % (y))
42#endif
43
38#define PRId64 "d" 44#define PRId64 "d"
39#define PRIx64 "llx" 45#define PRIx64 "llx"
40#define PRIu64 "u" 46#define PRIu64 "u"
@@ -511,52 +517,22 @@ static void apic_send_ipi(struct kvm_lapic *apic)
511 517
512static u32 apic_get_tmcct(struct kvm_lapic *apic) 518static u32 apic_get_tmcct(struct kvm_lapic *apic)
513{ 519{
514 u64 counter_passed; 520 ktime_t remaining;
515 ktime_t passed, now; 521 s64 ns;
516 u32 tmcct; 522 u32 tmcct;
517 523
518 ASSERT(apic != NULL); 524 ASSERT(apic != NULL);
519 525
520 now = apic->timer.dev.base->get_time();
521 tmcct = apic_get_reg(apic, APIC_TMICT);
522
523 /* if initial count is 0, current count should also be 0 */ 526 /* if initial count is 0, current count should also be 0 */
524 if (tmcct == 0) 527 if (apic_get_reg(apic, APIC_TMICT) == 0)
525 return 0; 528 return 0;
526 529
527 if (unlikely(ktime_to_ns(now) <= 530 remaining = hrtimer_expires_remaining(&apic->timer.dev);
528 ktime_to_ns(apic->timer.last_update))) { 531 if (ktime_to_ns(remaining) < 0)
529 /* Wrap around */ 532 remaining = ktime_set(0, 0);
530 passed = ktime_add(( { 533
531 (ktime_t) { 534 ns = mod_64(ktime_to_ns(remaining), apic->timer.period);
532 .tv64 = KTIME_MAX - 535 tmcct = div64_u64(ns, (APIC_BUS_CYCLE_NS * apic->timer.divide_count));
533 (apic->timer.last_update).tv64}; }
534 ), now);
535 apic_debug("time elapsed\n");
536 } else
537 passed = ktime_sub(now, apic->timer.last_update);
538
539 counter_passed = div64_u64(ktime_to_ns(passed),
540 (APIC_BUS_CYCLE_NS * apic->timer.divide_count));
541
542 if (counter_passed > tmcct) {
543 if (unlikely(!apic_lvtt_period(apic))) {
544 /* one-shot timers stick at 0 until reset */
545 tmcct = 0;
546 } else {
547 /*
548 * periodic timers reset to APIC_TMICT when they
549 * hit 0. The while loop simulates this happening N
550 * times. (counter_passed %= tmcct) would also work,
551 * but might be slower or not work on 32-bit??
552 */
553 while (counter_passed > tmcct)
554 counter_passed -= tmcct;
555 tmcct -= counter_passed;
556 }
557 } else {
558 tmcct -= counter_passed;
559 }
560 536
561 return tmcct; 537 return tmcct;
562} 538}
@@ -653,8 +629,6 @@ static void start_apic_timer(struct kvm_lapic *apic)
653{ 629{
654 ktime_t now = apic->timer.dev.base->get_time(); 630 ktime_t now = apic->timer.dev.base->get_time();
655 631
656 apic->timer.last_update = now;
657
658 apic->timer.period = apic_get_reg(apic, APIC_TMICT) * 632 apic->timer.period = apic_get_reg(apic, APIC_TMICT) *
659 APIC_BUS_CYCLE_NS * apic->timer.divide_count; 633 APIC_BUS_CYCLE_NS * apic->timer.divide_count;
660 atomic_set(&apic->timer.pending, 0); 634 atomic_set(&apic->timer.pending, 0);
@@ -1110,16 +1084,6 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
1110 } 1084 }
1111} 1085}
1112 1086
1113void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
1114{
1115 struct kvm_lapic *apic = vcpu->arch.apic;
1116
1117 if (apic && apic_lvt_vector(apic, APIC_LVTT) == vec)
1118 apic->timer.last_update = ktime_add_ns(
1119 apic->timer.last_update,
1120 apic->timer.period);
1121}
1122
1123int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) 1087int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
1124{ 1088{
1125 int vector = kvm_apic_has_interrupt(vcpu); 1089 int vector = kvm_apic_has_interrupt(vcpu);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 81858881287..45ab6ee7120 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -12,7 +12,6 @@ struct kvm_lapic {
12 atomic_t pending; 12 atomic_t pending;
13 s64 period; /* unit: ns */ 13 s64 period; /* unit: ns */
14 u32 divide_count; 14 u32 divide_count;
15 ktime_t last_update;
16 struct hrtimer dev; 15 struct hrtimer dev;
17 } timer; 16 } timer;
18 struct kvm_vcpu *vcpu; 17 struct kvm_vcpu *vcpu;
@@ -42,7 +41,6 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
42void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu); 41void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
43int kvm_lapic_enabled(struct kvm_vcpu *vcpu); 42int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
44int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); 43int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
45void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
46 44
47void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); 45void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
48void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu); 46void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 83f11c7474a..2d4477c7147 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1698,8 +1698,13 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1698 if (largepage) 1698 if (largepage)
1699 spte |= PT_PAGE_SIZE_MASK; 1699 spte |= PT_PAGE_SIZE_MASK;
1700 if (mt_mask) { 1700 if (mt_mask) {
1701 mt_mask = get_memory_type(vcpu, gfn) << 1701 if (!kvm_is_mmio_pfn(pfn)) {
1702 kvm_x86_ops->get_mt_mask_shift(); 1702 mt_mask = get_memory_type(vcpu, gfn) <<
1703 kvm_x86_ops->get_mt_mask_shift();
1704 mt_mask |= VMX_EPT_IGMT_BIT;
1705 } else
1706 mt_mask = MTRR_TYPE_UNCACHABLE <<
1707 kvm_x86_ops->get_mt_mask_shift();
1703 spte |= mt_mask; 1708 spte |= mt_mask;
1704 } 1709 }
1705 1710
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1452851ae25..a9e769e4e25 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1600,7 +1600,6 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
1600 /* Okay, we can deliver the interrupt: grab it and update PIC state. */ 1600 /* Okay, we can deliver the interrupt: grab it and update PIC state. */
1601 intr_vector = kvm_cpu_get_interrupt(vcpu); 1601 intr_vector = kvm_cpu_get_interrupt(vcpu);
1602 svm_inject_irq(svm, intr_vector); 1602 svm_inject_irq(svm, intr_vector);
1603 kvm_timer_intr_post(vcpu, intr_vector);
1604out: 1603out:
1605 update_cr8_intercept(vcpu); 1604 update_cr8_intercept(vcpu);
1606} 1605}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6259d746764..7611af57682 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -903,6 +903,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
903 data = vmcs_readl(GUEST_SYSENTER_ESP); 903 data = vmcs_readl(GUEST_SYSENTER_ESP);
904 break; 904 break;
905 default: 905 default:
906 vmx_load_host_state(to_vmx(vcpu));
906 msr = find_msr_entry(to_vmx(vcpu), msr_index); 907 msr = find_msr_entry(to_vmx(vcpu), msr_index);
907 if (msr) { 908 if (msr) {
908 data = msr->data; 909 data = msr->data;
@@ -3285,7 +3286,6 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
3285 } 3286 }
3286 if (vcpu->arch.interrupt.pending) { 3287 if (vcpu->arch.interrupt.pending) {
3287 vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); 3288 vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
3288 kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr);
3289 if (kvm_cpu_has_interrupt(vcpu)) 3289 if (kvm_cpu_has_interrupt(vcpu))
3290 enable_irq_window(vcpu); 3290 enable_irq_window(vcpu);
3291 } 3291 }
@@ -3687,8 +3687,7 @@ static int __init vmx_init(void)
3687 if (vm_need_ept()) { 3687 if (vm_need_ept()) {
3688 bypass_guest_pf = 0; 3688 bypass_guest_pf = 0;
3689 kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | 3689 kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
3690 VMX_EPT_WRITABLE_MASK | 3690 VMX_EPT_WRITABLE_MASK);
3691 VMX_EPT_IGMT_BIT);
3692 kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, 3691 kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
3693 VMX_EPT_EXECUTABLE_MASK, 3692 VMX_EPT_EXECUTABLE_MASK,
3694 VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); 3693 VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cc17546a240..758b7a155ae 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -967,7 +967,6 @@ int kvm_dev_ioctl_check_extension(long ext)
967 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: 967 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
968 case KVM_CAP_SET_TSS_ADDR: 968 case KVM_CAP_SET_TSS_ADDR:
969 case KVM_CAP_EXT_CPUID: 969 case KVM_CAP_EXT_CPUID:
970 case KVM_CAP_CLOCKSOURCE:
971 case KVM_CAP_PIT: 970 case KVM_CAP_PIT:
972 case KVM_CAP_NOP_IO_DELAY: 971 case KVM_CAP_NOP_IO_DELAY:
973 case KVM_CAP_MP_STATE: 972 case KVM_CAP_MP_STATE:
@@ -992,6 +991,9 @@ int kvm_dev_ioctl_check_extension(long ext)
992 case KVM_CAP_IOMMU: 991 case KVM_CAP_IOMMU:
993 r = iommu_found(); 992 r = iommu_found();
994 break; 993 break;
994 case KVM_CAP_CLOCKSOURCE:
995 r = boot_cpu_has(X86_FEATURE_CONSTANT_TSC);
996 break;
995 default: 997 default:
996 r = 0; 998 r = 0;
997 break; 999 break;
@@ -4127,9 +4129,13 @@ static void kvm_free_vcpus(struct kvm *kvm)
4127 4129
4128} 4130}
4129 4131
4130void kvm_arch_destroy_vm(struct kvm *kvm) 4132void kvm_arch_sync_events(struct kvm *kvm)
4131{ 4133{
4132 kvm_free_all_assigned_devices(kvm); 4134 kvm_free_all_assigned_devices(kvm);
4135}
4136
4137void kvm_arch_destroy_vm(struct kvm *kvm)
4138{
4133 kvm_iommu_unmap_guest(kvm); 4139 kvm_iommu_unmap_guest(kvm);
4134 kvm_free_pit(kvm); 4140 kvm_free_pit(kvm);
4135 kfree(kvm->arch.vpic); 4141 kfree(kvm->arch.vpic);
diff --git a/arch/x86/lguest/Kconfig b/arch/x86/lguest/Kconfig
index c70e12b1a63..8dab8f7844d 100644
--- a/arch/x86/lguest/Kconfig
+++ b/arch/x86/lguest/Kconfig
@@ -3,7 +3,6 @@ config LGUEST_GUEST
3 select PARAVIRT 3 select PARAVIRT
4 depends on X86_32 4 depends on X86_32
5 depends on !X86_PAE 5 depends on !X86_PAE
6 depends on !X86_VOYAGER
7 select VIRTIO 6 select VIRTIO
8 select VIRTIO_RING 7 select VIRTIO_RING
9 select VIRTIO_CONSOLE 8 select VIRTIO_CONSOLE
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index ad374003742..51f1504cddd 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -28,7 +28,7 @@
28 28
29#include <linux/linkage.h> 29#include <linux/linkage.h>
30#include <asm/dwarf2.h> 30#include <asm/dwarf2.h>
31#include <asm/page.h> 31#include <asm/page_types.h>
32#include <asm/errno.h> 32#include <asm/errno.h>
33#include <asm/asm-offsets.h> 33#include <asm/asm-offsets.h>
34#include <asm/thread_info.h> 34#include <asm/thread_info.h>
diff --git a/arch/x86/mach-voyager/Makefile b/arch/x86/mach-voyager/Makefile
deleted file mode 100644
index 15c250b371d..00000000000
--- a/arch/x86/mach-voyager/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
1#
2# Makefile for the linux kernel.
3#
4
5EXTRA_CFLAGS := -Iarch/x86/kernel
6obj-y := setup.o voyager_basic.o voyager_thread.o
7
8obj-$(CONFIG_SMP) += voyager_smp.o voyager_cat.o
diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c
deleted file mode 100644
index 66b7eb57d8e..00000000000
--- a/arch/x86/mach-voyager/setup.c
+++ /dev/null
@@ -1,119 +0,0 @@
1/*
2 * Machine specific setup for generic
3 */
4
5#include <linux/init.h>
6#include <linux/interrupt.h>
7#include <asm/arch_hooks.h>
8#include <asm/voyager.h>
9#include <asm/e820.h>
10#include <asm/io.h>
11#include <asm/setup.h>
12#include <asm/cpu.h>
13
14void __init pre_intr_init_hook(void)
15{
16 init_ISA_irqs();
17}
18
19/*
20 * IRQ2 is cascade interrupt to second interrupt controller
21 */
22static struct irqaction irq2 = {
23 .handler = no_action,
24 .mask = CPU_MASK_NONE,
25 .name = "cascade",
26};
27
28void __init intr_init_hook(void)
29{
30#ifdef CONFIG_SMP
31 voyager_smp_intr_init();
32#endif
33
34 setup_irq(2, &irq2);
35}
36
37static void voyager_disable_tsc(void)
38{
39 /* Voyagers run their CPUs from independent clocks, so disable
40 * the TSC code because we can't sync them */
41 setup_clear_cpu_cap(X86_FEATURE_TSC);
42}
43
44void __init pre_setup_arch_hook(void)
45{
46 voyager_disable_tsc();
47}
48
49void __init pre_time_init_hook(void)
50{
51 voyager_disable_tsc();
52}
53
54void __init trap_init_hook(void)
55{
56}
57
58static struct irqaction irq0 = {
59 .handler = timer_interrupt,
60 .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL,
61 .mask = CPU_MASK_NONE,
62 .name = "timer"
63};
64
65void __init time_init_hook(void)
66{
67 irq0.mask = cpumask_of_cpu(safe_smp_processor_id());
68 setup_irq(0, &irq0);
69}
70
71/* Hook for machine specific memory setup. */
72
73char *__init machine_specific_memory_setup(void)
74{
75 char *who;
76 int new_nr;
77
78 who = "NOT VOYAGER";
79
80 if (voyager_level == 5) {
81 __u32 addr, length;
82 int i;
83
84 who = "Voyager-SUS";
85
86 e820.nr_map = 0;
87 for (i = 0; voyager_memory_detect(i, &addr, &length); i++) {
88 e820_add_region(addr, length, E820_RAM);
89 }
90 return who;
91 } else if (voyager_level == 4) {
92 __u32 tom;
93 __u16 catbase = inb(VOYAGER_SSPB_RELOCATION_PORT) << 8;
94 /* select the DINO config space */
95 outb(VOYAGER_DINO, VOYAGER_CAT_CONFIG_PORT);
96 /* Read DINO top of memory register */
97 tom = ((inb(catbase + 0x4) & 0xf0) << 16)
98 + ((inb(catbase + 0x5) & 0x7f) << 24);
99
100 if (inb(catbase) != VOYAGER_DINO) {
101 printk(KERN_ERR
102 "Voyager: Failed to get DINO for L4, setting tom to EXT_MEM_K\n");
103 tom = (boot_params.screen_info.ext_mem_k) << 10;
104 }
105 who = "Voyager-TOM";
106 e820_add_region(0, 0x9f000, E820_RAM);
107 /* map from 1M to top of memory */
108 e820_add_region(1 * 1024 * 1024, tom - 1 * 1024 * 1024,
109 E820_RAM);
110 /* FIXME: Should check the ASICs to see if I need to
111 * take out the 8M window. Just do it at the moment
112 * */
113 e820_add_region(8 * 1024 * 1024, 8 * 1024 * 1024,
114 E820_RESERVED);
115 return who;
116 }
117
118 return default_machine_specific_memory_setup();
119}
diff --git a/arch/x86/mach-voyager/voyager_basic.c b/arch/x86/mach-voyager/voyager_basic.c
deleted file mode 100644
index 46d6f806769..00000000000
--- a/arch/x86/mach-voyager/voyager_basic.c
+++ /dev/null
@@ -1,317 +0,0 @@
1/* Copyright (C) 1999,2001
2 *
3 * Author: J.E.J.Bottomley@HansenPartnership.com
4 *
5 * This file contains all the voyager specific routines for getting
6 * initialisation of the architecture to function. For additional
7 * features see:
8 *
9 * voyager_cat.c - Voyager CAT bus interface
10 * voyager_smp.c - Voyager SMP hal (emulates linux smp.c)
11 */
12
13#include <linux/module.h>
14#include <linux/types.h>
15#include <linux/sched.h>
16#include <linux/ptrace.h>
17#include <linux/ioport.h>
18#include <linux/interrupt.h>
19#include <linux/init.h>
20#include <linux/delay.h>
21#include <linux/reboot.h>
22#include <linux/sysrq.h>
23#include <linux/smp.h>
24#include <linux/nodemask.h>
25#include <asm/io.h>
26#include <asm/voyager.h>
27#include <asm/vic.h>
28#include <linux/pm.h>
29#include <asm/tlbflush.h>
30#include <asm/arch_hooks.h>
31#include <asm/i8253.h>
32
33/*
34 * Power off function, if any
35 */
36void (*pm_power_off) (void);
37EXPORT_SYMBOL(pm_power_off);
38
39int voyager_level = 0;
40
41struct voyager_SUS *voyager_SUS = NULL;
42
43#ifdef CONFIG_SMP
44static void voyager_dump(int dummy1, struct tty_struct *dummy3)
45{
46 /* get here via a sysrq */
47 voyager_smp_dump();
48}
49
50static struct sysrq_key_op sysrq_voyager_dump_op = {
51 .handler = voyager_dump,
52 .help_msg = "Voyager",
53 .action_msg = "Dump Voyager Status",
54};
55#endif
56
57void voyager_detect(struct voyager_bios_info *bios)
58{
59 if (bios->len != 0xff) {
60 int class = (bios->class_1 << 8)
61 | (bios->class_2 & 0xff);
62
63 printk("Voyager System detected.\n"
64 " Class %x, Revision %d.%d\n",
65 class, bios->major, bios->minor);
66 if (class == VOYAGER_LEVEL4)
67 voyager_level = 4;
68 else if (class < VOYAGER_LEVEL5_AND_ABOVE)
69 voyager_level = 3;
70 else
71 voyager_level = 5;
72 printk(" Architecture Level %d\n", voyager_level);
73 if (voyager_level < 4)
74 printk
75 ("\n**WARNING**: Voyager HAL only supports Levels 4 and 5 Architectures at the moment\n\n");
76 /* install the power off handler */
77 pm_power_off = voyager_power_off;
78#ifdef CONFIG_SMP
79 register_sysrq_key('v', &sysrq_voyager_dump_op);
80#endif
81 } else {
82 printk("\n\n**WARNING**: No Voyager Subsystem Found\n");
83 }
84}
85
86void voyager_system_interrupt(int cpl, void *dev_id)
87{
88 printk("Voyager: detected system interrupt\n");
89}
90
91/* Routine to read information from the extended CMOS area */
92__u8 voyager_extended_cmos_read(__u16 addr)
93{
94 outb(addr & 0xff, 0x74);
95 outb((addr >> 8) & 0xff, 0x75);
96 return inb(0x76);
97}
98
99/* internal definitions for the SUS Click Map of memory */
100
101#define CLICK_ENTRIES 16
102#define CLICK_SIZE 4096 /* click to byte conversion for Length */
103
104typedef struct ClickMap {
105 struct Entry {
106 __u32 Address;
107 __u32 Length;
108 } Entry[CLICK_ENTRIES];
109} ClickMap_t;
110
111/* This routine is pretty much an awful hack to read the bios clickmap by
112 * mapping it into page 0. There are usually three regions in the map:
113 * Base Memory
114 * Extended Memory
115 * zero length marker for end of map
116 *
117 * Returns are 0 for failure and 1 for success on extracting region.
118 */
119int __init voyager_memory_detect(int region, __u32 * start, __u32 * length)
120{
121 int i;
122 int retval = 0;
123 __u8 cmos[4];
124 ClickMap_t *map;
125 unsigned long map_addr;
126 unsigned long old;
127
128 if (region >= CLICK_ENTRIES) {
129 printk("Voyager: Illegal ClickMap region %d\n", region);
130 return 0;
131 }
132
133 for (i = 0; i < sizeof(cmos); i++)
134 cmos[i] =
135 voyager_extended_cmos_read(VOYAGER_MEMORY_CLICKMAP + i);
136
137 map_addr = *(unsigned long *)cmos;
138
139 /* steal page 0 for this */
140 old = pg0[0];
141 pg0[0] = ((map_addr & PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT);
142 local_flush_tlb();
143 /* now clear everything out but page 0 */
144 map = (ClickMap_t *) (map_addr & (~PAGE_MASK));
145
146 /* zero length is the end of the clickmap */
147 if (map->Entry[region].Length != 0) {
148 *length = map->Entry[region].Length * CLICK_SIZE;
149 *start = map->Entry[region].Address;
150 retval = 1;
151 }
152
153 /* replace the mapping */
154 pg0[0] = old;
155 local_flush_tlb();
156 return retval;
157}
158
159/* voyager specific handling code for timer interrupts. Used to hand
160 * off the timer tick to the SMP code, since the VIC doesn't have an
161 * internal timer (The QIC does, but that's another story). */
162void voyager_timer_interrupt(void)
163{
164 if ((jiffies & 0x3ff) == 0) {
165
166 /* There seems to be something flaky in either
167 * hardware or software that is resetting the timer 0
168 * count to something much higher than it should be
169 * This seems to occur in the boot sequence, just
170 * before root is mounted. Therefore, every 10
171 * seconds or so, we sanity check the timer zero count
172 * and kick it back to where it should be.
173 *
174 * FIXME: This is the most awful hack yet seen. I
175 * should work out exactly what is interfering with
176 * the timer count settings early in the boot sequence
177 * and swiftly introduce it to something sharp and
178 * pointy. */
179 __u16 val;
180
181 spin_lock(&i8253_lock);
182
183 outb_p(0x00, 0x43);
184 val = inb_p(0x40);
185 val |= inb(0x40) << 8;
186 spin_unlock(&i8253_lock);
187
188 if (val > LATCH) {
189 printk
190 ("\nVOYAGER: countdown timer value too high (%d), resetting\n\n",
191 val);
192 spin_lock(&i8253_lock);
193 outb(0x34, 0x43);
194 outb_p(LATCH & 0xff, 0x40); /* LSB */
195 outb(LATCH >> 8, 0x40); /* MSB */
196 spin_unlock(&i8253_lock);
197 }
198 }
199#ifdef CONFIG_SMP
200 smp_vic_timer_interrupt();
201#endif
202}
203
204void voyager_power_off(void)
205{
206 printk("VOYAGER Power Off\n");
207
208 if (voyager_level == 5) {
209 voyager_cat_power_off();
210 } else if (voyager_level == 4) {
211 /* This doesn't apparently work on most L4 machines,
212 * but the specs say to do this to get automatic power
213 * off. Unfortunately, if it doesn't power off the
214 * machine, it ends up doing a cold restart, which
215 * isn't really intended, so comment out the code */
216#if 0
217 int port;
218
219 /* enable the voyager Configuration Space */
220 outb((inb(VOYAGER_MC_SETUP) & 0xf0) | 0x8, VOYAGER_MC_SETUP);
221 /* the port for the power off flag is an offset from the
222 floating base */
223 port = (inb(VOYAGER_SSPB_RELOCATION_PORT) << 8) + 0x21;
224 /* set the power off flag */
225 outb(inb(port) | 0x1, port);
226#endif
227 }
228 /* and wait for it to happen */
229 local_irq_disable();
230 for (;;)
231 halt();
232}
233
234/* copied from process.c */
235static inline void kb_wait(void)
236{
237 int i;
238
239 for (i = 0; i < 0x10000; i++)
240 if ((inb_p(0x64) & 0x02) == 0)
241 break;
242}
243
244void machine_shutdown(void)
245{
246 /* Architecture specific shutdown needed before a kexec */
247}
248
249void machine_restart(char *cmd)
250{
251 printk("Voyager Warm Restart\n");
252 kb_wait();
253
254 if (voyager_level == 5) {
255 /* write magic values to the RTC to inform system that
256 * shutdown is beginning */
257 outb(0x8f, 0x70);
258 outb(0x5, 0x71);
259
260 udelay(50);
261 outb(0xfe, 0x64); /* pull reset low */
262 } else if (voyager_level == 4) {
263 __u16 catbase = inb(VOYAGER_SSPB_RELOCATION_PORT) << 8;
264 __u8 basebd = inb(VOYAGER_MC_SETUP);
265
266 outb(basebd | 0x08, VOYAGER_MC_SETUP);
267 outb(0x02, catbase + 0x21);
268 }
269 local_irq_disable();
270 for (;;)
271 halt();
272}
273
274void machine_emergency_restart(void)
275{
276 /*for now, just hook this to a warm restart */
277 machine_restart(NULL);
278}
279
280void mca_nmi_hook(void)
281{
282 __u8 dumpval __maybe_unused = inb(0xf823);
283 __u8 swnmi __maybe_unused = inb(0xf813);
284
285 /* FIXME: assume dump switch pressed */
286 /* check to see if the dump switch was pressed */
287 VDEBUG(("VOYAGER: dumpval = 0x%x, swnmi = 0x%x\n", dumpval, swnmi));
288 /* clear swnmi */
289 outb(0xff, 0xf813);
290 /* tell SUS to ignore dump */
291 if (voyager_level == 5 && voyager_SUS != NULL) {
292 if (voyager_SUS->SUS_mbox == VOYAGER_DUMP_BUTTON_NMI) {
293 voyager_SUS->kernel_mbox = VOYAGER_NO_COMMAND;
294 voyager_SUS->kernel_flags |= VOYAGER_OS_IN_PROGRESS;
295 udelay(1000);
296 voyager_SUS->kernel_mbox = VOYAGER_IGNORE_DUMP;
297 voyager_SUS->kernel_flags &= ~VOYAGER_OS_IN_PROGRESS;
298 }
299 }
300 printk(KERN_ERR
301 "VOYAGER: Dump switch pressed, printing CPU%d tracebacks\n",
302 smp_processor_id());
303 show_stack(NULL, NULL);
304 show_state();
305}
306
307void machine_halt(void)
308{
309 /* treat a halt like a power off */
310 machine_power_off();
311}
312
313void machine_power_off(void)
314{
315 if (pm_power_off)
316 pm_power_off();
317}
diff --git a/arch/x86/mach-voyager/voyager_cat.c b/arch/x86/mach-voyager/voyager_cat.c
deleted file mode 100644
index 2ad598c104a..00000000000
--- a/arch/x86/mach-voyager/voyager_cat.c
+++ /dev/null
@@ -1,1197 +0,0 @@
1/* -*- mode: c; c-basic-offset: 8 -*- */
2
3/* Copyright (C) 1999,2001
4 *
5 * Author: J.E.J.Bottomley@HansenPartnership.com
6 *
7 * This file contains all the logic for manipulating the CAT bus
8 * in a level 5 machine.
9 *
10 * The CAT bus is a serial configuration and test bus. Its primary
11 * uses are to probe the initial configuration of the system and to
12 * diagnose error conditions when a system interrupt occurs. The low
13 * level interface is fairly primitive, so most of this file consists
14 * of bit shift manipulations to send and receive packets on the
15 * serial bus */
16
17#include <linux/types.h>
18#include <linux/completion.h>
19#include <linux/sched.h>
20#include <asm/voyager.h>
21#include <asm/vic.h>
22#include <linux/ioport.h>
23#include <linux/init.h>
24#include <linux/slab.h>
25#include <linux/delay.h>
26#include <asm/io.h>
27
28#ifdef VOYAGER_CAT_DEBUG
29#define CDEBUG(x) printk x
30#else
31#define CDEBUG(x)
32#endif
33
34/* the CAT command port */
35#define CAT_CMD (sspb + 0xe)
36/* the CAT data port */
37#define CAT_DATA (sspb + 0xd)
38
39/* the internal cat functions */
40static void cat_pack(__u8 * msg, __u16 start_bit, __u8 * data, __u16 num_bits);
41static void cat_unpack(__u8 * msg, __u16 start_bit, __u8 * data,
42 __u16 num_bits);
43static void cat_build_header(__u8 * header, const __u16 len,
44 const __u16 smallest_reg_bits,
45 const __u16 longest_reg_bits);
46static int cat_sendinst(voyager_module_t * modp, voyager_asic_t * asicp,
47 __u8 reg, __u8 op);
48static int cat_getdata(voyager_module_t * modp, voyager_asic_t * asicp,
49 __u8 reg, __u8 * value);
50static int cat_shiftout(__u8 * data, __u16 data_bytes, __u16 header_bytes,
51 __u8 pad_bits);
52static int cat_write(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg,
53 __u8 value);
54static int cat_read(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg,
55 __u8 * value);
56static int cat_subread(voyager_module_t * modp, voyager_asic_t * asicp,
57 __u16 offset, __u16 len, void *buf);
58static int cat_senddata(voyager_module_t * modp, voyager_asic_t * asicp,
59 __u8 reg, __u8 value);
60static int cat_disconnect(voyager_module_t * modp, voyager_asic_t * asicp);
61static int cat_connect(voyager_module_t * modp, voyager_asic_t * asicp);
62
63static inline const char *cat_module_name(int module_id)
64{
65 switch (module_id) {
66 case 0x10:
67 return "Processor Slot 0";
68 case 0x11:
69 return "Processor Slot 1";
70 case 0x12:
71 return "Processor Slot 2";
72 case 0x13:
73 return "Processor Slot 4";
74 case 0x14:
75 return "Memory Slot 0";
76 case 0x15:
77 return "Memory Slot 1";
78 case 0x18:
79 return "Primary Microchannel";
80 case 0x19:
81 return "Secondary Microchannel";
82 case 0x1a:
83 return "Power Supply Interface";
84 case 0x1c:
85 return "Processor Slot 5";
86 case 0x1d:
87 return "Processor Slot 6";
88 case 0x1e:
89 return "Processor Slot 7";
90 case 0x1f:
91 return "Processor Slot 8";
92 default:
93 return "Unknown Module";
94 }
95}
96
97static int sspb = 0; /* stores the super port location */
98int voyager_8slot = 0; /* set to true if a 51xx monster */
99
100voyager_module_t *voyager_cat_list;
101
102/* the I/O port assignments for the VIC and QIC */
103static struct resource vic_res = {
104 .name = "Voyager Interrupt Controller",
105 .start = 0xFC00,
106 .end = 0xFC6F
107};
108static struct resource qic_res = {
109 .name = "Quad Interrupt Controller",
110 .start = 0xFC70,
111 .end = 0xFCFF
112};
113
114/* This function is used to pack a data bit stream inside a message.
115 * It writes num_bits of the data buffer in msg starting at start_bit.
116 * Note: This function assumes that any unused bit in the data stream
117 * is set to zero so that the ors will work correctly */
118static void
119cat_pack(__u8 * msg, const __u16 start_bit, __u8 * data, const __u16 num_bits)
120{
121 /* compute initial shift needed */
122 const __u16 offset = start_bit % BITS_PER_BYTE;
123 __u16 len = num_bits / BITS_PER_BYTE;
124 __u16 byte = start_bit / BITS_PER_BYTE;
125 __u16 residue = (num_bits % BITS_PER_BYTE) + offset;
126 int i;
127
128 /* adjust if we have more than a byte of residue */
129 if (residue >= BITS_PER_BYTE) {
130 residue -= BITS_PER_BYTE;
131 len++;
132 }
133
134 /* clear out the bits. We assume here that if len==0 then
135 * residue >= offset. This is always true for the catbus
136 * operations */
137 msg[byte] &= 0xff << (BITS_PER_BYTE - offset);
138 msg[byte++] |= data[0] >> offset;
139 if (len == 0)
140 return;
141 for (i = 1; i < len; i++)
142 msg[byte++] = (data[i - 1] << (BITS_PER_BYTE - offset))
143 | (data[i] >> offset);
144 if (residue != 0) {
145 __u8 mask = 0xff >> residue;
146 __u8 last_byte = data[i - 1] << (BITS_PER_BYTE - offset)
147 | (data[i] >> offset);
148
149 last_byte &= ~mask;
150 msg[byte] &= mask;
151 msg[byte] |= last_byte;
152 }
153 return;
154}
155
156/* unpack the data again (same arguments as cat_pack()). data buffer
157 * must be zero populated.
158 *
159 * Function: given a message string move to start_bit and copy num_bits into
160 * data (starting at bit 0 in data).
161 */
162static void
163cat_unpack(__u8 * msg, const __u16 start_bit, __u8 * data, const __u16 num_bits)
164{
165 /* compute initial shift needed */
166 const __u16 offset = start_bit % BITS_PER_BYTE;
167 __u16 len = num_bits / BITS_PER_BYTE;
168 const __u8 last_bits = num_bits % BITS_PER_BYTE;
169 __u16 byte = start_bit / BITS_PER_BYTE;
170 int i;
171
172 if (last_bits != 0)
173 len++;
174
175 /* special case: want < 8 bits from msg and we can get it from
176 * a single byte of the msg */
177 if (len == 0 && BITS_PER_BYTE - offset >= num_bits) {
178 data[0] = msg[byte] << offset;
179 data[0] &= 0xff >> (BITS_PER_BYTE - num_bits);
180 return;
181 }
182 for (i = 0; i < len; i++) {
183 /* this annoying if has to be done just in case a read of
184 * msg one beyond the array causes a panic */
185 if (offset != 0) {
186 data[i] = msg[byte++] << offset;
187 data[i] |= msg[byte] >> (BITS_PER_BYTE - offset);
188 } else {
189 data[i] = msg[byte++];
190 }
191 }
192 /* do we need to truncate the final byte */
193 if (last_bits != 0) {
194 data[i - 1] &= 0xff << (BITS_PER_BYTE - last_bits);
195 }
196 return;
197}
198
199static void
200cat_build_header(__u8 * header, const __u16 len, const __u16 smallest_reg_bits,
201 const __u16 longest_reg_bits)
202{
203 int i;
204 __u16 start_bit = (smallest_reg_bits - 1) % BITS_PER_BYTE;
205 __u8 *last_byte = &header[len - 1];
206
207 if (start_bit == 0)
208 start_bit = 1; /* must have at least one bit in the hdr */
209
210 for (i = 0; i < len; i++)
211 header[i] = 0;
212
213 for (i = start_bit; i > 0; i--)
214 *last_byte = ((*last_byte) << 1) + 1;
215
216}
217
218static int
219cat_sendinst(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg, __u8 op)
220{
221 __u8 parity, inst, inst_buf[4] = { 0 };
222 __u8 iseq[VOYAGER_MAX_SCAN_PATH], hseq[VOYAGER_MAX_REG_SIZE];
223 __u16 ibytes, hbytes, padbits;
224 int i;
225
226 /*
227 * Parity is the parity of the register number + 1 (READ_REGISTER
228 * and WRITE_REGISTER always add '1' to the number of bits == 1)
229 */
230 parity = (__u8) (1 + (reg & 0x01) +
231 ((__u8) (reg & 0x02) >> 1) +
232 ((__u8) (reg & 0x04) >> 2) +
233 ((__u8) (reg & 0x08) >> 3)) % 2;
234
235 inst = ((parity << 7) | (reg << 2) | op);
236
237 outb(VOYAGER_CAT_IRCYC, CAT_CMD);
238 if (!modp->scan_path_connected) {
239 if (asicp->asic_id != VOYAGER_CAT_ID) {
240 printk
241 ("**WARNING***: cat_sendinst has disconnected scan path not to CAT asic\n");
242 return 1;
243 }
244 outb(VOYAGER_CAT_HEADER, CAT_DATA);
245 outb(inst, CAT_DATA);
246 if (inb(CAT_DATA) != VOYAGER_CAT_HEADER) {
247 CDEBUG(("VOYAGER CAT: cat_sendinst failed to get CAT_HEADER\n"));
248 return 1;
249 }
250 return 0;
251 }
252 ibytes = modp->inst_bits / BITS_PER_BYTE;
253 if ((padbits = modp->inst_bits % BITS_PER_BYTE) != 0) {
254 padbits = BITS_PER_BYTE - padbits;
255 ibytes++;
256 }
257 hbytes = modp->largest_reg / BITS_PER_BYTE;
258 if (modp->largest_reg % BITS_PER_BYTE)
259 hbytes++;
260 CDEBUG(("cat_sendinst: ibytes=%d, hbytes=%d\n", ibytes, hbytes));
261 /* initialise the instruction sequence to 0xff */
262 for (i = 0; i < ibytes + hbytes; i++)
263 iseq[i] = 0xff;
264 cat_build_header(hseq, hbytes, modp->smallest_reg, modp->largest_reg);
265 cat_pack(iseq, modp->inst_bits, hseq, hbytes * BITS_PER_BYTE);
266 inst_buf[0] = inst;
267 inst_buf[1] = 0xFF >> (modp->largest_reg % BITS_PER_BYTE);
268 cat_pack(iseq, asicp->bit_location, inst_buf, asicp->ireg_length);
269#ifdef VOYAGER_CAT_DEBUG
270 printk("ins = 0x%x, iseq: ", inst);
271 for (i = 0; i < ibytes + hbytes; i++)
272 printk("0x%x ", iseq[i]);
273 printk("\n");
274#endif
275 if (cat_shiftout(iseq, ibytes, hbytes, padbits)) {
276 CDEBUG(("VOYAGER CAT: cat_sendinst: cat_shiftout failed\n"));
277 return 1;
278 }
279 CDEBUG(("CAT SHIFTOUT DONE\n"));
280 return 0;
281}
282
283static int
284cat_getdata(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg,
285 __u8 * value)
286{
287 if (!modp->scan_path_connected) {
288 if (asicp->asic_id != VOYAGER_CAT_ID) {
289 CDEBUG(("VOYAGER CAT: ERROR: cat_getdata to CAT asic with scan path connected\n"));
290 return 1;
291 }
292 if (reg > VOYAGER_SUBADDRHI)
293 outb(VOYAGER_CAT_RUN, CAT_CMD);
294 outb(VOYAGER_CAT_DRCYC, CAT_CMD);
295 outb(VOYAGER_CAT_HEADER, CAT_DATA);
296 *value = inb(CAT_DATA);
297 outb(0xAA, CAT_DATA);
298 if (inb(CAT_DATA) != VOYAGER_CAT_HEADER) {
299 CDEBUG(("cat_getdata: failed to get VOYAGER_CAT_HEADER\n"));
300 return 1;
301 }
302 return 0;
303 } else {
304 __u16 sbits = modp->num_asics - 1 + asicp->ireg_length;
305 __u16 sbytes = sbits / BITS_PER_BYTE;
306 __u16 tbytes;
307 __u8 string[VOYAGER_MAX_SCAN_PATH],
308 trailer[VOYAGER_MAX_REG_SIZE];
309 __u8 padbits;
310 int i;
311
312 outb(VOYAGER_CAT_DRCYC, CAT_CMD);
313
314 if ((padbits = sbits % BITS_PER_BYTE) != 0) {
315 padbits = BITS_PER_BYTE - padbits;
316 sbytes++;
317 }
318 tbytes = asicp->ireg_length / BITS_PER_BYTE;
319 if (asicp->ireg_length % BITS_PER_BYTE)
320 tbytes++;
321 CDEBUG(("cat_getdata: tbytes = %d, sbytes = %d, padbits = %d\n",
322 tbytes, sbytes, padbits));
323 cat_build_header(trailer, tbytes, 1, asicp->ireg_length);
324
325 for (i = tbytes - 1; i >= 0; i--) {
326 outb(trailer[i], CAT_DATA);
327 string[sbytes + i] = inb(CAT_DATA);
328 }
329
330 for (i = sbytes - 1; i >= 0; i--) {
331 outb(0xaa, CAT_DATA);
332 string[i] = inb(CAT_DATA);
333 }
334 *value = 0;
335 cat_unpack(string,
336 padbits + (tbytes * BITS_PER_BYTE) +
337 asicp->asic_location, value, asicp->ireg_length);
338#ifdef VOYAGER_CAT_DEBUG
339 printk("value=0x%x, string: ", *value);
340 for (i = 0; i < tbytes + sbytes; i++)
341 printk("0x%x ", string[i]);
342 printk("\n");
343#endif
344
345 /* sanity check the rest of the return */
346 for (i = 0; i < tbytes; i++) {
347 __u8 input = 0;
348
349 cat_unpack(string, padbits + (i * BITS_PER_BYTE),
350 &input, BITS_PER_BYTE);
351 if (trailer[i] != input) {
352 CDEBUG(("cat_getdata: failed to sanity check rest of ret(%d) 0x%x != 0x%x\n", i, input, trailer[i]));
353 return 1;
354 }
355 }
356 CDEBUG(("cat_getdata DONE\n"));
357 return 0;
358 }
359}
360
361static int
362cat_shiftout(__u8 * data, __u16 data_bytes, __u16 header_bytes, __u8 pad_bits)
363{
364 int i;
365
366 for (i = data_bytes + header_bytes - 1; i >= header_bytes; i--)
367 outb(data[i], CAT_DATA);
368
369 for (i = header_bytes - 1; i >= 0; i--) {
370 __u8 header = 0;
371 __u8 input;
372
373 outb(data[i], CAT_DATA);
374 input = inb(CAT_DATA);
375 CDEBUG(("cat_shiftout: returned 0x%x\n", input));
376 cat_unpack(data, ((data_bytes + i) * BITS_PER_BYTE) - pad_bits,
377 &header, BITS_PER_BYTE);
378 if (input != header) {
379 CDEBUG(("VOYAGER CAT: cat_shiftout failed to return header 0x%x != 0x%x\n", input, header));
380 return 1;
381 }
382 }
383 return 0;
384}
385
386static int
387cat_senddata(voyager_module_t * modp, voyager_asic_t * asicp,
388 __u8 reg, __u8 value)
389{
390 outb(VOYAGER_CAT_DRCYC, CAT_CMD);
391 if (!modp->scan_path_connected) {
392 if (asicp->asic_id != VOYAGER_CAT_ID) {
393 CDEBUG(("VOYAGER CAT: ERROR: scan path disconnected when asic != CAT\n"));
394 return 1;
395 }
396 outb(VOYAGER_CAT_HEADER, CAT_DATA);
397 outb(value, CAT_DATA);
398 if (inb(CAT_DATA) != VOYAGER_CAT_HEADER) {
399 CDEBUG(("cat_senddata: failed to get correct header response to sent data\n"));
400 return 1;
401 }
402 if (reg > VOYAGER_SUBADDRHI) {
403 outb(VOYAGER_CAT_RUN, CAT_CMD);
404 outb(VOYAGER_CAT_END, CAT_CMD);
405 outb(VOYAGER_CAT_RUN, CAT_CMD);
406 }
407
408 return 0;
409 } else {
410 __u16 hbytes = asicp->ireg_length / BITS_PER_BYTE;
411 __u16 dbytes =
412 (modp->num_asics - 1 + asicp->ireg_length) / BITS_PER_BYTE;
413 __u8 padbits, dseq[VOYAGER_MAX_SCAN_PATH],
414 hseq[VOYAGER_MAX_REG_SIZE];
415 int i;
416
417 if ((padbits = (modp->num_asics - 1
418 + asicp->ireg_length) % BITS_PER_BYTE) != 0) {
419 padbits = BITS_PER_BYTE - padbits;
420 dbytes++;
421 }
422 if (asicp->ireg_length % BITS_PER_BYTE)
423 hbytes++;
424
425 cat_build_header(hseq, hbytes, 1, asicp->ireg_length);
426
427 for (i = 0; i < dbytes + hbytes; i++)
428 dseq[i] = 0xff;
429 CDEBUG(("cat_senddata: dbytes=%d, hbytes=%d, padbits=%d\n",
430 dbytes, hbytes, padbits));
431 cat_pack(dseq, modp->num_asics - 1 + asicp->ireg_length,
432 hseq, hbytes * BITS_PER_BYTE);
433 cat_pack(dseq, asicp->asic_location, &value,
434 asicp->ireg_length);
435#ifdef VOYAGER_CAT_DEBUG
436 printk("dseq ");
437 for (i = 0; i < hbytes + dbytes; i++) {
438 printk("0x%x ", dseq[i]);
439 }
440 printk("\n");
441#endif
442 return cat_shiftout(dseq, dbytes, hbytes, padbits);
443 }
444}
445
446static int
447cat_write(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg, __u8 value)
448{
449 if (cat_sendinst(modp, asicp, reg, VOYAGER_WRITE_CONFIG))
450 return 1;
451 return cat_senddata(modp, asicp, reg, value);
452}
453
454static int
455cat_read(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg,
456 __u8 * value)
457{
458 if (cat_sendinst(modp, asicp, reg, VOYAGER_READ_CONFIG))
459 return 1;
460 return cat_getdata(modp, asicp, reg, value);
461}
462
463static int
464cat_subaddrsetup(voyager_module_t * modp, voyager_asic_t * asicp, __u16 offset,
465 __u16 len)
466{
467 __u8 val;
468
469 if (len > 1) {
470 /* set auto increment */
471 __u8 newval;
472
473 if (cat_read(modp, asicp, VOYAGER_AUTO_INC_REG, &val)) {
474 CDEBUG(("cat_subaddrsetup: read of VOYAGER_AUTO_INC_REG failed\n"));
475 return 1;
476 }
477 CDEBUG(("cat_subaddrsetup: VOYAGER_AUTO_INC_REG = 0x%x\n",
478 val));
479 newval = val | VOYAGER_AUTO_INC;
480 if (newval != val) {
481 if (cat_write(modp, asicp, VOYAGER_AUTO_INC_REG, val)) {
482 CDEBUG(("cat_subaddrsetup: write to VOYAGER_AUTO_INC_REG failed\n"));
483 return 1;
484 }
485 }
486 }
487 if (cat_write(modp, asicp, VOYAGER_SUBADDRLO, (__u8) (offset & 0xff))) {
488 CDEBUG(("cat_subaddrsetup: write to SUBADDRLO failed\n"));
489 return 1;
490 }
491 if (asicp->subaddr > VOYAGER_SUBADDR_LO) {
492 if (cat_write
493 (modp, asicp, VOYAGER_SUBADDRHI, (__u8) (offset >> 8))) {
494 CDEBUG(("cat_subaddrsetup: write to SUBADDRHI failed\n"));
495 return 1;
496 }
497 cat_read(modp, asicp, VOYAGER_SUBADDRHI, &val);
498 CDEBUG(("cat_subaddrsetup: offset = %d, hi = %d\n", offset,
499 val));
500 }
501 cat_read(modp, asicp, VOYAGER_SUBADDRLO, &val);
502 CDEBUG(("cat_subaddrsetup: offset = %d, lo = %d\n", offset, val));
503 return 0;
504}
505
506static int
507cat_subwrite(voyager_module_t * modp, voyager_asic_t * asicp, __u16 offset,
508 __u16 len, void *buf)
509{
510 int i, retval;
511
512 /* FIXME: need special actions for VOYAGER_CAT_ID here */
513 if (asicp->asic_id == VOYAGER_CAT_ID) {
514 CDEBUG(("cat_subwrite: ATTEMPT TO WRITE TO CAT ASIC\n"));
515 /* FIXME -- This is supposed to be handled better
516 * There is a problem writing to the cat asic in the
517 * PSI. The 30us delay seems to work, though */
518 udelay(30);
519 }
520
521 if ((retval = cat_subaddrsetup(modp, asicp, offset, len)) != 0) {
522 printk("cat_subwrite: cat_subaddrsetup FAILED\n");
523 return retval;
524 }
525
526 if (cat_sendinst
527 (modp, asicp, VOYAGER_SUBADDRDATA, VOYAGER_WRITE_CONFIG)) {
528 printk("cat_subwrite: cat_sendinst FAILED\n");
529 return 1;
530 }
531 for (i = 0; i < len; i++) {
532 if (cat_senddata(modp, asicp, 0xFF, ((__u8 *) buf)[i])) {
533 printk
534 ("cat_subwrite: cat_sendata element at %d FAILED\n",
535 i);
536 return 1;
537 }
538 }
539 return 0;
540}
541static int
542cat_subread(voyager_module_t * modp, voyager_asic_t * asicp, __u16 offset,
543 __u16 len, void *buf)
544{
545 int i, retval;
546
547 if ((retval = cat_subaddrsetup(modp, asicp, offset, len)) != 0) {
548 CDEBUG(("cat_subread: cat_subaddrsetup FAILED\n"));
549 return retval;
550 }
551
552 if (cat_sendinst(modp, asicp, VOYAGER_SUBADDRDATA, VOYAGER_READ_CONFIG)) {
553 CDEBUG(("cat_subread: cat_sendinst failed\n"));
554 return 1;
555 }
556 for (i = 0; i < len; i++) {
557 if (cat_getdata(modp, asicp, 0xFF, &((__u8 *) buf)[i])) {
558 CDEBUG(("cat_subread: cat_getdata element %d failed\n",
559 i));
560 return 1;
561 }
562 }
563 return 0;
564}
565
566/* buffer for storing EPROM data read in during initialisation */
567static __initdata __u8 eprom_buf[0xFFFF];
568static voyager_module_t *voyager_initial_module;
569
570/* Initialise the cat bus components. We assume this is called by the
571 * boot cpu *after* all memory initialisation has been done (so we can
572 * use kmalloc) but before smp initialisation, so we can probe the SMP
573 * configuration and pick up necessary information. */
574void __init voyager_cat_init(void)
575{
576 voyager_module_t **modpp = &voyager_initial_module;
577 voyager_asic_t **asicpp;
578 voyager_asic_t *qabc_asic = NULL;
579 int i, j;
580 unsigned long qic_addr = 0;
581 __u8 qabc_data[0x20];
582 __u8 num_submodules, val;
583 voyager_eprom_hdr_t *eprom_hdr = (voyager_eprom_hdr_t *) & eprom_buf[0];
584
585 __u8 cmos[4];
586 unsigned long addr;
587
588 /* initiallise the SUS mailbox */
589 for (i = 0; i < sizeof(cmos); i++)
590 cmos[i] = voyager_extended_cmos_read(VOYAGER_DUMP_LOCATION + i);
591 addr = *(unsigned long *)cmos;
592 if ((addr & 0xff000000) != 0xff000000) {
593 printk(KERN_ERR
594 "Voyager failed to get SUS mailbox (addr = 0x%lx\n",
595 addr);
596 } else {
597 static struct resource res;
598
599 res.name = "voyager SUS";
600 res.start = addr;
601 res.end = addr + 0x3ff;
602
603 request_resource(&iomem_resource, &res);
604 voyager_SUS = (struct voyager_SUS *)
605 ioremap(addr, 0x400);
606 printk(KERN_NOTICE "Voyager SUS mailbox version 0x%x\n",
607 voyager_SUS->SUS_version);
608 voyager_SUS->kernel_version = VOYAGER_MAILBOX_VERSION;
609 voyager_SUS->kernel_flags = VOYAGER_OS_HAS_SYSINT;
610 }
611
612 /* clear the processor counts */
613 voyager_extended_vic_processors = 0;
614 voyager_quad_processors = 0;
615
616 printk("VOYAGER: beginning CAT bus probe\n");
617 /* set up the SuperSet Port Block which tells us where the
618 * CAT communication port is */
619 sspb = inb(VOYAGER_SSPB_RELOCATION_PORT) * 0x100;
620 VDEBUG(("VOYAGER DEBUG: sspb = 0x%x\n", sspb));
621
622 /* now find out if were 8 slot or normal */
623 if ((inb(VIC_PROC_WHO_AM_I) & EIGHT_SLOT_IDENTIFIER)
624 == EIGHT_SLOT_IDENTIFIER) {
625 voyager_8slot = 1;
626 printk(KERN_NOTICE
627 "Voyager: Eight slot 51xx configuration detected\n");
628 }
629
630 for (i = VOYAGER_MIN_MODULE; i <= VOYAGER_MAX_MODULE; i++) {
631 __u8 input;
632 int asic;
633 __u16 eprom_size;
634 __u16 sp_offset;
635
636 outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT);
637 outb(i, VOYAGER_CAT_CONFIG_PORT);
638
639 /* check the presence of the module */
640 outb(VOYAGER_CAT_RUN, CAT_CMD);
641 outb(VOYAGER_CAT_IRCYC, CAT_CMD);
642 outb(VOYAGER_CAT_HEADER, CAT_DATA);
643 /* stream series of alternating 1's and 0's to stimulate
644 * response */
645 outb(0xAA, CAT_DATA);
646 input = inb(CAT_DATA);
647 outb(VOYAGER_CAT_END, CAT_CMD);
648 if (input != VOYAGER_CAT_HEADER) {
649 continue;
650 }
651 CDEBUG(("VOYAGER DEBUG: found module id 0x%x, %s\n", i,
652 cat_module_name(i)));
653 *modpp = kmalloc(sizeof(voyager_module_t), GFP_KERNEL); /*&voyager_module_storage[cat_count++]; */
654 if (*modpp == NULL) {
655 printk("**WARNING** kmalloc failure in cat_init\n");
656 continue;
657 }
658 memset(*modpp, 0, sizeof(voyager_module_t));
659 /* need temporary asic for cat_subread. It will be
660 * filled in correctly later */
661 (*modpp)->asic = kmalloc(sizeof(voyager_asic_t), GFP_KERNEL); /*&voyager_asic_storage[asic_count]; */
662 if ((*modpp)->asic == NULL) {
663 printk("**WARNING** kmalloc failure in cat_init\n");
664 continue;
665 }
666 memset((*modpp)->asic, 0, sizeof(voyager_asic_t));
667 (*modpp)->asic->asic_id = VOYAGER_CAT_ID;
668 (*modpp)->asic->subaddr = VOYAGER_SUBADDR_HI;
669 (*modpp)->module_addr = i;
670 (*modpp)->scan_path_connected = 0;
671 if (i == VOYAGER_PSI) {
672 /* Exception leg for modules with no EEPROM */
673 printk("Module \"%s\"\n", cat_module_name(i));
674 continue;
675 }
676
677 CDEBUG(("cat_init: Reading eeprom for module 0x%x at offset %d\n", i, VOYAGER_XSUM_END_OFFSET));
678 outb(VOYAGER_CAT_RUN, CAT_CMD);
679 cat_disconnect(*modpp, (*modpp)->asic);
680 if (cat_subread(*modpp, (*modpp)->asic,
681 VOYAGER_XSUM_END_OFFSET, sizeof(eprom_size),
682 &eprom_size)) {
683 printk
684 ("**WARNING**: Voyager couldn't read EPROM size for module 0x%x\n",
685 i);
686 outb(VOYAGER_CAT_END, CAT_CMD);
687 continue;
688 }
689 if (eprom_size > sizeof(eprom_buf)) {
690 printk
691 ("**WARNING**: Voyager insufficient size to read EPROM data, module 0x%x. Need %d\n",
692 i, eprom_size);
693 outb(VOYAGER_CAT_END, CAT_CMD);
694 continue;
695 }
696 outb(VOYAGER_CAT_END, CAT_CMD);
697 outb(VOYAGER_CAT_RUN, CAT_CMD);
698 CDEBUG(("cat_init: module 0x%x, eeprom_size %d\n", i,
699 eprom_size));
700 if (cat_subread
701 (*modpp, (*modpp)->asic, 0, eprom_size, eprom_buf)) {
702 outb(VOYAGER_CAT_END, CAT_CMD);
703 continue;
704 }
705 outb(VOYAGER_CAT_END, CAT_CMD);
706 printk("Module \"%s\", version 0x%x, tracer 0x%x, asics %d\n",
707 cat_module_name(i), eprom_hdr->version_id,
708 *((__u32 *) eprom_hdr->tracer), eprom_hdr->num_asics);
709 (*modpp)->ee_size = eprom_hdr->ee_size;
710 (*modpp)->num_asics = eprom_hdr->num_asics;
711 asicpp = &((*modpp)->asic);
712 sp_offset = eprom_hdr->scan_path_offset;
713 /* All we really care about are the Quad cards. We
714 * identify them because they are in a processor slot
715 * and have only four asics */
716 if ((i < 0x10 || (i >= 0x14 && i < 0x1c) || i > 0x1f)) {
717 modpp = &((*modpp)->next);
718 continue;
719 }
720 /* Now we know it's in a processor slot, does it have
721 * a quad baseboard submodule */
722 outb(VOYAGER_CAT_RUN, CAT_CMD);
723 cat_read(*modpp, (*modpp)->asic, VOYAGER_SUBMODPRESENT,
724 &num_submodules);
725 /* lowest two bits, active low */
726 num_submodules = ~(0xfc | num_submodules);
727 CDEBUG(("VOYAGER CAT: %d submodules present\n",
728 num_submodules));
729 if (num_submodules == 0) {
730 /* fill in the dyadic extended processors */
731 __u8 cpu = i & 0x07;
732
733 printk("Module \"%s\": Dyadic Processor Card\n",
734 cat_module_name(i));
735 voyager_extended_vic_processors |= (1 << cpu);
736 cpu += 4;
737 voyager_extended_vic_processors |= (1 << cpu);
738 outb(VOYAGER_CAT_END, CAT_CMD);
739 continue;
740 }
741
742 /* now we want to read the asics on the first submodule,
743 * which should be the quad base board */
744
745 cat_read(*modpp, (*modpp)->asic, VOYAGER_SUBMODSELECT, &val);
746 CDEBUG(("cat_init: SUBMODSELECT value = 0x%x\n", val));
747 val = (val & 0x7c) | VOYAGER_QUAD_BASEBOARD;
748 cat_write(*modpp, (*modpp)->asic, VOYAGER_SUBMODSELECT, val);
749
750 outb(VOYAGER_CAT_END, CAT_CMD);
751
752 CDEBUG(("cat_init: Reading eeprom for module 0x%x at offset %d\n", i, VOYAGER_XSUM_END_OFFSET));
753 outb(VOYAGER_CAT_RUN, CAT_CMD);
754 cat_disconnect(*modpp, (*modpp)->asic);
755 if (cat_subread(*modpp, (*modpp)->asic,
756 VOYAGER_XSUM_END_OFFSET, sizeof(eprom_size),
757 &eprom_size)) {
758 printk
759 ("**WARNING**: Voyager couldn't read EPROM size for module 0x%x\n",
760 i);
761 outb(VOYAGER_CAT_END, CAT_CMD);
762 continue;
763 }
764 if (eprom_size > sizeof(eprom_buf)) {
765 printk
766 ("**WARNING**: Voyager insufficient size to read EPROM data, module 0x%x. Need %d\n",
767 i, eprom_size);
768 outb(VOYAGER_CAT_END, CAT_CMD);
769 continue;
770 }
771 outb(VOYAGER_CAT_END, CAT_CMD);
772 outb(VOYAGER_CAT_RUN, CAT_CMD);
773 CDEBUG(("cat_init: module 0x%x, eeprom_size %d\n", i,
774 eprom_size));
775 if (cat_subread
776 (*modpp, (*modpp)->asic, 0, eprom_size, eprom_buf)) {
777 outb(VOYAGER_CAT_END, CAT_CMD);
778 continue;
779 }
780 outb(VOYAGER_CAT_END, CAT_CMD);
781 /* Now do everything for the QBB submodule 1 */
782 (*modpp)->ee_size = eprom_hdr->ee_size;
783 (*modpp)->num_asics = eprom_hdr->num_asics;
784 asicpp = &((*modpp)->asic);
785 sp_offset = eprom_hdr->scan_path_offset;
786 /* get rid of the dummy CAT asic and read the real one */
787 kfree((*modpp)->asic);
788 for (asic = 0; asic < (*modpp)->num_asics; asic++) {
789 int j;
790 voyager_asic_t *asicp = *asicpp = kzalloc(sizeof(voyager_asic_t), GFP_KERNEL); /*&voyager_asic_storage[asic_count++]; */
791 voyager_sp_table_t *sp_table;
792 voyager_at_t *asic_table;
793 voyager_jtt_t *jtag_table;
794
795 if (asicp == NULL) {
796 printk
797 ("**WARNING** kmalloc failure in cat_init\n");
798 continue;
799 }
800 asicpp = &(asicp->next);
801 asicp->asic_location = asic;
802 sp_table =
803 (voyager_sp_table_t *) (eprom_buf + sp_offset);
804 asicp->asic_id = sp_table->asic_id;
805 asic_table =
806 (voyager_at_t *) (eprom_buf +
807 sp_table->asic_data_offset);
808 for (j = 0; j < 4; j++)
809 asicp->jtag_id[j] = asic_table->jtag_id[j];
810 jtag_table =
811 (voyager_jtt_t *) (eprom_buf +
812 asic_table->jtag_offset);
813 asicp->ireg_length = jtag_table->ireg_len;
814 asicp->bit_location = (*modpp)->inst_bits;
815 (*modpp)->inst_bits += asicp->ireg_length;
816 if (asicp->ireg_length > (*modpp)->largest_reg)
817 (*modpp)->largest_reg = asicp->ireg_length;
818 if (asicp->ireg_length < (*modpp)->smallest_reg ||
819 (*modpp)->smallest_reg == 0)
820 (*modpp)->smallest_reg = asicp->ireg_length;
821 CDEBUG(("asic 0x%x, ireg_length=%d, bit_location=%d\n",
822 asicp->asic_id, asicp->ireg_length,
823 asicp->bit_location));
824 if (asicp->asic_id == VOYAGER_QUAD_QABC) {
825 CDEBUG(("VOYAGER CAT: QABC ASIC found\n"));
826 qabc_asic = asicp;
827 }
828 sp_offset += sizeof(voyager_sp_table_t);
829 }
830 CDEBUG(("Module inst_bits = %d, largest_reg = %d, smallest_reg=%d\n", (*modpp)->inst_bits, (*modpp)->largest_reg, (*modpp)->smallest_reg));
831 /* OK, now we have the QUAD ASICs set up, use them.
832 * we need to:
833 *
834 * 1. Find the Memory area for the Quad CPIs.
835 * 2. Find the Extended VIC processor
836 * 3. Configure a second extended VIC processor (This
837 * cannot be done for the 51xx.
838 * */
839 outb(VOYAGER_CAT_RUN, CAT_CMD);
840 cat_connect(*modpp, (*modpp)->asic);
841 CDEBUG(("CAT CONNECTED!!\n"));
842 cat_subread(*modpp, qabc_asic, 0, sizeof(qabc_data), qabc_data);
843 qic_addr = qabc_data[5] << 8;
844 qic_addr = (qic_addr | qabc_data[6]) << 8;
845 qic_addr = (qic_addr | qabc_data[7]) << 8;
846 printk
847 ("Module \"%s\": Quad Processor Card; CPI 0x%lx, SET=0x%x\n",
848 cat_module_name(i), qic_addr, qabc_data[8]);
849#if 0 /* plumbing fails---FIXME */
850 if ((qabc_data[8] & 0xf0) == 0) {
851 /* FIXME: 32 way 8 CPU slot monster cannot be
852 * plumbed this way---need to check for it */
853
854 printk("Plumbing second Extended Quad Processor\n");
855 /* second VIC line hardwired to Quad CPU 1 */
856 qabc_data[8] |= 0x20;
857 cat_subwrite(*modpp, qabc_asic, 8, 1, &qabc_data[8]);
858#ifdef VOYAGER_CAT_DEBUG
859 /* verify plumbing */
860 cat_subread(*modpp, qabc_asic, 8, 1, &qabc_data[8]);
861 if ((qabc_data[8] & 0xf0) == 0) {
862 CDEBUG(("PLUMBING FAILED: 0x%x\n",
863 qabc_data[8]));
864 }
865#endif
866 }
867#endif
868
869 {
870 struct resource *res =
871 kzalloc(sizeof(struct resource), GFP_KERNEL);
872 res->name = kmalloc(128, GFP_KERNEL);
873 sprintf((char *)res->name, "Voyager %s Quad CPI",
874 cat_module_name(i));
875 res->start = qic_addr;
876 res->end = qic_addr + 0x3ff;
877 request_resource(&iomem_resource, res);
878 }
879
880 qic_addr = (unsigned long)ioremap_cache(qic_addr, 0x400);
881
882 for (j = 0; j < 4; j++) {
883 __u8 cpu;
884
885 if (voyager_8slot) {
886 /* 8 slot has a different mapping,
887 * each slot has only one vic line, so
888 * 1 cpu in each slot must be < 8 */
889 cpu = (i & 0x07) + j * 8;
890 } else {
891 cpu = (i & 0x03) + j * 4;
892 }
893 if ((qabc_data[8] & (1 << j))) {
894 voyager_extended_vic_processors |= (1 << cpu);
895 }
896 if (qabc_data[8] & (1 << (j + 4))) {
897 /* Second SET register plumbed: Quad
898 * card has two VIC connected CPUs.
899 * Secondary cannot be booted as a VIC
900 * CPU */
901 voyager_extended_vic_processors |= (1 << cpu);
902 voyager_allowed_boot_processors &=
903 (~(1 << cpu));
904 }
905
906 voyager_quad_processors |= (1 << cpu);
907 voyager_quad_cpi_addr[cpu] = (struct voyager_qic_cpi *)
908 (qic_addr + (j << 8));
909 CDEBUG(("CPU%d: CPI address 0x%lx\n", cpu,
910 (unsigned long)voyager_quad_cpi_addr[cpu]));
911 }
912 outb(VOYAGER_CAT_END, CAT_CMD);
913
914 *asicpp = NULL;
915 modpp = &((*modpp)->next);
916 }
917 *modpp = NULL;
918 printk
919 ("CAT Bus Initialisation finished: extended procs 0x%x, quad procs 0x%x, allowed vic boot = 0x%x\n",
920 voyager_extended_vic_processors, voyager_quad_processors,
921 voyager_allowed_boot_processors);
922 request_resource(&ioport_resource, &vic_res);
923 if (voyager_quad_processors)
924 request_resource(&ioport_resource, &qic_res);
925 /* set up the front power switch */
926}
927
928int voyager_cat_readb(__u8 module, __u8 asic, int reg)
929{
930 return 0;
931}
932
933static int cat_disconnect(voyager_module_t * modp, voyager_asic_t * asicp)
934{
935 __u8 val;
936 int err = 0;
937
938 if (!modp->scan_path_connected)
939 return 0;
940 if (asicp->asic_id != VOYAGER_CAT_ID) {
941 CDEBUG(("cat_disconnect: ASIC is not CAT\n"));
942 return 1;
943 }
944 err = cat_read(modp, asicp, VOYAGER_SCANPATH, &val);
945 if (err) {
946 CDEBUG(("cat_disconnect: failed to read SCANPATH\n"));
947 return err;
948 }
949 val &= VOYAGER_DISCONNECT_ASIC;
950 err = cat_write(modp, asicp, VOYAGER_SCANPATH, val);
951 if (err) {
952 CDEBUG(("cat_disconnect: failed to write SCANPATH\n"));
953 return err;
954 }
955 outb(VOYAGER_CAT_END, CAT_CMD);
956 outb(VOYAGER_CAT_RUN, CAT_CMD);
957 modp->scan_path_connected = 0;
958
959 return 0;
960}
961
962static int cat_connect(voyager_module_t * modp, voyager_asic_t * asicp)
963{
964 __u8 val;
965 int err = 0;
966
967 if (modp->scan_path_connected)
968 return 0;
969 if (asicp->asic_id != VOYAGER_CAT_ID) {
970 CDEBUG(("cat_connect: ASIC is not CAT\n"));
971 return 1;
972 }
973
974 err = cat_read(modp, asicp, VOYAGER_SCANPATH, &val);
975 if (err) {
976 CDEBUG(("cat_connect: failed to read SCANPATH\n"));
977 return err;
978 }
979 val |= VOYAGER_CONNECT_ASIC;
980 err = cat_write(modp, asicp, VOYAGER_SCANPATH, val);
981 if (err) {
982 CDEBUG(("cat_connect: failed to write SCANPATH\n"));
983 return err;
984 }
985 outb(VOYAGER_CAT_END, CAT_CMD);
986 outb(VOYAGER_CAT_RUN, CAT_CMD);
987 modp->scan_path_connected = 1;
988
989 return 0;
990}
991
992void voyager_cat_power_off(void)
993{
994 /* Power the machine off by writing to the PSI over the CAT
995 * bus */
996 __u8 data;
997 voyager_module_t psi = { 0 };
998 voyager_asic_t psi_asic = { 0 };
999
1000 psi.asic = &psi_asic;
1001 psi.asic->asic_id = VOYAGER_CAT_ID;
1002 psi.asic->subaddr = VOYAGER_SUBADDR_HI;
1003 psi.module_addr = VOYAGER_PSI;
1004 psi.scan_path_connected = 0;
1005
1006 outb(VOYAGER_CAT_END, CAT_CMD);
1007 /* Connect the PSI to the CAT Bus */
1008 outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT);
1009 outb(VOYAGER_PSI, VOYAGER_CAT_CONFIG_PORT);
1010 outb(VOYAGER_CAT_RUN, CAT_CMD);
1011 cat_disconnect(&psi, &psi_asic);
1012 /* Read the status */
1013 cat_subread(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG, 1, &data);
1014 outb(VOYAGER_CAT_END, CAT_CMD);
1015 CDEBUG(("PSI STATUS 0x%x\n", data));
1016 /* These two writes are power off prep and perform */
1017 data = PSI_CLEAR;
1018 outb(VOYAGER_CAT_RUN, CAT_CMD);
1019 cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG, 1, &data);
1020 outb(VOYAGER_CAT_END, CAT_CMD);
1021 data = PSI_POWER_DOWN;
1022 outb(VOYAGER_CAT_RUN, CAT_CMD);
1023 cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG, 1, &data);
1024 outb(VOYAGER_CAT_END, CAT_CMD);
1025}
1026
1027struct voyager_status voyager_status = { 0 };
1028
1029void voyager_cat_psi(__u8 cmd, __u16 reg, __u8 * data)
1030{
1031 voyager_module_t psi = { 0 };
1032 voyager_asic_t psi_asic = { 0 };
1033
1034 psi.asic = &psi_asic;
1035 psi.asic->asic_id = VOYAGER_CAT_ID;
1036 psi.asic->subaddr = VOYAGER_SUBADDR_HI;
1037 psi.module_addr = VOYAGER_PSI;
1038 psi.scan_path_connected = 0;
1039
1040 outb(VOYAGER_CAT_END, CAT_CMD);
1041 /* Connect the PSI to the CAT Bus */
1042 outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT);
1043 outb(VOYAGER_PSI, VOYAGER_CAT_CONFIG_PORT);
1044 outb(VOYAGER_CAT_RUN, CAT_CMD);
1045 cat_disconnect(&psi, &psi_asic);
1046 switch (cmd) {
1047 case VOYAGER_PSI_READ:
1048 cat_read(&psi, &psi_asic, reg, data);
1049 break;
1050 case VOYAGER_PSI_WRITE:
1051 cat_write(&psi, &psi_asic, reg, *data);
1052 break;
1053 case VOYAGER_PSI_SUBREAD:
1054 cat_subread(&psi, &psi_asic, reg, 1, data);
1055 break;
1056 case VOYAGER_PSI_SUBWRITE:
1057 cat_subwrite(&psi, &psi_asic, reg, 1, data);
1058 break;
1059 default:
1060 printk(KERN_ERR "Voyager PSI, unrecognised command %d\n", cmd);
1061 break;
1062 }
1063 outb(VOYAGER_CAT_END, CAT_CMD);
1064}
1065
1066void voyager_cat_do_common_interrupt(void)
1067{
1068 /* This is caused either by a memory parity error or something
1069 * in the PSI */
1070 __u8 data;
1071 voyager_module_t psi = { 0 };
1072 voyager_asic_t psi_asic = { 0 };
1073 struct voyager_psi psi_reg;
1074 int i;
1075 re_read:
1076 psi.asic = &psi_asic;
1077 psi.asic->asic_id = VOYAGER_CAT_ID;
1078 psi.asic->subaddr = VOYAGER_SUBADDR_HI;
1079 psi.module_addr = VOYAGER_PSI;
1080 psi.scan_path_connected = 0;
1081
1082 outb(VOYAGER_CAT_END, CAT_CMD);
1083 /* Connect the PSI to the CAT Bus */
1084 outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT);
1085 outb(VOYAGER_PSI, VOYAGER_CAT_CONFIG_PORT);
1086 outb(VOYAGER_CAT_RUN, CAT_CMD);
1087 cat_disconnect(&psi, &psi_asic);
1088 /* Read the status. NOTE: Need to read *all* the PSI regs here
1089 * otherwise the cmn int will be reasserted */
1090 for (i = 0; i < sizeof(psi_reg.regs); i++) {
1091 cat_read(&psi, &psi_asic, i, &((__u8 *) & psi_reg.regs)[i]);
1092 }
1093 outb(VOYAGER_CAT_END, CAT_CMD);
1094 if ((psi_reg.regs.checkbit & 0x02) == 0) {
1095 psi_reg.regs.checkbit |= 0x02;
1096 cat_write(&psi, &psi_asic, 5, psi_reg.regs.checkbit);
1097 printk("VOYAGER RE-READ PSI\n");
1098 goto re_read;
1099 }
1100 outb(VOYAGER_CAT_RUN, CAT_CMD);
1101 for (i = 0; i < sizeof(psi_reg.subregs); i++) {
1102 /* This looks strange, but the PSI doesn't do auto increment
1103 * correctly */
1104 cat_subread(&psi, &psi_asic, VOYAGER_PSI_SUPPLY_REG + i,
1105 1, &((__u8 *) & psi_reg.subregs)[i]);
1106 }
1107 outb(VOYAGER_CAT_END, CAT_CMD);
1108#ifdef VOYAGER_CAT_DEBUG
1109 printk("VOYAGER PSI: ");
1110 for (i = 0; i < sizeof(psi_reg.regs); i++)
1111 printk("%02x ", ((__u8 *) & psi_reg.regs)[i]);
1112 printk("\n ");
1113 for (i = 0; i < sizeof(psi_reg.subregs); i++)
1114 printk("%02x ", ((__u8 *) & psi_reg.subregs)[i]);
1115 printk("\n");
1116#endif
1117 if (psi_reg.regs.intstatus & PSI_MON) {
1118 /* switch off or power fail */
1119
1120 if (psi_reg.subregs.supply & PSI_SWITCH_OFF) {
1121 if (voyager_status.switch_off) {
1122 printk(KERN_ERR
1123 "Voyager front panel switch turned off again---Immediate power off!\n");
1124 voyager_cat_power_off();
1125 /* not reached */
1126 } else {
1127 printk(KERN_ERR
1128 "Voyager front panel switch turned off\n");
1129 voyager_status.switch_off = 1;
1130 voyager_status.request_from_kernel = 1;
1131 wake_up_process(voyager_thread);
1132 }
1133 /* Tell the hardware we're taking care of the
1134 * shutdown, otherwise it will power the box off
1135 * within 3 seconds of the switch being pressed and,
1136 * which is much more important to us, continue to
1137 * assert the common interrupt */
1138 data = PSI_CLR_SWITCH_OFF;
1139 outb(VOYAGER_CAT_RUN, CAT_CMD);
1140 cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_SUPPLY_REG,
1141 1, &data);
1142 outb(VOYAGER_CAT_END, CAT_CMD);
1143 } else {
1144
1145 VDEBUG(("Voyager ac fail reg 0x%x\n",
1146 psi_reg.subregs.ACfail));
1147 if ((psi_reg.subregs.ACfail & AC_FAIL_STAT_CHANGE) == 0) {
1148 /* No further update */
1149 return;
1150 }
1151#if 0
1152 /* Don't bother trying to find out who failed.
1153 * FIXME: This probably makes the code incorrect on
1154 * anything other than a 345x */
1155 for (i = 0; i < 5; i++) {
1156 if (psi_reg.subregs.ACfail & (1 << i)) {
1157 break;
1158 }
1159 }
1160 printk(KERN_NOTICE "AC FAIL IN SUPPLY %d\n", i);
1161#endif
1162 /* DON'T do this: it shuts down the AC PSI
1163 outb(VOYAGER_CAT_RUN, CAT_CMD);
1164 data = PSI_MASK_MASK | i;
1165 cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_MASK,
1166 1, &data);
1167 outb(VOYAGER_CAT_END, CAT_CMD);
1168 */
1169 printk(KERN_ERR "Voyager AC power failure\n");
1170 outb(VOYAGER_CAT_RUN, CAT_CMD);
1171 data = PSI_COLD_START;
1172 cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG,
1173 1, &data);
1174 outb(VOYAGER_CAT_END, CAT_CMD);
1175 voyager_status.power_fail = 1;
1176 voyager_status.request_from_kernel = 1;
1177 wake_up_process(voyager_thread);
1178 }
1179
1180 } else if (psi_reg.regs.intstatus & PSI_FAULT) {
1181 /* Major fault! */
1182 printk(KERN_ERR
1183 "Voyager PSI Detected major fault, immediate power off!\n");
1184 voyager_cat_power_off();
1185 /* not reached */
1186 } else if (psi_reg.regs.intstatus & (PSI_DC_FAIL | PSI_ALARM
1187 | PSI_CURRENT | PSI_DVM
1188 | PSI_PSCFAULT | PSI_STAT_CHG)) {
1189 /* other psi fault */
1190
1191 printk(KERN_WARNING "Voyager PSI status 0x%x\n", data);
1192 /* clear the PSI fault */
1193 outb(VOYAGER_CAT_RUN, CAT_CMD);
1194 cat_write(&psi, &psi_asic, VOYAGER_PSI_STATUS_REG, 0);
1195 outb(VOYAGER_CAT_END, CAT_CMD);
1196 }
1197}
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
deleted file mode 100644
index 98e3c2bc756..00000000000
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ /dev/null
@@ -1,1805 +0,0 @@
1/* -*- mode: c; c-basic-offset: 8 -*- */
2
3/* Copyright (C) 1999,2001
4 *
5 * Author: J.E.J.Bottomley@HansenPartnership.com
6 *
7 * This file provides all the same external entries as smp.c but uses
8 * the voyager hal to provide the functionality
9 */
10#include <linux/cpu.h>
11#include <linux/module.h>
12#include <linux/mm.h>
13#include <linux/kernel_stat.h>
14#include <linux/delay.h>
15#include <linux/mc146818rtc.h>
16#include <linux/cache.h>
17#include <linux/interrupt.h>
18#include <linux/init.h>
19#include <linux/kernel.h>
20#include <linux/bootmem.h>
21#include <linux/completion.h>
22#include <asm/desc.h>
23#include <asm/voyager.h>
24#include <asm/vic.h>
25#include <asm/mtrr.h>
26#include <asm/pgalloc.h>
27#include <asm/tlbflush.h>
28#include <asm/arch_hooks.h>
29#include <asm/trampoline.h>
30
31/* TLB state -- visible externally, indexed physically */
32DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { &init_mm, 0 };
33
34/* CPU IRQ affinity -- set to all ones initially */
35static unsigned long cpu_irq_affinity[NR_CPUS] __cacheline_aligned =
36 {[0 ... NR_CPUS-1] = ~0UL };
37
38/* per CPU data structure (for /proc/cpuinfo et al), visible externally
39 * indexed physically */
40DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
41EXPORT_PER_CPU_SYMBOL(cpu_info);
42
43/* physical ID of the CPU used to boot the system */
44unsigned char boot_cpu_id;
45
46/* The memory line addresses for the Quad CPIs */
47struct voyager_qic_cpi *voyager_quad_cpi_addr[NR_CPUS] __cacheline_aligned;
48
49/* The masks for the Extended VIC processors, filled in by cat_init */
50__u32 voyager_extended_vic_processors = 0;
51
52/* Masks for the extended Quad processors which cannot be VIC booted */
53__u32 voyager_allowed_boot_processors = 0;
54
55/* The mask for the Quad Processors (both extended and non-extended) */
56__u32 voyager_quad_processors = 0;
57
58/* Total count of live CPUs, used in process.c to display
59 * the CPU information and in irq.c for the per CPU irq
60 * activity count. Finally exported by i386_ksyms.c */
61static int voyager_extended_cpus = 1;
62
63/* Used for the invalidate map that's also checked in the spinlock */
64static volatile unsigned long smp_invalidate_needed;
65
66/* Bitmask of CPUs present in the system - exported by i386_syms.c, used
67 * by scheduler but indexed physically */
68cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
69
70/* The internal functions */
71static void send_CPI(__u32 cpuset, __u8 cpi);
72static void ack_CPI(__u8 cpi);
73static int ack_QIC_CPI(__u8 cpi);
74static void ack_special_QIC_CPI(__u8 cpi);
75static void ack_VIC_CPI(__u8 cpi);
76static void send_CPI_allbutself(__u8 cpi);
77static void mask_vic_irq(unsigned int irq);
78static void unmask_vic_irq(unsigned int irq);
79static unsigned int startup_vic_irq(unsigned int irq);
80static void enable_local_vic_irq(unsigned int irq);
81static void disable_local_vic_irq(unsigned int irq);
82static void before_handle_vic_irq(unsigned int irq);
83static void after_handle_vic_irq(unsigned int irq);
84static void set_vic_irq_affinity(unsigned int irq, const struct cpumask *mask);
85static void ack_vic_irq(unsigned int irq);
86static void vic_enable_cpi(void);
87static void do_boot_cpu(__u8 cpuid);
88static void do_quad_bootstrap(void);
89static void initialize_secondary(void);
90
91int hard_smp_processor_id(void);
92int safe_smp_processor_id(void);
93
94/* Inline functions */
95static inline void send_one_QIC_CPI(__u8 cpu, __u8 cpi)
96{
97 voyager_quad_cpi_addr[cpu]->qic_cpi[cpi].cpi =
98 (smp_processor_id() << 16) + cpi;
99}
100
101static inline void send_QIC_CPI(__u32 cpuset, __u8 cpi)
102{
103 int cpu;
104
105 for_each_online_cpu(cpu) {
106 if (cpuset & (1 << cpu)) {
107#ifdef VOYAGER_DEBUG
108 if (!cpu_online(cpu))
109 VDEBUG(("CPU%d sending cpi %d to CPU%d not in "
110 "cpu_online_map\n",
111 hard_smp_processor_id(), cpi, cpu));
112#endif
113 send_one_QIC_CPI(cpu, cpi - QIC_CPI_OFFSET);
114 }
115 }
116}
117
118static inline void wrapper_smp_local_timer_interrupt(void)
119{
120 irq_enter();
121 smp_local_timer_interrupt();
122 irq_exit();
123}
124
125static inline void send_one_CPI(__u8 cpu, __u8 cpi)
126{
127 if (voyager_quad_processors & (1 << cpu))
128 send_one_QIC_CPI(cpu, cpi - QIC_CPI_OFFSET);
129 else
130 send_CPI(1 << cpu, cpi);
131}
132
133static inline void send_CPI_allbutself(__u8 cpi)
134{
135 __u8 cpu = smp_processor_id();
136 __u32 mask = cpus_addr(cpu_online_map)[0] & ~(1 << cpu);
137 send_CPI(mask, cpi);
138}
139
140static inline int is_cpu_quad(void)
141{
142 __u8 cpumask = inb(VIC_PROC_WHO_AM_I);
143 return ((cpumask & QUAD_IDENTIFIER) == QUAD_IDENTIFIER);
144}
145
146static inline int is_cpu_extended(void)
147{
148 __u8 cpu = hard_smp_processor_id();
149
150 return (voyager_extended_vic_processors & (1 << cpu));
151}
152
153static inline int is_cpu_vic_boot(void)
154{
155 __u8 cpu = hard_smp_processor_id();
156
157 return (voyager_extended_vic_processors
158 & voyager_allowed_boot_processors & (1 << cpu));
159}
160
161static inline void ack_CPI(__u8 cpi)
162{
163 switch (cpi) {
164 case VIC_CPU_BOOT_CPI:
165 if (is_cpu_quad() && !is_cpu_vic_boot())
166 ack_QIC_CPI(cpi);
167 else
168 ack_VIC_CPI(cpi);
169 break;
170 case VIC_SYS_INT:
171 case VIC_CMN_INT:
172 /* These are slightly strange. Even on the Quad card,
173 * They are vectored as VIC CPIs */
174 if (is_cpu_quad())
175 ack_special_QIC_CPI(cpi);
176 else
177 ack_VIC_CPI(cpi);
178 break;
179 default:
180 printk("VOYAGER ERROR: CPI%d is in common CPI code\n", cpi);
181 break;
182 }
183}
184
185/* local variables */
186
187/* The VIC IRQ descriptors -- these look almost identical to the
188 * 8259 IRQs except that masks and things must be kept per processor
189 */
190static struct irq_chip vic_chip = {
191 .name = "VIC",
192 .startup = startup_vic_irq,
193 .mask = mask_vic_irq,
194 .unmask = unmask_vic_irq,
195 .set_affinity = set_vic_irq_affinity,
196};
197
198/* used to count up as CPUs are brought on line (starts at 0) */
199static int cpucount = 0;
200
201/* The per cpu profile stuff - used in smp_local_timer_interrupt */
202static DEFINE_PER_CPU(int, prof_multiplier) = 1;
203static DEFINE_PER_CPU(int, prof_old_multiplier) = 1;
204static DEFINE_PER_CPU(int, prof_counter) = 1;
205
206/* the map used to check if a CPU has booted */
207static __u32 cpu_booted_map;
208
209/* the synchronize flag used to hold all secondary CPUs spinning in
210 * a tight loop until the boot sequence is ready for them */
211static cpumask_t smp_commenced_mask = CPU_MASK_NONE;
212
213/* This is for the new dynamic CPU boot code */
214
215/* The per processor IRQ masks (these are usually kept in sync) */
216static __u16 vic_irq_mask[NR_CPUS] __cacheline_aligned;
217
218/* the list of IRQs to be enabled by the VIC_ENABLE_IRQ_CPI */
219static __u16 vic_irq_enable_mask[NR_CPUS] __cacheline_aligned = { 0 };
220
221/* Lock for enable/disable of VIC interrupts */
222static __cacheline_aligned DEFINE_SPINLOCK(vic_irq_lock);
223
224/* The boot processor is correctly set up in PC mode when it
225 * comes up, but the secondaries need their master/slave 8259
226 * pairs initializing correctly */
227
228/* Interrupt counters (per cpu) and total - used to try to
229 * even up the interrupt handling routines */
230static long vic_intr_total = 0;
231static long vic_intr_count[NR_CPUS] __cacheline_aligned = { 0 };
232static unsigned long vic_tick[NR_CPUS] __cacheline_aligned = { 0 };
233
234/* Since we can only use CPI0, we fake all the other CPIs */
235static unsigned long vic_cpi_mailbox[NR_CPUS] __cacheline_aligned;
236
237/* debugging routine to read the isr of the cpu's pic */
238static inline __u16 vic_read_isr(void)
239{
240 __u16 isr;
241
242 outb(0x0b, 0xa0);
243 isr = inb(0xa0) << 8;
244 outb(0x0b, 0x20);
245 isr |= inb(0x20);
246
247 return isr;
248}
249
250static __init void qic_setup(void)
251{
252 if (!is_cpu_quad()) {
253 /* not a quad, no setup */
254 return;
255 }
256 outb(QIC_DEFAULT_MASK0, QIC_MASK_REGISTER0);
257 outb(QIC_CPI_ENABLE, QIC_MASK_REGISTER1);
258
259 if (is_cpu_extended()) {
260 /* the QIC duplicate of the VIC base register */
261 outb(VIC_DEFAULT_CPI_BASE, QIC_VIC_CPI_BASE_REGISTER);
262 outb(QIC_DEFAULT_CPI_BASE, QIC_CPI_BASE_REGISTER);
263
264 /* FIXME: should set up the QIC timer and memory parity
265 * error vectors here */
266 }
267}
268
269static __init void vic_setup_pic(void)
270{
271 outb(1, VIC_REDIRECT_REGISTER_1);
272 /* clear the claim registers for dynamic routing */
273 outb(0, VIC_CLAIM_REGISTER_0);
274 outb(0, VIC_CLAIM_REGISTER_1);
275
276 outb(0, VIC_PRIORITY_REGISTER);
277 /* Set the Primary and Secondary Microchannel vector
278 * bases to be the same as the ordinary interrupts
279 *
280 * FIXME: This would be more efficient using separate
281 * vectors. */
282 outb(FIRST_EXTERNAL_VECTOR, VIC_PRIMARY_MC_BASE);
283 outb(FIRST_EXTERNAL_VECTOR, VIC_SECONDARY_MC_BASE);
284 /* Now initiallise the master PIC belonging to this CPU by
285 * sending the four ICWs */
286
287 /* ICW1: level triggered, ICW4 needed */
288 outb(0x19, 0x20);
289
290 /* ICW2: vector base */
291 outb(FIRST_EXTERNAL_VECTOR, 0x21);
292
293 /* ICW3: slave at line 2 */
294 outb(0x04, 0x21);
295
296 /* ICW4: 8086 mode */
297 outb(0x01, 0x21);
298
299 /* now the same for the slave PIC */
300
301 /* ICW1: level trigger, ICW4 needed */
302 outb(0x19, 0xA0);
303
304 /* ICW2: slave vector base */
305 outb(FIRST_EXTERNAL_VECTOR + 8, 0xA1);
306
307 /* ICW3: slave ID */
308 outb(0x02, 0xA1);
309
310 /* ICW4: 8086 mode */
311 outb(0x01, 0xA1);
312}
313
314static void do_quad_bootstrap(void)
315{
316 if (is_cpu_quad() && is_cpu_vic_boot()) {
317 int i;
318 unsigned long flags;
319 __u8 cpuid = hard_smp_processor_id();
320
321 local_irq_save(flags);
322
323 for (i = 0; i < 4; i++) {
324 /* FIXME: this would be >>3 &0x7 on the 32 way */
325 if (((cpuid >> 2) & 0x03) == i)
326 /* don't lower our own mask! */
327 continue;
328
329 /* masquerade as local Quad CPU */
330 outb(QIC_CPUID_ENABLE | i, QIC_PROCESSOR_ID);
331 /* enable the startup CPI */
332 outb(QIC_BOOT_CPI_MASK, QIC_MASK_REGISTER1);
333 /* restore cpu id */
334 outb(0, QIC_PROCESSOR_ID);
335 }
336 local_irq_restore(flags);
337 }
338}
339
340void prefill_possible_map(void)
341{
342 /* This is empty on voyager because we need a much
343 * earlier detection which is done in find_smp_config */
344}
345
346/* Set up all the basic stuff: read the SMP config and make all the
347 * SMP information reflect only the boot cpu. All others will be
348 * brought on-line later. */
349void __init find_smp_config(void)
350{
351 int i;
352
353 boot_cpu_id = hard_smp_processor_id();
354
355 printk("VOYAGER SMP: Boot cpu is %d\n", boot_cpu_id);
356
357 /* initialize the CPU structures (moved from smp_boot_cpus) */
358 for (i = 0; i < nr_cpu_ids; i++)
359 cpu_irq_affinity[i] = ~0;
360 cpu_online_map = cpumask_of_cpu(boot_cpu_id);
361
362 /* The boot CPU must be extended */
363 voyager_extended_vic_processors = 1 << boot_cpu_id;
364 /* initially, all of the first 8 CPUs can boot */
365 voyager_allowed_boot_processors = 0xff;
366 /* set up everything for just this CPU, we can alter
367 * this as we start the other CPUs later */
368 /* now get the CPU disposition from the extended CMOS */
369 cpus_addr(phys_cpu_present_map)[0] =
370 voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK);
371 cpus_addr(phys_cpu_present_map)[0] |=
372 voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + 1) << 8;
373 cpus_addr(phys_cpu_present_map)[0] |=
374 voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK +
375 2) << 16;
376 cpus_addr(phys_cpu_present_map)[0] |=
377 voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK +
378 3) << 24;
379 init_cpu_possible(&phys_cpu_present_map);
380 printk("VOYAGER SMP: phys_cpu_present_map = 0x%lx\n",
381 cpus_addr(phys_cpu_present_map)[0]);
382 /* Here we set up the VIC to enable SMP */
383 /* enable the CPIs by writing the base vector to their register */
384 outb(VIC_DEFAULT_CPI_BASE, VIC_CPI_BASE_REGISTER);
385 outb(1, VIC_REDIRECT_REGISTER_1);
386 /* set the claim registers for static routing --- Boot CPU gets
387 * all interrupts untill all other CPUs started */
388 outb(0xff, VIC_CLAIM_REGISTER_0);
389 outb(0xff, VIC_CLAIM_REGISTER_1);
390 /* Set the Primary and Secondary Microchannel vector
391 * bases to be the same as the ordinary interrupts
392 *
393 * FIXME: This would be more efficient using separate
394 * vectors. */
395 outb(FIRST_EXTERNAL_VECTOR, VIC_PRIMARY_MC_BASE);
396 outb(FIRST_EXTERNAL_VECTOR, VIC_SECONDARY_MC_BASE);
397
398 /* Finally tell the firmware that we're driving */
399 outb(inb(VOYAGER_SUS_IN_CONTROL_PORT) | VOYAGER_IN_CONTROL_FLAG,
400 VOYAGER_SUS_IN_CONTROL_PORT);
401
402 current_thread_info()->cpu = boot_cpu_id;
403 percpu_write(cpu_number, boot_cpu_id);
404}
405
406/*
407 * The bootstrap kernel entry code has set these up. Save them
408 * for a given CPU, id is physical */
409void __init smp_store_cpu_info(int id)
410{
411 struct cpuinfo_x86 *c = &cpu_data(id);
412
413 *c = boot_cpu_data;
414 c->cpu_index = id;
415
416 identify_secondary_cpu(c);
417}
418
419/* Routine initially called when a non-boot CPU is brought online */
420static void __init start_secondary(void *unused)
421{
422 __u8 cpuid = hard_smp_processor_id();
423
424 cpu_init();
425
426 /* OK, we're in the routine */
427 ack_CPI(VIC_CPU_BOOT_CPI);
428
429 /* setup the 8259 master slave pair belonging to this CPU ---
430 * we won't actually receive any until the boot CPU
431 * relinquishes it's static routing mask */
432 vic_setup_pic();
433
434 qic_setup();
435
436 if (is_cpu_quad() && !is_cpu_vic_boot()) {
437 /* clear the boot CPI */
438 __u8 dummy;
439
440 dummy =
441 voyager_quad_cpi_addr[cpuid]->qic_cpi[VIC_CPU_BOOT_CPI].cpi;
442 printk("read dummy %d\n", dummy);
443 }
444
445 /* lower the mask to receive CPIs */
446 vic_enable_cpi();
447
448 VDEBUG(("VOYAGER SMP: CPU%d, stack at about %p\n", cpuid, &cpuid));
449
450 notify_cpu_starting(cpuid);
451
452 /* enable interrupts */
453 local_irq_enable();
454
455 /* get our bogomips */
456 calibrate_delay();
457
458 /* save our processor parameters */
459 smp_store_cpu_info(cpuid);
460
461 /* if we're a quad, we may need to bootstrap other CPUs */
462 do_quad_bootstrap();
463
464 /* FIXME: this is rather a poor hack to prevent the CPU
465 * activating softirqs while it's supposed to be waiting for
466 * permission to proceed. Without this, the new per CPU stuff
467 * in the softirqs will fail */
468 local_irq_disable();
469 cpu_set(cpuid, cpu_callin_map);
470
471 /* signal that we're done */
472 cpu_booted_map = 1;
473
474 while (!cpu_isset(cpuid, smp_commenced_mask))
475 rep_nop();
476 local_irq_enable();
477
478 local_flush_tlb();
479
480 cpu_set(cpuid, cpu_online_map);
481 wmb();
482 cpu_idle();
483}
484
485/* Routine to kick start the given CPU and wait for it to report ready
486 * (or timeout in startup). When this routine returns, the requested
487 * CPU is either fully running and configured or known to be dead.
488 *
489 * We call this routine sequentially 1 CPU at a time, so no need for
490 * locking */
491
492static void __init do_boot_cpu(__u8 cpu)
493{
494 struct task_struct *idle;
495 int timeout;
496 unsigned long flags;
497 int quad_boot = (1 << cpu) & voyager_quad_processors
498 & ~(voyager_extended_vic_processors
499 & voyager_allowed_boot_processors);
500
501 /* This is the format of the CPI IDT gate (in real mode) which
502 * we're hijacking to boot the CPU */
503 union IDTFormat {
504 struct seg {
505 __u16 Offset;
506 __u16 Segment;
507 } idt;
508 __u32 val;
509 } hijack_source;
510
511 __u32 *hijack_vector;
512 __u32 start_phys_address = setup_trampoline();
513
514 /* There's a clever trick to this: The linux trampoline is
515 * compiled to begin at absolute location zero, so make the
516 * address zero but have the data segment selector compensate
517 * for the actual address */
518 hijack_source.idt.Offset = start_phys_address & 0x000F;
519 hijack_source.idt.Segment = (start_phys_address >> 4) & 0xFFFF;
520
521 cpucount++;
522 alternatives_smp_switch(1);
523
524 idle = fork_idle(cpu);
525 if (IS_ERR(idle))
526 panic("failed fork for CPU%d", cpu);
527 idle->thread.ip = (unsigned long)start_secondary;
528 /* init_tasks (in sched.c) is indexed logically */
529 stack_start.sp = (void *)idle->thread.sp;
530
531 per_cpu(current_task, cpu) = idle;
532 early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
533 irq_ctx_init(cpu);
534
535 /* Note: Don't modify initial ss override */
536 VDEBUG(("VOYAGER SMP: Booting CPU%d at 0x%lx[%x:%x], stack %p\n", cpu,
537 (unsigned long)hijack_source.val, hijack_source.idt.Segment,
538 hijack_source.idt.Offset, stack_start.sp));
539
540 /* init lowmem identity mapping */
541 clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
542 min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
543 flush_tlb_all();
544
545 if (quad_boot) {
546 printk("CPU %d: non extended Quad boot\n", cpu);
547 hijack_vector =
548 (__u32 *)
549 phys_to_virt((VIC_CPU_BOOT_CPI + QIC_DEFAULT_CPI_BASE) * 4);
550 *hijack_vector = hijack_source.val;
551 } else {
552 printk("CPU%d: extended VIC boot\n", cpu);
553 hijack_vector =
554 (__u32 *)
555 phys_to_virt((VIC_CPU_BOOT_CPI + VIC_DEFAULT_CPI_BASE) * 4);
556 *hijack_vector = hijack_source.val;
557 /* VIC errata, may also receive interrupt at this address */
558 hijack_vector =
559 (__u32 *)
560 phys_to_virt((VIC_CPU_BOOT_ERRATA_CPI +
561 VIC_DEFAULT_CPI_BASE) * 4);
562 *hijack_vector = hijack_source.val;
563 }
564 /* All non-boot CPUs start with interrupts fully masked. Need
565 * to lower the mask of the CPI we're about to send. We do
566 * this in the VIC by masquerading as the processor we're
567 * about to boot and lowering its interrupt mask */
568 local_irq_save(flags);
569 if (quad_boot) {
570 send_one_QIC_CPI(cpu, VIC_CPU_BOOT_CPI);
571 } else {
572 outb(VIC_CPU_MASQUERADE_ENABLE | cpu, VIC_PROCESSOR_ID);
573 /* here we're altering registers belonging to `cpu' */
574
575 outb(VIC_BOOT_INTERRUPT_MASK, 0x21);
576 /* now go back to our original identity */
577 outb(boot_cpu_id, VIC_PROCESSOR_ID);
578
579 /* and boot the CPU */
580
581 send_CPI((1 << cpu), VIC_CPU_BOOT_CPI);
582 }
583 cpu_booted_map = 0;
584 local_irq_restore(flags);
585
586 /* now wait for it to become ready (or timeout) */
587 for (timeout = 0; timeout < 50000; timeout++) {
588 if (cpu_booted_map)
589 break;
590 udelay(100);
591 }
592 /* reset the page table */
593 zap_low_mappings();
594
595 if (cpu_booted_map) {
596 VDEBUG(("CPU%d: Booted successfully, back in CPU %d\n",
597 cpu, smp_processor_id()));
598
599 printk("CPU%d: ", cpu);
600 print_cpu_info(&cpu_data(cpu));
601 wmb();
602 cpu_set(cpu, cpu_callout_map);
603 cpu_set(cpu, cpu_present_map);
604 } else {
605 printk("CPU%d FAILED TO BOOT: ", cpu);
606 if (*
607 ((volatile unsigned char *)phys_to_virt(start_phys_address))
608 == 0xA5)
609 printk("Stuck.\n");
610 else
611 printk("Not responding.\n");
612
613 cpucount--;
614 }
615}
616
617void __init smp_boot_cpus(void)
618{
619 int i;
620
621 /* CAT BUS initialisation must be done after the memory */
622 /* FIXME: The L4 has a catbus too, it just needs to be
623 * accessed in a totally different way */
624 if (voyager_level == 5) {
625 voyager_cat_init();
626
627 /* now that the cat has probed the Voyager System Bus, sanity
628 * check the cpu map */
629 if (((voyager_quad_processors | voyager_extended_vic_processors)
630 & cpus_addr(phys_cpu_present_map)[0]) !=
631 cpus_addr(phys_cpu_present_map)[0]) {
632 /* should panic */
633 printk("\n\n***WARNING*** "
634 "Sanity check of CPU present map FAILED\n");
635 }
636 } else if (voyager_level == 4)
637 voyager_extended_vic_processors =
638 cpus_addr(phys_cpu_present_map)[0];
639
640 /* this sets up the idle task to run on the current cpu */
641 voyager_extended_cpus = 1;
642 /* Remove the global_irq_holder setting, it triggers a BUG() on
643 * schedule at the moment */
644 //global_irq_holder = boot_cpu_id;
645
646 /* FIXME: Need to do something about this but currently only works
647 * on CPUs with a tsc which none of mine have.
648 smp_tune_scheduling();
649 */
650 smp_store_cpu_info(boot_cpu_id);
651 /* setup the jump vector */
652 initial_code = (unsigned long)initialize_secondary;
653 printk("CPU%d: ", boot_cpu_id);
654 print_cpu_info(&cpu_data(boot_cpu_id));
655
656 if (is_cpu_quad()) {
657 /* booting on a Quad CPU */
658 printk("VOYAGER SMP: Boot CPU is Quad\n");
659 qic_setup();
660 do_quad_bootstrap();
661 }
662
663 /* enable our own CPIs */
664 vic_enable_cpi();
665
666 cpu_set(boot_cpu_id, cpu_online_map);
667 cpu_set(boot_cpu_id, cpu_callout_map);
668
669 /* loop over all the extended VIC CPUs and boot them. The
670 * Quad CPUs must be bootstrapped by their extended VIC cpu */
671 for (i = 0; i < nr_cpu_ids; i++) {
672 if (i == boot_cpu_id || !cpu_isset(i, phys_cpu_present_map))
673 continue;
674 do_boot_cpu(i);
675 /* This udelay seems to be needed for the Quad boots
676 * don't remove unless you know what you're doing */
677 udelay(1000);
678 }
679 /* we could compute the total bogomips here, but why bother?,
680 * Code added from smpboot.c */
681 {
682 unsigned long bogosum = 0;
683
684 for_each_online_cpu(i)
685 bogosum += cpu_data(i).loops_per_jiffy;
686 printk(KERN_INFO "Total of %d processors activated "
687 "(%lu.%02lu BogoMIPS).\n",
688 cpucount + 1, bogosum / (500000 / HZ),
689 (bogosum / (5000 / HZ)) % 100);
690 }
691 voyager_extended_cpus = hweight32(voyager_extended_vic_processors);
692 printk("VOYAGER: Extended (interrupt handling CPUs): "
693 "%d, non-extended: %d\n", voyager_extended_cpus,
694 num_booting_cpus() - voyager_extended_cpus);
695 /* that's it, switch to symmetric mode */
696 outb(0, VIC_PRIORITY_REGISTER);
697 outb(0, VIC_CLAIM_REGISTER_0);
698 outb(0, VIC_CLAIM_REGISTER_1);
699
700 VDEBUG(("VOYAGER SMP: Booted with %d CPUs\n", num_booting_cpus()));
701}
702
703/* Reload the secondary CPUs task structure (this function does not
704 * return ) */
705static void __init initialize_secondary(void)
706{
707#if 0
708 // AC kernels only
709 set_current(hard_get_current());
710#endif
711
712 /*
713 * We don't actually need to load the full TSS,
714 * basically just the stack pointer and the eip.
715 */
716
717 asm volatile ("movl %0,%%esp\n\t"
718 "jmp *%1"::"r" (current->thread.sp),
719 "r"(current->thread.ip));
720}
721
722/* handle a Voyager SYS_INT -- If we don't, the base board will
723 * panic the system.
724 *
725 * System interrupts occur because some problem was detected on the
726 * various busses. To find out what you have to probe all the
727 * hardware via the CAT bus. FIXME: At the moment we do nothing. */
728void smp_vic_sys_interrupt(struct pt_regs *regs)
729{
730 ack_CPI(VIC_SYS_INT);
731 printk("Voyager SYSTEM INTERRUPT\n");
732}
733
734/* Handle a voyager CMN_INT; These interrupts occur either because of
735 * a system status change or because a single bit memory error
736 * occurred. FIXME: At the moment, ignore all this. */
737void smp_vic_cmn_interrupt(struct pt_regs *regs)
738{
739 static __u8 in_cmn_int = 0;
740 static DEFINE_SPINLOCK(cmn_int_lock);
741
742 /* common ints are broadcast, so make sure we only do this once */
743 _raw_spin_lock(&cmn_int_lock);
744 if (in_cmn_int)
745 goto unlock_end;
746
747 in_cmn_int++;
748 _raw_spin_unlock(&cmn_int_lock);
749
750 VDEBUG(("Voyager COMMON INTERRUPT\n"));
751
752 if (voyager_level == 5)
753 voyager_cat_do_common_interrupt();
754
755 _raw_spin_lock(&cmn_int_lock);
756 in_cmn_int = 0;
757 unlock_end:
758 _raw_spin_unlock(&cmn_int_lock);
759 ack_CPI(VIC_CMN_INT);
760}
761
762/*
763 * Reschedule call back. Nothing to do, all the work is done
764 * automatically when we return from the interrupt. */
765static void smp_reschedule_interrupt(void)
766{
767 /* do nothing */
768}
769
770static struct mm_struct *flush_mm;
771static unsigned long flush_va;
772static DEFINE_SPINLOCK(tlbstate_lock);
773
774/*
775 * We cannot call mmdrop() because we are in interrupt context,
776 * instead update mm->cpu_vm_mask.
777 *
778 * We need to reload %cr3 since the page tables may be going
779 * away from under us..
780 */
781static inline void voyager_leave_mm(unsigned long cpu)
782{
783 if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
784 BUG();
785 cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
786 load_cr3(swapper_pg_dir);
787}
788
789/*
790 * Invalidate call-back
791 */
792static void smp_invalidate_interrupt(void)
793{
794 __u8 cpu = smp_processor_id();
795
796 if (!test_bit(cpu, &smp_invalidate_needed))
797 return;
798 /* This will flood messages. Don't uncomment unless you see
799 * Problems with cross cpu invalidation
800 VDEBUG(("VOYAGER SMP: CPU%d received INVALIDATE_CPI\n",
801 smp_processor_id()));
802 */
803
804 if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
805 if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
806 if (flush_va == TLB_FLUSH_ALL)
807 local_flush_tlb();
808 else
809 __flush_tlb_one(flush_va);
810 } else
811 voyager_leave_mm(cpu);
812 }
813 smp_mb__before_clear_bit();
814 clear_bit(cpu, &smp_invalidate_needed);
815 smp_mb__after_clear_bit();
816}
817
818/* All the new flush operations for 2.4 */
819
820/* This routine is called with a physical cpu mask */
821static void
822voyager_flush_tlb_others(unsigned long cpumask, struct mm_struct *mm,
823 unsigned long va)
824{
825 int stuck = 50000;
826
827 if (!cpumask)
828 BUG();
829 if ((cpumask & cpus_addr(cpu_online_map)[0]) != cpumask)
830 BUG();
831 if (cpumask & (1 << smp_processor_id()))
832 BUG();
833 if (!mm)
834 BUG();
835
836 spin_lock(&tlbstate_lock);
837
838 flush_mm = mm;
839 flush_va = va;
840 atomic_set_mask(cpumask, &smp_invalidate_needed);
841 /*
842 * We have to send the CPI only to
843 * CPUs affected.
844 */
845 send_CPI(cpumask, VIC_INVALIDATE_CPI);
846
847 while (smp_invalidate_needed) {
848 mb();
849 if (--stuck == 0) {
850 printk("***WARNING*** Stuck doing invalidate CPI "
851 "(CPU%d)\n", smp_processor_id());
852 break;
853 }
854 }
855
856 /* Uncomment only to debug invalidation problems
857 VDEBUG(("VOYAGER SMP: Completed invalidate CPI (CPU%d)\n", cpu));
858 */
859
860 flush_mm = NULL;
861 flush_va = 0;
862 spin_unlock(&tlbstate_lock);
863}
864
865void flush_tlb_current_task(void)
866{
867 struct mm_struct *mm = current->mm;
868 unsigned long cpu_mask;
869
870 preempt_disable();
871
872 cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id());
873 local_flush_tlb();
874 if (cpu_mask)
875 voyager_flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
876
877 preempt_enable();
878}
879
880void flush_tlb_mm(struct mm_struct *mm)
881{
882 unsigned long cpu_mask;
883
884 preempt_disable();
885
886 cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id());
887
888 if (current->active_mm == mm) {
889 if (current->mm)
890 local_flush_tlb();
891 else
892 voyager_leave_mm(smp_processor_id());
893 }
894 if (cpu_mask)
895 voyager_flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
896
897 preempt_enable();
898}
899
900void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
901{
902 struct mm_struct *mm = vma->vm_mm;
903 unsigned long cpu_mask;
904
905 preempt_disable();
906
907 cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id());
908 if (current->active_mm == mm) {
909 if (current->mm)
910 __flush_tlb_one(va);
911 else
912 voyager_leave_mm(smp_processor_id());
913 }
914
915 if (cpu_mask)
916 voyager_flush_tlb_others(cpu_mask, mm, va);
917
918 preempt_enable();
919}
920
921EXPORT_SYMBOL(flush_tlb_page);
922
923/* enable the requested IRQs */
924static void smp_enable_irq_interrupt(void)
925{
926 __u8 irq;
927 __u8 cpu = get_cpu();
928
929 VDEBUG(("VOYAGER SMP: CPU%d enabling irq mask 0x%x\n", cpu,
930 vic_irq_enable_mask[cpu]));
931
932 spin_lock(&vic_irq_lock);
933 for (irq = 0; irq < 16; irq++) {
934 if (vic_irq_enable_mask[cpu] & (1 << irq))
935 enable_local_vic_irq(irq);
936 }
937 vic_irq_enable_mask[cpu] = 0;
938 spin_unlock(&vic_irq_lock);
939
940 put_cpu_no_resched();
941}
942
943/*
944 * CPU halt call-back
945 */
946static void smp_stop_cpu_function(void *dummy)
947{
948 VDEBUG(("VOYAGER SMP: CPU%d is STOPPING\n", smp_processor_id()));
949 cpu_clear(smp_processor_id(), cpu_online_map);
950 local_irq_disable();
951 for (;;)
952 halt();
953}
954
955/* execute a thread on a new CPU. The function to be called must be
956 * previously set up. This is used to schedule a function for
957 * execution on all CPUs - set up the function then broadcast a
958 * function_interrupt CPI to come here on each CPU */
959static void smp_call_function_interrupt(void)
960{
961 irq_enter();
962 generic_smp_call_function_interrupt();
963 __get_cpu_var(irq_stat).irq_call_count++;
964 irq_exit();
965}
966
967static void smp_call_function_single_interrupt(void)
968{
969 irq_enter();
970 generic_smp_call_function_single_interrupt();
971 __get_cpu_var(irq_stat).irq_call_count++;
972 irq_exit();
973}
974
975/* Sorry about the name. In an APIC based system, the APICs
976 * themselves are programmed to send a timer interrupt. This is used
977 * by linux to reschedule the processor. Voyager doesn't have this,
978 * so we use the system clock to interrupt one processor, which in
979 * turn, broadcasts a timer CPI to all the others --- we receive that
980 * CPI here. We don't use this actually for counting so losing
981 * ticks doesn't matter
982 *
983 * FIXME: For those CPUs which actually have a local APIC, we could
984 * try to use it to trigger this interrupt instead of having to
985 * broadcast the timer tick. Unfortunately, all my pentium DYADs have
986 * no local APIC, so I can't do this
987 *
988 * This function is currently a placeholder and is unused in the code */
989void smp_apic_timer_interrupt(struct pt_regs *regs)
990{
991 struct pt_regs *old_regs = set_irq_regs(regs);
992 wrapper_smp_local_timer_interrupt();
993 set_irq_regs(old_regs);
994}
995
996/* All of the QUAD interrupt GATES */
997void smp_qic_timer_interrupt(struct pt_regs *regs)
998{
999 struct pt_regs *old_regs = set_irq_regs(regs);
1000 ack_QIC_CPI(QIC_TIMER_CPI);
1001 wrapper_smp_local_timer_interrupt();
1002 set_irq_regs(old_regs);
1003}
1004
1005void smp_qic_invalidate_interrupt(struct pt_regs *regs)
1006{
1007 ack_QIC_CPI(QIC_INVALIDATE_CPI);
1008 smp_invalidate_interrupt();
1009}
1010
1011void smp_qic_reschedule_interrupt(struct pt_regs *regs)
1012{
1013 ack_QIC_CPI(QIC_RESCHEDULE_CPI);
1014 smp_reschedule_interrupt();
1015}
1016
1017void smp_qic_enable_irq_interrupt(struct pt_regs *regs)
1018{
1019 ack_QIC_CPI(QIC_ENABLE_IRQ_CPI);
1020 smp_enable_irq_interrupt();
1021}
1022
1023void smp_qic_call_function_interrupt(struct pt_regs *regs)
1024{
1025 ack_QIC_CPI(QIC_CALL_FUNCTION_CPI);
1026 smp_call_function_interrupt();
1027}
1028
1029void smp_qic_call_function_single_interrupt(struct pt_regs *regs)
1030{
1031 ack_QIC_CPI(QIC_CALL_FUNCTION_SINGLE_CPI);
1032 smp_call_function_single_interrupt();
1033}
1034
1035void smp_vic_cpi_interrupt(struct pt_regs *regs)
1036{
1037 struct pt_regs *old_regs = set_irq_regs(regs);
1038 __u8 cpu = smp_processor_id();
1039
1040 if (is_cpu_quad())
1041 ack_QIC_CPI(VIC_CPI_LEVEL0);
1042 else
1043 ack_VIC_CPI(VIC_CPI_LEVEL0);
1044
1045 if (test_and_clear_bit(VIC_TIMER_CPI, &vic_cpi_mailbox[cpu]))
1046 wrapper_smp_local_timer_interrupt();
1047 if (test_and_clear_bit(VIC_INVALIDATE_CPI, &vic_cpi_mailbox[cpu]))
1048 smp_invalidate_interrupt();
1049 if (test_and_clear_bit(VIC_RESCHEDULE_CPI, &vic_cpi_mailbox[cpu]))
1050 smp_reschedule_interrupt();
1051 if (test_and_clear_bit(VIC_ENABLE_IRQ_CPI, &vic_cpi_mailbox[cpu]))
1052 smp_enable_irq_interrupt();
1053 if (test_and_clear_bit(VIC_CALL_FUNCTION_CPI, &vic_cpi_mailbox[cpu]))
1054 smp_call_function_interrupt();
1055 if (test_and_clear_bit(VIC_CALL_FUNCTION_SINGLE_CPI, &vic_cpi_mailbox[cpu]))
1056 smp_call_function_single_interrupt();
1057 set_irq_regs(old_regs);
1058}
1059
1060static void do_flush_tlb_all(void *info)
1061{
1062 unsigned long cpu = smp_processor_id();
1063
1064 __flush_tlb_all();
1065 if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY)
1066 voyager_leave_mm(cpu);
1067}
1068
1069/* flush the TLB of every active CPU in the system */
1070void flush_tlb_all(void)
1071{
1072 on_each_cpu(do_flush_tlb_all, 0, 1);
1073}
1074
1075/* send a reschedule CPI to one CPU by physical CPU number*/
1076static void voyager_smp_send_reschedule(int cpu)
1077{
1078 send_one_CPI(cpu, VIC_RESCHEDULE_CPI);
1079}
1080
1081int hard_smp_processor_id(void)
1082{
1083 __u8 i;
1084 __u8 cpumask = inb(VIC_PROC_WHO_AM_I);
1085 if ((cpumask & QUAD_IDENTIFIER) == QUAD_IDENTIFIER)
1086 return cpumask & 0x1F;
1087
1088 for (i = 0; i < 8; i++) {
1089 if (cpumask & (1 << i))
1090 return i;
1091 }
1092 printk("** WARNING ** Illegal cpuid returned by VIC: %d", cpumask);
1093 return 0;
1094}
1095
1096int safe_smp_processor_id(void)
1097{
1098 return hard_smp_processor_id();
1099}
1100
1101/* broadcast a halt to all other CPUs */
1102static void voyager_smp_send_stop(void)
1103{
1104 smp_call_function(smp_stop_cpu_function, NULL, 1);
1105}
1106
1107/* this function is triggered in time.c when a clock tick fires
1108 * we need to re-broadcast the tick to all CPUs */
1109void smp_vic_timer_interrupt(void)
1110{
1111 send_CPI_allbutself(VIC_TIMER_CPI);
1112 smp_local_timer_interrupt();
1113}
1114
1115/* local (per CPU) timer interrupt. It does both profiling and
1116 * process statistics/rescheduling.
1117 *
1118 * We do profiling in every local tick, statistics/rescheduling
1119 * happen only every 'profiling multiplier' ticks. The default
1120 * multiplier is 1 and it can be changed by writing the new multiplier
1121 * value into /proc/profile.
1122 */
1123void smp_local_timer_interrupt(void)
1124{
1125 int cpu = smp_processor_id();
1126 long weight;
1127
1128 profile_tick(CPU_PROFILING);
1129 if (--per_cpu(prof_counter, cpu) <= 0) {
1130 /*
1131 * The multiplier may have changed since the last time we got
1132 * to this point as a result of the user writing to
1133 * /proc/profile. In this case we need to adjust the APIC
1134 * timer accordingly.
1135 *
1136 * Interrupts are already masked off at this point.
1137 */
1138 per_cpu(prof_counter, cpu) = per_cpu(prof_multiplier, cpu);
1139 if (per_cpu(prof_counter, cpu) !=
1140 per_cpu(prof_old_multiplier, cpu)) {
1141 /* FIXME: need to update the vic timer tick here */
1142 per_cpu(prof_old_multiplier, cpu) =
1143 per_cpu(prof_counter, cpu);
1144 }
1145
1146 update_process_times(user_mode_vm(get_irq_regs()));
1147 }
1148
1149 if (((1 << cpu) & voyager_extended_vic_processors) == 0)
1150 /* only extended VIC processors participate in
1151 * interrupt distribution */
1152 return;
1153
1154 /*
1155 * We take the 'long' return path, and there every subsystem
1156 * grabs the appropriate locks (kernel lock/ irq lock).
1157 *
1158 * we might want to decouple profiling from the 'long path',
1159 * and do the profiling totally in assembly.
1160 *
1161 * Currently this isn't too much of an issue (performance wise),
1162 * we can take more than 100K local irqs per second on a 100 MHz P5.
1163 */
1164
1165 if ((++vic_tick[cpu] & 0x7) != 0)
1166 return;
1167 /* get here every 16 ticks (about every 1/6 of a second) */
1168
1169 /* Change our priority to give someone else a chance at getting
1170 * the IRQ. The algorithm goes like this:
1171 *
1172 * In the VIC, the dynamically routed interrupt is always
1173 * handled by the lowest priority eligible (i.e. receiving
1174 * interrupts) CPU. If >1 eligible CPUs are equal lowest, the
1175 * lowest processor number gets it.
1176 *
1177 * The priority of a CPU is controlled by a special per-CPU
1178 * VIC priority register which is 3 bits wide 0 being lowest
1179 * and 7 highest priority..
1180 *
1181 * Therefore we subtract the average number of interrupts from
1182 * the number we've fielded. If this number is negative, we
1183 * lower the activity count and if it is positive, we raise
1184 * it.
1185 *
1186 * I'm afraid this still leads to odd looking interrupt counts:
1187 * the totals are all roughly equal, but the individual ones
1188 * look rather skewed.
1189 *
1190 * FIXME: This algorithm is total crap when mixed with SMP
1191 * affinity code since we now try to even up the interrupt
1192 * counts when an affinity binding is keeping them on a
1193 * particular CPU*/
1194 weight = (vic_intr_count[cpu] * voyager_extended_cpus
1195 - vic_intr_total) >> 4;
1196 weight += 4;
1197 if (weight > 7)
1198 weight = 7;
1199 if (weight < 0)
1200 weight = 0;
1201
1202 outb((__u8) weight, VIC_PRIORITY_REGISTER);
1203
1204#ifdef VOYAGER_DEBUG
1205 if ((vic_tick[cpu] & 0xFFF) == 0) {
1206 /* print this message roughly every 25 secs */
1207 printk("VOYAGER SMP: vic_tick[%d] = %lu, weight = %ld\n",
1208 cpu, vic_tick[cpu], weight);
1209 }
1210#endif
1211}
1212
1213/* setup the profiling timer */
1214int setup_profiling_timer(unsigned int multiplier)
1215{
1216 int i;
1217
1218 if ((!multiplier))
1219 return -EINVAL;
1220
1221 /*
1222 * Set the new multiplier for each CPU. CPUs don't start using the
1223 * new values until the next timer interrupt in which they do process
1224 * accounting.
1225 */
1226 for (i = 0; i < nr_cpu_ids; ++i)
1227 per_cpu(prof_multiplier, i) = multiplier;
1228
1229 return 0;
1230}
1231
1232/* This is a bit of a mess, but forced on us by the genirq changes
1233 * there's no genirq handler that really does what voyager wants
1234 * so hack it up with the simple IRQ handler */
1235static void handle_vic_irq(unsigned int irq, struct irq_desc *desc)
1236{
1237 before_handle_vic_irq(irq);
1238 handle_simple_irq(irq, desc);
1239 after_handle_vic_irq(irq);
1240}
1241
1242/* The CPIs are handled in the per cpu 8259s, so they must be
1243 * enabled to be received: FIX: enabling the CPIs in the early
1244 * boot sequence interferes with bug checking; enable them later
1245 * on in smp_init */
1246#define VIC_SET_GATE(cpi, vector) \
1247 set_intr_gate((cpi) + VIC_DEFAULT_CPI_BASE, (vector))
1248#define QIC_SET_GATE(cpi, vector) \
1249 set_intr_gate((cpi) + QIC_DEFAULT_CPI_BASE, (vector))
1250
1251void __init voyager_smp_intr_init(void)
1252{
1253 int i;
1254
1255 /* initialize the per cpu irq mask to all disabled */
1256 for (i = 0; i < nr_cpu_ids; i++)
1257 vic_irq_mask[i] = 0xFFFF;
1258
1259 VIC_SET_GATE(VIC_CPI_LEVEL0, vic_cpi_interrupt);
1260
1261 VIC_SET_GATE(VIC_SYS_INT, vic_sys_interrupt);
1262 VIC_SET_GATE(VIC_CMN_INT, vic_cmn_interrupt);
1263
1264 QIC_SET_GATE(QIC_TIMER_CPI, qic_timer_interrupt);
1265 QIC_SET_GATE(QIC_INVALIDATE_CPI, qic_invalidate_interrupt);
1266 QIC_SET_GATE(QIC_RESCHEDULE_CPI, qic_reschedule_interrupt);
1267 QIC_SET_GATE(QIC_ENABLE_IRQ_CPI, qic_enable_irq_interrupt);
1268 QIC_SET_GATE(QIC_CALL_FUNCTION_CPI, qic_call_function_interrupt);
1269
1270 /* now put the VIC descriptor into the first 48 IRQs
1271 *
1272 * This is for later: first 16 correspond to PC IRQs; next 16
1273 * are Primary MC IRQs and final 16 are Secondary MC IRQs */
1274 for (i = 0; i < 48; i++)
1275 set_irq_chip_and_handler(i, &vic_chip, handle_vic_irq);
1276}
1277
1278/* send a CPI at level cpi to a set of cpus in cpuset (set 1 bit per
1279 * processor to receive CPI */
1280static void send_CPI(__u32 cpuset, __u8 cpi)
1281{
1282 int cpu;
1283 __u32 quad_cpuset = (cpuset & voyager_quad_processors);
1284
1285 if (cpi < VIC_START_FAKE_CPI) {
1286 /* fake CPI are only used for booting, so send to the
1287 * extended quads as well---Quads must be VIC booted */
1288 outb((__u8) (cpuset), VIC_CPI_Registers[cpi]);
1289 return;
1290 }
1291 if (quad_cpuset)
1292 send_QIC_CPI(quad_cpuset, cpi);
1293 cpuset &= ~quad_cpuset;
1294 cpuset &= 0xff; /* only first 8 CPUs vaild for VIC CPI */
1295 if (cpuset == 0)
1296 return;
1297 for_each_online_cpu(cpu) {
1298 if (cpuset & (1 << cpu))
1299 set_bit(cpi, &vic_cpi_mailbox[cpu]);
1300 }
1301 if (cpuset)
1302 outb((__u8) cpuset, VIC_CPI_Registers[VIC_CPI_LEVEL0]);
1303}
1304
1305/* Acknowledge receipt of CPI in the QIC, clear in QIC hardware and
1306 * set the cache line to shared by reading it.
1307 *
1308 * DON'T make this inline otherwise the cache line read will be
1309 * optimised away
1310 * */
1311static int ack_QIC_CPI(__u8 cpi)
1312{
1313 __u8 cpu = hard_smp_processor_id();
1314
1315 cpi &= 7;
1316
1317 outb(1 << cpi, QIC_INTERRUPT_CLEAR1);
1318 return voyager_quad_cpi_addr[cpu]->qic_cpi[cpi].cpi;
1319}
1320
1321static void ack_special_QIC_CPI(__u8 cpi)
1322{
1323 switch (cpi) {
1324 case VIC_CMN_INT:
1325 outb(QIC_CMN_INT, QIC_INTERRUPT_CLEAR0);
1326 break;
1327 case VIC_SYS_INT:
1328 outb(QIC_SYS_INT, QIC_INTERRUPT_CLEAR0);
1329 break;
1330 }
1331 /* also clear at the VIC, just in case (nop for non-extended proc) */
1332 ack_VIC_CPI(cpi);
1333}
1334
1335/* Acknowledge receipt of CPI in the VIC (essentially an EOI) */
1336static void ack_VIC_CPI(__u8 cpi)
1337{
1338#ifdef VOYAGER_DEBUG
1339 unsigned long flags;
1340 __u16 isr;
1341 __u8 cpu = smp_processor_id();
1342
1343 local_irq_save(flags);
1344 isr = vic_read_isr();
1345 if ((isr & (1 << (cpi & 7))) == 0) {
1346 printk("VOYAGER SMP: CPU%d lost CPI%d\n", cpu, cpi);
1347 }
1348#endif
1349 /* send specific EOI; the two system interrupts have
1350 * bit 4 set for a separate vector but behave as the
1351 * corresponding 3 bit intr */
1352 outb_p(0x60 | (cpi & 7), 0x20);
1353
1354#ifdef VOYAGER_DEBUG
1355 if ((vic_read_isr() & (1 << (cpi & 7))) != 0) {
1356 printk("VOYAGER SMP: CPU%d still asserting CPI%d\n", cpu, cpi);
1357 }
1358 local_irq_restore(flags);
1359#endif
1360}
1361
1362/* cribbed with thanks from irq.c */
1363#define __byte(x,y) (((unsigned char *)&(y))[x])
1364#define cached_21(cpu) (__byte(0,vic_irq_mask[cpu]))
1365#define cached_A1(cpu) (__byte(1,vic_irq_mask[cpu]))
1366
1367static unsigned int startup_vic_irq(unsigned int irq)
1368{
1369 unmask_vic_irq(irq);
1370
1371 return 0;
1372}
1373
1374/* The enable and disable routines. This is where we run into
1375 * conflicting architectural philosophy. Fundamentally, the voyager
1376 * architecture does not expect to have to disable interrupts globally
1377 * (the IRQ controllers belong to each CPU). The processor masquerade
1378 * which is used to start the system shouldn't be used in a running OS
1379 * since it will cause great confusion if two separate CPUs drive to
1380 * the same IRQ controller (I know, I've tried it).
1381 *
1382 * The solution is a variant on the NCR lazy SPL design:
1383 *
1384 * 1) To disable an interrupt, do nothing (other than set the
1385 * IRQ_DISABLED flag). This dares the interrupt actually to arrive.
1386 *
1387 * 2) If the interrupt dares to come in, raise the local mask against
1388 * it (this will result in all the CPU masks being raised
1389 * eventually).
1390 *
1391 * 3) To enable the interrupt, lower the mask on the local CPU and
1392 * broadcast an Interrupt enable CPI which causes all other CPUs to
1393 * adjust their masks accordingly. */
1394
1395static void unmask_vic_irq(unsigned int irq)
1396{
1397 /* linux doesn't to processor-irq affinity, so enable on
1398 * all CPUs we know about */
1399 int cpu = smp_processor_id(), real_cpu;
1400 __u16 mask = (1 << irq);
1401 __u32 processorList = 0;
1402 unsigned long flags;
1403
1404 VDEBUG(("VOYAGER: unmask_vic_irq(%d) CPU%d affinity 0x%lx\n",
1405 irq, cpu, cpu_irq_affinity[cpu]));
1406 spin_lock_irqsave(&vic_irq_lock, flags);
1407 for_each_online_cpu(real_cpu) {
1408 if (!(voyager_extended_vic_processors & (1 << real_cpu)))
1409 continue;
1410 if (!(cpu_irq_affinity[real_cpu] & mask)) {
1411 /* irq has no affinity for this CPU, ignore */
1412 continue;
1413 }
1414 if (real_cpu == cpu) {
1415 enable_local_vic_irq(irq);
1416 } else if (vic_irq_mask[real_cpu] & mask) {
1417 vic_irq_enable_mask[real_cpu] |= mask;
1418 processorList |= (1 << real_cpu);
1419 }
1420 }
1421 spin_unlock_irqrestore(&vic_irq_lock, flags);
1422 if (processorList)
1423 send_CPI(processorList, VIC_ENABLE_IRQ_CPI);
1424}
1425
1426static void mask_vic_irq(unsigned int irq)
1427{
1428 /* lazy disable, do nothing */
1429}
1430
1431static void enable_local_vic_irq(unsigned int irq)
1432{
1433 __u8 cpu = smp_processor_id();
1434 __u16 mask = ~(1 << irq);
1435 __u16 old_mask = vic_irq_mask[cpu];
1436
1437 vic_irq_mask[cpu] &= mask;
1438 if (vic_irq_mask[cpu] == old_mask)
1439 return;
1440
1441 VDEBUG(("VOYAGER DEBUG: Enabling irq %d in hardware on CPU %d\n",
1442 irq, cpu));
1443
1444 if (irq & 8) {
1445 outb_p(cached_A1(cpu), 0xA1);
1446 (void)inb_p(0xA1);
1447 } else {
1448 outb_p(cached_21(cpu), 0x21);
1449 (void)inb_p(0x21);
1450 }
1451}
1452
1453static void disable_local_vic_irq(unsigned int irq)
1454{
1455 __u8 cpu = smp_processor_id();
1456 __u16 mask = (1 << irq);
1457 __u16 old_mask = vic_irq_mask[cpu];
1458
1459 if (irq == 7)
1460 return;
1461
1462 vic_irq_mask[cpu] |= mask;
1463 if (old_mask == vic_irq_mask[cpu])
1464 return;
1465
1466 VDEBUG(("VOYAGER DEBUG: Disabling irq %d in hardware on CPU %d\n",
1467 irq, cpu));
1468
1469 if (irq & 8) {
1470 outb_p(cached_A1(cpu), 0xA1);
1471 (void)inb_p(0xA1);
1472 } else {
1473 outb_p(cached_21(cpu), 0x21);
1474 (void)inb_p(0x21);
1475 }
1476}
1477
1478/* The VIC is level triggered, so the ack can only be issued after the
1479 * interrupt completes. However, we do Voyager lazy interrupt
1480 * handling here: It is an extremely expensive operation to mask an
1481 * interrupt in the vic, so we merely set a flag (IRQ_DISABLED). If
1482 * this interrupt actually comes in, then we mask and ack here to push
1483 * the interrupt off to another CPU */
1484static void before_handle_vic_irq(unsigned int irq)
1485{
1486 irq_desc_t *desc = irq_to_desc(irq);
1487 __u8 cpu = smp_processor_id();
1488
1489 _raw_spin_lock(&vic_irq_lock);
1490 vic_intr_total++;
1491 vic_intr_count[cpu]++;
1492
1493 if (!(cpu_irq_affinity[cpu] & (1 << irq))) {
1494 /* The irq is not in our affinity mask, push it off
1495 * onto another CPU */
1496 VDEBUG(("VOYAGER DEBUG: affinity triggered disable of irq %d "
1497 "on cpu %d\n", irq, cpu));
1498 disable_local_vic_irq(irq);
1499 /* set IRQ_INPROGRESS to prevent the handler in irq.c from
1500 * actually calling the interrupt routine */
1501 desc->status |= IRQ_REPLAY | IRQ_INPROGRESS;
1502 } else if (desc->status & IRQ_DISABLED) {
1503 /* Damn, the interrupt actually arrived, do the lazy
1504 * disable thing. The interrupt routine in irq.c will
1505 * not handle a IRQ_DISABLED interrupt, so nothing more
1506 * need be done here */
1507 VDEBUG(("VOYAGER DEBUG: lazy disable of irq %d on CPU %d\n",
1508 irq, cpu));
1509 disable_local_vic_irq(irq);
1510 desc->status |= IRQ_REPLAY;
1511 } else {
1512 desc->status &= ~IRQ_REPLAY;
1513 }
1514
1515 _raw_spin_unlock(&vic_irq_lock);
1516}
1517
1518/* Finish the VIC interrupt: basically mask */
1519static void after_handle_vic_irq(unsigned int irq)
1520{
1521 irq_desc_t *desc = irq_to_desc(irq);
1522
1523 _raw_spin_lock(&vic_irq_lock);
1524 {
1525 unsigned int status = desc->status & ~IRQ_INPROGRESS;
1526#ifdef VOYAGER_DEBUG
1527 __u16 isr;
1528#endif
1529
1530 desc->status = status;
1531 if ((status & IRQ_DISABLED))
1532 disable_local_vic_irq(irq);
1533#ifdef VOYAGER_DEBUG
1534 /* DEBUG: before we ack, check what's in progress */
1535 isr = vic_read_isr();
1536 if ((isr & (1 << irq) && !(status & IRQ_REPLAY)) == 0) {
1537 int i;
1538 __u8 cpu = smp_processor_id();
1539 __u8 real_cpu;
1540 int mask; /* Um... initialize me??? --RR */
1541
1542 printk("VOYAGER SMP: CPU%d lost interrupt %d\n",
1543 cpu, irq);
1544 for_each_possible_cpu(real_cpu, mask) {
1545
1546 outb(VIC_CPU_MASQUERADE_ENABLE | real_cpu,
1547 VIC_PROCESSOR_ID);
1548 isr = vic_read_isr();
1549 if (isr & (1 << irq)) {
1550 printk
1551 ("VOYAGER SMP: CPU%d ack irq %d\n",
1552 real_cpu, irq);
1553 ack_vic_irq(irq);
1554 }
1555 outb(cpu, VIC_PROCESSOR_ID);
1556 }
1557 }
1558#endif /* VOYAGER_DEBUG */
1559 /* as soon as we ack, the interrupt is eligible for
1560 * receipt by another CPU so everything must be in
1561 * order here */
1562 ack_vic_irq(irq);
1563 if (status & IRQ_REPLAY) {
1564 /* replay is set if we disable the interrupt
1565 * in the before_handle_vic_irq() routine, so
1566 * clear the in progress bit here to allow the
1567 * next CPU to handle this correctly */
1568 desc->status &= ~(IRQ_REPLAY | IRQ_INPROGRESS);
1569 }
1570#ifdef VOYAGER_DEBUG
1571 isr = vic_read_isr();
1572 if ((isr & (1 << irq)) != 0)
1573 printk("VOYAGER SMP: after_handle_vic_irq() after "
1574 "ack irq=%d, isr=0x%x\n", irq, isr);
1575#endif /* VOYAGER_DEBUG */
1576 }
1577 _raw_spin_unlock(&vic_irq_lock);
1578
1579 /* All code after this point is out of the main path - the IRQ
1580 * may be intercepted by another CPU if reasserted */
1581}
1582
1583/* Linux processor - interrupt affinity manipulations.
1584 *
1585 * For each processor, we maintain a 32 bit irq affinity mask.
1586 * Initially it is set to all 1's so every processor accepts every
1587 * interrupt. In this call, we change the processor's affinity mask:
1588 *
1589 * Change from enable to disable:
1590 *
1591 * If the interrupt ever comes in to the processor, we will disable it
1592 * and ack it to push it off to another CPU, so just accept the mask here.
1593 *
1594 * Change from disable to enable:
1595 *
1596 * change the mask and then do an interrupt enable CPI to re-enable on
1597 * the selected processors */
1598
1599void set_vic_irq_affinity(unsigned int irq, const struct cpumask *mask)
1600{
1601 /* Only extended processors handle interrupts */
1602 unsigned long real_mask;
1603 unsigned long irq_mask = 1 << irq;
1604 int cpu;
1605
1606 real_mask = cpus_addr(*mask)[0] & voyager_extended_vic_processors;
1607
1608 if (cpus_addr(*mask)[0] == 0)
1609 /* can't have no CPUs to accept the interrupt -- extremely
1610 * bad things will happen */
1611 return;
1612
1613 if (irq == 0)
1614 /* can't change the affinity of the timer IRQ. This
1615 * is due to the constraint in the voyager
1616 * architecture that the CPI also comes in on and IRQ
1617 * line and we have chosen IRQ0 for this. If you
1618 * raise the mask on this interrupt, the processor
1619 * will no-longer be able to accept VIC CPIs */
1620 return;
1621
1622 if (irq >= 32)
1623 /* You can only have 32 interrupts in a voyager system
1624 * (and 32 only if you have a secondary microchannel
1625 * bus) */
1626 return;
1627
1628 for_each_online_cpu(cpu) {
1629 unsigned long cpu_mask = 1 << cpu;
1630
1631 if (cpu_mask & real_mask) {
1632 /* enable the interrupt for this cpu */
1633 cpu_irq_affinity[cpu] |= irq_mask;
1634 } else {
1635 /* disable the interrupt for this cpu */
1636 cpu_irq_affinity[cpu] &= ~irq_mask;
1637 }
1638 }
1639 /* this is magic, we now have the correct affinity maps, so
1640 * enable the interrupt. This will send an enable CPI to
1641 * those CPUs who need to enable it in their local masks,
1642 * causing them to correct for the new affinity . If the
1643 * interrupt is currently globally disabled, it will simply be
1644 * disabled again as it comes in (voyager lazy disable). If
1645 * the affinity map is tightened to disable the interrupt on a
1646 * cpu, it will be pushed off when it comes in */
1647 unmask_vic_irq(irq);
1648}
1649
1650static void ack_vic_irq(unsigned int irq)
1651{
1652 if (irq & 8) {
1653 outb(0x62, 0x20); /* Specific EOI to cascade */
1654 outb(0x60 | (irq & 7), 0xA0);
1655 } else {
1656 outb(0x60 | (irq & 7), 0x20);
1657 }
1658}
1659
1660/* enable the CPIs. In the VIC, the CPIs are delivered by the 8259
1661 * but are not vectored by it. This means that the 8259 mask must be
1662 * lowered to receive them */
1663static __init void vic_enable_cpi(void)
1664{
1665 __u8 cpu = smp_processor_id();
1666
1667 /* just take a copy of the current mask (nop for boot cpu) */
1668 vic_irq_mask[cpu] = vic_irq_mask[boot_cpu_id];
1669
1670 enable_local_vic_irq(VIC_CPI_LEVEL0);
1671 enable_local_vic_irq(VIC_CPI_LEVEL1);
1672 /* for sys int and cmn int */
1673 enable_local_vic_irq(7);
1674
1675 if (is_cpu_quad()) {
1676 outb(QIC_DEFAULT_MASK0, QIC_MASK_REGISTER0);
1677 outb(QIC_CPI_ENABLE, QIC_MASK_REGISTER1);
1678 VDEBUG(("VOYAGER SMP: QIC ENABLE CPI: CPU%d: MASK 0x%x\n",
1679 cpu, QIC_CPI_ENABLE));
1680 }
1681
1682 VDEBUG(("VOYAGER SMP: ENABLE CPI: CPU%d: MASK 0x%x\n",
1683 cpu, vic_irq_mask[cpu]));
1684}
1685
1686void voyager_smp_dump()
1687{
1688 int old_cpu = smp_processor_id(), cpu;
1689
1690 /* dump the interrupt masks of each processor */
1691 for_each_online_cpu(cpu) {
1692 __u16 imr, isr, irr;
1693 unsigned long flags;
1694
1695 local_irq_save(flags);
1696 outb(VIC_CPU_MASQUERADE_ENABLE | cpu, VIC_PROCESSOR_ID);
1697 imr = (inb(0xa1) << 8) | inb(0x21);
1698 outb(0x0a, 0xa0);
1699 irr = inb(0xa0) << 8;
1700 outb(0x0a, 0x20);
1701 irr |= inb(0x20);
1702 outb(0x0b, 0xa0);
1703 isr = inb(0xa0) << 8;
1704 outb(0x0b, 0x20);
1705 isr |= inb(0x20);
1706 outb(old_cpu, VIC_PROCESSOR_ID);
1707 local_irq_restore(flags);
1708 printk("\tCPU%d: mask=0x%x, IMR=0x%x, IRR=0x%x, ISR=0x%x\n",
1709 cpu, vic_irq_mask[cpu], imr, irr, isr);
1710#if 0
1711 /* These lines are put in to try to unstick an un ack'd irq */
1712 if (isr != 0) {
1713 int irq;
1714 for (irq = 0; irq < 16; irq++) {
1715 if (isr & (1 << irq)) {
1716 printk("\tCPU%d: ack irq %d\n",
1717 cpu, irq);
1718 local_irq_save(flags);
1719 outb(VIC_CPU_MASQUERADE_ENABLE | cpu,
1720 VIC_PROCESSOR_ID);
1721 ack_vic_irq(irq);
1722 outb(old_cpu, VIC_PROCESSOR_ID);
1723 local_irq_restore(flags);
1724 }
1725 }
1726 }
1727#endif
1728 }
1729}
1730
1731void smp_voyager_power_off(void *dummy)
1732{
1733 if (smp_processor_id() == boot_cpu_id)
1734 voyager_power_off();
1735 else
1736 smp_stop_cpu_function(NULL);
1737}
1738
1739static void __init voyager_smp_prepare_cpus(unsigned int max_cpus)
1740{
1741 /* FIXME: ignore max_cpus for now */
1742 smp_boot_cpus();
1743}
1744
1745static void __cpuinit voyager_smp_prepare_boot_cpu(void)
1746{
1747 int cpu = smp_processor_id();
1748 switch_to_new_gdt(cpu);
1749
1750 cpu_set(cpu, cpu_online_map);
1751 cpu_set(cpu, cpu_callout_map);
1752 cpu_set(cpu, cpu_possible_map);
1753 cpu_set(cpu, cpu_present_map);
1754
1755}
1756
1757static int __cpuinit voyager_cpu_up(unsigned int cpu)
1758{
1759 /* This only works at boot for x86. See "rewrite" above. */
1760 if (cpu_isset(cpu, smp_commenced_mask))
1761 return -ENOSYS;
1762
1763 /* In case one didn't come up */
1764 if (!cpu_isset(cpu, cpu_callin_map))
1765 return -EIO;
1766 /* Unleash the CPU! */
1767 cpu_set(cpu, smp_commenced_mask);
1768 while (!cpu_online(cpu))
1769 mb();
1770 return 0;
1771}
1772
1773static void __init voyager_smp_cpus_done(unsigned int max_cpus)
1774{
1775 zap_low_mappings();
1776}
1777
1778void __init smp_setup_processor_id(void)
1779{
1780 current_thread_info()->cpu = hard_smp_processor_id();
1781}
1782
1783static void voyager_send_call_func(const struct cpumask *callmask)
1784{
1785 __u32 mask = cpus_addr(*callmask)[0] & ~(1 << smp_processor_id());
1786 send_CPI(mask, VIC_CALL_FUNCTION_CPI);
1787}
1788
1789static void voyager_send_call_func_single(int cpu)
1790{
1791 send_CPI(1 << cpu, VIC_CALL_FUNCTION_SINGLE_CPI);
1792}
1793
1794struct smp_ops smp_ops = {
1795 .smp_prepare_boot_cpu = voyager_smp_prepare_boot_cpu,
1796 .smp_prepare_cpus = voyager_smp_prepare_cpus,
1797 .cpu_up = voyager_cpu_up,
1798 .smp_cpus_done = voyager_smp_cpus_done,
1799
1800 .smp_send_stop = voyager_smp_send_stop,
1801 .smp_send_reschedule = voyager_smp_send_reschedule,
1802
1803 .send_call_func_ipi = voyager_send_call_func,
1804 .send_call_func_single_ipi = voyager_send_call_func_single,
1805};
diff --git a/arch/x86/mach-voyager/voyager_thread.c b/arch/x86/mach-voyager/voyager_thread.c
deleted file mode 100644
index 15464a20fb3..00000000000
--- a/arch/x86/mach-voyager/voyager_thread.c
+++ /dev/null
@@ -1,128 +0,0 @@
1/* -*- mode: c; c-basic-offset: 8 -*- */
2
3/* Copyright (C) 2001
4 *
5 * Author: J.E.J.Bottomley@HansenPartnership.com
6 *
7 * This module provides the machine status monitor thread for the
8 * voyager architecture. This allows us to monitor the machine
9 * environment (temp, voltage, fan function) and the front panel and
10 * internal UPS. If a fault is detected, this thread takes corrective
11 * action (usually just informing init)
12 * */
13
14#include <linux/module.h>
15#include <linux/mm.h>
16#include <linux/kernel_stat.h>
17#include <linux/delay.h>
18#include <linux/mc146818rtc.h>
19#include <linux/init.h>
20#include <linux/bootmem.h>
21#include <linux/kmod.h>
22#include <linux/completion.h>
23#include <linux/sched.h>
24#include <linux/kthread.h>
25#include <asm/desc.h>
26#include <asm/voyager.h>
27#include <asm/vic.h>
28#include <asm/mtrr.h>
29#include <asm/msr.h>
30
31struct task_struct *voyager_thread;
32static __u8 set_timeout;
33
34static int execute(const char *string)
35{
36 int ret;
37
38 char *envp[] = {
39 "HOME=/",
40 "TERM=linux",
41 "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
42 NULL,
43 };
44 char *argv[] = {
45 "/bin/bash",
46 "-c",
47 (char *)string,
48 NULL,
49 };
50
51 if ((ret =
52 call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC)) != 0) {
53 printk(KERN_ERR "Voyager failed to run \"%s\": %i\n", string,
54 ret);
55 }
56 return ret;
57}
58
59static void check_from_kernel(void)
60{
61 if (voyager_status.switch_off) {
62
63 /* FIXME: This should be configurable via proc */
64 execute("umask 600; echo 0 > /etc/initrunlvl; kill -HUP 1");
65 } else if (voyager_status.power_fail) {
66 VDEBUG(("Voyager daemon detected AC power failure\n"));
67
68 /* FIXME: This should be configureable via proc */
69 execute("umask 600; echo F > /etc/powerstatus; kill -PWR 1");
70 set_timeout = 1;
71 }
72}
73
74static void check_continuing_condition(void)
75{
76 if (voyager_status.power_fail) {
77 __u8 data;
78 voyager_cat_psi(VOYAGER_PSI_SUBREAD,
79 VOYAGER_PSI_AC_FAIL_REG, &data);
80 if ((data & 0x1f) == 0) {
81 /* all power restored */
82 printk(KERN_NOTICE
83 "VOYAGER AC power restored, cancelling shutdown\n");
84 /* FIXME: should be user configureable */
85 execute
86 ("umask 600; echo O > /etc/powerstatus; kill -PWR 1");
87 set_timeout = 0;
88 }
89 }
90}
91
92static int thread(void *unused)
93{
94 printk(KERN_NOTICE "Voyager starting monitor thread\n");
95
96 for (;;) {
97 set_current_state(TASK_INTERRUPTIBLE);
98 schedule_timeout(set_timeout ? HZ : MAX_SCHEDULE_TIMEOUT);
99
100 VDEBUG(("Voyager Daemon awoken\n"));
101 if (voyager_status.request_from_kernel == 0) {
102 /* probably awoken from timeout */
103 check_continuing_condition();
104 } else {
105 check_from_kernel();
106 voyager_status.request_from_kernel = 0;
107 }
108 }
109}
110
111static int __init voyager_thread_start(void)
112{
113 voyager_thread = kthread_run(thread, NULL, "kvoyagerd");
114 if (IS_ERR(voyager_thread)) {
115 printk(KERN_ERR
116 "Voyager: Failed to create system monitor thread.\n");
117 return PTR_ERR(voyager_thread);
118 }
119 return 0;
120}
121
122static void __exit voyager_thread_stop(void)
123{
124 kthread_stop(voyager_thread);
125}
126
127module_init(voyager_thread_start);
128module_exit(voyager_thread_stop);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 29644175490..a03b7279efa 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1,74 +1,79 @@
1/* 1/*
2 * Copyright (C) 1995 Linus Torvalds 2 * Copyright (C) 1995 Linus Torvalds
3 * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs. 3 * Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs.
4 * Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar
4 */ 5 */
5
6#include <linux/signal.h>
7#include <linux/sched.h>
8#include <linux/kernel.h>
9#include <linux/errno.h>
10#include <linux/string.h>
11#include <linux/types.h>
12#include <linux/ptrace.h>
13#include <linux/mmiotrace.h>
14#include <linux/mman.h>
15#include <linux/mm.h>
16#include <linux/smp.h>
17#include <linux/interrupt.h> 6#include <linux/interrupt.h>
18#include <linux/init.h> 7#include <linux/mmiotrace.h>
19#include <linux/tty.h> 8#include <linux/bootmem.h>
20#include <linux/vt_kern.h> /* For unblank_screen() */
21#include <linux/compiler.h> 9#include <linux/compiler.h>
22#include <linux/highmem.h> 10#include <linux/highmem.h>
23#include <linux/bootmem.h> /* for max_low_pfn */
24#include <linux/vmalloc.h>
25#include <linux/module.h>
26#include <linux/kprobes.h> 11#include <linux/kprobes.h>
27#include <linux/uaccess.h> 12#include <linux/uaccess.h>
13#include <linux/vmalloc.h>
14#include <linux/vt_kern.h>
15#include <linux/signal.h>
16#include <linux/kernel.h>
17#include <linux/ptrace.h>
18#include <linux/string.h>
19#include <linux/module.h>
28#include <linux/kdebug.h> 20#include <linux/kdebug.h>
21#include <linux/errno.h>
29#include <linux/magic.h> 22#include <linux/magic.h>
23#include <linux/sched.h>
24#include <linux/types.h>
25#include <linux/init.h>
26#include <linux/mman.h>
27#include <linux/tty.h>
28#include <linux/smp.h>
29#include <linux/mm.h>
30
31#include <asm-generic/sections.h>
30 32
31#include <asm/system.h>
32#include <asm/desc.h>
33#include <asm/segment.h>
34#include <asm/pgalloc.h>
35#include <asm/smp.h>
36#include <asm/tlbflush.h> 33#include <asm/tlbflush.h>
34#include <asm/pgalloc.h>
35#include <asm/segment.h>
36#include <asm/system.h>
37#include <asm/proto.h> 37#include <asm/proto.h>
38#include <asm-generic/sections.h>
39#include <asm/traps.h> 38#include <asm/traps.h>
39#include <asm/desc.h>
40 40
41/* 41/*
42 * Page fault error code bits 42 * Page fault error code bits:
43 * bit 0 == 0 means no page found, 1 means protection fault 43 *
44 * bit 1 == 0 means read, 1 means write 44 * bit 0 == 0: no page found 1: protection fault
45 * bit 2 == 0 means kernel, 1 means user-mode 45 * bit 1 == 0: read access 1: write access
46 * bit 3 == 1 means use of reserved bit detected 46 * bit 2 == 0: kernel-mode access 1: user-mode access
47 * bit 4 == 1 means fault was an instruction fetch 47 * bit 3 == 1: use of reserved bit detected
48 * bit 4 == 1: fault was an instruction fetch
48 */ 49 */
49#define PF_PROT (1<<0) 50enum x86_pf_error_code {
50#define PF_WRITE (1<<1)
51#define PF_USER (1<<2)
52#define PF_RSVD (1<<3)
53#define PF_INSTR (1<<4)
54 51
52 PF_PROT = 1 << 0,
53 PF_WRITE = 1 << 1,
54 PF_USER = 1 << 2,
55 PF_RSVD = 1 << 3,
56 PF_INSTR = 1 << 4,
57};
58
59/*
60 * Returns 0 if mmiotrace is disabled, or if the fault is not
61 * handled by mmiotrace:
62 */
55static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) 63static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
56{ 64{
57#ifdef CONFIG_MMIOTRACE
58 if (unlikely(is_kmmio_active())) 65 if (unlikely(is_kmmio_active()))
59 if (kmmio_handler(regs, addr) == 1) 66 if (kmmio_handler(regs, addr) == 1)
60 return -1; 67 return -1;
61#endif
62 return 0; 68 return 0;
63} 69}
64 70
65static inline int notify_page_fault(struct pt_regs *regs) 71static inline int notify_page_fault(struct pt_regs *regs)
66{ 72{
67#ifdef CONFIG_KPROBES
68 int ret = 0; 73 int ret = 0;
69 74
70 /* kprobe_running() needs smp_processor_id() */ 75 /* kprobe_running() needs smp_processor_id() */
71 if (!user_mode_vm(regs)) { 76 if (kprobes_built_in() && !user_mode_vm(regs)) {
72 preempt_disable(); 77 preempt_disable();
73 if (kprobe_running() && kprobe_fault_handler(regs, 14)) 78 if (kprobe_running() && kprobe_fault_handler(regs, 14))
74 ret = 1; 79 ret = 1;
@@ -76,29 +81,76 @@ static inline int notify_page_fault(struct pt_regs *regs)
76 } 81 }
77 82
78 return ret; 83 return ret;
79#else
80 return 0;
81#endif
82} 84}
83 85
84/* 86/*
85 * X86_32 87 * Prefetch quirks:
86 * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
87 * Check that here and ignore it.
88 * 88 *
89 * X86_64 89 * 32-bit mode:
90 * Sometimes the CPU reports invalid exceptions on prefetch.
91 * Check that here and ignore it.
92 * 90 *
93 * Opcode checker based on code by Richard Brunner 91 * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
92 * Check that here and ignore it.
93 *
94 * 64-bit mode:
95 *
96 * Sometimes the CPU reports invalid exceptions on prefetch.
97 * Check that here and ignore it.
98 *
99 * Opcode checker based on code by Richard Brunner.
94 */ 100 */
95static int is_prefetch(struct pt_regs *regs, unsigned long error_code, 101static inline int
96 unsigned long addr) 102check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr,
103 unsigned char opcode, int *prefetch)
97{ 104{
105 unsigned char instr_hi = opcode & 0xf0;
106 unsigned char instr_lo = opcode & 0x0f;
107
108 switch (instr_hi) {
109 case 0x20:
110 case 0x30:
111 /*
112 * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes.
113 * In X86_64 long mode, the CPU will signal invalid
114 * opcode if some of these prefixes are present so
115 * X86_64 will never get here anyway
116 */
117 return ((instr_lo & 7) == 0x6);
118#ifdef CONFIG_X86_64
119 case 0x40:
120 /*
121 * In AMD64 long mode 0x40..0x4F are valid REX prefixes
122 * Need to figure out under what instruction mode the
123 * instruction was issued. Could check the LDT for lm,
124 * but for now it's good enough to assume that long
125 * mode only uses well known segments or kernel.
126 */
127 return (!user_mode(regs)) || (regs->cs == __USER_CS);
128#endif
129 case 0x60:
130 /* 0x64 thru 0x67 are valid prefixes in all modes. */
131 return (instr_lo & 0xC) == 0x4;
132 case 0xF0:
133 /* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */
134 return !instr_lo || (instr_lo>>1) == 1;
135 case 0x00:
136 /* Prefetch instruction is 0x0F0D or 0x0F18 */
137 if (probe_kernel_address(instr, opcode))
138 return 0;
139
140 *prefetch = (instr_lo == 0xF) &&
141 (opcode == 0x0D || opcode == 0x18);
142 return 0;
143 default:
144 return 0;
145 }
146}
147
148static int
149is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
150{
151 unsigned char *max_instr;
98 unsigned char *instr; 152 unsigned char *instr;
99 int scan_more = 1;
100 int prefetch = 0; 153 int prefetch = 0;
101 unsigned char *max_instr;
102 154
103 /* 155 /*
104 * If it was a exec (instruction fetch) fault on NX page, then 156 * If it was a exec (instruction fetch) fault on NX page, then
@@ -107,106 +159,170 @@ static int is_prefetch(struct pt_regs *regs, unsigned long error_code,
107 if (error_code & PF_INSTR) 159 if (error_code & PF_INSTR)
108 return 0; 160 return 0;
109 161
110 instr = (unsigned char *)convert_ip_to_linear(current, regs); 162 instr = (void *)convert_ip_to_linear(current, regs);
111 max_instr = instr + 15; 163 max_instr = instr + 15;
112 164
113 if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE) 165 if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
114 return 0; 166 return 0;
115 167
116 while (scan_more && instr < max_instr) { 168 while (instr < max_instr) {
117 unsigned char opcode; 169 unsigned char opcode;
118 unsigned char instr_hi;
119 unsigned char instr_lo;
120 170
121 if (probe_kernel_address(instr, opcode)) 171 if (probe_kernel_address(instr, opcode))
122 break; 172 break;
123 173
124 instr_hi = opcode & 0xf0;
125 instr_lo = opcode & 0x0f;
126 instr++; 174 instr++;
127 175
128 switch (instr_hi) { 176 if (!check_prefetch_opcode(regs, instr, opcode, &prefetch))
129 case 0x20:
130 case 0x30:
131 /*
132 * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes.
133 * In X86_64 long mode, the CPU will signal invalid
134 * opcode if some of these prefixes are present so
135 * X86_64 will never get here anyway
136 */
137 scan_more = ((instr_lo & 7) == 0x6);
138 break;
139#ifdef CONFIG_X86_64
140 case 0x40:
141 /*
142 * In AMD64 long mode 0x40..0x4F are valid REX prefixes
143 * Need to figure out under what instruction mode the
144 * instruction was issued. Could check the LDT for lm,
145 * but for now it's good enough to assume that long
146 * mode only uses well known segments or kernel.
147 */
148 scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS);
149 break;
150#endif
151 case 0x60:
152 /* 0x64 thru 0x67 are valid prefixes in all modes. */
153 scan_more = (instr_lo & 0xC) == 0x4;
154 break;
155 case 0xF0:
156 /* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */
157 scan_more = !instr_lo || (instr_lo>>1) == 1;
158 break;
159 case 0x00:
160 /* Prefetch instruction is 0x0F0D or 0x0F18 */
161 scan_more = 0;
162
163 if (probe_kernel_address(instr, opcode))
164 break;
165 prefetch = (instr_lo == 0xF) &&
166 (opcode == 0x0D || opcode == 0x18);
167 break; 177 break;
168 default:
169 scan_more = 0;
170 break;
171 }
172 } 178 }
173 return prefetch; 179 return prefetch;
174} 180}
175 181
176static void force_sig_info_fault(int si_signo, int si_code, 182static void
177 unsigned long address, struct task_struct *tsk) 183force_sig_info_fault(int si_signo, int si_code, unsigned long address,
184 struct task_struct *tsk)
178{ 185{
179 siginfo_t info; 186 siginfo_t info;
180 187
181 info.si_signo = si_signo; 188 info.si_signo = si_signo;
182 info.si_errno = 0; 189 info.si_errno = 0;
183 info.si_code = si_code; 190 info.si_code = si_code;
184 info.si_addr = (void __user *)address; 191 info.si_addr = (void __user *)address;
192
185 force_sig_info(si_signo, &info, tsk); 193 force_sig_info(si_signo, &info, tsk);
186} 194}
187 195
188#ifdef CONFIG_X86_64 196DEFINE_SPINLOCK(pgd_lock);
189static int bad_address(void *p) 197LIST_HEAD(pgd_list);
198
199#ifdef CONFIG_X86_32
200static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
190{ 201{
191 unsigned long dummy; 202 unsigned index = pgd_index(address);
192 return probe_kernel_address((unsigned long *)p, dummy); 203 pgd_t *pgd_k;
204 pud_t *pud, *pud_k;
205 pmd_t *pmd, *pmd_k;
206
207 pgd += index;
208 pgd_k = init_mm.pgd + index;
209
210 if (!pgd_present(*pgd_k))
211 return NULL;
212
213 /*
214 * set_pgd(pgd, *pgd_k); here would be useless on PAE
215 * and redundant with the set_pmd() on non-PAE. As would
216 * set_pud.
217 */
218 pud = pud_offset(pgd, address);
219 pud_k = pud_offset(pgd_k, address);
220 if (!pud_present(*pud_k))
221 return NULL;
222
223 pmd = pmd_offset(pud, address);
224 pmd_k = pmd_offset(pud_k, address);
225 if (!pmd_present(*pmd_k))
226 return NULL;
227
228 if (!pmd_present(*pmd)) {
229 set_pmd(pmd, *pmd_k);
230 arch_flush_lazy_mmu_mode();
231 } else {
232 BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
233 }
234
235 return pmd_k;
236}
237
238void vmalloc_sync_all(void)
239{
240 unsigned long address;
241
242 if (SHARED_KERNEL_PMD)
243 return;
244
245 for (address = VMALLOC_START & PMD_MASK;
246 address >= TASK_SIZE && address < FIXADDR_TOP;
247 address += PMD_SIZE) {
248
249 unsigned long flags;
250 struct page *page;
251
252 spin_lock_irqsave(&pgd_lock, flags);
253 list_for_each_entry(page, &pgd_list, lru) {
254 if (!vmalloc_sync_one(page_address(page), address))
255 break;
256 }
257 spin_unlock_irqrestore(&pgd_lock, flags);
258 }
259}
260
261/*
262 * 32-bit:
263 *
264 * Handle a fault on the vmalloc or module mapping area
265 */
266static noinline int vmalloc_fault(unsigned long address)
267{
268 unsigned long pgd_paddr;
269 pmd_t *pmd_k;
270 pte_t *pte_k;
271
272 /* Make sure we are in vmalloc area: */
273 if (!(address >= VMALLOC_START && address < VMALLOC_END))
274 return -1;
275
276 /*
277 * Synchronize this task's top level page-table
278 * with the 'reference' page table.
279 *
280 * Do _not_ use "current" here. We might be inside
281 * an interrupt in the middle of a task switch..
282 */
283 pgd_paddr = read_cr3();
284 pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
285 if (!pmd_k)
286 return -1;
287
288 pte_k = pte_offset_kernel(pmd_k, address);
289 if (!pte_present(*pte_k))
290 return -1;
291
292 return 0;
293}
294
295/*
296 * Did it hit the DOS screen memory VA from vm86 mode?
297 */
298static inline void
299check_v8086_mode(struct pt_regs *regs, unsigned long address,
300 struct task_struct *tsk)
301{
302 unsigned long bit;
303
304 if (!v8086_mode(regs))
305 return;
306
307 bit = (address - 0xA0000) >> PAGE_SHIFT;
308 if (bit < 32)
309 tsk->thread.screen_bitmap |= 1 << bit;
193} 310}
194#endif
195 311
196static void dump_pagetable(unsigned long address) 312static void dump_pagetable(unsigned long address)
197{ 313{
198#ifdef CONFIG_X86_32
199 __typeof__(pte_val(__pte(0))) page; 314 __typeof__(pte_val(__pte(0))) page;
200 315
201 page = read_cr3(); 316 page = read_cr3();
202 page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT]; 317 page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT];
318
203#ifdef CONFIG_X86_PAE 319#ifdef CONFIG_X86_PAE
204 printk("*pdpt = %016Lx ", page); 320 printk("*pdpt = %016Lx ", page);
205 if ((page >> PAGE_SHIFT) < max_low_pfn 321 if ((page >> PAGE_SHIFT) < max_low_pfn
206 && page & _PAGE_PRESENT) { 322 && page & _PAGE_PRESENT) {
207 page &= PAGE_MASK; 323 page &= PAGE_MASK;
208 page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT) 324 page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT)
209 & (PTRS_PER_PMD - 1)]; 325 & (PTRS_PER_PMD - 1)];
210 printk(KERN_CONT "*pde = %016Lx ", page); 326 printk(KERN_CONT "*pde = %016Lx ", page);
211 page &= ~_PAGE_NX; 327 page &= ~_PAGE_NX;
212 } 328 }
@@ -218,19 +334,145 @@ static void dump_pagetable(unsigned long address)
218 * We must not directly access the pte in the highpte 334 * We must not directly access the pte in the highpte
219 * case if the page table is located in highmem. 335 * case if the page table is located in highmem.
220 * And let's rather not kmap-atomic the pte, just in case 336 * And let's rather not kmap-atomic the pte, just in case
221 * it's allocated already. 337 * it's allocated already:
222 */ 338 */
223 if ((page >> PAGE_SHIFT) < max_low_pfn 339 if ((page >> PAGE_SHIFT) < max_low_pfn
224 && (page & _PAGE_PRESENT) 340 && (page & _PAGE_PRESENT)
225 && !(page & _PAGE_PSE)) { 341 && !(page & _PAGE_PSE)) {
342
226 page &= PAGE_MASK; 343 page &= PAGE_MASK;
227 page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT) 344 page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT)
228 & (PTRS_PER_PTE - 1)]; 345 & (PTRS_PER_PTE - 1)];
229 printk("*pte = %0*Lx ", sizeof(page)*2, (u64)page); 346 printk("*pte = %0*Lx ", sizeof(page)*2, (u64)page);
230 } 347 }
231 348
232 printk("\n"); 349 printk("\n");
233#else /* CONFIG_X86_64 */ 350}
351
352#else /* CONFIG_X86_64: */
353
354void vmalloc_sync_all(void)
355{
356 unsigned long address;
357
358 for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END;
359 address += PGDIR_SIZE) {
360
361 const pgd_t *pgd_ref = pgd_offset_k(address);
362 unsigned long flags;
363 struct page *page;
364
365 if (pgd_none(*pgd_ref))
366 continue;
367
368 spin_lock_irqsave(&pgd_lock, flags);
369 list_for_each_entry(page, &pgd_list, lru) {
370 pgd_t *pgd;
371 pgd = (pgd_t *)page_address(page) + pgd_index(address);
372 if (pgd_none(*pgd))
373 set_pgd(pgd, *pgd_ref);
374 else
375 BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
376 }
377 spin_unlock_irqrestore(&pgd_lock, flags);
378 }
379}
380
381/*
382 * 64-bit:
383 *
384 * Handle a fault on the vmalloc area
385 *
386 * This assumes no large pages in there.
387 */
388static noinline int vmalloc_fault(unsigned long address)
389{
390 pgd_t *pgd, *pgd_ref;
391 pud_t *pud, *pud_ref;
392 pmd_t *pmd, *pmd_ref;
393 pte_t *pte, *pte_ref;
394
395 /* Make sure we are in vmalloc area: */
396 if (!(address >= VMALLOC_START && address < VMALLOC_END))
397 return -1;
398
399 /*
400 * Copy kernel mappings over when needed. This can also
401 * happen within a race in page table update. In the later
402 * case just flush:
403 */
404 pgd = pgd_offset(current->active_mm, address);
405 pgd_ref = pgd_offset_k(address);
406 if (pgd_none(*pgd_ref))
407 return -1;
408
409 if (pgd_none(*pgd))
410 set_pgd(pgd, *pgd_ref);
411 else
412 BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
413
414 /*
415 * Below here mismatches are bugs because these lower tables
416 * are shared:
417 */
418
419 pud = pud_offset(pgd, address);
420 pud_ref = pud_offset(pgd_ref, address);
421 if (pud_none(*pud_ref))
422 return -1;
423
424 if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
425 BUG();
426
427 pmd = pmd_offset(pud, address);
428 pmd_ref = pmd_offset(pud_ref, address);
429 if (pmd_none(*pmd_ref))
430 return -1;
431
432 if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
433 BUG();
434
435 pte_ref = pte_offset_kernel(pmd_ref, address);
436 if (!pte_present(*pte_ref))
437 return -1;
438
439 pte = pte_offset_kernel(pmd, address);
440
441 /*
442 * Don't use pte_page here, because the mappings can point
443 * outside mem_map, and the NUMA hash lookup cannot handle
444 * that:
445 */
446 if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
447 BUG();
448
449 return 0;
450}
451
452static const char errata93_warning[] =
453KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
454KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
455KERN_ERR "******* Please consider a BIOS update.\n"
456KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
457
458/*
459 * No vm86 mode in 64-bit mode:
460 */
461static inline void
462check_v8086_mode(struct pt_regs *regs, unsigned long address,
463 struct task_struct *tsk)
464{
465}
466
467static int bad_address(void *p)
468{
469 unsigned long dummy;
470
471 return probe_kernel_address((unsigned long *)p, dummy);
472}
473
474static void dump_pagetable(unsigned long address)
475{
234 pgd_t *pgd; 476 pgd_t *pgd;
235 pud_t *pud; 477 pud_t *pud;
236 pmd_t *pmd; 478 pmd_t *pmd;
@@ -239,102 +481,77 @@ static void dump_pagetable(unsigned long address)
239 pgd = (pgd_t *)read_cr3(); 481 pgd = (pgd_t *)read_cr3();
240 482
241 pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK); 483 pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
484
242 pgd += pgd_index(address); 485 pgd += pgd_index(address);
243 if (bad_address(pgd)) goto bad; 486 if (bad_address(pgd))
487 goto bad;
488
244 printk("PGD %lx ", pgd_val(*pgd)); 489 printk("PGD %lx ", pgd_val(*pgd));
245 if (!pgd_present(*pgd)) goto ret; 490
491 if (!pgd_present(*pgd))
492 goto out;
246 493
247 pud = pud_offset(pgd, address); 494 pud = pud_offset(pgd, address);
248 if (bad_address(pud)) goto bad; 495 if (bad_address(pud))
496 goto bad;
497
249 printk("PUD %lx ", pud_val(*pud)); 498 printk("PUD %lx ", pud_val(*pud));
250 if (!pud_present(*pud) || pud_large(*pud)) 499 if (!pud_present(*pud) || pud_large(*pud))
251 goto ret; 500 goto out;
252 501
253 pmd = pmd_offset(pud, address); 502 pmd = pmd_offset(pud, address);
254 if (bad_address(pmd)) goto bad; 503 if (bad_address(pmd))
504 goto bad;
505
255 printk("PMD %lx ", pmd_val(*pmd)); 506 printk("PMD %lx ", pmd_val(*pmd));
256 if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret; 507 if (!pmd_present(*pmd) || pmd_large(*pmd))
508 goto out;
257 509
258 pte = pte_offset_kernel(pmd, address); 510 pte = pte_offset_kernel(pmd, address);
259 if (bad_address(pte)) goto bad; 511 if (bad_address(pte))
512 goto bad;
513
260 printk("PTE %lx", pte_val(*pte)); 514 printk("PTE %lx", pte_val(*pte));
261ret: 515out:
262 printk("\n"); 516 printk("\n");
263 return; 517 return;
264bad: 518bad:
265 printk("BAD\n"); 519 printk("BAD\n");
266#endif
267} 520}
268 521
269#ifdef CONFIG_X86_32 522#endif /* CONFIG_X86_64 */
270static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
271{
272 unsigned index = pgd_index(address);
273 pgd_t *pgd_k;
274 pud_t *pud, *pud_k;
275 pmd_t *pmd, *pmd_k;
276
277 pgd += index;
278 pgd_k = init_mm.pgd + index;
279
280 if (!pgd_present(*pgd_k))
281 return NULL;
282 523
283 /* 524/*
284 * set_pgd(pgd, *pgd_k); here would be useless on PAE 525 * Workaround for K8 erratum #93 & buggy BIOS.
285 * and redundant with the set_pmd() on non-PAE. As would 526 *
286 * set_pud. 527 * BIOS SMM functions are required to use a specific workaround
287 */ 528 * to avoid corruption of the 64bit RIP register on C stepping K8.
288 529 *
289 pud = pud_offset(pgd, address); 530 * A lot of BIOS that didn't get tested properly miss this.
290 pud_k = pud_offset(pgd_k, address); 531 *
291 if (!pud_present(*pud_k)) 532 * The OS sees this as a page fault with the upper 32bits of RIP cleared.
292 return NULL; 533 * Try to work around it here.
293 534 *
294 pmd = pmd_offset(pud, address); 535 * Note we only handle faults in kernel here.
295 pmd_k = pmd_offset(pud_k, address); 536 * Does nothing on 32-bit.
296 if (!pmd_present(*pmd_k))
297 return NULL;
298 if (!pmd_present(*pmd)) {
299 set_pmd(pmd, *pmd_k);
300 arch_flush_lazy_mmu_mode();
301 } else
302 BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
303 return pmd_k;
304}
305#endif
306
307#ifdef CONFIG_X86_64
308static const char errata93_warning[] =
309KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
310KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
311KERN_ERR "******* Please consider a BIOS update.\n"
312KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
313#endif
314
315/* Workaround for K8 erratum #93 & buggy BIOS.
316 BIOS SMM functions are required to use a specific workaround
317 to avoid corruption of the 64bit RIP register on C stepping K8.
318 A lot of BIOS that didn't get tested properly miss this.
319 The OS sees this as a page fault with the upper 32bits of RIP cleared.
320 Try to work around it here.
321 Note we only handle faults in kernel here.
322 Does nothing for X86_32
323 */ 537 */
324static int is_errata93(struct pt_regs *regs, unsigned long address) 538static int is_errata93(struct pt_regs *regs, unsigned long address)
325{ 539{
326#ifdef CONFIG_X86_64 540#ifdef CONFIG_X86_64
327 static int warned; 541 static int once;
542
328 if (address != regs->ip) 543 if (address != regs->ip)
329 return 0; 544 return 0;
545
330 if ((address >> 32) != 0) 546 if ((address >> 32) != 0)
331 return 0; 547 return 0;
548
332 address |= 0xffffffffUL << 32; 549 address |= 0xffffffffUL << 32;
333 if ((address >= (u64)_stext && address <= (u64)_etext) || 550 if ((address >= (u64)_stext && address <= (u64)_etext) ||
334 (address >= MODULES_VADDR && address <= MODULES_END)) { 551 (address >= MODULES_VADDR && address <= MODULES_END)) {
335 if (!warned) { 552 if (!once) {
336 printk(errata93_warning); 553 printk(errata93_warning);
337 warned = 1; 554 once = 1;
338 } 555 }
339 regs->ip = address; 556 regs->ip = address;
340 return 1; 557 return 1;
@@ -344,16 +561,17 @@ static int is_errata93(struct pt_regs *regs, unsigned long address)
344} 561}
345 562
346/* 563/*
347 * Work around K8 erratum #100 K8 in compat mode occasionally jumps to illegal 564 * Work around K8 erratum #100 K8 in compat mode occasionally jumps
348 * addresses >4GB. We catch this in the page fault handler because these 565 * to illegal addresses >4GB.
349 * addresses are not reachable. Just detect this case and return. Any code 566 *
567 * We catch this in the page fault handler because these addresses
568 * are not reachable. Just detect this case and return. Any code
350 * segment in LDT is compatibility mode. 569 * segment in LDT is compatibility mode.
351 */ 570 */
352static int is_errata100(struct pt_regs *regs, unsigned long address) 571static int is_errata100(struct pt_regs *regs, unsigned long address)
353{ 572{
354#ifdef CONFIG_X86_64 573#ifdef CONFIG_X86_64
355 if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) && 574 if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) && (address >> 32))
356 (address >> 32))
357 return 1; 575 return 1;
358#endif 576#endif
359 return 0; 577 return 0;
@@ -363,8 +581,9 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
363{ 581{
364#ifdef CONFIG_X86_F00F_BUG 582#ifdef CONFIG_X86_F00F_BUG
365 unsigned long nr; 583 unsigned long nr;
584
366 /* 585 /*
367 * Pentium F0 0F C7 C8 bug workaround. 586 * Pentium F0 0F C7 C8 bug workaround:
368 */ 587 */
369 if (boot_cpu_data.f00f_bug) { 588 if (boot_cpu_data.f00f_bug) {
370 nr = (address - idt_descr.address) >> 3; 589 nr = (address - idt_descr.address) >> 3;
@@ -378,80 +597,87 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
378 return 0; 597 return 0;
379} 598}
380 599
381static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, 600static const char nx_warning[] = KERN_CRIT
382 unsigned long address) 601"kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n";
602
603static void
604show_fault_oops(struct pt_regs *regs, unsigned long error_code,
605 unsigned long address)
383{ 606{
384#ifdef CONFIG_X86_32
385 if (!oops_may_print()) 607 if (!oops_may_print())
386 return; 608 return;
387#endif
388 609
389#ifdef CONFIG_X86_PAE
390 if (error_code & PF_INSTR) { 610 if (error_code & PF_INSTR) {
391 unsigned int level; 611 unsigned int level;
612
392 pte_t *pte = lookup_address(address, &level); 613 pte_t *pte = lookup_address(address, &level);
393 614
394 if (pte && pte_present(*pte) && !pte_exec(*pte)) 615 if (pte && pte_present(*pte) && !pte_exec(*pte))
395 printk(KERN_CRIT "kernel tried to execute " 616 printk(nx_warning, current_uid());
396 "NX-protected page - exploit attempt? "
397 "(uid: %d)\n", current_uid());
398 } 617 }
399#endif
400 618
401 printk(KERN_ALERT "BUG: unable to handle kernel "); 619 printk(KERN_ALERT "BUG: unable to handle kernel ");
402 if (address < PAGE_SIZE) 620 if (address < PAGE_SIZE)
403 printk(KERN_CONT "NULL pointer dereference"); 621 printk(KERN_CONT "NULL pointer dereference");
404 else 622 else
405 printk(KERN_CONT "paging request"); 623 printk(KERN_CONT "paging request");
624
406 printk(KERN_CONT " at %p\n", (void *) address); 625 printk(KERN_CONT " at %p\n", (void *) address);
407 printk(KERN_ALERT "IP:"); 626 printk(KERN_ALERT "IP:");
408 printk_address(regs->ip, 1); 627 printk_address(regs->ip, 1);
628
409 dump_pagetable(address); 629 dump_pagetable(address);
410} 630}
411 631
412#ifdef CONFIG_X86_64 632static noinline void
413static noinline void pgtable_bad(struct pt_regs *regs, 633pgtable_bad(struct pt_regs *regs, unsigned long error_code,
414 unsigned long error_code, unsigned long address) 634 unsigned long address)
415{ 635{
416 unsigned long flags = oops_begin(); 636 struct task_struct *tsk;
417 int sig = SIGKILL; 637 unsigned long flags;
418 struct task_struct *tsk = current; 638 int sig;
639
640 flags = oops_begin();
641 tsk = current;
642 sig = SIGKILL;
419 643
420 printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", 644 printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
421 tsk->comm, address); 645 tsk->comm, address);
422 dump_pagetable(address); 646 dump_pagetable(address);
423 tsk->thread.cr2 = address; 647
424 tsk->thread.trap_no = 14; 648 tsk->thread.cr2 = address;
425 tsk->thread.error_code = error_code; 649 tsk->thread.trap_no = 14;
650 tsk->thread.error_code = error_code;
651
426 if (__die("Bad pagetable", regs, error_code)) 652 if (__die("Bad pagetable", regs, error_code))
427 sig = 0; 653 sig = 0;
654
428 oops_end(flags, regs, sig); 655 oops_end(flags, regs, sig);
429} 656}
430#endif
431 657
432static noinline void no_context(struct pt_regs *regs, 658static noinline void
433 unsigned long error_code, unsigned long address) 659no_context(struct pt_regs *regs, unsigned long error_code,
660 unsigned long address)
434{ 661{
435 struct task_struct *tsk = current; 662 struct task_struct *tsk = current;
436 unsigned long *stackend; 663 unsigned long *stackend;
437
438#ifdef CONFIG_X86_64
439 unsigned long flags; 664 unsigned long flags;
440 int sig; 665 int sig;
441#endif
442 666
443 /* Are we prepared to handle this kernel fault? */ 667 /* Are we prepared to handle this kernel fault? */
444 if (fixup_exception(regs)) 668 if (fixup_exception(regs))
445 return; 669 return;
446 670
447 /* 671 /*
448 * X86_32 672 * 32-bit:
449 * Valid to do another page fault here, because if this fault 673 *
450 * had been triggered by is_prefetch fixup_exception would have 674 * Valid to do another page fault here, because if this fault
451 * handled it. 675 * had been triggered by is_prefetch fixup_exception would have
676 * handled it.
677 *
678 * 64-bit:
452 * 679 *
453 * X86_64 680 * Hall of shame of CPU/BIOS bugs.
454 * Hall of shame of CPU/BIOS bugs.
455 */ 681 */
456 if (is_prefetch(regs, error_code, address)) 682 if (is_prefetch(regs, error_code, address))
457 return; 683 return;
@@ -461,54 +687,70 @@ static noinline void no_context(struct pt_regs *regs,
461 687
462 /* 688 /*
463 * Oops. The kernel tried to access some bad page. We'll have to 689 * Oops. The kernel tried to access some bad page. We'll have to
464 * terminate things with extreme prejudice. 690 * terminate things with extreme prejudice:
465 */ 691 */
466#ifdef CONFIG_X86_32
467 bust_spinlocks(1);
468#else
469 flags = oops_begin(); 692 flags = oops_begin();
470#endif
471 693
472 show_fault_oops(regs, error_code, address); 694 show_fault_oops(regs, error_code, address);
473 695
474 stackend = end_of_stack(tsk); 696 stackend = end_of_stack(tsk);
475 if (*stackend != STACK_END_MAGIC) 697 if (*stackend != STACK_END_MAGIC)
476 printk(KERN_ALERT "Thread overran stack, or stack corrupted\n"); 698 printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");
477 699
478 tsk->thread.cr2 = address; 700 tsk->thread.cr2 = address;
479 tsk->thread.trap_no = 14; 701 tsk->thread.trap_no = 14;
480 tsk->thread.error_code = error_code; 702 tsk->thread.error_code = error_code;
481 703
482#ifdef CONFIG_X86_32
483 die("Oops", regs, error_code);
484 bust_spinlocks(0);
485 do_exit(SIGKILL);
486#else
487 sig = SIGKILL; 704 sig = SIGKILL;
488 if (__die("Oops", regs, error_code)) 705 if (__die("Oops", regs, error_code))
489 sig = 0; 706 sig = 0;
707
490 /* Executive summary in case the body of the oops scrolled away */ 708 /* Executive summary in case the body of the oops scrolled away */
491 printk(KERN_EMERG "CR2: %016lx\n", address); 709 printk(KERN_EMERG "CR2: %016lx\n", address);
710
492 oops_end(flags, regs, sig); 711 oops_end(flags, regs, sig);
493#endif
494} 712}
495 713
496static void __bad_area_nosemaphore(struct pt_regs *regs, 714/*
497 unsigned long error_code, unsigned long address, 715 * Print out info about fatal segfaults, if the show_unhandled_signals
498 int si_code) 716 * sysctl is set:
717 */
718static inline void
719show_signal_msg(struct pt_regs *regs, unsigned long error_code,
720 unsigned long address, struct task_struct *tsk)
721{
722 if (!unhandled_signal(tsk, SIGSEGV))
723 return;
724
725 if (!printk_ratelimit())
726 return;
727
728 printk(KERN_CONT "%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
729 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
730 tsk->comm, task_pid_nr(tsk), address,
731 (void *)regs->ip, (void *)regs->sp, error_code);
732
733 print_vma_addr(KERN_CONT " in ", regs->ip);
734
735 printk(KERN_CONT "\n");
736}
737
738static void
739__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
740 unsigned long address, int si_code)
499{ 741{
500 struct task_struct *tsk = current; 742 struct task_struct *tsk = current;
501 743
502 /* User mode accesses just cause a SIGSEGV */ 744 /* User mode accesses just cause a SIGSEGV */
503 if (error_code & PF_USER) { 745 if (error_code & PF_USER) {
504 /* 746 /*
505 * It's possible to have interrupts off here. 747 * It's possible to have interrupts off here:
506 */ 748 */
507 local_irq_enable(); 749 local_irq_enable();
508 750
509 /* 751 /*
510 * Valid to do another page fault here because this one came 752 * Valid to do another page fault here because this one came
511 * from user space. 753 * from user space:
512 */ 754 */
513 if (is_prefetch(regs, error_code, address)) 755 if (is_prefetch(regs, error_code, address))
514 return; 756 return;
@@ -516,22 +758,16 @@ static void __bad_area_nosemaphore(struct pt_regs *regs,
516 if (is_errata100(regs, address)) 758 if (is_errata100(regs, address))
517 return; 759 return;
518 760
519 if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && 761 if (unlikely(show_unhandled_signals))
520 printk_ratelimit()) { 762 show_signal_msg(regs, error_code, address, tsk);
521 printk( 763
522 "%s%s[%d]: segfault at %lx ip %p sp %p error %lx", 764 /* Kernel addresses are always protection faults: */
523 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, 765 tsk->thread.cr2 = address;
524 tsk->comm, task_pid_nr(tsk), address, 766 tsk->thread.error_code = error_code | (address >= TASK_SIZE);
525 (void *) regs->ip, (void *) regs->sp, error_code); 767 tsk->thread.trap_no = 14;
526 print_vma_addr(" in ", regs->ip);
527 printk("\n");
528 }
529 768
530 tsk->thread.cr2 = address;
531 /* Kernel addresses are always protection faults */
532 tsk->thread.error_code = error_code | (address >= TASK_SIZE);
533 tsk->thread.trap_no = 14;
534 force_sig_info_fault(SIGSEGV, si_code, address, tsk); 769 force_sig_info_fault(SIGSEGV, si_code, address, tsk);
770
535 return; 771 return;
536 } 772 }
537 773
@@ -541,15 +777,16 @@ static void __bad_area_nosemaphore(struct pt_regs *regs,
541 no_context(regs, error_code, address); 777 no_context(regs, error_code, address);
542} 778}
543 779
544static noinline void bad_area_nosemaphore(struct pt_regs *regs, 780static noinline void
545 unsigned long error_code, unsigned long address) 781bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
782 unsigned long address)
546{ 783{
547 __bad_area_nosemaphore(regs, error_code, address, SEGV_MAPERR); 784 __bad_area_nosemaphore(regs, error_code, address, SEGV_MAPERR);
548} 785}
549 786
550static void __bad_area(struct pt_regs *regs, 787static void
551 unsigned long error_code, unsigned long address, 788__bad_area(struct pt_regs *regs, unsigned long error_code,
552 int si_code) 789 unsigned long address, int si_code)
553{ 790{
554 struct mm_struct *mm = current->mm; 791 struct mm_struct *mm = current->mm;
555 792
@@ -562,67 +799,75 @@ static void __bad_area(struct pt_regs *regs,
562 __bad_area_nosemaphore(regs, error_code, address, si_code); 799 __bad_area_nosemaphore(regs, error_code, address, si_code);
563} 800}
564 801
565static noinline void bad_area(struct pt_regs *regs, 802static noinline void
566 unsigned long error_code, unsigned long address) 803bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address)
567{ 804{
568 __bad_area(regs, error_code, address, SEGV_MAPERR); 805 __bad_area(regs, error_code, address, SEGV_MAPERR);
569} 806}
570 807
571static noinline void bad_area_access_error(struct pt_regs *regs, 808static noinline void
572 unsigned long error_code, unsigned long address) 809bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
810 unsigned long address)
573{ 811{
574 __bad_area(regs, error_code, address, SEGV_ACCERR); 812 __bad_area(regs, error_code, address, SEGV_ACCERR);
575} 813}
576 814
577/* TODO: fixup for "mm-invoke-oom-killer-from-page-fault.patch" */ 815/* TODO: fixup for "mm-invoke-oom-killer-from-page-fault.patch" */
578static void out_of_memory(struct pt_regs *regs, 816static void
579 unsigned long error_code, unsigned long address) 817out_of_memory(struct pt_regs *regs, unsigned long error_code,
818 unsigned long address)
580{ 819{
581 /* 820 /*
582 * We ran out of memory, call the OOM killer, and return the userspace 821 * We ran out of memory, call the OOM killer, and return the userspace
583 * (which will retry the fault, or kill us if we got oom-killed). 822 * (which will retry the fault, or kill us if we got oom-killed):
584 */ 823 */
585 up_read(&current->mm->mmap_sem); 824 up_read(&current->mm->mmap_sem);
825
586 pagefault_out_of_memory(); 826 pagefault_out_of_memory();
587} 827}
588 828
589static void do_sigbus(struct pt_regs *regs, 829static void
590 unsigned long error_code, unsigned long address) 830do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
591{ 831{
592 struct task_struct *tsk = current; 832 struct task_struct *tsk = current;
593 struct mm_struct *mm = tsk->mm; 833 struct mm_struct *mm = tsk->mm;
594 834
595 up_read(&mm->mmap_sem); 835 up_read(&mm->mmap_sem);
596 836
597 /* Kernel mode? Handle exceptions or die */ 837 /* Kernel mode? Handle exceptions or die: */
598 if (!(error_code & PF_USER)) 838 if (!(error_code & PF_USER))
599 no_context(regs, error_code, address); 839 no_context(regs, error_code, address);
600#ifdef CONFIG_X86_32 840
601 /* User space => ok to do another page fault */ 841 /* User-space => ok to do another page fault: */
602 if (is_prefetch(regs, error_code, address)) 842 if (is_prefetch(regs, error_code, address))
603 return; 843 return;
604#endif 844
605 tsk->thread.cr2 = address; 845 tsk->thread.cr2 = address;
606 tsk->thread.error_code = error_code; 846 tsk->thread.error_code = error_code;
607 tsk->thread.trap_no = 14; 847 tsk->thread.trap_no = 14;
848
608 force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); 849 force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
609} 850}
610 851
611static noinline void mm_fault_error(struct pt_regs *regs, 852static noinline void
612 unsigned long error_code, unsigned long address, unsigned int fault) 853mm_fault_error(struct pt_regs *regs, unsigned long error_code,
854 unsigned long address, unsigned int fault)
613{ 855{
614 if (fault & VM_FAULT_OOM) 856 if (fault & VM_FAULT_OOM) {
615 out_of_memory(regs, error_code, address); 857 out_of_memory(regs, error_code, address);
616 else if (fault & VM_FAULT_SIGBUS) 858 } else {
617 do_sigbus(regs, error_code, address); 859 if (fault & VM_FAULT_SIGBUS)
618 else 860 do_sigbus(regs, error_code, address);
619 BUG(); 861 else
862 BUG();
863 }
620} 864}
621 865
622static int spurious_fault_check(unsigned long error_code, pte_t *pte) 866static int spurious_fault_check(unsigned long error_code, pte_t *pte)
623{ 867{
624 if ((error_code & PF_WRITE) && !pte_write(*pte)) 868 if ((error_code & PF_WRITE) && !pte_write(*pte))
625 return 0; 869 return 0;
870
626 if ((error_code & PF_INSTR) && !pte_exec(*pte)) 871 if ((error_code & PF_INSTR) && !pte_exec(*pte))
627 return 0; 872 return 0;
628 873
@@ -630,21 +875,25 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte)
630} 875}
631 876
632/* 877/*
633 * Handle a spurious fault caused by a stale TLB entry. This allows 878 * Handle a spurious fault caused by a stale TLB entry.
634 * us to lazily refresh the TLB when increasing the permissions of a 879 *
635 * kernel page (RO -> RW or NX -> X). Doing it eagerly is very 880 * This allows us to lazily refresh the TLB when increasing the
636 * expensive since that implies doing a full cross-processor TLB 881 * permissions of a kernel page (RO -> RW or NX -> X). Doing it
637 * flush, even if no stale TLB entries exist on other processors. 882 * eagerly is very expensive since that implies doing a full
883 * cross-processor TLB flush, even if no stale TLB entries exist
884 * on other processors.
885 *
638 * There are no security implications to leaving a stale TLB when 886 * There are no security implications to leaving a stale TLB when
639 * increasing the permissions on a page. 887 * increasing the permissions on a page.
640 */ 888 */
641static noinline int spurious_fault(unsigned long error_code, 889static noinline int
642 unsigned long address) 890spurious_fault(unsigned long error_code, unsigned long address)
643{ 891{
644 pgd_t *pgd; 892 pgd_t *pgd;
645 pud_t *pud; 893 pud_t *pud;
646 pmd_t *pmd; 894 pmd_t *pmd;
647 pte_t *pte; 895 pte_t *pte;
896 int ret;
648 897
649 /* Reserved-bit violation or user access to kernel space? */ 898 /* Reserved-bit violation or user access to kernel space? */
650 if (error_code & (PF_USER | PF_RSVD)) 899 if (error_code & (PF_USER | PF_RSVD))
@@ -672,123 +921,46 @@ static noinline int spurious_fault(unsigned long error_code,
672 if (!pte_present(*pte)) 921 if (!pte_present(*pte))
673 return 0; 922 return 0;
674 923
675 return spurious_fault_check(error_code, pte); 924 ret = spurious_fault_check(error_code, pte);
676} 925 if (!ret)
677 926 return 0;
678/*
679 * X86_32
680 * Handle a fault on the vmalloc or module mapping area
681 *
682 * X86_64
683 * Handle a fault on the vmalloc area
684 *
685 * This assumes no large pages in there.
686 */
687static noinline int vmalloc_fault(unsigned long address)
688{
689#ifdef CONFIG_X86_32
690 unsigned long pgd_paddr;
691 pmd_t *pmd_k;
692 pte_t *pte_k;
693
694 /* Make sure we are in vmalloc area */
695 if (!(address >= VMALLOC_START && address < VMALLOC_END))
696 return -1;
697 927
698 /* 928 /*
699 * Synchronize this task's top level page-table 929 * Make sure we have permissions in PMD.
700 * with the 'reference' page table. 930 * If not, then there's a bug in the page tables:
701 *
702 * Do _not_ use "current" here. We might be inside
703 * an interrupt in the middle of a task switch..
704 */ 931 */
705 pgd_paddr = read_cr3(); 932 ret = spurious_fault_check(error_code, (pte_t *) pmd);
706 pmd_k = vmalloc_sync_one(__va(pgd_paddr), address); 933 WARN_ONCE(!ret, "PMD has incorrect permission bits\n");
707 if (!pmd_k)
708 return -1;
709 pte_k = pte_offset_kernel(pmd_k, address);
710 if (!pte_present(*pte_k))
711 return -1;
712 return 0;
713#else
714 pgd_t *pgd, *pgd_ref;
715 pud_t *pud, *pud_ref;
716 pmd_t *pmd, *pmd_ref;
717 pte_t *pte, *pte_ref;
718 934
719 /* Make sure we are in vmalloc area */ 935 return ret;
720 if (!(address >= VMALLOC_START && address < VMALLOC_END))
721 return -1;
722
723 /* Copy kernel mappings over when needed. This can also
724 happen within a race in page table update. In the later
725 case just flush. */
726
727 pgd = pgd_offset(current->active_mm, address);
728 pgd_ref = pgd_offset_k(address);
729 if (pgd_none(*pgd_ref))
730 return -1;
731 if (pgd_none(*pgd))
732 set_pgd(pgd, *pgd_ref);
733 else
734 BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
735
736 /* Below here mismatches are bugs because these lower tables
737 are shared */
738
739 pud = pud_offset(pgd, address);
740 pud_ref = pud_offset(pgd_ref, address);
741 if (pud_none(*pud_ref))
742 return -1;
743 if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
744 BUG();
745 pmd = pmd_offset(pud, address);
746 pmd_ref = pmd_offset(pud_ref, address);
747 if (pmd_none(*pmd_ref))
748 return -1;
749 if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
750 BUG();
751 pte_ref = pte_offset_kernel(pmd_ref, address);
752 if (!pte_present(*pte_ref))
753 return -1;
754 pte = pte_offset_kernel(pmd, address);
755 /* Don't use pte_page here, because the mappings can point
756 outside mem_map, and the NUMA hash lookup cannot handle
757 that. */
758 if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
759 BUG();
760 return 0;
761#endif
762} 936}
763 937
764int show_unhandled_signals = 1; 938int show_unhandled_signals = 1;
765 939
766static inline int access_error(unsigned long error_code, int write, 940static inline int
767 struct vm_area_struct *vma) 941access_error(unsigned long error_code, int write, struct vm_area_struct *vma)
768{ 942{
769 if (write) { 943 if (write) {
770 /* write, present and write, not present */ 944 /* write, present and write, not present: */
771 if (unlikely(!(vma->vm_flags & VM_WRITE))) 945 if (unlikely(!(vma->vm_flags & VM_WRITE)))
772 return 1; 946 return 1;
773 } else if (unlikely(error_code & PF_PROT)) { 947 return 0;
774 /* read, present */
775 return 1;
776 } else {
777 /* read, not present */
778 if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
779 return 1;
780 } 948 }
781 949
950 /* read, present: */
951 if (unlikely(error_code & PF_PROT))
952 return 1;
953
954 /* read, not present: */
955 if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
956 return 1;
957
782 return 0; 958 return 0;
783} 959}
784 960
785static int fault_in_kernel_space(unsigned long address) 961static int fault_in_kernel_space(unsigned long address)
786{ 962{
787#ifdef CONFIG_X86_32 963 return address >= TASK_SIZE_MAX;
788 return address >= TASK_SIZE;
789#else /* !CONFIG_X86_32 */
790 return address >= TASK_SIZE64;
791#endif /* CONFIG_X86_32 */
792} 964}
793 965
794/* 966/*
@@ -796,23 +968,22 @@ static int fault_in_kernel_space(unsigned long address)
796 * and the problem, and then passes it off to one of the appropriate 968 * and the problem, and then passes it off to one of the appropriate
797 * routines. 969 * routines.
798 */ 970 */
799#ifdef CONFIG_X86_64 971dotraplinkage void __kprobes
800asmlinkage 972do_page_fault(struct pt_regs *regs, unsigned long error_code)
801#endif
802void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
803{ 973{
804 unsigned long address; 974 struct vm_area_struct *vma;
805 struct task_struct *tsk; 975 struct task_struct *tsk;
976 unsigned long address;
806 struct mm_struct *mm; 977 struct mm_struct *mm;
807 struct vm_area_struct *vma;
808 int write; 978 int write;
809 int fault; 979 int fault;
810 980
811 tsk = current; 981 tsk = current;
812 mm = tsk->mm; 982 mm = tsk->mm;
983
813 prefetchw(&mm->mmap_sem); 984 prefetchw(&mm->mmap_sem);
814 985
815 /* get the address */ 986 /* Get the faulting address: */
816 address = read_cr2(); 987 address = read_cr2();
817 988
818 if (unlikely(kmmio_fault(regs, address))) 989 if (unlikely(kmmio_fault(regs, address)))
@@ -836,22 +1007,23 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
836 vmalloc_fault(address) >= 0) 1007 vmalloc_fault(address) >= 0)
837 return; 1008 return;
838 1009
839 /* Can handle a stale RO->RW TLB */ 1010 /* Can handle a stale RO->RW TLB: */
840 if (spurious_fault(error_code, address)) 1011 if (spurious_fault(error_code, address))
841 return; 1012 return;
842 1013
843 /* kprobes don't want to hook the spurious faults. */ 1014 /* kprobes don't want to hook the spurious faults: */
844 if (notify_page_fault(regs)) 1015 if (notify_page_fault(regs))
845 return; 1016 return;
846 /* 1017 /*
847 * Don't take the mm semaphore here. If we fixup a prefetch 1018 * Don't take the mm semaphore here. If we fixup a prefetch
848 * fault we could otherwise deadlock. 1019 * fault we could otherwise deadlock:
849 */ 1020 */
850 bad_area_nosemaphore(regs, error_code, address); 1021 bad_area_nosemaphore(regs, error_code, address);
1022
851 return; 1023 return;
852 } 1024 }
853 1025
854 /* kprobes don't want to hook the spurious faults. */ 1026 /* kprobes don't want to hook the spurious faults: */
855 if (unlikely(notify_page_fault(regs))) 1027 if (unlikely(notify_page_fault(regs)))
856 return; 1028 return;
857 /* 1029 /*
@@ -859,22 +1031,22 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
859 * vmalloc fault has been handled. 1031 * vmalloc fault has been handled.
860 * 1032 *
861 * User-mode registers count as a user access even for any 1033 * User-mode registers count as a user access even for any
862 * potential system fault or CPU buglet. 1034 * potential system fault or CPU buglet:
863 */ 1035 */
864 if (user_mode_vm(regs)) { 1036 if (user_mode_vm(regs)) {
865 local_irq_enable(); 1037 local_irq_enable();
866 error_code |= PF_USER; 1038 error_code |= PF_USER;
867 } else if (regs->flags & X86_EFLAGS_IF) 1039 } else {
868 local_irq_enable(); 1040 if (regs->flags & X86_EFLAGS_IF)
1041 local_irq_enable();
1042 }
869 1043
870#ifdef CONFIG_X86_64
871 if (unlikely(error_code & PF_RSVD)) 1044 if (unlikely(error_code & PF_RSVD))
872 pgtable_bad(regs, error_code, address); 1045 pgtable_bad(regs, error_code, address);
873#endif
874 1046
875 /* 1047 /*
876 * If we're in an interrupt, have no user context or are running in an 1048 * If we're in an interrupt, have no user context or are running
877 * atomic region then we must not take the fault. 1049 * in an atomic region then we must not take the fault:
878 */ 1050 */
879 if (unlikely(in_atomic() || !mm)) { 1051 if (unlikely(in_atomic() || !mm)) {
880 bad_area_nosemaphore(regs, error_code, address); 1052 bad_area_nosemaphore(regs, error_code, address);
@@ -883,19 +1055,19 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
883 1055
884 /* 1056 /*
885 * When running in the kernel we expect faults to occur only to 1057 * When running in the kernel we expect faults to occur only to
886 * addresses in user space. All other faults represent errors in the 1058 * addresses in user space. All other faults represent errors in
887 * kernel and should generate an OOPS. Unfortunately, in the case of an 1059 * the kernel and should generate an OOPS. Unfortunately, in the
888 * erroneous fault occurring in a code path which already holds mmap_sem 1060 * case of an erroneous fault occurring in a code path which already
889 * we will deadlock attempting to validate the fault against the 1061 * holds mmap_sem we will deadlock attempting to validate the fault
890 * address space. Luckily the kernel only validly references user 1062 * against the address space. Luckily the kernel only validly
891 * space from well defined areas of code, which are listed in the 1063 * references user space from well defined areas of code, which are
892 * exceptions table. 1064 * listed in the exceptions table.
893 * 1065 *
894 * As the vast majority of faults will be valid we will only perform 1066 * As the vast majority of faults will be valid we will only perform
895 * the source reference check when there is a possibility of a deadlock. 1067 * the source reference check when there is a possibility of a
896 * Attempt to lock the address space, if we cannot we then validate the 1068 * deadlock. Attempt to lock the address space, if we cannot we then
897 * source. If this is invalid we can skip the address space check, 1069 * validate the source. If this is invalid we can skip the address
898 * thus avoiding the deadlock. 1070 * space check, thus avoiding the deadlock:
899 */ 1071 */
900 if (unlikely(!down_read_trylock(&mm->mmap_sem))) { 1072 if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
901 if ((error_code & PF_USER) == 0 && 1073 if ((error_code & PF_USER) == 0 &&
@@ -906,8 +1078,9 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
906 down_read(&mm->mmap_sem); 1078 down_read(&mm->mmap_sem);
907 } else { 1079 } else {
908 /* 1080 /*
909 * The above down_read_trylock() might have succeeded in which 1081 * The above down_read_trylock() might have succeeded in
910 * case we'll have missed the might_sleep() from down_read(). 1082 * which case we'll have missed the might_sleep() from
1083 * down_read():
911 */ 1084 */
912 might_sleep(); 1085 might_sleep();
913 } 1086 }
@@ -927,7 +1100,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
927 /* 1100 /*
928 * Accessing the stack below %sp is always a bug. 1101 * Accessing the stack below %sp is always a bug.
929 * The large cushion allows instructions like enter 1102 * The large cushion allows instructions like enter
930 * and pusha to work. ("enter $65535,$31" pushes 1103 * and pusha to work. ("enter $65535, $31" pushes
931 * 32 pointers and then decrements %sp by 65535.) 1104 * 32 pointers and then decrements %sp by 65535.)
932 */ 1105 */
933 if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) { 1106 if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) {
@@ -946,6 +1119,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
946 */ 1119 */
947good_area: 1120good_area:
948 write = error_code & PF_WRITE; 1121 write = error_code & PF_WRITE;
1122
949 if (unlikely(access_error(error_code, write, vma))) { 1123 if (unlikely(access_error(error_code, write, vma))) {
950 bad_area_access_error(regs, error_code, address); 1124 bad_area_access_error(regs, error_code, address);
951 return; 1125 return;
@@ -954,75 +1128,21 @@ good_area:
954 /* 1128 /*
955 * If for any reason at all we couldn't handle the fault, 1129 * If for any reason at all we couldn't handle the fault,
956 * make sure we exit gracefully rather than endlessly redo 1130 * make sure we exit gracefully rather than endlessly redo
957 * the fault. 1131 * the fault:
958 */ 1132 */
959 fault = handle_mm_fault(mm, vma, address, write); 1133 fault = handle_mm_fault(mm, vma, address, write);
1134
960 if (unlikely(fault & VM_FAULT_ERROR)) { 1135 if (unlikely(fault & VM_FAULT_ERROR)) {
961 mm_fault_error(regs, error_code, address, fault); 1136 mm_fault_error(regs, error_code, address, fault);
962 return; 1137 return;
963 } 1138 }
1139
964 if (fault & VM_FAULT_MAJOR) 1140 if (fault & VM_FAULT_MAJOR)
965 tsk->maj_flt++; 1141 tsk->maj_flt++;
966 else 1142 else
967 tsk->min_flt++; 1143 tsk->min_flt++;
968 1144
969#ifdef CONFIG_X86_32 1145 check_v8086_mode(regs, address, tsk);
970 /*
971 * Did it hit the DOS screen memory VA from vm86 mode?
972 */
973 if (v8086_mode(regs)) {
974 unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT;
975 if (bit < 32)
976 tsk->thread.screen_bitmap |= 1 << bit;
977 }
978#endif
979 up_read(&mm->mmap_sem);
980}
981
982DEFINE_SPINLOCK(pgd_lock);
983LIST_HEAD(pgd_list);
984 1146
985void vmalloc_sync_all(void) 1147 up_read(&mm->mmap_sem);
986{
987 unsigned long address;
988
989#ifdef CONFIG_X86_32
990 if (SHARED_KERNEL_PMD)
991 return;
992
993 for (address = VMALLOC_START & PMD_MASK;
994 address >= TASK_SIZE && address < FIXADDR_TOP;
995 address += PMD_SIZE) {
996 unsigned long flags;
997 struct page *page;
998
999 spin_lock_irqsave(&pgd_lock, flags);
1000 list_for_each_entry(page, &pgd_list, lru) {
1001 if (!vmalloc_sync_one(page_address(page),
1002 address))
1003 break;
1004 }
1005 spin_unlock_irqrestore(&pgd_lock, flags);
1006 }
1007#else /* CONFIG_X86_64 */
1008 for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END;
1009 address += PGDIR_SIZE) {
1010 const pgd_t *pgd_ref = pgd_offset_k(address);
1011 unsigned long flags;
1012 struct page *page;
1013
1014 if (pgd_none(*pgd_ref))
1015 continue;
1016 spin_lock_irqsave(&pgd_lock, flags);
1017 list_for_each_entry(page, &pgd_list, lru) {
1018 pgd_t *pgd;
1019 pgd = (pgd_t *)page_address(page) + pgd_index(address);
1020 if (pgd_none(*pgd))
1021 set_pgd(pgd, *pgd_ref);
1022 else
1023 BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
1024 }
1025 spin_unlock_irqrestore(&pgd_lock, flags);
1026 }
1027#endif
1028} 1148}
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index ca53224fc56..d5e28424622 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -20,6 +20,64 @@
20#include <asm/pat.h> 20#include <asm/pat.h>
21#include <linux/module.h> 21#include <linux/module.h>
22 22
23#ifdef CONFIG_X86_PAE
24static int
25is_io_mapping_possible(resource_size_t base, unsigned long size)
26{
27 return 1;
28}
29#else
30static int
31is_io_mapping_possible(resource_size_t base, unsigned long size)
32{
33 /* There is no way to map greater than 1 << 32 address without PAE */
34 if (base + size > 0x100000000ULL)
35 return 0;
36
37 return 1;
38}
39#endif
40
41int
42reserve_io_memtype_wc(u64 base, unsigned long size, pgprot_t *prot)
43{
44 unsigned long ret_flag;
45
46 if (!is_io_mapping_possible(base, size))
47 goto out_err;
48
49 if (!pat_enabled) {
50 *prot = pgprot_noncached(PAGE_KERNEL);
51 return 0;
52 }
53
54 if (reserve_memtype(base, base + size, _PAGE_CACHE_WC, &ret_flag))
55 goto out_err;
56
57 if (ret_flag == _PAGE_CACHE_WB)
58 goto out_free;
59
60 if (kernel_map_sync_memtype(base, size, ret_flag))
61 goto out_free;
62
63 *prot = __pgprot(__PAGE_KERNEL | ret_flag);
64 return 0;
65
66out_free:
67 free_memtype(base, base + size);
68out_err:
69 return -EINVAL;
70}
71EXPORT_SYMBOL_GPL(reserve_io_memtype_wc);
72
73void
74free_io_memtype(u64 base, unsigned long size)
75{
76 if (pat_enabled)
77 free_memtype(base, base + size);
78}
79EXPORT_SYMBOL_GPL(free_io_memtype);
80
23/* Map 'pfn' using fixed map 'type' and protections 'prot' 81/* Map 'pfn' using fixed map 'type' and protections 'prot'
24 */ 82 */
25void * 83void *
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c
index 9cab18b0b85..0bcd7883d03 100644
--- a/arch/x86/mm/memtest.c
+++ b/arch/x86/mm/memtest.c
@@ -9,44 +9,44 @@
9 9
10#include <asm/e820.h> 10#include <asm/e820.h>
11 11
12static void __init memtest(unsigned long start_phys, unsigned long size, 12static u64 patterns[] __initdata = {
13 unsigned pattern) 13 0,
14 0xffffffffffffffffULL,
15 0x5555555555555555ULL,
16 0xaaaaaaaaaaaaaaaaULL,
17 0x1111111111111111ULL,
18 0x2222222222222222ULL,
19 0x4444444444444444ULL,
20 0x8888888888888888ULL,
21 0x3333333333333333ULL,
22 0x6666666666666666ULL,
23 0x9999999999999999ULL,
24 0xccccccccccccccccULL,
25 0x7777777777777777ULL,
26 0xbbbbbbbbbbbbbbbbULL,
27 0xddddddddddddddddULL,
28 0xeeeeeeeeeeeeeeeeULL,
29 0x7a6c7258554e494cULL, /* yeah ;-) */
30};
31
32static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad)
14{ 33{
15 unsigned long i; 34 printk(KERN_INFO " %016llx bad mem addr %010llx - %010llx reserved\n",
16 unsigned long *start; 35 (unsigned long long) pattern,
17 unsigned long start_bad; 36 (unsigned long long) start_bad,
18 unsigned long last_bad; 37 (unsigned long long) end_bad);
19 unsigned long val; 38 reserve_early(start_bad, end_bad, "BAD RAM");
20 unsigned long start_phys_aligned; 39}
21 unsigned long count;
22 unsigned long incr;
23
24 switch (pattern) {
25 case 0:
26 val = 0UL;
27 break;
28 case 1:
29 val = -1UL;
30 break;
31 case 2:
32#ifdef CONFIG_X86_64
33 val = 0x5555555555555555UL;
34#else
35 val = 0x55555555UL;
36#endif
37 break;
38 case 3:
39#ifdef CONFIG_X86_64
40 val = 0xaaaaaaaaaaaaaaaaUL;
41#else
42 val = 0xaaaaaaaaUL;
43#endif
44 break;
45 default:
46 return;
47 }
48 40
49 incr = sizeof(unsigned long); 41static void __init memtest(u64 pattern, u64 start_phys, u64 size)
42{
43 u64 i, count;
44 u64 *start;
45 u64 start_bad, last_bad;
46 u64 start_phys_aligned;
47 size_t incr;
48
49 incr = sizeof(pattern);
50 start_phys_aligned = ALIGN(start_phys, incr); 50 start_phys_aligned = ALIGN(start_phys, incr);
51 count = (size - (start_phys_aligned - start_phys))/incr; 51 count = (size - (start_phys_aligned - start_phys))/incr;
52 start = __va(start_phys_aligned); 52 start = __va(start_phys_aligned);
@@ -54,25 +54,42 @@ static void __init memtest(unsigned long start_phys, unsigned long size,
54 last_bad = 0; 54 last_bad = 0;
55 55
56 for (i = 0; i < count; i++) 56 for (i = 0; i < count; i++)
57 start[i] = val; 57 start[i] = pattern;
58 for (i = 0; i < count; i++, start++, start_phys_aligned += incr) { 58 for (i = 0; i < count; i++, start++, start_phys_aligned += incr) {
59 if (*start != val) { 59 if (*start == pattern)
60 if (start_phys_aligned == last_bad + incr) { 60 continue;
61 last_bad += incr; 61 if (start_phys_aligned == last_bad + incr) {
62 } else { 62 last_bad += incr;
63 if (start_bad) { 63 continue;
64 printk(KERN_CONT "\n %016lx bad mem addr %010lx - %010lx reserved",
65 val, start_bad, last_bad + incr);
66 reserve_early(start_bad, last_bad + incr, "BAD RAM");
67 }
68 start_bad = last_bad = start_phys_aligned;
69 }
70 } 64 }
65 if (start_bad)
66 reserve_bad_mem(pattern, start_bad, last_bad + incr);
67 start_bad = last_bad = start_phys_aligned;
71 } 68 }
72 if (start_bad) { 69 if (start_bad)
73 printk(KERN_CONT "\n %016lx bad mem addr %010lx - %010lx reserved", 70 reserve_bad_mem(pattern, start_bad, last_bad + incr);
74 val, start_bad, last_bad + incr); 71}
75 reserve_early(start_bad, last_bad + incr, "BAD RAM"); 72
73static void __init do_one_pass(u64 pattern, u64 start, u64 end)
74{
75 u64 size = 0;
76
77 while (start < end) {
78 start = find_e820_area_size(start, &size, 1);
79
80 /* done ? */
81 if (start >= end)
82 break;
83 if (start + size > end)
84 size = end - start;
85
86 printk(KERN_INFO " %010llx - %010llx pattern %016llx\n",
87 (unsigned long long) start,
88 (unsigned long long) start + size,
89 (unsigned long long) cpu_to_be64(pattern));
90 memtest(pattern, start, size);
91
92 start += size;
76 } 93 }
77} 94}
78 95
@@ -90,33 +107,22 @@ early_param("memtest", parse_memtest);
90 107
91void __init early_memtest(unsigned long start, unsigned long end) 108void __init early_memtest(unsigned long start, unsigned long end)
92{ 109{
93 u64 t_start, t_size; 110 unsigned int i;
94 unsigned pattern; 111 unsigned int idx = 0;
95 112
96 if (!memtest_pattern) 113 if (!memtest_pattern)
97 return; 114 return;
98 115
99 printk(KERN_INFO "early_memtest: pattern num %d", memtest_pattern); 116 printk(KERN_INFO "early_memtest: # of tests: %d\n", memtest_pattern);
100 for (pattern = 0; pattern < memtest_pattern; pattern++) { 117 for (i = 0; i < memtest_pattern; i++) {
101 t_start = start; 118 idx = i % ARRAY_SIZE(patterns);
102 t_size = 0; 119 do_one_pass(patterns[idx], start, end);
103 while (t_start < end) { 120 }
104 t_start = find_e820_area_size(t_start, &t_size, 1);
105
106 /* done ? */
107 if (t_start >= end)
108 break;
109 if (t_start + t_size > end)
110 t_size = end - t_start;
111
112 printk(KERN_CONT "\n %010llx - %010llx pattern %d",
113 (unsigned long long)t_start,
114 (unsigned long long)t_start + t_size, pattern);
115
116 memtest(t_start, t_size, pattern);
117 121
118 t_start += t_size; 122 if (idx > 0) {
119 } 123 printk(KERN_INFO "early_memtest: wipe out "
124 "test pattern from memory\n");
125 /* additional test with pattern 0 will do this */
126 do_one_pass(0, start, end);
120 } 127 }
121 printk(KERN_CONT "\n");
122} 128}
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index d1f7439d173..3957cd6d645 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -194,7 +194,7 @@ void *alloc_remap(int nid, unsigned long size)
194 size = ALIGN(size, L1_CACHE_BYTES); 194 size = ALIGN(size, L1_CACHE_BYTES);
195 195
196 if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid]) 196 if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid])
197 return 0; 197 return NULL;
198 198
199 node_remap_alloc_vaddr[nid] += size; 199 node_remap_alloc_vaddr[nid] += size;
200 memset(allocation, 0, size); 200 memset(allocation, 0, size);
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index deb1c1ab786..64c9cf043cd 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -166,7 +166,7 @@ int __init compute_hash_shift(struct bootnode *nodes, int numnodes,
166 return shift; 166 return shift;
167} 167}
168 168
169int early_pfn_to_nid(unsigned long pfn) 169int __meminit __early_pfn_to_nid(unsigned long pfn)
170{ 170{
171 return phys_to_nid(pfn << PAGE_SHIFT); 171 return phys_to_nid(pfn << PAGE_SHIFT);
172} 172}
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 8ca0d8566fc..8253bc97587 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -482,6 +482,13 @@ static int split_large_page(pte_t *kpte, unsigned long address)
482 pbase = (pte_t *)page_address(base); 482 pbase = (pte_t *)page_address(base);
483 paravirt_alloc_pte(&init_mm, page_to_pfn(base)); 483 paravirt_alloc_pte(&init_mm, page_to_pfn(base));
484 ref_prot = pte_pgprot(pte_clrhuge(*kpte)); 484 ref_prot = pte_pgprot(pte_clrhuge(*kpte));
485 /*
486 * If we ever want to utilize the PAT bit, we need to
487 * update this function to make sure it's converted from
488 * bit 12 to bit 7 when we cross from the 2MB level to
489 * the 4K level:
490 */
491 WARN_ON_ONCE(pgprot_val(ref_prot) & _PAGE_PAT_LARGE);
485 492
486#ifdef CONFIG_X86_64 493#ifdef CONFIG_X86_64
487 if (level == PG_LEVEL_1G) { 494 if (level == PG_LEVEL_1G) {
@@ -508,18 +515,13 @@ static int split_large_page(pte_t *kpte, unsigned long address)
508#endif 515#endif
509 516
510 /* 517 /*
511 * Install the new, split up pagetable. Important details here: 518 * Install the new, split up pagetable.
512 *
513 * On Intel the NX bit of all levels must be cleared to make a
514 * page executable. See section 4.13.2 of Intel 64 and IA-32
515 * Architectures Software Developer's Manual).
516 * 519 *
517 * Mark the entry present. The current mapping might be 520 * We use the standard kernel pagetable protections for the new
518 * set to not present, which we preserved above. 521 * pagetable protections, the actual ptes set above control the
522 * primary protection behavior:
519 */ 523 */
520 ref_prot = pte_pgprot(pte_mkexec(pte_clrhuge(*kpte))); 524 __set_pmd_pte(kpte, address, mk_pte(base, __pgprot(_KERNPG_TABLE)));
521 pgprot_val(ref_prot) |= _PAGE_PRESENT;
522 __set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
523 base = NULL; 525 base = NULL;
524 526
525out_unlock: 527out_unlock:
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 05f9aef6818..fdfedb65d45 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -634,6 +634,33 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
634} 634}
635 635
636/* 636/*
637 * Change the memory type for the physial address range in kernel identity
638 * mapping space if that range is a part of identity map.
639 */
640int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags)
641{
642 unsigned long id_sz;
643
644 if (!pat_enabled || base >= __pa(high_memory))
645 return 0;
646
647 id_sz = (__pa(high_memory) < base + size) ?
648 __pa(high_memory) - base :
649 size;
650
651 if (ioremap_change_attr((unsigned long)__va(base), id_sz, flags) < 0) {
652 printk(KERN_INFO
653 "%s:%d ioremap_change_attr failed %s "
654 "for %Lx-%Lx\n",
655 current->comm, current->pid,
656 cattr_name(flags),
657 base, (unsigned long long)(base + size));
658 return -EINVAL;
659 }
660 return 0;
661}
662
663/*
637 * Internal interface to reserve a range of physical memory with prot. 664 * Internal interface to reserve a range of physical memory with prot.
638 * Reserved non RAM regions only and after successful reserve_memtype, 665 * Reserved non RAM regions only and after successful reserve_memtype,
639 * this func also keeps identity mapping (if any) in sync with this new prot. 666 * this func also keeps identity mapping (if any) in sync with this new prot.
@@ -642,7 +669,7 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
642 int strict_prot) 669 int strict_prot)
643{ 670{
644 int is_ram = 0; 671 int is_ram = 0;
645 int id_sz, ret; 672 int ret;
646 unsigned long flags; 673 unsigned long flags;
647 unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK); 674 unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK);
648 675
@@ -679,23 +706,8 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
679 flags); 706 flags);
680 } 707 }
681 708
682 /* Need to keep identity mapping in sync */ 709 if (kernel_map_sync_memtype(paddr, size, flags) < 0) {
683 if (paddr >= __pa(high_memory))
684 return 0;
685
686 id_sz = (__pa(high_memory) < paddr + size) ?
687 __pa(high_memory) - paddr :
688 size;
689
690 if (ioremap_change_attr((unsigned long)__va(paddr), id_sz, flags) < 0) {
691 free_memtype(paddr, paddr + size); 710 free_memtype(paddr, paddr + size);
692 printk(KERN_ERR
693 "%s:%d reserve_pfn_range ioremap_change_attr failed %s "
694 "for %Lx-%Lx\n",
695 current->comm, current->pid,
696 cattr_name(flags),
697 (unsigned long long)paddr,
698 (unsigned long long)(paddr + size));
699 return -EINVAL; 711 return -EINVAL;
700 } 712 }
701 return 0; 713 return 0;
diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S
index d1e9b53f9d3..b641388d828 100644
--- a/arch/x86/power/hibernate_asm_32.S
+++ b/arch/x86/power/hibernate_asm_32.S
@@ -8,7 +8,7 @@
8 8
9#include <linux/linkage.h> 9#include <linux/linkage.h>
10#include <asm/segment.h> 10#include <asm/segment.h>
11#include <asm/page.h> 11#include <asm/page_types.h>
12#include <asm/asm-offsets.h> 12#include <asm/asm-offsets.h>
13#include <asm/processor-flags.h> 13#include <asm/processor-flags.h>
14 14
diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S
index 000415947d9..9356547d8c0 100644
--- a/arch/x86/power/hibernate_asm_64.S
+++ b/arch/x86/power/hibernate_asm_64.S
@@ -18,7 +18,7 @@
18 .text 18 .text
19#include <linux/linkage.h> 19#include <linux/linkage.h>
20#include <asm/segment.h> 20#include <asm/segment.h>
21#include <asm/page.h> 21#include <asm/page_types.h>
22#include <asm/asm-offsets.h> 22#include <asm/asm-offsets.h>
23#include <asm/processor-flags.h> 23#include <asm/processor-flags.h>
24 24
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 9c98cc6ba97..7133cdf9098 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -85,8 +85,8 @@ static unsigned long vdso_addr(unsigned long start, unsigned len)
85 unsigned long addr, end; 85 unsigned long addr, end;
86 unsigned offset; 86 unsigned offset;
87 end = (start + PMD_SIZE - 1) & PMD_MASK; 87 end = (start + PMD_SIZE - 1) & PMD_MASK;
88 if (end >= TASK_SIZE64) 88 if (end >= TASK_SIZE_MAX)
89 end = TASK_SIZE64; 89 end = TASK_SIZE_MAX;
90 end -= len; 90 end -= len;
91 /* This loses some more bits than a modulo, but is cheaper */ 91 /* This loses some more bits than a modulo, but is cheaper */
92 offset = get_random_int() & (PTRS_PER_PTE - 1); 92 offset = get_random_int() & (PTRS_PER_PTE - 1);
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 87b9ab16642..b83e119fbeb 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -6,7 +6,7 @@ config XEN
6 bool "Xen guest support" 6 bool "Xen guest support"
7 select PARAVIRT 7 select PARAVIRT
8 select PARAVIRT_CLOCK 8 select PARAVIRT_CLOCK
9 depends on X86_64 || (X86_32 && X86_PAE && !(X86_VISWS || X86_VOYAGER)) 9 depends on X86_64 || (X86_32 && X86_PAE && !X86_VISWS)
10 depends on X86_CMPXCHG && X86_TSC 10 depends on X86_CMPXCHG && X86_TSC
11 help 11 help
12 This is the Linux Xen port. Enabling this will allow the 12 This is the Linux Xen port. Enabling this will allow the
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 86497d5f44c..c52f4034c7f 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -940,6 +940,9 @@ asmlinkage void __init xen_start_kernel(void)
940 possible map and a non-dummy shared_info. */ 940 possible map and a non-dummy shared_info. */
941 per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; 941 per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
942 942
943 local_irq_disable();
944 early_boot_irqs_off();
945
943 xen_raw_console_write("mapping kernel into physical memory\n"); 946 xen_raw_console_write("mapping kernel into physical memory\n");
944 pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); 947 pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
945 948
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 63d49a523ed..1a5ff24e29c 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -8,7 +8,7 @@
8 8
9#include <asm/boot.h> 9#include <asm/boot.h>
10#include <asm/asm.h> 10#include <asm/asm.h>
11#include <asm/page.h> 11#include <asm/page_types.h>
12 12
13#include <xen/interface/elfnote.h> 13#include <xen/interface/elfnote.h>
14#include <asm/xen/interface.h> 14#include <asm/xen/interface.h>