diff options
Diffstat (limited to 'arch')
162 files changed, 4348 insertions, 3965 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ac22bb7719f7..a2ae4c05f46f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -19,6 +19,8 @@ config X86_64 | |||
19 | config X86 | 19 | config X86 |
20 | def_bool y | 20 | def_bool y |
21 | select HAVE_AOUT if X86_32 | 21 | select HAVE_AOUT if X86_32 |
22 | select HAVE_READQ | ||
23 | select HAVE_WRITEQ | ||
22 | select HAVE_UNSTABLE_SCHED_CLOCK | 24 | select HAVE_UNSTABLE_SCHED_CLOCK |
23 | select HAVE_IDE | 25 | select HAVE_IDE |
24 | select HAVE_OPROFILE | 26 | select HAVE_OPROFILE |
@@ -87,6 +89,10 @@ config GENERIC_IOMAP | |||
87 | config GENERIC_BUG | 89 | config GENERIC_BUG |
88 | def_bool y | 90 | def_bool y |
89 | depends on BUG | 91 | depends on BUG |
92 | select GENERIC_BUG_RELATIVE_POINTERS if X86_64 | ||
93 | |||
94 | config GENERIC_BUG_RELATIVE_POINTERS | ||
95 | bool | ||
90 | 96 | ||
91 | config GENERIC_HWEIGHT | 97 | config GENERIC_HWEIGHT |
92 | def_bool y | 98 | def_bool y |
@@ -242,21 +248,13 @@ config X86_FIND_SMP_CONFIG | |||
242 | def_bool y | 248 | def_bool y |
243 | depends on X86_MPPARSE || X86_VOYAGER | 249 | depends on X86_MPPARSE || X86_VOYAGER |
244 | 250 | ||
245 | if ACPI | ||
246 | config X86_MPPARSE | 251 | config X86_MPPARSE |
247 | def_bool y | 252 | bool "Enable MPS table" if ACPI |
248 | bool "Enable MPS table" | 253 | default y |
249 | depends on X86_LOCAL_APIC | 254 | depends on X86_LOCAL_APIC |
250 | help | 255 | help |
251 | For old smp systems that do not have proper acpi support. Newer systems | 256 | For old smp systems that do not have proper acpi support. Newer systems |
252 | (esp with 64bit cpus) with acpi support, MADT and DSDT will override it | 257 | (esp with 64bit cpus) with acpi support, MADT and DSDT will override it |
253 | endif | ||
254 | |||
255 | if !ACPI | ||
256 | config X86_MPPARSE | ||
257 | def_bool y | ||
258 | depends on X86_LOCAL_APIC | ||
259 | endif | ||
260 | 258 | ||
261 | choice | 259 | choice |
262 | prompt "Subarchitecture Type" | 260 | prompt "Subarchitecture Type" |
@@ -465,10 +463,6 @@ config X86_CYCLONE_TIMER | |||
465 | def_bool y | 463 | def_bool y |
466 | depends on X86_GENERICARCH | 464 | depends on X86_GENERICARCH |
467 | 465 | ||
468 | config ES7000_CLUSTERED_APIC | ||
469 | def_bool y | ||
470 | depends on SMP && X86_ES7000 && MPENTIUMIII | ||
471 | |||
472 | source "arch/x86/Kconfig.cpu" | 466 | source "arch/x86/Kconfig.cpu" |
473 | 467 | ||
474 | config HPET_TIMER | 468 | config HPET_TIMER |
@@ -569,7 +563,7 @@ config AMD_IOMMU | |||
569 | 563 | ||
570 | # need this always selected by IOMMU for the VIA workaround | 564 | # need this always selected by IOMMU for the VIA workaround |
571 | config SWIOTLB | 565 | config SWIOTLB |
572 | bool | 566 | def_bool y if X86_64 |
573 | help | 567 | help |
574 | Support for software bounce buffers used on x86-64 systems | 568 | Support for software bounce buffers used on x86-64 systems |
575 | which don't have a hardware IOMMU (e.g. the current generation | 569 | which don't have a hardware IOMMU (e.g. the current generation |
@@ -660,6 +654,30 @@ config X86_VISWS_APIC | |||
660 | def_bool y | 654 | def_bool y |
661 | depends on X86_32 && X86_VISWS | 655 | depends on X86_32 && X86_VISWS |
662 | 656 | ||
657 | config X86_REROUTE_FOR_BROKEN_BOOT_IRQS | ||
658 | bool "Reroute for broken boot IRQs" | ||
659 | default n | ||
660 | depends on X86_IO_APIC | ||
661 | help | ||
662 | This option enables a workaround that fixes a source of | ||
663 | spurious interrupts. This is recommended when threaded | ||
664 | interrupt handling is used on systems where the generation of | ||
665 | superfluous "boot interrupts" cannot be disabled. | ||
666 | |||
667 | Some chipsets generate a legacy INTx "boot IRQ" when the IRQ | ||
668 | entry in the chipset's IO-APIC is masked (as, e.g. the RT | ||
669 | kernel does during interrupt handling). On chipsets where this | ||
670 | boot IRQ generation cannot be disabled, this workaround keeps | ||
671 | the original IRQ line masked so that only the equivalent "boot | ||
672 | IRQ" is delivered to the CPUs. The workaround also tells the | ||
673 | kernel to set up the IRQ handler on the boot IRQ line. In this | ||
674 | way only one interrupt is delivered to the kernel. Otherwise | ||
675 | the spurious second interrupt may cause the kernel to bring | ||
676 | down (vital) interrupt lines. | ||
677 | |||
678 | Only affects "broken" chipsets. Interrupt sharing may be | ||
679 | increased on these systems. | ||
680 | |||
663 | config X86_MCE | 681 | config X86_MCE |
664 | bool "Machine Check Exception" | 682 | bool "Machine Check Exception" |
665 | depends on !X86_VOYAGER | 683 | depends on !X86_VOYAGER |
@@ -956,24 +974,37 @@ config X86_PAE | |||
956 | config ARCH_PHYS_ADDR_T_64BIT | 974 | config ARCH_PHYS_ADDR_T_64BIT |
957 | def_bool X86_64 || X86_PAE | 975 | def_bool X86_64 || X86_PAE |
958 | 976 | ||
977 | config DIRECT_GBPAGES | ||
978 | bool "Enable 1GB pages for kernel pagetables" if EMBEDDED | ||
979 | default y | ||
980 | depends on X86_64 | ||
981 | help | ||
982 | Allow the kernel linear mapping to use 1GB pages on CPUs that | ||
983 | support it. This can improve the kernel's performance a tiny bit by | ||
984 | reducing TLB pressure. If in doubt, say "Y". | ||
985 | |||
959 | # Common NUMA Features | 986 | # Common NUMA Features |
960 | config NUMA | 987 | config NUMA |
961 | bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)" | 988 | bool "Numa Memory Allocation and Scheduler Support" |
962 | depends on SMP | 989 | depends on SMP |
963 | depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL) | 990 | depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL) |
964 | default n if X86_PC | 991 | default n if X86_PC |
965 | default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP) | 992 | default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP) |
966 | help | 993 | help |
967 | Enable NUMA (Non Uniform Memory Access) support. | 994 | Enable NUMA (Non Uniform Memory Access) support. |
995 | |||
968 | The kernel will try to allocate memory used by a CPU on the | 996 | The kernel will try to allocate memory used by a CPU on the |
969 | local memory controller of the CPU and add some more | 997 | local memory controller of the CPU and add some more |
970 | NUMA awareness to the kernel. | 998 | NUMA awareness to the kernel. |
971 | 999 | ||
972 | For 32-bit this is currently highly experimental and should be only | 1000 | For 64-bit this is recommended if the system is Intel Core i7 |
973 | used for kernel development. It might also cause boot failures. | 1001 | (or later), AMD Opteron, or EM64T NUMA. |
974 | For 64-bit this is recommended on all multiprocessor Opteron systems. | 1002 | |
975 | If the system is EM64T, you should say N unless your system is | 1003 | For 32-bit this is only needed on (rare) 32-bit-only platforms |
976 | EM64T NUMA. | 1004 | that support NUMA topologies, such as NUMAQ / Summit, or if you |
1005 | boot a 32-bit kernel on a 64-bit NUMA platform. | ||
1006 | |||
1007 | Otherwise, you should say N. | ||
977 | 1008 | ||
978 | comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI" | 1009 | comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI" |
979 | depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI) | 1010 | depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI) |
@@ -1493,6 +1524,10 @@ config ARCH_ENABLE_MEMORY_HOTPLUG | |||
1493 | def_bool y | 1524 | def_bool y |
1494 | depends on X86_64 || (X86_32 && HIGHMEM) | 1525 | depends on X86_64 || (X86_32 && HIGHMEM) |
1495 | 1526 | ||
1527 | config ARCH_ENABLE_MEMORY_HOTREMOVE | ||
1528 | def_bool y | ||
1529 | depends on MEMORY_HOTPLUG | ||
1530 | |||
1496 | config HAVE_ARCH_EARLY_PFN_TO_NID | 1531 | config HAVE_ARCH_EARLY_PFN_TO_NID |
1497 | def_bool X86_64 | 1532 | def_bool X86_64 |
1498 | depends on NUMA | 1533 | depends on NUMA |
@@ -1632,13 +1667,6 @@ config APM_ALLOW_INTS | |||
1632 | many of the newer IBM Thinkpads. If you experience hangs when you | 1667 | many of the newer IBM Thinkpads. If you experience hangs when you |
1633 | suspend, try setting this to Y. Otherwise, say N. | 1668 | suspend, try setting this to Y. Otherwise, say N. |
1634 | 1669 | ||
1635 | config APM_REAL_MODE_POWER_OFF | ||
1636 | bool "Use real mode APM BIOS call to power off" | ||
1637 | help | ||
1638 | Use real mode APM BIOS calls to switch off the computer. This is | ||
1639 | a work-around for a number of buggy BIOSes. Switch this option on if | ||
1640 | your computer crashes instead of powering off properly. | ||
1641 | |||
1642 | endif # APM | 1670 | endif # APM |
1643 | 1671 | ||
1644 | source "arch/x86/kernel/cpu/cpufreq/Kconfig" | 1672 | source "arch/x86/kernel/cpu/cpufreq/Kconfig" |
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 2a3dfbd5e677..4ee768660f75 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug | |||
@@ -114,18 +114,6 @@ config DEBUG_RODATA | |||
114 | data. This is recommended so that we can catch kernel bugs sooner. | 114 | data. This is recommended so that we can catch kernel bugs sooner. |
115 | If in doubt, say "Y". | 115 | If in doubt, say "Y". |
116 | 116 | ||
117 | config DIRECT_GBPAGES | ||
118 | bool "Enable gbpages-mapped kernel pagetables" | ||
119 | depends on DEBUG_KERNEL && EXPERIMENTAL && X86_64 | ||
120 | help | ||
121 | Enable gigabyte pages support (if the CPU supports it). This can | ||
122 | improve the kernel's performance a tiny bit by reducing TLB | ||
123 | pressure. | ||
124 | |||
125 | This is experimental code. | ||
126 | |||
127 | If in doubt, say "N". | ||
128 | |||
129 | config DEBUG_RODATA_TEST | 117 | config DEBUG_RODATA_TEST |
130 | bool "Testcase for the DEBUG_RODATA feature" | 118 | bool "Testcase for the DEBUG_RODATA feature" |
131 | depends on DEBUG_RODATA | 119 | depends on DEBUG_RODATA |
@@ -307,10 +295,10 @@ config OPTIMIZE_INLINING | |||
307 | developers have marked 'inline'. Doing so takes away freedom from gcc to | 295 | developers have marked 'inline'. Doing so takes away freedom from gcc to |
308 | do what it thinks is best, which is desirable for the gcc 3.x series of | 296 | do what it thinks is best, which is desirable for the gcc 3.x series of |
309 | compilers. The gcc 4.x series have a rewritten inlining algorithm and | 297 | compilers. The gcc 4.x series have a rewritten inlining algorithm and |
310 | disabling this option will generate a smaller kernel there. Hopefully | 298 | enabling this option will generate a smaller kernel there. Hopefully |
311 | this algorithm is so good that allowing gcc4 to make the decision can | 299 | this algorithm is so good that allowing gcc 4.x and above to make the |
312 | become the default in the future, until then this option is there to | 300 | decision will become the default in the future. Until then this option |
313 | test gcc for this. | 301 | is there to test gcc for this. |
314 | 302 | ||
315 | If unsure, say N. | 303 | If unsure, say N. |
316 | 304 | ||
diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c index b939cb476dec..5d4742ed4aa2 100644 --- a/arch/x86/boot/video-vga.c +++ b/arch/x86/boot/video-vga.c | |||
@@ -34,7 +34,7 @@ static struct mode_info cga_modes[] = { | |||
34 | { VIDEO_80x25, 80, 25, 0 }, | 34 | { VIDEO_80x25, 80, 25, 0 }, |
35 | }; | 35 | }; |
36 | 36 | ||
37 | __videocard video_vga; | 37 | static __videocard video_vga; |
38 | 38 | ||
39 | /* Set basic 80x25 mode */ | 39 | /* Set basic 80x25 mode */ |
40 | static u8 vga_set_basic_mode(void) | 40 | static u8 vga_set_basic_mode(void) |
@@ -259,7 +259,7 @@ static int vga_probe(void) | |||
259 | return mode_count[adapter]; | 259 | return mode_count[adapter]; |
260 | } | 260 | } |
261 | 261 | ||
262 | __videocard video_vga = { | 262 | static __videocard video_vga = { |
263 | .card_name = "VGA", | 263 | .card_name = "VGA", |
264 | .probe = vga_probe, | 264 | .probe = vga_probe, |
265 | .set_mode = vga_set_mode, | 265 | .set_mode = vga_set_mode, |
diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c index 83598b23093a..3bef2c1febe9 100644 --- a/arch/x86/boot/video.c +++ b/arch/x86/boot/video.c | |||
@@ -226,7 +226,7 @@ static unsigned int mode_menu(void) | |||
226 | 226 | ||
227 | #ifdef CONFIG_VIDEO_RETAIN | 227 | #ifdef CONFIG_VIDEO_RETAIN |
228 | /* Save screen content to the heap */ | 228 | /* Save screen content to the heap */ |
229 | struct saved_screen { | 229 | static struct saved_screen { |
230 | int x, y; | 230 | int x, y; |
231 | int curx, cury; | 231 | int curx, cury; |
232 | u16 *data; | 232 | u16 *data; |
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 13b8c86ae985..b30a08ed8eb4 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig | |||
@@ -77,7 +77,7 @@ CONFIG_AUDIT=y | |||
77 | CONFIG_AUDITSYSCALL=y | 77 | CONFIG_AUDITSYSCALL=y |
78 | CONFIG_AUDIT_TREE=y | 78 | CONFIG_AUDIT_TREE=y |
79 | # CONFIG_IKCONFIG is not set | 79 | # CONFIG_IKCONFIG is not set |
80 | CONFIG_LOG_BUF_SHIFT=17 | 80 | CONFIG_LOG_BUF_SHIFT=18 |
81 | CONFIG_CGROUPS=y | 81 | CONFIG_CGROUPS=y |
82 | # CONFIG_CGROUP_DEBUG is not set | 82 | # CONFIG_CGROUP_DEBUG is not set |
83 | CONFIG_CGROUP_NS=y | 83 | CONFIG_CGROUP_NS=y |
@@ -298,7 +298,7 @@ CONFIG_KEXEC=y | |||
298 | CONFIG_CRASH_DUMP=y | 298 | CONFIG_CRASH_DUMP=y |
299 | # CONFIG_KEXEC_JUMP is not set | 299 | # CONFIG_KEXEC_JUMP is not set |
300 | CONFIG_PHYSICAL_START=0x1000000 | 300 | CONFIG_PHYSICAL_START=0x1000000 |
301 | CONFIG_RELOCATABLE=y | 301 | # CONFIG_RELOCATABLE is not set |
302 | CONFIG_PHYSICAL_ALIGN=0x200000 | 302 | CONFIG_PHYSICAL_ALIGN=0x200000 |
303 | CONFIG_HOTPLUG_CPU=y | 303 | CONFIG_HOTPLUG_CPU=y |
304 | # CONFIG_COMPAT_VDSO is not set | 304 | # CONFIG_COMPAT_VDSO is not set |
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index f0a03d7a7d63..0e7dbc0a3e46 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig | |||
@@ -77,7 +77,7 @@ CONFIG_AUDIT=y | |||
77 | CONFIG_AUDITSYSCALL=y | 77 | CONFIG_AUDITSYSCALL=y |
78 | CONFIG_AUDIT_TREE=y | 78 | CONFIG_AUDIT_TREE=y |
79 | # CONFIG_IKCONFIG is not set | 79 | # CONFIG_IKCONFIG is not set |
80 | CONFIG_LOG_BUF_SHIFT=17 | 80 | CONFIG_LOG_BUF_SHIFT=18 |
81 | CONFIG_CGROUPS=y | 81 | CONFIG_CGROUPS=y |
82 | # CONFIG_CGROUP_DEBUG is not set | 82 | # CONFIG_CGROUP_DEBUG is not set |
83 | CONFIG_CGROUP_NS=y | 83 | CONFIG_CGROUP_NS=y |
@@ -298,7 +298,7 @@ CONFIG_SCHED_HRTICK=y | |||
298 | CONFIG_KEXEC=y | 298 | CONFIG_KEXEC=y |
299 | CONFIG_CRASH_DUMP=y | 299 | CONFIG_CRASH_DUMP=y |
300 | CONFIG_PHYSICAL_START=0x1000000 | 300 | CONFIG_PHYSICAL_START=0x1000000 |
301 | CONFIG_RELOCATABLE=y | 301 | # CONFIG_RELOCATABLE is not set |
302 | CONFIG_PHYSICAL_ALIGN=0x200000 | 302 | CONFIG_PHYSICAL_ALIGN=0x200000 |
303 | CONFIG_HOTPLUG_CPU=y | 303 | CONFIG_HOTPLUG_CPU=y |
304 | # CONFIG_COMPAT_VDSO is not set | 304 | # CONFIG_COMPAT_VDSO is not set |
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 4bc02b23674b..b195f85526e3 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c | |||
@@ -32,6 +32,8 @@ | |||
32 | #include <asm/proto.h> | 32 | #include <asm/proto.h> |
33 | #include <asm/vdso.h> | 33 | #include <asm/vdso.h> |
34 | 34 | ||
35 | #include <asm/sigframe.h> | ||
36 | |||
35 | #define DEBUG_SIG 0 | 37 | #define DEBUG_SIG 0 |
36 | 38 | ||
37 | #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) | 39 | #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) |
@@ -41,7 +43,6 @@ | |||
41 | X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \ | 43 | X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \ |
42 | X86_EFLAGS_CF) | 44 | X86_EFLAGS_CF) |
43 | 45 | ||
44 | asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset); | ||
45 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where); | 46 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where); |
46 | 47 | ||
47 | int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) | 48 | int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) |
@@ -173,47 +174,28 @@ asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr, | |||
173 | /* | 174 | /* |
174 | * Do a signal return; undo the signal stack. | 175 | * Do a signal return; undo the signal stack. |
175 | */ | 176 | */ |
177 | #define COPY(x) { \ | ||
178 | err |= __get_user(regs->x, &sc->x); \ | ||
179 | } | ||
176 | 180 | ||
177 | struct sigframe | 181 | #define COPY_SEG_CPL3(seg) { \ |
178 | { | 182 | unsigned short tmp; \ |
179 | u32 pretcode; | 183 | err |= __get_user(tmp, &sc->seg); \ |
180 | int sig; | 184 | regs->seg = tmp | 3; \ |
181 | struct sigcontext_ia32 sc; | ||
182 | struct _fpstate_ia32 fpstate_unused; /* look at kernel/sigframe.h */ | ||
183 | unsigned int extramask[_COMPAT_NSIG_WORDS-1]; | ||
184 | char retcode[8]; | ||
185 | /* fp state follows here */ | ||
186 | }; | ||
187 | |||
188 | struct rt_sigframe | ||
189 | { | ||
190 | u32 pretcode; | ||
191 | int sig; | ||
192 | u32 pinfo; | ||
193 | u32 puc; | ||
194 | compat_siginfo_t info; | ||
195 | struct ucontext_ia32 uc; | ||
196 | char retcode[8]; | ||
197 | /* fp state follows here */ | ||
198 | }; | ||
199 | |||
200 | #define COPY(x) { \ | ||
201 | unsigned int reg; \ | ||
202 | err |= __get_user(reg, &sc->x); \ | ||
203 | regs->x = reg; \ | ||
204 | } | 185 | } |
205 | 186 | ||
206 | #define RELOAD_SEG(seg,mask) \ | 187 | #define RELOAD_SEG(seg) { \ |
207 | { unsigned int cur; \ | 188 | unsigned int cur, pre; \ |
208 | unsigned short pre; \ | 189 | err |= __get_user(pre, &sc->seg); \ |
209 | err |= __get_user(pre, &sc->seg); \ | 190 | savesegment(seg, cur); \ |
210 | savesegment(seg, cur); \ | 191 | pre |= 3; \ |
211 | pre |= mask; \ | 192 | if (pre != cur) \ |
212 | if (pre != cur) loadsegment(seg, pre); } | 193 | loadsegment(seg, pre); \ |
194 | } | ||
213 | 195 | ||
214 | static int ia32_restore_sigcontext(struct pt_regs *regs, | 196 | static int ia32_restore_sigcontext(struct pt_regs *regs, |
215 | struct sigcontext_ia32 __user *sc, | 197 | struct sigcontext_ia32 __user *sc, |
216 | unsigned int *peax) | 198 | unsigned int *pax) |
217 | { | 199 | { |
218 | unsigned int tmpflags, gs, oldgs, err = 0; | 200 | unsigned int tmpflags, gs, oldgs, err = 0; |
219 | void __user *buf; | 201 | void __user *buf; |
@@ -240,18 +222,16 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, | |||
240 | if (gs != oldgs) | 222 | if (gs != oldgs) |
241 | load_gs_index(gs); | 223 | load_gs_index(gs); |
242 | 224 | ||
243 | RELOAD_SEG(fs, 3); | 225 | RELOAD_SEG(fs); |
244 | RELOAD_SEG(ds, 3); | 226 | RELOAD_SEG(ds); |
245 | RELOAD_SEG(es, 3); | 227 | RELOAD_SEG(es); |
246 | 228 | ||
247 | COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); | 229 | COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); |
248 | COPY(dx); COPY(cx); COPY(ip); | 230 | COPY(dx); COPY(cx); COPY(ip); |
249 | /* Don't touch extended registers */ | 231 | /* Don't touch extended registers */ |
250 | 232 | ||
251 | err |= __get_user(regs->cs, &sc->cs); | 233 | COPY_SEG_CPL3(cs); |
252 | regs->cs |= 3; | 234 | COPY_SEG_CPL3(ss); |
253 | err |= __get_user(regs->ss, &sc->ss); | ||
254 | regs->ss |= 3; | ||
255 | 235 | ||
256 | err |= __get_user(tmpflags, &sc->flags); | 236 | err |= __get_user(tmpflags, &sc->flags); |
257 | regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); | 237 | regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); |
@@ -262,15 +242,13 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, | |||
262 | buf = compat_ptr(tmp); | 242 | buf = compat_ptr(tmp); |
263 | err |= restore_i387_xstate_ia32(buf); | 243 | err |= restore_i387_xstate_ia32(buf); |
264 | 244 | ||
265 | err |= __get_user(tmp, &sc->ax); | 245 | err |= __get_user(*pax, &sc->ax); |
266 | *peax = tmp; | ||
267 | |||
268 | return err; | 246 | return err; |
269 | } | 247 | } |
270 | 248 | ||
271 | asmlinkage long sys32_sigreturn(struct pt_regs *regs) | 249 | asmlinkage long sys32_sigreturn(struct pt_regs *regs) |
272 | { | 250 | { |
273 | struct sigframe __user *frame = (struct sigframe __user *)(regs->sp-8); | 251 | struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8); |
274 | sigset_t set; | 252 | sigset_t set; |
275 | unsigned int ax; | 253 | unsigned int ax; |
276 | 254 | ||
@@ -300,12 +278,12 @@ badframe: | |||
300 | 278 | ||
301 | asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs) | 279 | asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs) |
302 | { | 280 | { |
303 | struct rt_sigframe __user *frame; | 281 | struct rt_sigframe_ia32 __user *frame; |
304 | sigset_t set; | 282 | sigset_t set; |
305 | unsigned int ax; | 283 | unsigned int ax; |
306 | struct pt_regs tregs; | 284 | struct pt_regs tregs; |
307 | 285 | ||
308 | frame = (struct rt_sigframe __user *)(regs->sp - 4); | 286 | frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4); |
309 | 287 | ||
310 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) | 288 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) |
311 | goto badframe; | 289 | goto badframe; |
@@ -359,20 +337,15 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, | |||
359 | err |= __put_user(regs->dx, &sc->dx); | 337 | err |= __put_user(regs->dx, &sc->dx); |
360 | err |= __put_user(regs->cx, &sc->cx); | 338 | err |= __put_user(regs->cx, &sc->cx); |
361 | err |= __put_user(regs->ax, &sc->ax); | 339 | err |= __put_user(regs->ax, &sc->ax); |
362 | err |= __put_user(regs->cs, &sc->cs); | ||
363 | err |= __put_user(regs->ss, &sc->ss); | ||
364 | err |= __put_user(current->thread.trap_no, &sc->trapno); | 340 | err |= __put_user(current->thread.trap_no, &sc->trapno); |
365 | err |= __put_user(current->thread.error_code, &sc->err); | 341 | err |= __put_user(current->thread.error_code, &sc->err); |
366 | err |= __put_user(regs->ip, &sc->ip); | 342 | err |= __put_user(regs->ip, &sc->ip); |
343 | err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs); | ||
367 | err |= __put_user(regs->flags, &sc->flags); | 344 | err |= __put_user(regs->flags, &sc->flags); |
368 | err |= __put_user(regs->sp, &sc->sp_at_signal); | 345 | err |= __put_user(regs->sp, &sc->sp_at_signal); |
346 | err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); | ||
369 | 347 | ||
370 | tmp = save_i387_xstate_ia32(fpstate); | 348 | err |= __put_user(ptr_to_compat(fpstate), &sc->fpstate); |
371 | if (tmp < 0) | ||
372 | err = -EFAULT; | ||
373 | else | ||
374 | err |= __put_user(ptr_to_compat(tmp ? fpstate : NULL), | ||
375 | &sc->fpstate); | ||
376 | 349 | ||
377 | /* non-iBCS2 extensions.. */ | 350 | /* non-iBCS2 extensions.. */ |
378 | err |= __put_user(mask, &sc->oldmask); | 351 | err |= __put_user(mask, &sc->oldmask); |
@@ -400,7 +373,7 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, | |||
400 | } | 373 | } |
401 | 374 | ||
402 | /* This is the legacy signal stack switching. */ | 375 | /* This is the legacy signal stack switching. */ |
403 | else if ((regs->ss & 0xffff) != __USER_DS && | 376 | else if ((regs->ss & 0xffff) != __USER32_DS && |
404 | !(ka->sa.sa_flags & SA_RESTORER) && | 377 | !(ka->sa.sa_flags & SA_RESTORER) && |
405 | ka->sa.sa_restorer) | 378 | ka->sa.sa_restorer) |
406 | sp = (unsigned long) ka->sa.sa_restorer; | 379 | sp = (unsigned long) ka->sa.sa_restorer; |
@@ -408,6 +381,8 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, | |||
408 | if (used_math()) { | 381 | if (used_math()) { |
409 | sp = sp - sig_xstate_ia32_size; | 382 | sp = sp - sig_xstate_ia32_size; |
410 | *fpstate = (struct _fpstate_ia32 *) sp; | 383 | *fpstate = (struct _fpstate_ia32 *) sp; |
384 | if (save_i387_xstate_ia32(*fpstate) < 0) | ||
385 | return (void __user *) -1L; | ||
411 | } | 386 | } |
412 | 387 | ||
413 | sp -= frame_size; | 388 | sp -= frame_size; |
@@ -420,7 +395,7 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, | |||
420 | int ia32_setup_frame(int sig, struct k_sigaction *ka, | 395 | int ia32_setup_frame(int sig, struct k_sigaction *ka, |
421 | compat_sigset_t *set, struct pt_regs *regs) | 396 | compat_sigset_t *set, struct pt_regs *regs) |
422 | { | 397 | { |
423 | struct sigframe __user *frame; | 398 | struct sigframe_ia32 __user *frame; |
424 | void __user *restorer; | 399 | void __user *restorer; |
425 | int err = 0; | 400 | int err = 0; |
426 | void __user *fpstate = NULL; | 401 | void __user *fpstate = NULL; |
@@ -430,12 +405,10 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, | |||
430 | u16 poplmovl; | 405 | u16 poplmovl; |
431 | u32 val; | 406 | u32 val; |
432 | u16 int80; | 407 | u16 int80; |
433 | u16 pad; | ||
434 | } __attribute__((packed)) code = { | 408 | } __attribute__((packed)) code = { |
435 | 0xb858, /* popl %eax ; movl $...,%eax */ | 409 | 0xb858, /* popl %eax ; movl $...,%eax */ |
436 | __NR_ia32_sigreturn, | 410 | __NR_ia32_sigreturn, |
437 | 0x80cd, /* int $0x80 */ | 411 | 0x80cd, /* int $0x80 */ |
438 | 0, | ||
439 | }; | 412 | }; |
440 | 413 | ||
441 | frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); | 414 | frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); |
@@ -471,7 +444,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, | |||
471 | * These are actually not used anymore, but left because some | 444 | * These are actually not used anymore, but left because some |
472 | * gdb versions depend on them as a marker. | 445 | * gdb versions depend on them as a marker. |
473 | */ | 446 | */ |
474 | err |= __copy_to_user(frame->retcode, &code, 8); | 447 | err |= __put_user(*((u64 *)&code), (u64 *)frame->retcode); |
475 | if (err) | 448 | if (err) |
476 | return -EFAULT; | 449 | return -EFAULT; |
477 | 450 | ||
@@ -501,7 +474,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, | |||
501 | int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | 474 | int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, |
502 | compat_sigset_t *set, struct pt_regs *regs) | 475 | compat_sigset_t *set, struct pt_regs *regs) |
503 | { | 476 | { |
504 | struct rt_sigframe __user *frame; | 477 | struct rt_sigframe_ia32 __user *frame; |
505 | void __user *restorer; | 478 | void __user *restorer; |
506 | int err = 0; | 479 | int err = 0; |
507 | void __user *fpstate = NULL; | 480 | void __user *fpstate = NULL; |
@@ -511,8 +484,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
511 | u8 movl; | 484 | u8 movl; |
512 | u32 val; | 485 | u32 val; |
513 | u16 int80; | 486 | u16 int80; |
514 | u16 pad; | 487 | u8 pad; |
515 | u8 pad2; | ||
516 | } __attribute__((packed)) code = { | 488 | } __attribute__((packed)) code = { |
517 | 0xb8, | 489 | 0xb8, |
518 | __NR_ia32_rt_sigreturn, | 490 | __NR_ia32_rt_sigreturn, |
@@ -559,7 +531,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
559 | * Not actually used anymore, but left because some gdb | 531 | * Not actually used anymore, but left because some gdb |
560 | * versions need it. | 532 | * versions need it. |
561 | */ | 533 | */ |
562 | err |= __copy_to_user(frame->retcode, &code, 8); | 534 | err |= __put_user(*((u64 *)&code), (u64 *)frame->retcode); |
563 | if (err) | 535 | if (err) |
564 | return -EFAULT; | 536 | return -EFAULT; |
565 | 537 | ||
@@ -572,11 +544,6 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
572 | regs->dx = (unsigned long) &frame->info; | 544 | regs->dx = (unsigned long) &frame->info; |
573 | regs->cx = (unsigned long) &frame->uc; | 545 | regs->cx = (unsigned long) &frame->uc; |
574 | 546 | ||
575 | /* Make -mregparm=3 work */ | ||
576 | regs->ax = sig; | ||
577 | regs->dx = (unsigned long) &frame->info; | ||
578 | regs->cx = (unsigned long) &frame->uc; | ||
579 | |||
580 | loadsegment(ds, __USER32_DS); | 547 | loadsegment(ds, __USER32_DS); |
581 | loadsegment(es, __USER32_DS); | 548 | loadsegment(es, __USER32_DS); |
582 | 549 | ||
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 3b1510b4fc57..25caa0738af5 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h | |||
@@ -193,6 +193,7 @@ extern u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask); | |||
193 | static inline void lapic_shutdown(void) { } | 193 | static inline void lapic_shutdown(void) { } |
194 | #define local_apic_timer_c2_ok 1 | 194 | #define local_apic_timer_c2_ok 1 |
195 | static inline void init_apic_mappings(void) { } | 195 | static inline void init_apic_mappings(void) { } |
196 | static inline void disable_local_APIC(void) { } | ||
196 | 197 | ||
197 | #endif /* !CONFIG_X86_LOCAL_APIC */ | 198 | #endif /* !CONFIG_X86_LOCAL_APIC */ |
198 | 199 | ||
diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index 1d9543b9d358..ce547f24a1cd 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h | |||
@@ -24,8 +24,6 @@ static inline cpumask_t target_cpus(void) | |||
24 | #define INT_DELIVERY_MODE (dest_Fixed) | 24 | #define INT_DELIVERY_MODE (dest_Fixed) |
25 | #define INT_DEST_MODE (0) /* phys delivery to target proc */ | 25 | #define INT_DEST_MODE (0) /* phys delivery to target proc */ |
26 | #define NO_BALANCE_IRQ (0) | 26 | #define NO_BALANCE_IRQ (0) |
27 | #define WAKE_SECONDARY_VIA_INIT | ||
28 | |||
29 | 27 | ||
30 | static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) | 28 | static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) |
31 | { | 29 | { |
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 360010322711..9fa9dcdf344b 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h | |||
@@ -168,7 +168,15 @@ static inline void __change_bit(int nr, volatile unsigned long *addr) | |||
168 | */ | 168 | */ |
169 | static inline void change_bit(int nr, volatile unsigned long *addr) | 169 | static inline void change_bit(int nr, volatile unsigned long *addr) |
170 | { | 170 | { |
171 | asm volatile(LOCK_PREFIX "btc %1,%0" : ADDR : "Ir" (nr)); | 171 | if (IS_IMMEDIATE(nr)) { |
172 | asm volatile(LOCK_PREFIX "xorb %1,%0" | ||
173 | : CONST_MASK_ADDR(nr, addr) | ||
174 | : "iq" ((u8)CONST_MASK(nr))); | ||
175 | } else { | ||
176 | asm volatile(LOCK_PREFIX "btc %1,%0" | ||
177 | : BITOP_ADDR(addr) | ||
178 | : "Ir" (nr)); | ||
179 | } | ||
172 | } | 180 | } |
173 | 181 | ||
174 | /** | 182 | /** |
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index 3def2065fcea..d9cf1cd156d2 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h | |||
@@ -9,7 +9,7 @@ | |||
9 | #ifdef CONFIG_X86_32 | 9 | #ifdef CONFIG_X86_32 |
10 | # define __BUG_C0 "2:\t.long 1b, %c0\n" | 10 | # define __BUG_C0 "2:\t.long 1b, %c0\n" |
11 | #else | 11 | #else |
12 | # define __BUG_C0 "2:\t.quad 1b, %c0\n" | 12 | # define __BUG_C0 "2:\t.long 1b - 2b, %c0 - 2b\n" |
13 | #endif | 13 | #endif |
14 | 14 | ||
15 | #define BUG() \ | 15 | #define BUG() \ |
diff --git a/arch/x86/include/asm/byteorder.h b/arch/x86/include/asm/byteorder.h index e02ae2d89acf..f110ad417df3 100644 --- a/arch/x86/include/asm/byteorder.h +++ b/arch/x86/include/asm/byteorder.h | |||
@@ -4,26 +4,33 @@ | |||
4 | #include <asm/types.h> | 4 | #include <asm/types.h> |
5 | #include <linux/compiler.h> | 5 | #include <linux/compiler.h> |
6 | 6 | ||
7 | #ifdef __GNUC__ | 7 | #define __LITTLE_ENDIAN |
8 | 8 | ||
9 | #ifdef __i386__ | 9 | static inline __attribute_const__ __u32 __arch_swab32(__u32 val) |
10 | |||
11 | static inline __attribute_const__ __u32 ___arch__swab32(__u32 x) | ||
12 | { | 10 | { |
13 | #ifdef CONFIG_X86_BSWAP | 11 | #ifdef __i386__ |
14 | asm("bswap %0" : "=r" (x) : "0" (x)); | 12 | # ifdef CONFIG_X86_BSWAP |
15 | #else | 13 | asm("bswap %0" : "=r" (val) : "0" (val)); |
14 | # else | ||
16 | asm("xchgb %b0,%h0\n\t" /* swap lower bytes */ | 15 | asm("xchgb %b0,%h0\n\t" /* swap lower bytes */ |
17 | "rorl $16,%0\n\t" /* swap words */ | 16 | "rorl $16,%0\n\t" /* swap words */ |
18 | "xchgb %b0,%h0" /* swap higher bytes */ | 17 | "xchgb %b0,%h0" /* swap higher bytes */ |
19 | : "=q" (x) | 18 | : "=q" (val) |
20 | : "0" (x)); | 19 | : "0" (val)); |
20 | # endif | ||
21 | |||
22 | #else /* __i386__ */ | ||
23 | asm("bswapl %0" | ||
24 | : "=r" (val) | ||
25 | : "0" (val)); | ||
21 | #endif | 26 | #endif |
22 | return x; | 27 | return val; |
23 | } | 28 | } |
29 | #define __arch_swab32 __arch_swab32 | ||
24 | 30 | ||
25 | static inline __attribute_const__ __u64 ___arch__swab64(__u64 val) | 31 | static inline __attribute_const__ __u64 __arch_swab64(__u64 val) |
26 | { | 32 | { |
33 | #ifdef __i386__ | ||
27 | union { | 34 | union { |
28 | struct { | 35 | struct { |
29 | __u32 a; | 36 | __u32 a; |
@@ -32,50 +39,27 @@ static inline __attribute_const__ __u64 ___arch__swab64(__u64 val) | |||
32 | __u64 u; | 39 | __u64 u; |
33 | } v; | 40 | } v; |
34 | v.u = val; | 41 | v.u = val; |
35 | #ifdef CONFIG_X86_BSWAP | 42 | # ifdef CONFIG_X86_BSWAP |
36 | asm("bswapl %0 ; bswapl %1 ; xchgl %0,%1" | 43 | asm("bswapl %0 ; bswapl %1 ; xchgl %0,%1" |
37 | : "=r" (v.s.a), "=r" (v.s.b) | 44 | : "=r" (v.s.a), "=r" (v.s.b) |
38 | : "0" (v.s.a), "1" (v.s.b)); | 45 | : "0" (v.s.a), "1" (v.s.b)); |
39 | #else | 46 | # else |
40 | v.s.a = ___arch__swab32(v.s.a); | 47 | v.s.a = __arch_swab32(v.s.a); |
41 | v.s.b = ___arch__swab32(v.s.b); | 48 | v.s.b = __arch_swab32(v.s.b); |
42 | asm("xchgl %0,%1" | 49 | asm("xchgl %0,%1" |
43 | : "=r" (v.s.a), "=r" (v.s.b) | 50 | : "=r" (v.s.a), "=r" (v.s.b) |
44 | : "0" (v.s.a), "1" (v.s.b)); | 51 | : "0" (v.s.a), "1" (v.s.b)); |
45 | #endif | 52 | # endif |
46 | return v.u; | 53 | return v.u; |
47 | } | ||
48 | |||
49 | #else /* __i386__ */ | 54 | #else /* __i386__ */ |
50 | |||
51 | static inline __attribute_const__ __u64 ___arch__swab64(__u64 x) | ||
52 | { | ||
53 | asm("bswapq %0" | 55 | asm("bswapq %0" |
54 | : "=r" (x) | 56 | : "=r" (val) |
55 | : "0" (x)); | 57 | : "0" (val)); |
56 | return x; | 58 | return val; |
57 | } | ||
58 | |||
59 | static inline __attribute_const__ __u32 ___arch__swab32(__u32 x) | ||
60 | { | ||
61 | asm("bswapl %0" | ||
62 | : "=r" (x) | ||
63 | : "0" (x)); | ||
64 | return x; | ||
65 | } | ||
66 | |||
67 | #endif | 59 | #endif |
60 | } | ||
61 | #define __arch_swab64 __arch_swab64 | ||
68 | 62 | ||
69 | /* Do not define swab16. Gcc is smart enough to recognize "C" version and | 63 | #include <linux/byteorder.h> |
70 | convert it into rotation or exhange. */ | ||
71 | |||
72 | #define __arch__swab64(x) ___arch__swab64(x) | ||
73 | #define __arch__swab32(x) ___arch__swab32(x) | ||
74 | |||
75 | #define __BYTEORDER_HAS_U64__ | ||
76 | |||
77 | #endif /* __GNUC__ */ | ||
78 | |||
79 | #include <linux/byteorder/little_endian.h> | ||
80 | 64 | ||
81 | #endif /* _ASM_X86_BYTEORDER_H */ | 65 | #endif /* _ASM_X86_BYTEORDER_H */ |
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index cfdf8c2c5c31..ea408dcba513 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h | |||
@@ -80,7 +80,6 @@ | |||
80 | #define X86_FEATURE_UP (3*32+ 9) /* smp kernel running on up */ | 80 | #define X86_FEATURE_UP (3*32+ 9) /* smp kernel running on up */ |
81 | #define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* "" FXSAVE leaks FOP/FIP/FOP */ | 81 | #define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* "" FXSAVE leaks FOP/FIP/FOP */ |
82 | #define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */ | 82 | #define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */ |
83 | #define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */ | ||
84 | #define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */ | 83 | #define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */ |
85 | #define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */ | 84 | #define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */ |
86 | #define X86_FEATURE_SYSCALL32 (3*32+14) /* "" syscall in ia32 userspace */ | 85 | #define X86_FEATURE_SYSCALL32 (3*32+14) /* "" syscall in ia32 userspace */ |
@@ -92,6 +91,8 @@ | |||
92 | #define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */ | 91 | #define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */ |
93 | #define X86_FEATURE_AMDC1E (3*32+21) /* AMD C1E detected */ | 92 | #define X86_FEATURE_AMDC1E (3*32+21) /* AMD C1E detected */ |
94 | #define X86_FEATURE_XTOPOLOGY (3*32+22) /* cpu topology enum extensions */ | 93 | #define X86_FEATURE_XTOPOLOGY (3*32+22) /* cpu topology enum extensions */ |
94 | #define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */ | ||
95 | #define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */ | ||
95 | 96 | ||
96 | /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ | 97 | /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ |
97 | #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ | 98 | #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ |
@@ -117,6 +118,7 @@ | |||
117 | #define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ | 118 | #define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ |
118 | #define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */ | 119 | #define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */ |
119 | #define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ | 120 | #define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ |
121 | #define X86_FEATURE_HYPERVISOR (4*32+31) /* Running on a hypervisor */ | ||
120 | 122 | ||
121 | /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ | 123 | /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ |
122 | #define X86_FEATURE_XSTORE (5*32+ 2) /* "rng" RNG present (xstore) */ | 124 | #define X86_FEATURE_XSTORE (5*32+ 2) /* "rng" RNG present (xstore) */ |
@@ -237,6 +239,7 @@ extern const char * const x86_power_flags[32]; | |||
237 | #define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2) | 239 | #define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2) |
238 | #define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) | 240 | #define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) |
239 | #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) | 241 | #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) |
242 | #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) | ||
240 | 243 | ||
241 | #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) | 244 | #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) |
242 | # define cpu_has_invlpg 1 | 245 | # define cpu_has_invlpg 1 |
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 097794ff6b79..dc22c0733282 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h | |||
@@ -71,12 +71,10 @@ static inline struct dma_mapping_ops *get_dma_ops(struct device *dev) | |||
71 | /* Make sure we keep the same behaviour */ | 71 | /* Make sure we keep the same behaviour */ |
72 | static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) | 72 | static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) |
73 | { | 73 | { |
74 | #ifdef CONFIG_X86_64 | ||
75 | struct dma_mapping_ops *ops = get_dma_ops(dev); | 74 | struct dma_mapping_ops *ops = get_dma_ops(dev); |
76 | if (ops->mapping_error) | 75 | if (ops->mapping_error) |
77 | return ops->mapping_error(dev, dma_addr); | 76 | return ops->mapping_error(dev, dma_addr); |
78 | 77 | ||
79 | #endif | ||
80 | return (dma_addr == bad_dma_address); | 78 | return (dma_addr == bad_dma_address); |
81 | } | 79 | } |
82 | 80 | ||
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h index 804b6e6be929..3afc5e87cfdd 100644 --- a/arch/x86/include/asm/dwarf2.h +++ b/arch/x86/include/asm/dwarf2.h | |||
@@ -6,56 +6,91 @@ | |||
6 | #endif | 6 | #endif |
7 | 7 | ||
8 | /* | 8 | /* |
9 | Macros for dwarf2 CFI unwind table entries. | 9 | * Macros for dwarf2 CFI unwind table entries. |
10 | See "as.info" for details on these pseudo ops. Unfortunately | 10 | * See "as.info" for details on these pseudo ops. Unfortunately |
11 | they are only supported in very new binutils, so define them | 11 | * they are only supported in very new binutils, so define them |
12 | away for older version. | 12 | * away for older version. |
13 | */ | 13 | */ |
14 | 14 | ||
15 | #ifdef CONFIG_AS_CFI | 15 | #ifdef CONFIG_AS_CFI |
16 | 16 | ||
17 | #define CFI_STARTPROC .cfi_startproc | 17 | #define CFI_STARTPROC .cfi_startproc |
18 | #define CFI_ENDPROC .cfi_endproc | 18 | #define CFI_ENDPROC .cfi_endproc |
19 | #define CFI_DEF_CFA .cfi_def_cfa | 19 | #define CFI_DEF_CFA .cfi_def_cfa |
20 | #define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register | 20 | #define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register |
21 | #define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset | 21 | #define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset |
22 | #define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset | 22 | #define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset |
23 | #define CFI_OFFSET .cfi_offset | 23 | #define CFI_OFFSET .cfi_offset |
24 | #define CFI_REL_OFFSET .cfi_rel_offset | 24 | #define CFI_REL_OFFSET .cfi_rel_offset |
25 | #define CFI_REGISTER .cfi_register | 25 | #define CFI_REGISTER .cfi_register |
26 | #define CFI_RESTORE .cfi_restore | 26 | #define CFI_RESTORE .cfi_restore |
27 | #define CFI_REMEMBER_STATE .cfi_remember_state | 27 | #define CFI_REMEMBER_STATE .cfi_remember_state |
28 | #define CFI_RESTORE_STATE .cfi_restore_state | 28 | #define CFI_RESTORE_STATE .cfi_restore_state |
29 | #define CFI_UNDEFINED .cfi_undefined | 29 | #define CFI_UNDEFINED .cfi_undefined |
30 | 30 | ||
31 | #ifdef CONFIG_AS_CFI_SIGNAL_FRAME | 31 | #ifdef CONFIG_AS_CFI_SIGNAL_FRAME |
32 | #define CFI_SIGNAL_FRAME .cfi_signal_frame | 32 | #define CFI_SIGNAL_FRAME .cfi_signal_frame |
33 | #else | 33 | #else |
34 | #define CFI_SIGNAL_FRAME | 34 | #define CFI_SIGNAL_FRAME |
35 | #endif | 35 | #endif |
36 | 36 | ||
37 | #else | 37 | #else |
38 | 38 | ||
39 | /* Due to the structure of pre-exisiting code, don't use assembler line | 39 | /* |
40 | comment character # to ignore the arguments. Instead, use a dummy macro. */ | 40 | * Due to the structure of pre-exisiting code, don't use assembler line |
41 | * comment character # to ignore the arguments. Instead, use a dummy macro. | ||
42 | */ | ||
41 | .macro cfi_ignore a=0, b=0, c=0, d=0 | 43 | .macro cfi_ignore a=0, b=0, c=0, d=0 |
42 | .endm | 44 | .endm |
43 | 45 | ||
44 | #define CFI_STARTPROC cfi_ignore | 46 | #define CFI_STARTPROC cfi_ignore |
45 | #define CFI_ENDPROC cfi_ignore | 47 | #define CFI_ENDPROC cfi_ignore |
46 | #define CFI_DEF_CFA cfi_ignore | 48 | #define CFI_DEF_CFA cfi_ignore |
47 | #define CFI_DEF_CFA_REGISTER cfi_ignore | 49 | #define CFI_DEF_CFA_REGISTER cfi_ignore |
48 | #define CFI_DEF_CFA_OFFSET cfi_ignore | 50 | #define CFI_DEF_CFA_OFFSET cfi_ignore |
49 | #define CFI_ADJUST_CFA_OFFSET cfi_ignore | 51 | #define CFI_ADJUST_CFA_OFFSET cfi_ignore |
50 | #define CFI_OFFSET cfi_ignore | 52 | #define CFI_OFFSET cfi_ignore |
51 | #define CFI_REL_OFFSET cfi_ignore | 53 | #define CFI_REL_OFFSET cfi_ignore |
52 | #define CFI_REGISTER cfi_ignore | 54 | #define CFI_REGISTER cfi_ignore |
53 | #define CFI_RESTORE cfi_ignore | 55 | #define CFI_RESTORE cfi_ignore |
54 | #define CFI_REMEMBER_STATE cfi_ignore | 56 | #define CFI_REMEMBER_STATE cfi_ignore |
55 | #define CFI_RESTORE_STATE cfi_ignore | 57 | #define CFI_RESTORE_STATE cfi_ignore |
56 | #define CFI_UNDEFINED cfi_ignore | 58 | #define CFI_UNDEFINED cfi_ignore |
57 | #define CFI_SIGNAL_FRAME cfi_ignore | 59 | #define CFI_SIGNAL_FRAME cfi_ignore |
58 | 60 | ||
59 | #endif | 61 | #endif |
60 | 62 | ||
63 | /* | ||
64 | * An attempt to make CFI annotations more or less | ||
65 | * correct and shorter. It is implied that you know | ||
66 | * what you're doing if you use them. | ||
67 | */ | ||
68 | #ifdef __ASSEMBLY__ | ||
69 | #ifdef CONFIG_X86_64 | ||
70 | .macro pushq_cfi reg | ||
71 | pushq \reg | ||
72 | CFI_ADJUST_CFA_OFFSET 8 | ||
73 | .endm | ||
74 | |||
75 | .macro popq_cfi reg | ||
76 | popq \reg | ||
77 | CFI_ADJUST_CFA_OFFSET -8 | ||
78 | .endm | ||
79 | |||
80 | .macro movq_cfi reg offset=0 | ||
81 | movq %\reg, \offset(%rsp) | ||
82 | CFI_REL_OFFSET \reg, \offset | ||
83 | .endm | ||
84 | |||
85 | .macro movq_cfi_restore offset reg | ||
86 | movq \offset(%rsp), %\reg | ||
87 | CFI_RESTORE \reg | ||
88 | .endm | ||
89 | #else /*!CONFIG_X86_64*/ | ||
90 | |||
91 | /* 32bit defenitions are missed yet */ | ||
92 | |||
93 | #endif /*!CONFIG_X86_64*/ | ||
94 | #endif /*__ASSEMBLY__*/ | ||
95 | |||
61 | #endif /* _ASM_X86_DWARF2_H */ | 96 | #endif /* _ASM_X86_DWARF2_H */ |
diff --git a/arch/x86/include/asm/emergency-restart.h b/arch/x86/include/asm/emergency-restart.h index 94826cf87455..cc70c1c78ca4 100644 --- a/arch/x86/include/asm/emergency-restart.h +++ b/arch/x86/include/asm/emergency-restart.h | |||
@@ -8,7 +8,9 @@ enum reboot_type { | |||
8 | BOOT_BIOS = 'b', | 8 | BOOT_BIOS = 'b', |
9 | #endif | 9 | #endif |
10 | BOOT_ACPI = 'a', | 10 | BOOT_ACPI = 'a', |
11 | BOOT_EFI = 'e' | 11 | BOOT_EFI = 'e', |
12 | BOOT_CF9 = 'p', | ||
13 | BOOT_CF9_COND = 'q', | ||
12 | }; | 14 | }; |
13 | 15 | ||
14 | extern enum reboot_type reboot_type; | 16 | extern enum reboot_type reboot_type; |
diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index 380f0b4f17ed..e24ef876915f 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h | |||
@@ -9,31 +9,27 @@ static inline int apic_id_registered(void) | |||
9 | return (1); | 9 | return (1); |
10 | } | 10 | } |
11 | 11 | ||
12 | static inline cpumask_t target_cpus(void) | 12 | static inline cpumask_t target_cpus_cluster(void) |
13 | { | 13 | { |
14 | #if defined CONFIG_ES7000_CLUSTERED_APIC | ||
15 | return CPU_MASK_ALL; | 14 | return CPU_MASK_ALL; |
16 | #else | 15 | } |
16 | |||
17 | static inline cpumask_t target_cpus(void) | ||
18 | { | ||
17 | return cpumask_of_cpu(smp_processor_id()); | 19 | return cpumask_of_cpu(smp_processor_id()); |
18 | #endif | ||
19 | } | 20 | } |
20 | 21 | ||
21 | #if defined CONFIG_ES7000_CLUSTERED_APIC | 22 | #define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER) |
22 | #define APIC_DFR_VALUE (APIC_DFR_CLUSTER) | 23 | #define INT_DELIVERY_MODE_CLUSTER (dest_LowestPrio) |
23 | #define INT_DELIVERY_MODE (dest_LowestPrio) | 24 | #define INT_DEST_MODE_CLUSTER (1) /* logical delivery broadcast to all procs */ |
24 | #define INT_DEST_MODE (1) /* logical delivery broadcast to all procs */ | 25 | #define NO_BALANCE_IRQ_CLUSTER (1) |
25 | #define NO_BALANCE_IRQ (1) | 26 | |
26 | #undef WAKE_SECONDARY_VIA_INIT | ||
27 | #define WAKE_SECONDARY_VIA_MIP | ||
28 | #else | ||
29 | #define APIC_DFR_VALUE (APIC_DFR_FLAT) | 27 | #define APIC_DFR_VALUE (APIC_DFR_FLAT) |
30 | #define INT_DELIVERY_MODE (dest_Fixed) | 28 | #define INT_DELIVERY_MODE (dest_Fixed) |
31 | #define INT_DEST_MODE (0) /* phys delivery to target procs */ | 29 | #define INT_DEST_MODE (0) /* phys delivery to target procs */ |
32 | #define NO_BALANCE_IRQ (0) | 30 | #define NO_BALANCE_IRQ (0) |
33 | #undef APIC_DEST_LOGICAL | 31 | #undef APIC_DEST_LOGICAL |
34 | #define APIC_DEST_LOGICAL 0x0 | 32 | #define APIC_DEST_LOGICAL 0x0 |
35 | #define WAKE_SECONDARY_VIA_INIT | ||
36 | #endif | ||
37 | 33 | ||
38 | static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) | 34 | static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) |
39 | { | 35 | { |
@@ -60,6 +56,16 @@ static inline unsigned long calculate_ldr(int cpu) | |||
60 | * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel | 56 | * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel |
61 | * document number 292116). So here it goes... | 57 | * document number 292116). So here it goes... |
62 | */ | 58 | */ |
59 | static inline void init_apic_ldr_cluster(void) | ||
60 | { | ||
61 | unsigned long val; | ||
62 | int cpu = smp_processor_id(); | ||
63 | |||
64 | apic_write(APIC_DFR, APIC_DFR_VALUE_CLUSTER); | ||
65 | val = calculate_ldr(cpu); | ||
66 | apic_write(APIC_LDR, val); | ||
67 | } | ||
68 | |||
63 | static inline void init_apic_ldr(void) | 69 | static inline void init_apic_ldr(void) |
64 | { | 70 | { |
65 | unsigned long val; | 71 | unsigned long val; |
@@ -70,10 +76,6 @@ static inline void init_apic_ldr(void) | |||
70 | apic_write(APIC_LDR, val); | 76 | apic_write(APIC_LDR, val); |
71 | } | 77 | } |
72 | 78 | ||
73 | #ifndef CONFIG_X86_GENERICARCH | ||
74 | extern void enable_apic_mode(void); | ||
75 | #endif | ||
76 | |||
77 | extern int apic_version [MAX_APICS]; | 79 | extern int apic_version [MAX_APICS]; |
78 | static inline void setup_apic_routing(void) | 80 | static inline void setup_apic_routing(void) |
79 | { | 81 | { |
@@ -144,7 +146,7 @@ static inline int check_phys_apicid_present(int cpu_physical_apicid) | |||
144 | return (1); | 146 | return (1); |
145 | } | 147 | } |
146 | 148 | ||
147 | static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) | 149 | static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask) |
148 | { | 150 | { |
149 | int num_bits_set; | 151 | int num_bits_set; |
150 | int cpus_found = 0; | 152 | int cpus_found = 0; |
@@ -154,11 +156,7 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) | |||
154 | num_bits_set = cpus_weight(cpumask); | 156 | num_bits_set = cpus_weight(cpumask); |
155 | /* Return id to all */ | 157 | /* Return id to all */ |
156 | if (num_bits_set == NR_CPUS) | 158 | if (num_bits_set == NR_CPUS) |
157 | #if defined CONFIG_ES7000_CLUSTERED_APIC | ||
158 | return 0xFF; | 159 | return 0xFF; |
159 | #else | ||
160 | return cpu_to_logical_apicid(0); | ||
161 | #endif | ||
162 | /* | 160 | /* |
163 | * The cpus in the mask must all be on the apic cluster. If are not | 161 | * The cpus in the mask must all be on the apic cluster. If are not |
164 | * on the same apicid cluster return default value of TARGET_CPUS. | 162 | * on the same apicid cluster return default value of TARGET_CPUS. |
@@ -171,11 +169,40 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) | |||
171 | if (apicid_cluster(apicid) != | 169 | if (apicid_cluster(apicid) != |
172 | apicid_cluster(new_apicid)){ | 170 | apicid_cluster(new_apicid)){ |
173 | printk ("%s: Not a valid mask!\n", __func__); | 171 | printk ("%s: Not a valid mask!\n", __func__); |
174 | #if defined CONFIG_ES7000_CLUSTERED_APIC | ||
175 | return 0xFF; | 172 | return 0xFF; |
176 | #else | 173 | } |
174 | apicid = new_apicid; | ||
175 | cpus_found++; | ||
176 | } | ||
177 | cpu++; | ||
178 | } | ||
179 | return apicid; | ||
180 | } | ||
181 | |||
182 | static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) | ||
183 | { | ||
184 | int num_bits_set; | ||
185 | int cpus_found = 0; | ||
186 | int cpu; | ||
187 | int apicid; | ||
188 | |||
189 | num_bits_set = cpus_weight(cpumask); | ||
190 | /* Return id to all */ | ||
191 | if (num_bits_set == NR_CPUS) | ||
192 | return cpu_to_logical_apicid(0); | ||
193 | /* | ||
194 | * The cpus in the mask must all be on the apic cluster. If are not | ||
195 | * on the same apicid cluster return default value of TARGET_CPUS. | ||
196 | */ | ||
197 | cpu = first_cpu(cpumask); | ||
198 | apicid = cpu_to_logical_apicid(cpu); | ||
199 | while (cpus_found < num_bits_set) { | ||
200 | if (cpu_isset(cpu, cpumask)) { | ||
201 | int new_apicid = cpu_to_logical_apicid(cpu); | ||
202 | if (apicid_cluster(apicid) != | ||
203 | apicid_cluster(new_apicid)){ | ||
204 | printk ("%s: Not a valid mask!\n", __func__); | ||
177 | return cpu_to_logical_apicid(0); | 205 | return cpu_to_logical_apicid(0); |
178 | #endif | ||
179 | } | 206 | } |
180 | apicid = new_apicid; | 207 | apicid = new_apicid; |
181 | cpus_found++; | 208 | cpus_found++; |
diff --git a/arch/x86/include/asm/es7000/wakecpu.h b/arch/x86/include/asm/es7000/wakecpu.h index 398493461913..78f0daaee436 100644 --- a/arch/x86/include/asm/es7000/wakecpu.h +++ b/arch/x86/include/asm/es7000/wakecpu.h | |||
@@ -1,36 +1,12 @@ | |||
1 | #ifndef __ASM_ES7000_WAKECPU_H | 1 | #ifndef __ASM_ES7000_WAKECPU_H |
2 | #define __ASM_ES7000_WAKECPU_H | 2 | #define __ASM_ES7000_WAKECPU_H |
3 | 3 | ||
4 | /* | 4 | #define TRAMPOLINE_PHYS_LOW 0x467 |
5 | * This file copes with machines that wakeup secondary CPUs by the | 5 | #define TRAMPOLINE_PHYS_HIGH 0x469 |
6 | * INIT, INIT, STARTUP sequence. | ||
7 | */ | ||
8 | |||
9 | #ifdef CONFIG_ES7000_CLUSTERED_APIC | ||
10 | #define WAKE_SECONDARY_VIA_MIP | ||
11 | #else | ||
12 | #define WAKE_SECONDARY_VIA_INIT | ||
13 | #endif | ||
14 | |||
15 | #ifdef WAKE_SECONDARY_VIA_MIP | ||
16 | extern int es7000_start_cpu(int cpu, unsigned long eip); | ||
17 | static inline int | ||
18 | wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) | ||
19 | { | ||
20 | int boot_error = 0; | ||
21 | boot_error = es7000_start_cpu(phys_apicid, start_eip); | ||
22 | return boot_error; | ||
23 | } | ||
24 | #endif | ||
25 | |||
26 | #define TRAMPOLINE_LOW phys_to_virt(0x467) | ||
27 | #define TRAMPOLINE_HIGH phys_to_virt(0x469) | ||
28 | |||
29 | #define boot_cpu_apicid boot_cpu_physical_apicid | ||
30 | 6 | ||
31 | static inline void wait_for_init_deassert(atomic_t *deassert) | 7 | static inline void wait_for_init_deassert(atomic_t *deassert) |
32 | { | 8 | { |
33 | #ifdef WAKE_SECONDARY_VIA_INIT | 9 | #ifndef CONFIG_ES7000_CLUSTERED_APIC |
34 | while (!atomic_read(deassert)) | 10 | while (!atomic_read(deassert)) |
35 | cpu_relax(); | 11 | cpu_relax(); |
36 | #endif | 12 | #endif |
@@ -50,9 +26,12 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) | |||
50 | { | 26 | { |
51 | } | 27 | } |
52 | 28 | ||
53 | #define inquire_remote_apic(apicid) do { \ | 29 | extern void __inquire_remote_apic(int apicid); |
54 | if (apic_verbosity >= APIC_DEBUG) \ | 30 | |
55 | __inquire_remote_apic(apicid); \ | 31 | static inline void inquire_remote_apic(int apicid) |
56 | } while (0) | 32 | { |
33 | if (apic_verbosity >= APIC_DEBUG) | ||
34 | __inquire_remote_apic(apicid); | ||
35 | } | ||
57 | 36 | ||
58 | #endif /* __ASM_MACH_WAKECPU_H */ | 37 | #endif /* __ASM_MACH_WAKECPU_H */ |
diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h index 74252264433d..6cfdafa409d8 100644 --- a/arch/x86/include/asm/gart.h +++ b/arch/x86/include/asm/gart.h | |||
@@ -29,6 +29,39 @@ extern int fix_aperture; | |||
29 | #define AMD64_GARTCACHECTL 0x9c | 29 | #define AMD64_GARTCACHECTL 0x9c |
30 | #define AMD64_GARTEN (1<<0) | 30 | #define AMD64_GARTEN (1<<0) |
31 | 31 | ||
32 | #ifdef CONFIG_GART_IOMMU | ||
33 | extern int gart_iommu_aperture; | ||
34 | extern int gart_iommu_aperture_allowed; | ||
35 | extern int gart_iommu_aperture_disabled; | ||
36 | |||
37 | extern void early_gart_iommu_check(void); | ||
38 | extern void gart_iommu_init(void); | ||
39 | extern void gart_iommu_shutdown(void); | ||
40 | extern void __init gart_parse_options(char *); | ||
41 | extern void gart_iommu_hole_init(void); | ||
42 | |||
43 | #else | ||
44 | #define gart_iommu_aperture 0 | ||
45 | #define gart_iommu_aperture_allowed 0 | ||
46 | #define gart_iommu_aperture_disabled 1 | ||
47 | |||
48 | static inline void early_gart_iommu_check(void) | ||
49 | { | ||
50 | } | ||
51 | static inline void gart_iommu_init(void) | ||
52 | { | ||
53 | } | ||
54 | static inline void gart_iommu_shutdown(void) | ||
55 | { | ||
56 | } | ||
57 | static inline void gart_parse_options(char *options) | ||
58 | { | ||
59 | } | ||
60 | static inline void gart_iommu_hole_init(void) | ||
61 | { | ||
62 | } | ||
63 | #endif | ||
64 | |||
32 | extern int agp_amd64_init(void); | 65 | extern int agp_amd64_init(void); |
33 | 66 | ||
34 | static inline void enable_gart_translation(struct pci_dev *dev, u64 addr) | 67 | static inline void enable_gart_translation(struct pci_dev *dev, u64 addr) |
diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h index 5cbd4fcc06fd..0ac17d33a8c7 100644 --- a/arch/x86/include/asm/genapic_32.h +++ b/arch/x86/include/asm/genapic_32.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define _ASM_X86_GENAPIC_32_H | 2 | #define _ASM_X86_GENAPIC_32_H |
3 | 3 | ||
4 | #include <asm/mpspec.h> | 4 | #include <asm/mpspec.h> |
5 | #include <asm/atomic.h> | ||
5 | 6 | ||
6 | /* | 7 | /* |
7 | * Generic APIC driver interface. | 8 | * Generic APIC driver interface. |
@@ -65,6 +66,14 @@ struct genapic { | |||
65 | void (*send_IPI_allbutself)(int vector); | 66 | void (*send_IPI_allbutself)(int vector); |
66 | void (*send_IPI_all)(int vector); | 67 | void (*send_IPI_all)(int vector); |
67 | #endif | 68 | #endif |
69 | int (*wakeup_cpu)(int apicid, unsigned long start_eip); | ||
70 | int trampoline_phys_low; | ||
71 | int trampoline_phys_high; | ||
72 | void (*wait_for_init_deassert)(atomic_t *deassert); | ||
73 | void (*smp_callin_clear_local_apic)(void); | ||
74 | void (*store_NMI_vector)(unsigned short *high, unsigned short *low); | ||
75 | void (*restore_NMI_vector)(unsigned short *high, unsigned short *low); | ||
76 | void (*inquire_remote_apic)(int apicid); | ||
68 | }; | 77 | }; |
69 | 78 | ||
70 | #define APICFUNC(x) .x = x, | 79 | #define APICFUNC(x) .x = x, |
@@ -105,16 +114,24 @@ struct genapic { | |||
105 | APICFUNC(get_apic_id) \ | 114 | APICFUNC(get_apic_id) \ |
106 | .apic_id_mask = APIC_ID_MASK, \ | 115 | .apic_id_mask = APIC_ID_MASK, \ |
107 | APICFUNC(cpu_mask_to_apicid) \ | 116 | APICFUNC(cpu_mask_to_apicid) \ |
108 | APICFUNC(vector_allocation_domain) \ | 117 | APICFUNC(vector_allocation_domain) \ |
109 | APICFUNC(acpi_madt_oem_check) \ | 118 | APICFUNC(acpi_madt_oem_check) \ |
110 | IPIFUNC(send_IPI_mask) \ | 119 | IPIFUNC(send_IPI_mask) \ |
111 | IPIFUNC(send_IPI_allbutself) \ | 120 | IPIFUNC(send_IPI_allbutself) \ |
112 | IPIFUNC(send_IPI_all) \ | 121 | IPIFUNC(send_IPI_all) \ |
113 | APICFUNC(enable_apic_mode) \ | 122 | APICFUNC(enable_apic_mode) \ |
114 | APICFUNC(phys_pkg_id) \ | 123 | APICFUNC(phys_pkg_id) \ |
124 | .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, \ | ||
125 | .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, \ | ||
126 | APICFUNC(wait_for_init_deassert) \ | ||
127 | APICFUNC(smp_callin_clear_local_apic) \ | ||
128 | APICFUNC(store_NMI_vector) \ | ||
129 | APICFUNC(restore_NMI_vector) \ | ||
130 | APICFUNC(inquire_remote_apic) \ | ||
115 | } | 131 | } |
116 | 132 | ||
117 | extern struct genapic *genapic; | 133 | extern struct genapic *genapic; |
134 | extern void es7000_update_genapic_to_cluster(void); | ||
118 | 135 | ||
119 | enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; | 136 | enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; |
120 | #define get_uv_system_type() UV_NONE | 137 | #define get_uv_system_type() UV_NONE |
diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h index 13c4e96199ea..2cae011668b7 100644 --- a/arch/x86/include/asm/genapic_64.h +++ b/arch/x86/include/asm/genapic_64.h | |||
@@ -32,6 +32,8 @@ struct genapic { | |||
32 | unsigned int (*get_apic_id)(unsigned long x); | 32 | unsigned int (*get_apic_id)(unsigned long x); |
33 | unsigned long (*set_apic_id)(unsigned int id); | 33 | unsigned long (*set_apic_id)(unsigned int id); |
34 | unsigned long apic_id_mask; | 34 | unsigned long apic_id_mask; |
35 | /* wakeup_secondary_cpu */ | ||
36 | int (*wakeup_cpu)(int apicid, unsigned long start_eip); | ||
35 | }; | 37 | }; |
36 | 38 | ||
37 | extern struct genapic *genapic; | 39 | extern struct genapic *genapic; |
diff --git a/arch/x86/include/asm/hardirq_32.h b/arch/x86/include/asm/hardirq_32.h index 5ca135e72f2b..cf7954d1405f 100644 --- a/arch/x86/include/asm/hardirq_32.h +++ b/arch/x86/include/asm/hardirq_32.h | |||
@@ -22,6 +22,8 @@ DECLARE_PER_CPU(irq_cpustat_t, irq_stat); | |||
22 | #define __ARCH_IRQ_STAT | 22 | #define __ARCH_IRQ_STAT |
23 | #define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member) | 23 | #define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member) |
24 | 24 | ||
25 | #define inc_irq_stat(member) (__get_cpu_var(irq_stat).member++) | ||
26 | |||
25 | void ack_bad_irq(unsigned int irq); | 27 | void ack_bad_irq(unsigned int irq); |
26 | #include <linux/irq_cpustat.h> | 28 | #include <linux/irq_cpustat.h> |
27 | 29 | ||
diff --git a/arch/x86/include/asm/hardirq_64.h b/arch/x86/include/asm/hardirq_64.h index 1ba381fc51d3..b5a6b5d56704 100644 --- a/arch/x86/include/asm/hardirq_64.h +++ b/arch/x86/include/asm/hardirq_64.h | |||
@@ -11,6 +11,8 @@ | |||
11 | 11 | ||
12 | #define __ARCH_IRQ_STAT 1 | 12 | #define __ARCH_IRQ_STAT 1 |
13 | 13 | ||
14 | #define inc_irq_stat(member) add_pda(member, 1) | ||
15 | |||
14 | #define local_softirq_pending() read_pda(__softirq_pending) | 16 | #define local_softirq_pending() read_pda(__softirq_pending) |
15 | 17 | ||
16 | #define __ARCH_SET_SOFTIRQ_PENDING 1 | 18 | #define __ARCH_SET_SOFTIRQ_PENDING 1 |
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index b97aecb0b61d..8de644b6b959 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h | |||
@@ -109,9 +109,7 @@ extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *); | |||
109 | #endif | 109 | #endif |
110 | #endif | 110 | #endif |
111 | 111 | ||
112 | #ifdef CONFIG_X86_32 | 112 | extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void); |
113 | extern void (*const interrupt[NR_VECTORS])(void); | ||
114 | #endif | ||
115 | 113 | ||
116 | typedef int vector_irq_t[NR_VECTORS]; | 114 | typedef int vector_irq_t[NR_VECTORS]; |
117 | DECLARE_PER_CPU(vector_irq_t, vector_irq); | 115 | DECLARE_PER_CPU(vector_irq_t, vector_irq); |
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h new file mode 100644 index 000000000000..369f5c5d09a1 --- /dev/null +++ b/arch/x86/include/asm/hypervisor.h | |||
@@ -0,0 +1,26 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008, VMware, Inc. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, but | ||
10 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
12 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
13 | * details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write to the Free Software | ||
17 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | ||
18 | * | ||
19 | */ | ||
20 | #ifndef ASM_X86__HYPERVISOR_H | ||
21 | #define ASM_X86__HYPERVISOR_H | ||
22 | |||
23 | extern unsigned long get_hypervisor_tsc_freq(void); | ||
24 | extern void init_hypervisor(struct cpuinfo_x86 *c); | ||
25 | |||
26 | #endif | ||
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h index 97989c0e534c..50ca486fd88c 100644 --- a/arch/x86/include/asm/ia32.h +++ b/arch/x86/include/asm/ia32.h | |||
@@ -129,24 +129,6 @@ typedef struct compat_siginfo { | |||
129 | } _sifields; | 129 | } _sifields; |
130 | } compat_siginfo_t; | 130 | } compat_siginfo_t; |
131 | 131 | ||
132 | struct sigframe32 { | ||
133 | u32 pretcode; | ||
134 | int sig; | ||
135 | struct sigcontext_ia32 sc; | ||
136 | struct _fpstate_ia32 fpstate; | ||
137 | unsigned int extramask[_COMPAT_NSIG_WORDS-1]; | ||
138 | }; | ||
139 | |||
140 | struct rt_sigframe32 { | ||
141 | u32 pretcode; | ||
142 | int sig; | ||
143 | u32 pinfo; | ||
144 | u32 puc; | ||
145 | compat_siginfo_t info; | ||
146 | struct ucontext_ia32 uc; | ||
147 | struct _fpstate_ia32 fpstate; | ||
148 | }; | ||
149 | |||
150 | struct ustat32 { | 132 | struct ustat32 { |
151 | __u32 f_tfree; | 133 | __u32 f_tfree; |
152 | compat_ino_t f_tinode; | 134 | compat_ino_t f_tinode; |
diff --git a/arch/x86/include/asm/idle.h b/arch/x86/include/asm/idle.h index 44c89c3a23e9..38d87379e270 100644 --- a/arch/x86/include/asm/idle.h +++ b/arch/x86/include/asm/idle.h | |||
@@ -8,8 +8,13 @@ struct notifier_block; | |||
8 | void idle_notifier_register(struct notifier_block *n); | 8 | void idle_notifier_register(struct notifier_block *n); |
9 | void idle_notifier_unregister(struct notifier_block *n); | 9 | void idle_notifier_unregister(struct notifier_block *n); |
10 | 10 | ||
11 | #ifdef CONFIG_X86_64 | ||
11 | void enter_idle(void); | 12 | void enter_idle(void); |
12 | void exit_idle(void); | 13 | void exit_idle(void); |
14 | #else /* !CONFIG_X86_64 */ | ||
15 | static inline void enter_idle(void) { } | ||
16 | static inline void exit_idle(void) { } | ||
17 | #endif /* CONFIG_X86_64 */ | ||
13 | 18 | ||
14 | void c1e_remove_cpu(int cpu); | 19 | void c1e_remove_cpu(int cpu); |
15 | 20 | ||
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index ac2abc88cd95..05cfed4485fa 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h | |||
@@ -4,6 +4,7 @@ | |||
4 | #define ARCH_HAS_IOREMAP_WC | 4 | #define ARCH_HAS_IOREMAP_WC |
5 | 5 | ||
6 | #include <linux/compiler.h> | 6 | #include <linux/compiler.h> |
7 | #include <asm-generic/int-ll64.h> | ||
7 | 8 | ||
8 | #define build_mmio_read(name, size, type, reg, barrier) \ | 9 | #define build_mmio_read(name, size, type, reg, barrier) \ |
9 | static inline type name(const volatile void __iomem *addr) \ | 10 | static inline type name(const volatile void __iomem *addr) \ |
@@ -45,21 +46,39 @@ build_mmio_write(__writel, "l", unsigned int, "r", ) | |||
45 | #define mmiowb() barrier() | 46 | #define mmiowb() barrier() |
46 | 47 | ||
47 | #ifdef CONFIG_X86_64 | 48 | #ifdef CONFIG_X86_64 |
49 | |||
48 | build_mmio_read(readq, "q", unsigned long, "=r", :"memory") | 50 | build_mmio_read(readq, "q", unsigned long, "=r", :"memory") |
49 | build_mmio_read(__readq, "q", unsigned long, "=r", ) | ||
50 | build_mmio_write(writeq, "q", unsigned long, "r", :"memory") | 51 | build_mmio_write(writeq, "q", unsigned long, "r", :"memory") |
51 | build_mmio_write(__writeq, "q", unsigned long, "r", ) | ||
52 | 52 | ||
53 | #define readq_relaxed(a) __readq(a) | 53 | #else |
54 | #define __raw_readq __readq | 54 | |
55 | #define __raw_writeq writeq | 55 | static inline __u64 readq(const volatile void __iomem *addr) |
56 | { | ||
57 | const volatile u32 __iomem *p = addr; | ||
58 | u32 low, high; | ||
59 | |||
60 | low = readl(p); | ||
61 | high = readl(p + 1); | ||
62 | |||
63 | return low + ((u64)high << 32); | ||
64 | } | ||
65 | |||
66 | static inline void writeq(__u64 val, volatile void __iomem *addr) | ||
67 | { | ||
68 | writel(val, addr); | ||
69 | writel(val >> 32, addr+4); | ||
70 | } | ||
56 | 71 | ||
57 | /* Let people know we have them */ | ||
58 | #define readq readq | ||
59 | #define writeq writeq | ||
60 | #endif | 72 | #endif |
61 | 73 | ||
62 | extern int iommu_bio_merge; | 74 | #define readq_relaxed(a) readq(a) |
75 | |||
76 | #define __raw_readq(a) readq(a) | ||
77 | #define __raw_writeq(val, addr) writeq(val, addr) | ||
78 | |||
79 | /* Let people know that we have them */ | ||
80 | #define readq readq | ||
81 | #define writeq writeq | ||
63 | 82 | ||
64 | #ifdef CONFIG_X86_32 | 83 | #ifdef CONFIG_X86_32 |
65 | # include "io_32.h" | 84 | # include "io_32.h" |
diff --git a/arch/x86/include/asm/io_64.h b/arch/x86/include/asm/io_64.h index fea325a1122f..563c16270ba6 100644 --- a/arch/x86/include/asm/io_64.h +++ b/arch/x86/include/asm/io_64.h | |||
@@ -232,8 +232,6 @@ void memset_io(volatile void __iomem *a, int b, size_t c); | |||
232 | 232 | ||
233 | #define flush_write_buffers() | 233 | #define flush_write_buffers() |
234 | 234 | ||
235 | #define BIO_VMERGE_BOUNDARY iommu_bio_merge | ||
236 | |||
237 | /* | 235 | /* |
238 | * Convert a virtual cached pointer to an uncached pointer | 236 | * Convert a virtual cached pointer to an uncached pointer |
239 | */ | 237 | */ |
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 6afd9933a7dd..e475e009ae5d 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h | |||
@@ -156,11 +156,21 @@ extern int sis_apic_bug; | |||
156 | /* 1 if "noapic" boot option passed */ | 156 | /* 1 if "noapic" boot option passed */ |
157 | extern int skip_ioapic_setup; | 157 | extern int skip_ioapic_setup; |
158 | 158 | ||
159 | /* 1 if "noapic" boot option passed */ | ||
160 | extern int noioapicquirk; | ||
161 | |||
162 | /* -1 if "noapic" boot option passed */ | ||
163 | extern int noioapicreroute; | ||
164 | |||
159 | /* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */ | 165 | /* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */ |
160 | extern int timer_through_8259; | 166 | extern int timer_through_8259; |
161 | 167 | ||
162 | static inline void disable_ioapic_setup(void) | 168 | static inline void disable_ioapic_setup(void) |
163 | { | 169 | { |
170 | #ifdef CONFIG_PCI | ||
171 | noioapicquirk = 1; | ||
172 | noioapicreroute = -1; | ||
173 | #endif | ||
164 | skip_ioapic_setup = 1; | 174 | skip_ioapic_setup = 1; |
165 | } | 175 | } |
166 | 176 | ||
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index 0b500c5b6446..295b13193f4d 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h | |||
@@ -12,37 +12,4 @@ extern unsigned long iommu_nr_pages(unsigned long addr, unsigned long len); | |||
12 | /* 10 seconds */ | 12 | /* 10 seconds */ |
13 | #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) | 13 | #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) |
14 | 14 | ||
15 | #ifdef CONFIG_GART_IOMMU | ||
16 | extern int gart_iommu_aperture; | ||
17 | extern int gart_iommu_aperture_allowed; | ||
18 | extern int gart_iommu_aperture_disabled; | ||
19 | |||
20 | extern void early_gart_iommu_check(void); | ||
21 | extern void gart_iommu_init(void); | ||
22 | extern void gart_iommu_shutdown(void); | ||
23 | extern void __init gart_parse_options(char *); | ||
24 | extern void gart_iommu_hole_init(void); | ||
25 | |||
26 | #else | ||
27 | #define gart_iommu_aperture 0 | ||
28 | #define gart_iommu_aperture_allowed 0 | ||
29 | #define gart_iommu_aperture_disabled 1 | ||
30 | |||
31 | static inline void early_gart_iommu_check(void) | ||
32 | { | ||
33 | } | ||
34 | static inline void gart_iommu_init(void) | ||
35 | { | ||
36 | } | ||
37 | static inline void gart_iommu_shutdown(void) | ||
38 | { | ||
39 | } | ||
40 | static inline void gart_parse_options(char *options) | ||
41 | { | ||
42 | } | ||
43 | static inline void gart_iommu_hole_init(void) | ||
44 | { | ||
45 | } | ||
46 | #endif | ||
47 | |||
48 | #endif /* _ASM_X86_IOMMU_H */ | 15 | #endif /* _ASM_X86_IOMMU_H */ |
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index bae0eda95486..28e409fc73f3 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h | |||
@@ -31,10 +31,6 @@ static inline int irq_canonicalize(int irq) | |||
31 | # endif | 31 | # endif |
32 | #endif | 32 | #endif |
33 | 33 | ||
34 | #ifdef CONFIG_IRQBALANCE | ||
35 | extern int irqbalance_disable(char *str); | ||
36 | #endif | ||
37 | |||
38 | #ifdef CONFIG_HOTPLUG_CPU | 34 | #ifdef CONFIG_HOTPLUG_CPU |
39 | #include <linux/cpumask.h> | 35 | #include <linux/cpumask.h> |
40 | extern void fixup_irqs(cpumask_t map); | 36 | extern void fixup_irqs(cpumask_t map); |
diff --git a/arch/x86/include/asm/irq_regs_32.h b/arch/x86/include/asm/irq_regs_32.h index af2f02d27fc7..86afd7473457 100644 --- a/arch/x86/include/asm/irq_regs_32.h +++ b/arch/x86/include/asm/irq_regs_32.h | |||
@@ -9,6 +9,8 @@ | |||
9 | 9 | ||
10 | #include <asm/percpu.h> | 10 | #include <asm/percpu.h> |
11 | 11 | ||
12 | #define ARCH_HAS_OWN_IRQ_REGS | ||
13 | |||
12 | DECLARE_PER_CPU(struct pt_regs *, irq_regs); | 14 | DECLARE_PER_CPU(struct pt_regs *, irq_regs); |
13 | 15 | ||
14 | static inline struct pt_regs *get_irq_regs(void) | 16 | static inline struct pt_regs *get_irq_regs(void) |
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index a1f22771a15a..c61d8b2ab8b9 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h | |||
@@ -5,21 +5,8 @@ | |||
5 | # define PA_CONTROL_PAGE 0 | 5 | # define PA_CONTROL_PAGE 0 |
6 | # define VA_CONTROL_PAGE 1 | 6 | # define VA_CONTROL_PAGE 1 |
7 | # define PA_PGD 2 | 7 | # define PA_PGD 2 |
8 | # define VA_PGD 3 | 8 | # define PA_SWAP_PAGE 3 |
9 | # define PA_PTE_0 4 | 9 | # define PAGES_NR 4 |
10 | # define VA_PTE_0 5 | ||
11 | # define PA_PTE_1 6 | ||
12 | # define VA_PTE_1 7 | ||
13 | # define PA_SWAP_PAGE 8 | ||
14 | # ifdef CONFIG_X86_PAE | ||
15 | # define PA_PMD_0 9 | ||
16 | # define VA_PMD_0 10 | ||
17 | # define PA_PMD_1 11 | ||
18 | # define VA_PMD_1 12 | ||
19 | # define PAGES_NR 13 | ||
20 | # else | ||
21 | # define PAGES_NR 9 | ||
22 | # endif | ||
23 | #else | 10 | #else |
24 | # define PA_CONTROL_PAGE 0 | 11 | # define PA_CONTROL_PAGE 0 |
25 | # define VA_CONTROL_PAGE 1 | 12 | # define VA_CONTROL_PAGE 1 |
@@ -170,6 +157,20 @@ relocate_kernel(unsigned long indirection_page, | |||
170 | unsigned long start_address) ATTRIB_NORET; | 157 | unsigned long start_address) ATTRIB_NORET; |
171 | #endif | 158 | #endif |
172 | 159 | ||
160 | #ifdef CONFIG_X86_32 | ||
161 | #define ARCH_HAS_KIMAGE_ARCH | ||
162 | |||
163 | struct kimage_arch { | ||
164 | pgd_t *pgd; | ||
165 | #ifdef CONFIG_X86_PAE | ||
166 | pmd_t *pmd0; | ||
167 | pmd_t *pmd1; | ||
168 | #endif | ||
169 | pte_t *pte0; | ||
170 | pte_t *pte1; | ||
171 | }; | ||
172 | #endif | ||
173 | |||
173 | #endif /* __ASSEMBLY__ */ | 174 | #endif /* __ASSEMBLY__ */ |
174 | 175 | ||
175 | #endif /* _ASM_X86_KEXEC_H */ | 176 | #endif /* _ASM_X86_KEXEC_H */ |
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index f61ee8f937e4..5d98d0b68ffc 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h | |||
@@ -57,5 +57,65 @@ | |||
57 | #define __ALIGN_STR ".align 16,0x90" | 57 | #define __ALIGN_STR ".align 16,0x90" |
58 | #endif | 58 | #endif |
59 | 59 | ||
60 | /* | ||
61 | * Detect unbalanced, missing or mixed use of the | ||
62 | * ENTRY_X86/END_X86 and | ||
63 | * KPROBE_ENTRY_X86/KPROBE_END_X86 macro pairs. | ||
64 | */ | ||
65 | #define __set_entry_x86 .set ENTRY_X86_IN, 0 | ||
66 | #define __unset_entry_x86 .set ENTRY_X86_IN, 1 | ||
67 | #define __set_kprobe_x86 .set KPROBE_X86_IN, 0 | ||
68 | #define __unset_kprobe_x86 .set KPROBE_X86_IN, 1 | ||
69 | |||
70 | #define __macro_err_x86 .error "ENTRY_X86/KPROBE_X86 unbalanced,missed,mixed" | ||
71 | |||
72 | #define __check_entry_x86 \ | ||
73 | .ifdef ENTRY_X86_IN; \ | ||
74 | .ifeq ENTRY_X86_IN; \ | ||
75 | __macro_err_x86; \ | ||
76 | .abort; \ | ||
77 | .endif; \ | ||
78 | .endif | ||
79 | |||
80 | #define __check_kprobe_x86 \ | ||
81 | .ifdef KPROBE_X86_IN; \ | ||
82 | .ifeq KPROBE_X86_IN; \ | ||
83 | __macro_err_x86; \ | ||
84 | .abort; \ | ||
85 | .endif; \ | ||
86 | .endif | ||
87 | |||
88 | #define __check_entry_kprobe_x86 \ | ||
89 | __check_entry_x86; \ | ||
90 | __check_kprobe_x86 | ||
91 | |||
92 | #define ENTRY_KPROBE_FINAL_X86 __check_entry_kprobe_x86 | ||
93 | |||
94 | #define ENTRY_X86(name) \ | ||
95 | __check_entry_kprobe_x86; \ | ||
96 | __set_entry_x86; \ | ||
97 | .globl name; \ | ||
98 | __ALIGN; \ | ||
99 | name: | ||
100 | |||
101 | #define END_X86(name) \ | ||
102 | __unset_entry_x86; \ | ||
103 | __check_entry_kprobe_x86; \ | ||
104 | .size name, .-name | ||
105 | |||
106 | #define KPROBE_ENTRY_X86(name) \ | ||
107 | __check_entry_kprobe_x86; \ | ||
108 | __set_kprobe_x86; \ | ||
109 | .pushsection .kprobes.text, "ax"; \ | ||
110 | .globl name; \ | ||
111 | __ALIGN; \ | ||
112 | name: | ||
113 | |||
114 | #define KPROBE_END_X86(name) \ | ||
115 | __unset_kprobe_x86; \ | ||
116 | __check_entry_kprobe_x86; \ | ||
117 | .size name, .-name; \ | ||
118 | .popsection | ||
119 | |||
60 | #endif /* _ASM_X86_LINKAGE_H */ | 120 | #endif /* _ASM_X86_LINKAGE_H */ |
61 | 121 | ||
diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index ff3a6c236c00..6cb3a467e067 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h | |||
@@ -32,11 +32,13 @@ static inline cpumask_t target_cpus(void) | |||
32 | #define vector_allocation_domain (genapic->vector_allocation_domain) | 32 | #define vector_allocation_domain (genapic->vector_allocation_domain) |
33 | #define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) | 33 | #define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) |
34 | #define send_IPI_self (genapic->send_IPI_self) | 34 | #define send_IPI_self (genapic->send_IPI_self) |
35 | #define wakeup_secondary_cpu (genapic->wakeup_cpu) | ||
35 | extern void setup_apic_routing(void); | 36 | extern void setup_apic_routing(void); |
36 | #else | 37 | #else |
37 | #define INT_DELIVERY_MODE dest_LowestPrio | 38 | #define INT_DELIVERY_MODE dest_LowestPrio |
38 | #define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */ | 39 | #define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */ |
39 | #define TARGET_CPUS (target_cpus()) | 40 | #define TARGET_CPUS (target_cpus()) |
41 | #define wakeup_secondary_cpu wakeup_secondary_cpu_via_init | ||
40 | /* | 42 | /* |
41 | * Set up the logical destination ID. | 43 | * Set up the logical destination ID. |
42 | * | 44 | * |
diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h index 9d80db91e992..ceb013660146 100644 --- a/arch/x86/include/asm/mach-default/mach_wakecpu.h +++ b/arch/x86/include/asm/mach-default/mach_wakecpu.h | |||
@@ -1,17 +1,8 @@ | |||
1 | #ifndef _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H | 1 | #ifndef _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H |
2 | #define _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H | 2 | #define _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H |
3 | 3 | ||
4 | /* | 4 | #define TRAMPOLINE_PHYS_LOW (0x467) |
5 | * This file copes with machines that wakeup secondary CPUs by the | 5 | #define TRAMPOLINE_PHYS_HIGH (0x469) |
6 | * INIT, INIT, STARTUP sequence. | ||
7 | */ | ||
8 | |||
9 | #define WAKE_SECONDARY_VIA_INIT | ||
10 | |||
11 | #define TRAMPOLINE_LOW phys_to_virt(0x467) | ||
12 | #define TRAMPOLINE_HIGH phys_to_virt(0x469) | ||
13 | |||
14 | #define boot_cpu_apicid boot_cpu_physical_apicid | ||
15 | 6 | ||
16 | static inline void wait_for_init_deassert(atomic_t *deassert) | 7 | static inline void wait_for_init_deassert(atomic_t *deassert) |
17 | { | 8 | { |
@@ -33,9 +24,12 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) | |||
33 | { | 24 | { |
34 | } | 25 | } |
35 | 26 | ||
36 | #define inquire_remote_apic(apicid) do { \ | 27 | extern void __inquire_remote_apic(int apicid); |
37 | if (apic_verbosity >= APIC_DEBUG) \ | 28 | |
38 | __inquire_remote_apic(apicid); \ | 29 | static inline void inquire_remote_apic(int apicid) |
39 | } while (0) | 30 | { |
31 | if (apic_verbosity >= APIC_DEBUG) | ||
32 | __inquire_remote_apic(apicid); | ||
33 | } | ||
40 | 34 | ||
41 | #endif /* _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H */ | 35 | #endif /* _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H */ |
diff --git a/arch/x86/include/asm/mach-default/smpboot_hooks.h b/arch/x86/include/asm/mach-default/smpboot_hooks.h index dbab36d64d48..23bf52103b89 100644 --- a/arch/x86/include/asm/mach-default/smpboot_hooks.h +++ b/arch/x86/include/asm/mach-default/smpboot_hooks.h | |||
@@ -13,9 +13,11 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) | |||
13 | CMOS_WRITE(0xa, 0xf); | 13 | CMOS_WRITE(0xa, 0xf); |
14 | local_flush_tlb(); | 14 | local_flush_tlb(); |
15 | pr_debug("1.\n"); | 15 | pr_debug("1.\n"); |
16 | *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4; | 16 | *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = |
17 | start_eip >> 4; | ||
17 | pr_debug("2.\n"); | 18 | pr_debug("2.\n"); |
18 | *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf; | 19 | *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = |
20 | start_eip & 0xf; | ||
19 | pr_debug("3.\n"); | 21 | pr_debug("3.\n"); |
20 | } | 22 | } |
21 | 23 | ||
@@ -32,7 +34,7 @@ static inline void smpboot_restore_warm_reset_vector(void) | |||
32 | */ | 34 | */ |
33 | CMOS_WRITE(0, 0xf); | 35 | CMOS_WRITE(0, 0xf); |
34 | 36 | ||
35 | *((volatile long *) phys_to_virt(0x467)) = 0; | 37 | *((volatile long *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0; |
36 | } | 38 | } |
37 | 39 | ||
38 | static inline void __init smpboot_setup_io_apic(void) | 40 | static inline void __init smpboot_setup_io_apic(void) |
diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index 5180bd7478fb..e430f47df667 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h | |||
@@ -27,6 +27,7 @@ | |||
27 | #define vector_allocation_domain (genapic->vector_allocation_domain) | 27 | #define vector_allocation_domain (genapic->vector_allocation_domain) |
28 | #define enable_apic_mode (genapic->enable_apic_mode) | 28 | #define enable_apic_mode (genapic->enable_apic_mode) |
29 | #define phys_pkg_id (genapic->phys_pkg_id) | 29 | #define phys_pkg_id (genapic->phys_pkg_id) |
30 | #define wakeup_secondary_cpu (genapic->wakeup_cpu) | ||
30 | 31 | ||
31 | extern void generic_bigsmp_probe(void); | 32 | extern void generic_bigsmp_probe(void); |
32 | 33 | ||
diff --git a/arch/x86/include/asm/mach-generic/mach_wakecpu.h b/arch/x86/include/asm/mach-generic/mach_wakecpu.h new file mode 100644 index 000000000000..1ab16b168c8a --- /dev/null +++ b/arch/x86/include/asm/mach-generic/mach_wakecpu.h | |||
@@ -0,0 +1,12 @@ | |||
1 | #ifndef _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H | ||
2 | #define _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H | ||
3 | |||
4 | #define TRAMPOLINE_PHYS_LOW (genapic->trampoline_phys_low) | ||
5 | #define TRAMPOLINE_PHYS_HIGH (genapic->trampoline_phys_high) | ||
6 | #define wait_for_init_deassert (genapic->wait_for_init_deassert) | ||
7 | #define smp_callin_clear_local_apic (genapic->smp_callin_clear_local_apic) | ||
8 | #define store_NMI_vector (genapic->store_NMI_vector) | ||
9 | #define restore_NMI_vector (genapic->restore_NMI_vector) | ||
10 | #define inquire_remote_apic (genapic->inquire_remote_apic) | ||
11 | |||
12 | #endif /* _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H */ | ||
diff --git a/arch/x86/include/asm/mmu_context_32.h b/arch/x86/include/asm/mmu_context_32.h index 8e10015781fb..7e98ce1d2c0e 100644 --- a/arch/x86/include/asm/mmu_context_32.h +++ b/arch/x86/include/asm/mmu_context_32.h | |||
@@ -4,9 +4,8 @@ | |||
4 | static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) | 4 | static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) |
5 | { | 5 | { |
6 | #ifdef CONFIG_SMP | 6 | #ifdef CONFIG_SMP |
7 | unsigned cpu = smp_processor_id(); | 7 | if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) |
8 | if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) | 8 | x86_write_percpu(cpu_tlbstate.state, TLBSTATE_LAZY); |
9 | per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_LAZY; | ||
10 | #endif | 9 | #endif |
11 | } | 10 | } |
12 | 11 | ||
@@ -20,8 +19,8 @@ static inline void switch_mm(struct mm_struct *prev, | |||
20 | /* stop flush ipis for the previous mm */ | 19 | /* stop flush ipis for the previous mm */ |
21 | cpu_clear(cpu, prev->cpu_vm_mask); | 20 | cpu_clear(cpu, prev->cpu_vm_mask); |
22 | #ifdef CONFIG_SMP | 21 | #ifdef CONFIG_SMP |
23 | per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_OK; | 22 | x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK); |
24 | per_cpu(cpu_tlbstate, cpu).active_mm = next; | 23 | x86_write_percpu(cpu_tlbstate.active_mm, next); |
25 | #endif | 24 | #endif |
26 | cpu_set(cpu, next->cpu_vm_mask); | 25 | cpu_set(cpu, next->cpu_vm_mask); |
27 | 26 | ||
@@ -36,8 +35,8 @@ static inline void switch_mm(struct mm_struct *prev, | |||
36 | } | 35 | } |
37 | #ifdef CONFIG_SMP | 36 | #ifdef CONFIG_SMP |
38 | else { | 37 | else { |
39 | per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_OK; | 38 | x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK); |
40 | BUG_ON(per_cpu(cpu_tlbstate, cpu).active_mm != next); | 39 | BUG_ON(x86_read_percpu(cpu_tlbstate.active_mm) != next); |
41 | 40 | ||
42 | if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { | 41 | if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { |
43 | /* We were in lazy tlb mode and leave_mm disabled | 42 | /* We were in lazy tlb mode and leave_mm disabled |
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index e38859d577a1..cb58643947b9 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
@@ -85,7 +85,9 @@ | |||
85 | /* AMD64 MSRs. Not complete. See the architecture manual for a more | 85 | /* AMD64 MSRs. Not complete. See the architecture manual for a more |
86 | complete list. */ | 86 | complete list. */ |
87 | 87 | ||
88 | #define MSR_AMD64_PATCH_LEVEL 0x0000008b | ||
88 | #define MSR_AMD64_NB_CFG 0xc001001f | 89 | #define MSR_AMD64_NB_CFG 0xc001001f |
90 | #define MSR_AMD64_PATCH_LOADER 0xc0010020 | ||
89 | #define MSR_AMD64_IBSFETCHCTL 0xc0011030 | 91 | #define MSR_AMD64_IBSFETCHCTL 0xc0011030 |
90 | #define MSR_AMD64_IBSFETCHLINAD 0xc0011031 | 92 | #define MSR_AMD64_IBSFETCHLINAD 0xc0011031 |
91 | #define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 | 93 | #define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 |
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index c2a812ebde89..4640ddd58fb9 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h | |||
@@ -22,10 +22,10 @@ static inline unsigned long long native_read_tscp(unsigned int *aux) | |||
22 | } | 22 | } |
23 | 23 | ||
24 | /* | 24 | /* |
25 | * i386 calling convention returns 64-bit value in edx:eax, while | 25 | * both i386 and x86_64 return a 64-bit value in edx:eax, but gcc's "A" |
26 | * x86_64 returns at rax. Also, the "A" constraint does not really | 26 | * constraint has different meanings. For i386, "A" means exactly |
27 | * mean rdx:rax in x86_64, so we need specialized behaviour for each | 27 | * edx:eax, while for x86_64 it doesn't mean rdx:rax or edx:eax. Instead, |
28 | * architecture | 28 | * it means rax *or* rdx. |
29 | */ | 29 | */ |
30 | #ifdef CONFIG_X86_64 | 30 | #ifdef CONFIG_X86_64 |
31 | #define DECLARE_ARGS(val, low, high) unsigned low, high | 31 | #define DECLARE_ARGS(val, low, high) unsigned low, high |
@@ -181,10 +181,10 @@ static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) | |||
181 | } | 181 | } |
182 | 182 | ||
183 | #define rdtscl(low) \ | 183 | #define rdtscl(low) \ |
184 | ((low) = (u32)native_read_tsc()) | 184 | ((low) = (u32)__native_read_tsc()) |
185 | 185 | ||
186 | #define rdtscll(val) \ | 186 | #define rdtscll(val) \ |
187 | ((val) = native_read_tsc()) | 187 | ((val) = __native_read_tsc()) |
188 | 188 | ||
189 | #define rdpmc(counter, low, high) \ | 189 | #define rdpmc(counter, low, high) \ |
190 | do { \ | 190 | do { \ |
diff --git a/arch/x86/include/asm/numaq/wakecpu.h b/arch/x86/include/asm/numaq/wakecpu.h index c577bda5b1c5..6f499df8eddb 100644 --- a/arch/x86/include/asm/numaq/wakecpu.h +++ b/arch/x86/include/asm/numaq/wakecpu.h | |||
@@ -3,12 +3,8 @@ | |||
3 | 3 | ||
4 | /* This file copes with machines that wakeup secondary CPUs by NMIs */ | 4 | /* This file copes with machines that wakeup secondary CPUs by NMIs */ |
5 | 5 | ||
6 | #define WAKE_SECONDARY_VIA_NMI | 6 | #define TRAMPOLINE_PHYS_LOW (0x8) |
7 | 7 | #define TRAMPOLINE_PHYS_HIGH (0xa) | |
8 | #define TRAMPOLINE_LOW phys_to_virt(0x8) | ||
9 | #define TRAMPOLINE_HIGH phys_to_virt(0xa) | ||
10 | |||
11 | #define boot_cpu_apicid boot_cpu_logical_apicid | ||
12 | 8 | ||
13 | /* We don't do anything here because we use NMI's to boot instead */ | 9 | /* We don't do anything here because we use NMI's to boot instead */ |
14 | static inline void wait_for_init_deassert(atomic_t *deassert) | 10 | static inline void wait_for_init_deassert(atomic_t *deassert) |
@@ -27,17 +23,23 @@ static inline void smp_callin_clear_local_apic(void) | |||
27 | static inline void store_NMI_vector(unsigned short *high, unsigned short *low) | 23 | static inline void store_NMI_vector(unsigned short *high, unsigned short *low) |
28 | { | 24 | { |
29 | printk("Storing NMI vector\n"); | 25 | printk("Storing NMI vector\n"); |
30 | *high = *((volatile unsigned short *) TRAMPOLINE_HIGH); | 26 | *high = |
31 | *low = *((volatile unsigned short *) TRAMPOLINE_LOW); | 27 | *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)); |
28 | *low = | ||
29 | *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)); | ||
32 | } | 30 | } |
33 | 31 | ||
34 | static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) | 32 | static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) |
35 | { | 33 | { |
36 | printk("Restoring NMI vector\n"); | 34 | printk("Restoring NMI vector\n"); |
37 | *((volatile unsigned short *) TRAMPOLINE_HIGH) = *high; | 35 | *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = |
38 | *((volatile unsigned short *) TRAMPOLINE_LOW) = *low; | 36 | *high; |
37 | *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = | ||
38 | *low; | ||
39 | } | 39 | } |
40 | 40 | ||
41 | #define inquire_remote_apic(apicid) {} | 41 | static inline void inquire_remote_apic(int apicid) |
42 | { | ||
43 | } | ||
42 | 44 | ||
43 | #endif /* __ASM_NUMAQ_WAKECPU_H */ | 45 | #endif /* __ASM_NUMAQ_WAKECPU_H */ |
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 875b38edf193..647781298e7e 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h | |||
@@ -19,6 +19,8 @@ struct pci_sysdata { | |||
19 | }; | 19 | }; |
20 | 20 | ||
21 | extern int pci_routeirq; | 21 | extern int pci_routeirq; |
22 | extern int noioapicquirk; | ||
23 | extern int noioapicreroute; | ||
22 | 24 | ||
23 | /* scan a bus after allocating a pci_sysdata for it */ | 25 | /* scan a bus after allocating a pci_sysdata for it */ |
24 | extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, | 26 | extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, |
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h index b17edfd23628..e0d199fe1d83 100644 --- a/arch/x86/include/asm/pgtable-2level.h +++ b/arch/x86/include/asm/pgtable-2level.h | |||
@@ -56,23 +56,55 @@ static inline pte_t native_ptep_get_and_clear(pte_t *xp) | |||
56 | #define pte_none(x) (!(x).pte_low) | 56 | #define pte_none(x) (!(x).pte_low) |
57 | 57 | ||
58 | /* | 58 | /* |
59 | * Bits 0, 6 and 7 are taken, split up the 29 bits of offset | 59 | * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE and _PAGE_BIT_PROTNONE are taken, |
60 | * into this range: | 60 | * split up the 29 bits of offset into this range: |
61 | */ | 61 | */ |
62 | #define PTE_FILE_MAX_BITS 29 | 62 | #define PTE_FILE_MAX_BITS 29 |
63 | #define PTE_FILE_SHIFT1 (_PAGE_BIT_PRESENT + 1) | ||
64 | #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE | ||
65 | #define PTE_FILE_SHIFT2 (_PAGE_BIT_FILE + 1) | ||
66 | #define PTE_FILE_SHIFT3 (_PAGE_BIT_PROTNONE + 1) | ||
67 | #else | ||
68 | #define PTE_FILE_SHIFT2 (_PAGE_BIT_PROTNONE + 1) | ||
69 | #define PTE_FILE_SHIFT3 (_PAGE_BIT_FILE + 1) | ||
70 | #endif | ||
71 | #define PTE_FILE_BITS1 (PTE_FILE_SHIFT2 - PTE_FILE_SHIFT1 - 1) | ||
72 | #define PTE_FILE_BITS2 (PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1) | ||
63 | 73 | ||
64 | #define pte_to_pgoff(pte) \ | 74 | #define pte_to_pgoff(pte) \ |
65 | ((((pte).pte_low >> 1) & 0x1f) + (((pte).pte_low >> 8) << 5)) | 75 | ((((pte).pte_low >> PTE_FILE_SHIFT1) \ |
76 | & ((1U << PTE_FILE_BITS1) - 1)) \ | ||
77 | + ((((pte).pte_low >> PTE_FILE_SHIFT2) \ | ||
78 | & ((1U << PTE_FILE_BITS2) - 1)) << PTE_FILE_BITS1) \ | ||
79 | + (((pte).pte_low >> PTE_FILE_SHIFT3) \ | ||
80 | << (PTE_FILE_BITS1 + PTE_FILE_BITS2))) | ||
66 | 81 | ||
67 | #define pgoff_to_pte(off) \ | 82 | #define pgoff_to_pte(off) \ |
68 | ((pte_t) { .pte_low = (((off) & 0x1f) << 1) + \ | 83 | ((pte_t) { .pte_low = \ |
69 | (((off) >> 5) << 8) + _PAGE_FILE }) | 84 | (((off) & ((1U << PTE_FILE_BITS1) - 1)) << PTE_FILE_SHIFT1) \ |
85 | + ((((off) >> PTE_FILE_BITS1) & ((1U << PTE_FILE_BITS2) - 1)) \ | ||
86 | << PTE_FILE_SHIFT2) \ | ||
87 | + (((off) >> (PTE_FILE_BITS1 + PTE_FILE_BITS2)) \ | ||
88 | << PTE_FILE_SHIFT3) \ | ||
89 | + _PAGE_FILE }) | ||
70 | 90 | ||
71 | /* Encode and de-code a swap entry */ | 91 | /* Encode and de-code a swap entry */ |
72 | #define __swp_type(x) (((x).val >> 1) & 0x1f) | 92 | #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE |
73 | #define __swp_offset(x) ((x).val >> 8) | 93 | #define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1) |
74 | #define __swp_entry(type, offset) \ | 94 | #define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1) |
75 | ((swp_entry_t) { ((type) << 1) | ((offset) << 8) }) | 95 | #else |
96 | #define SWP_TYPE_BITS (_PAGE_BIT_PROTNONE - _PAGE_BIT_PRESENT - 1) | ||
97 | #define SWP_OFFSET_SHIFT (_PAGE_BIT_FILE + 1) | ||
98 | #endif | ||
99 | |||
100 | #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) | ||
101 | |||
102 | #define __swp_type(x) (((x).val >> (_PAGE_BIT_PRESENT + 1)) \ | ||
103 | & ((1U << SWP_TYPE_BITS) - 1)) | ||
104 | #define __swp_offset(x) ((x).val >> SWP_OFFSET_SHIFT) | ||
105 | #define __swp_entry(type, offset) ((swp_entry_t) { \ | ||
106 | ((type) << (_PAGE_BIT_PRESENT + 1)) \ | ||
107 | | ((offset) << SWP_OFFSET_SHIFT) }) | ||
76 | #define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low }) | 108 | #define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low }) |
77 | #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) | 109 | #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) |
78 | 110 | ||
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 52597aeadfff..447da43cddb3 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h | |||
@@ -166,6 +166,7 @@ static inline int pte_none(pte_t pte) | |||
166 | #define PTE_FILE_MAX_BITS 32 | 166 | #define PTE_FILE_MAX_BITS 32 |
167 | 167 | ||
168 | /* Encode and de-code a swap entry */ | 168 | /* Encode and de-code a swap entry */ |
169 | #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5) | ||
169 | #define __swp_type(x) (((x).val) & 0x1f) | 170 | #define __swp_type(x) (((x).val) & 0x1f) |
170 | #define __swp_offset(x) ((x).val >> 5) | 171 | #define __swp_offset(x) ((x).val >> 5) |
171 | #define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) << 5}) | 172 | #define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) << 5}) |
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index c012f3b11671..83e69f4a37f0 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h | |||
@@ -10,7 +10,6 @@ | |||
10 | #define _PAGE_BIT_PCD 4 /* page cache disabled */ | 10 | #define _PAGE_BIT_PCD 4 /* page cache disabled */ |
11 | #define _PAGE_BIT_ACCESSED 5 /* was accessed (raised by CPU) */ | 11 | #define _PAGE_BIT_ACCESSED 5 /* was accessed (raised by CPU) */ |
12 | #define _PAGE_BIT_DIRTY 6 /* was written to (raised by CPU) */ | 12 | #define _PAGE_BIT_DIRTY 6 /* was written to (raised by CPU) */ |
13 | #define _PAGE_BIT_FILE 6 | ||
14 | #define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */ | 13 | #define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */ |
15 | #define _PAGE_BIT_PAT 7 /* on 4KB pages */ | 14 | #define _PAGE_BIT_PAT 7 /* on 4KB pages */ |
16 | #define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ | 15 | #define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ |
@@ -22,6 +21,12 @@ | |||
22 | #define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1 | 21 | #define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1 |
23 | #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ | 22 | #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ |
24 | 23 | ||
24 | /* If _PAGE_BIT_PRESENT is clear, we use these: */ | ||
25 | /* - if the user mapped it with PROT_NONE; pte_present gives true */ | ||
26 | #define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL | ||
27 | /* - set: nonlinear file mapping, saved PTE; unset:swap */ | ||
28 | #define _PAGE_BIT_FILE _PAGE_BIT_DIRTY | ||
29 | |||
25 | #define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT) | 30 | #define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT) |
26 | #define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW) | 31 | #define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW) |
27 | #define _PAGE_USER (_AT(pteval_t, 1) << _PAGE_BIT_USER) | 32 | #define _PAGE_USER (_AT(pteval_t, 1) << _PAGE_BIT_USER) |
@@ -46,11 +51,8 @@ | |||
46 | #define _PAGE_NX (_AT(pteval_t, 0)) | 51 | #define _PAGE_NX (_AT(pteval_t, 0)) |
47 | #endif | 52 | #endif |
48 | 53 | ||
49 | /* If _PAGE_PRESENT is clear, we use these: */ | 54 | #define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE) |
50 | #define _PAGE_FILE _PAGE_DIRTY /* nonlinear file mapping, | 55 | #define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) |
51 | * saved PTE; unset:swap */ | ||
52 | #define _PAGE_PROTNONE _PAGE_PSE /* if the user mapped it with PROT_NONE; | ||
53 | pte_present gives true */ | ||
54 | 56 | ||
55 | #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ | 57 | #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ |
56 | _PAGE_ACCESSED | _PAGE_DIRTY) | 58 | _PAGE_ACCESSED | _PAGE_DIRTY) |
@@ -158,8 +160,19 @@ | |||
158 | #define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */ | 160 | #define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */ |
159 | #endif | 161 | #endif |
160 | 162 | ||
163 | /* | ||
164 | * Macro to mark a page protection value as UC- | ||
165 | */ | ||
166 | #define pgprot_noncached(prot) \ | ||
167 | ((boot_cpu_data.x86 > 3) \ | ||
168 | ? (__pgprot(pgprot_val(prot) | _PAGE_CACHE_UC_MINUS)) \ | ||
169 | : (prot)) | ||
170 | |||
161 | #ifndef __ASSEMBLY__ | 171 | #ifndef __ASSEMBLY__ |
162 | 172 | ||
173 | #define pgprot_writecombine pgprot_writecombine | ||
174 | extern pgprot_t pgprot_writecombine(pgprot_t prot); | ||
175 | |||
163 | /* | 176 | /* |
164 | * ZERO_PAGE is a global shared page that is always zero: used | 177 | * ZERO_PAGE is a global shared page that is always zero: used |
165 | * for zero-mapped memory areas etc.. | 178 | * for zero-mapped memory areas etc.. |
@@ -329,6 +342,9 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) | |||
329 | #define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask) | 342 | #define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask) |
330 | 343 | ||
331 | #ifndef __ASSEMBLY__ | 344 | #ifndef __ASSEMBLY__ |
345 | /* Indicate that x86 has its own track and untrack pfn vma functions */ | ||
346 | #define __HAVE_PFNMAP_TRACKING | ||
347 | |||
332 | #define __HAVE_PHYS_MEM_ACCESS_PROT | 348 | #define __HAVE_PHYS_MEM_ACCESS_PROT |
333 | struct file; | 349 | struct file; |
334 | pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, | 350 | pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, |
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index f9d5889b336b..72b020deb46b 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h | |||
@@ -101,15 +101,6 @@ extern unsigned long pg0[]; | |||
101 | #endif | 101 | #endif |
102 | 102 | ||
103 | /* | 103 | /* |
104 | * Macro to mark a page protection value as "uncacheable". | ||
105 | * On processors which do not support it, this is a no-op. | ||
106 | */ | ||
107 | #define pgprot_noncached(prot) \ | ||
108 | ((boot_cpu_data.x86 > 3) \ | ||
109 | ? (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT)) \ | ||
110 | : (prot)) | ||
111 | |||
112 | /* | ||
113 | * Conversion functions: convert a page and protection to a page entry, | 104 | * Conversion functions: convert a page and protection to a page entry, |
114 | * and a page entry and page directory to the page they refer to. | 105 | * and a page entry and page directory to the page they refer to. |
115 | */ | 106 | */ |
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 545a0e042bb2..ba09289accaa 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h | |||
@@ -146,7 +146,7 @@ static inline void native_pgd_clear(pgd_t *pgd) | |||
146 | #define PGDIR_MASK (~(PGDIR_SIZE - 1)) | 146 | #define PGDIR_MASK (~(PGDIR_SIZE - 1)) |
147 | 147 | ||
148 | 148 | ||
149 | #define MAXMEM _AC(0x00003fffffffffff, UL) | 149 | #define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) |
150 | #define VMALLOC_START _AC(0xffffc20000000000, UL) | 150 | #define VMALLOC_START _AC(0xffffc20000000000, UL) |
151 | #define VMALLOC_END _AC(0xffffe1ffffffffff, UL) | 151 | #define VMALLOC_END _AC(0xffffe1ffffffffff, UL) |
152 | #define VMEMMAP_START _AC(0xffffe20000000000, UL) | 152 | #define VMEMMAP_START _AC(0xffffe20000000000, UL) |
@@ -177,12 +177,6 @@ static inline int pmd_bad(pmd_t pmd) | |||
177 | #define pages_to_mb(x) ((x) >> (20 - PAGE_SHIFT)) /* FIXME: is this right? */ | 177 | #define pages_to_mb(x) ((x) >> (20 - PAGE_SHIFT)) /* FIXME: is this right? */ |
178 | 178 | ||
179 | /* | 179 | /* |
180 | * Macro to mark a page protection value as "uncacheable". | ||
181 | */ | ||
182 | #define pgprot_noncached(prot) \ | ||
183 | (__pgprot(pgprot_val((prot)) | _PAGE_PCD | _PAGE_PWT)) | ||
184 | |||
185 | /* | ||
186 | * Conversion functions: convert a page and protection to a page entry, | 180 | * Conversion functions: convert a page and protection to a page entry, |
187 | * and a page entry and page directory to the page they refer to. | 181 | * and a page entry and page directory to the page they refer to. |
188 | */ | 182 | */ |
@@ -250,10 +244,22 @@ static inline int pud_large(pud_t pte) | |||
250 | extern int direct_gbpages; | 244 | extern int direct_gbpages; |
251 | 245 | ||
252 | /* Encode and de-code a swap entry */ | 246 | /* Encode and de-code a swap entry */ |
253 | #define __swp_type(x) (((x).val >> 1) & 0x3f) | 247 | #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE |
254 | #define __swp_offset(x) ((x).val >> 8) | 248 | #define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1) |
255 | #define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 1) | \ | 249 | #define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1) |
256 | ((offset) << 8) }) | 250 | #else |
251 | #define SWP_TYPE_BITS (_PAGE_BIT_PROTNONE - _PAGE_BIT_PRESENT - 1) | ||
252 | #define SWP_OFFSET_SHIFT (_PAGE_BIT_FILE + 1) | ||
253 | #endif | ||
254 | |||
255 | #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) | ||
256 | |||
257 | #define __swp_type(x) (((x).val >> (_PAGE_BIT_PRESENT + 1)) \ | ||
258 | & ((1U << SWP_TYPE_BITS) - 1)) | ||
259 | #define __swp_offset(x) ((x).val >> SWP_OFFSET_SHIFT) | ||
260 | #define __swp_entry(type, offset) ((swp_entry_t) { \ | ||
261 | ((type) << (_PAGE_BIT_PRESENT + 1)) \ | ||
262 | | ((offset) << SWP_OFFSET_SHIFT) }) | ||
257 | #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) }) | 263 | #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) }) |
258 | #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) | 264 | #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) |
259 | 265 | ||
diff --git a/arch/x86/include/asm/prctl.h b/arch/x86/include/asm/prctl.h index fe681147a4f7..a8894647dd9a 100644 --- a/arch/x86/include/asm/prctl.h +++ b/arch/x86/include/asm/prctl.h | |||
@@ -6,5 +6,8 @@ | |||
6 | #define ARCH_GET_FS 0x1003 | 6 | #define ARCH_GET_FS 0x1003 |
7 | #define ARCH_GET_GS 0x1004 | 7 | #define ARCH_GET_GS 0x1004 |
8 | 8 | ||
9 | #ifdef CONFIG_X86_64 | ||
10 | extern long sys_arch_prctl(int, unsigned long); | ||
11 | #endif /* CONFIG_X86_64 */ | ||
9 | 12 | ||
10 | #endif /* _ASM_X86_PRCTL_H */ | 13 | #endif /* _ASM_X86_PRCTL_H */ |
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 5ca01e383269..a570eafa4755 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -110,6 +110,7 @@ struct cpuinfo_x86 { | |||
110 | /* Index into per_cpu list: */ | 110 | /* Index into per_cpu list: */ |
111 | u16 cpu_index; | 111 | u16 cpu_index; |
112 | #endif | 112 | #endif |
113 | unsigned int x86_hyper_vendor; | ||
113 | } __attribute__((__aligned__(SMP_CACHE_BYTES))); | 114 | } __attribute__((__aligned__(SMP_CACHE_BYTES))); |
114 | 115 | ||
115 | #define X86_VENDOR_INTEL 0 | 116 | #define X86_VENDOR_INTEL 0 |
@@ -123,6 +124,9 @@ struct cpuinfo_x86 { | |||
123 | 124 | ||
124 | #define X86_VENDOR_UNKNOWN 0xff | 125 | #define X86_VENDOR_UNKNOWN 0xff |
125 | 126 | ||
127 | #define X86_HYPER_VENDOR_NONE 0 | ||
128 | #define X86_HYPER_VENDOR_VMWARE 1 | ||
129 | |||
126 | /* | 130 | /* |
127 | * capabilities of CPUs | 131 | * capabilities of CPUs |
128 | */ | 132 | */ |
diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h index df7710354f85..562d4fd31ba8 100644 --- a/arch/x86/include/asm/reboot.h +++ b/arch/x86/include/asm/reboot.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef _ASM_X86_REBOOT_H | 1 | #ifndef _ASM_X86_REBOOT_H |
2 | #define _ASM_X86_REBOOT_H | 2 | #define _ASM_X86_REBOOT_H |
3 | 3 | ||
4 | #include <linux/kdebug.h> | ||
5 | |||
4 | struct pt_regs; | 6 | struct pt_regs; |
5 | 7 | ||
6 | struct machine_ops { | 8 | struct machine_ops { |
@@ -18,4 +20,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs); | |||
18 | void native_machine_shutdown(void); | 20 | void native_machine_shutdown(void); |
19 | void machine_real_restart(const unsigned char *code, int length); | 21 | void machine_real_restart(const unsigned char *code, int length); |
20 | 22 | ||
23 | typedef void (*nmi_shootdown_cb)(int, struct die_args*); | ||
24 | void nmi_shootdown_cpus(nmi_shootdown_cb callback); | ||
25 | |||
21 | #endif /* _ASM_X86_REBOOT_H */ | 26 | #endif /* _ASM_X86_REBOOT_H */ |
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index f12d37237465..4fcd53fd5f43 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h | |||
@@ -8,6 +8,10 @@ | |||
8 | /* Interrupt control for vSMPowered x86_64 systems */ | 8 | /* Interrupt control for vSMPowered x86_64 systems */ |
9 | void vsmp_init(void); | 9 | void vsmp_init(void); |
10 | 10 | ||
11 | |||
12 | void setup_bios_corruption_check(void); | ||
13 | |||
14 | |||
11 | #ifdef CONFIG_X86_VISWS | 15 | #ifdef CONFIG_X86_VISWS |
12 | extern void visws_early_detect(void); | 16 | extern void visws_early_detect(void); |
13 | extern int is_visws_box(void); | 17 | extern int is_visws_box(void); |
@@ -16,6 +20,8 @@ static inline void visws_early_detect(void) { } | |||
16 | static inline int is_visws_box(void) { return 0; } | 20 | static inline int is_visws_box(void) { return 0; } |
17 | #endif | 21 | #endif |
18 | 22 | ||
23 | extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); | ||
24 | extern int wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip); | ||
19 | /* | 25 | /* |
20 | * Any setup quirks to be performed? | 26 | * Any setup quirks to be performed? |
21 | */ | 27 | */ |
@@ -39,6 +45,7 @@ struct x86_quirks { | |||
39 | void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable, | 45 | void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable, |
40 | unsigned short oemsize); | 46 | unsigned short oemsize); |
41 | int (*setup_ioapic_ids)(void); | 47 | int (*setup_ioapic_ids)(void); |
48 | int (*update_genapic)(void); | ||
42 | }; | 49 | }; |
43 | 50 | ||
44 | extern struct x86_quirks *x86_quirks; | 51 | extern struct x86_quirks *x86_quirks; |
diff --git a/arch/x86/include/asm/sigframe.h b/arch/x86/include/asm/sigframe.h new file mode 100644 index 000000000000..4e0fe26d27d3 --- /dev/null +++ b/arch/x86/include/asm/sigframe.h | |||
@@ -0,0 +1,70 @@ | |||
1 | #ifndef _ASM_X86_SIGFRAME_H | ||
2 | #define _ASM_X86_SIGFRAME_H | ||
3 | |||
4 | #include <asm/sigcontext.h> | ||
5 | #include <asm/siginfo.h> | ||
6 | #include <asm/ucontext.h> | ||
7 | |||
8 | #ifdef CONFIG_X86_32 | ||
9 | #define sigframe_ia32 sigframe | ||
10 | #define rt_sigframe_ia32 rt_sigframe | ||
11 | #define sigcontext_ia32 sigcontext | ||
12 | #define _fpstate_ia32 _fpstate | ||
13 | #define ucontext_ia32 ucontext | ||
14 | #else /* !CONFIG_X86_32 */ | ||
15 | |||
16 | #ifdef CONFIG_IA32_EMULATION | ||
17 | #include <asm/ia32.h> | ||
18 | #endif /* CONFIG_IA32_EMULATION */ | ||
19 | |||
20 | #endif /* CONFIG_X86_32 */ | ||
21 | |||
22 | #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) | ||
23 | struct sigframe_ia32 { | ||
24 | u32 pretcode; | ||
25 | int sig; | ||
26 | struct sigcontext_ia32 sc; | ||
27 | /* | ||
28 | * fpstate is unused. fpstate is moved/allocated after | ||
29 | * retcode[] below. This movement allows to have the FP state and the | ||
30 | * future state extensions (xsave) stay together. | ||
31 | * And at the same time retaining the unused fpstate, prevents changing | ||
32 | * the offset of extramask[] in the sigframe and thus prevent any | ||
33 | * legacy application accessing/modifying it. | ||
34 | */ | ||
35 | struct _fpstate_ia32 fpstate_unused; | ||
36 | #ifdef CONFIG_IA32_EMULATION | ||
37 | unsigned int extramask[_COMPAT_NSIG_WORDS-1]; | ||
38 | #else /* !CONFIG_IA32_EMULATION */ | ||
39 | unsigned long extramask[_NSIG_WORDS-1]; | ||
40 | #endif /* CONFIG_IA32_EMULATION */ | ||
41 | char retcode[8]; | ||
42 | /* fp state follows here */ | ||
43 | }; | ||
44 | |||
45 | struct rt_sigframe_ia32 { | ||
46 | u32 pretcode; | ||
47 | int sig; | ||
48 | u32 pinfo; | ||
49 | u32 puc; | ||
50 | #ifdef CONFIG_IA32_EMULATION | ||
51 | compat_siginfo_t info; | ||
52 | #else /* !CONFIG_IA32_EMULATION */ | ||
53 | struct siginfo info; | ||
54 | #endif /* CONFIG_IA32_EMULATION */ | ||
55 | struct ucontext_ia32 uc; | ||
56 | char retcode[8]; | ||
57 | /* fp state follows here */ | ||
58 | }; | ||
59 | #endif /* defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) */ | ||
60 | |||
61 | #ifdef CONFIG_X86_64 | ||
62 | struct rt_sigframe { | ||
63 | char __user *pretcode; | ||
64 | struct ucontext uc; | ||
65 | struct siginfo info; | ||
66 | /* fp state follows here */ | ||
67 | }; | ||
68 | #endif /* CONFIG_X86_64 */ | ||
69 | |||
70 | #endif /* _ASM_X86_SIGFRAME_H */ | ||
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index 96ac44f275da..7761a5d554bb 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h | |||
@@ -121,6 +121,10 @@ typedef unsigned long sigset_t; | |||
121 | 121 | ||
122 | #ifndef __ASSEMBLY__ | 122 | #ifndef __ASSEMBLY__ |
123 | 123 | ||
124 | # ifdef __KERNEL__ | ||
125 | extern void do_notify_resume(struct pt_regs *, void *, __u32); | ||
126 | # endif /* __KERNEL__ */ | ||
127 | |||
124 | #ifdef __i386__ | 128 | #ifdef __i386__ |
125 | # ifdef __KERNEL__ | 129 | # ifdef __KERNEL__ |
126 | struct old_sigaction { | 130 | struct old_sigaction { |
@@ -141,8 +145,6 @@ struct k_sigaction { | |||
141 | struct sigaction sa; | 145 | struct sigaction sa; |
142 | }; | 146 | }; |
143 | 147 | ||
144 | extern void do_notify_resume(struct pt_regs *, void *, __u32); | ||
145 | |||
146 | # else /* __KERNEL__ */ | 148 | # else /* __KERNEL__ */ |
147 | /* Here we must cater to libcs that poke about in kernel headers. */ | 149 | /* Here we must cater to libcs that poke about in kernel headers. */ |
148 | 150 | ||
diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h index be44f7dab395..e3cc3c063ec5 100644 --- a/arch/x86/include/asm/sparsemem.h +++ b/arch/x86/include/asm/sparsemem.h | |||
@@ -27,7 +27,7 @@ | |||
27 | #else /* CONFIG_X86_32 */ | 27 | #else /* CONFIG_X86_32 */ |
28 | # define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */ | 28 | # define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */ |
29 | # define MAX_PHYSADDR_BITS 44 | 29 | # define MAX_PHYSADDR_BITS 44 |
30 | # define MAX_PHYSMEM_BITS 44 | 30 | # define MAX_PHYSMEM_BITS 44 /* Can be max 45 bits */ |
31 | #endif | 31 | #endif |
32 | 32 | ||
33 | #endif /* CONFIG_SPARSEMEM */ | 33 | #endif /* CONFIG_SPARSEMEM */ |
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 87803da44010..9c6797c3e56c 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h | |||
@@ -19,6 +19,13 @@ | |||
19 | /* kernel/ioport.c */ | 19 | /* kernel/ioport.c */ |
20 | asmlinkage long sys_ioperm(unsigned long, unsigned long, int); | 20 | asmlinkage long sys_ioperm(unsigned long, unsigned long, int); |
21 | 21 | ||
22 | /* kernel/ldt.c */ | ||
23 | asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); | ||
24 | |||
25 | /* kernel/tls.c */ | ||
26 | asmlinkage int sys_set_thread_area(struct user_desc __user *); | ||
27 | asmlinkage int sys_get_thread_area(struct user_desc __user *); | ||
28 | |||
22 | /* X86_32 only */ | 29 | /* X86_32 only */ |
23 | #ifdef CONFIG_X86_32 | 30 | #ifdef CONFIG_X86_32 |
24 | /* kernel/process_32.c */ | 31 | /* kernel/process_32.c */ |
@@ -33,14 +40,11 @@ asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, | |||
33 | struct old_sigaction __user *); | 40 | struct old_sigaction __user *); |
34 | asmlinkage int sys_sigaltstack(unsigned long); | 41 | asmlinkage int sys_sigaltstack(unsigned long); |
35 | asmlinkage unsigned long sys_sigreturn(unsigned long); | 42 | asmlinkage unsigned long sys_sigreturn(unsigned long); |
36 | asmlinkage int sys_rt_sigreturn(unsigned long); | 43 | asmlinkage int sys_rt_sigreturn(struct pt_regs); |
37 | 44 | ||
38 | /* kernel/ioport.c */ | 45 | /* kernel/ioport.c */ |
39 | asmlinkage long sys_iopl(unsigned long); | 46 | asmlinkage long sys_iopl(unsigned long); |
40 | 47 | ||
41 | /* kernel/ldt.c */ | ||
42 | asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); | ||
43 | |||
44 | /* kernel/sys_i386_32.c */ | 48 | /* kernel/sys_i386_32.c */ |
45 | asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long, | 49 | asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long, |
46 | unsigned long, unsigned long, unsigned long); | 50 | unsigned long, unsigned long, unsigned long); |
@@ -54,10 +58,6 @@ asmlinkage int sys_uname(struct old_utsname __user *); | |||
54 | struct oldold_utsname; | 58 | struct oldold_utsname; |
55 | asmlinkage int sys_olduname(struct oldold_utsname __user *); | 59 | asmlinkage int sys_olduname(struct oldold_utsname __user *); |
56 | 60 | ||
57 | /* kernel/tls.c */ | ||
58 | asmlinkage int sys_set_thread_area(struct user_desc __user *); | ||
59 | asmlinkage int sys_get_thread_area(struct user_desc __user *); | ||
60 | |||
61 | /* kernel/vm86_32.c */ | 61 | /* kernel/vm86_32.c */ |
62 | asmlinkage int sys_vm86old(struct pt_regs); | 62 | asmlinkage int sys_vm86old(struct pt_regs); |
63 | asmlinkage int sys_vm86(struct pt_regs); | 63 | asmlinkage int sys_vm86(struct pt_regs); |
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 2ed3f0f44ff7..8e626ea33a1a 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h | |||
@@ -17,12 +17,12 @@ | |||
17 | # define AT_VECTOR_SIZE_ARCH 1 | 17 | # define AT_VECTOR_SIZE_ARCH 1 |
18 | #endif | 18 | #endif |
19 | 19 | ||
20 | #ifdef CONFIG_X86_32 | ||
21 | |||
22 | struct task_struct; /* one of the stranger aspects of C forward declarations */ | 20 | struct task_struct; /* one of the stranger aspects of C forward declarations */ |
23 | struct task_struct *__switch_to(struct task_struct *prev, | 21 | struct task_struct *__switch_to(struct task_struct *prev, |
24 | struct task_struct *next); | 22 | struct task_struct *next); |
25 | 23 | ||
24 | #ifdef CONFIG_X86_32 | ||
25 | |||
26 | /* | 26 | /* |
27 | * Saving eflags is important. It switches not only IOPL between tasks, | 27 | * Saving eflags is important. It switches not only IOPL between tasks, |
28 | * it also protects other tasks from NT leaking through sysenter etc. | 28 | * it also protects other tasks from NT leaking through sysenter etc. |
@@ -314,6 +314,8 @@ extern void free_init_pages(char *what, unsigned long begin, unsigned long end); | |||
314 | 314 | ||
315 | void default_idle(void); | 315 | void default_idle(void); |
316 | 316 | ||
317 | void stop_this_cpu(void *dummy); | ||
318 | |||
317 | /* | 319 | /* |
318 | * Force strict CPU ordering. | 320 | * Force strict CPU ordering. |
319 | * And yes, this is required on UP too when we're talking | 321 | * And yes, this is required on UP too when we're talking |
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index e44d379faad2..8dbc57390d25 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h | |||
@@ -24,7 +24,7 @@ struct exec_domain; | |||
24 | struct thread_info { | 24 | struct thread_info { |
25 | struct task_struct *task; /* main task structure */ | 25 | struct task_struct *task; /* main task structure */ |
26 | struct exec_domain *exec_domain; /* execution domain */ | 26 | struct exec_domain *exec_domain; /* execution domain */ |
27 | unsigned long flags; /* low level flags */ | 27 | __u32 flags; /* low level flags */ |
28 | __u32 status; /* thread synchronous flags */ | 28 | __u32 status; /* thread synchronous flags */ |
29 | __u32 cpu; /* current CPU */ | 29 | __u32 cpu; /* current CPU */ |
30 | int preempt_count; /* 0 => preemptable, | 30 | int preempt_count; /* 0 => preemptable, |
diff --git a/arch/x86/include/asm/trampoline.h b/arch/x86/include/asm/trampoline.h index fa0d79facdbc..780ba0ab94f9 100644 --- a/arch/x86/include/asm/trampoline.h +++ b/arch/x86/include/asm/trampoline.h | |||
@@ -3,6 +3,7 @@ | |||
3 | 3 | ||
4 | #ifndef __ASSEMBLY__ | 4 | #ifndef __ASSEMBLY__ |
5 | 5 | ||
6 | #ifdef CONFIG_X86_TRAMPOLINE | ||
6 | /* | 7 | /* |
7 | * Trampoline 80x86 program as an array. | 8 | * Trampoline 80x86 program as an array. |
8 | */ | 9 | */ |
@@ -13,8 +14,14 @@ extern unsigned char *trampoline_base; | |||
13 | extern unsigned long init_rsp; | 14 | extern unsigned long init_rsp; |
14 | extern unsigned long initial_code; | 15 | extern unsigned long initial_code; |
15 | 16 | ||
17 | #define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE) | ||
16 | #define TRAMPOLINE_BASE 0x6000 | 18 | #define TRAMPOLINE_BASE 0x6000 |
19 | |||
17 | extern unsigned long setup_trampoline(void); | 20 | extern unsigned long setup_trampoline(void); |
21 | extern void __init reserve_trampoline_memory(void); | ||
22 | #else | ||
23 | static inline void reserve_trampoline_memory(void) {}; | ||
24 | #endif /* CONFIG_X86_TRAMPOLINE */ | ||
18 | 25 | ||
19 | #endif /* __ASSEMBLY__ */ | 26 | #endif /* __ASSEMBLY__ */ |
20 | 27 | ||
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 45dee286e45c..2ee0a3bceedf 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h | |||
@@ -46,6 +46,10 @@ dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *, long); | |||
46 | dotraplinkage void do_invalid_TSS(struct pt_regs *, long); | 46 | dotraplinkage void do_invalid_TSS(struct pt_regs *, long); |
47 | dotraplinkage void do_segment_not_present(struct pt_regs *, long); | 47 | dotraplinkage void do_segment_not_present(struct pt_regs *, long); |
48 | dotraplinkage void do_stack_segment(struct pt_regs *, long); | 48 | dotraplinkage void do_stack_segment(struct pt_regs *, long); |
49 | #ifdef CONFIG_X86_64 | ||
50 | dotraplinkage void do_double_fault(struct pt_regs *, long); | ||
51 | asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *); | ||
52 | #endif | ||
49 | dotraplinkage void do_general_protection(struct pt_regs *, long); | 53 | dotraplinkage void do_general_protection(struct pt_regs *, long); |
50 | dotraplinkage void do_page_fault(struct pt_regs *, unsigned long); | 54 | dotraplinkage void do_page_fault(struct pt_regs *, unsigned long); |
51 | dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *, long); | 55 | dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *, long); |
@@ -72,10 +76,13 @@ static inline int get_si_code(unsigned long condition) | |||
72 | extern int panic_on_unrecovered_nmi; | 76 | extern int panic_on_unrecovered_nmi; |
73 | extern int kstack_depth_to_print; | 77 | extern int kstack_depth_to_print; |
74 | 78 | ||
75 | #ifdef CONFIG_X86_32 | ||
76 | void math_error(void __user *); | 79 | void math_error(void __user *); |
77 | unsigned long patch_espfix_desc(unsigned long, unsigned long); | ||
78 | asmlinkage void math_emulate(long); | 80 | asmlinkage void math_emulate(long); |
81 | #ifdef CONFIG_X86_32 | ||
82 | unsigned long patch_espfix_desc(unsigned long, unsigned long); | ||
83 | #else | ||
84 | asmlinkage void smp_thermal_interrupt(void); | ||
85 | asmlinkage void mce_threshold_interrupt(void); | ||
79 | #endif | 86 | #endif |
80 | 87 | ||
81 | #endif /* _ASM_X86_TRAPS_H */ | 88 | #endif /* _ASM_X86_TRAPS_H */ |
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 9cd83a8e40d5..38ae163cc91b 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h | |||
@@ -34,8 +34,6 @@ static inline cycles_t get_cycles(void) | |||
34 | 34 | ||
35 | static __always_inline cycles_t vget_cycles(void) | 35 | static __always_inline cycles_t vget_cycles(void) |
36 | { | 36 | { |
37 | cycles_t cycles; | ||
38 | |||
39 | /* | 37 | /* |
40 | * We only do VDSOs on TSC capable CPUs, so this shouldnt | 38 | * We only do VDSOs on TSC capable CPUs, so this shouldnt |
41 | * access boot_cpu_data (which is not VDSO-safe): | 39 | * access boot_cpu_data (which is not VDSO-safe): |
@@ -44,11 +42,7 @@ static __always_inline cycles_t vget_cycles(void) | |||
44 | if (!cpu_has_tsc) | 42 | if (!cpu_has_tsc) |
45 | return 0; | 43 | return 0; |
46 | #endif | 44 | #endif |
47 | rdtsc_barrier(); | 45 | return (cycles_t)__native_read_tsc(); |
48 | cycles = (cycles_t)__native_read_tsc(); | ||
49 | rdtsc_barrier(); | ||
50 | |||
51 | return cycles; | ||
52 | } | 46 | } |
53 | 47 | ||
54 | extern void tsc_init(void); | 48 | extern void tsc_init(void); |
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 35c54921b2e4..580c3ee6c58c 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h | |||
@@ -350,14 +350,14 @@ do { \ | |||
350 | 350 | ||
351 | #define __put_user_nocheck(x, ptr, size) \ | 351 | #define __put_user_nocheck(x, ptr, size) \ |
352 | ({ \ | 352 | ({ \ |
353 | long __pu_err; \ | 353 | int __pu_err; \ |
354 | __put_user_size((x), (ptr), (size), __pu_err, -EFAULT); \ | 354 | __put_user_size((x), (ptr), (size), __pu_err, -EFAULT); \ |
355 | __pu_err; \ | 355 | __pu_err; \ |
356 | }) | 356 | }) |
357 | 357 | ||
358 | #define __get_user_nocheck(x, ptr, size) \ | 358 | #define __get_user_nocheck(x, ptr, size) \ |
359 | ({ \ | 359 | ({ \ |
360 | long __gu_err; \ | 360 | int __gu_err; \ |
361 | unsigned long __gu_val; \ | 361 | unsigned long __gu_val; \ |
362 | __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ | 362 | __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ |
363 | (x) = (__force __typeof__(*(ptr)))__gu_val; \ | 363 | (x) = (__force __typeof__(*(ptr)))__gu_val; \ |
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h index d931d3b7e6f7..7ed17ff502b9 100644 --- a/arch/x86/include/asm/uv/bios.h +++ b/arch/x86/include/asm/uv/bios.h | |||
@@ -32,13 +32,18 @@ | |||
32 | enum uv_bios_cmd { | 32 | enum uv_bios_cmd { |
33 | UV_BIOS_COMMON, | 33 | UV_BIOS_COMMON, |
34 | UV_BIOS_GET_SN_INFO, | 34 | UV_BIOS_GET_SN_INFO, |
35 | UV_BIOS_FREQ_BASE | 35 | UV_BIOS_FREQ_BASE, |
36 | UV_BIOS_WATCHLIST_ALLOC, | ||
37 | UV_BIOS_WATCHLIST_FREE, | ||
38 | UV_BIOS_MEMPROTECT, | ||
39 | UV_BIOS_GET_PARTITION_ADDR | ||
36 | }; | 40 | }; |
37 | 41 | ||
38 | /* | 42 | /* |
39 | * Status values returned from a BIOS call. | 43 | * Status values returned from a BIOS call. |
40 | */ | 44 | */ |
41 | enum { | 45 | enum { |
46 | BIOS_STATUS_MORE_PASSES = 1, | ||
42 | BIOS_STATUS_SUCCESS = 0, | 47 | BIOS_STATUS_SUCCESS = 0, |
43 | BIOS_STATUS_UNIMPLEMENTED = -ENOSYS, | 48 | BIOS_STATUS_UNIMPLEMENTED = -ENOSYS, |
44 | BIOS_STATUS_EINVAL = -EINVAL, | 49 | BIOS_STATUS_EINVAL = -EINVAL, |
@@ -71,6 +76,21 @@ union partition_info_u { | |||
71 | }; | 76 | }; |
72 | }; | 77 | }; |
73 | 78 | ||
79 | union uv_watchlist_u { | ||
80 | u64 val; | ||
81 | struct { | ||
82 | u64 blade : 16, | ||
83 | size : 32, | ||
84 | filler : 16; | ||
85 | }; | ||
86 | }; | ||
87 | |||
88 | enum uv_memprotect { | ||
89 | UV_MEMPROT_RESTRICT_ACCESS, | ||
90 | UV_MEMPROT_ALLOW_AMO, | ||
91 | UV_MEMPROT_ALLOW_RW | ||
92 | }; | ||
93 | |||
74 | /* | 94 | /* |
75 | * bios calls have 6 parameters | 95 | * bios calls have 6 parameters |
76 | */ | 96 | */ |
@@ -80,14 +100,20 @@ extern s64 uv_bios_call_reentrant(enum uv_bios_cmd, u64, u64, u64, u64, u64); | |||
80 | 100 | ||
81 | extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *); | 101 | extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *); |
82 | extern s64 uv_bios_freq_base(u64, u64 *); | 102 | extern s64 uv_bios_freq_base(u64, u64 *); |
103 | extern int uv_bios_mq_watchlist_alloc(int, unsigned long, unsigned int, | ||
104 | unsigned long *); | ||
105 | extern int uv_bios_mq_watchlist_free(int, int); | ||
106 | extern s64 uv_bios_change_memprotect(u64, u64, enum uv_memprotect); | ||
107 | extern s64 uv_bios_reserved_page_pa(u64, u64 *, u64 *, u64 *); | ||
83 | 108 | ||
84 | extern void uv_bios_init(void); | 109 | extern void uv_bios_init(void); |
85 | 110 | ||
111 | extern unsigned long sn_rtc_cycles_per_second; | ||
86 | extern int uv_type; | 112 | extern int uv_type; |
87 | extern long sn_partition_id; | 113 | extern long sn_partition_id; |
88 | extern long uv_coherency_id; | 114 | extern long sn_coherency_id; |
89 | extern long uv_region_size; | 115 | extern long sn_region_size; |
90 | #define partition_coherence_id() (uv_coherency_id) | 116 | #define partition_coherence_id() (sn_coherency_id) |
91 | 117 | ||
92 | extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */ | 118 | extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */ |
93 | 119 | ||
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 7a5782610b2b..777327ef05c1 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h | |||
@@ -113,25 +113,37 @@ | |||
113 | */ | 113 | */ |
114 | #define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_NODES * 2) | 114 | #define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_NODES * 2) |
115 | 115 | ||
116 | struct uv_scir_s { | ||
117 | struct timer_list timer; | ||
118 | unsigned long offset; | ||
119 | unsigned long last; | ||
120 | unsigned long idle_on; | ||
121 | unsigned long idle_off; | ||
122 | unsigned char state; | ||
123 | unsigned char enabled; | ||
124 | }; | ||
125 | |||
116 | /* | 126 | /* |
117 | * The following defines attributes of the HUB chip. These attributes are | 127 | * The following defines attributes of the HUB chip. These attributes are |
118 | * frequently referenced and are kept in the per-cpu data areas of each cpu. | 128 | * frequently referenced and are kept in the per-cpu data areas of each cpu. |
119 | * They are kept together in a struct to minimize cache misses. | 129 | * They are kept together in a struct to minimize cache misses. |
120 | */ | 130 | */ |
121 | struct uv_hub_info_s { | 131 | struct uv_hub_info_s { |
122 | unsigned long global_mmr_base; | 132 | unsigned long global_mmr_base; |
123 | unsigned long gpa_mask; | 133 | unsigned long gpa_mask; |
124 | unsigned long gnode_upper; | 134 | unsigned long gnode_upper; |
125 | unsigned long lowmem_remap_top; | 135 | unsigned long lowmem_remap_top; |
126 | unsigned long lowmem_remap_base; | 136 | unsigned long lowmem_remap_base; |
127 | unsigned short pnode; | 137 | unsigned short pnode; |
128 | unsigned short pnode_mask; | 138 | unsigned short pnode_mask; |
129 | unsigned short coherency_domain_number; | 139 | unsigned short coherency_domain_number; |
130 | unsigned short numa_blade_id; | 140 | unsigned short numa_blade_id; |
131 | unsigned char blade_processor_id; | 141 | unsigned char blade_processor_id; |
132 | unsigned char m_val; | 142 | unsigned char m_val; |
133 | unsigned char n_val; | 143 | unsigned char n_val; |
144 | struct uv_scir_s scir; | ||
134 | }; | 145 | }; |
146 | |||
135 | DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); | 147 | DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); |
136 | #define uv_hub_info (&__get_cpu_var(__uv_hub_info)) | 148 | #define uv_hub_info (&__get_cpu_var(__uv_hub_info)) |
137 | #define uv_cpu_hub_info(cpu) (&per_cpu(__uv_hub_info, cpu)) | 149 | #define uv_cpu_hub_info(cpu) (&per_cpu(__uv_hub_info, cpu)) |
@@ -163,6 +175,30 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); | |||
163 | 175 | ||
164 | #define UV_APIC_PNODE_SHIFT 6 | 176 | #define UV_APIC_PNODE_SHIFT 6 |
165 | 177 | ||
178 | /* Local Bus from cpu's perspective */ | ||
179 | #define LOCAL_BUS_BASE 0x1c00000 | ||
180 | #define LOCAL_BUS_SIZE (4 * 1024 * 1024) | ||
181 | |||
182 | /* | ||
183 | * System Controller Interface Reg | ||
184 | * | ||
185 | * Note there are NO leds on a UV system. This register is only | ||
186 | * used by the system controller to monitor system-wide operation. | ||
187 | * There are 64 regs per node. With Nahelem cpus (2 cores per node, | ||
188 | * 8 cpus per core, 2 threads per cpu) there are 32 cpu threads on | ||
189 | * a node. | ||
190 | * | ||
191 | * The window is located at top of ACPI MMR space | ||
192 | */ | ||
193 | #define SCIR_WINDOW_COUNT 64 | ||
194 | #define SCIR_LOCAL_MMR_BASE (LOCAL_BUS_BASE + \ | ||
195 | LOCAL_BUS_SIZE - \ | ||
196 | SCIR_WINDOW_COUNT) | ||
197 | |||
198 | #define SCIR_CPU_HEARTBEAT 0x01 /* timer interrupt */ | ||
199 | #define SCIR_CPU_ACTIVITY 0x02 /* not idle */ | ||
200 | #define SCIR_CPU_HB_INTERVAL (HZ) /* once per second */ | ||
201 | |||
166 | /* | 202 | /* |
167 | * Macros for converting between kernel virtual addresses, socket local physical | 203 | * Macros for converting between kernel virtual addresses, socket local physical |
168 | * addresses, and UV global physical addresses. | 204 | * addresses, and UV global physical addresses. |
@@ -174,7 +210,7 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); | |||
174 | static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr) | 210 | static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr) |
175 | { | 211 | { |
176 | if (paddr < uv_hub_info->lowmem_remap_top) | 212 | if (paddr < uv_hub_info->lowmem_remap_top) |
177 | paddr += uv_hub_info->lowmem_remap_base; | 213 | paddr |= uv_hub_info->lowmem_remap_base; |
178 | return paddr | uv_hub_info->gnode_upper; | 214 | return paddr | uv_hub_info->gnode_upper; |
179 | } | 215 | } |
180 | 216 | ||
@@ -182,19 +218,7 @@ static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr) | |||
182 | /* socket virtual --> UV global physical address */ | 218 | /* socket virtual --> UV global physical address */ |
183 | static inline unsigned long uv_gpa(void *v) | 219 | static inline unsigned long uv_gpa(void *v) |
184 | { | 220 | { |
185 | return __pa(v) | uv_hub_info->gnode_upper; | 221 | return uv_soc_phys_ram_to_gpa(__pa(v)); |
186 | } | ||
187 | |||
188 | /* socket virtual --> UV global physical address */ | ||
189 | static inline void *uv_vgpa(void *v) | ||
190 | { | ||
191 | return (void *)uv_gpa(v); | ||
192 | } | ||
193 | |||
194 | /* UV global physical address --> socket virtual */ | ||
195 | static inline void *uv_va(unsigned long gpa) | ||
196 | { | ||
197 | return __va(gpa & uv_hub_info->gpa_mask); | ||
198 | } | 222 | } |
199 | 223 | ||
200 | /* pnode, offset --> socket virtual */ | 224 | /* pnode, offset --> socket virtual */ |
@@ -277,6 +301,16 @@ static inline void uv_write_local_mmr(unsigned long offset, unsigned long val) | |||
277 | *uv_local_mmr_address(offset) = val; | 301 | *uv_local_mmr_address(offset) = val; |
278 | } | 302 | } |
279 | 303 | ||
304 | static inline unsigned char uv_read_local_mmr8(unsigned long offset) | ||
305 | { | ||
306 | return *((unsigned char *)uv_local_mmr_address(offset)); | ||
307 | } | ||
308 | |||
309 | static inline void uv_write_local_mmr8(unsigned long offset, unsigned char val) | ||
310 | { | ||
311 | *((unsigned char *)uv_local_mmr_address(offset)) = val; | ||
312 | } | ||
313 | |||
280 | /* | 314 | /* |
281 | * Structures and definitions for converting between cpu, node, pnode, and blade | 315 | * Structures and definitions for converting between cpu, node, pnode, and blade |
282 | * numbers. | 316 | * numbers. |
@@ -351,5 +385,20 @@ static inline int uv_num_possible_blades(void) | |||
351 | return uv_possible_blades; | 385 | return uv_possible_blades; |
352 | } | 386 | } |
353 | 387 | ||
354 | #endif /* _ASM_X86_UV_UV_HUB_H */ | 388 | /* Update SCIR state */ |
389 | static inline void uv_set_scir_bits(unsigned char value) | ||
390 | { | ||
391 | if (uv_hub_info->scir.state != value) { | ||
392 | uv_hub_info->scir.state = value; | ||
393 | uv_write_local_mmr8(uv_hub_info->scir.offset, value); | ||
394 | } | ||
395 | } | ||
396 | static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value) | ||
397 | { | ||
398 | if (uv_cpu_hub_info(cpu)->scir.state != value) { | ||
399 | uv_cpu_hub_info(cpu)->scir.state = value; | ||
400 | uv_write_local_mmr8(uv_cpu_hub_info(cpu)->scir.offset, value); | ||
401 | } | ||
402 | } | ||
355 | 403 | ||
404 | #endif /* _ASM_X86_UV_UV_HUB_H */ | ||
diff --git a/arch/x86/include/asm/vmware.h b/arch/x86/include/asm/vmware.h new file mode 100644 index 000000000000..c11b7e100d83 --- /dev/null +++ b/arch/x86/include/asm/vmware.h | |||
@@ -0,0 +1,27 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008, VMware, Inc. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, but | ||
10 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
12 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
13 | * details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write to the Free Software | ||
17 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | ||
18 | * | ||
19 | */ | ||
20 | #ifndef ASM_X86__VMWARE_H | ||
21 | #define ASM_X86__VMWARE_H | ||
22 | |||
23 | extern unsigned long vmware_get_tsc_khz(void); | ||
24 | extern int vmware_platform(void); | ||
25 | extern void vmware_set_feature_bits(struct cpuinfo_x86 *c); | ||
26 | |||
27 | #endif | ||
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 3f6000d95fe2..5e79ca694326 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h | |||
@@ -33,8 +33,14 @@ | |||
33 | #ifndef _ASM_X86_XEN_HYPERCALL_H | 33 | #ifndef _ASM_X86_XEN_HYPERCALL_H |
34 | #define _ASM_X86_XEN_HYPERCALL_H | 34 | #define _ASM_X86_XEN_HYPERCALL_H |
35 | 35 | ||
36 | #include <linux/kernel.h> | ||
37 | #include <linux/spinlock.h> | ||
36 | #include <linux/errno.h> | 38 | #include <linux/errno.h> |
37 | #include <linux/string.h> | 39 | #include <linux/string.h> |
40 | #include <linux/types.h> | ||
41 | |||
42 | #include <asm/page.h> | ||
43 | #include <asm/pgtable.h> | ||
38 | 44 | ||
39 | #include <xen/interface/xen.h> | 45 | #include <xen/interface/xen.h> |
40 | #include <xen/interface/sched.h> | 46 | #include <xen/interface/sched.h> |
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index a38d25ac87d2..81fbd735aec4 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h | |||
@@ -33,39 +33,10 @@ | |||
33 | #ifndef _ASM_X86_XEN_HYPERVISOR_H | 33 | #ifndef _ASM_X86_XEN_HYPERVISOR_H |
34 | #define _ASM_X86_XEN_HYPERVISOR_H | 34 | #define _ASM_X86_XEN_HYPERVISOR_H |
35 | 35 | ||
36 | #include <linux/types.h> | ||
37 | #include <linux/kernel.h> | ||
38 | |||
39 | #include <xen/interface/xen.h> | ||
40 | #include <xen/interface/version.h> | ||
41 | |||
42 | #include <asm/ptrace.h> | ||
43 | #include <asm/page.h> | ||
44 | #include <asm/desc.h> | ||
45 | #if defined(__i386__) | ||
46 | # ifdef CONFIG_X86_PAE | ||
47 | # include <asm-generic/pgtable-nopud.h> | ||
48 | # else | ||
49 | # include <asm-generic/pgtable-nopmd.h> | ||
50 | # endif | ||
51 | #endif | ||
52 | #include <asm/xen/hypercall.h> | ||
53 | |||
54 | /* arch/i386/kernel/setup.c */ | 36 | /* arch/i386/kernel/setup.c */ |
55 | extern struct shared_info *HYPERVISOR_shared_info; | 37 | extern struct shared_info *HYPERVISOR_shared_info; |
56 | extern struct start_info *xen_start_info; | 38 | extern struct start_info *xen_start_info; |
57 | 39 | ||
58 | /* arch/i386/mach-xen/evtchn.c */ | ||
59 | /* Force a proper event-channel callback from Xen. */ | ||
60 | extern void force_evtchn_callback(void); | ||
61 | |||
62 | /* Turn jiffies into Xen system time. */ | ||
63 | u64 jiffies_to_st(unsigned long jiffies); | ||
64 | |||
65 | |||
66 | #define MULTI_UVMFLAGS_INDEX 3 | ||
67 | #define MULTI_UVMDOMID_INDEX 4 | ||
68 | |||
69 | enum xen_domain_type { | 40 | enum xen_domain_type { |
70 | XEN_NATIVE, | 41 | XEN_NATIVE, |
71 | XEN_PV_DOMAIN, | 42 | XEN_PV_DOMAIN, |
@@ -74,9 +45,15 @@ enum xen_domain_type { | |||
74 | 45 | ||
75 | extern enum xen_domain_type xen_domain_type; | 46 | extern enum xen_domain_type xen_domain_type; |
76 | 47 | ||
48 | #ifdef CONFIG_XEN | ||
77 | #define xen_domain() (xen_domain_type != XEN_NATIVE) | 49 | #define xen_domain() (xen_domain_type != XEN_NATIVE) |
78 | #define xen_pv_domain() (xen_domain_type == XEN_PV_DOMAIN) | 50 | #else |
51 | #define xen_domain() (0) | ||
52 | #endif | ||
53 | |||
54 | #define xen_pv_domain() (xen_domain() && xen_domain_type == XEN_PV_DOMAIN) | ||
55 | #define xen_hvm_domain() (xen_domain() && xen_domain_type == XEN_HVM_DOMAIN) | ||
56 | |||
79 | #define xen_initial_domain() (xen_pv_domain() && xen_start_info->flags & SIF_INITDOMAIN) | 57 | #define xen_initial_domain() (xen_pv_domain() && xen_start_info->flags & SIF_INITDOMAIN) |
80 | #define xen_hvm_domain() (xen_domain_type == XEN_HVM_DOMAIN) | ||
81 | 58 | ||
82 | #endif /* _ASM_X86_XEN_HYPERVISOR_H */ | 59 | #endif /* _ASM_X86_XEN_HYPERVISOR_H */ |
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index bc628998a1b9..7ef617ef1df3 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h | |||
@@ -1,11 +1,16 @@ | |||
1 | #ifndef _ASM_X86_XEN_PAGE_H | 1 | #ifndef _ASM_X86_XEN_PAGE_H |
2 | #define _ASM_X86_XEN_PAGE_H | 2 | #define _ASM_X86_XEN_PAGE_H |
3 | 3 | ||
4 | #include <linux/kernel.h> | ||
5 | #include <linux/types.h> | ||
6 | #include <linux/spinlock.h> | ||
4 | #include <linux/pfn.h> | 7 | #include <linux/pfn.h> |
5 | 8 | ||
6 | #include <asm/uaccess.h> | 9 | #include <asm/uaccess.h> |
10 | #include <asm/page.h> | ||
7 | #include <asm/pgtable.h> | 11 | #include <asm/pgtable.h> |
8 | 12 | ||
13 | #include <xen/interface/xen.h> | ||
9 | #include <xen/features.h> | 14 | #include <xen/features.h> |
10 | 15 | ||
11 | /* Xen machine address */ | 16 | /* Xen machine address */ |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index b62a7667828e..1f208aaee780 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -12,6 +12,7 @@ CFLAGS_REMOVE_tsc.o = -pg | |||
12 | CFLAGS_REMOVE_rtc.o = -pg | 12 | CFLAGS_REMOVE_rtc.o = -pg |
13 | CFLAGS_REMOVE_paravirt-spinlocks.o = -pg | 13 | CFLAGS_REMOVE_paravirt-spinlocks.o = -pg |
14 | CFLAGS_REMOVE_ftrace.o = -pg | 14 | CFLAGS_REMOVE_ftrace.o = -pg |
15 | CFLAGS_REMOVE_early_printk.o = -pg | ||
15 | endif | 16 | endif |
16 | 17 | ||
17 | # | 18 | # |
@@ -23,9 +24,9 @@ CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) | |||
23 | CFLAGS_hpet.o := $(nostackp) | 24 | CFLAGS_hpet.o := $(nostackp) |
24 | CFLAGS_tsc.o := $(nostackp) | 25 | CFLAGS_tsc.o := $(nostackp) |
25 | 26 | ||
26 | obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o | 27 | obj-y := process_$(BITS).o signal.o entry_$(BITS).o |
27 | obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o | 28 | obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o |
28 | obj-y += time_$(BITS).o ioport.o ldt.o | 29 | obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o |
29 | obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o | 30 | obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o |
30 | obj-$(CONFIG_X86_VISWS) += visws_quirks.o | 31 | obj-$(CONFIG_X86_VISWS) += visws_quirks.o |
31 | obj-$(CONFIG_X86_32) += probe_roms_32.o | 32 | obj-$(CONFIG_X86_32) += probe_roms_32.o |
@@ -105,6 +106,8 @@ microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o | |||
105 | microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o | 106 | microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o |
106 | obj-$(CONFIG_MICROCODE) += microcode.o | 107 | obj-$(CONFIG_MICROCODE) += microcode.o |
107 | 108 | ||
109 | obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o | ||
110 | |||
108 | ### | 111 | ### |
109 | # 64 bit specific files | 112 | # 64 bit specific files |
110 | ifeq ($(CONFIG_X86_64),y) | 113 | ifeq ($(CONFIG_X86_64),y) |
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 4c51a2f8fd31..65d0b72777ea 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
@@ -1360,6 +1360,17 @@ static void __init acpi_process_madt(void) | |||
1360 | disable_acpi(); | 1360 | disable_acpi(); |
1361 | } | 1361 | } |
1362 | } | 1362 | } |
1363 | |||
1364 | /* | ||
1365 | * ACPI supports both logical (e.g. Hyper-Threading) and physical | ||
1366 | * processors, where MPS only supports physical. | ||
1367 | */ | ||
1368 | if (acpi_lapic && acpi_ioapic) | ||
1369 | printk(KERN_INFO "Using ACPI (MADT) for SMP configuration " | ||
1370 | "information\n"); | ||
1371 | else if (acpi_lapic) | ||
1372 | printk(KERN_INFO "Using ACPI for processor (LAPIC) " | ||
1373 | "configuration information\n"); | ||
1363 | #endif | 1374 | #endif |
1364 | return; | 1375 | return; |
1365 | } | 1376 | } |
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 0a60d60ed036..2e2da717b350 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/iommu-helper.h> | 24 | #include <linux/iommu-helper.h> |
25 | #include <asm/proto.h> | 25 | #include <asm/proto.h> |
26 | #include <asm/iommu.h> | 26 | #include <asm/iommu.h> |
27 | #include <asm/gart.h> | ||
27 | #include <asm/amd_iommu_types.h> | 28 | #include <asm/amd_iommu_types.h> |
28 | #include <asm/amd_iommu.h> | 29 | #include <asm/amd_iommu.h> |
29 | 30 | ||
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index c6cc22815d35..c625800c55ca 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <asm/amd_iommu_types.h> | 28 | #include <asm/amd_iommu_types.h> |
29 | #include <asm/amd_iommu.h> | 29 | #include <asm/amd_iommu.h> |
30 | #include <asm/iommu.h> | 30 | #include <asm/iommu.h> |
31 | #include <asm/gart.h> | ||
31 | 32 | ||
32 | /* | 33 | /* |
33 | * definitions for the ACPI scanning code | 34 | * definitions for the ACPI scanning code |
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 9a32b37ee2ee..676debfc1702 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c | |||
@@ -1,8 +1,9 @@ | |||
1 | /* | 1 | /* |
2 | * Firmware replacement code. | 2 | * Firmware replacement code. |
3 | * | 3 | * |
4 | * Work around broken BIOSes that don't set an aperture or only set the | 4 | * Work around broken BIOSes that don't set an aperture, only set the |
5 | * aperture in the AGP bridge. | 5 | * aperture in the AGP bridge, or set too small aperture. |
6 | * | ||
6 | * If all fails map the aperture over some low memory. This is cheaper than | 7 | * If all fails map the aperture over some low memory. This is cheaper than |
7 | * doing bounce buffering. The memory is lost. This is done at early boot | 8 | * doing bounce buffering. The memory is lost. This is done at early boot |
8 | * because only the bootmem allocator can allocate 32+MB. | 9 | * because only the bootmem allocator can allocate 32+MB. |
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 16f94879b525..7397911f8478 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c | |||
@@ -441,6 +441,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, | |||
441 | v = apic_read(APIC_LVTT); | 441 | v = apic_read(APIC_LVTT); |
442 | v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); | 442 | v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); |
443 | apic_write(APIC_LVTT, v); | 443 | apic_write(APIC_LVTT, v); |
444 | apic_write(APIC_TMICT, 0xffffffff); | ||
444 | break; | 445 | break; |
445 | case CLOCK_EVT_MODE_RESUME: | 446 | case CLOCK_EVT_MODE_RESUME: |
446 | /* Nothing to do here */ | 447 | /* Nothing to do here */ |
@@ -559,13 +560,13 @@ static int __init calibrate_by_pmtimer(long deltapm, long *delta) | |||
559 | } else { | 560 | } else { |
560 | res = (((u64)deltapm) * mult) >> 22; | 561 | res = (((u64)deltapm) * mult) >> 22; |
561 | do_div(res, 1000000); | 562 | do_div(res, 1000000); |
562 | printk(KERN_WARNING "APIC calibration not consistent " | 563 | pr_warning("APIC calibration not consistent " |
563 | "with PM Timer: %ldms instead of 100ms\n", | 564 | "with PM Timer: %ldms instead of 100ms\n", |
564 | (long)res); | 565 | (long)res); |
565 | /* Correct the lapic counter value */ | 566 | /* Correct the lapic counter value */ |
566 | res = (((u64)(*delta)) * pm_100ms); | 567 | res = (((u64)(*delta)) * pm_100ms); |
567 | do_div(res, deltapm); | 568 | do_div(res, deltapm); |
568 | printk(KERN_INFO "APIC delta adjusted to PM-Timer: " | 569 | pr_info("APIC delta adjusted to PM-Timer: " |
569 | "%lu (%ld)\n", (unsigned long)res, *delta); | 570 | "%lu (%ld)\n", (unsigned long)res, *delta); |
570 | *delta = (long)res; | 571 | *delta = (long)res; |
571 | } | 572 | } |
@@ -645,8 +646,7 @@ static int __init calibrate_APIC_clock(void) | |||
645 | */ | 646 | */ |
646 | if (calibration_result < (1000000 / HZ)) { | 647 | if (calibration_result < (1000000 / HZ)) { |
647 | local_irq_enable(); | 648 | local_irq_enable(); |
648 | printk(KERN_WARNING | 649 | pr_warning("APIC frequency too slow, disabling apic timer\n"); |
649 | "APIC frequency too slow, disabling apic timer\n"); | ||
650 | return -1; | 650 | return -1; |
651 | } | 651 | } |
652 | 652 | ||
@@ -672,13 +672,9 @@ static int __init calibrate_APIC_clock(void) | |||
672 | while (lapic_cal_loops <= LAPIC_CAL_LOOPS) | 672 | while (lapic_cal_loops <= LAPIC_CAL_LOOPS) |
673 | cpu_relax(); | 673 | cpu_relax(); |
674 | 674 | ||
675 | local_irq_disable(); | ||
676 | |||
677 | /* Stop the lapic timer */ | 675 | /* Stop the lapic timer */ |
678 | lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt); | 676 | lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt); |
679 | 677 | ||
680 | local_irq_enable(); | ||
681 | |||
682 | /* Jiffies delta */ | 678 | /* Jiffies delta */ |
683 | deltaj = lapic_cal_j2 - lapic_cal_j1; | 679 | deltaj = lapic_cal_j2 - lapic_cal_j1; |
684 | apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj); | 680 | apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj); |
@@ -692,8 +688,7 @@ static int __init calibrate_APIC_clock(void) | |||
692 | local_irq_enable(); | 688 | local_irq_enable(); |
693 | 689 | ||
694 | if (levt->features & CLOCK_EVT_FEAT_DUMMY) { | 690 | if (levt->features & CLOCK_EVT_FEAT_DUMMY) { |
695 | printk(KERN_WARNING | 691 | pr_warning("APIC timer disabled due to verification failure.\n"); |
696 | "APIC timer disabled due to verification failure.\n"); | ||
697 | return -1; | 692 | return -1; |
698 | } | 693 | } |
699 | 694 | ||
@@ -714,7 +709,7 @@ void __init setup_boot_APIC_clock(void) | |||
714 | * broadcast mechanism is used. On UP systems simply ignore it. | 709 | * broadcast mechanism is used. On UP systems simply ignore it. |
715 | */ | 710 | */ |
716 | if (disable_apic_timer) { | 711 | if (disable_apic_timer) { |
717 | printk(KERN_INFO "Disabling APIC timer\n"); | 712 | pr_info("Disabling APIC timer\n"); |
718 | /* No broadcast on UP ! */ | 713 | /* No broadcast on UP ! */ |
719 | if (num_possible_cpus() > 1) { | 714 | if (num_possible_cpus() > 1) { |
720 | lapic_clockevent.mult = 1; | 715 | lapic_clockevent.mult = 1; |
@@ -741,7 +736,7 @@ void __init setup_boot_APIC_clock(void) | |||
741 | if (nmi_watchdog != NMI_IO_APIC) | 736 | if (nmi_watchdog != NMI_IO_APIC) |
742 | lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; | 737 | lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; |
743 | else | 738 | else |
744 | printk(KERN_WARNING "APIC timer registered as dummy," | 739 | pr_warning("APIC timer registered as dummy," |
745 | " due to nmi_watchdog=%d!\n", nmi_watchdog); | 740 | " due to nmi_watchdog=%d!\n", nmi_watchdog); |
746 | 741 | ||
747 | /* Setup the lapic or request the broadcast */ | 742 | /* Setup the lapic or request the broadcast */ |
@@ -773,8 +768,7 @@ static void local_apic_timer_interrupt(void) | |||
773 | * spurious. | 768 | * spurious. |
774 | */ | 769 | */ |
775 | if (!evt->event_handler) { | 770 | if (!evt->event_handler) { |
776 | printk(KERN_WARNING | 771 | pr_warning("Spurious LAPIC timer interrupt on cpu %d\n", cpu); |
777 | "Spurious LAPIC timer interrupt on cpu %d\n", cpu); | ||
778 | /* Switch it off */ | 772 | /* Switch it off */ |
779 | lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); | 773 | lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); |
780 | return; | 774 | return; |
@@ -783,11 +777,7 @@ static void local_apic_timer_interrupt(void) | |||
783 | /* | 777 | /* |
784 | * the NMI deadlock-detector uses this. | 778 | * the NMI deadlock-detector uses this. |
785 | */ | 779 | */ |
786 | #ifdef CONFIG_X86_64 | 780 | inc_irq_stat(apic_timer_irqs); |
787 | add_pda(apic_timer_irqs, 1); | ||
788 | #else | ||
789 | per_cpu(irq_stat, cpu).apic_timer_irqs++; | ||
790 | #endif | ||
791 | 781 | ||
792 | evt->event_handler(evt); | 782 | evt->event_handler(evt); |
793 | } | 783 | } |
@@ -814,9 +804,7 @@ void smp_apic_timer_interrupt(struct pt_regs *regs) | |||
814 | * Besides, if we don't timer interrupts ignore the global | 804 | * Besides, if we don't timer interrupts ignore the global |
815 | * interrupt lock, which is the WrongThing (tm) to do. | 805 | * interrupt lock, which is the WrongThing (tm) to do. |
816 | */ | 806 | */ |
817 | #ifdef CONFIG_X86_64 | ||
818 | exit_idle(); | 807 | exit_idle(); |
819 | #endif | ||
820 | irq_enter(); | 808 | irq_enter(); |
821 | local_apic_timer_interrupt(); | 809 | local_apic_timer_interrupt(); |
822 | irq_exit(); | 810 | irq_exit(); |
@@ -1093,7 +1081,7 @@ static void __cpuinit lapic_setup_esr(void) | |||
1093 | unsigned int oldvalue, value, maxlvt; | 1081 | unsigned int oldvalue, value, maxlvt; |
1094 | 1082 | ||
1095 | if (!lapic_is_integrated()) { | 1083 | if (!lapic_is_integrated()) { |
1096 | printk(KERN_INFO "No ESR for 82489DX.\n"); | 1084 | pr_info("No ESR for 82489DX.\n"); |
1097 | return; | 1085 | return; |
1098 | } | 1086 | } |
1099 | 1087 | ||
@@ -1104,7 +1092,7 @@ static void __cpuinit lapic_setup_esr(void) | |||
1104 | * ESR disabled - we can't do anything useful with the | 1092 | * ESR disabled - we can't do anything useful with the |
1105 | * errors anyway - mbligh | 1093 | * errors anyway - mbligh |
1106 | */ | 1094 | */ |
1107 | printk(KERN_INFO "Leaving ESR disabled.\n"); | 1095 | pr_info("Leaving ESR disabled.\n"); |
1108 | return; | 1096 | return; |
1109 | } | 1097 | } |
1110 | 1098 | ||
@@ -1298,7 +1286,7 @@ void check_x2apic(void) | |||
1298 | rdmsr(MSR_IA32_APICBASE, msr, msr2); | 1286 | rdmsr(MSR_IA32_APICBASE, msr, msr2); |
1299 | 1287 | ||
1300 | if (msr & X2APIC_ENABLE) { | 1288 | if (msr & X2APIC_ENABLE) { |
1301 | printk("x2apic enabled by BIOS, switching to x2apic ops\n"); | 1289 | pr_info("x2apic enabled by BIOS, switching to x2apic ops\n"); |
1302 | x2apic_preenabled = x2apic = 1; | 1290 | x2apic_preenabled = x2apic = 1; |
1303 | apic_ops = &x2apic_ops; | 1291 | apic_ops = &x2apic_ops; |
1304 | } | 1292 | } |
@@ -1310,7 +1298,7 @@ void enable_x2apic(void) | |||
1310 | 1298 | ||
1311 | rdmsr(MSR_IA32_APICBASE, msr, msr2); | 1299 | rdmsr(MSR_IA32_APICBASE, msr, msr2); |
1312 | if (!(msr & X2APIC_ENABLE)) { | 1300 | if (!(msr & X2APIC_ENABLE)) { |
1313 | printk("Enabling x2apic\n"); | 1301 | pr_info("Enabling x2apic\n"); |
1314 | wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); | 1302 | wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); |
1315 | } | 1303 | } |
1316 | } | 1304 | } |
@@ -1325,9 +1313,8 @@ void __init enable_IR_x2apic(void) | |||
1325 | return; | 1313 | return; |
1326 | 1314 | ||
1327 | if (!x2apic_preenabled && disable_x2apic) { | 1315 | if (!x2apic_preenabled && disable_x2apic) { |
1328 | printk(KERN_INFO | 1316 | pr_info("Skipped enabling x2apic and Interrupt-remapping " |
1329 | "Skipped enabling x2apic and Interrupt-remapping " | 1317 | "because of nox2apic\n"); |
1330 | "because of nox2apic\n"); | ||
1331 | return; | 1318 | return; |
1332 | } | 1319 | } |
1333 | 1320 | ||
@@ -1335,22 +1322,19 @@ void __init enable_IR_x2apic(void) | |||
1335 | panic("Bios already enabled x2apic, can't enforce nox2apic"); | 1322 | panic("Bios already enabled x2apic, can't enforce nox2apic"); |
1336 | 1323 | ||
1337 | if (!x2apic_preenabled && skip_ioapic_setup) { | 1324 | if (!x2apic_preenabled && skip_ioapic_setup) { |
1338 | printk(KERN_INFO | 1325 | pr_info("Skipped enabling x2apic and Interrupt-remapping " |
1339 | "Skipped enabling x2apic and Interrupt-remapping " | 1326 | "because of skipping io-apic setup\n"); |
1340 | "because of skipping io-apic setup\n"); | ||
1341 | return; | 1327 | return; |
1342 | } | 1328 | } |
1343 | 1329 | ||
1344 | ret = dmar_table_init(); | 1330 | ret = dmar_table_init(); |
1345 | if (ret) { | 1331 | if (ret) { |
1346 | printk(KERN_INFO | 1332 | pr_info("dmar_table_init() failed with %d:\n", ret); |
1347 | "dmar_table_init() failed with %d:\n", ret); | ||
1348 | 1333 | ||
1349 | if (x2apic_preenabled) | 1334 | if (x2apic_preenabled) |
1350 | panic("x2apic enabled by bios. But IR enabling failed"); | 1335 | panic("x2apic enabled by bios. But IR enabling failed"); |
1351 | else | 1336 | else |
1352 | printk(KERN_INFO | 1337 | pr_info("Not enabling x2apic,Intr-remapping\n"); |
1353 | "Not enabling x2apic,Intr-remapping\n"); | ||
1354 | return; | 1338 | return; |
1355 | } | 1339 | } |
1356 | 1340 | ||
@@ -1359,7 +1343,7 @@ void __init enable_IR_x2apic(void) | |||
1359 | 1343 | ||
1360 | ret = save_mask_IO_APIC_setup(); | 1344 | ret = save_mask_IO_APIC_setup(); |
1361 | if (ret) { | 1345 | if (ret) { |
1362 | printk(KERN_INFO "Saving IO-APIC state failed: %d\n", ret); | 1346 | pr_info("Saving IO-APIC state failed: %d\n", ret); |
1363 | goto end; | 1347 | goto end; |
1364 | } | 1348 | } |
1365 | 1349 | ||
@@ -1394,14 +1378,11 @@ end: | |||
1394 | 1378 | ||
1395 | if (!ret) { | 1379 | if (!ret) { |
1396 | if (!x2apic_preenabled) | 1380 | if (!x2apic_preenabled) |
1397 | printk(KERN_INFO | 1381 | pr_info("Enabled x2apic and interrupt-remapping\n"); |
1398 | "Enabled x2apic and interrupt-remapping\n"); | ||
1399 | else | 1382 | else |
1400 | printk(KERN_INFO | 1383 | pr_info("Enabled Interrupt-remapping\n"); |
1401 | "Enabled Interrupt-remapping\n"); | ||
1402 | } else | 1384 | } else |
1403 | printk(KERN_ERR | 1385 | pr_err("Failed to enable Interrupt-remapping and x2apic\n"); |
1404 | "Failed to enable Interrupt-remapping and x2apic\n"); | ||
1405 | #else | 1386 | #else |
1406 | if (!cpu_has_x2apic) | 1387 | if (!cpu_has_x2apic) |
1407 | return; | 1388 | return; |
@@ -1410,8 +1391,8 @@ end: | |||
1410 | panic("x2apic enabled prior OS handover," | 1391 | panic("x2apic enabled prior OS handover," |
1411 | " enable CONFIG_INTR_REMAP"); | 1392 | " enable CONFIG_INTR_REMAP"); |
1412 | 1393 | ||
1413 | printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping " | 1394 | pr_info("Enable CONFIG_INTR_REMAP for enabling intr-remapping " |
1414 | " and x2apic\n"); | 1395 | " and x2apic\n"); |
1415 | #endif | 1396 | #endif |
1416 | 1397 | ||
1417 | return; | 1398 | return; |
@@ -1428,7 +1409,7 @@ end: | |||
1428 | static int __init detect_init_APIC(void) | 1409 | static int __init detect_init_APIC(void) |
1429 | { | 1410 | { |
1430 | if (!cpu_has_apic) { | 1411 | if (!cpu_has_apic) { |
1431 | printk(KERN_INFO "No local APIC present\n"); | 1412 | pr_info("No local APIC present\n"); |
1432 | return -1; | 1413 | return -1; |
1433 | } | 1414 | } |
1434 | 1415 | ||
@@ -1469,8 +1450,8 @@ static int __init detect_init_APIC(void) | |||
1469 | * "lapic" specified. | 1450 | * "lapic" specified. |
1470 | */ | 1451 | */ |
1471 | if (!force_enable_local_apic) { | 1452 | if (!force_enable_local_apic) { |
1472 | printk(KERN_INFO "Local APIC disabled by BIOS -- " | 1453 | pr_info("Local APIC disabled by BIOS -- " |
1473 | "you can enable it with \"lapic\"\n"); | 1454 | "you can enable it with \"lapic\"\n"); |
1474 | return -1; | 1455 | return -1; |
1475 | } | 1456 | } |
1476 | /* | 1457 | /* |
@@ -1480,8 +1461,7 @@ static int __init detect_init_APIC(void) | |||
1480 | */ | 1461 | */ |
1481 | rdmsr(MSR_IA32_APICBASE, l, h); | 1462 | rdmsr(MSR_IA32_APICBASE, l, h); |
1482 | if (!(l & MSR_IA32_APICBASE_ENABLE)) { | 1463 | if (!(l & MSR_IA32_APICBASE_ENABLE)) { |
1483 | printk(KERN_INFO | 1464 | pr_info("Local APIC disabled by BIOS -- reenabling.\n"); |
1484 | "Local APIC disabled by BIOS -- reenabling.\n"); | ||
1485 | l &= ~MSR_IA32_APICBASE_BASE; | 1465 | l &= ~MSR_IA32_APICBASE_BASE; |
1486 | l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; | 1466 | l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; |
1487 | wrmsr(MSR_IA32_APICBASE, l, h); | 1467 | wrmsr(MSR_IA32_APICBASE, l, h); |
@@ -1494,7 +1474,7 @@ static int __init detect_init_APIC(void) | |||
1494 | */ | 1474 | */ |
1495 | features = cpuid_edx(1); | 1475 | features = cpuid_edx(1); |
1496 | if (!(features & (1 << X86_FEATURE_APIC))) { | 1476 | if (!(features & (1 << X86_FEATURE_APIC))) { |
1497 | printk(KERN_WARNING "Could not enable APIC!\n"); | 1477 | pr_warning("Could not enable APIC!\n"); |
1498 | return -1; | 1478 | return -1; |
1499 | } | 1479 | } |
1500 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); | 1480 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); |
@@ -1505,14 +1485,14 @@ static int __init detect_init_APIC(void) | |||
1505 | if (l & MSR_IA32_APICBASE_ENABLE) | 1485 | if (l & MSR_IA32_APICBASE_ENABLE) |
1506 | mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; | 1486 | mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; |
1507 | 1487 | ||
1508 | printk(KERN_INFO "Found and enabled local APIC!\n"); | 1488 | pr_info("Found and enabled local APIC!\n"); |
1509 | 1489 | ||
1510 | apic_pm_activate(); | 1490 | apic_pm_activate(); |
1511 | 1491 | ||
1512 | return 0; | 1492 | return 0; |
1513 | 1493 | ||
1514 | no_apic: | 1494 | no_apic: |
1515 | printk(KERN_INFO "No local APIC present or hardware disabled\n"); | 1495 | pr_info("No local APIC present or hardware disabled\n"); |
1516 | return -1; | 1496 | return -1; |
1517 | } | 1497 | } |
1518 | #endif | 1498 | #endif |
@@ -1588,12 +1568,12 @@ int __init APIC_init_uniprocessor(void) | |||
1588 | { | 1568 | { |
1589 | #ifdef CONFIG_X86_64 | 1569 | #ifdef CONFIG_X86_64 |
1590 | if (disable_apic) { | 1570 | if (disable_apic) { |
1591 | printk(KERN_INFO "Apic disabled\n"); | 1571 | pr_info("Apic disabled\n"); |
1592 | return -1; | 1572 | return -1; |
1593 | } | 1573 | } |
1594 | if (!cpu_has_apic) { | 1574 | if (!cpu_has_apic) { |
1595 | disable_apic = 1; | 1575 | disable_apic = 1; |
1596 | printk(KERN_INFO "Apic disabled by BIOS\n"); | 1576 | pr_info("Apic disabled by BIOS\n"); |
1597 | return -1; | 1577 | return -1; |
1598 | } | 1578 | } |
1599 | #else | 1579 | #else |
@@ -1605,8 +1585,8 @@ int __init APIC_init_uniprocessor(void) | |||
1605 | */ | 1585 | */ |
1606 | if (!cpu_has_apic && | 1586 | if (!cpu_has_apic && |
1607 | APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { | 1587 | APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { |
1608 | printk(KERN_ERR "BIOS bug, local APIC 0x%x not detected!...\n", | 1588 | pr_err("BIOS bug, local APIC 0x%x not detected!...\n", |
1609 | boot_cpu_physical_apicid); | 1589 | boot_cpu_physical_apicid); |
1610 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); | 1590 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); |
1611 | return -1; | 1591 | return -1; |
1612 | } | 1592 | } |
@@ -1682,9 +1662,7 @@ void smp_spurious_interrupt(struct pt_regs *regs) | |||
1682 | { | 1662 | { |
1683 | u32 v; | 1663 | u32 v; |
1684 | 1664 | ||
1685 | #ifdef CONFIG_X86_64 | ||
1686 | exit_idle(); | 1665 | exit_idle(); |
1687 | #endif | ||
1688 | irq_enter(); | 1666 | irq_enter(); |
1689 | /* | 1667 | /* |
1690 | * Check if this really is a spurious interrupt and ACK it | 1668 | * Check if this really is a spurious interrupt and ACK it |
@@ -1695,14 +1673,11 @@ void smp_spurious_interrupt(struct pt_regs *regs) | |||
1695 | if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) | 1673 | if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) |
1696 | ack_APIC_irq(); | 1674 | ack_APIC_irq(); |
1697 | 1675 | ||
1698 | #ifdef CONFIG_X86_64 | 1676 | inc_irq_stat(irq_spurious_count); |
1699 | add_pda(irq_spurious_count, 1); | 1677 | |
1700 | #else | ||
1701 | /* see sw-dev-man vol 3, chapter 7.4.13.5 */ | 1678 | /* see sw-dev-man vol 3, chapter 7.4.13.5 */ |
1702 | printk(KERN_INFO "spurious APIC interrupt on CPU#%d, " | 1679 | pr_info("spurious APIC interrupt on CPU#%d, " |
1703 | "should never happen.\n", smp_processor_id()); | 1680 | "should never happen.\n", smp_processor_id()); |
1704 | __get_cpu_var(irq_stat).irq_spurious_count++; | ||
1705 | #endif | ||
1706 | irq_exit(); | 1681 | irq_exit(); |
1707 | } | 1682 | } |
1708 | 1683 | ||
@@ -1713,9 +1688,7 @@ void smp_error_interrupt(struct pt_regs *regs) | |||
1713 | { | 1688 | { |
1714 | u32 v, v1; | 1689 | u32 v, v1; |
1715 | 1690 | ||
1716 | #ifdef CONFIG_X86_64 | ||
1717 | exit_idle(); | 1691 | exit_idle(); |
1718 | #endif | ||
1719 | irq_enter(); | 1692 | irq_enter(); |
1720 | /* First tickle the hardware, only then report what went on. -- REW */ | 1693 | /* First tickle the hardware, only then report what went on. -- REW */ |
1721 | v = apic_read(APIC_ESR); | 1694 | v = apic_read(APIC_ESR); |
@@ -1724,17 +1697,18 @@ void smp_error_interrupt(struct pt_regs *regs) | |||
1724 | ack_APIC_irq(); | 1697 | ack_APIC_irq(); |
1725 | atomic_inc(&irq_err_count); | 1698 | atomic_inc(&irq_err_count); |
1726 | 1699 | ||
1727 | /* Here is what the APIC error bits mean: | 1700 | /* |
1728 | 0: Send CS error | 1701 | * Here is what the APIC error bits mean: |
1729 | 1: Receive CS error | 1702 | * 0: Send CS error |
1730 | 2: Send accept error | 1703 | * 1: Receive CS error |
1731 | 3: Receive accept error | 1704 | * 2: Send accept error |
1732 | 4: Reserved | 1705 | * 3: Receive accept error |
1733 | 5: Send illegal vector | 1706 | * 4: Reserved |
1734 | 6: Received illegal vector | 1707 | * 5: Send illegal vector |
1735 | 7: Illegal register address | 1708 | * 6: Received illegal vector |
1736 | */ | 1709 | * 7: Illegal register address |
1737 | printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n", | 1710 | */ |
1711 | pr_debug("APIC error on CPU%d: %02x(%02x)\n", | ||
1738 | smp_processor_id(), v , v1); | 1712 | smp_processor_id(), v , v1); |
1739 | irq_exit(); | 1713 | irq_exit(); |
1740 | } | 1714 | } |
@@ -1838,15 +1812,15 @@ void __cpuinit generic_processor_info(int apicid, int version) | |||
1838 | * Validate version | 1812 | * Validate version |
1839 | */ | 1813 | */ |
1840 | if (version == 0x0) { | 1814 | if (version == 0x0) { |
1841 | printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! " | 1815 | pr_warning("BIOS bug, APIC version is 0 for CPU#%d! " |
1842 | "fixing up to 0x10. (tell your hw vendor)\n", | 1816 | "fixing up to 0x10. (tell your hw vendor)\n", |
1843 | version); | 1817 | version); |
1844 | version = 0x10; | 1818 | version = 0x10; |
1845 | } | 1819 | } |
1846 | apic_version[apicid] = version; | 1820 | apic_version[apicid] = version; |
1847 | 1821 | ||
1848 | if (num_processors >= NR_CPUS) { | 1822 | if (num_processors >= NR_CPUS) { |
1849 | printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." | 1823 | pr_warning("WARNING: NR_CPUS limit of %i reached." |
1850 | " Processor ignored.\n", NR_CPUS); | 1824 | " Processor ignored.\n", NR_CPUS); |
1851 | return; | 1825 | return; |
1852 | } | 1826 | } |
@@ -2209,7 +2183,7 @@ static int __init apic_set_verbosity(char *arg) | |||
2209 | else if (strcmp("verbose", arg) == 0) | 2183 | else if (strcmp("verbose", arg) == 0) |
2210 | apic_verbosity = APIC_VERBOSE; | 2184 | apic_verbosity = APIC_VERBOSE; |
2211 | else { | 2185 | else { |
2212 | printk(KERN_WARNING "APIC Verbosity level %s not recognised" | 2186 | pr_warning("APIC Verbosity level %s not recognised" |
2213 | " use apic=verbose or apic=debug\n", arg); | 2187 | " use apic=verbose or apic=debug\n", arg); |
2214 | return -EINVAL; | 2188 | return -EINVAL; |
2215 | } | 2189 | } |
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 5145a6e72bbb..3a26525a3f31 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
@@ -391,11 +391,7 @@ static int power_off; | |||
391 | #else | 391 | #else |
392 | static int power_off = 1; | 392 | static int power_off = 1; |
393 | #endif | 393 | #endif |
394 | #ifdef CONFIG_APM_REAL_MODE_POWER_OFF | ||
395 | static int realmode_power_off = 1; | ||
396 | #else | ||
397 | static int realmode_power_off; | 394 | static int realmode_power_off; |
398 | #endif | ||
399 | #ifdef CONFIG_APM_ALLOW_INTS | 395 | #ifdef CONFIG_APM_ALLOW_INTS |
400 | static int allow_ints = 1; | 396 | static int allow_ints = 1; |
401 | #else | 397 | #else |
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 6649d09ad88f..ee4df08feee6 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c | |||
@@ -11,7 +11,7 @@ | |||
11 | #include <linux/suspend.h> | 11 | #include <linux/suspend.h> |
12 | #include <linux/kbuild.h> | 12 | #include <linux/kbuild.h> |
13 | #include <asm/ucontext.h> | 13 | #include <asm/ucontext.h> |
14 | #include "sigframe.h" | 14 | #include <asm/sigframe.h> |
15 | #include <asm/pgtable.h> | 15 | #include <asm/pgtable.h> |
16 | #include <asm/fixmap.h> | 16 | #include <asm/fixmap.h> |
17 | #include <asm/processor.h> | 17 | #include <asm/processor.h> |
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 7fcf63d22f8b..1d41d3f1edbc 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c | |||
@@ -20,6 +20,8 @@ | |||
20 | 20 | ||
21 | #include <xen/interface/xen.h> | 21 | #include <xen/interface/xen.h> |
22 | 22 | ||
23 | #include <asm/sigframe.h> | ||
24 | |||
23 | #define __NO_STUBS 1 | 25 | #define __NO_STUBS 1 |
24 | #undef __SYSCALL | 26 | #undef __SYSCALL |
25 | #undef _ASM_X86_UNISTD_64_H | 27 | #undef _ASM_X86_UNISTD_64_H |
@@ -87,7 +89,7 @@ int main(void) | |||
87 | BLANK(); | 89 | BLANK(); |
88 | #undef ENTRY | 90 | #undef ENTRY |
89 | DEFINE(IA32_RT_SIGFRAME_sigcontext, | 91 | DEFINE(IA32_RT_SIGFRAME_sigcontext, |
90 | offsetof (struct rt_sigframe32, uc.uc_mcontext)); | 92 | offsetof (struct rt_sigframe_ia32, uc.uc_mcontext)); |
91 | BLANK(); | 93 | BLANK(); |
92 | #endif | 94 | #endif |
93 | DEFINE(pbe_address, offsetof(struct pbe, address)); | 95 | DEFINE(pbe_address, offsetof(struct pbe, address)); |
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c index f0dfe6f17e7e..2a0a2a3cac26 100644 --- a/arch/x86/kernel/bios_uv.c +++ b/arch/x86/kernel/bios_uv.c | |||
@@ -69,10 +69,10 @@ s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, | |||
69 | 69 | ||
70 | long sn_partition_id; | 70 | long sn_partition_id; |
71 | EXPORT_SYMBOL_GPL(sn_partition_id); | 71 | EXPORT_SYMBOL_GPL(sn_partition_id); |
72 | long uv_coherency_id; | 72 | long sn_coherency_id; |
73 | EXPORT_SYMBOL_GPL(uv_coherency_id); | 73 | EXPORT_SYMBOL_GPL(sn_coherency_id); |
74 | long uv_region_size; | 74 | long sn_region_size; |
75 | EXPORT_SYMBOL_GPL(uv_region_size); | 75 | EXPORT_SYMBOL_GPL(sn_region_size); |
76 | int uv_type; | 76 | int uv_type; |
77 | 77 | ||
78 | 78 | ||
@@ -100,6 +100,56 @@ s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher, | |||
100 | return ret; | 100 | return ret; |
101 | } | 101 | } |
102 | 102 | ||
103 | int | ||
104 | uv_bios_mq_watchlist_alloc(int blade, unsigned long addr, unsigned int mq_size, | ||
105 | unsigned long *intr_mmr_offset) | ||
106 | { | ||
107 | union uv_watchlist_u size_blade; | ||
108 | u64 watchlist; | ||
109 | s64 ret; | ||
110 | |||
111 | size_blade.size = mq_size; | ||
112 | size_blade.blade = blade; | ||
113 | |||
114 | /* | ||
115 | * bios returns watchlist number or negative error number. | ||
116 | */ | ||
117 | ret = (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_ALLOC, addr, | ||
118 | size_blade.val, (u64)intr_mmr_offset, | ||
119 | (u64)&watchlist, 0); | ||
120 | if (ret < BIOS_STATUS_SUCCESS) | ||
121 | return ret; | ||
122 | |||
123 | return watchlist; | ||
124 | } | ||
125 | EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_alloc); | ||
126 | |||
127 | int | ||
128 | uv_bios_mq_watchlist_free(int blade, int watchlist_num) | ||
129 | { | ||
130 | return (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_FREE, | ||
131 | blade, watchlist_num, 0, 0, 0); | ||
132 | } | ||
133 | EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_free); | ||
134 | |||
135 | s64 | ||
136 | uv_bios_change_memprotect(u64 paddr, u64 len, enum uv_memprotect perms) | ||
137 | { | ||
138 | return uv_bios_call_irqsave(UV_BIOS_MEMPROTECT, paddr, len, | ||
139 | perms, 0, 0); | ||
140 | } | ||
141 | EXPORT_SYMBOL_GPL(uv_bios_change_memprotect); | ||
142 | |||
143 | s64 | ||
144 | uv_bios_reserved_page_pa(u64 buf, u64 *cookie, u64 *addr, u64 *len) | ||
145 | { | ||
146 | s64 ret; | ||
147 | |||
148 | ret = uv_bios_call_irqsave(UV_BIOS_GET_PARTITION_ADDR, (u64)cookie, | ||
149 | (u64)addr, buf, (u64)len, 0); | ||
150 | return ret; | ||
151 | } | ||
152 | EXPORT_SYMBOL_GPL(uv_bios_reserved_page_pa); | ||
103 | 153 | ||
104 | s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second) | 154 | s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second) |
105 | { | 155 | { |
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c new file mode 100644 index 000000000000..2ac0ab71412a --- /dev/null +++ b/arch/x86/kernel/check.c | |||
@@ -0,0 +1,161 @@ | |||
1 | #include <linux/module.h> | ||
2 | #include <linux/sched.h> | ||
3 | #include <linux/kthread.h> | ||
4 | #include <linux/workqueue.h> | ||
5 | #include <asm/e820.h> | ||
6 | #include <asm/proto.h> | ||
7 | |||
8 | /* | ||
9 | * Some BIOSes seem to corrupt the low 64k of memory during events | ||
10 | * like suspend/resume and unplugging an HDMI cable. Reserve all | ||
11 | * remaining free memory in that area and fill it with a distinct | ||
12 | * pattern. | ||
13 | */ | ||
14 | #define MAX_SCAN_AREAS 8 | ||
15 | |||
16 | static int __read_mostly memory_corruption_check = -1; | ||
17 | |||
18 | static unsigned __read_mostly corruption_check_size = 64*1024; | ||
19 | static unsigned __read_mostly corruption_check_period = 60; /* seconds */ | ||
20 | |||
21 | static struct e820entry scan_areas[MAX_SCAN_AREAS]; | ||
22 | static int num_scan_areas; | ||
23 | |||
24 | |||
25 | static __init int set_corruption_check(char *arg) | ||
26 | { | ||
27 | char *end; | ||
28 | |||
29 | memory_corruption_check = simple_strtol(arg, &end, 10); | ||
30 | |||
31 | return (*end == 0) ? 0 : -EINVAL; | ||
32 | } | ||
33 | early_param("memory_corruption_check", set_corruption_check); | ||
34 | |||
35 | static __init int set_corruption_check_period(char *arg) | ||
36 | { | ||
37 | char *end; | ||
38 | |||
39 | corruption_check_period = simple_strtoul(arg, &end, 10); | ||
40 | |||
41 | return (*end == 0) ? 0 : -EINVAL; | ||
42 | } | ||
43 | early_param("memory_corruption_check_period", set_corruption_check_period); | ||
44 | |||
45 | static __init int set_corruption_check_size(char *arg) | ||
46 | { | ||
47 | char *end; | ||
48 | unsigned size; | ||
49 | |||
50 | size = memparse(arg, &end); | ||
51 | |||
52 | if (*end == '\0') | ||
53 | corruption_check_size = size; | ||
54 | |||
55 | return (size == corruption_check_size) ? 0 : -EINVAL; | ||
56 | } | ||
57 | early_param("memory_corruption_check_size", set_corruption_check_size); | ||
58 | |||
59 | |||
60 | void __init setup_bios_corruption_check(void) | ||
61 | { | ||
62 | u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */ | ||
63 | |||
64 | if (memory_corruption_check == -1) { | ||
65 | memory_corruption_check = | ||
66 | #ifdef CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK | ||
67 | 1 | ||
68 | #else | ||
69 | 0 | ||
70 | #endif | ||
71 | ; | ||
72 | } | ||
73 | |||
74 | if (corruption_check_size == 0) | ||
75 | memory_corruption_check = 0; | ||
76 | |||
77 | if (!memory_corruption_check) | ||
78 | return; | ||
79 | |||
80 | corruption_check_size = round_up(corruption_check_size, PAGE_SIZE); | ||
81 | |||
82 | while (addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) { | ||
83 | u64 size; | ||
84 | addr = find_e820_area_size(addr, &size, PAGE_SIZE); | ||
85 | |||
86 | if (addr == 0) | ||
87 | break; | ||
88 | |||
89 | if ((addr + size) > corruption_check_size) | ||
90 | size = corruption_check_size - addr; | ||
91 | |||
92 | if (size == 0) | ||
93 | break; | ||
94 | |||
95 | e820_update_range(addr, size, E820_RAM, E820_RESERVED); | ||
96 | scan_areas[num_scan_areas].addr = addr; | ||
97 | scan_areas[num_scan_areas].size = size; | ||
98 | num_scan_areas++; | ||
99 | |||
100 | /* Assume we've already mapped this early memory */ | ||
101 | memset(__va(addr), 0, size); | ||
102 | |||
103 | addr += size; | ||
104 | } | ||
105 | |||
106 | printk(KERN_INFO "Scanning %d areas for low memory corruption\n", | ||
107 | num_scan_areas); | ||
108 | update_e820(); | ||
109 | } | ||
110 | |||
111 | |||
112 | void check_for_bios_corruption(void) | ||
113 | { | ||
114 | int i; | ||
115 | int corruption = 0; | ||
116 | |||
117 | if (!memory_corruption_check) | ||
118 | return; | ||
119 | |||
120 | for (i = 0; i < num_scan_areas; i++) { | ||
121 | unsigned long *addr = __va(scan_areas[i].addr); | ||
122 | unsigned long size = scan_areas[i].size; | ||
123 | |||
124 | for (; size; addr++, size -= sizeof(unsigned long)) { | ||
125 | if (!*addr) | ||
126 | continue; | ||
127 | printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n", | ||
128 | addr, __pa(addr), *addr); | ||
129 | corruption = 1; | ||
130 | *addr = 0; | ||
131 | } | ||
132 | } | ||
133 | |||
134 | WARN_ONCE(corruption, KERN_ERR "Memory corruption detected in low memory\n"); | ||
135 | } | ||
136 | |||
137 | static void check_corruption(struct work_struct *dummy); | ||
138 | static DECLARE_DELAYED_WORK(bios_check_work, check_corruption); | ||
139 | |||
140 | static void check_corruption(struct work_struct *dummy) | ||
141 | { | ||
142 | check_for_bios_corruption(); | ||
143 | schedule_delayed_work(&bios_check_work, | ||
144 | round_jiffies_relative(corruption_check_period*HZ)); | ||
145 | } | ||
146 | |||
147 | static int start_periodic_check_for_corruption(void) | ||
148 | { | ||
149 | if (!memory_corruption_check || corruption_check_period == 0) | ||
150 | return 0; | ||
151 | |||
152 | printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n", | ||
153 | corruption_check_period); | ||
154 | |||
155 | /* First time we run the checks right away */ | ||
156 | schedule_delayed_work(&bios_check_work, 0); | ||
157 | return 0; | ||
158 | } | ||
159 | |||
160 | module_init(start_periodic_check_for_corruption); | ||
161 | |||
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 82ec6075c057..a5c04e88777e 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
@@ -4,6 +4,7 @@ | |||
4 | 4 | ||
5 | obj-y := intel_cacheinfo.o addon_cpuid_features.o | 5 | obj-y := intel_cacheinfo.o addon_cpuid_features.o |
6 | obj-y += proc.o capflags.o powerflags.o common.o | 6 | obj-y += proc.o capflags.o powerflags.o common.o |
7 | obj-y += vmware.o hypervisor.o | ||
7 | 8 | ||
8 | obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o | 9 | obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o |
9 | obj-$(CONFIG_X86_64) += bugs_64.o | 10 | obj-$(CONFIG_X86_64) += bugs_64.o |
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index ef8f831af823..2cf23634b6d9 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c | |||
@@ -120,9 +120,17 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) | |||
120 | c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width) | 120 | c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width) |
121 | & core_select_mask; | 121 | & core_select_mask; |
122 | c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width); | 122 | c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width); |
123 | /* | ||
124 | * Reinit the apicid, now that we have extended initial_apicid. | ||
125 | */ | ||
126 | c->apicid = phys_pkg_id(c->initial_apicid, 0); | ||
123 | #else | 127 | #else |
124 | c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask; | 128 | c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask; |
125 | c->phys_proc_id = phys_pkg_id(core_plus_mask_width); | 129 | c->phys_proc_id = phys_pkg_id(core_plus_mask_width); |
130 | /* | ||
131 | * Reinit the apicid, now that we have extended initial_apicid. | ||
132 | */ | ||
133 | c->apicid = phys_pkg_id(0); | ||
126 | #endif | 134 | #endif |
127 | c->x86_max_cores = (core_level_siblings / smp_num_siblings); | 135 | c->x86_max_cores = (core_level_siblings / smp_num_siblings); |
128 | 136 | ||
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 8f1e31db2ad5..7c878f6aa919 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -283,9 +283,14 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) | |||
283 | { | 283 | { |
284 | early_init_amd_mc(c); | 284 | early_init_amd_mc(c); |
285 | 285 | ||
286 | /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ | 286 | /* |
287 | if (c->x86_power & (1<<8)) | 287 | * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate |
288 | * with P/T states and does not stop in deep C-states | ||
289 | */ | ||
290 | if (c->x86_power & (1 << 8)) { | ||
288 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 291 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
292 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); | ||
293 | } | ||
289 | 294 | ||
290 | #ifdef CONFIG_X86_64 | 295 | #ifdef CONFIG_X86_64 |
291 | set_cpu_cap(c, X86_FEATURE_SYSCALL32); | 296 | set_cpu_cap(c, X86_FEATURE_SYSCALL32); |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b9c9ea0217a9..42e0853030cb 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <asm/proto.h> | 36 | #include <asm/proto.h> |
37 | #include <asm/sections.h> | 37 | #include <asm/sections.h> |
38 | #include <asm/setup.h> | 38 | #include <asm/setup.h> |
39 | #include <asm/hypervisor.h> | ||
39 | 40 | ||
40 | #include "cpu.h" | 41 | #include "cpu.h" |
41 | 42 | ||
@@ -703,6 +704,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
703 | detect_ht(c); | 704 | detect_ht(c); |
704 | #endif | 705 | #endif |
705 | 706 | ||
707 | init_hypervisor(c); | ||
706 | /* | 708 | /* |
707 | * On SMP, boot_cpu_data holds the common feature set between | 709 | * On SMP, boot_cpu_data holds the common feature set between |
708 | * all CPUs; so make sure that we indicate which features are | 710 | * all CPUs; so make sure that we indicate which features are |
@@ -862,7 +864,7 @@ EXPORT_SYMBOL(_cpu_pda); | |||
862 | 864 | ||
863 | struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; | 865 | struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; |
864 | 866 | ||
865 | char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; | 867 | static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; |
866 | 868 | ||
867 | void __cpuinit pda_init(int cpu) | 869 | void __cpuinit pda_init(int cpu) |
868 | { | 870 | { |
@@ -903,8 +905,8 @@ void __cpuinit pda_init(int cpu) | |||
903 | } | 905 | } |
904 | } | 906 | } |
905 | 907 | ||
906 | char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + | 908 | static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + |
907 | DEBUG_STKSZ] __page_aligned_bss; | 909 | DEBUG_STKSZ] __page_aligned_bss; |
908 | 910 | ||
909 | extern asmlinkage void ignore_sysret(void); | 911 | extern asmlinkage void ignore_sysret(void); |
910 | 912 | ||
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c new file mode 100644 index 000000000000..fb5b86af0b01 --- /dev/null +++ b/arch/x86/kernel/cpu/hypervisor.c | |||
@@ -0,0 +1,58 @@ | |||
1 | /* | ||
2 | * Common hypervisor code | ||
3 | * | ||
4 | * Copyright (C) 2008, VMware, Inc. | ||
5 | * Author : Alok N Kataria <akataria@vmware.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, but | ||
13 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
15 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
16 | * details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to the Free Software | ||
20 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #include <asm/processor.h> | ||
25 | #include <asm/vmware.h> | ||
26 | #include <asm/hypervisor.h> | ||
27 | |||
28 | static inline void __cpuinit | ||
29 | detect_hypervisor_vendor(struct cpuinfo_x86 *c) | ||
30 | { | ||
31 | if (vmware_platform()) { | ||
32 | c->x86_hyper_vendor = X86_HYPER_VENDOR_VMWARE; | ||
33 | } else { | ||
34 | c->x86_hyper_vendor = X86_HYPER_VENDOR_NONE; | ||
35 | } | ||
36 | } | ||
37 | |||
38 | unsigned long get_hypervisor_tsc_freq(void) | ||
39 | { | ||
40 | if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE) | ||
41 | return vmware_get_tsc_khz(); | ||
42 | return 0; | ||
43 | } | ||
44 | |||
45 | static inline void __cpuinit | ||
46 | hypervisor_set_feature_bits(struct cpuinfo_x86 *c) | ||
47 | { | ||
48 | if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE) { | ||
49 | vmware_set_feature_bits(c); | ||
50 | return; | ||
51 | } | ||
52 | } | ||
53 | |||
54 | void __cpuinit init_hypervisor(struct cpuinfo_x86 *c) | ||
55 | { | ||
56 | detect_hypervisor_vendor(c); | ||
57 | hypervisor_set_feature_bits(c); | ||
58 | } | ||
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index cce0b6118d55..ccfd2047630c 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -41,6 +41,16 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | |||
41 | if (c->x86 == 15 && c->x86_cache_alignment == 64) | 41 | if (c->x86 == 15 && c->x86_cache_alignment == 64) |
42 | c->x86_cache_alignment = 128; | 42 | c->x86_cache_alignment = 128; |
43 | #endif | 43 | #endif |
44 | |||
45 | /* | ||
46 | * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate | ||
47 | * with P/T states and does not stop in deep C-states | ||
48 | */ | ||
49 | if (c->x86_power & (1 << 8)) { | ||
50 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | ||
51 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); | ||
52 | } | ||
53 | |||
44 | } | 54 | } |
45 | 55 | ||
46 | #ifdef CONFIG_X86_32 | 56 | #ifdef CONFIG_X86_32 |
@@ -242,6 +252,13 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
242 | 252 | ||
243 | intel_workarounds(c); | 253 | intel_workarounds(c); |
244 | 254 | ||
255 | /* | ||
256 | * Detect the extended topology information if available. This | ||
257 | * will reinitialise the initial_apicid which will be used | ||
258 | * in init_intel_cacheinfo() | ||
259 | */ | ||
260 | detect_extended_topology(c); | ||
261 | |||
245 | l2 = init_intel_cacheinfo(c); | 262 | l2 = init_intel_cacheinfo(c); |
246 | if (c->cpuid_level > 9) { | 263 | if (c->cpuid_level > 9) { |
247 | unsigned eax = cpuid_eax(10); | 264 | unsigned eax = cpuid_eax(10); |
@@ -307,13 +324,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
307 | set_cpu_cap(c, X86_FEATURE_P4); | 324 | set_cpu_cap(c, X86_FEATURE_P4); |
308 | if (c->x86 == 6) | 325 | if (c->x86 == 6) |
309 | set_cpu_cap(c, X86_FEATURE_P3); | 326 | set_cpu_cap(c, X86_FEATURE_P3); |
327 | #endif | ||
310 | 328 | ||
311 | if (cpu_has_bts) | 329 | if (cpu_has_bts) |
312 | ptrace_bts_init_intel(c); | 330 | ptrace_bts_init_intel(c); |
313 | 331 | ||
314 | #endif | ||
315 | |||
316 | detect_extended_topology(c); | ||
317 | if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { | 332 | if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { |
318 | /* | 333 | /* |
319 | * let's use the legacy cpuid vector 0x1 and 0x4 for topology | 334 | * let's use the legacy cpuid vector 0x1 and 0x4 for topology |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 3f46afbb1cf1..68b5d8681cbb 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -644,20 +644,17 @@ static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf) | |||
644 | return show_shared_cpu_map_func(leaf, 1, buf); | 644 | return show_shared_cpu_map_func(leaf, 1, buf); |
645 | } | 645 | } |
646 | 646 | ||
647 | static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) { | 647 | static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) |
648 | switch(this_leaf->eax.split.type) { | 648 | { |
649 | case CACHE_TYPE_DATA: | 649 | switch (this_leaf->eax.split.type) { |
650 | case CACHE_TYPE_DATA: | ||
650 | return sprintf(buf, "Data\n"); | 651 | return sprintf(buf, "Data\n"); |
651 | break; | 652 | case CACHE_TYPE_INST: |
652 | case CACHE_TYPE_INST: | ||
653 | return sprintf(buf, "Instruction\n"); | 653 | return sprintf(buf, "Instruction\n"); |
654 | break; | 654 | case CACHE_TYPE_UNIFIED: |
655 | case CACHE_TYPE_UNIFIED: | ||
656 | return sprintf(buf, "Unified\n"); | 655 | return sprintf(buf, "Unified\n"); |
657 | break; | 656 | default: |
658 | default: | ||
659 | return sprintf(buf, "Unknown\n"); | 657 | return sprintf(buf, "Unknown\n"); |
660 | break; | ||
661 | } | 658 | } |
662 | } | 659 | } |
663 | 660 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 5eb390a4b2e9..748c8f9e7a05 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c | |||
@@ -237,7 +237,7 @@ asmlinkage void mce_threshold_interrupt(void) | |||
237 | } | 237 | } |
238 | } | 238 | } |
239 | out: | 239 | out: |
240 | add_pda(irq_threshold_count, 1); | 240 | inc_irq_stat(irq_threshold_count); |
241 | irq_exit(); | 241 | irq_exit(); |
242 | } | 242 | } |
243 | 243 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index c17eaf5dd6dd..4b48f251fd39 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c | |||
@@ -26,7 +26,7 @@ asmlinkage void smp_thermal_interrupt(void) | |||
26 | if (therm_throt_process(msr_val & 1)) | 26 | if (therm_throt_process(msr_val & 1)) |
27 | mce_log_therm_throt_event(smp_processor_id(), msr_val); | 27 | mce_log_therm_throt_event(smp_processor_id(), msr_val); |
28 | 28 | ||
29 | add_pda(irq_thermal_count, 1); | 29 | inc_irq_stat(irq_thermal_count); |
30 | irq_exit(); | 30 | irq_exit(); |
31 | } | 31 | } |
32 | 32 | ||
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index c78c04821ea1..1159e269e596 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c | |||
@@ -803,6 +803,7 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | |||
803 | } | 803 | } |
804 | 804 | ||
805 | static struct res_range __initdata range[RANGE_NUM]; | 805 | static struct res_range __initdata range[RANGE_NUM]; |
806 | static int __initdata nr_range; | ||
806 | 807 | ||
807 | #ifdef CONFIG_MTRR_SANITIZER | 808 | #ifdef CONFIG_MTRR_SANITIZER |
808 | 809 | ||
@@ -1206,39 +1207,43 @@ struct mtrr_cleanup_result { | |||
1206 | #define PSHIFT (PAGE_SHIFT - 10) | 1207 | #define PSHIFT (PAGE_SHIFT - 10) |
1207 | 1208 | ||
1208 | static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; | 1209 | static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; |
1209 | static struct res_range __initdata range_new[RANGE_NUM]; | ||
1210 | static unsigned long __initdata min_loss_pfn[RANGE_NUM]; | 1210 | static unsigned long __initdata min_loss_pfn[RANGE_NUM]; |
1211 | 1211 | ||
1212 | static int __init mtrr_cleanup(unsigned address_bits) | 1212 | static void __init print_out_mtrr_range_state(void) |
1213 | { | 1213 | { |
1214 | unsigned long extra_remove_base, extra_remove_size; | ||
1215 | unsigned long base, size, def, dummy; | ||
1216 | mtrr_type type; | ||
1217 | int nr_range, nr_range_new; | ||
1218 | u64 chunk_size, gran_size; | ||
1219 | unsigned long range_sums, range_sums_new; | ||
1220 | int index_good; | ||
1221 | int num_reg_good; | ||
1222 | int i; | 1214 | int i; |
1215 | char start_factor = 'K', size_factor = 'K'; | ||
1216 | unsigned long start_base, size_base; | ||
1217 | mtrr_type type; | ||
1223 | 1218 | ||
1224 | /* extra one for all 0 */ | 1219 | for (i = 0; i < num_var_ranges; i++) { |
1225 | int num[MTRR_NUM_TYPES + 1]; | ||
1226 | 1220 | ||
1227 | if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) | 1221 | size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10); |
1228 | return 0; | 1222 | if (!size_base) |
1229 | rdmsr(MTRRdefType_MSR, def, dummy); | 1223 | continue; |
1230 | def &= 0xff; | ||
1231 | if (def != MTRR_TYPE_UNCACHABLE) | ||
1232 | return 0; | ||
1233 | 1224 | ||
1234 | /* get it and store it aside */ | 1225 | size_base = to_size_factor(size_base, &size_factor), |
1235 | memset(range_state, 0, sizeof(range_state)); | 1226 | start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10); |
1236 | for (i = 0; i < num_var_ranges; i++) { | 1227 | start_base = to_size_factor(start_base, &start_factor), |
1237 | mtrr_if->get(i, &base, &size, &type); | 1228 | type = range_state[i].type; |
1238 | range_state[i].base_pfn = base; | 1229 | |
1239 | range_state[i].size_pfn = size; | 1230 | printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n", |
1240 | range_state[i].type = type; | 1231 | i, start_base, start_factor, |
1232 | size_base, size_factor, | ||
1233 | (type == MTRR_TYPE_UNCACHABLE) ? "UC" : | ||
1234 | ((type == MTRR_TYPE_WRPROT) ? "WP" : | ||
1235 | ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other")) | ||
1236 | ); | ||
1241 | } | 1237 | } |
1238 | } | ||
1239 | |||
1240 | static int __init mtrr_need_cleanup(void) | ||
1241 | { | ||
1242 | int i; | ||
1243 | mtrr_type type; | ||
1244 | unsigned long size; | ||
1245 | /* extra one for all 0 */ | ||
1246 | int num[MTRR_NUM_TYPES + 1]; | ||
1242 | 1247 | ||
1243 | /* check entries number */ | 1248 | /* check entries number */ |
1244 | memset(num, 0, sizeof(num)); | 1249 | memset(num, 0, sizeof(num)); |
@@ -1263,29 +1268,133 @@ static int __init mtrr_cleanup(unsigned address_bits) | |||
1263 | num_var_ranges - num[MTRR_NUM_TYPES]) | 1268 | num_var_ranges - num[MTRR_NUM_TYPES]) |
1264 | return 0; | 1269 | return 0; |
1265 | 1270 | ||
1266 | /* print original var MTRRs at first, for debugging: */ | 1271 | return 1; |
1267 | printk(KERN_DEBUG "original variable MTRRs\n"); | 1272 | } |
1268 | for (i = 0; i < num_var_ranges; i++) { | ||
1269 | char start_factor = 'K', size_factor = 'K'; | ||
1270 | unsigned long start_base, size_base; | ||
1271 | 1273 | ||
1272 | size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10); | 1274 | static unsigned long __initdata range_sums; |
1273 | if (!size_base) | 1275 | static void __init mtrr_calc_range_state(u64 chunk_size, u64 gran_size, |
1274 | continue; | 1276 | unsigned long extra_remove_base, |
1277 | unsigned long extra_remove_size, | ||
1278 | int i) | ||
1279 | { | ||
1280 | int num_reg; | ||
1281 | static struct res_range range_new[RANGE_NUM]; | ||
1282 | static int nr_range_new; | ||
1283 | unsigned long range_sums_new; | ||
1284 | |||
1285 | /* convert ranges to var ranges state */ | ||
1286 | num_reg = x86_setup_var_mtrrs(range, nr_range, | ||
1287 | chunk_size, gran_size); | ||
1288 | |||
1289 | /* we got new setting in range_state, check it */ | ||
1290 | memset(range_new, 0, sizeof(range_new)); | ||
1291 | nr_range_new = x86_get_mtrr_mem_range(range_new, 0, | ||
1292 | extra_remove_base, extra_remove_size); | ||
1293 | range_sums_new = sum_ranges(range_new, nr_range_new); | ||
1294 | |||
1295 | result[i].chunk_sizek = chunk_size >> 10; | ||
1296 | result[i].gran_sizek = gran_size >> 10; | ||
1297 | result[i].num_reg = num_reg; | ||
1298 | if (range_sums < range_sums_new) { | ||
1299 | result[i].lose_cover_sizek = | ||
1300 | (range_sums_new - range_sums) << PSHIFT; | ||
1301 | result[i].bad = 1; | ||
1302 | } else | ||
1303 | result[i].lose_cover_sizek = | ||
1304 | (range_sums - range_sums_new) << PSHIFT; | ||
1275 | 1305 | ||
1276 | size_base = to_size_factor(size_base, &size_factor), | 1306 | /* double check it */ |
1277 | start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10); | 1307 | if (!result[i].bad && !result[i].lose_cover_sizek) { |
1278 | start_base = to_size_factor(start_base, &start_factor), | 1308 | if (nr_range_new != nr_range || |
1279 | type = range_state[i].type; | 1309 | memcmp(range, range_new, sizeof(range))) |
1310 | result[i].bad = 1; | ||
1311 | } | ||
1280 | 1312 | ||
1281 | printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n", | 1313 | if (!result[i].bad && (range_sums - range_sums_new < |
1282 | i, start_base, start_factor, | 1314 | min_loss_pfn[num_reg])) { |
1283 | size_base, size_factor, | 1315 | min_loss_pfn[num_reg] = |
1284 | (type == MTRR_TYPE_UNCACHABLE) ? "UC" : | 1316 | range_sums - range_sums_new; |
1285 | ((type == MTRR_TYPE_WRPROT) ? "WP" : | ||
1286 | ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other")) | ||
1287 | ); | ||
1288 | } | 1317 | } |
1318 | } | ||
1319 | |||
1320 | static void __init mtrr_print_out_one_result(int i) | ||
1321 | { | ||
1322 | char gran_factor, chunk_factor, lose_factor; | ||
1323 | unsigned long gran_base, chunk_base, lose_base; | ||
1324 | |||
1325 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), | ||
1326 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), | ||
1327 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), | ||
1328 | printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", | ||
1329 | result[i].bad ? "*BAD*" : " ", | ||
1330 | gran_base, gran_factor, chunk_base, chunk_factor); | ||
1331 | printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", | ||
1332 | result[i].num_reg, result[i].bad ? "-" : "", | ||
1333 | lose_base, lose_factor); | ||
1334 | } | ||
1335 | |||
1336 | static int __init mtrr_search_optimal_index(void) | ||
1337 | { | ||
1338 | int i; | ||
1339 | int num_reg_good; | ||
1340 | int index_good; | ||
1341 | |||
1342 | if (nr_mtrr_spare_reg >= num_var_ranges) | ||
1343 | nr_mtrr_spare_reg = num_var_ranges - 1; | ||
1344 | num_reg_good = -1; | ||
1345 | for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { | ||
1346 | if (!min_loss_pfn[i]) | ||
1347 | num_reg_good = i; | ||
1348 | } | ||
1349 | |||
1350 | index_good = -1; | ||
1351 | if (num_reg_good != -1) { | ||
1352 | for (i = 0; i < NUM_RESULT; i++) { | ||
1353 | if (!result[i].bad && | ||
1354 | result[i].num_reg == num_reg_good && | ||
1355 | !result[i].lose_cover_sizek) { | ||
1356 | index_good = i; | ||
1357 | break; | ||
1358 | } | ||
1359 | } | ||
1360 | } | ||
1361 | |||
1362 | return index_good; | ||
1363 | } | ||
1364 | |||
1365 | |||
1366 | static int __init mtrr_cleanup(unsigned address_bits) | ||
1367 | { | ||
1368 | unsigned long extra_remove_base, extra_remove_size; | ||
1369 | unsigned long base, size, def, dummy; | ||
1370 | mtrr_type type; | ||
1371 | u64 chunk_size, gran_size; | ||
1372 | int index_good; | ||
1373 | int i; | ||
1374 | |||
1375 | if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) | ||
1376 | return 0; | ||
1377 | rdmsr(MTRRdefType_MSR, def, dummy); | ||
1378 | def &= 0xff; | ||
1379 | if (def != MTRR_TYPE_UNCACHABLE) | ||
1380 | return 0; | ||
1381 | |||
1382 | /* get it and store it aside */ | ||
1383 | memset(range_state, 0, sizeof(range_state)); | ||
1384 | for (i = 0; i < num_var_ranges; i++) { | ||
1385 | mtrr_if->get(i, &base, &size, &type); | ||
1386 | range_state[i].base_pfn = base; | ||
1387 | range_state[i].size_pfn = size; | ||
1388 | range_state[i].type = type; | ||
1389 | } | ||
1390 | |||
1391 | /* check if we need to handle it and can handle it */ | ||
1392 | if (!mtrr_need_cleanup()) | ||
1393 | return 0; | ||
1394 | |||
1395 | /* print original var MTRRs at first, for debugging: */ | ||
1396 | printk(KERN_DEBUG "original variable MTRRs\n"); | ||
1397 | print_out_mtrr_range_state(); | ||
1289 | 1398 | ||
1290 | memset(range, 0, sizeof(range)); | 1399 | memset(range, 0, sizeof(range)); |
1291 | extra_remove_size = 0; | 1400 | extra_remove_size = 0; |
@@ -1309,176 +1418,64 @@ static int __init mtrr_cleanup(unsigned address_bits) | |||
1309 | range_sums >> (20 - PAGE_SHIFT)); | 1418 | range_sums >> (20 - PAGE_SHIFT)); |
1310 | 1419 | ||
1311 | if (mtrr_chunk_size && mtrr_gran_size) { | 1420 | if (mtrr_chunk_size && mtrr_gran_size) { |
1312 | int num_reg; | 1421 | i = 0; |
1313 | char gran_factor, chunk_factor, lose_factor; | 1422 | mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size, |
1314 | unsigned long gran_base, chunk_base, lose_base; | 1423 | extra_remove_base, extra_remove_size, i); |
1315 | |||
1316 | debug_print++; | ||
1317 | /* convert ranges to var ranges state */ | ||
1318 | num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size, | ||
1319 | mtrr_gran_size); | ||
1320 | 1424 | ||
1321 | /* we got new setting in range_state, check it */ | 1425 | mtrr_print_out_one_result(i); |
1322 | memset(range_new, 0, sizeof(range_new)); | ||
1323 | nr_range_new = x86_get_mtrr_mem_range(range_new, 0, | ||
1324 | extra_remove_base, | ||
1325 | extra_remove_size); | ||
1326 | range_sums_new = sum_ranges(range_new, nr_range_new); | ||
1327 | 1426 | ||
1328 | i = 0; | ||
1329 | result[i].chunk_sizek = mtrr_chunk_size >> 10; | ||
1330 | result[i].gran_sizek = mtrr_gran_size >> 10; | ||
1331 | result[i].num_reg = num_reg; | ||
1332 | if (range_sums < range_sums_new) { | ||
1333 | result[i].lose_cover_sizek = | ||
1334 | (range_sums_new - range_sums) << PSHIFT; | ||
1335 | result[i].bad = 1; | ||
1336 | } else | ||
1337 | result[i].lose_cover_sizek = | ||
1338 | (range_sums - range_sums_new) << PSHIFT; | ||
1339 | |||
1340 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), | ||
1341 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), | ||
1342 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), | ||
1343 | printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", | ||
1344 | result[i].bad?"*BAD*":" ", | ||
1345 | gran_base, gran_factor, chunk_base, chunk_factor); | ||
1346 | printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", | ||
1347 | result[i].num_reg, result[i].bad?"-":"", | ||
1348 | lose_base, lose_factor); | ||
1349 | if (!result[i].bad) { | 1427 | if (!result[i].bad) { |
1350 | set_var_mtrr_all(address_bits); | 1428 | set_var_mtrr_all(address_bits); |
1351 | return 1; | 1429 | return 1; |
1352 | } | 1430 | } |
1353 | printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " | 1431 | printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " |
1354 | "will find optimal one\n"); | 1432 | "will find optimal one\n"); |
1355 | debug_print--; | ||
1356 | memset(result, 0, sizeof(result[0])); | ||
1357 | } | 1433 | } |
1358 | 1434 | ||
1359 | i = 0; | 1435 | i = 0; |
1360 | memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); | 1436 | memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); |
1361 | memset(result, 0, sizeof(result)); | 1437 | memset(result, 0, sizeof(result)); |
1362 | for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) { | 1438 | for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) { |
1363 | char gran_factor; | ||
1364 | unsigned long gran_base; | ||
1365 | |||
1366 | if (debug_print) | ||
1367 | gran_base = to_size_factor(gran_size >> 10, &gran_factor); | ||
1368 | 1439 | ||
1369 | for (chunk_size = gran_size; chunk_size < (1ULL<<32); | 1440 | for (chunk_size = gran_size; chunk_size < (1ULL<<32); |
1370 | chunk_size <<= 1) { | 1441 | chunk_size <<= 1) { |
1371 | int num_reg; | ||
1372 | 1442 | ||
1373 | if (debug_print) { | ||
1374 | char chunk_factor; | ||
1375 | unsigned long chunk_base; | ||
1376 | |||
1377 | chunk_base = to_size_factor(chunk_size>>10, &chunk_factor), | ||
1378 | printk(KERN_INFO "\n"); | ||
1379 | printk(KERN_INFO "gran_size: %ld%c chunk_size: %ld%c \n", | ||
1380 | gran_base, gran_factor, chunk_base, chunk_factor); | ||
1381 | } | ||
1382 | if (i >= NUM_RESULT) | 1443 | if (i >= NUM_RESULT) |
1383 | continue; | 1444 | continue; |
1384 | 1445 | ||
1385 | /* convert ranges to var ranges state */ | 1446 | mtrr_calc_range_state(chunk_size, gran_size, |
1386 | num_reg = x86_setup_var_mtrrs(range, nr_range, | 1447 | extra_remove_base, extra_remove_size, i); |
1387 | chunk_size, gran_size); | 1448 | if (debug_print) { |
1388 | 1449 | mtrr_print_out_one_result(i); | |
1389 | /* we got new setting in range_state, check it */ | 1450 | printk(KERN_INFO "\n"); |
1390 | memset(range_new, 0, sizeof(range_new)); | ||
1391 | nr_range_new = x86_get_mtrr_mem_range(range_new, 0, | ||
1392 | extra_remove_base, extra_remove_size); | ||
1393 | range_sums_new = sum_ranges(range_new, nr_range_new); | ||
1394 | |||
1395 | result[i].chunk_sizek = chunk_size >> 10; | ||
1396 | result[i].gran_sizek = gran_size >> 10; | ||
1397 | result[i].num_reg = num_reg; | ||
1398 | if (range_sums < range_sums_new) { | ||
1399 | result[i].lose_cover_sizek = | ||
1400 | (range_sums_new - range_sums) << PSHIFT; | ||
1401 | result[i].bad = 1; | ||
1402 | } else | ||
1403 | result[i].lose_cover_sizek = | ||
1404 | (range_sums - range_sums_new) << PSHIFT; | ||
1405 | |||
1406 | /* double check it */ | ||
1407 | if (!result[i].bad && !result[i].lose_cover_sizek) { | ||
1408 | if (nr_range_new != nr_range || | ||
1409 | memcmp(range, range_new, sizeof(range))) | ||
1410 | result[i].bad = 1; | ||
1411 | } | 1451 | } |
1412 | 1452 | ||
1413 | if (!result[i].bad && (range_sums - range_sums_new < | ||
1414 | min_loss_pfn[num_reg])) { | ||
1415 | min_loss_pfn[num_reg] = | ||
1416 | range_sums - range_sums_new; | ||
1417 | } | ||
1418 | i++; | 1453 | i++; |
1419 | } | 1454 | } |
1420 | } | 1455 | } |
1421 | 1456 | ||
1422 | /* print out all */ | ||
1423 | for (i = 0; i < NUM_RESULT; i++) { | ||
1424 | char gran_factor, chunk_factor, lose_factor; | ||
1425 | unsigned long gran_base, chunk_base, lose_base; | ||
1426 | |||
1427 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), | ||
1428 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), | ||
1429 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), | ||
1430 | printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", | ||
1431 | result[i].bad?"*BAD*":" ", | ||
1432 | gran_base, gran_factor, chunk_base, chunk_factor); | ||
1433 | printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", | ||
1434 | result[i].num_reg, result[i].bad?"-":"", | ||
1435 | lose_base, lose_factor); | ||
1436 | } | ||
1437 | |||
1438 | /* try to find the optimal index */ | 1457 | /* try to find the optimal index */ |
1439 | if (nr_mtrr_spare_reg >= num_var_ranges) | 1458 | index_good = mtrr_search_optimal_index(); |
1440 | nr_mtrr_spare_reg = num_var_ranges - 1; | ||
1441 | num_reg_good = -1; | ||
1442 | for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { | ||
1443 | if (!min_loss_pfn[i]) | ||
1444 | num_reg_good = i; | ||
1445 | } | ||
1446 | |||
1447 | index_good = -1; | ||
1448 | if (num_reg_good != -1) { | ||
1449 | for (i = 0; i < NUM_RESULT; i++) { | ||
1450 | if (!result[i].bad && | ||
1451 | result[i].num_reg == num_reg_good && | ||
1452 | !result[i].lose_cover_sizek) { | ||
1453 | index_good = i; | ||
1454 | break; | ||
1455 | } | ||
1456 | } | ||
1457 | } | ||
1458 | 1459 | ||
1459 | if (index_good != -1) { | 1460 | if (index_good != -1) { |
1460 | char gran_factor, chunk_factor, lose_factor; | ||
1461 | unsigned long gran_base, chunk_base, lose_base; | ||
1462 | |||
1463 | printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); | 1461 | printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); |
1464 | i = index_good; | 1462 | i = index_good; |
1465 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), | 1463 | mtrr_print_out_one_result(i); |
1466 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), | 1464 | |
1467 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), | ||
1468 | printk(KERN_INFO "gran_size: %ld%c \tchunk_size: %ld%c \t", | ||
1469 | gran_base, gran_factor, chunk_base, chunk_factor); | ||
1470 | printk(KERN_CONT "num_reg: %d \tlose RAM: %ld%c\n", | ||
1471 | result[i].num_reg, lose_base, lose_factor); | ||
1472 | /* convert ranges to var ranges state */ | 1465 | /* convert ranges to var ranges state */ |
1473 | chunk_size = result[i].chunk_sizek; | 1466 | chunk_size = result[i].chunk_sizek; |
1474 | chunk_size <<= 10; | 1467 | chunk_size <<= 10; |
1475 | gran_size = result[i].gran_sizek; | 1468 | gran_size = result[i].gran_sizek; |
1476 | gran_size <<= 10; | 1469 | gran_size <<= 10; |
1477 | debug_print++; | ||
1478 | x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); | 1470 | x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); |
1479 | debug_print--; | ||
1480 | set_var_mtrr_all(address_bits); | 1471 | set_var_mtrr_all(address_bits); |
1472 | printk(KERN_DEBUG "New variable MTRRs\n"); | ||
1473 | print_out_mtrr_range_state(); | ||
1481 | return 1; | 1474 | return 1; |
1475 | } else { | ||
1476 | /* print out all */ | ||
1477 | for (i = 0; i < NUM_RESULT; i++) | ||
1478 | mtrr_print_out_one_result(i); | ||
1482 | } | 1479 | } |
1483 | 1480 | ||
1484 | printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n"); | 1481 | printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n"); |
@@ -1562,7 +1559,6 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
1562 | { | 1559 | { |
1563 | unsigned long i, base, size, highest_pfn = 0, def, dummy; | 1560 | unsigned long i, base, size, highest_pfn = 0, def, dummy; |
1564 | mtrr_type type; | 1561 | mtrr_type type; |
1565 | int nr_range; | ||
1566 | u64 total_trim_size; | 1562 | u64 total_trim_size; |
1567 | 1563 | ||
1568 | /* extra one for all 0 */ | 1564 | /* extra one for all 0 */ |
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c new file mode 100644 index 000000000000..284c399e3234 --- /dev/null +++ b/arch/x86/kernel/cpu/vmware.c | |||
@@ -0,0 +1,112 @@ | |||
1 | /* | ||
2 | * VMware Detection code. | ||
3 | * | ||
4 | * Copyright (C) 2008, VMware, Inc. | ||
5 | * Author : Alok N Kataria <akataria@vmware.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, but | ||
13 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
15 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
16 | * details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to the Free Software | ||
20 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #include <linux/dmi.h> | ||
25 | #include <asm/div64.h> | ||
26 | #include <asm/vmware.h> | ||
27 | |||
28 | #define CPUID_VMWARE_INFO_LEAF 0x40000000 | ||
29 | #define VMWARE_HYPERVISOR_MAGIC 0x564D5868 | ||
30 | #define VMWARE_HYPERVISOR_PORT 0x5658 | ||
31 | |||
32 | #define VMWARE_PORT_CMD_GETVERSION 10 | ||
33 | #define VMWARE_PORT_CMD_GETHZ 45 | ||
34 | |||
35 | #define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \ | ||
36 | __asm__("inl (%%dx)" : \ | ||
37 | "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) : \ | ||
38 | "0"(VMWARE_HYPERVISOR_MAGIC), \ | ||
39 | "1"(VMWARE_PORT_CMD_##cmd), \ | ||
40 | "2"(VMWARE_HYPERVISOR_PORT), "3"(UINT_MAX) : \ | ||
41 | "memory"); | ||
42 | |||
43 | static inline int __vmware_platform(void) | ||
44 | { | ||
45 | uint32_t eax, ebx, ecx, edx; | ||
46 | VMWARE_PORT(GETVERSION, eax, ebx, ecx, edx); | ||
47 | return eax != (uint32_t)-1 && ebx == VMWARE_HYPERVISOR_MAGIC; | ||
48 | } | ||
49 | |||
50 | static unsigned long __vmware_get_tsc_khz(void) | ||
51 | { | ||
52 | uint64_t tsc_hz; | ||
53 | uint32_t eax, ebx, ecx, edx; | ||
54 | |||
55 | VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); | ||
56 | |||
57 | if (ebx == UINT_MAX) | ||
58 | return 0; | ||
59 | tsc_hz = eax | (((uint64_t)ebx) << 32); | ||
60 | do_div(tsc_hz, 1000); | ||
61 | BUG_ON(tsc_hz >> 32); | ||
62 | return tsc_hz; | ||
63 | } | ||
64 | |||
65 | /* | ||
66 | * While checking the dmi string information, just checking the product | ||
67 | * serial key should be enough, as this will always have a VMware | ||
68 | * specific string when running under VMware hypervisor. | ||
69 | */ | ||
70 | int vmware_platform(void) | ||
71 | { | ||
72 | if (cpu_has_hypervisor) { | ||
73 | unsigned int eax, ebx, ecx, edx; | ||
74 | char hyper_vendor_id[13]; | ||
75 | |||
76 | cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &ebx, &ecx, &edx); | ||
77 | memcpy(hyper_vendor_id + 0, &ebx, 4); | ||
78 | memcpy(hyper_vendor_id + 4, &ecx, 4); | ||
79 | memcpy(hyper_vendor_id + 8, &edx, 4); | ||
80 | hyper_vendor_id[12] = '\0'; | ||
81 | if (!strcmp(hyper_vendor_id, "VMwareVMware")) | ||
82 | return 1; | ||
83 | } else if (dmi_available && dmi_name_in_serial("VMware") && | ||
84 | __vmware_platform()) | ||
85 | return 1; | ||
86 | |||
87 | return 0; | ||
88 | } | ||
89 | |||
90 | unsigned long vmware_get_tsc_khz(void) | ||
91 | { | ||
92 | BUG_ON(!vmware_platform()); | ||
93 | return __vmware_get_tsc_khz(); | ||
94 | } | ||
95 | |||
96 | /* | ||
97 | * VMware hypervisor takes care of exporting a reliable TSC to the guest. | ||
98 | * Still, due to timing difference when running on virtual cpus, the TSC can | ||
99 | * be marked as unstable in some cases. For example, the TSC sync check at | ||
100 | * bootup can fail due to a marginal offset between vcpus' TSCs (though the | ||
101 | * TSCs do not drift from each other). Also, the ACPI PM timer clocksource | ||
102 | * is not suitable as a watchdog when running on a hypervisor because the | ||
103 | * kernel may miss a wrap of the counter if the vcpu is descheduled for a | ||
104 | * long time. To skip these checks at runtime we set these capability bits, | ||
105 | * so that the kernel could just trust the hypervisor with providing a | ||
106 | * reliable virtual TSC that is suitable for timekeeping. | ||
107 | */ | ||
108 | void __cpuinit vmware_set_feature_bits(struct cpuinfo_x86 *c) | ||
109 | { | ||
110 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | ||
111 | set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); | ||
112 | } | ||
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 268553817909..d84a852e4cd7 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c | |||
@@ -29,34 +29,17 @@ | |||
29 | 29 | ||
30 | #include <mach_ipi.h> | 30 | #include <mach_ipi.h> |
31 | 31 | ||
32 | /* This keeps a track of which one is crashing cpu. */ | ||
33 | static int crashing_cpu; | ||
34 | 32 | ||
35 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) | 33 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) |
36 | static atomic_t waiting_for_crash_ipi; | ||
37 | 34 | ||
38 | static int crash_nmi_callback(struct notifier_block *self, | 35 | static void kdump_nmi_callback(int cpu, struct die_args *args) |
39 | unsigned long val, void *data) | ||
40 | { | 36 | { |
41 | struct pt_regs *regs; | 37 | struct pt_regs *regs; |
42 | #ifdef CONFIG_X86_32 | 38 | #ifdef CONFIG_X86_32 |
43 | struct pt_regs fixed_regs; | 39 | struct pt_regs fixed_regs; |
44 | #endif | 40 | #endif |
45 | int cpu; | ||
46 | 41 | ||
47 | if (val != DIE_NMI_IPI) | 42 | regs = args->regs; |
48 | return NOTIFY_OK; | ||
49 | |||
50 | regs = ((struct die_args *)data)->regs; | ||
51 | cpu = raw_smp_processor_id(); | ||
52 | |||
53 | /* Don't do anything if this handler is invoked on crashing cpu. | ||
54 | * Otherwise, system will completely hang. Crashing cpu can get | ||
55 | * an NMI if system was initially booted with nmi_watchdog parameter. | ||
56 | */ | ||
57 | if (cpu == crashing_cpu) | ||
58 | return NOTIFY_STOP; | ||
59 | local_irq_disable(); | ||
60 | 43 | ||
61 | #ifdef CONFIG_X86_32 | 44 | #ifdef CONFIG_X86_32 |
62 | if (!user_mode_vm(regs)) { | 45 | if (!user_mode_vm(regs)) { |
@@ -65,54 +48,19 @@ static int crash_nmi_callback(struct notifier_block *self, | |||
65 | } | 48 | } |
66 | #endif | 49 | #endif |
67 | crash_save_cpu(regs, cpu); | 50 | crash_save_cpu(regs, cpu); |
68 | disable_local_APIC(); | ||
69 | atomic_dec(&waiting_for_crash_ipi); | ||
70 | /* Assume hlt works */ | ||
71 | halt(); | ||
72 | for (;;) | ||
73 | cpu_relax(); | ||
74 | |||
75 | return 1; | ||
76 | } | ||
77 | 51 | ||
78 | static void smp_send_nmi_allbutself(void) | 52 | disable_local_APIC(); |
79 | { | ||
80 | cpumask_t mask = cpu_online_map; | ||
81 | cpu_clear(safe_smp_processor_id(), mask); | ||
82 | if (!cpus_empty(mask)) | ||
83 | send_IPI_mask(mask, NMI_VECTOR); | ||
84 | } | 53 | } |
85 | 54 | ||
86 | static struct notifier_block crash_nmi_nb = { | 55 | static void kdump_nmi_shootdown_cpus(void) |
87 | .notifier_call = crash_nmi_callback, | ||
88 | }; | ||
89 | |||
90 | static void nmi_shootdown_cpus(void) | ||
91 | { | 56 | { |
92 | unsigned long msecs; | 57 | nmi_shootdown_cpus(kdump_nmi_callback); |
93 | |||
94 | atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); | ||
95 | /* Would it be better to replace the trap vector here? */ | ||
96 | if (register_die_notifier(&crash_nmi_nb)) | ||
97 | return; /* return what? */ | ||
98 | /* Ensure the new callback function is set before sending | ||
99 | * out the NMI | ||
100 | */ | ||
101 | wmb(); | ||
102 | 58 | ||
103 | smp_send_nmi_allbutself(); | ||
104 | |||
105 | msecs = 1000; /* Wait at most a second for the other cpus to stop */ | ||
106 | while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) { | ||
107 | mdelay(1); | ||
108 | msecs--; | ||
109 | } | ||
110 | |||
111 | /* Leave the nmi callback set */ | ||
112 | disable_local_APIC(); | 59 | disable_local_APIC(); |
113 | } | 60 | } |
61 | |||
114 | #else | 62 | #else |
115 | static void nmi_shootdown_cpus(void) | 63 | static void kdump_nmi_shootdown_cpus(void) |
116 | { | 64 | { |
117 | /* There are no cpus to shootdown */ | 65 | /* There are no cpus to shootdown */ |
118 | } | 66 | } |
@@ -131,9 +79,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs) | |||
131 | /* The kernel is broken so disable interrupts */ | 79 | /* The kernel is broken so disable interrupts */ |
132 | local_irq_disable(); | 80 | local_irq_disable(); |
133 | 81 | ||
134 | /* Make a note of crashing cpu. Will be used in NMI callback.*/ | 82 | kdump_nmi_shootdown_cpus(); |
135 | crashing_cpu = safe_smp_processor_id(); | ||
136 | nmi_shootdown_cpus(); | ||
137 | lapic_shutdown(); | 83 | lapic_shutdown(); |
138 | #if defined(CONFIG_X86_IO_APIC) | 84 | #if defined(CONFIG_X86_IO_APIC) |
139 | disable_IO_APIC(); | 85 | disable_IO_APIC(); |
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index a2d1176c38ee..d6938d9351cf 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c | |||
@@ -847,17 +847,16 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | |||
847 | switch (c->x86) { | 847 | switch (c->x86) { |
848 | case 0x6: | 848 | case 0x6: |
849 | switch (c->x86_model) { | 849 | switch (c->x86_model) { |
850 | case 0 ... 0xC: | ||
851 | /* sorry, don't know about them */ | ||
852 | break; | ||
850 | case 0xD: | 853 | case 0xD: |
851 | case 0xE: /* Pentium M */ | 854 | case 0xE: /* Pentium M */ |
852 | ds_configure(&ds_cfg_var); | 855 | ds_configure(&ds_cfg_var); |
853 | break; | 856 | break; |
854 | case 0xF: /* Core2 */ | 857 | default: /* Core2, Atom, ... */ |
855 | case 0x1C: /* Atom */ | ||
856 | ds_configure(&ds_cfg_64); | 858 | ds_configure(&ds_cfg_64); |
857 | break; | 859 | break; |
858 | default: | ||
859 | /* sorry, don't know about them */ | ||
860 | break; | ||
861 | } | 860 | } |
862 | break; | 861 | break; |
863 | case 0xF: | 862 | case 0xF: |
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c new file mode 100644 index 000000000000..5962176dfabb --- /dev/null +++ b/arch/x86/kernel/dumpstack.c | |||
@@ -0,0 +1,319 @@ | |||
1 | /* | ||
2 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
3 | * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs | ||
4 | */ | ||
5 | #include <linux/kallsyms.h> | ||
6 | #include <linux/kprobes.h> | ||
7 | #include <linux/uaccess.h> | ||
8 | #include <linux/utsname.h> | ||
9 | #include <linux/hardirq.h> | ||
10 | #include <linux/kdebug.h> | ||
11 | #include <linux/module.h> | ||
12 | #include <linux/ptrace.h> | ||
13 | #include <linux/kexec.h> | ||
14 | #include <linux/bug.h> | ||
15 | #include <linux/nmi.h> | ||
16 | #include <linux/sysfs.h> | ||
17 | |||
18 | #include <asm/stacktrace.h> | ||
19 | |||
20 | #include "dumpstack.h" | ||
21 | |||
22 | int panic_on_unrecovered_nmi; | ||
23 | unsigned int code_bytes = 64; | ||
24 | int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; | ||
25 | static int die_counter; | ||
26 | |||
27 | void printk_address(unsigned long address, int reliable) | ||
28 | { | ||
29 | printk(" [<%p>] %s%pS\n", (void *) address, | ||
30 | reliable ? "" : "? ", (void *) address); | ||
31 | } | ||
32 | |||
33 | /* | ||
34 | * x86-64 can have up to three kernel stacks: | ||
35 | * process stack | ||
36 | * interrupt stack | ||
37 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack | ||
38 | */ | ||
39 | |||
40 | static inline int valid_stack_ptr(struct thread_info *tinfo, | ||
41 | void *p, unsigned int size, void *end) | ||
42 | { | ||
43 | void *t = tinfo; | ||
44 | if (end) { | ||
45 | if (p < end && p >= (end-THREAD_SIZE)) | ||
46 | return 1; | ||
47 | else | ||
48 | return 0; | ||
49 | } | ||
50 | return p > t && p < t + THREAD_SIZE - size; | ||
51 | } | ||
52 | |||
53 | unsigned long | ||
54 | print_context_stack(struct thread_info *tinfo, | ||
55 | unsigned long *stack, unsigned long bp, | ||
56 | const struct stacktrace_ops *ops, void *data, | ||
57 | unsigned long *end) | ||
58 | { | ||
59 | struct stack_frame *frame = (struct stack_frame *)bp; | ||
60 | |||
61 | while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { | ||
62 | unsigned long addr; | ||
63 | |||
64 | addr = *stack; | ||
65 | if (__kernel_text_address(addr)) { | ||
66 | if ((unsigned long) stack == bp + sizeof(long)) { | ||
67 | ops->address(data, addr, 1); | ||
68 | frame = frame->next_frame; | ||
69 | bp = (unsigned long) frame; | ||
70 | } else { | ||
71 | ops->address(data, addr, bp == 0); | ||
72 | } | ||
73 | } | ||
74 | stack++; | ||
75 | } | ||
76 | return bp; | ||
77 | } | ||
78 | |||
79 | |||
80 | static void | ||
81 | print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) | ||
82 | { | ||
83 | printk(data); | ||
84 | print_symbol(msg, symbol); | ||
85 | printk("\n"); | ||
86 | } | ||
87 | |||
88 | static void print_trace_warning(void *data, char *msg) | ||
89 | { | ||
90 | printk("%s%s\n", (char *)data, msg); | ||
91 | } | ||
92 | |||
93 | static int print_trace_stack(void *data, char *name) | ||
94 | { | ||
95 | printk("%s <%s> ", (char *)data, name); | ||
96 | return 0; | ||
97 | } | ||
98 | |||
99 | /* | ||
100 | * Print one address/symbol entries per line. | ||
101 | */ | ||
102 | static void print_trace_address(void *data, unsigned long addr, int reliable) | ||
103 | { | ||
104 | touch_nmi_watchdog(); | ||
105 | printk(data); | ||
106 | printk_address(addr, reliable); | ||
107 | } | ||
108 | |||
109 | static const struct stacktrace_ops print_trace_ops = { | ||
110 | .warning = print_trace_warning, | ||
111 | .warning_symbol = print_trace_warning_symbol, | ||
112 | .stack = print_trace_stack, | ||
113 | .address = print_trace_address, | ||
114 | }; | ||
115 | |||
116 | void | ||
117 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
118 | unsigned long *stack, unsigned long bp, char *log_lvl) | ||
119 | { | ||
120 | printk("%sCall Trace:\n", log_lvl); | ||
121 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); | ||
122 | } | ||
123 | |||
124 | void show_trace(struct task_struct *task, struct pt_regs *regs, | ||
125 | unsigned long *stack, unsigned long bp) | ||
126 | { | ||
127 | show_trace_log_lvl(task, regs, stack, bp, ""); | ||
128 | } | ||
129 | |||
130 | void show_stack(struct task_struct *task, unsigned long *sp) | ||
131 | { | ||
132 | show_stack_log_lvl(task, NULL, sp, 0, ""); | ||
133 | } | ||
134 | |||
135 | /* | ||
136 | * The architecture-independent dump_stack generator | ||
137 | */ | ||
138 | void dump_stack(void) | ||
139 | { | ||
140 | unsigned long bp = 0; | ||
141 | unsigned long stack; | ||
142 | |||
143 | #ifdef CONFIG_FRAME_POINTER | ||
144 | if (!bp) | ||
145 | get_bp(bp); | ||
146 | #endif | ||
147 | |||
148 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", | ||
149 | current->pid, current->comm, print_tainted(), | ||
150 | init_utsname()->release, | ||
151 | (int)strcspn(init_utsname()->version, " "), | ||
152 | init_utsname()->version); | ||
153 | show_trace(NULL, NULL, &stack, bp); | ||
154 | } | ||
155 | EXPORT_SYMBOL(dump_stack); | ||
156 | |||
157 | static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; | ||
158 | static int die_owner = -1; | ||
159 | static unsigned int die_nest_count; | ||
160 | |||
161 | unsigned __kprobes long oops_begin(void) | ||
162 | { | ||
163 | int cpu; | ||
164 | unsigned long flags; | ||
165 | |||
166 | oops_enter(); | ||
167 | |||
168 | /* racy, but better than risking deadlock. */ | ||
169 | raw_local_irq_save(flags); | ||
170 | cpu = smp_processor_id(); | ||
171 | if (!__raw_spin_trylock(&die_lock)) { | ||
172 | if (cpu == die_owner) | ||
173 | /* nested oops. should stop eventually */; | ||
174 | else | ||
175 | __raw_spin_lock(&die_lock); | ||
176 | } | ||
177 | die_nest_count++; | ||
178 | die_owner = cpu; | ||
179 | console_verbose(); | ||
180 | bust_spinlocks(1); | ||
181 | return flags; | ||
182 | } | ||
183 | |||
184 | void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) | ||
185 | { | ||
186 | if (regs && kexec_should_crash(current)) | ||
187 | crash_kexec(regs); | ||
188 | |||
189 | bust_spinlocks(0); | ||
190 | die_owner = -1; | ||
191 | add_taint(TAINT_DIE); | ||
192 | die_nest_count--; | ||
193 | if (!die_nest_count) | ||
194 | /* Nest count reaches zero, release the lock. */ | ||
195 | __raw_spin_unlock(&die_lock); | ||
196 | raw_local_irq_restore(flags); | ||
197 | oops_exit(); | ||
198 | |||
199 | if (!signr) | ||
200 | return; | ||
201 | if (in_interrupt()) | ||
202 | panic("Fatal exception in interrupt"); | ||
203 | if (panic_on_oops) | ||
204 | panic("Fatal exception"); | ||
205 | do_exit(signr); | ||
206 | } | ||
207 | |||
208 | int __kprobes __die(const char *str, struct pt_regs *regs, long err) | ||
209 | { | ||
210 | #ifdef CONFIG_X86_32 | ||
211 | unsigned short ss; | ||
212 | unsigned long sp; | ||
213 | #endif | ||
214 | printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); | ||
215 | #ifdef CONFIG_PREEMPT | ||
216 | printk("PREEMPT "); | ||
217 | #endif | ||
218 | #ifdef CONFIG_SMP | ||
219 | printk("SMP "); | ||
220 | #endif | ||
221 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
222 | printk("DEBUG_PAGEALLOC"); | ||
223 | #endif | ||
224 | printk("\n"); | ||
225 | sysfs_printk_last_file(); | ||
226 | if (notify_die(DIE_OOPS, str, regs, err, | ||
227 | current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) | ||
228 | return 1; | ||
229 | |||
230 | show_registers(regs); | ||
231 | #ifdef CONFIG_X86_32 | ||
232 | sp = (unsigned long) (&regs->sp); | |
233 | savesegment(ss, ss); | ||
234 | if (user_mode(regs)) { | ||
235 | sp = regs->sp; | ||
236 | ss = regs->ss & 0xffff; | ||
237 | } | ||
238 | printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); | ||
239 | print_symbol("%s", regs->ip); | ||
240 | printk(" SS:ESP %04x:%08lx\n", ss, sp); | ||
241 | #else | ||
242 | /* Executive summary in case the oops scrolled away */ | ||
243 | printk(KERN_ALERT "RIP "); | ||
244 | printk_address(regs->ip, 1); | ||
245 | printk(" RSP <%016lx>\n", regs->sp); | ||
246 | #endif | ||
247 | return 0; | ||
248 | } | ||
249 | |||
250 | /* | ||
251 | * This is gone through when something in the kernel has done something bad | ||
252 | * and is about to be terminated: | ||
253 | */ | ||
254 | void die(const char *str, struct pt_regs *regs, long err) | ||
255 | { | ||
256 | unsigned long flags = oops_begin(); | ||
257 | int sig = SIGSEGV; | ||
258 | |||
259 | if (!user_mode_vm(regs)) | ||
260 | report_bug(regs->ip, regs); | ||
261 | |||
262 | if (__die(str, regs, err)) | ||
263 | sig = 0; | ||
264 | oops_end(flags, regs, sig); | ||
265 | } | ||
266 | |||
267 | void notrace __kprobes | ||
268 | die_nmi(char *str, struct pt_regs *regs, int do_panic) | ||
269 | { | ||
270 | unsigned long flags; | ||
271 | |||
272 | if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) | ||
273 | return; | ||
274 | |||
275 | /* | ||
276 | * We are in trouble anyway, lets at least try | ||
277 | * to get a message out. | ||
278 | */ | ||
279 | flags = oops_begin(); | ||
280 | printk(KERN_EMERG "%s", str); | ||
281 | printk(" on CPU%d, ip %08lx, registers:\n", | ||
282 | smp_processor_id(), regs->ip); | ||
283 | show_registers(regs); | ||
284 | oops_end(flags, regs, 0); | ||
285 | if (do_panic || panic_on_oops) | ||
286 | panic("Non maskable interrupt"); | ||
287 | nmi_exit(); | ||
288 | local_irq_enable(); | ||
289 | do_exit(SIGBUS); | ||
290 | } | ||
291 | |||
292 | static int __init oops_setup(char *s) | ||
293 | { | ||
294 | if (!s) | ||
295 | return -EINVAL; | ||
296 | if (!strcmp(s, "panic")) | ||
297 | panic_on_oops = 1; | ||
298 | return 0; | ||
299 | } | ||
300 | early_param("oops", oops_setup); | ||
301 | |||
302 | static int __init kstack_setup(char *s) | ||
303 | { | ||
304 | if (!s) | ||
305 | return -EINVAL; | ||
306 | kstack_depth_to_print = simple_strtoul(s, NULL, 0); | ||
307 | return 0; | ||
308 | } | ||
309 | early_param("kstack", kstack_setup); | ||
310 | |||
311 | static int __init code_bytes_setup(char *s) | ||
312 | { | ||
313 | code_bytes = simple_strtoul(s, NULL, 0); | ||
314 | if (code_bytes > 8192) | ||
315 | code_bytes = 8192; | ||
316 | |||
317 | return 1; | ||
318 | } | ||
319 | __setup("code_bytes=", code_bytes_setup); | ||
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h new file mode 100644 index 000000000000..3119a801c32b --- /dev/null +++ b/arch/x86/kernel/dumpstack.h | |||
@@ -0,0 +1,39 @@ | |||
1 | /* | ||
2 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
3 | * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs | ||
4 | */ | ||
5 | |||
6 | #ifndef DUMPSTACK_H | ||
7 | #define DUMPSTACK_H | ||
8 | |||
9 | #ifdef CONFIG_X86_32 | ||
10 | #define STACKSLOTS_PER_LINE 8 | ||
11 | #define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :) | ||
12 | #else | ||
13 | #define STACKSLOTS_PER_LINE 4 | ||
14 | #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) | ||
15 | #endif | ||
16 | |||
17 | extern unsigned long | ||
18 | print_context_stack(struct thread_info *tinfo, | ||
19 | unsigned long *stack, unsigned long bp, | ||
20 | const struct stacktrace_ops *ops, void *data, | ||
21 | unsigned long *end); | ||
22 | |||
23 | extern void | ||
24 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
25 | unsigned long *stack, unsigned long bp, char *log_lvl); | ||
26 | |||
27 | extern void | ||
28 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
29 | unsigned long *sp, unsigned long bp, char *log_lvl); | ||
30 | |||
31 | extern unsigned int code_bytes; | ||
32 | extern int kstack_depth_to_print; | ||
33 | |||
34 | /* The form of the top of the frame on the stack */ | ||
35 | struct stack_frame { | ||
36 | struct stack_frame *next_frame; | ||
37 | unsigned long return_address; | ||
38 | }; | ||
39 | #endif | ||
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index b3614752197b..7b031b106ec8 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c | |||
@@ -17,64 +17,7 @@ | |||
17 | 17 | ||
18 | #include <asm/stacktrace.h> | 18 | #include <asm/stacktrace.h> |
19 | 19 | ||
20 | #define STACKSLOTS_PER_LINE 8 | 20 | #include "dumpstack.h" |
21 | #define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :) | ||
22 | |||
23 | int panic_on_unrecovered_nmi; | ||
24 | int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; | ||
25 | static unsigned int code_bytes = 64; | ||
26 | static int die_counter; | ||
27 | |||
28 | void printk_address(unsigned long address, int reliable) | ||
29 | { | ||
30 | printk(" [<%p>] %s%pS\n", (void *) address, | ||
31 | reliable ? "" : "? ", (void *) address); | ||
32 | } | ||
33 | |||
34 | static inline int valid_stack_ptr(struct thread_info *tinfo, | ||
35 | void *p, unsigned int size, void *end) | ||
36 | { | ||
37 | void *t = tinfo; | ||
38 | if (end) { | ||
39 | if (p < end && p >= (end-THREAD_SIZE)) | ||
40 | return 1; | ||
41 | else | ||
42 | return 0; | ||
43 | } | ||
44 | return p > t && p < t + THREAD_SIZE - size; | ||
45 | } | ||
46 | |||
47 | /* The form of the top of the frame on the stack */ | ||
48 | struct stack_frame { | ||
49 | struct stack_frame *next_frame; | ||
50 | unsigned long return_address; | ||
51 | }; | ||
52 | |||
53 | static inline unsigned long | ||
54 | print_context_stack(struct thread_info *tinfo, | ||
55 | unsigned long *stack, unsigned long bp, | ||
56 | const struct stacktrace_ops *ops, void *data, | ||
57 | unsigned long *end) | ||
58 | { | ||
59 | struct stack_frame *frame = (struct stack_frame *)bp; | ||
60 | |||
61 | while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { | ||
62 | unsigned long addr; | ||
63 | |||
64 | addr = *stack; | ||
65 | if (__kernel_text_address(addr)) { | ||
66 | if ((unsigned long) stack == bp + sizeof(long)) { | ||
67 | ops->address(data, addr, 1); | ||
68 | frame = frame->next_frame; | ||
69 | bp = (unsigned long) frame; | ||
70 | } else { | ||
71 | ops->address(data, addr, bp == 0); | ||
72 | } | ||
73 | } | ||
74 | stack++; | ||
75 | } | ||
76 | return bp; | ||
77 | } | ||
78 | 21 | ||
79 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | 22 | void dump_trace(struct task_struct *task, struct pt_regs *regs, |
80 | unsigned long *stack, unsigned long bp, | 23 | unsigned long *stack, unsigned long bp, |
@@ -119,57 +62,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
119 | } | 62 | } |
120 | EXPORT_SYMBOL(dump_trace); | 63 | EXPORT_SYMBOL(dump_trace); |
121 | 64 | ||
122 | static void | 65 | void |
123 | print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) | ||
124 | { | ||
125 | printk(data); | ||
126 | print_symbol(msg, symbol); | ||
127 | printk("\n"); | ||
128 | } | ||
129 | |||
130 | static void print_trace_warning(void *data, char *msg) | ||
131 | { | ||
132 | printk("%s%s\n", (char *)data, msg); | ||
133 | } | ||
134 | |||
135 | static int print_trace_stack(void *data, char *name) | ||
136 | { | ||
137 | printk("%s <%s> ", (char *)data, name); | ||
138 | return 0; | ||
139 | } | ||
140 | |||
141 | /* | ||
142 | * Print one address/symbol entries per line. | ||
143 | */ | ||
144 | static void print_trace_address(void *data, unsigned long addr, int reliable) | ||
145 | { | ||
146 | touch_nmi_watchdog(); | ||
147 | printk(data); | ||
148 | printk_address(addr, reliable); | ||
149 | } | ||
150 | |||
151 | static const struct stacktrace_ops print_trace_ops = { | ||
152 | .warning = print_trace_warning, | ||
153 | .warning_symbol = print_trace_warning_symbol, | ||
154 | .stack = print_trace_stack, | ||
155 | .address = print_trace_address, | ||
156 | }; | ||
157 | |||
158 | static void | ||
159 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
160 | unsigned long *stack, unsigned long bp, char *log_lvl) | ||
161 | { | ||
162 | printk("%sCall Trace:\n", log_lvl); | ||
163 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); | ||
164 | } | ||
165 | |||
166 | void show_trace(struct task_struct *task, struct pt_regs *regs, | ||
167 | unsigned long *stack, unsigned long bp) | ||
168 | { | ||
169 | show_trace_log_lvl(task, regs, stack, bp, ""); | ||
170 | } | ||
171 | |||
172 | static void | ||
173 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | 66 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
174 | unsigned long *sp, unsigned long bp, char *log_lvl) | 67 | unsigned long *sp, unsigned long bp, char *log_lvl) |
175 | { | 68 | { |
@@ -196,33 +89,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
196 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); | 89 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); |
197 | } | 90 | } |
198 | 91 | ||
199 | void show_stack(struct task_struct *task, unsigned long *sp) | ||
200 | { | ||
201 | show_stack_log_lvl(task, NULL, sp, 0, ""); | ||
202 | } | ||
203 | |||
204 | /* | ||
205 | * The architecture-independent dump_stack generator | ||
206 | */ | ||
207 | void dump_stack(void) | ||
208 | { | ||
209 | unsigned long bp = 0; | ||
210 | unsigned long stack; | ||
211 | |||
212 | #ifdef CONFIG_FRAME_POINTER | ||
213 | if (!bp) | ||
214 | get_bp(bp); | ||
215 | #endif | ||
216 | |||
217 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", | ||
218 | current->pid, current->comm, print_tainted(), | ||
219 | init_utsname()->release, | ||
220 | (int)strcspn(init_utsname()->version, " "), | ||
221 | init_utsname()->version); | ||
222 | show_trace(NULL, NULL, &stack, bp); | ||
223 | } | ||
224 | |||
225 | EXPORT_SYMBOL(dump_stack); | ||
226 | 92 | ||
227 | void show_registers(struct pt_regs *regs) | 93 | void show_registers(struct pt_regs *regs) |
228 | { | 94 | { |
@@ -283,167 +149,3 @@ int is_valid_bugaddr(unsigned long ip) | |||
283 | return ud2 == 0x0b0f; | 149 | return ud2 == 0x0b0f; |
284 | } | 150 | } |
285 | 151 | ||
286 | static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; | ||
287 | static int die_owner = -1; | ||
288 | static unsigned int die_nest_count; | ||
289 | |||
290 | unsigned __kprobes long oops_begin(void) | ||
291 | { | ||
292 | unsigned long flags; | ||
293 | |||
294 | oops_enter(); | ||
295 | |||
296 | if (die_owner != raw_smp_processor_id()) { | ||
297 | console_verbose(); | ||
298 | raw_local_irq_save(flags); | ||
299 | __raw_spin_lock(&die_lock); | ||
300 | die_owner = smp_processor_id(); | ||
301 | die_nest_count = 0; | ||
302 | bust_spinlocks(1); | ||
303 | } else { | ||
304 | raw_local_irq_save(flags); | ||
305 | } | ||
306 | die_nest_count++; | ||
307 | return flags; | ||
308 | } | ||
309 | |||
310 | void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) | ||
311 | { | ||
312 | bust_spinlocks(0); | ||
313 | die_owner = -1; | ||
314 | add_taint(TAINT_DIE); | ||
315 | __raw_spin_unlock(&die_lock); | ||
316 | raw_local_irq_restore(flags); | ||
317 | |||
318 | if (!regs) | ||
319 | return; | ||
320 | |||
321 | if (kexec_should_crash(current)) | ||
322 | crash_kexec(regs); | ||
323 | if (in_interrupt()) | ||
324 | panic("Fatal exception in interrupt"); | ||
325 | if (panic_on_oops) | ||
326 | panic("Fatal exception"); | ||
327 | oops_exit(); | ||
328 | do_exit(signr); | ||
329 | } | ||
330 | |||
331 | int __kprobes __die(const char *str, struct pt_regs *regs, long err) | ||
332 | { | ||
333 | unsigned short ss; | ||
334 | unsigned long sp; | ||
335 | |||
336 | printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); | ||
337 | #ifdef CONFIG_PREEMPT | ||
338 | printk("PREEMPT "); | ||
339 | #endif | ||
340 | #ifdef CONFIG_SMP | ||
341 | printk("SMP "); | ||
342 | #endif | ||
343 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
344 | printk("DEBUG_PAGEALLOC"); | ||
345 | #endif | ||
346 | printk("\n"); | ||
347 | sysfs_printk_last_file(); | ||
348 | if (notify_die(DIE_OOPS, str, regs, err, | ||
349 | current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) | ||
350 | return 1; | ||
351 | |||
352 | show_registers(regs); | ||
353 | /* Executive summary in case the oops scrolled away */ | ||
354 | sp = (unsigned long) (&regs->sp); | |
355 | savesegment(ss, ss); | ||
356 | if (user_mode(regs)) { | ||
357 | sp = regs->sp; | ||
358 | ss = regs->ss & 0xffff; | ||
359 | } | ||
360 | printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); | ||
361 | print_symbol("%s", regs->ip); | ||
362 | printk(" SS:ESP %04x:%08lx\n", ss, sp); | ||
363 | return 0; | ||
364 | } | ||
365 | |||
366 | /* | ||
367 | * This is gone through when something in the kernel has done something bad | ||
368 | * and is about to be terminated: | ||
369 | */ | ||
370 | void die(const char *str, struct pt_regs *regs, long err) | ||
371 | { | ||
372 | unsigned long flags = oops_begin(); | ||
373 | |||
374 | if (die_nest_count < 3) { | ||
375 | report_bug(regs->ip, regs); | ||
376 | |||
377 | if (__die(str, regs, err)) | ||
378 | regs = NULL; | ||
379 | } else { | ||
380 | printk(KERN_EMERG "Recursive die() failure, output suppressed\n"); | ||
381 | } | ||
382 | |||
383 | oops_end(flags, regs, SIGSEGV); | ||
384 | } | ||
385 | |||
386 | static DEFINE_SPINLOCK(nmi_print_lock); | ||
387 | |||
388 | void notrace __kprobes | ||
389 | die_nmi(char *str, struct pt_regs *regs, int do_panic) | ||
390 | { | ||
391 | if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) | ||
392 | return; | ||
393 | |||
394 | spin_lock(&nmi_print_lock); | ||
395 | /* | ||
396 | * We are in trouble anyway, lets at least try | ||
397 | * to get a message out: | ||
398 | */ | ||
399 | bust_spinlocks(1); | ||
400 | printk(KERN_EMERG "%s", str); | ||
401 | printk(" on CPU%d, ip %08lx, registers:\n", | ||
402 | smp_processor_id(), regs->ip); | ||
403 | show_registers(regs); | ||
404 | if (do_panic) | ||
405 | panic("Non maskable interrupt"); | ||
406 | console_silent(); | ||
407 | spin_unlock(&nmi_print_lock); | ||
408 | |||
409 | /* | ||
410 | * If we are in kernel we are probably nested up pretty bad | ||
411 | * and might aswell get out now while we still can: | ||
412 | */ | ||
413 | if (!user_mode_vm(regs)) { | ||
414 | current->thread.trap_no = 2; | ||
415 | crash_kexec(regs); | ||
416 | } | ||
417 | |||
418 | bust_spinlocks(0); | ||
419 | do_exit(SIGSEGV); | ||
420 | } | ||
421 | |||
422 | static int __init oops_setup(char *s) | ||
423 | { | ||
424 | if (!s) | ||
425 | return -EINVAL; | ||
426 | if (!strcmp(s, "panic")) | ||
427 | panic_on_oops = 1; | ||
428 | return 0; | ||
429 | } | ||
430 | early_param("oops", oops_setup); | ||
431 | |||
432 | static int __init kstack_setup(char *s) | ||
433 | { | ||
434 | if (!s) | ||
435 | return -EINVAL; | ||
436 | kstack_depth_to_print = simple_strtoul(s, NULL, 0); | ||
437 | return 0; | ||
438 | } | ||
439 | early_param("kstack", kstack_setup); | ||
440 | |||
441 | static int __init code_bytes_setup(char *s) | ||
442 | { | ||
443 | code_bytes = simple_strtoul(s, NULL, 0); | ||
444 | if (code_bytes > 8192) | ||
445 | code_bytes = 8192; | ||
446 | |||
447 | return 1; | ||
448 | } | ||
449 | __setup("code_bytes=", code_bytes_setup); | ||
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 96a5db7da8a7..33ff10287a5d 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c | |||
@@ -17,19 +17,7 @@ | |||
17 | 17 | ||
18 | #include <asm/stacktrace.h> | 18 | #include <asm/stacktrace.h> |
19 | 19 | ||
20 | #define STACKSLOTS_PER_LINE 4 | 20 | #include "dumpstack.h" |
21 | #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) | ||
22 | |||
23 | int panic_on_unrecovered_nmi; | ||
24 | int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; | ||
25 | static unsigned int code_bytes = 64; | ||
26 | static int die_counter; | ||
27 | |||
28 | void printk_address(unsigned long address, int reliable) | ||
29 | { | ||
30 | printk(" [<%p>] %s%pS\n", (void *) address, | ||
31 | reliable ? "" : "? ", (void *) address); | ||
32 | } | ||
33 | 21 | ||
34 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | 22 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, |
35 | unsigned *usedp, char **idp) | 23 | unsigned *usedp, char **idp) |
@@ -113,51 +101,6 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | |||
113 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack | 101 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack |
114 | */ | 102 | */ |
115 | 103 | ||
116 | static inline int valid_stack_ptr(struct thread_info *tinfo, | ||
117 | void *p, unsigned int size, void *end) | ||
118 | { | ||
119 | void *t = tinfo; | ||
120 | if (end) { | ||
121 | if (p < end && p >= (end-THREAD_SIZE)) | ||
122 | return 1; | ||
123 | else | ||
124 | return 0; | ||
125 | } | ||
126 | return p > t && p < t + THREAD_SIZE - size; | ||
127 | } | ||
128 | |||
129 | /* The form of the top of the frame on the stack */ | ||
130 | struct stack_frame { | ||
131 | struct stack_frame *next_frame; | ||
132 | unsigned long return_address; | ||
133 | }; | ||
134 | |||
135 | static inline unsigned long | ||
136 | print_context_stack(struct thread_info *tinfo, | ||
137 | unsigned long *stack, unsigned long bp, | ||
138 | const struct stacktrace_ops *ops, void *data, | ||
139 | unsigned long *end) | ||
140 | { | ||
141 | struct stack_frame *frame = (struct stack_frame *)bp; | ||
142 | |||
143 | while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { | ||
144 | unsigned long addr; | ||
145 | |||
146 | addr = *stack; | ||
147 | if (__kernel_text_address(addr)) { | ||
148 | if ((unsigned long) stack == bp + sizeof(long)) { | ||
149 | ops->address(data, addr, 1); | ||
150 | frame = frame->next_frame; | ||
151 | bp = (unsigned long) frame; | ||
152 | } else { | ||
153 | ops->address(data, addr, bp == 0); | ||
154 | } | ||
155 | } | ||
156 | stack++; | ||
157 | } | ||
158 | return bp; | ||
159 | } | ||
160 | |||
161 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | 104 | void dump_trace(struct task_struct *task, struct pt_regs *regs, |
162 | unsigned long *stack, unsigned long bp, | 105 | unsigned long *stack, unsigned long bp, |
163 | const struct stacktrace_ops *ops, void *data) | 106 | const struct stacktrace_ops *ops, void *data) |
@@ -248,57 +191,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
248 | } | 191 | } |
249 | EXPORT_SYMBOL(dump_trace); | 192 | EXPORT_SYMBOL(dump_trace); |
250 | 193 | ||
251 | static void | 194 | void |
252 | print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) | ||
253 | { | ||
254 | printk(data); | ||
255 | print_symbol(msg, symbol); | ||
256 | printk("\n"); | ||
257 | } | ||
258 | |||
259 | static void print_trace_warning(void *data, char *msg) | ||
260 | { | ||
261 | printk("%s%s\n", (char *)data, msg); | ||
262 | } | ||
263 | |||
264 | static int print_trace_stack(void *data, char *name) | ||
265 | { | ||
266 | printk("%s <%s> ", (char *)data, name); | ||
267 | return 0; | ||
268 | } | ||
269 | |||
270 | /* | ||
271 | * Print one address/symbol entries per line. | ||
272 | */ | ||
273 | static void print_trace_address(void *data, unsigned long addr, int reliable) | ||
274 | { | ||
275 | touch_nmi_watchdog(); | ||
276 | printk(data); | ||
277 | printk_address(addr, reliable); | ||
278 | } | ||
279 | |||
280 | static const struct stacktrace_ops print_trace_ops = { | ||
281 | .warning = print_trace_warning, | ||
282 | .warning_symbol = print_trace_warning_symbol, | ||
283 | .stack = print_trace_stack, | ||
284 | .address = print_trace_address, | ||
285 | }; | ||
286 | |||
287 | static void | ||
288 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
289 | unsigned long *stack, unsigned long bp, char *log_lvl) | ||
290 | { | ||
291 | printk("%sCall Trace:\n", log_lvl); | ||
292 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); | ||
293 | } | ||
294 | |||
295 | void show_trace(struct task_struct *task, struct pt_regs *regs, | ||
296 | unsigned long *stack, unsigned long bp) | ||
297 | { | ||
298 | show_trace_log_lvl(task, regs, stack, bp, ""); | ||
299 | } | ||
300 | |||
301 | static void | ||
302 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | 195 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
303 | unsigned long *sp, unsigned long bp, char *log_lvl) | 196 | unsigned long *sp, unsigned long bp, char *log_lvl) |
304 | { | 197 | { |
@@ -342,33 +235,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
342 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); | 235 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); |
343 | } | 236 | } |
344 | 237 | ||
345 | void show_stack(struct task_struct *task, unsigned long *sp) | ||
346 | { | ||
347 | show_stack_log_lvl(task, NULL, sp, 0, ""); | ||
348 | } | ||
349 | |||
350 | /* | ||
351 | * The architecture-independent dump_stack generator | ||
352 | */ | ||
353 | void dump_stack(void) | ||
354 | { | ||
355 | unsigned long bp = 0; | ||
356 | unsigned long stack; | ||
357 | |||
358 | #ifdef CONFIG_FRAME_POINTER | ||
359 | if (!bp) | ||
360 | get_bp(bp); | ||
361 | #endif | ||
362 | |||
363 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", | ||
364 | current->pid, current->comm, print_tainted(), | ||
365 | init_utsname()->release, | ||
366 | (int)strcspn(init_utsname()->version, " "), | ||
367 | init_utsname()->version); | ||
368 | show_trace(NULL, NULL, &stack, bp); | ||
369 | } | ||
370 | EXPORT_SYMBOL(dump_stack); | ||
371 | |||
372 | void show_registers(struct pt_regs *regs) | 238 | void show_registers(struct pt_regs *regs) |
373 | { | 239 | { |
374 | int i; | 240 | int i; |
@@ -429,147 +295,3 @@ int is_valid_bugaddr(unsigned long ip) | |||
429 | return ud2 == 0x0b0f; | 295 | return ud2 == 0x0b0f; |
430 | } | 296 | } |
431 | 297 | ||
432 | static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; | ||
433 | static int die_owner = -1; | ||
434 | static unsigned int die_nest_count; | ||
435 | |||
436 | unsigned __kprobes long oops_begin(void) | ||
437 | { | ||
438 | int cpu; | ||
439 | unsigned long flags; | ||
440 | |||
441 | oops_enter(); | ||
442 | |||
443 | /* racy, but better than risking deadlock. */ | ||
444 | raw_local_irq_save(flags); | ||
445 | cpu = smp_processor_id(); | ||
446 | if (!__raw_spin_trylock(&die_lock)) { | ||
447 | if (cpu == die_owner) | ||
448 | /* nested oops. should stop eventually */; | ||
449 | else | ||
450 | __raw_spin_lock(&die_lock); | ||
451 | } | ||
452 | die_nest_count++; | ||
453 | die_owner = cpu; | ||
454 | console_verbose(); | ||
455 | bust_spinlocks(1); | ||
456 | return flags; | ||
457 | } | ||
458 | |||
459 | void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) | ||
460 | { | ||
461 | die_owner = -1; | ||
462 | bust_spinlocks(0); | ||
463 | die_nest_count--; | ||
464 | if (!die_nest_count) | ||
465 | /* Nest count reaches zero, release the lock. */ | ||
466 | __raw_spin_unlock(&die_lock); | ||
467 | raw_local_irq_restore(flags); | ||
468 | if (!regs) { | ||
469 | oops_exit(); | ||
470 | return; | ||
471 | } | ||
472 | if (in_interrupt()) | ||
473 | panic("Fatal exception in interrupt"); | ||
474 | if (panic_on_oops) | ||
475 | panic("Fatal exception"); | ||
476 | oops_exit(); | ||
477 | do_exit(signr); | ||
478 | } | ||
479 | |||
480 | int __kprobes __die(const char *str, struct pt_regs *regs, long err) | ||
481 | { | ||
482 | printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); | ||
483 | #ifdef CONFIG_PREEMPT | ||
484 | printk("PREEMPT "); | ||
485 | #endif | ||
486 | #ifdef CONFIG_SMP | ||
487 | printk("SMP "); | ||
488 | #endif | ||
489 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
490 | printk("DEBUG_PAGEALLOC"); | ||
491 | #endif | ||
492 | printk("\n"); | ||
493 | sysfs_printk_last_file(); | ||
494 | if (notify_die(DIE_OOPS, str, regs, err, | ||
495 | current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) | ||
496 | return 1; | ||
497 | |||
498 | show_registers(regs); | ||
499 | add_taint(TAINT_DIE); | ||
500 | /* Executive summary in case the oops scrolled away */ | ||
501 | printk(KERN_ALERT "RIP "); | ||
502 | printk_address(regs->ip, 1); | ||
503 | printk(" RSP <%016lx>\n", regs->sp); | ||
504 | if (kexec_should_crash(current)) | ||
505 | crash_kexec(regs); | ||
506 | return 0; | ||
507 | } | ||
508 | |||
509 | void die(const char *str, struct pt_regs *regs, long err) | ||
510 | { | ||
511 | unsigned long flags = oops_begin(); | ||
512 | |||
513 | if (!user_mode(regs)) | ||
514 | report_bug(regs->ip, regs); | ||
515 | |||
516 | if (__die(str, regs, err)) | ||
517 | regs = NULL; | ||
518 | oops_end(flags, regs, SIGSEGV); | ||
519 | } | ||
520 | |||
521 | notrace __kprobes void | ||
522 | die_nmi(char *str, struct pt_regs *regs, int do_panic) | ||
523 | { | ||
524 | unsigned long flags; | ||
525 | |||
526 | if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) | ||
527 | return; | ||
528 | |||
529 | flags = oops_begin(); | ||
530 | /* | ||
531 | * We are in trouble anyway, lets at least try | ||
532 | * to get a message out. | ||
533 | */ | ||
534 | printk(KERN_EMERG "%s", str); | ||
535 | printk(" on CPU%d, ip %08lx, registers:\n", | ||
536 | smp_processor_id(), regs->ip); | ||
537 | show_registers(regs); | ||
538 | if (kexec_should_crash(current)) | ||
539 | crash_kexec(regs); | ||
540 | if (do_panic || panic_on_oops) | ||
541 | panic("Non maskable interrupt"); | ||
542 | oops_end(flags, NULL, SIGBUS); | ||
543 | nmi_exit(); | ||
544 | local_irq_enable(); | ||
545 | do_exit(SIGBUS); | ||
546 | } | ||
547 | |||
548 | static int __init oops_setup(char *s) | ||
549 | { | ||
550 | if (!s) | ||
551 | return -EINVAL; | ||
552 | if (!strcmp(s, "panic")) | ||
553 | panic_on_oops = 1; | ||
554 | return 0; | ||
555 | } | ||
556 | early_param("oops", oops_setup); | ||
557 | |||
558 | static int __init kstack_setup(char *s) | ||
559 | { | ||
560 | if (!s) | ||
561 | return -EINVAL; | ||
562 | kstack_depth_to_print = simple_strtoul(s, NULL, 0); | ||
563 | return 0; | ||
564 | } | ||
565 | early_param("kstack", kstack_setup); | ||
566 | |||
567 | static int __init code_bytes_setup(char *s) | ||
568 | { | ||
569 | code_bytes = simple_strtoul(s, NULL, 0); | ||
570 | if (code_bytes > 8192) | ||
571 | code_bytes = 8192; | ||
572 | |||
573 | return 1; | ||
574 | } | ||
575 | __setup("code_bytes=", code_bytes_setup); | ||
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 7aafeb5263ef..65a13943e098 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -677,22 +677,6 @@ struct early_res { | |||
677 | }; | 677 | }; |
678 | static struct early_res early_res[MAX_EARLY_RES] __initdata = { | 678 | static struct early_res early_res[MAX_EARLY_RES] __initdata = { |
679 | { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ | 679 | { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ |
680 | #if defined(CONFIG_X86_64) && defined(CONFIG_X86_TRAMPOLINE) | ||
681 | { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" }, | ||
682 | #endif | ||
683 | #if defined(CONFIG_X86_32) && defined(CONFIG_SMP) | ||
684 | /* | ||
685 | * But first pinch a few for the stack/trampoline stuff | ||
686 | * FIXME: Don't need the extra page at 4K, but need to fix | ||
687 | * trampoline before removing it. (see the GDT stuff) | ||
688 | */ | ||
689 | { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE" }, | ||
690 | /* | ||
691 | * Has to be in very low memory so we can execute | ||
692 | * real-mode AP code. | ||
693 | */ | ||
694 | { TRAMPOLINE_BASE, TRAMPOLINE_BASE + PAGE_SIZE, "TRAMPOLINE" }, | ||
695 | #endif | ||
696 | {} | 680 | {} |
697 | }; | 681 | }; |
698 | 682 | ||
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 1b894b72c0f5..744aa7fc49d5 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <asm/io_apic.h> | 17 | #include <asm/io_apic.h> |
18 | #include <asm/apic.h> | 18 | #include <asm/apic.h> |
19 | #include <asm/iommu.h> | 19 | #include <asm/iommu.h> |
20 | #include <asm/gart.h> | ||
20 | 21 | ||
21 | static void __init fix_hypertransport_config(int num, int slot, int func) | 22 | static void __init fix_hypertransport_config(int num, int slot, int func) |
22 | { | 23 | { |
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 34ad997d3834..23b138e31e9c 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c | |||
@@ -875,49 +875,6 @@ static struct console early_dbgp_console = { | |||
875 | }; | 875 | }; |
876 | #endif | 876 | #endif |
877 | 877 | ||
878 | /* Console interface to a host file on AMD's SimNow! */ | ||
879 | |||
880 | static int simnow_fd; | ||
881 | |||
882 | enum { | ||
883 | MAGIC1 = 0xBACCD00A, | ||
884 | MAGIC2 = 0xCA110000, | ||
885 | XOPEN = 5, | ||
886 | XWRITE = 4, | ||
887 | }; | ||
888 | |||
889 | static noinline long simnow(long cmd, long a, long b, long c) | ||
890 | { | ||
891 | long ret; | ||
892 | |||
893 | asm volatile("cpuid" : | ||
894 | "=a" (ret) : | ||
895 | "b" (a), "c" (b), "d" (c), "0" (MAGIC1), "D" (cmd + MAGIC2)); | ||
896 | return ret; | ||
897 | } | ||
898 | |||
899 | static void __init simnow_init(char *str) | ||
900 | { | ||
901 | char *fn = "klog"; | ||
902 | |||
903 | if (*str == '=') | ||
904 | fn = ++str; | ||
905 | /* error ignored */ | ||
906 | simnow_fd = simnow(XOPEN, (unsigned long)fn, O_WRONLY|O_APPEND|O_CREAT, 0644); | ||
907 | } | ||
908 | |||
909 | static void simnow_write(struct console *con, const char *s, unsigned n) | ||
910 | { | ||
911 | simnow(XWRITE, simnow_fd, (unsigned long)s, n); | ||
912 | } | ||
913 | |||
914 | static struct console simnow_console = { | ||
915 | .name = "simnow", | ||
916 | .write = simnow_write, | ||
917 | .flags = CON_PRINTBUFFER, | ||
918 | .index = -1, | ||
919 | }; | ||
920 | |||
921 | /* Direct interface for emergencies */ | 878 | /* Direct interface for emergencies */ |
922 | static struct console *early_console = &early_vga_console; | 879 | static struct console *early_console = &early_vga_console; |
923 | static int __initdata early_console_initialized; | 880 | static int __initdata early_console_initialized; |
@@ -960,10 +917,6 @@ static int __init setup_early_printk(char *buf) | |||
960 | max_ypos = boot_params.screen_info.orig_video_lines; | 917 | max_ypos = boot_params.screen_info.orig_video_lines; |
961 | current_ypos = boot_params.screen_info.orig_y; | 918 | current_ypos = boot_params.screen_info.orig_y; |
962 | early_console = &early_vga_console; | 919 | early_console = &early_vga_console; |
963 | } else if (!strncmp(buf, "simnow", 6)) { | ||
964 | simnow_init(buf + 6); | ||
965 | early_console = &simnow_console; | ||
966 | keep_early = 1; | ||
967 | #ifdef CONFIG_EARLY_PRINTK_DBGP | 920 | #ifdef CONFIG_EARLY_PRINTK_DBGP |
968 | } else if (!strncmp(buf, "dbgp", 4)) { | 921 | } else if (!strncmp(buf, "dbgp", 4)) { |
969 | if (early_dbgp_init(buf+4) < 0) | 922 | if (early_dbgp_init(buf+4) < 0) |
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 28b597ef9ca1..fe7014176eb0 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -619,28 +619,37 @@ END(syscall_badsys) | |||
619 | 27:; | 619 | 27:; |
620 | 620 | ||
621 | /* | 621 | /* |
622 | * Build the entry stubs and pointer table with | 622 | * Build the entry stubs and pointer table with some assembler magic. |
623 | * some assembler magic. | 623 | * We pack 7 stubs into a single 32-byte chunk, which will fit in a |
624 | * single cache line on all modern x86 implementations. | ||
624 | */ | 625 | */ |
625 | .section .rodata,"a" | 626 | .section .init.rodata,"a" |
626 | ENTRY(interrupt) | 627 | ENTRY(interrupt) |
627 | .text | 628 | .text |
628 | 629 | .p2align 5 | |
630 | .p2align CONFIG_X86_L1_CACHE_SHIFT | ||
629 | ENTRY(irq_entries_start) | 631 | ENTRY(irq_entries_start) |
630 | RING0_INT_FRAME | 632 | RING0_INT_FRAME |
631 | vector=0 | 633 | vector=FIRST_EXTERNAL_VECTOR |
632 | .rept NR_VECTORS | 634 | .rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7 |
633 | ALIGN | 635 | .balign 32 |
634 | .if vector | 636 | .rept 7 |
637 | .if vector < NR_VECTORS | ||
638 | .if vector <> FIRST_EXTERNAL_VECTOR | ||
635 | CFI_ADJUST_CFA_OFFSET -4 | 639 | CFI_ADJUST_CFA_OFFSET -4 |
636 | .endif | 640 | .endif |
637 | 1: pushl $~(vector) | 641 | 1: pushl $(~vector+0x80) /* Note: always in signed byte range */ |
638 | CFI_ADJUST_CFA_OFFSET 4 | 642 | CFI_ADJUST_CFA_OFFSET 4 |
639 | jmp common_interrupt | 643 | .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 |
640 | .previous | 644 | jmp 2f |
645 | .endif | ||
646 | .previous | ||
641 | .long 1b | 647 | .long 1b |
642 | .text | 648 | .text |
643 | vector=vector+1 | 649 | vector=vector+1 |
650 | .endif | ||
651 | .endr | ||
652 | 2: jmp common_interrupt | ||
644 | .endr | 653 | .endr |
645 | END(irq_entries_start) | 654 | END(irq_entries_start) |
646 | 655 | ||
@@ -652,8 +661,9 @@ END(interrupt) | |||
652 | * the CPU automatically disables interrupts when executing an IRQ vector, | 661 | * the CPU automatically disables interrupts when executing an IRQ vector, |
653 | * so IRQ-flags tracing has to follow that: | 662 | * so IRQ-flags tracing has to follow that: |
654 | */ | 663 | */ |
655 | ALIGN | 664 | .p2align CONFIG_X86_L1_CACHE_SHIFT |
656 | common_interrupt: | 665 | common_interrupt: |
666 | addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */ | ||
657 | SAVE_ALL | 667 | SAVE_ALL |
658 | TRACE_IRQS_OFF | 668 | TRACE_IRQS_OFF |
659 | movl %esp,%eax | 669 | movl %esp,%eax |
@@ -678,65 +688,6 @@ ENDPROC(name) | |||
678 | /* The include is where all of the SMP etc. interrupts come from */ | 688 | /* The include is where all of the SMP etc. interrupts come from */ |
679 | #include "entry_arch.h" | 689 | #include "entry_arch.h" |
680 | 690 | ||
681 | KPROBE_ENTRY(page_fault) | ||
682 | RING0_EC_FRAME | ||
683 | pushl $do_page_fault | ||
684 | CFI_ADJUST_CFA_OFFSET 4 | ||
685 | ALIGN | ||
686 | error_code: | ||
687 | /* the function address is in %fs's slot on the stack */ | ||
688 | pushl %es | ||
689 | CFI_ADJUST_CFA_OFFSET 4 | ||
690 | /*CFI_REL_OFFSET es, 0*/ | ||
691 | pushl %ds | ||
692 | CFI_ADJUST_CFA_OFFSET 4 | ||
693 | /*CFI_REL_OFFSET ds, 0*/ | ||
694 | pushl %eax | ||
695 | CFI_ADJUST_CFA_OFFSET 4 | ||
696 | CFI_REL_OFFSET eax, 0 | ||
697 | pushl %ebp | ||
698 | CFI_ADJUST_CFA_OFFSET 4 | ||
699 | CFI_REL_OFFSET ebp, 0 | ||
700 | pushl %edi | ||
701 | CFI_ADJUST_CFA_OFFSET 4 | ||
702 | CFI_REL_OFFSET edi, 0 | ||
703 | pushl %esi | ||
704 | CFI_ADJUST_CFA_OFFSET 4 | ||
705 | CFI_REL_OFFSET esi, 0 | ||
706 | pushl %edx | ||
707 | CFI_ADJUST_CFA_OFFSET 4 | ||
708 | CFI_REL_OFFSET edx, 0 | ||
709 | pushl %ecx | ||
710 | CFI_ADJUST_CFA_OFFSET 4 | ||
711 | CFI_REL_OFFSET ecx, 0 | ||
712 | pushl %ebx | ||
713 | CFI_ADJUST_CFA_OFFSET 4 | ||
714 | CFI_REL_OFFSET ebx, 0 | ||
715 | cld | ||
716 | pushl %fs | ||
717 | CFI_ADJUST_CFA_OFFSET 4 | ||
718 | /*CFI_REL_OFFSET fs, 0*/ | ||
719 | movl $(__KERNEL_PERCPU), %ecx | ||
720 | movl %ecx, %fs | ||
721 | UNWIND_ESPFIX_STACK | ||
722 | popl %ecx | ||
723 | CFI_ADJUST_CFA_OFFSET -4 | ||
724 | /*CFI_REGISTER es, ecx*/ | ||
725 | movl PT_FS(%esp), %edi # get the function address | ||
726 | movl PT_ORIG_EAX(%esp), %edx # get the error code | ||
727 | movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart | ||
728 | mov %ecx, PT_FS(%esp) | ||
729 | /*CFI_REL_OFFSET fs, ES*/ | ||
730 | movl $(__USER_DS), %ecx | ||
731 | movl %ecx, %ds | ||
732 | movl %ecx, %es | ||
733 | TRACE_IRQS_OFF | ||
734 | movl %esp,%eax # pt_regs pointer | ||
735 | call *%edi | ||
736 | jmp ret_from_exception | ||
737 | CFI_ENDPROC | ||
738 | KPROBE_END(page_fault) | ||
739 | |||
740 | ENTRY(coprocessor_error) | 691 | ENTRY(coprocessor_error) |
741 | RING0_INT_FRAME | 692 | RING0_INT_FRAME |
742 | pushl $0 | 693 | pushl $0 |
@@ -767,140 +718,6 @@ ENTRY(device_not_available) | |||
767 | CFI_ENDPROC | 718 | CFI_ENDPROC |
768 | END(device_not_available) | 719 | END(device_not_available) |
769 | 720 | ||
770 | /* | ||
771 | * Debug traps and NMI can happen at the one SYSENTER instruction | ||
772 | * that sets up the real kernel stack. Check here, since we can't | ||
773 | * allow the wrong stack to be used. | ||
774 | * | ||
775 | * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have | ||
776 | * already pushed 3 words if it hits on the sysenter instruction: | ||
777 | * eflags, cs and eip. | ||
778 | * | ||
779 | * We just load the right stack, and push the three (known) values | ||
780 | * by hand onto the new stack - while updating the return eip past | ||
781 | * the instruction that would have done it for sysenter. | ||
782 | */ | ||
783 | #define FIX_STACK(offset, ok, label) \ | ||
784 | cmpw $__KERNEL_CS,4(%esp); \ | ||
785 | jne ok; \ | ||
786 | label: \ | ||
787 | movl TSS_sysenter_sp0+offset(%esp),%esp; \ | ||
788 | CFI_DEF_CFA esp, 0; \ | ||
789 | CFI_UNDEFINED eip; \ | ||
790 | pushfl; \ | ||
791 | CFI_ADJUST_CFA_OFFSET 4; \ | ||
792 | pushl $__KERNEL_CS; \ | ||
793 | CFI_ADJUST_CFA_OFFSET 4; \ | ||
794 | pushl $sysenter_past_esp; \ | ||
795 | CFI_ADJUST_CFA_OFFSET 4; \ | ||
796 | CFI_REL_OFFSET eip, 0 | ||
797 | |||
798 | KPROBE_ENTRY(debug) | ||
799 | RING0_INT_FRAME | ||
800 | cmpl $ia32_sysenter_target,(%esp) | ||
801 | jne debug_stack_correct | ||
802 | FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) | ||
803 | debug_stack_correct: | ||
804 | pushl $-1 # mark this as an int | ||
805 | CFI_ADJUST_CFA_OFFSET 4 | ||
806 | SAVE_ALL | ||
807 | TRACE_IRQS_OFF | ||
808 | xorl %edx,%edx # error code 0 | ||
809 | movl %esp,%eax # pt_regs pointer | ||
810 | call do_debug | ||
811 | jmp ret_from_exception | ||
812 | CFI_ENDPROC | ||
813 | KPROBE_END(debug) | ||
814 | |||
815 | /* | ||
816 | * NMI is doubly nasty. It can happen _while_ we're handling | ||
817 | * a debug fault, and the debug fault hasn't yet been able to | ||
818 | * clear up the stack. So we first check whether we got an | ||
819 | * NMI on the sysenter entry path, but after that we need to | ||
820 | * check whether we got an NMI on the debug path where the debug | ||
821 | * fault happened on the sysenter path. | ||
822 | */ | ||
823 | KPROBE_ENTRY(nmi) | ||
824 | RING0_INT_FRAME | ||
825 | pushl %eax | ||
826 | CFI_ADJUST_CFA_OFFSET 4 | ||
827 | movl %ss, %eax | ||
828 | cmpw $__ESPFIX_SS, %ax | ||
829 | popl %eax | ||
830 | CFI_ADJUST_CFA_OFFSET -4 | ||
831 | je nmi_espfix_stack | ||
832 | cmpl $ia32_sysenter_target,(%esp) | ||
833 | je nmi_stack_fixup | ||
834 | pushl %eax | ||
835 | CFI_ADJUST_CFA_OFFSET 4 | ||
836 | movl %esp,%eax | ||
837 | /* Do not access memory above the end of our stack page, | ||
838 | * it might not exist. | ||
839 | */ | ||
840 | andl $(THREAD_SIZE-1),%eax | ||
841 | cmpl $(THREAD_SIZE-20),%eax | ||
842 | popl %eax | ||
843 | CFI_ADJUST_CFA_OFFSET -4 | ||
844 | jae nmi_stack_correct | ||
845 | cmpl $ia32_sysenter_target,12(%esp) | ||
846 | je nmi_debug_stack_check | ||
847 | nmi_stack_correct: | ||
848 | /* We have a RING0_INT_FRAME here */ | ||
849 | pushl %eax | ||
850 | CFI_ADJUST_CFA_OFFSET 4 | ||
851 | SAVE_ALL | ||
852 | TRACE_IRQS_OFF | ||
853 | xorl %edx,%edx # zero error code | ||
854 | movl %esp,%eax # pt_regs pointer | ||
855 | call do_nmi | ||
856 | jmp restore_nocheck_notrace | ||
857 | CFI_ENDPROC | ||
858 | |||
859 | nmi_stack_fixup: | ||
860 | RING0_INT_FRAME | ||
861 | FIX_STACK(12,nmi_stack_correct, 1) | ||
862 | jmp nmi_stack_correct | ||
863 | |||
864 | nmi_debug_stack_check: | ||
865 | /* We have a RING0_INT_FRAME here */ | ||
866 | cmpw $__KERNEL_CS,16(%esp) | ||
867 | jne nmi_stack_correct | ||
868 | cmpl $debug,(%esp) | ||
869 | jb nmi_stack_correct | ||
870 | cmpl $debug_esp_fix_insn,(%esp) | ||
871 | ja nmi_stack_correct | ||
872 | FIX_STACK(24,nmi_stack_correct, 1) | ||
873 | jmp nmi_stack_correct | ||
874 | |||
875 | nmi_espfix_stack: | ||
876 | /* We have a RING0_INT_FRAME here. | ||
877 | * | ||
878 | * create the pointer to lss back | ||
879 | */ | ||
880 | pushl %ss | ||
881 | CFI_ADJUST_CFA_OFFSET 4 | ||
882 | pushl %esp | ||
883 | CFI_ADJUST_CFA_OFFSET 4 | ||
884 | addw $4, (%esp) | ||
885 | /* copy the iret frame of 12 bytes */ | ||
886 | .rept 3 | ||
887 | pushl 16(%esp) | ||
888 | CFI_ADJUST_CFA_OFFSET 4 | ||
889 | .endr | ||
890 | pushl %eax | ||
891 | CFI_ADJUST_CFA_OFFSET 4 | ||
892 | SAVE_ALL | ||
893 | TRACE_IRQS_OFF | ||
894 | FIXUP_ESPFIX_STACK # %eax == %esp | ||
895 | xorl %edx,%edx # zero error code | ||
896 | call do_nmi | ||
897 | RESTORE_REGS | ||
898 | lss 12+4(%esp), %esp # back to espfix stack | ||
899 | CFI_ADJUST_CFA_OFFSET -24 | ||
900 | jmp irq_return | ||
901 | CFI_ENDPROC | ||
902 | KPROBE_END(nmi) | ||
903 | |||
904 | #ifdef CONFIG_PARAVIRT | 721 | #ifdef CONFIG_PARAVIRT |
905 | ENTRY(native_iret) | 722 | ENTRY(native_iret) |
906 | iret | 723 | iret |
@@ -916,19 +733,6 @@ ENTRY(native_irq_enable_sysexit) | |||
916 | END(native_irq_enable_sysexit) | 733 | END(native_irq_enable_sysexit) |
917 | #endif | 734 | #endif |
918 | 735 | ||
919 | KPROBE_ENTRY(int3) | ||
920 | RING0_INT_FRAME | ||
921 | pushl $-1 # mark this as an int | ||
922 | CFI_ADJUST_CFA_OFFSET 4 | ||
923 | SAVE_ALL | ||
924 | TRACE_IRQS_OFF | ||
925 | xorl %edx,%edx # zero error code | ||
926 | movl %esp,%eax # pt_regs pointer | ||
927 | call do_int3 | ||
928 | jmp ret_from_exception | ||
929 | CFI_ENDPROC | ||
930 | KPROBE_END(int3) | ||
931 | |||
932 | ENTRY(overflow) | 736 | ENTRY(overflow) |
933 | RING0_INT_FRAME | 737 | RING0_INT_FRAME |
934 | pushl $0 | 738 | pushl $0 |
@@ -993,14 +797,6 @@ ENTRY(stack_segment) | |||
993 | CFI_ENDPROC | 797 | CFI_ENDPROC |
994 | END(stack_segment) | 798 | END(stack_segment) |
995 | 799 | ||
996 | KPROBE_ENTRY(general_protection) | ||
997 | RING0_EC_FRAME | ||
998 | pushl $do_general_protection | ||
999 | CFI_ADJUST_CFA_OFFSET 4 | ||
1000 | jmp error_code | ||
1001 | CFI_ENDPROC | ||
1002 | KPROBE_END(general_protection) | ||
1003 | |||
1004 | ENTRY(alignment_check) | 800 | ENTRY(alignment_check) |
1005 | RING0_EC_FRAME | 801 | RING0_EC_FRAME |
1006 | pushl $do_alignment_check | 802 | pushl $do_alignment_check |
@@ -1051,6 +847,7 @@ ENTRY(kernel_thread_helper) | |||
1051 | push %eax | 847 | push %eax |
1052 | CFI_ADJUST_CFA_OFFSET 4 | 848 | CFI_ADJUST_CFA_OFFSET 4 |
1053 | call do_exit | 849 | call do_exit |
850 | ud2 # padding for call trace | ||
1054 | CFI_ENDPROC | 851 | CFI_ENDPROC |
1055 | ENDPROC(kernel_thread_helper) | 852 | ENDPROC(kernel_thread_helper) |
1056 | 853 | ||
@@ -1210,3 +1007,227 @@ END(mcount) | |||
1210 | #include "syscall_table_32.S" | 1007 | #include "syscall_table_32.S" |
1211 | 1008 | ||
1212 | syscall_table_size=(.-sys_call_table) | 1009 | syscall_table_size=(.-sys_call_table) |
1010 | |||
1011 | /* | ||
1012 | * Some functions should be protected against kprobes | ||
1013 | */ | ||
1014 | .pushsection .kprobes.text, "ax" | ||
1015 | |||
1016 | ENTRY(page_fault) | ||
1017 | RING0_EC_FRAME | ||
1018 | pushl $do_page_fault | ||
1019 | CFI_ADJUST_CFA_OFFSET 4 | ||
1020 | ALIGN | ||
1021 | error_code: | ||
1022 | /* the function address is in %fs's slot on the stack */ | ||
1023 | pushl %es | ||
1024 | CFI_ADJUST_CFA_OFFSET 4 | ||
1025 | /*CFI_REL_OFFSET es, 0*/ | ||
1026 | pushl %ds | ||
1027 | CFI_ADJUST_CFA_OFFSET 4 | ||
1028 | /*CFI_REL_OFFSET ds, 0*/ | ||
1029 | pushl %eax | ||
1030 | CFI_ADJUST_CFA_OFFSET 4 | ||
1031 | CFI_REL_OFFSET eax, 0 | ||
1032 | pushl %ebp | ||
1033 | CFI_ADJUST_CFA_OFFSET 4 | ||
1034 | CFI_REL_OFFSET ebp, 0 | ||
1035 | pushl %edi | ||
1036 | CFI_ADJUST_CFA_OFFSET 4 | ||
1037 | CFI_REL_OFFSET edi, 0 | ||
1038 | pushl %esi | ||
1039 | CFI_ADJUST_CFA_OFFSET 4 | ||
1040 | CFI_REL_OFFSET esi, 0 | ||
1041 | pushl %edx | ||
1042 | CFI_ADJUST_CFA_OFFSET 4 | ||
1043 | CFI_REL_OFFSET edx, 0 | ||
1044 | pushl %ecx | ||
1045 | CFI_ADJUST_CFA_OFFSET 4 | ||
1046 | CFI_REL_OFFSET ecx, 0 | ||
1047 | pushl %ebx | ||
1048 | CFI_ADJUST_CFA_OFFSET 4 | ||
1049 | CFI_REL_OFFSET ebx, 0 | ||
1050 | cld | ||
1051 | pushl %fs | ||
1052 | CFI_ADJUST_CFA_OFFSET 4 | ||
1053 | /*CFI_REL_OFFSET fs, 0*/ | ||
1054 | movl $(__KERNEL_PERCPU), %ecx | ||
1055 | movl %ecx, %fs | ||
1056 | UNWIND_ESPFIX_STACK | ||
1057 | popl %ecx | ||
1058 | CFI_ADJUST_CFA_OFFSET -4 | ||
1059 | /*CFI_REGISTER es, ecx*/ | ||
1060 | movl PT_FS(%esp), %edi # get the function address | ||
1061 | movl PT_ORIG_EAX(%esp), %edx # get the error code | ||
1062 | movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart | ||
1063 | mov %ecx, PT_FS(%esp) | ||
1064 | /*CFI_REL_OFFSET fs, ES*/ | ||
1065 | movl $(__USER_DS), %ecx | ||
1066 | movl %ecx, %ds | ||
1067 | movl %ecx, %es | ||
1068 | TRACE_IRQS_OFF | ||
1069 | movl %esp,%eax # pt_regs pointer | ||
1070 | call *%edi | ||
1071 | jmp ret_from_exception | ||
1072 | CFI_ENDPROC | ||
1073 | END(page_fault) | ||
1074 | |||
1075 | /* | ||
1076 | * Debug traps and NMI can happen at the one SYSENTER instruction | ||
1077 | * that sets up the real kernel stack. Check here, since we can't | ||
1078 | * allow the wrong stack to be used. | ||
1079 | * | ||
1080 | * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have | ||
1081 | * already pushed 3 words if it hits on the sysenter instruction: | ||
1082 | * eflags, cs and eip. | ||
1083 | * | ||
1084 | * We just load the right stack, and push the three (known) values | ||
1085 | * by hand onto the new stack - while updating the return eip past | ||
1086 | * the instruction that would have done it for sysenter. | ||
1087 | */ | ||
1088 | #define FIX_STACK(offset, ok, label) \ | ||
1089 | cmpw $__KERNEL_CS,4(%esp); \ | ||
1090 | jne ok; \ | ||
1091 | label: \ | ||
1092 | movl TSS_sysenter_sp0+offset(%esp),%esp; \ | ||
1093 | CFI_DEF_CFA esp, 0; \ | ||
1094 | CFI_UNDEFINED eip; \ | ||
1095 | pushfl; \ | ||
1096 | CFI_ADJUST_CFA_OFFSET 4; \ | ||
1097 | pushl $__KERNEL_CS; \ | ||
1098 | CFI_ADJUST_CFA_OFFSET 4; \ | ||
1099 | pushl $sysenter_past_esp; \ | ||
1100 | CFI_ADJUST_CFA_OFFSET 4; \ | ||
1101 | CFI_REL_OFFSET eip, 0 | ||
1102 | |||
1103 | ENTRY(debug) | ||
1104 | RING0_INT_FRAME | ||
1105 | cmpl $ia32_sysenter_target,(%esp) | ||
1106 | jne debug_stack_correct | ||
1107 | FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) | ||
1108 | debug_stack_correct: | ||
1109 | pushl $-1 # mark this as an int | ||
1110 | CFI_ADJUST_CFA_OFFSET 4 | ||
1111 | SAVE_ALL | ||
1112 | TRACE_IRQS_OFF | ||
1113 | xorl %edx,%edx # error code 0 | ||
1114 | movl %esp,%eax # pt_regs pointer | ||
1115 | call do_debug | ||
1116 | jmp ret_from_exception | ||
1117 | CFI_ENDPROC | ||
1118 | END(debug) | ||
1119 | |||
1120 | /* | ||
1121 | * NMI is doubly nasty. It can happen _while_ we're handling | ||
1122 | * a debug fault, and the debug fault hasn't yet been able to | ||
1123 | * clear up the stack. So we first check whether we got an | ||
1124 | * NMI on the sysenter entry path, but after that we need to | ||
1125 | * check whether we got an NMI on the debug path where the debug | ||
1126 | * fault happened on the sysenter path. | ||
1127 | */ | ||
1128 | ENTRY(nmi) | ||
1129 | RING0_INT_FRAME | ||
1130 | pushl %eax | ||
1131 | CFI_ADJUST_CFA_OFFSET 4 | ||
1132 | movl %ss, %eax | ||
1133 | cmpw $__ESPFIX_SS, %ax | ||
1134 | popl %eax | ||
1135 | CFI_ADJUST_CFA_OFFSET -4 | ||
1136 | je nmi_espfix_stack | ||
1137 | cmpl $ia32_sysenter_target,(%esp) | ||
1138 | je nmi_stack_fixup | ||
1139 | pushl %eax | ||
1140 | CFI_ADJUST_CFA_OFFSET 4 | ||
1141 | movl %esp,%eax | ||
1142 | /* Do not access memory above the end of our stack page, | ||
1143 | * it might not exist. | ||
1144 | */ | ||
1145 | andl $(THREAD_SIZE-1),%eax | ||
1146 | cmpl $(THREAD_SIZE-20),%eax | ||
1147 | popl %eax | ||
1148 | CFI_ADJUST_CFA_OFFSET -4 | ||
1149 | jae nmi_stack_correct | ||
1150 | cmpl $ia32_sysenter_target,12(%esp) | ||
1151 | je nmi_debug_stack_check | ||
1152 | nmi_stack_correct: | ||
1153 | /* We have a RING0_INT_FRAME here */ | ||
1154 | pushl %eax | ||
1155 | CFI_ADJUST_CFA_OFFSET 4 | ||
1156 | SAVE_ALL | ||
1157 | TRACE_IRQS_OFF | ||
1158 | xorl %edx,%edx # zero error code | ||
1159 | movl %esp,%eax # pt_regs pointer | ||
1160 | call do_nmi | ||
1161 | jmp restore_nocheck_notrace | ||
1162 | CFI_ENDPROC | ||
1163 | |||
1164 | nmi_stack_fixup: | ||
1165 | RING0_INT_FRAME | ||
1166 | FIX_STACK(12,nmi_stack_correct, 1) | ||
1167 | jmp nmi_stack_correct | ||
1168 | |||
1169 | nmi_debug_stack_check: | ||
1170 | /* We have a RING0_INT_FRAME here */ | ||
1171 | cmpw $__KERNEL_CS,16(%esp) | ||
1172 | jne nmi_stack_correct | ||
1173 | cmpl $debug,(%esp) | ||
1174 | jb nmi_stack_correct | ||
1175 | cmpl $debug_esp_fix_insn,(%esp) | ||
1176 | ja nmi_stack_correct | ||
1177 | FIX_STACK(24,nmi_stack_correct, 1) | ||
1178 | jmp nmi_stack_correct | ||
1179 | |||
1180 | nmi_espfix_stack: | ||
1181 | /* We have a RING0_INT_FRAME here. | ||
1182 | * | ||
1183 | * create the pointer to lss back | ||
1184 | */ | ||
1185 | pushl %ss | ||
1186 | CFI_ADJUST_CFA_OFFSET 4 | ||
1187 | pushl %esp | ||
1188 | CFI_ADJUST_CFA_OFFSET 4 | ||
1189 | addw $4, (%esp) | ||
1190 | /* copy the iret frame of 12 bytes */ | ||
1191 | .rept 3 | ||
1192 | pushl 16(%esp) | ||
1193 | CFI_ADJUST_CFA_OFFSET 4 | ||
1194 | .endr | ||
1195 | pushl %eax | ||
1196 | CFI_ADJUST_CFA_OFFSET 4 | ||
1197 | SAVE_ALL | ||
1198 | TRACE_IRQS_OFF | ||
1199 | FIXUP_ESPFIX_STACK # %eax == %esp | ||
1200 | xorl %edx,%edx # zero error code | ||
1201 | call do_nmi | ||
1202 | RESTORE_REGS | ||
1203 | lss 12+4(%esp), %esp # back to espfix stack | ||
1204 | CFI_ADJUST_CFA_OFFSET -24 | ||
1205 | jmp irq_return | ||
1206 | CFI_ENDPROC | ||
1207 | END(nmi) | ||
1208 | |||
1209 | ENTRY(int3) | ||
1210 | RING0_INT_FRAME | ||
1211 | pushl $-1 # mark this as an int | ||
1212 | CFI_ADJUST_CFA_OFFSET 4 | ||
1213 | SAVE_ALL | ||
1214 | TRACE_IRQS_OFF | ||
1215 | xorl %edx,%edx # zero error code | ||
1216 | movl %esp,%eax # pt_regs pointer | ||
1217 | call do_int3 | ||
1218 | jmp ret_from_exception | ||
1219 | CFI_ENDPROC | ||
1220 | END(int3) | ||
1221 | |||
1222 | ENTRY(general_protection) | ||
1223 | RING0_EC_FRAME | ||
1224 | pushl $do_general_protection | ||
1225 | CFI_ADJUST_CFA_OFFSET 4 | ||
1226 | jmp error_code | ||
1227 | CFI_ENDPROC | ||
1228 | END(general_protection) | ||
1229 | |||
1230 | /* | ||
1231 | * End of kprobes section | ||
1232 | */ | ||
1233 | .popsection | ||
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index b86f332c96a6..3194636a4293 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -11,15 +11,15 @@ | |||
11 | * | 11 | * |
12 | * NOTE: This code handles signal-recognition, which happens every time | 12 | * NOTE: This code handles signal-recognition, which happens every time |
13 | * after an interrupt and after each system call. | 13 | * after an interrupt and after each system call. |
14 | * | 14 | * |
15 | * Normal syscalls and interrupts don't save a full stack frame, this is | 15 | * Normal syscalls and interrupts don't save a full stack frame, this is |
16 | * only done for syscall tracing, signals or fork/exec et.al. | 16 | * only done for syscall tracing, signals or fork/exec et.al. |
17 | * | 17 | * |
18 | * A note on terminology: | 18 | * A note on terminology: |
19 | * - top of stack: Architecture defined interrupt frame from SS to RIP | 19 | * - top of stack: Architecture defined interrupt frame from SS to RIP |
20 | * at the top of the kernel process stack. | 20 | * at the top of the kernel process stack. |
21 | * - partial stack frame: partially saved registers upto R11. | 21 | * - partial stack frame: partially saved registers upto R11. |
22 | * - full stack frame: Like partial stack frame, but all register saved. | 22 | * - full stack frame: Like partial stack frame, but all register saved. |
23 | * | 23 | * |
24 | * Some macro usage: | 24 | * Some macro usage: |
25 | * - CFI macros are used to generate dwarf2 unwind information for better | 25 | * - CFI macros are used to generate dwarf2 unwind information for better |
@@ -60,7 +60,6 @@ | |||
60 | #define __AUDIT_ARCH_LE 0x40000000 | 60 | #define __AUDIT_ARCH_LE 0x40000000 |
61 | 61 | ||
62 | .code64 | 62 | .code64 |
63 | |||
64 | #ifdef CONFIG_FUNCTION_TRACER | 63 | #ifdef CONFIG_FUNCTION_TRACER |
65 | #ifdef CONFIG_DYNAMIC_FTRACE | 64 | #ifdef CONFIG_DYNAMIC_FTRACE |
66 | ENTRY(mcount) | 65 | ENTRY(mcount) |
@@ -142,7 +141,7 @@ END(mcount) | |||
142 | 141 | ||
143 | #ifndef CONFIG_PREEMPT | 142 | #ifndef CONFIG_PREEMPT |
144 | #define retint_kernel retint_restore_args | 143 | #define retint_kernel retint_restore_args |
145 | #endif | 144 | #endif |
146 | 145 | ||
147 | #ifdef CONFIG_PARAVIRT | 146 | #ifdef CONFIG_PARAVIRT |
148 | ENTRY(native_usergs_sysret64) | 147 | ENTRY(native_usergs_sysret64) |
@@ -161,29 +160,29 @@ ENTRY(native_usergs_sysret64) | |||
161 | .endm | 160 | .endm |
162 | 161 | ||
163 | /* | 162 | /* |
164 | * C code is not supposed to know about undefined top of stack. Every time | 163 | * C code is not supposed to know about undefined top of stack. Every time |
165 | * a C function with a pt_regs argument is called from the SYSCALL based | 164 | * a C function with a pt_regs argument is called from the SYSCALL based |
166 | * fast path FIXUP_TOP_OF_STACK is needed. | 165 | * fast path FIXUP_TOP_OF_STACK is needed. |
167 | * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs | 166 | * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs |
168 | * manipulation. | 167 | * manipulation. |
169 | */ | 168 | */ |
170 | 169 | ||
171 | /* %rsp:at FRAMEEND */ | 170 | /* %rsp:at FRAMEEND */ |
172 | .macro FIXUP_TOP_OF_STACK tmp | 171 | .macro FIXUP_TOP_OF_STACK tmp offset=0 |
173 | movq %gs:pda_oldrsp,\tmp | 172 | movq %gs:pda_oldrsp,\tmp |
174 | movq \tmp,RSP(%rsp) | 173 | movq \tmp,RSP+\offset(%rsp) |
175 | movq $__USER_DS,SS(%rsp) | 174 | movq $__USER_DS,SS+\offset(%rsp) |
176 | movq $__USER_CS,CS(%rsp) | 175 | movq $__USER_CS,CS+\offset(%rsp) |
177 | movq $-1,RCX(%rsp) | 176 | movq $-1,RCX+\offset(%rsp) |
178 | movq R11(%rsp),\tmp /* get eflags */ | 177 | movq R11+\offset(%rsp),\tmp /* get eflags */ |
179 | movq \tmp,EFLAGS(%rsp) | 178 | movq \tmp,EFLAGS+\offset(%rsp) |
180 | .endm | 179 | .endm |
181 | 180 | ||
182 | .macro RESTORE_TOP_OF_STACK tmp,offset=0 | 181 | .macro RESTORE_TOP_OF_STACK tmp offset=0 |
183 | movq RSP-\offset(%rsp),\tmp | 182 | movq RSP+\offset(%rsp),\tmp |
184 | movq \tmp,%gs:pda_oldrsp | 183 | movq \tmp,%gs:pda_oldrsp |
185 | movq EFLAGS-\offset(%rsp),\tmp | 184 | movq EFLAGS+\offset(%rsp),\tmp |
186 | movq \tmp,R11-\offset(%rsp) | 185 | movq \tmp,R11+\offset(%rsp) |
187 | .endm | 186 | .endm |
188 | 187 | ||
189 | .macro FAKE_STACK_FRAME child_rip | 188 | .macro FAKE_STACK_FRAME child_rip |
@@ -195,7 +194,7 @@ ENTRY(native_usergs_sysret64) | |||
195 | pushq %rax /* rsp */ | 194 | pushq %rax /* rsp */ |
196 | CFI_ADJUST_CFA_OFFSET 8 | 195 | CFI_ADJUST_CFA_OFFSET 8 |
197 | CFI_REL_OFFSET rsp,0 | 196 | CFI_REL_OFFSET rsp,0 |
198 | pushq $(1<<9) /* eflags - interrupts on */ | 197 | pushq $X86_EFLAGS_IF /* eflags - interrupts on */ |
199 | CFI_ADJUST_CFA_OFFSET 8 | 198 | CFI_ADJUST_CFA_OFFSET 8 |
200 | /*CFI_REL_OFFSET rflags,0*/ | 199 | /*CFI_REL_OFFSET rflags,0*/ |
201 | pushq $__KERNEL_CS /* cs */ | 200 | pushq $__KERNEL_CS /* cs */ |
@@ -213,62 +212,184 @@ ENTRY(native_usergs_sysret64) | |||
213 | CFI_ADJUST_CFA_OFFSET -(6*8) | 212 | CFI_ADJUST_CFA_OFFSET -(6*8) |
214 | .endm | 213 | .endm |
215 | 214 | ||
216 | .macro CFI_DEFAULT_STACK start=1 | 215 | /* |
216 | * initial CFI state of an empty frame: only the CFA is defined, no register save locations are recorded | ||
217 | */ | ||
218 | .macro EMPTY_FRAME start=1 offset=0 | ||
217 | .if \start | 219 | .if \start |
218 | CFI_STARTPROC simple | 220 | CFI_STARTPROC simple |
219 | CFI_SIGNAL_FRAME | 221 | CFI_SIGNAL_FRAME |
220 | CFI_DEF_CFA rsp,SS+8 | 222 | CFI_DEF_CFA rsp,8+\offset |
221 | .else | 223 | .else |
222 | CFI_DEF_CFA_OFFSET SS+8 | 224 | CFI_DEF_CFA_OFFSET 8+\offset |
223 | .endif | 225 | .endif |
224 | CFI_REL_OFFSET r15,R15 | ||
225 | CFI_REL_OFFSET r14,R14 | ||
226 | CFI_REL_OFFSET r13,R13 | ||
227 | CFI_REL_OFFSET r12,R12 | ||
228 | CFI_REL_OFFSET rbp,RBP | ||
229 | CFI_REL_OFFSET rbx,RBX | ||
230 | CFI_REL_OFFSET r11,R11 | ||
231 | CFI_REL_OFFSET r10,R10 | ||
232 | CFI_REL_OFFSET r9,R9 | ||
233 | CFI_REL_OFFSET r8,R8 | ||
234 | CFI_REL_OFFSET rax,RAX | ||
235 | CFI_REL_OFFSET rcx,RCX | ||
236 | CFI_REL_OFFSET rdx,RDX | ||
237 | CFI_REL_OFFSET rsi,RSI | ||
238 | CFI_REL_OFFSET rdi,RDI | ||
239 | CFI_REL_OFFSET rip,RIP | ||
240 | /*CFI_REL_OFFSET cs,CS*/ | ||
241 | /*CFI_REL_OFFSET rflags,EFLAGS*/ | ||
242 | CFI_REL_OFFSET rsp,RSP | ||
243 | /*CFI_REL_OFFSET ss,SS*/ | ||
244 | .endm | 226 | .endm |
227 | |||
228 | /* | ||
229 | * initial frame state for interrupts (and exceptions without error code) | ||
230 | */ | ||
231 | .macro INTR_FRAME start=1 offset=0 | ||
232 | EMPTY_FRAME \start, SS+8+\offset-RIP | ||
233 | /*CFI_REL_OFFSET ss, SS+\offset-RIP*/ | ||
234 | CFI_REL_OFFSET rsp, RSP+\offset-RIP | ||
235 | /*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/ | ||
236 | /*CFI_REL_OFFSET cs, CS+\offset-RIP*/ | ||
237 | CFI_REL_OFFSET rip, RIP+\offset-RIP | ||
238 | .endm | ||
239 | |||
240 | /* | ||
241 | * initial frame state for exceptions with error code (and interrupts | ||
242 | * with vector already pushed) | ||
243 | */ | ||
244 | .macro XCPT_FRAME start=1 offset=0 | ||
245 | INTR_FRAME \start, RIP+\offset-ORIG_RAX | ||
246 | /*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/ | ||
247 | .endm | ||
248 | |||
249 | /* | ||
250 | * frame that enables calling into C. | ||
251 | */ | ||
252 | .macro PARTIAL_FRAME start=1 offset=0 | ||
253 | XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET | ||
254 | CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET | ||
255 | CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET | ||
256 | CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET | ||
257 | CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET | ||
258 | CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET | ||
259 | CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET | ||
260 | CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET | ||
261 | CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET | ||
262 | CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET | ||
263 | .endm | ||
264 | |||
265 | /* | ||
266 | * frame that enables passing a complete pt_regs to a C function. | ||
267 | */ | ||
268 | .macro DEFAULT_FRAME start=1 offset=0 | ||
269 | PARTIAL_FRAME \start, R11+\offset-R15 | ||
270 | CFI_REL_OFFSET rbx, RBX+\offset | ||
271 | CFI_REL_OFFSET rbp, RBP+\offset | ||
272 | CFI_REL_OFFSET r12, R12+\offset | ||
273 | CFI_REL_OFFSET r13, R13+\offset | ||
274 | CFI_REL_OFFSET r14, R14+\offset | ||
275 | CFI_REL_OFFSET r15, R15+\offset | ||
276 | .endm | ||
277 | |||
278 | /* save partial stack frame */ | ||
279 | ENTRY(save_args) | ||
280 | XCPT_FRAME | ||
281 | cld | ||
282 | movq_cfi rdi, RDI+16-ARGOFFSET | ||
283 | movq_cfi rsi, RSI+16-ARGOFFSET | ||
284 | movq_cfi rdx, RDX+16-ARGOFFSET | ||
285 | movq_cfi rcx, RCX+16-ARGOFFSET | ||
286 | movq_cfi rax, RAX+16-ARGOFFSET | ||
287 | movq_cfi r8, R8+16-ARGOFFSET | ||
288 | movq_cfi r9, R9+16-ARGOFFSET | ||
289 | movq_cfi r10, R10+16-ARGOFFSET | ||
290 | movq_cfi r11, R11+16-ARGOFFSET | ||
291 | |||
292 | leaq -ARGOFFSET+16(%rsp),%rdi /* arg1 for handler */ | ||
293 | movq_cfi rbp, 8 /* push %rbp */ | ||
294 | leaq 8(%rsp), %rbp /* mov %rsp, %ebp */ | ||
295 | testl $3, CS(%rdi) | ||
296 | je 1f | ||
297 | SWAPGS | ||
298 | /* | ||
299 | * irqcount is used to check if a CPU is already on an interrupt stack | ||
300 | * or not. While this is essentially redundant with preempt_count it is | ||
301 | * a little cheaper to use a separate counter in the PDA (short of | ||
302 | * moving irq_enter into assembly, which would be too much work) | ||
303 | */ | ||
304 | 1: incl %gs:pda_irqcount | ||
305 | jne 2f | ||
306 | popq_cfi %rax /* move return address... */ | ||
307 | mov %gs:pda_irqstackptr,%rsp | ||
308 | EMPTY_FRAME 0 | ||
309 | pushq_cfi %rax /* ... to the new stack */ | ||
310 | /* | ||
311 | * We entered an interrupt context - irqs are off: | ||
312 | */ | ||
313 | 2: TRACE_IRQS_OFF | ||
314 | ret | ||
315 | CFI_ENDPROC | ||
316 | END(save_args) | ||
317 | |||
318 | ENTRY(save_rest) | ||
319 | PARTIAL_FRAME 1 REST_SKIP+8 | ||
320 | movq 5*8+16(%rsp), %r11 /* save return address */ | ||
321 | movq_cfi rbx, RBX+16 | ||
322 | movq_cfi rbp, RBP+16 | ||
323 | movq_cfi r12, R12+16 | ||
324 | movq_cfi r13, R13+16 | ||
325 | movq_cfi r14, R14+16 | ||
326 | movq_cfi r15, R15+16 | ||
327 | movq %r11, 8(%rsp) /* return address */ | ||
328 | FIXUP_TOP_OF_STACK %r11, 16 | ||
329 | ret | ||
330 | CFI_ENDPROC | ||
331 | END(save_rest) | ||
332 | |||
333 | /* save complete stack frame */ | ||
334 | ENTRY(save_paranoid) | ||
335 | XCPT_FRAME 1 RDI+8 | ||
336 | cld | ||
337 | movq_cfi rdi, RDI+8 | ||
338 | movq_cfi rsi, RSI+8 | ||
339 | movq_cfi rdx, RDX+8 | ||
340 | movq_cfi rcx, RCX+8 | ||
341 | movq_cfi rax, RAX+8 | ||
342 | movq_cfi r8, R8+8 | ||
343 | movq_cfi r9, R9+8 | ||
344 | movq_cfi r10, R10+8 | ||
345 | movq_cfi r11, R11+8 | ||
346 | movq_cfi rbx, RBX+8 | ||
347 | movq_cfi rbp, RBP+8 | ||
348 | movq_cfi r12, R12+8 | ||
349 | movq_cfi r13, R13+8 | ||
350 | movq_cfi r14, R14+8 | ||
351 | movq_cfi r15, R15+8 | ||
352 | movl $1,%ebx | ||
353 | movl $MSR_GS_BASE,%ecx | ||
354 | rdmsr | ||
355 | testl %edx,%edx | ||
356 | js 1f /* negative -> in kernel */ | ||
357 | SWAPGS | ||
358 | xorl %ebx,%ebx | ||
359 | 1: ret | ||
360 | CFI_ENDPROC | ||
361 | END(save_paranoid) | ||
362 | |||
245 | /* | 363 | /* |
246 | * A newly forked process directly context switches into this. | 364 | * A newly forked process directly context switches into this address. |
247 | */ | 365 | * |
248 | /* rdi: prev */ | 366 | * rdi: prev task we switched from |
367 | */ | ||
249 | ENTRY(ret_from_fork) | 368 | ENTRY(ret_from_fork) |
250 | CFI_DEFAULT_STACK | 369 | DEFAULT_FRAME |
370 | |||
251 | push kernel_eflags(%rip) | 371 | push kernel_eflags(%rip) |
252 | CFI_ADJUST_CFA_OFFSET 8 | 372 | CFI_ADJUST_CFA_OFFSET 8 |
253 | popf # reset kernel eflags | 373 | popf # reset kernel eflags |
254 | CFI_ADJUST_CFA_OFFSET -8 | 374 | CFI_ADJUST_CFA_OFFSET -8 |
255 | call schedule_tail | 375 | |
376 | call schedule_tail # rdi: 'prev' task parameter | ||
377 | |||
256 | GET_THREAD_INFO(%rcx) | 378 | GET_THREAD_INFO(%rcx) |
257 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx) | 379 | |
258 | jnz rff_trace | 380 | CFI_REMEMBER_STATE |
259 | rff_action: | ||
260 | RESTORE_REST | 381 | RESTORE_REST |
261 | testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread? | 382 | |
383 | testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? | ||
262 | je int_ret_from_sys_call | 384 | je int_ret_from_sys_call |
263 | testl $_TIF_IA32,TI_flags(%rcx) | 385 | |
386 | testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET | ||
264 | jnz int_ret_from_sys_call | 387 | jnz int_ret_from_sys_call |
265 | RESTORE_TOP_OF_STACK %rdi,ARGOFFSET | 388 | |
266 | jmp ret_from_sys_call | 389 | RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET |
267 | rff_trace: | 390 | jmp ret_from_sys_call # go to the SYSRET fastpath |
268 | movq %rsp,%rdi | 391 | |
269 | call syscall_trace_leave | 392 | CFI_RESTORE_STATE |
270 | GET_THREAD_INFO(%rcx) | ||
271 | jmp rff_action | ||
272 | CFI_ENDPROC | 393 | CFI_ENDPROC |
273 | END(ret_from_fork) | 394 | END(ret_from_fork) |
274 | 395 | ||
@@ -278,20 +399,20 @@ END(ret_from_fork) | |||
278 | * SYSCALL does not save anything on the stack and does not change the | 399 | * SYSCALL does not save anything on the stack and does not change the |
279 | * stack pointer. | 400 | * stack pointer. |
280 | */ | 401 | */ |
281 | 402 | ||
282 | /* | 403 | /* |
283 | * Register setup: | 404 | * Register setup: |
284 | * rax system call number | 405 | * rax system call number |
285 | * rdi arg0 | 406 | * rdi arg0 |
286 | * rcx return address for syscall/sysret, C arg3 | 407 | * rcx return address for syscall/sysret, C arg3 |
287 | * rsi arg1 | 408 | * rsi arg1 |
288 | * rdx arg2 | 409 | * rdx arg2 |
289 | * r10 arg3 (--> moved to rcx for C) | 410 | * r10 arg3 (--> moved to rcx for C) |
290 | * r8 arg4 | 411 | * r8 arg4 |
291 | * r9 arg5 | 412 | * r9 arg5 |
292 | * r11 eflags for syscall/sysret, temporary for C | 413 | * r11 eflags for syscall/sysret, temporary for C |
293 | * r12-r15,rbp,rbx saved by C code, not touched. | 414 | * r12-r15,rbp,rbx saved by C code, not touched. |
294 | * | 415 | * |
295 | * Interrupts are off on entry. | 416 | * Interrupts are off on entry. |
296 | * Only called from user space. | 417 | * Only called from user space. |
297 | * | 418 | * |
@@ -301,7 +422,7 @@ END(ret_from_fork) | |||
301 | * When the user can change the frames, always force IRET. That is because | 422 | * When the user can change the frames, always force IRET. That is because |
302 | * it deals with non-canonical addresses better. SYSRET has trouble | 423 | * it deals with non-canonical addresses better. SYSRET has trouble |
303 | * with them due to bugs in both AMD and Intel CPUs. | 424 | * with them due to bugs in both AMD and Intel CPUs. |
304 | */ | 425 | */ |
305 | 426 | ||
306 | ENTRY(system_call) | 427 | ENTRY(system_call) |
307 | CFI_STARTPROC simple | 428 | CFI_STARTPROC simple |
@@ -317,7 +438,7 @@ ENTRY(system_call) | |||
317 | */ | 438 | */ |
318 | ENTRY(system_call_after_swapgs) | 439 | ENTRY(system_call_after_swapgs) |
319 | 440 | ||
320 | movq %rsp,%gs:pda_oldrsp | 441 | movq %rsp,%gs:pda_oldrsp |
321 | movq %gs:pda_kernelstack,%rsp | 442 | movq %gs:pda_kernelstack,%rsp |
322 | /* | 443 | /* |
323 | * No need to follow this irqs off/on section - it's straight | 444 | * No need to follow this irqs off/on section - it's straight |
@@ -325,7 +446,7 @@ ENTRY(system_call_after_swapgs) | |||
325 | */ | 446 | */ |
326 | ENABLE_INTERRUPTS(CLBR_NONE) | 447 | ENABLE_INTERRUPTS(CLBR_NONE) |
327 | SAVE_ARGS 8,1 | 448 | SAVE_ARGS 8,1 |
328 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) | 449 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) |
329 | movq %rcx,RIP-ARGOFFSET(%rsp) | 450 | movq %rcx,RIP-ARGOFFSET(%rsp) |
330 | CFI_REL_OFFSET rip,RIP-ARGOFFSET | 451 | CFI_REL_OFFSET rip,RIP-ARGOFFSET |
331 | GET_THREAD_INFO(%rcx) | 452 | GET_THREAD_INFO(%rcx) |
@@ -339,19 +460,19 @@ system_call_fastpath: | |||
339 | movq %rax,RAX-ARGOFFSET(%rsp) | 460 | movq %rax,RAX-ARGOFFSET(%rsp) |
340 | /* | 461 | /* |
341 | * Syscall return path ending with SYSRET (fast path) | 462 | * Syscall return path ending with SYSRET (fast path) |
342 | * Has incomplete stack frame and undefined top of stack. | 463 | * Has incomplete stack frame and undefined top of stack. |
343 | */ | 464 | */ |
344 | ret_from_sys_call: | 465 | ret_from_sys_call: |
345 | movl $_TIF_ALLWORK_MASK,%edi | 466 | movl $_TIF_ALLWORK_MASK,%edi |
346 | /* edi: flagmask */ | 467 | /* edi: flagmask */ |
347 | sysret_check: | 468 | sysret_check: |
348 | LOCKDEP_SYS_EXIT | 469 | LOCKDEP_SYS_EXIT |
349 | GET_THREAD_INFO(%rcx) | 470 | GET_THREAD_INFO(%rcx) |
350 | DISABLE_INTERRUPTS(CLBR_NONE) | 471 | DISABLE_INTERRUPTS(CLBR_NONE) |
351 | TRACE_IRQS_OFF | 472 | TRACE_IRQS_OFF |
352 | movl TI_flags(%rcx),%edx | 473 | movl TI_flags(%rcx),%edx |
353 | andl %edi,%edx | 474 | andl %edi,%edx |
354 | jnz sysret_careful | 475 | jnz sysret_careful |
355 | CFI_REMEMBER_STATE | 476 | CFI_REMEMBER_STATE |
356 | /* | 477 | /* |
357 | * sysretq will re-enable interrupts: | 478 | * sysretq will re-enable interrupts: |
@@ -366,7 +487,7 @@ sysret_check: | |||
366 | 487 | ||
367 | CFI_RESTORE_STATE | 488 | CFI_RESTORE_STATE |
368 | /* Handle reschedules */ | 489 | /* Handle reschedules */ |
369 | /* edx: work, edi: workmask */ | 490 | /* edx: work, edi: workmask */ |
370 | sysret_careful: | 491 | sysret_careful: |
371 | bt $TIF_NEED_RESCHED,%edx | 492 | bt $TIF_NEED_RESCHED,%edx |
372 | jnc sysret_signal | 493 | jnc sysret_signal |
@@ -379,7 +500,7 @@ sysret_careful: | |||
379 | CFI_ADJUST_CFA_OFFSET -8 | 500 | CFI_ADJUST_CFA_OFFSET -8 |
380 | jmp sysret_check | 501 | jmp sysret_check |
381 | 502 | ||
382 | /* Handle a signal */ | 503 | /* Handle a signal */ |
383 | sysret_signal: | 504 | sysret_signal: |
384 | TRACE_IRQS_ON | 505 | TRACE_IRQS_ON |
385 | ENABLE_INTERRUPTS(CLBR_NONE) | 506 | ENABLE_INTERRUPTS(CLBR_NONE) |
@@ -388,17 +509,20 @@ sysret_signal: | |||
388 | jc sysret_audit | 509 | jc sysret_audit |
389 | #endif | 510 | #endif |
390 | /* edx: work flags (arg3) */ | 511 | /* edx: work flags (arg3) */ |
391 | leaq do_notify_resume(%rip),%rax | ||
392 | leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 | 512 | leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 |
393 | xorl %esi,%esi # oldset -> arg2 | 513 | xorl %esi,%esi # oldset -> arg2 |
394 | call ptregscall_common | 514 | SAVE_REST |
515 | FIXUP_TOP_OF_STACK %r11 | ||
516 | call do_notify_resume | ||
517 | RESTORE_TOP_OF_STACK %r11 | ||
518 | RESTORE_REST | ||
395 | movl $_TIF_WORK_MASK,%edi | 519 | movl $_TIF_WORK_MASK,%edi |
396 | /* Use IRET because user could have changed frame. This | 520 | /* Use IRET because user could have changed frame. This |
397 | works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ | 521 | works because FIXUP_TOP_OF_STACK was run before do_notify_resume. */ |
398 | DISABLE_INTERRUPTS(CLBR_NONE) | 522 | DISABLE_INTERRUPTS(CLBR_NONE) |
399 | TRACE_IRQS_OFF | 523 | TRACE_IRQS_OFF |
400 | jmp int_with_check | 524 | jmp int_with_check |
401 | 525 | ||
402 | badsys: | 526 | badsys: |
403 | movq $-ENOSYS,RAX-ARGOFFSET(%rsp) | 527 | movq $-ENOSYS,RAX-ARGOFFSET(%rsp) |
404 | jmp ret_from_sys_call | 528 | jmp ret_from_sys_call |
@@ -437,7 +561,7 @@ sysret_audit: | |||
437 | #endif /* CONFIG_AUDITSYSCALL */ | 561 | #endif /* CONFIG_AUDITSYSCALL */ |
438 | 562 | ||
439 | /* Do syscall tracing */ | 563 | /* Do syscall tracing */ |
440 | tracesys: | 564 | tracesys: |
441 | #ifdef CONFIG_AUDITSYSCALL | 565 | #ifdef CONFIG_AUDITSYSCALL |
442 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx) | 566 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx) |
443 | jz auditsys | 567 | jz auditsys |
@@ -460,8 +584,8 @@ tracesys: | |||
460 | call *sys_call_table(,%rax,8) | 584 | call *sys_call_table(,%rax,8) |
461 | movq %rax,RAX-ARGOFFSET(%rsp) | 585 | movq %rax,RAX-ARGOFFSET(%rsp) |
462 | /* Use IRET because user could have changed frame */ | 586 | /* Use IRET because user could have changed frame */ |
463 | 587 | ||
464 | /* | 588 | /* |
465 | * Syscall return path ending with IRET. | 589 | * Syscall return path ending with IRET. |
466 | * Has correct top of stack, but partial stack frame. | 590 | * Has correct top of stack, but partial stack frame. |
467 | */ | 591 | */ |
@@ -505,18 +629,18 @@ int_very_careful: | |||
505 | TRACE_IRQS_ON | 629 | TRACE_IRQS_ON |
506 | ENABLE_INTERRUPTS(CLBR_NONE) | 630 | ENABLE_INTERRUPTS(CLBR_NONE) |
507 | SAVE_REST | 631 | SAVE_REST |
508 | /* Check for syscall exit trace */ | 632 | /* Check for syscall exit trace */ |
509 | testl $_TIF_WORK_SYSCALL_EXIT,%edx | 633 | testl $_TIF_WORK_SYSCALL_EXIT,%edx |
510 | jz int_signal | 634 | jz int_signal |
511 | pushq %rdi | 635 | pushq %rdi |
512 | CFI_ADJUST_CFA_OFFSET 8 | 636 | CFI_ADJUST_CFA_OFFSET 8 |
513 | leaq 8(%rsp),%rdi # &ptregs -> arg1 | 637 | leaq 8(%rsp),%rdi # &ptregs -> arg1 |
514 | call syscall_trace_leave | 638 | call syscall_trace_leave |
515 | popq %rdi | 639 | popq %rdi |
516 | CFI_ADJUST_CFA_OFFSET -8 | 640 | CFI_ADJUST_CFA_OFFSET -8 |
517 | andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi | 641 | andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi |
518 | jmp int_restore_rest | 642 | jmp int_restore_rest |
519 | 643 | ||
520 | int_signal: | 644 | int_signal: |
521 | testl $_TIF_DO_NOTIFY_MASK,%edx | 645 | testl $_TIF_DO_NOTIFY_MASK,%edx |
522 | jz 1f | 646 | jz 1f |
@@ -531,22 +655,24 @@ int_restore_rest: | |||
531 | jmp int_with_check | 655 | jmp int_with_check |
532 | CFI_ENDPROC | 656 | CFI_ENDPROC |
533 | END(system_call) | 657 | END(system_call) |
534 | 658 | ||
535 | /* | 659 | /* |
536 | * Certain special system calls that need to save a complete full stack frame. | 660 | * Certain special system calls that need to save a complete full stack frame. |
537 | */ | 661 | */ |
538 | |||
539 | .macro PTREGSCALL label,func,arg | 662 | .macro PTREGSCALL label,func,arg |
540 | .globl \label | 663 | ENTRY(\label) |
541 | \label: | 664 | PARTIAL_FRAME 1 8 /* offset 8: return address */ |
542 | leaq \func(%rip),%rax | 665 | subq $REST_SKIP, %rsp |
543 | leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */ | 666 | CFI_ADJUST_CFA_OFFSET REST_SKIP |
544 | jmp ptregscall_common | 667 | call save_rest |
668 | DEFAULT_FRAME 0 8 /* offset 8: return address */ | ||
669 | leaq 8(%rsp), \arg /* pt_regs pointer */ | ||
670 | call \func | ||
671 | jmp ptregscall_common | ||
672 | CFI_ENDPROC | ||
545 | END(\label) | 673 | END(\label) |
546 | .endm | 674 | .endm |
547 | 675 | ||
548 | CFI_STARTPROC | ||
549 | |||
550 | PTREGSCALL stub_clone, sys_clone, %r8 | 676 | PTREGSCALL stub_clone, sys_clone, %r8 |
551 | PTREGSCALL stub_fork, sys_fork, %rdi | 677 | PTREGSCALL stub_fork, sys_fork, %rdi |
552 | PTREGSCALL stub_vfork, sys_vfork, %rdi | 678 | PTREGSCALL stub_vfork, sys_vfork, %rdi |
@@ -554,25 +680,18 @@ END(\label) | |||
554 | PTREGSCALL stub_iopl, sys_iopl, %rsi | 680 | PTREGSCALL stub_iopl, sys_iopl, %rsi |
555 | 681 | ||
556 | ENTRY(ptregscall_common) | 682 | ENTRY(ptregscall_common) |
557 | popq %r11 | 683 | DEFAULT_FRAME 1 8 /* offset 8: return address */ |
558 | CFI_ADJUST_CFA_OFFSET -8 | 684 | RESTORE_TOP_OF_STACK %r11, 8 |
559 | CFI_REGISTER rip, r11 | 685 | movq_cfi_restore R15+8, r15 |
560 | SAVE_REST | 686 | movq_cfi_restore R14+8, r14 |
561 | movq %r11, %r15 | 687 | movq_cfi_restore R13+8, r13 |
562 | CFI_REGISTER rip, r15 | 688 | movq_cfi_restore R12+8, r12 |
563 | FIXUP_TOP_OF_STACK %r11 | 689 | movq_cfi_restore RBP+8, rbp |
564 | call *%rax | 690 | movq_cfi_restore RBX+8, rbx |
565 | RESTORE_TOP_OF_STACK %r11 | 691 | ret $REST_SKIP /* pop extended registers */ |
566 | movq %r15, %r11 | ||
567 | CFI_REGISTER rip, r11 | ||
568 | RESTORE_REST | ||
569 | pushq %r11 | ||
570 | CFI_ADJUST_CFA_OFFSET 8 | ||
571 | CFI_REL_OFFSET rip, 0 | ||
572 | ret | ||
573 | CFI_ENDPROC | 692 | CFI_ENDPROC |
574 | END(ptregscall_common) | 693 | END(ptregscall_common) |
575 | 694 | ||
576 | ENTRY(stub_execve) | 695 | ENTRY(stub_execve) |
577 | CFI_STARTPROC | 696 | CFI_STARTPROC |
578 | popq %r11 | 697 | popq %r11 |
@@ -588,11 +707,11 @@ ENTRY(stub_execve) | |||
588 | jmp int_ret_from_sys_call | 707 | jmp int_ret_from_sys_call |
589 | CFI_ENDPROC | 708 | CFI_ENDPROC |
590 | END(stub_execve) | 709 | END(stub_execve) |
591 | 710 | ||
592 | /* | 711 | /* |
593 | * sigreturn is special because it needs to restore all registers on return. | 712 | * sigreturn is special because it needs to restore all registers on return. |
594 | * This cannot be done with SYSRET, so use the IRET return path instead. | 713 | * This cannot be done with SYSRET, so use the IRET return path instead. |
595 | */ | 714 | */ |
596 | ENTRY(stub_rt_sigreturn) | 715 | ENTRY(stub_rt_sigreturn) |
597 | CFI_STARTPROC | 716 | CFI_STARTPROC |
598 | addq $8, %rsp | 717 | addq $8, %rsp |
@@ -608,70 +727,70 @@ ENTRY(stub_rt_sigreturn) | |||
608 | END(stub_rt_sigreturn) | 727 | END(stub_rt_sigreturn) |
609 | 728 | ||
610 | /* | 729 | /* |
611 | * initial frame state for interrupts and exceptions | 730 | * Build the entry stubs and pointer table with some assembler magic. |
731 | * We pack 7 stubs into a single 32-byte chunk, which will fit in a | ||
732 | * single cache line on all modern x86 implementations. | ||
612 | */ | 733 | */ |
613 | .macro _frame ref | 734 | .section .init.rodata,"a" |
614 | CFI_STARTPROC simple | 735 | ENTRY(interrupt) |
615 | CFI_SIGNAL_FRAME | 736 | .text |
616 | CFI_DEF_CFA rsp,SS+8-\ref | 737 | .p2align 5 |
617 | /*CFI_REL_OFFSET ss,SS-\ref*/ | 738 | .p2align CONFIG_X86_L1_CACHE_SHIFT |
618 | CFI_REL_OFFSET rsp,RSP-\ref | 739 | ENTRY(irq_entries_start) |
619 | /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/ | 740 | INTR_FRAME |
620 | /*CFI_REL_OFFSET cs,CS-\ref*/ | 741 | vector=FIRST_EXTERNAL_VECTOR |
621 | CFI_REL_OFFSET rip,RIP-\ref | 742 | .rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7 |
622 | .endm | 743 | .balign 32 |
744 | .rept 7 | ||
745 | .if vector < NR_VECTORS | ||
746 | .if vector <> FIRST_EXTERNAL_VECTOR | ||
747 | CFI_ADJUST_CFA_OFFSET -8 | ||
748 | .endif | ||
749 | 1: pushq $(~vector+0x80) /* Note: always in signed byte range */ | ||
750 | CFI_ADJUST_CFA_OFFSET 8 | ||
751 | .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 | ||
752 | jmp 2f | ||
753 | .endif | ||
754 | .previous | ||
755 | .quad 1b | ||
756 | .text | ||
757 | vector=vector+1 | ||
758 | .endif | ||
759 | .endr | ||
760 | 2: jmp common_interrupt | ||
761 | .endr | ||
762 | CFI_ENDPROC | ||
763 | END(irq_entries_start) | ||
623 | 764 | ||
624 | /* initial frame state for interrupts (and exceptions without error code) */ | 765 | .previous |
625 | #define INTR_FRAME _frame RIP | 766 | END(interrupt) |
626 | /* initial frame state for exceptions with error code (and interrupts with | 767 | .previous |
627 | vector already pushed) */ | ||
628 | #define XCPT_FRAME _frame ORIG_RAX | ||
629 | 768 | ||
630 | /* | 769 | /* |
631 | * Interrupt entry/exit. | 770 | * Interrupt entry/exit. |
632 | * | 771 | * |
633 | * Interrupt entry points save only callee clobbered registers in fast path. | 772 | * Interrupt entry points save only callee clobbered registers in fast path. |
634 | * | 773 | * |
635 | * Entry runs with interrupts off. | 774 | * Entry runs with interrupts off. |
636 | */ | 775 | */ |
637 | 776 | ||
638 | /* 0(%rsp): interrupt number */ | 777 | /* 0(%rsp): ~(interrupt number) */ |
639 | .macro interrupt func | 778 | .macro interrupt func |
640 | cld | 779 | subq $10*8, %rsp |
641 | SAVE_ARGS | 780 | CFI_ADJUST_CFA_OFFSET 10*8 |
642 | leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler | 781 | call save_args |
643 | pushq %rbp | 782 | PARTIAL_FRAME 0 |
644 | /* | ||
645 | * Save rbp twice: One is for marking the stack frame, as usual, and the | ||
646 | * other, to fill pt_regs properly. This is because bx comes right | ||
647 | * before the last saved register in that structure, and not bp. If the | ||
648 | * base pointer were in the place bx is today, this would not be needed. | ||
649 | */ | ||
650 | movq %rbp, -8(%rsp) | ||
651 | CFI_ADJUST_CFA_OFFSET 8 | ||
652 | CFI_REL_OFFSET rbp, 0 | ||
653 | movq %rsp,%rbp | ||
654 | CFI_DEF_CFA_REGISTER rbp | ||
655 | testl $3,CS(%rdi) | ||
656 | je 1f | ||
657 | SWAPGS | ||
658 | /* irqcount is used to check if a CPU is already on an interrupt | ||
659 | stack or not. While this is essentially redundant with preempt_count | ||
660 | it is a little cheaper to use a separate counter in the PDA | ||
661 | (short of moving irq_enter into assembly, which would be too | ||
662 | much work) */ | ||
663 | 1: incl %gs:pda_irqcount | ||
664 | cmoveq %gs:pda_irqstackptr,%rsp | ||
665 | push %rbp # backlink for old unwinder | ||
666 | /* | ||
667 | * We entered an interrupt context - irqs are off: | ||
668 | */ | ||
669 | TRACE_IRQS_OFF | ||
670 | call \func | 783 | call \func |
671 | .endm | 784 | .endm |
672 | 785 | ||
673 | ENTRY(common_interrupt) | 786 | /* |
787 | * The interrupt stubs push (~vector+0x80) onto the stack and | ||
788 | * then jump to common_interrupt. | ||
789 | */ | ||
790 | .p2align CONFIG_X86_L1_CACHE_SHIFT | ||
791 | common_interrupt: | ||
674 | XCPT_FRAME | 792 | XCPT_FRAME |
793 | addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ | ||
675 | interrupt do_IRQ | 794 | interrupt do_IRQ |
676 | /* 0(%rsp): oldrsp-ARGOFFSET */ | 795 | /* 0(%rsp): oldrsp-ARGOFFSET */ |
677 | ret_from_intr: | 796 | ret_from_intr: |
@@ -685,12 +804,12 @@ exit_intr: | |||
685 | GET_THREAD_INFO(%rcx) | 804 | GET_THREAD_INFO(%rcx) |
686 | testl $3,CS-ARGOFFSET(%rsp) | 805 | testl $3,CS-ARGOFFSET(%rsp) |
687 | je retint_kernel | 806 | je retint_kernel |
688 | 807 | ||
689 | /* Interrupt came from user space */ | 808 | /* Interrupt came from user space */ |
690 | /* | 809 | /* |
691 | * Has a correct top of stack, but a partial stack frame | 810 | * Has a correct top of stack, but a partial stack frame |
692 | * %rcx: thread info. Interrupts off. | 811 | * %rcx: thread info. Interrupts off. |
693 | */ | 812 | */ |
694 | retint_with_reschedule: | 813 | retint_with_reschedule: |
695 | movl $_TIF_WORK_MASK,%edi | 814 | movl $_TIF_WORK_MASK,%edi |
696 | retint_check: | 815 | retint_check: |
@@ -763,20 +882,20 @@ retint_careful: | |||
763 | pushq %rdi | 882 | pushq %rdi |
764 | CFI_ADJUST_CFA_OFFSET 8 | 883 | CFI_ADJUST_CFA_OFFSET 8 |
765 | call schedule | 884 | call schedule |
766 | popq %rdi | 885 | popq %rdi |
767 | CFI_ADJUST_CFA_OFFSET -8 | 886 | CFI_ADJUST_CFA_OFFSET -8 |
768 | GET_THREAD_INFO(%rcx) | 887 | GET_THREAD_INFO(%rcx) |
769 | DISABLE_INTERRUPTS(CLBR_NONE) | 888 | DISABLE_INTERRUPTS(CLBR_NONE) |
770 | TRACE_IRQS_OFF | 889 | TRACE_IRQS_OFF |
771 | jmp retint_check | 890 | jmp retint_check |
772 | 891 | ||
773 | retint_signal: | 892 | retint_signal: |
774 | testl $_TIF_DO_NOTIFY_MASK,%edx | 893 | testl $_TIF_DO_NOTIFY_MASK,%edx |
775 | jz retint_swapgs | 894 | jz retint_swapgs |
776 | TRACE_IRQS_ON | 895 | TRACE_IRQS_ON |
777 | ENABLE_INTERRUPTS(CLBR_NONE) | 896 | ENABLE_INTERRUPTS(CLBR_NONE) |
778 | SAVE_REST | 897 | SAVE_REST |
779 | movq $-1,ORIG_RAX(%rsp) | 898 | movq $-1,ORIG_RAX(%rsp) |
780 | xorl %esi,%esi # oldset | 899 | xorl %esi,%esi # oldset |
781 | movq %rsp,%rdi # &pt_regs | 900 | movq %rsp,%rdi # &pt_regs |
782 | call do_notify_resume | 901 | call do_notify_resume |
@@ -798,324 +917,211 @@ ENTRY(retint_kernel) | |||
798 | jnc retint_restore_args | 917 | jnc retint_restore_args |
799 | call preempt_schedule_irq | 918 | call preempt_schedule_irq |
800 | jmp exit_intr | 919 | jmp exit_intr |
801 | #endif | 920 | #endif |
802 | 921 | ||
803 | CFI_ENDPROC | 922 | CFI_ENDPROC |
804 | END(common_interrupt) | 923 | END(common_interrupt) |
805 | 924 | ||
806 | /* | 925 | /* |
807 | * APIC interrupts. | 926 | * APIC interrupts. |
808 | */ | 927 | */ |
809 | .macro apicinterrupt num,func | 928 | .macro apicinterrupt num sym do_sym |
929 | ENTRY(\sym) | ||
810 | INTR_FRAME | 930 | INTR_FRAME |
811 | pushq $~(\num) | 931 | pushq $~(\num) |
812 | CFI_ADJUST_CFA_OFFSET 8 | 932 | CFI_ADJUST_CFA_OFFSET 8 |
813 | interrupt \func | 933 | interrupt \do_sym |
814 | jmp ret_from_intr | 934 | jmp ret_from_intr |
815 | CFI_ENDPROC | 935 | CFI_ENDPROC |
816 | .endm | 936 | END(\sym) |
817 | 937 | .endm | |
818 | ENTRY(thermal_interrupt) | ||
819 | apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt | ||
820 | END(thermal_interrupt) | ||
821 | |||
822 | ENTRY(threshold_interrupt) | ||
823 | apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt | ||
824 | END(threshold_interrupt) | ||
825 | |||
826 | #ifdef CONFIG_SMP | ||
827 | ENTRY(reschedule_interrupt) | ||
828 | apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt | ||
829 | END(reschedule_interrupt) | ||
830 | |||
831 | .macro INVALIDATE_ENTRY num | ||
832 | ENTRY(invalidate_interrupt\num) | ||
833 | apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt | ||
834 | END(invalidate_interrupt\num) | ||
835 | .endm | ||
836 | 938 | ||
837 | INVALIDATE_ENTRY 0 | 939 | #ifdef CONFIG_SMP |
838 | INVALIDATE_ENTRY 1 | 940 | apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \ |
839 | INVALIDATE_ENTRY 2 | 941 | irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt |
840 | INVALIDATE_ENTRY 3 | ||
841 | INVALIDATE_ENTRY 4 | ||
842 | INVALIDATE_ENTRY 5 | ||
843 | INVALIDATE_ENTRY 6 | ||
844 | INVALIDATE_ENTRY 7 | ||
845 | |||
846 | ENTRY(call_function_interrupt) | ||
847 | apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt | ||
848 | END(call_function_interrupt) | ||
849 | ENTRY(call_function_single_interrupt) | ||
850 | apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt | ||
851 | END(call_function_single_interrupt) | ||
852 | ENTRY(irq_move_cleanup_interrupt) | ||
853 | apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt | ||
854 | END(irq_move_cleanup_interrupt) | ||
855 | #endif | 942 | #endif |
856 | 943 | ||
857 | ENTRY(apic_timer_interrupt) | 944 | apicinterrupt UV_BAU_MESSAGE \ |
858 | apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt | 945 | uv_bau_message_intr1 uv_bau_message_interrupt |
859 | END(apic_timer_interrupt) | 946 | apicinterrupt LOCAL_TIMER_VECTOR \ |
947 | apic_timer_interrupt smp_apic_timer_interrupt | ||
948 | |||
949 | #ifdef CONFIG_SMP | ||
950 | apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \ | ||
951 | invalidate_interrupt0 smp_invalidate_interrupt | ||
952 | apicinterrupt INVALIDATE_TLB_VECTOR_START+1 \ | ||
953 | invalidate_interrupt1 smp_invalidate_interrupt | ||
954 | apicinterrupt INVALIDATE_TLB_VECTOR_START+2 \ | ||
955 | invalidate_interrupt2 smp_invalidate_interrupt | ||
956 | apicinterrupt INVALIDATE_TLB_VECTOR_START+3 \ | ||
957 | invalidate_interrupt3 smp_invalidate_interrupt | ||
958 | apicinterrupt INVALIDATE_TLB_VECTOR_START+4 \ | ||
959 | invalidate_interrupt4 smp_invalidate_interrupt | ||
960 | apicinterrupt INVALIDATE_TLB_VECTOR_START+5 \ | ||
961 | invalidate_interrupt5 smp_invalidate_interrupt | ||
962 | apicinterrupt INVALIDATE_TLB_VECTOR_START+6 \ | ||
963 | invalidate_interrupt6 smp_invalidate_interrupt | ||
964 | apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \ | ||
965 | invalidate_interrupt7 smp_invalidate_interrupt | ||
966 | #endif | ||
860 | 967 | ||
861 | ENTRY(uv_bau_message_intr1) | 968 | apicinterrupt THRESHOLD_APIC_VECTOR \ |
862 | apicinterrupt 220,uv_bau_message_interrupt | 969 | threshold_interrupt mce_threshold_interrupt |
863 | END(uv_bau_message_intr1) | 970 | apicinterrupt THERMAL_APIC_VECTOR \ |
971 | thermal_interrupt smp_thermal_interrupt | ||
972 | |||
973 | #ifdef CONFIG_SMP | ||
974 | apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \ | ||
975 | call_function_single_interrupt smp_call_function_single_interrupt | ||
976 | apicinterrupt CALL_FUNCTION_VECTOR \ | ||
977 | call_function_interrupt smp_call_function_interrupt | ||
978 | apicinterrupt RESCHEDULE_VECTOR \ | ||
979 | reschedule_interrupt smp_reschedule_interrupt | ||
980 | #endif | ||
864 | 981 | ||
865 | ENTRY(error_interrupt) | 982 | apicinterrupt ERROR_APIC_VECTOR \ |
866 | apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt | 983 | error_interrupt smp_error_interrupt |
867 | END(error_interrupt) | 984 | apicinterrupt SPURIOUS_APIC_VECTOR \ |
985 | spurious_interrupt smp_spurious_interrupt | ||
868 | 986 | ||
869 | ENTRY(spurious_interrupt) | ||
870 | apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt | ||
871 | END(spurious_interrupt) | ||
872 | |||
873 | /* | 987 | /* |
874 | * Exception entry points. | 988 | * Exception entry points. |
875 | */ | 989 | */ |
876 | .macro zeroentry sym | 990 | .macro zeroentry sym do_sym |
991 | ENTRY(\sym) | ||
877 | INTR_FRAME | 992 | INTR_FRAME |
878 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 993 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
879 | pushq $0 /* push error code/oldrax */ | 994 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ |
880 | CFI_ADJUST_CFA_OFFSET 8 | 995 | subq $15*8,%rsp |
881 | pushq %rax /* push real oldrax to the rdi slot */ | 996 | CFI_ADJUST_CFA_OFFSET 15*8 |
882 | CFI_ADJUST_CFA_OFFSET 8 | 997 | call error_entry |
883 | CFI_REL_OFFSET rax,0 | 998 | DEFAULT_FRAME 0 |
884 | leaq \sym(%rip),%rax | 999 | movq %rsp,%rdi /* pt_regs pointer */ |
885 | jmp error_entry | 1000 | xorl %esi,%esi /* no error code */ |
1001 | call \do_sym | ||
1002 | jmp error_exit /* %ebx: no swapgs flag */ | ||
886 | CFI_ENDPROC | 1003 | CFI_ENDPROC |
887 | .endm | 1004 | END(\sym) |
1005 | .endm | ||
888 | 1006 | ||
889 | .macro errorentry sym | 1007 | .macro paranoidzeroentry sym do_sym |
890 | XCPT_FRAME | 1008 | ENTRY(\sym) |
1009 | INTR_FRAME | ||
891 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1010 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
892 | pushq %rax | 1011 | pushq $-1 /* ORIG_RAX: no syscall to restart */ |
893 | CFI_ADJUST_CFA_OFFSET 8 | 1012 | CFI_ADJUST_CFA_OFFSET 8 |
894 | CFI_REL_OFFSET rax,0 | 1013 | subq $15*8, %rsp |
895 | leaq \sym(%rip),%rax | 1014 | call save_paranoid |
896 | jmp error_entry | 1015 | TRACE_IRQS_OFF |
1016 | movq %rsp,%rdi /* pt_regs pointer */ | ||
1017 | xorl %esi,%esi /* no error code */ | ||
1018 | call \do_sym | ||
1019 | jmp paranoid_exit /* %ebx: no swapgs flag */ | ||
897 | CFI_ENDPROC | 1020 | CFI_ENDPROC |
898 | .endm | 1021 | END(\sym) |
1022 | .endm | ||
899 | 1023 | ||
900 | /* error code is on the stack already */ | 1024 | .macro paranoidzeroentry_ist sym do_sym ist |
901 | /* handle NMI like exceptions that can happen everywhere */ | 1025 | ENTRY(\sym) |
902 | .macro paranoidentry sym, ist=0, irqtrace=1 | 1026 | INTR_FRAME |
903 | SAVE_ALL | 1027 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
904 | cld | 1028 | pushq $-1 /* ORIG_RAX: no syscall to restart */ |
905 | movl $1,%ebx | 1029 | CFI_ADJUST_CFA_OFFSET 8 |
906 | movl $MSR_GS_BASE,%ecx | 1030 | subq $15*8, %rsp |
907 | rdmsr | 1031 | call save_paranoid |
908 | testl %edx,%edx | ||
909 | js 1f | ||
910 | SWAPGS | ||
911 | xorl %ebx,%ebx | ||
912 | 1: | ||
913 | .if \ist | ||
914 | movq %gs:pda_data_offset, %rbp | ||
915 | .endif | ||
916 | .if \irqtrace | ||
917 | TRACE_IRQS_OFF | ||
918 | .endif | ||
919 | movq %rsp,%rdi | ||
920 | movq ORIG_RAX(%rsp),%rsi | ||
921 | movq $-1,ORIG_RAX(%rsp) | ||
922 | .if \ist | ||
923 | subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) | ||
924 | .endif | ||
925 | call \sym | ||
926 | .if \ist | ||
927 | addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) | ||
928 | .endif | ||
929 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
930 | .if \irqtrace | ||
931 | TRACE_IRQS_OFF | 1032 | TRACE_IRQS_OFF |
932 | .endif | 1033 | movq %rsp,%rdi /* pt_regs pointer */ |
933 | .endm | 1034 | xorl %esi,%esi /* no error code */ |
1035 | movq %gs:pda_data_offset, %rbp | ||
1036 | subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) | ||
1037 | call \do_sym | ||
1038 | addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) | ||
1039 | jmp paranoid_exit /* %ebx: no swapgs flag */ | ||
1040 | CFI_ENDPROC | ||
1041 | END(\sym) | ||
1042 | .endm | ||
934 | 1043 | ||
935 | /* | 1044 | .macro errorentry sym do_sym |
936 | * "Paranoid" exit path from exception stack. | 1045 | ENTRY(\sym) |
937 | * Paranoid because this is used by NMIs and cannot take | 1046 | XCPT_FRAME |
938 | * any kernel state for granted. | 1047 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
939 | * We don't do kernel preemption checks here, because only | 1048 | subq $15*8,%rsp |
940 | * NMI should be common and it does not enable IRQs and | 1049 | CFI_ADJUST_CFA_OFFSET 15*8 |
941 | * cannot get reschedule ticks. | 1050 | call error_entry |
942 | * | 1051 | DEFAULT_FRAME 0 |
943 | * "trace" is 0 for the NMI handler only, because irq-tracing | 1052 | movq %rsp,%rdi /* pt_regs pointer */ |
944 | * is fundamentally NMI-unsafe. (we cannot change the soft and | 1053 | movq ORIG_RAX(%rsp),%rsi /* get error code */ |
945 | * hard flags at once, atomically) | 1054 | movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ |
946 | */ | 1055 | call \do_sym |
947 | .macro paranoidexit trace=1 | 1056 | jmp error_exit /* %ebx: no swapgs flag */ |
948 | /* ebx: no swapgs flag */ | ||
949 | paranoid_exit\trace: | ||
950 | testl %ebx,%ebx /* swapgs needed? */ | ||
951 | jnz paranoid_restore\trace | ||
952 | testl $3,CS(%rsp) | ||
953 | jnz paranoid_userspace\trace | ||
954 | paranoid_swapgs\trace: | ||
955 | .if \trace | ||
956 | TRACE_IRQS_IRETQ 0 | ||
957 | .endif | ||
958 | SWAPGS_UNSAFE_STACK | ||
959 | paranoid_restore\trace: | ||
960 | RESTORE_ALL 8 | ||
961 | jmp irq_return | ||
962 | paranoid_userspace\trace: | ||
963 | GET_THREAD_INFO(%rcx) | ||
964 | movl TI_flags(%rcx),%ebx | ||
965 | andl $_TIF_WORK_MASK,%ebx | ||
966 | jz paranoid_swapgs\trace | ||
967 | movq %rsp,%rdi /* &pt_regs */ | ||
968 | call sync_regs | ||
969 | movq %rax,%rsp /* switch stack for scheduling */ | ||
970 | testl $_TIF_NEED_RESCHED,%ebx | ||
971 | jnz paranoid_schedule\trace | ||
972 | movl %ebx,%edx /* arg3: thread flags */ | ||
973 | .if \trace | ||
974 | TRACE_IRQS_ON | ||
975 | .endif | ||
976 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
977 | xorl %esi,%esi /* arg2: oldset */ | ||
978 | movq %rsp,%rdi /* arg1: &pt_regs */ | ||
979 | call do_notify_resume | ||
980 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
981 | .if \trace | ||
982 | TRACE_IRQS_OFF | ||
983 | .endif | ||
984 | jmp paranoid_userspace\trace | ||
985 | paranoid_schedule\trace: | ||
986 | .if \trace | ||
987 | TRACE_IRQS_ON | ||
988 | .endif | ||
989 | ENABLE_INTERRUPTS(CLBR_ANY) | ||
990 | call schedule | ||
991 | DISABLE_INTERRUPTS(CLBR_ANY) | ||
992 | .if \trace | ||
993 | TRACE_IRQS_OFF | ||
994 | .endif | ||
995 | jmp paranoid_userspace\trace | ||
996 | CFI_ENDPROC | 1057 | CFI_ENDPROC |
997 | .endm | 1058 | END(\sym) |
1059 | .endm | ||
998 | 1060 | ||
999 | /* | 1061 | /* error code is on the stack already */ |
1000 | * Exception entry point. This expects an error code/orig_rax on the stack | 1062 | .macro paranoiderrorentry sym do_sym |
1001 | * and the exception handler in %rax. | 1063 | ENTRY(\sym) |
1002 | */ | 1064 | XCPT_FRAME |
1003 | KPROBE_ENTRY(error_entry) | 1065 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
1004 | _frame RDI | 1066 | subq $15*8,%rsp |
1005 | CFI_REL_OFFSET rax,0 | 1067 | CFI_ADJUST_CFA_OFFSET 15*8 |
1006 | /* rdi slot contains rax, oldrax contains error code */ | 1068 | call save_paranoid |
1007 | cld | 1069 | DEFAULT_FRAME 0 |
1008 | subq $14*8,%rsp | ||
1009 | CFI_ADJUST_CFA_OFFSET (14*8) | ||
1010 | movq %rsi,13*8(%rsp) | ||
1011 | CFI_REL_OFFSET rsi,RSI | ||
1012 | movq 14*8(%rsp),%rsi /* load rax from rdi slot */ | ||
1013 | CFI_REGISTER rax,rsi | ||
1014 | movq %rdx,12*8(%rsp) | ||
1015 | CFI_REL_OFFSET rdx,RDX | ||
1016 | movq %rcx,11*8(%rsp) | ||
1017 | CFI_REL_OFFSET rcx,RCX | ||
1018 | movq %rsi,10*8(%rsp) /* store rax */ | ||
1019 | CFI_REL_OFFSET rax,RAX | ||
1020 | movq %r8, 9*8(%rsp) | ||
1021 | CFI_REL_OFFSET r8,R8 | ||
1022 | movq %r9, 8*8(%rsp) | ||
1023 | CFI_REL_OFFSET r9,R9 | ||
1024 | movq %r10,7*8(%rsp) | ||
1025 | CFI_REL_OFFSET r10,R10 | ||
1026 | movq %r11,6*8(%rsp) | ||
1027 | CFI_REL_OFFSET r11,R11 | ||
1028 | movq %rbx,5*8(%rsp) | ||
1029 | CFI_REL_OFFSET rbx,RBX | ||
1030 | movq %rbp,4*8(%rsp) | ||
1031 | CFI_REL_OFFSET rbp,RBP | ||
1032 | movq %r12,3*8(%rsp) | ||
1033 | CFI_REL_OFFSET r12,R12 | ||
1034 | movq %r13,2*8(%rsp) | ||
1035 | CFI_REL_OFFSET r13,R13 | ||
1036 | movq %r14,1*8(%rsp) | ||
1037 | CFI_REL_OFFSET r14,R14 | ||
1038 | movq %r15,(%rsp) | ||
1039 | CFI_REL_OFFSET r15,R15 | ||
1040 | xorl %ebx,%ebx | ||
1041 | testl $3,CS(%rsp) | ||
1042 | je error_kernelspace | ||
1043 | error_swapgs: | ||
1044 | SWAPGS | ||
1045 | error_sti: | ||
1046 | TRACE_IRQS_OFF | ||
1047 | movq %rdi,RDI(%rsp) | ||
1048 | CFI_REL_OFFSET rdi,RDI | ||
1049 | movq %rsp,%rdi | ||
1050 | movq ORIG_RAX(%rsp),%rsi /* get error code */ | ||
1051 | movq $-1,ORIG_RAX(%rsp) | ||
1052 | call *%rax | ||
1053 | /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ | ||
1054 | error_exit: | ||
1055 | movl %ebx,%eax | ||
1056 | RESTORE_REST | ||
1057 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
1058 | TRACE_IRQS_OFF | 1070 | TRACE_IRQS_OFF |
1059 | GET_THREAD_INFO(%rcx) | 1071 | movq %rsp,%rdi /* pt_regs pointer */ |
1060 | testl %eax,%eax | 1072 | movq ORIG_RAX(%rsp),%rsi /* get error code */ |
1061 | jne retint_kernel | 1073 | movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ |
1062 | LOCKDEP_SYS_EXIT_IRQ | 1074 | call \do_sym |
1063 | movl TI_flags(%rcx),%edx | 1075 | jmp paranoid_exit /* %ebx: no swapgs flag */ |
1064 | movl $_TIF_WORK_MASK,%edi | ||
1065 | andl %edi,%edx | ||
1066 | jnz retint_careful | ||
1067 | jmp retint_swapgs | ||
1068 | CFI_ENDPROC | 1076 | CFI_ENDPROC |
1077 | END(\sym) | ||
1078 | .endm | ||
1069 | 1079 | ||
1070 | error_kernelspace: | 1080 | zeroentry divide_error do_divide_error |
1071 | incl %ebx | 1081 | zeroentry overflow do_overflow |
1072 | /* There are two places in the kernel that can potentially fault with | 1082 | zeroentry bounds do_bounds |
1073 | usergs. Handle them here. The exception handlers after | 1083 | zeroentry invalid_op do_invalid_op |
1074 | iret run with kernel gs again, so don't set the user space flag. | 1084 | zeroentry device_not_available do_device_not_available |
1075 | B stepping K8s sometimes report an truncated RIP for IRET | 1085 | paranoiderrorentry double_fault do_double_fault |
1076 | exceptions returning to compat mode. Check for these here too. */ | 1086 | zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun |
1077 | leaq irq_return(%rip),%rcx | 1087 | errorentry invalid_TSS do_invalid_TSS |
1078 | cmpq %rcx,RIP(%rsp) | 1088 | errorentry segment_not_present do_segment_not_present |
1079 | je error_swapgs | 1089 | zeroentry spurious_interrupt_bug do_spurious_interrupt_bug |
1080 | movl %ecx,%ecx /* zero extend */ | 1090 | zeroentry coprocessor_error do_coprocessor_error |
1081 | cmpq %rcx,RIP(%rsp) | 1091 | errorentry alignment_check do_alignment_check |
1082 | je error_swapgs | 1092 | zeroentry simd_coprocessor_error do_simd_coprocessor_error |
1083 | cmpq $gs_change,RIP(%rsp) | 1093 | |
1084 | je error_swapgs | 1094 | /* Reload gs selector with exception handling */ |
1085 | jmp error_sti | 1095 | /* edi: new selector */ |
1086 | KPROBE_END(error_entry) | ||
1087 | |||
1088 | /* Reload gs selector with exception handling */ | ||
1089 | /* edi: new selector */ | ||
1090 | ENTRY(native_load_gs_index) | 1096 | ENTRY(native_load_gs_index) |
1091 | CFI_STARTPROC | 1097 | CFI_STARTPROC |
1092 | pushf | 1098 | pushf |
1093 | CFI_ADJUST_CFA_OFFSET 8 | 1099 | CFI_ADJUST_CFA_OFFSET 8 |
1094 | DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI)) | 1100 | DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI)) |
1095 | SWAPGS | 1101 | SWAPGS |
1096 | gs_change: | 1102 | gs_change: |
1097 | movl %edi,%gs | 1103 | movl %edi,%gs |
1098 | 2: mfence /* workaround */ | 1104 | 2: mfence /* workaround */ |
1099 | SWAPGS | 1105 | SWAPGS |
1100 | popf | 1106 | popf |
1101 | CFI_ADJUST_CFA_OFFSET -8 | 1107 | CFI_ADJUST_CFA_OFFSET -8 |
1102 | ret | 1108 | ret |
1103 | CFI_ENDPROC | 1109 | CFI_ENDPROC |
1104 | ENDPROC(native_load_gs_index) | 1110 | END(native_load_gs_index) |
1105 | 1111 | ||
1106 | .section __ex_table,"a" | 1112 | .section __ex_table,"a" |
1107 | .align 8 | 1113 | .align 8 |
1108 | .quad gs_change,bad_gs | 1114 | .quad gs_change,bad_gs |
1109 | .previous | 1115 | .previous |
1110 | .section .fixup,"ax" | 1116 | .section .fixup,"ax" |
1111 | /* running with kernelgs */ | 1117 | /* running with kernelgs */ |
1112 | bad_gs: | 1118 | bad_gs: |
1113 | SWAPGS /* switch back to user gs */ | 1119 | SWAPGS /* switch back to user gs */ |
1114 | xorl %eax,%eax | 1120 | xorl %eax,%eax |
1115 | movl %eax,%gs | 1121 | movl %eax,%gs |
1116 | jmp 2b | 1122 | jmp 2b |
1117 | .previous | 1123 | .previous |
1118 | 1124 | ||
1119 | /* | 1125 | /* |
1120 | * Create a kernel thread. | 1126 | * Create a kernel thread. |
1121 | * | 1127 | * |
@@ -1138,7 +1144,7 @@ ENTRY(kernel_thread) | |||
1138 | 1144 | ||
1139 | xorl %r8d,%r8d | 1145 | xorl %r8d,%r8d |
1140 | xorl %r9d,%r9d | 1146 | xorl %r9d,%r9d |
1141 | 1147 | ||
1142 | # clone now | 1148 | # clone now |
1143 | call do_fork | 1149 | call do_fork |
1144 | movq %rax,RAX(%rsp) | 1150 | movq %rax,RAX(%rsp) |
@@ -1149,15 +1155,15 @@ ENTRY(kernel_thread) | |||
1149 | * so internally to the x86_64 port you can rely on kernel_thread() | 1155 | * so internally to the x86_64 port you can rely on kernel_thread() |
1150 | * not to reschedule the child before returning, this avoids the need | 1156 | * not to reschedule the child before returning, this avoids the need |
1151 | * of hacks for example to fork off the per-CPU idle tasks. | 1157 | * of hacks for example to fork off the per-CPU idle tasks. |
1152 | * [Hopefully no generic code relies on the reschedule -AK] | 1158 | * [Hopefully no generic code relies on the reschedule -AK] |
1153 | */ | 1159 | */ |
1154 | RESTORE_ALL | 1160 | RESTORE_ALL |
1155 | UNFAKE_STACK_FRAME | 1161 | UNFAKE_STACK_FRAME |
1156 | ret | 1162 | ret |
1157 | CFI_ENDPROC | 1163 | CFI_ENDPROC |
1158 | ENDPROC(kernel_thread) | 1164 | END(kernel_thread) |
1159 | 1165 | ||
1160 | child_rip: | 1166 | ENTRY(child_rip) |
1161 | pushq $0 # fake return address | 1167 | pushq $0 # fake return address |
1162 | CFI_STARTPROC | 1168 | CFI_STARTPROC |
1163 | /* | 1169 | /* |
@@ -1170,8 +1176,9 @@ child_rip: | |||
1170 | # exit | 1176 | # exit |
1171 | mov %eax, %edi | 1177 | mov %eax, %edi |
1172 | call do_exit | 1178 | call do_exit |
1179 | ud2 # padding for call trace | ||
1173 | CFI_ENDPROC | 1180 | CFI_ENDPROC |
1174 | ENDPROC(child_rip) | 1181 | END(child_rip) |
1175 | 1182 | ||
1176 | /* | 1183 | /* |
1177 | * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. | 1184 | * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. |
@@ -1191,10 +1198,10 @@ ENDPROC(child_rip) | |||
1191 | ENTRY(kernel_execve) | 1198 | ENTRY(kernel_execve) |
1192 | CFI_STARTPROC | 1199 | CFI_STARTPROC |
1193 | FAKE_STACK_FRAME $0 | 1200 | FAKE_STACK_FRAME $0 |
1194 | SAVE_ALL | 1201 | SAVE_ALL |
1195 | movq %rsp,%rcx | 1202 | movq %rsp,%rcx |
1196 | call sys_execve | 1203 | call sys_execve |
1197 | movq %rax, RAX(%rsp) | 1204 | movq %rax, RAX(%rsp) |
1198 | RESTORE_REST | 1205 | RESTORE_REST |
1199 | testq %rax,%rax | 1206 | testq %rax,%rax |
1200 | je int_ret_from_sys_call | 1207 | je int_ret_from_sys_call |
@@ -1202,129 +1209,7 @@ ENTRY(kernel_execve) | |||
1202 | UNFAKE_STACK_FRAME | 1209 | UNFAKE_STACK_FRAME |
1203 | ret | 1210 | ret |
1204 | CFI_ENDPROC | 1211 | CFI_ENDPROC |
1205 | ENDPROC(kernel_execve) | 1212 | END(kernel_execve) |
1206 | |||
1207 | KPROBE_ENTRY(page_fault) | ||
1208 | errorentry do_page_fault | ||
1209 | KPROBE_END(page_fault) | ||
1210 | |||
1211 | ENTRY(coprocessor_error) | ||
1212 | zeroentry do_coprocessor_error | ||
1213 | END(coprocessor_error) | ||
1214 | |||
1215 | ENTRY(simd_coprocessor_error) | ||
1216 | zeroentry do_simd_coprocessor_error | ||
1217 | END(simd_coprocessor_error) | ||
1218 | |||
1219 | ENTRY(device_not_available) | ||
1220 | zeroentry do_device_not_available | ||
1221 | END(device_not_available) | ||
1222 | |||
1223 | /* runs on exception stack */ | ||
1224 | KPROBE_ENTRY(debug) | ||
1225 | INTR_FRAME | ||
1226 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
1227 | pushq $0 | ||
1228 | CFI_ADJUST_CFA_OFFSET 8 | ||
1229 | paranoidentry do_debug, DEBUG_STACK | ||
1230 | paranoidexit | ||
1231 | KPROBE_END(debug) | ||
1232 | |||
1233 | /* runs on exception stack */ | ||
1234 | KPROBE_ENTRY(nmi) | ||
1235 | INTR_FRAME | ||
1236 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
1237 | pushq $-1 | ||
1238 | CFI_ADJUST_CFA_OFFSET 8 | ||
1239 | paranoidentry do_nmi, 0, 0 | ||
1240 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
1241 | paranoidexit 0 | ||
1242 | #else | ||
1243 | jmp paranoid_exit1 | ||
1244 | CFI_ENDPROC | ||
1245 | #endif | ||
1246 | KPROBE_END(nmi) | ||
1247 | |||
1248 | KPROBE_ENTRY(int3) | ||
1249 | INTR_FRAME | ||
1250 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
1251 | pushq $0 | ||
1252 | CFI_ADJUST_CFA_OFFSET 8 | ||
1253 | paranoidentry do_int3, DEBUG_STACK | ||
1254 | jmp paranoid_exit1 | ||
1255 | CFI_ENDPROC | ||
1256 | KPROBE_END(int3) | ||
1257 | |||
1258 | ENTRY(overflow) | ||
1259 | zeroentry do_overflow | ||
1260 | END(overflow) | ||
1261 | |||
1262 | ENTRY(bounds) | ||
1263 | zeroentry do_bounds | ||
1264 | END(bounds) | ||
1265 | |||
1266 | ENTRY(invalid_op) | ||
1267 | zeroentry do_invalid_op | ||
1268 | END(invalid_op) | ||
1269 | |||
1270 | ENTRY(coprocessor_segment_overrun) | ||
1271 | zeroentry do_coprocessor_segment_overrun | ||
1272 | END(coprocessor_segment_overrun) | ||
1273 | |||
1274 | /* runs on exception stack */ | ||
1275 | ENTRY(double_fault) | ||
1276 | XCPT_FRAME | ||
1277 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
1278 | paranoidentry do_double_fault | ||
1279 | jmp paranoid_exit1 | ||
1280 | CFI_ENDPROC | ||
1281 | END(double_fault) | ||
1282 | |||
1283 | ENTRY(invalid_TSS) | ||
1284 | errorentry do_invalid_TSS | ||
1285 | END(invalid_TSS) | ||
1286 | |||
1287 | ENTRY(segment_not_present) | ||
1288 | errorentry do_segment_not_present | ||
1289 | END(segment_not_present) | ||
1290 | |||
1291 | /* runs on exception stack */ | ||
1292 | ENTRY(stack_segment) | ||
1293 | XCPT_FRAME | ||
1294 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
1295 | paranoidentry do_stack_segment | ||
1296 | jmp paranoid_exit1 | ||
1297 | CFI_ENDPROC | ||
1298 | END(stack_segment) | ||
1299 | |||
1300 | KPROBE_ENTRY(general_protection) | ||
1301 | errorentry do_general_protection | ||
1302 | KPROBE_END(general_protection) | ||
1303 | |||
1304 | ENTRY(alignment_check) | ||
1305 | errorentry do_alignment_check | ||
1306 | END(alignment_check) | ||
1307 | |||
1308 | ENTRY(divide_error) | ||
1309 | zeroentry do_divide_error | ||
1310 | END(divide_error) | ||
1311 | |||
1312 | ENTRY(spurious_interrupt_bug) | ||
1313 | zeroentry do_spurious_interrupt_bug | ||
1314 | END(spurious_interrupt_bug) | ||
1315 | |||
1316 | #ifdef CONFIG_X86_MCE | ||
1317 | /* runs on exception stack */ | ||
1318 | ENTRY(machine_check) | ||
1319 | INTR_FRAME | ||
1320 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
1321 | pushq $0 | ||
1322 | CFI_ADJUST_CFA_OFFSET 8 | ||
1323 | paranoidentry do_machine_check | ||
1324 | jmp paranoid_exit1 | ||
1325 | CFI_ENDPROC | ||
1326 | END(machine_check) | ||
1327 | #endif | ||
1328 | 1213 | ||
1329 | /* Call softirq on interrupt stack. Interrupts are off. */ | 1214 | /* Call softirq on interrupt stack. Interrupts are off. */ |
1330 | ENTRY(call_softirq) | 1215 | ENTRY(call_softirq) |
@@ -1344,40 +1229,33 @@ ENTRY(call_softirq) | |||
1344 | decl %gs:pda_irqcount | 1229 | decl %gs:pda_irqcount |
1345 | ret | 1230 | ret |
1346 | CFI_ENDPROC | 1231 | CFI_ENDPROC |
1347 | ENDPROC(call_softirq) | 1232 | END(call_softirq) |
1348 | |||
1349 | KPROBE_ENTRY(ignore_sysret) | ||
1350 | CFI_STARTPROC | ||
1351 | mov $-ENOSYS,%eax | ||
1352 | sysret | ||
1353 | CFI_ENDPROC | ||
1354 | ENDPROC(ignore_sysret) | ||
1355 | 1233 | ||
1356 | #ifdef CONFIG_XEN | 1234 | #ifdef CONFIG_XEN |
1357 | ENTRY(xen_hypervisor_callback) | 1235 | zeroentry xen_hypervisor_callback xen_do_hypervisor_callback |
1358 | zeroentry xen_do_hypervisor_callback | ||
1359 | END(xen_hypervisor_callback) | ||
1360 | 1236 | ||
1361 | /* | 1237 | /* |
1362 | # A note on the "critical region" in our callback handler. | 1238 | * A note on the "critical region" in our callback handler. |
1363 | # We want to avoid stacking callback handlers due to events occurring | 1239 | * We want to avoid stacking callback handlers due to events occurring |
1364 | # during handling of the last event. To do this, we keep events disabled | 1240 | * during handling of the last event. To do this, we keep events disabled |
1365 | # until we've done all processing. HOWEVER, we must enable events before | 1241 | * until we've done all processing. HOWEVER, we must enable events before |
1366 | # popping the stack frame (can't be done atomically) and so it would still | 1242 | * popping the stack frame (can't be done atomically) and so it would still |
1367 | # be possible to get enough handler activations to overflow the stack. | 1243 | * be possible to get enough handler activations to overflow the stack. |
1368 | # Although unlikely, bugs of that kind are hard to track down, so we'd | 1244 | * Although unlikely, bugs of that kind are hard to track down, so we'd |
1369 | # like to avoid the possibility. | 1245 | * like to avoid the possibility. |
1370 | # So, on entry to the handler we detect whether we interrupted an | 1246 | * So, on entry to the handler we detect whether we interrupted an |
1371 | # existing activation in its critical region -- if so, we pop the current | 1247 | * existing activation in its critical region -- if so, we pop the current |
1372 | # activation and restart the handler using the previous one. | 1248 | * activation and restart the handler using the previous one. |
1373 | */ | 1249 | */ |
1374 | ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) | 1250 | ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) |
1375 | CFI_STARTPROC | 1251 | CFI_STARTPROC |
1376 | /* Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will | 1252 | /* |
1377 | see the correct pointer to the pt_regs */ | 1253 | * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will |
1254 | * see the correct pointer to the pt_regs | ||
1255 | */ | ||
1378 | movq %rdi, %rsp # we don't return, adjust the stack frame | 1256 | movq %rdi, %rsp # we don't return, adjust the stack frame |
1379 | CFI_ENDPROC | 1257 | CFI_ENDPROC |
1380 | CFI_DEFAULT_STACK | 1258 | DEFAULT_FRAME |
1381 | 11: incl %gs:pda_irqcount | 1259 | 11: incl %gs:pda_irqcount |
1382 | movq %rsp,%rbp | 1260 | movq %rsp,%rbp |
1383 | CFI_DEF_CFA_REGISTER rbp | 1261 | CFI_DEF_CFA_REGISTER rbp |
@@ -1392,23 +1270,26 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) | |||
1392 | END(do_hypervisor_callback) | 1270 | END(do_hypervisor_callback) |
1393 | 1271 | ||
1394 | /* | 1272 | /* |
1395 | # Hypervisor uses this for application faults while it executes. | 1273 | * Hypervisor uses this for application faults while it executes. |
1396 | # We get here for two reasons: | 1274 | * We get here for two reasons: |
1397 | # 1. Fault while reloading DS, ES, FS or GS | 1275 | * 1. Fault while reloading DS, ES, FS or GS |
1398 | # 2. Fault while executing IRET | 1276 | * 2. Fault while executing IRET |
1399 | # Category 1 we do not need to fix up as Xen has already reloaded all segment | 1277 | * Category 1 we do not need to fix up as Xen has already reloaded all segment |
1400 | # registers that could be reloaded and zeroed the others. | 1278 | * registers that could be reloaded and zeroed the others. |
1401 | # Category 2 we fix up by killing the current process. We cannot use the | 1279 | * Category 2 we fix up by killing the current process. We cannot use the |
1402 | # normal Linux return path in this case because if we use the IRET hypercall | 1280 | * normal Linux return path in this case because if we use the IRET hypercall |
1403 | # to pop the stack frame we end up in an infinite loop of failsafe callbacks. | 1281 | * to pop the stack frame we end up in an infinite loop of failsafe callbacks. |
1404 | # We distinguish between categories by comparing each saved segment register | 1282 | * We distinguish between categories by comparing each saved segment register |
1405 | # with its current contents: any discrepancy means we in category 1. | 1283 | * with its current contents: any discrepancy means we in category 1. |
1406 | */ | 1284 | */ |
1407 | ENTRY(xen_failsafe_callback) | 1285 | ENTRY(xen_failsafe_callback) |
1408 | framesz = (RIP-0x30) /* workaround buggy gas */ | 1286 | INTR_FRAME 1 (6*8) |
1409 | _frame framesz | 1287 | /*CFI_REL_OFFSET gs,GS*/ |
1410 | CFI_REL_OFFSET rcx, 0 | 1288 | /*CFI_REL_OFFSET fs,FS*/ |
1411 | CFI_REL_OFFSET r11, 8 | 1289 | /*CFI_REL_OFFSET es,ES*/ |
1290 | /*CFI_REL_OFFSET ds,DS*/ | ||
1291 | CFI_REL_OFFSET r11,8 | ||
1292 | CFI_REL_OFFSET rcx,0 | ||
1412 | movw %ds,%cx | 1293 | movw %ds,%cx |
1413 | cmpw %cx,0x10(%rsp) | 1294 | cmpw %cx,0x10(%rsp) |
1414 | CFI_REMEMBER_STATE | 1295 | CFI_REMEMBER_STATE |
@@ -1429,12 +1310,9 @@ ENTRY(xen_failsafe_callback) | |||
1429 | CFI_RESTORE r11 | 1310 | CFI_RESTORE r11 |
1430 | addq $0x30,%rsp | 1311 | addq $0x30,%rsp |
1431 | CFI_ADJUST_CFA_OFFSET -0x30 | 1312 | CFI_ADJUST_CFA_OFFSET -0x30 |
1432 | pushq $0 | 1313 | pushq_cfi $0 /* RIP */ |
1433 | CFI_ADJUST_CFA_OFFSET 8 | 1314 | pushq_cfi %r11 |
1434 | pushq %r11 | 1315 | pushq_cfi %rcx |
1435 | CFI_ADJUST_CFA_OFFSET 8 | ||
1436 | pushq %rcx | ||
1437 | CFI_ADJUST_CFA_OFFSET 8 | ||
1438 | jmp general_protection | 1316 | jmp general_protection |
1439 | CFI_RESTORE_STATE | 1317 | CFI_RESTORE_STATE |
1440 | 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ | 1318 | 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ |
@@ -1444,11 +1322,223 @@ ENTRY(xen_failsafe_callback) | |||
1444 | CFI_RESTORE r11 | 1322 | CFI_RESTORE r11 |
1445 | addq $0x30,%rsp | 1323 | addq $0x30,%rsp |
1446 | CFI_ADJUST_CFA_OFFSET -0x30 | 1324 | CFI_ADJUST_CFA_OFFSET -0x30 |
1447 | pushq $0 | 1325 | pushq_cfi $0 |
1448 | CFI_ADJUST_CFA_OFFSET 8 | ||
1449 | SAVE_ALL | 1326 | SAVE_ALL |
1450 | jmp error_exit | 1327 | jmp error_exit |
1451 | CFI_ENDPROC | 1328 | CFI_ENDPROC |
1452 | END(xen_failsafe_callback) | 1329 | END(xen_failsafe_callback) |
1453 | 1330 | ||
1454 | #endif /* CONFIG_XEN */ | 1331 | #endif /* CONFIG_XEN */ |
1332 | |||
1333 | /* | ||
1334 | * Some functions should be protected against kprobes | ||
1335 | */ | ||
1336 | .pushsection .kprobes.text, "ax" | ||
1337 | |||
1338 | paranoidzeroentry_ist debug do_debug DEBUG_STACK | ||
1339 | paranoidzeroentry_ist int3 do_int3 DEBUG_STACK | ||
1340 | paranoiderrorentry stack_segment do_stack_segment | ||
1341 | errorentry general_protection do_general_protection | ||
1342 | errorentry page_fault do_page_fault | ||
1343 | #ifdef CONFIG_X86_MCE | ||
1344 | paranoidzeroentry machine_check do_machine_check | ||
1345 | #endif | ||
1346 | |||
1347 | /* | ||
1348 | * "Paranoid" exit path from exception stack. | ||
1349 | * Paranoid because this is used by NMIs and cannot take | ||
1350 | * any kernel state for granted. | ||
1351 | * We don't do kernel preemption checks here, because only | ||
1352 | * NMI should be common and it does not enable IRQs and | ||
1353 | * cannot get reschedule ticks. | ||
1354 | * | ||
1355 | * "trace" is 0 for the NMI handler only, because irq-tracing | ||
1356 | * is fundamentally NMI-unsafe. (we cannot change the soft and | ||
1357 | * hard flags at once, atomically) | ||
1358 | */ | ||
1359 | |||
1360 | /* ebx: no swapgs flag */ | ||
1361 | ENTRY(paranoid_exit) | ||
1362 | INTR_FRAME | ||
1363 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
1364 | TRACE_IRQS_OFF | ||
1365 | testl %ebx,%ebx /* swapgs needed? */ | ||
1366 | jnz paranoid_restore | ||
1367 | testl $3,CS(%rsp) | ||
1368 | jnz paranoid_userspace | ||
1369 | paranoid_swapgs: | ||
1370 | TRACE_IRQS_IRETQ 0 | ||
1371 | SWAPGS_UNSAFE_STACK | ||
1372 | paranoid_restore: | ||
1373 | RESTORE_ALL 8 | ||
1374 | jmp irq_return | ||
1375 | paranoid_userspace: | ||
1376 | GET_THREAD_INFO(%rcx) | ||
1377 | movl TI_flags(%rcx),%ebx | ||
1378 | andl $_TIF_WORK_MASK,%ebx | ||
1379 | jz paranoid_swapgs | ||
1380 | movq %rsp,%rdi /* &pt_regs */ | ||
1381 | call sync_regs | ||
1382 | movq %rax,%rsp /* switch stack for scheduling */ | ||
1383 | testl $_TIF_NEED_RESCHED,%ebx | ||
1384 | jnz paranoid_schedule | ||
1385 | movl %ebx,%edx /* arg3: thread flags */ | ||
1386 | TRACE_IRQS_ON | ||
1387 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
1388 | xorl %esi,%esi /* arg2: oldset */ | ||
1389 | movq %rsp,%rdi /* arg1: &pt_regs */ | ||
1390 | call do_notify_resume | ||
1391 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
1392 | TRACE_IRQS_OFF | ||
1393 | jmp paranoid_userspace | ||
1394 | paranoid_schedule: | ||
1395 | TRACE_IRQS_ON | ||
1396 | ENABLE_INTERRUPTS(CLBR_ANY) | ||
1397 | call schedule | ||
1398 | DISABLE_INTERRUPTS(CLBR_ANY) | ||
1399 | TRACE_IRQS_OFF | ||
1400 | jmp paranoid_userspace | ||
1401 | CFI_ENDPROC | ||
1402 | END(paranoid_exit) | ||
1403 | |||
1404 | /* | ||
1405 | * Exception entry point. This expects an error code/orig_rax on the stack. | ||
1406 | * returns in "no swapgs flag" in %ebx. | ||
1407 | */ | ||
1408 | ENTRY(error_entry) | ||
1409 | XCPT_FRAME | ||
1410 | CFI_ADJUST_CFA_OFFSET 15*8 | ||
1411 | /* oldrax contains error code */ | ||
1412 | cld | ||
1413 | movq_cfi rdi, RDI+8 | ||
1414 | movq_cfi rsi, RSI+8 | ||
1415 | movq_cfi rdx, RDX+8 | ||
1416 | movq_cfi rcx, RCX+8 | ||
1417 | movq_cfi rax, RAX+8 | ||
1418 | movq_cfi r8, R8+8 | ||
1419 | movq_cfi r9, R9+8 | ||
1420 | movq_cfi r10, R10+8 | ||
1421 | movq_cfi r11, R11+8 | ||
1422 | movq_cfi rbx, RBX+8 | ||
1423 | movq_cfi rbp, RBP+8 | ||
1424 | movq_cfi r12, R12+8 | ||
1425 | movq_cfi r13, R13+8 | ||
1426 | movq_cfi r14, R14+8 | ||
1427 | movq_cfi r15, R15+8 | ||
1428 | xorl %ebx,%ebx | ||
1429 | testl $3,CS+8(%rsp) | ||
1430 | je error_kernelspace | ||
1431 | error_swapgs: | ||
1432 | SWAPGS | ||
1433 | error_sti: | ||
1434 | TRACE_IRQS_OFF | ||
1435 | ret | ||
1436 | CFI_ENDPROC | ||
1437 | |||
1438 | /* | ||
1439 | * There are two places in the kernel that can potentially fault with | ||
1440 | * usergs. Handle them here. The exception handlers after iret run with | ||
1441 | * kernel gs again, so don't set the user space flag. B stepping K8s | ||
1442 | * sometimes report an truncated RIP for IRET exceptions returning to | ||
1443 | * compat mode. Check for these here too. | ||
1444 | */ | ||
1445 | error_kernelspace: | ||
1446 | incl %ebx | ||
1447 | leaq irq_return(%rip),%rcx | ||
1448 | cmpq %rcx,RIP+8(%rsp) | ||
1449 | je error_swapgs | ||
1450 | movl %ecx,%ecx /* zero extend */ | ||
1451 | cmpq %rcx,RIP+8(%rsp) | ||
1452 | je error_swapgs | ||
1453 | cmpq $gs_change,RIP+8(%rsp) | ||
1454 | je error_swapgs | ||
1455 | jmp error_sti | ||
1456 | END(error_entry) | ||
1457 | |||
1458 | |||
1459 | /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ | ||
1460 | ENTRY(error_exit) | ||
1461 | DEFAULT_FRAME | ||
1462 | movl %ebx,%eax | ||
1463 | RESTORE_REST | ||
1464 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
1465 | TRACE_IRQS_OFF | ||
1466 | GET_THREAD_INFO(%rcx) | ||
1467 | testl %eax,%eax | ||
1468 | jne retint_kernel | ||
1469 | LOCKDEP_SYS_EXIT_IRQ | ||
1470 | movl TI_flags(%rcx),%edx | ||
1471 | movl $_TIF_WORK_MASK,%edi | ||
1472 | andl %edi,%edx | ||
1473 | jnz retint_careful | ||
1474 | jmp retint_swapgs | ||
1475 | CFI_ENDPROC | ||
1476 | END(error_exit) | ||
1477 | |||
1478 | |||
1479 | /* runs on exception stack */ | ||
1480 | ENTRY(nmi) | ||
1481 | INTR_FRAME | ||
1482 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
1483 | pushq_cfi $-1 | ||
1484 | subq $15*8, %rsp | ||
1485 | CFI_ADJUST_CFA_OFFSET 15*8 | ||
1486 | call save_paranoid | ||
1487 | DEFAULT_FRAME 0 | ||
1488 | /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ | ||
1489 | movq %rsp,%rdi | ||
1490 | movq $-1,%rsi | ||
1491 | call do_nmi | ||
1492 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
1493 | /* paranoidexit; without TRACE_IRQS_OFF */ | ||
1494 | /* ebx: no swapgs flag */ | ||
1495 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
1496 | testl %ebx,%ebx /* swapgs needed? */ | ||
1497 | jnz nmi_restore | ||
1498 | testl $3,CS(%rsp) | ||
1499 | jnz nmi_userspace | ||
1500 | nmi_swapgs: | ||
1501 | SWAPGS_UNSAFE_STACK | ||
1502 | nmi_restore: | ||
1503 | RESTORE_ALL 8 | ||
1504 | jmp irq_return | ||
1505 | nmi_userspace: | ||
1506 | GET_THREAD_INFO(%rcx) | ||
1507 | movl TI_flags(%rcx),%ebx | ||
1508 | andl $_TIF_WORK_MASK,%ebx | ||
1509 | jz nmi_swapgs | ||
1510 | movq %rsp,%rdi /* &pt_regs */ | ||
1511 | call sync_regs | ||
1512 | movq %rax,%rsp /* switch stack for scheduling */ | ||
1513 | testl $_TIF_NEED_RESCHED,%ebx | ||
1514 | jnz nmi_schedule | ||
1515 | movl %ebx,%edx /* arg3: thread flags */ | ||
1516 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
1517 | xorl %esi,%esi /* arg2: oldset */ | ||
1518 | movq %rsp,%rdi /* arg1: &pt_regs */ | ||
1519 | call do_notify_resume | ||
1520 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
1521 | jmp nmi_userspace | ||
1522 | nmi_schedule: | ||
1523 | ENABLE_INTERRUPTS(CLBR_ANY) | ||
1524 | call schedule | ||
1525 | DISABLE_INTERRUPTS(CLBR_ANY) | ||
1526 | jmp nmi_userspace | ||
1527 | CFI_ENDPROC | ||
1528 | #else | ||
1529 | jmp paranoid_exit | ||
1530 | CFI_ENDPROC | ||
1531 | #endif | ||
1532 | END(nmi) | ||
1533 | |||
1534 | ENTRY(ignore_sysret) | ||
1535 | CFI_STARTPROC | ||
1536 | mov $-ENOSYS,%eax | ||
1537 | sysret | ||
1538 | CFI_ENDPROC | ||
1539 | END(ignore_sysret) | ||
1540 | |||
1541 | /* | ||
1542 | * End of kprobes section | ||
1543 | */ | ||
1544 | .popsection | ||
diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index 0aa2c443d600..53699c931ad4 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c | |||
@@ -38,8 +38,11 @@ | |||
38 | #include <asm/io.h> | 38 | #include <asm/io.h> |
39 | #include <asm/nmi.h> | 39 | #include <asm/nmi.h> |
40 | #include <asm/smp.h> | 40 | #include <asm/smp.h> |
41 | #include <asm/atomic.h> | ||
41 | #include <asm/apicdef.h> | 42 | #include <asm/apicdef.h> |
42 | #include <mach_mpparse.h> | 43 | #include <mach_mpparse.h> |
44 | #include <asm/genapic.h> | ||
45 | #include <asm/setup.h> | ||
43 | 46 | ||
44 | /* | 47 | /* |
45 | * ES7000 chipsets | 48 | * ES7000 chipsets |
@@ -161,6 +164,43 @@ es7000_rename_gsi(int ioapic, int gsi) | |||
161 | return gsi; | 164 | return gsi; |
162 | } | 165 | } |
163 | 166 | ||
167 | static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) | ||
168 | { | ||
169 | unsigned long vect = 0, psaival = 0; | ||
170 | |||
171 | if (psai == NULL) | ||
172 | return -1; | ||
173 | |||
174 | vect = ((unsigned long)__pa(eip)/0x1000) << 16; | ||
175 | psaival = (0x1000000 | vect | cpu); | ||
176 | |||
177 | while (*psai & 0x1000000) | ||
178 | ; | ||
179 | |||
180 | *psai = psaival; | ||
181 | |||
182 | return 0; | ||
183 | } | ||
184 | |||
185 | static void noop_wait_for_deassert(atomic_t *deassert_not_used) | ||
186 | { | ||
187 | } | ||
188 | |||
189 | static int __init es7000_update_genapic(void) | ||
190 | { | ||
191 | genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip; | ||
192 | |||
193 | /* MPENTIUMIII */ | ||
194 | if (boot_cpu_data.x86 == 6 && | ||
195 | (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) { | ||
196 | es7000_update_genapic_to_cluster(); | ||
197 | genapic->wait_for_init_deassert = noop_wait_for_deassert; | ||
198 | genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip; | ||
199 | } | ||
200 | |||
201 | return 0; | ||
202 | } | ||
203 | |||
164 | void __init | 204 | void __init |
165 | setup_unisys(void) | 205 | setup_unisys(void) |
166 | { | 206 | { |
@@ -176,6 +216,8 @@ setup_unisys(void) | |||
176 | else | 216 | else |
177 | es7000_plat = ES7000_CLASSIC; | 217 | es7000_plat = ES7000_CLASSIC; |
178 | ioapic_renumber_irq = es7000_rename_gsi; | 218 | ioapic_renumber_irq = es7000_rename_gsi; |
219 | |||
220 | x86_quirks->update_genapic = es7000_update_genapic; | ||
179 | } | 221 | } |
180 | 222 | ||
181 | /* | 223 | /* |
@@ -317,26 +359,6 @@ es7000_mip_write(struct mip_reg *mip_reg) | |||
317 | return status; | 359 | return status; |
318 | } | 360 | } |
319 | 361 | ||
320 | int | ||
321 | es7000_start_cpu(int cpu, unsigned long eip) | ||
322 | { | ||
323 | unsigned long vect = 0, psaival = 0; | ||
324 | |||
325 | if (psai == NULL) | ||
326 | return -1; | ||
327 | |||
328 | vect = ((unsigned long)__pa(eip)/0x1000) << 16; | ||
329 | psaival = (0x1000000 | vect | cpu); | ||
330 | |||
331 | while (*psai & 0x1000000) | ||
332 | ; | ||
333 | |||
334 | *psai = psaival; | ||
335 | |||
336 | return 0; | ||
337 | |||
338 | } | ||
339 | |||
340 | void __init | 362 | void __init |
341 | es7000_sw_apic(void) | 363 | es7000_sw_apic(void) |
342 | { | 364 | { |
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 6c9bfc9e1e95..2bced78b0b8e 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <asm/smp.h> | 21 | #include <asm/smp.h> |
22 | #include <asm/ipi.h> | 22 | #include <asm/ipi.h> |
23 | #include <asm/genapic.h> | 23 | #include <asm/genapic.h> |
24 | #include <asm/setup.h> | ||
24 | 25 | ||
25 | extern struct genapic apic_flat; | 26 | extern struct genapic apic_flat; |
26 | extern struct genapic apic_physflat; | 27 | extern struct genapic apic_physflat; |
@@ -53,6 +54,9 @@ void __init setup_apic_routing(void) | |||
53 | genapic = &apic_physflat; | 54 | genapic = &apic_physflat; |
54 | printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); | 55 | printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); |
55 | } | 56 | } |
57 | |||
58 | if (x86_quirks->update_genapic) | ||
59 | x86_quirks->update_genapic(); | ||
56 | } | 60 | } |
57 | 61 | ||
58 | /* Same for both flat and physical. */ | 62 | /* Same for both flat and physical. */ |
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 2c7dbdb98278..dece17289731 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c | |||
@@ -10,6 +10,7 @@ | |||
10 | 10 | ||
11 | #include <linux/kernel.h> | 11 | #include <linux/kernel.h> |
12 | #include <linux/threads.h> | 12 | #include <linux/threads.h> |
13 | #include <linux/cpu.h> | ||
13 | #include <linux/cpumask.h> | 14 | #include <linux/cpumask.h> |
14 | #include <linux/string.h> | 15 | #include <linux/string.h> |
15 | #include <linux/ctype.h> | 16 | #include <linux/ctype.h> |
@@ -17,6 +18,9 @@ | |||
17 | #include <linux/sched.h> | 18 | #include <linux/sched.h> |
18 | #include <linux/module.h> | 19 | #include <linux/module.h> |
19 | #include <linux/hardirq.h> | 20 | #include <linux/hardirq.h> |
21 | #include <linux/timer.h> | ||
22 | #include <linux/proc_fs.h> | ||
23 | #include <asm/current.h> | ||
20 | #include <asm/smp.h> | 24 | #include <asm/smp.h> |
21 | #include <asm/ipi.h> | 25 | #include <asm/ipi.h> |
22 | #include <asm/genapic.h> | 26 | #include <asm/genapic.h> |
@@ -356,6 +360,103 @@ static __init void uv_rtc_init(void) | |||
356 | } | 360 | } |
357 | 361 | ||
358 | /* | 362 | /* |
363 | * percpu heartbeat timer | ||
364 | */ | ||
365 | static void uv_heartbeat(unsigned long ignored) | ||
366 | { | ||
367 | struct timer_list *timer = &uv_hub_info->scir.timer; | ||
368 | unsigned char bits = uv_hub_info->scir.state; | ||
369 | |||
370 | /* flip heartbeat bit */ | ||
371 | bits ^= SCIR_CPU_HEARTBEAT; | ||
372 | |||
373 | /* is this cpu idle? */ | ||
374 | if (idle_cpu(raw_smp_processor_id())) | ||
375 | bits &= ~SCIR_CPU_ACTIVITY; | ||
376 | else | ||
377 | bits |= SCIR_CPU_ACTIVITY; | ||
378 | |||
379 | /* update system controller interface reg */ | ||
380 | uv_set_scir_bits(bits); | ||
381 | |||
382 | /* enable next timer period */ | ||
383 | mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL); | ||
384 | } | ||
385 | |||
386 | static void __cpuinit uv_heartbeat_enable(int cpu) | ||
387 | { | ||
388 | if (!uv_cpu_hub_info(cpu)->scir.enabled) { | ||
389 | struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer; | ||
390 | |||
391 | uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY); | ||
392 | setup_timer(timer, uv_heartbeat, cpu); | ||
393 | timer->expires = jiffies + SCIR_CPU_HB_INTERVAL; | ||
394 | add_timer_on(timer, cpu); | ||
395 | uv_cpu_hub_info(cpu)->scir.enabled = 1; | ||
396 | } | ||
397 | |||
398 | /* check boot cpu */ | ||
399 | if (!uv_cpu_hub_info(0)->scir.enabled) | ||
400 | uv_heartbeat_enable(0); | ||
401 | } | ||
402 | |||
403 | #ifdef CONFIG_HOTPLUG_CPU | ||
404 | static void __cpuinit uv_heartbeat_disable(int cpu) | ||
405 | { | ||
406 | if (uv_cpu_hub_info(cpu)->scir.enabled) { | ||
407 | uv_cpu_hub_info(cpu)->scir.enabled = 0; | ||
408 | del_timer(&uv_cpu_hub_info(cpu)->scir.timer); | ||
409 | } | ||
410 | uv_set_cpu_scir_bits(cpu, 0xff); | ||
411 | } | ||
412 | |||
413 | /* | ||
414 | * cpu hotplug notifier | ||
415 | */ | ||
416 | static __cpuinit int uv_scir_cpu_notify(struct notifier_block *self, | ||
417 | unsigned long action, void *hcpu) | ||
418 | { | ||
419 | long cpu = (long)hcpu; | ||
420 | |||
421 | switch (action) { | ||
422 | case CPU_ONLINE: | ||
423 | uv_heartbeat_enable(cpu); | ||
424 | break; | ||
425 | case CPU_DOWN_PREPARE: | ||
426 | uv_heartbeat_disable(cpu); | ||
427 | break; | ||
428 | default: | ||
429 | break; | ||
430 | } | ||
431 | return NOTIFY_OK; | ||
432 | } | ||
433 | |||
434 | static __init void uv_scir_register_cpu_notifier(void) | ||
435 | { | ||
436 | hotcpu_notifier(uv_scir_cpu_notify, 0); | ||
437 | } | ||
438 | |||
439 | #else /* !CONFIG_HOTPLUG_CPU */ | ||
440 | |||
441 | static __init void uv_scir_register_cpu_notifier(void) | ||
442 | { | ||
443 | } | ||
444 | |||
445 | static __init int uv_init_heartbeat(void) | ||
446 | { | ||
447 | int cpu; | ||
448 | |||
449 | if (is_uv_system()) | ||
450 | for_each_online_cpu(cpu) | ||
451 | uv_heartbeat_enable(cpu); | ||
452 | return 0; | ||
453 | } | ||
454 | |||
455 | late_initcall(uv_init_heartbeat); | ||
456 | |||
457 | #endif /* !CONFIG_HOTPLUG_CPU */ | ||
458 | |||
459 | /* | ||
359 | * Called on each cpu to initialize the per_cpu UV data area. | 460 | * Called on each cpu to initialize the per_cpu UV data area. |
360 | * ZZZ hotplug not supported yet | 461 | * ZZZ hotplug not supported yet |
361 | */ | 462 | */ |
@@ -428,7 +529,7 @@ void __init uv_system_init(void) | |||
428 | 529 | ||
429 | uv_bios_init(); | 530 | uv_bios_init(); |
430 | uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, | 531 | uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, |
431 | &uv_coherency_id, &uv_region_size); | 532 | &sn_coherency_id, &sn_region_size); |
432 | uv_rtc_init(); | 533 | uv_rtc_init(); |
433 | 534 | ||
434 | for_each_present_cpu(cpu) { | 535 | for_each_present_cpu(cpu) { |
@@ -439,8 +540,7 @@ void __init uv_system_init(void) | |||
439 | uv_blade_info[blade].nr_possible_cpus++; | 540 | uv_blade_info[blade].nr_possible_cpus++; |
440 | 541 | ||
441 | uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; | 542 | uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; |
442 | uv_cpu_hub_info(cpu)->lowmem_remap_top = | 543 | uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size; |
443 | lowmem_redir_base + lowmem_redir_size; | ||
444 | uv_cpu_hub_info(cpu)->m_val = m_val; | 544 | uv_cpu_hub_info(cpu)->m_val = m_val; |
445 | uv_cpu_hub_info(cpu)->n_val = m_val; | 545 | uv_cpu_hub_info(cpu)->n_val = m_val; |
446 | uv_cpu_hub_info(cpu)->numa_blade_id = blade; | 546 | uv_cpu_hub_info(cpu)->numa_blade_id = blade; |
@@ -450,7 +550,8 @@ void __init uv_system_init(void) | |||
450 | uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; | 550 | uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; |
451 | uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; | 551 | uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; |
452 | uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; | 552 | uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; |
453 | uv_cpu_hub_info(cpu)->coherency_domain_number = uv_coherency_id; | 553 | uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id; |
554 | uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu; | ||
454 | uv_node_to_blade[nid] = blade; | 555 | uv_node_to_blade[nid] = blade; |
455 | uv_cpu_to_blade[cpu] = blade; | 556 | uv_cpu_to_blade[cpu] = blade; |
456 | max_pnode = max(pnode, max_pnode); | 557 | max_pnode = max(pnode, max_pnode); |
@@ -467,4 +568,6 @@ void __init uv_system_init(void) | |||
467 | map_mmioh_high(max_pnode); | 568 | map_mmioh_high(max_pnode); |
468 | 569 | ||
469 | uv_cpu_init(); | 570 | uv_cpu_init(); |
571 | uv_scir_register_cpu_notifier(); | ||
572 | proc_mkdir("sgi_uv", NULL); | ||
470 | } | 573 | } |
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c index 1dcb0f13897e..3e66bd364a9d 100644 --- a/arch/x86/kernel/head.c +++ b/arch/x86/kernel/head.c | |||
@@ -35,7 +35,6 @@ void __init reserve_ebda_region(void) | |||
35 | 35 | ||
36 | /* start of EBDA area */ | 36 | /* start of EBDA area */ |
37 | ebda_addr = get_bios_ebda(); | 37 | ebda_addr = get_bios_ebda(); |
38 | printk(KERN_INFO "BIOS EBDA/lowmem at: %08x/%08x\n", ebda_addr, lowmem); | ||
39 | 38 | ||
40 | /* Fixup: bios puts an EBDA in the top 64K segment */ | 39 | /* Fixup: bios puts an EBDA in the top 64K segment */ |
41 | /* of conventional memory, but does not adjust lowmem. */ | 40 | /* of conventional memory, but does not adjust lowmem. */ |
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index fa1d25dd83e3..ac108d1fe182 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c | |||
@@ -12,9 +12,12 @@ | |||
12 | #include <asm/sections.h> | 12 | #include <asm/sections.h> |
13 | #include <asm/e820.h> | 13 | #include <asm/e820.h> |
14 | #include <asm/bios_ebda.h> | 14 | #include <asm/bios_ebda.h> |
15 | #include <asm/trampoline.h> | ||
15 | 16 | ||
16 | void __init i386_start_kernel(void) | 17 | void __init i386_start_kernel(void) |
17 | { | 18 | { |
19 | reserve_trampoline_memory(); | ||
20 | |||
18 | reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); | 21 | reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); |
19 | 22 | ||
20 | #ifdef CONFIG_BLK_DEV_INITRD | 23 | #ifdef CONFIG_BLK_DEV_INITRD |
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index d16084f90649..388e05a5fc17 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <asm/kdebug.h> | 24 | #include <asm/kdebug.h> |
25 | #include <asm/e820.h> | 25 | #include <asm/e820.h> |
26 | #include <asm/bios_ebda.h> | 26 | #include <asm/bios_ebda.h> |
27 | #include <asm/trampoline.h> | ||
27 | 28 | ||
28 | /* boot cpu pda */ | 29 | /* boot cpu pda */ |
29 | static struct x8664_pda _boot_cpu_pda __read_mostly; | 30 | static struct x8664_pda _boot_cpu_pda __read_mostly; |
@@ -120,6 +121,8 @@ void __init x86_64_start_reservations(char *real_mode_data) | |||
120 | { | 121 | { |
121 | copy_bootdata(__va(real_mode_data)); | 122 | copy_bootdata(__va(real_mode_data)); |
122 | 123 | ||
124 | reserve_trampoline_memory(); | ||
125 | |||
123 | reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); | 126 | reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); |
124 | 127 | ||
125 | #ifdef CONFIG_BLK_DEV_INITRD | 128 | #ifdef CONFIG_BLK_DEV_INITRD |
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 067d8de913f6..3f0a3edf0a57 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -33,7 +33,9 @@ | |||
33 | * HPET address is set in acpi/boot.c, when an ACPI entry exists | 33 | * HPET address is set in acpi/boot.c, when an ACPI entry exists |
34 | */ | 34 | */ |
35 | unsigned long hpet_address; | 35 | unsigned long hpet_address; |
36 | unsigned long hpet_num_timers; | 36 | #ifdef CONFIG_PCI_MSI |
37 | static unsigned long hpet_num_timers; | ||
38 | #endif | ||
37 | static void __iomem *hpet_virt_address; | 39 | static void __iomem *hpet_virt_address; |
38 | 40 | ||
39 | struct hpet_dev { | 41 | struct hpet_dev { |
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c index a4f93b4120c1..d39918076bb4 100644 --- a/arch/x86/kernel/init_task.c +++ b/arch/x86/kernel/init_task.c | |||
@@ -14,7 +14,6 @@ static struct fs_struct init_fs = INIT_FS; | |||
14 | static struct signal_struct init_signals = INIT_SIGNALS(init_signals); | 14 | static struct signal_struct init_signals = INIT_SIGNALS(init_signals); |
15 | static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); | 15 | static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); |
16 | struct mm_struct init_mm = INIT_MM(init_mm); | 16 | struct mm_struct init_mm = INIT_MM(init_mm); |
17 | EXPORT_UNUSED_SYMBOL(init_mm); /* will be removed in 2.6.26 */ | ||
18 | 17 | ||
19 | /* | 18 | /* |
20 | * Initial thread structure. | 19 | * Initial thread structure. |
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 9043251210fb..679e7bbbbcd6 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c | |||
@@ -2216,10 +2216,9 @@ static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) | |||
2216 | asmlinkage void smp_irq_move_cleanup_interrupt(void) | 2216 | asmlinkage void smp_irq_move_cleanup_interrupt(void) |
2217 | { | 2217 | { |
2218 | unsigned vector, me; | 2218 | unsigned vector, me; |
2219 | |||
2219 | ack_APIC_irq(); | 2220 | ack_APIC_irq(); |
2220 | #ifdef CONFIG_X86_64 | ||
2221 | exit_idle(); | 2221 | exit_idle(); |
2222 | #endif | ||
2223 | irq_enter(); | 2222 | irq_enter(); |
2224 | 2223 | ||
2225 | me = smp_processor_id(); | 2224 | me = smp_processor_id(); |
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 60eb84eb77a0..1d3d0e71b044 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c | |||
@@ -18,7 +18,6 @@ | |||
18 | #include <asm/idle.h> | 18 | #include <asm/idle.h> |
19 | #include <asm/smp.h> | 19 | #include <asm/smp.h> |
20 | 20 | ||
21 | #ifdef CONFIG_DEBUG_STACKOVERFLOW | ||
22 | /* | 21 | /* |
23 | * Probabilistic stack overflow check: | 22 | * Probabilistic stack overflow check: |
24 | * | 23 | * |
@@ -28,19 +27,18 @@ | |||
28 | */ | 27 | */ |
29 | static inline void stack_overflow_check(struct pt_regs *regs) | 28 | static inline void stack_overflow_check(struct pt_regs *regs) |
30 | { | 29 | { |
30 | #ifdef CONFIG_DEBUG_STACKOVERFLOW | ||
31 | u64 curbase = (u64)task_stack_page(current); | 31 | u64 curbase = (u64)task_stack_page(current); |
32 | static unsigned long warned = -60*HZ; | 32 | |
33 | 33 | WARN_ONCE(regs->sp >= curbase && | |
34 | if (regs->sp >= curbase && regs->sp <= curbase + THREAD_SIZE && | 34 | regs->sp <= curbase + THREAD_SIZE && |
35 | regs->sp < curbase + sizeof(struct thread_info) + 128 && | 35 | regs->sp < curbase + sizeof(struct thread_info) + |
36 | time_after(jiffies, warned + 60*HZ)) { | 36 | sizeof(struct pt_regs) + 128, |
37 | printk("do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n", | 37 | |
38 | current->comm, curbase, regs->sp); | 38 | "do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n", |
39 | show_stack(NULL,NULL); | 39 | current->comm, curbase, regs->sp); |
40 | warned = jiffies; | ||
41 | } | ||
42 | } | ||
43 | #endif | 40 | #endif |
41 | } | ||
44 | 42 | ||
45 | /* | 43 | /* |
46 | * do_IRQ handles all normal device IRQ's (the special | 44 | * do_IRQ handles all normal device IRQ's (the special |
@@ -60,9 +58,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs) | |||
60 | irq_enter(); | 58 | irq_enter(); |
61 | irq = __get_cpu_var(vector_irq)[vector]; | 59 | irq = __get_cpu_var(vector_irq)[vector]; |
62 | 60 | ||
63 | #ifdef CONFIG_DEBUG_STACKOVERFLOW | ||
64 | stack_overflow_check(regs); | 61 | stack_overflow_check(regs); |
65 | #endif | ||
66 | 62 | ||
67 | desc = irq_to_desc(irq); | 63 | desc = irq_to_desc(irq); |
68 | if (likely(desc)) | 64 | if (likely(desc)) |
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 845aa9803e80..607db63044a5 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c | |||
@@ -129,7 +129,7 @@ void __init native_init_IRQ(void) | |||
129 | for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { | 129 | for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { |
130 | /* SYSCALL_VECTOR was reserved in trap_init. */ | 130 | /* SYSCALL_VECTOR was reserved in trap_init. */ |
131 | if (i != SYSCALL_VECTOR) | 131 | if (i != SYSCALL_VECTOR) |
132 | set_intr_gate(i, interrupt[i]); | 132 | set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); |
133 | } | 133 | } |
134 | 134 | ||
135 | 135 | ||
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index ff0235391285..8670b3ce626e 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c | |||
@@ -24,41 +24,6 @@ | |||
24 | #include <asm/i8259.h> | 24 | #include <asm/i8259.h> |
25 | 25 | ||
26 | /* | 26 | /* |
27 | * Common place to define all x86 IRQ vectors | ||
28 | * | ||
29 | * This builds up the IRQ handler stubs using some ugly macros in irq.h | ||
30 | * | ||
31 | * These macros create the low-level assembly IRQ routines that save | ||
32 | * register context and call do_IRQ(). do_IRQ() then does all the | ||
33 | * operations that are needed to keep the AT (or SMP IOAPIC) | ||
34 | * interrupt-controller happy. | ||
35 | */ | ||
36 | |||
37 | #define IRQ_NAME2(nr) nr##_interrupt(void) | ||
38 | #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) | ||
39 | |||
40 | /* | ||
41 | * SMP has a few special interrupts for IPI messages | ||
42 | */ | ||
43 | |||
44 | #define BUILD_IRQ(nr) \ | ||
45 | asmlinkage void IRQ_NAME(nr); \ | ||
46 | asm("\n.text\n.p2align\n" \ | ||
47 | "IRQ" #nr "_interrupt:\n\t" \ | ||
48 | "push $~(" #nr ") ; " \ | ||
49 | "jmp common_interrupt\n" \ | ||
50 | ".previous"); | ||
51 | |||
52 | #define BI(x,y) \ | ||
53 | BUILD_IRQ(x##y) | ||
54 | |||
55 | #define BUILD_16_IRQS(x) \ | ||
56 | BI(x,0) BI(x,1) BI(x,2) BI(x,3) \ | ||
57 | BI(x,4) BI(x,5) BI(x,6) BI(x,7) \ | ||
58 | BI(x,8) BI(x,9) BI(x,a) BI(x,b) \ | ||
59 | BI(x,c) BI(x,d) BI(x,e) BI(x,f) | ||
60 | |||
61 | /* | ||
62 | * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: | 27 | * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: |
63 | * (these are usually mapped to vectors 0x30-0x3f) | 28 | * (these are usually mapped to vectors 0x30-0x3f) |
64 | */ | 29 | */ |
@@ -73,37 +38,6 @@ | |||
73 | * | 38 | * |
74 | * (these are usually mapped into the 0x30-0xff vector range) | 39 | * (these are usually mapped into the 0x30-0xff vector range) |
75 | */ | 40 | */ |
76 | BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3) | ||
77 | BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7) | ||
78 | BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb) | ||
79 | BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf) | ||
80 | |||
81 | #undef BUILD_16_IRQS | ||
82 | #undef BI | ||
83 | |||
84 | |||
85 | #define IRQ(x,y) \ | ||
86 | IRQ##x##y##_interrupt | ||
87 | |||
88 | #define IRQLIST_16(x) \ | ||
89 | IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \ | ||
90 | IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \ | ||
91 | IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ | ||
92 | IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f) | ||
93 | |||
94 | /* for the irq vectors */ | ||
95 | static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = { | ||
96 | IRQLIST_16(0x2), IRQLIST_16(0x3), | ||
97 | IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7), | ||
98 | IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb), | ||
99 | IRQLIST_16(0xc), IRQLIST_16(0xd), IRQLIST_16(0xe), IRQLIST_16(0xf) | ||
100 | }; | ||
101 | |||
102 | #undef IRQ | ||
103 | #undef IRQLIST_16 | ||
104 | |||
105 | |||
106 | |||
107 | 41 | ||
108 | /* | 42 | /* |
109 | * IRQ2 is cascade interrupt to second interrupt controller | 43 | * IRQ2 is cascade interrupt to second interrupt controller |
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 7a385746509a..37f420018a41 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/numa.h> | 13 | #include <linux/numa.h> |
14 | #include <linux/ftrace.h> | 14 | #include <linux/ftrace.h> |
15 | #include <linux/suspend.h> | 15 | #include <linux/suspend.h> |
16 | #include <linux/gfp.h> | ||
16 | 17 | ||
17 | #include <asm/pgtable.h> | 18 | #include <asm/pgtable.h> |
18 | #include <asm/pgalloc.h> | 19 | #include <asm/pgalloc.h> |
@@ -25,15 +26,6 @@ | |||
25 | #include <asm/system.h> | 26 | #include <asm/system.h> |
26 | #include <asm/cacheflush.h> | 27 | #include <asm/cacheflush.h> |
27 | 28 | ||
28 | #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) | ||
29 | static u32 kexec_pgd[1024] PAGE_ALIGNED; | ||
30 | #ifdef CONFIG_X86_PAE | ||
31 | static u32 kexec_pmd0[1024] PAGE_ALIGNED; | ||
32 | static u32 kexec_pmd1[1024] PAGE_ALIGNED; | ||
33 | #endif | ||
34 | static u32 kexec_pte0[1024] PAGE_ALIGNED; | ||
35 | static u32 kexec_pte1[1024] PAGE_ALIGNED; | ||
36 | |||
37 | static void set_idt(void *newidt, __u16 limit) | 29 | static void set_idt(void *newidt, __u16 limit) |
38 | { | 30 | { |
39 | struct desc_ptr curidt; | 31 | struct desc_ptr curidt; |
@@ -76,6 +68,76 @@ static void load_segments(void) | |||
76 | #undef __STR | 68 | #undef __STR |
77 | } | 69 | } |
78 | 70 | ||
71 | static void machine_kexec_free_page_tables(struct kimage *image) | ||
72 | { | ||
73 | free_page((unsigned long)image->arch.pgd); | ||
74 | #ifdef CONFIG_X86_PAE | ||
75 | free_page((unsigned long)image->arch.pmd0); | ||
76 | free_page((unsigned long)image->arch.pmd1); | ||
77 | #endif | ||
78 | free_page((unsigned long)image->arch.pte0); | ||
79 | free_page((unsigned long)image->arch.pte1); | ||
80 | } | ||
81 | |||
82 | static int machine_kexec_alloc_page_tables(struct kimage *image) | ||
83 | { | ||
84 | image->arch.pgd = (pgd_t *)get_zeroed_page(GFP_KERNEL); | ||
85 | #ifdef CONFIG_X86_PAE | ||
86 | image->arch.pmd0 = (pmd_t *)get_zeroed_page(GFP_KERNEL); | ||
87 | image->arch.pmd1 = (pmd_t *)get_zeroed_page(GFP_KERNEL); | ||
88 | #endif | ||
89 | image->arch.pte0 = (pte_t *)get_zeroed_page(GFP_KERNEL); | ||
90 | image->arch.pte1 = (pte_t *)get_zeroed_page(GFP_KERNEL); | ||
91 | if (!image->arch.pgd || | ||
92 | #ifdef CONFIG_X86_PAE | ||
93 | !image->arch.pmd0 || !image->arch.pmd1 || | ||
94 | #endif | ||
95 | !image->arch.pte0 || !image->arch.pte1) { | ||
96 | machine_kexec_free_page_tables(image); | ||
97 | return -ENOMEM; | ||
98 | } | ||
99 | return 0; | ||
100 | } | ||
101 | |||
102 | static void machine_kexec_page_table_set_one( | ||
103 | pgd_t *pgd, pmd_t *pmd, pte_t *pte, | ||
104 | unsigned long vaddr, unsigned long paddr) | ||
105 | { | ||
106 | pud_t *pud; | ||
107 | |||
108 | pgd += pgd_index(vaddr); | ||
109 | #ifdef CONFIG_X86_PAE | ||
110 | if (!(pgd_val(*pgd) & _PAGE_PRESENT)) | ||
111 | set_pgd(pgd, __pgd(__pa(pmd) | _PAGE_PRESENT)); | ||
112 | #endif | ||
113 | pud = pud_offset(pgd, vaddr); | ||
114 | pmd = pmd_offset(pud, vaddr); | ||
115 | if (!(pmd_val(*pmd) & _PAGE_PRESENT)) | ||
116 | set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE)); | ||
117 | pte = pte_offset_kernel(pmd, vaddr); | ||
118 | set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC)); | ||
119 | } | ||
120 | |||
121 | static void machine_kexec_prepare_page_tables(struct kimage *image) | ||
122 | { | ||
123 | void *control_page; | ||
124 | pmd_t *pmd = 0; | ||
125 | |||
126 | control_page = page_address(image->control_code_page); | ||
127 | #ifdef CONFIG_X86_PAE | ||
128 | pmd = image->arch.pmd0; | ||
129 | #endif | ||
130 | machine_kexec_page_table_set_one( | ||
131 | image->arch.pgd, pmd, image->arch.pte0, | ||
132 | (unsigned long)control_page, __pa(control_page)); | ||
133 | #ifdef CONFIG_X86_PAE | ||
134 | pmd = image->arch.pmd1; | ||
135 | #endif | ||
136 | machine_kexec_page_table_set_one( | ||
137 | image->arch.pgd, pmd, image->arch.pte1, | ||
138 | __pa(control_page), __pa(control_page)); | ||
139 | } | ||
140 | |||
79 | /* | 141 | /* |
80 | * A architecture hook called to validate the | 142 | * A architecture hook called to validate the |
81 | * proposed image and prepare the control pages | 143 | * proposed image and prepare the control pages |
@@ -87,12 +149,20 @@ static void load_segments(void) | |||
87 | * reboot code buffer to allow us to avoid allocations | 149 | * reboot code buffer to allow us to avoid allocations |
88 | * later. | 150 | * later. |
89 | * | 151 | * |
90 | * Make control page executable. | 152 | * - Make control page executable. |
153 | * - Allocate page tables | ||
154 | * - Setup page tables | ||
91 | */ | 155 | */ |
92 | int machine_kexec_prepare(struct kimage *image) | 156 | int machine_kexec_prepare(struct kimage *image) |
93 | { | 157 | { |
158 | int error; | ||
159 | |||
94 | if (nx_enabled) | 160 | if (nx_enabled) |
95 | set_pages_x(image->control_code_page, 1); | 161 | set_pages_x(image->control_code_page, 1); |
162 | error = machine_kexec_alloc_page_tables(image); | ||
163 | if (error) | ||
164 | return error; | ||
165 | machine_kexec_prepare_page_tables(image); | ||
96 | return 0; | 166 | return 0; |
97 | } | 167 | } |
98 | 168 | ||
@@ -104,6 +174,7 @@ void machine_kexec_cleanup(struct kimage *image) | |||
104 | { | 174 | { |
105 | if (nx_enabled) | 175 | if (nx_enabled) |
106 | set_pages_nx(image->control_code_page, 1); | 176 | set_pages_nx(image->control_code_page, 1); |
177 | machine_kexec_free_page_tables(image); | ||
107 | } | 178 | } |
108 | 179 | ||
109 | /* | 180 | /* |
@@ -150,18 +221,7 @@ void machine_kexec(struct kimage *image) | |||
150 | relocate_kernel_ptr = control_page; | 221 | relocate_kernel_ptr = control_page; |
151 | page_list[PA_CONTROL_PAGE] = __pa(control_page); | 222 | page_list[PA_CONTROL_PAGE] = __pa(control_page); |
152 | page_list[VA_CONTROL_PAGE] = (unsigned long)control_page; | 223 | page_list[VA_CONTROL_PAGE] = (unsigned long)control_page; |
153 | page_list[PA_PGD] = __pa(kexec_pgd); | 224 | page_list[PA_PGD] = __pa(image->arch.pgd); |
154 | page_list[VA_PGD] = (unsigned long)kexec_pgd; | ||
155 | #ifdef CONFIG_X86_PAE | ||
156 | page_list[PA_PMD_0] = __pa(kexec_pmd0); | ||
157 | page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; | ||
158 | page_list[PA_PMD_1] = __pa(kexec_pmd1); | ||
159 | page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; | ||
160 | #endif | ||
161 | page_list[PA_PTE_0] = __pa(kexec_pte0); | ||
162 | page_list[VA_PTE_0] = (unsigned long)kexec_pte0; | ||
163 | page_list[PA_PTE_1] = __pa(kexec_pte1); | ||
164 | page_list[VA_PTE_1] = (unsigned long)kexec_pte1; | ||
165 | 225 | ||
166 | if (image->type == KEXEC_TYPE_DEFAULT) | 226 | if (image->type == KEXEC_TYPE_DEFAULT) |
167 | page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) | 227 | page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) |
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 5f8e5d75a254..c25fdb382292 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c | |||
@@ -10,7 +10,7 @@ | |||
10 | * This driver allows to upgrade microcode on AMD | 10 | * This driver allows to upgrade microcode on AMD |
11 | * family 0x10 and 0x11 processors. | 11 | * family 0x10 and 0x11 processors. |
12 | * | 12 | * |
13 | * Licensed unter the terms of the GNU General Public | 13 | * Licensed under the terms of the GNU General Public |
14 | * License version 2. See file COPYING for details. | 14 | * License version 2. See file COPYING for details. |
15 | */ | 15 | */ |
16 | 16 | ||
@@ -32,9 +32,9 @@ | |||
32 | #include <linux/platform_device.h> | 32 | #include <linux/platform_device.h> |
33 | #include <linux/pci.h> | 33 | #include <linux/pci.h> |
34 | #include <linux/pci_ids.h> | 34 | #include <linux/pci_ids.h> |
35 | #include <linux/uaccess.h> | ||
35 | 36 | ||
36 | #include <asm/msr.h> | 37 | #include <asm/msr.h> |
37 | #include <asm/uaccess.h> | ||
38 | #include <asm/processor.h> | 38 | #include <asm/processor.h> |
39 | #include <asm/microcode.h> | 39 | #include <asm/microcode.h> |
40 | 40 | ||
@@ -47,43 +47,38 @@ MODULE_LICENSE("GPL v2"); | |||
47 | #define UCODE_UCODE_TYPE 0x00000001 | 47 | #define UCODE_UCODE_TYPE 0x00000001 |
48 | 48 | ||
49 | struct equiv_cpu_entry { | 49 | struct equiv_cpu_entry { |
50 | unsigned int installed_cpu; | 50 | u32 installed_cpu; |
51 | unsigned int fixed_errata_mask; | 51 | u32 fixed_errata_mask; |
52 | unsigned int fixed_errata_compare; | 52 | u32 fixed_errata_compare; |
53 | unsigned int equiv_cpu; | 53 | u16 equiv_cpu; |
54 | }; | 54 | u16 res; |
55 | } __attribute__((packed)); | ||
55 | 56 | ||
56 | struct microcode_header_amd { | 57 | struct microcode_header_amd { |
57 | unsigned int data_code; | 58 | u32 data_code; |
58 | unsigned int patch_id; | 59 | u32 patch_id; |
59 | unsigned char mc_patch_data_id[2]; | 60 | u16 mc_patch_data_id; |
60 | unsigned char mc_patch_data_len; | 61 | u8 mc_patch_data_len; |
61 | unsigned char init_flag; | 62 | u8 init_flag; |
62 | unsigned int mc_patch_data_checksum; | 63 | u32 mc_patch_data_checksum; |
63 | unsigned int nb_dev_id; | 64 | u32 nb_dev_id; |
64 | unsigned int sb_dev_id; | 65 | u32 sb_dev_id; |
65 | unsigned char processor_rev_id[2]; | 66 | u16 processor_rev_id; |
66 | unsigned char nb_rev_id; | 67 | u8 nb_rev_id; |
67 | unsigned char sb_rev_id; | 68 | u8 sb_rev_id; |
68 | unsigned char bios_api_rev; | 69 | u8 bios_api_rev; |
69 | unsigned char reserved1[3]; | 70 | u8 reserved1[3]; |
70 | unsigned int match_reg[8]; | 71 | u32 match_reg[8]; |
71 | }; | 72 | } __attribute__((packed)); |
72 | 73 | ||
73 | struct microcode_amd { | 74 | struct microcode_amd { |
74 | struct microcode_header_amd hdr; | 75 | struct microcode_header_amd hdr; |
75 | unsigned int mpb[0]; | 76 | unsigned int mpb[0]; |
76 | }; | 77 | }; |
77 | 78 | ||
78 | #define UCODE_MAX_SIZE (2048) | 79 | #define UCODE_MAX_SIZE 2048 |
79 | #define DEFAULT_UCODE_DATASIZE (896) | 80 | #define UCODE_CONTAINER_SECTION_HDR 8 |
80 | #define MC_HEADER_SIZE (sizeof(struct microcode_header_amd)) | 81 | #define UCODE_CONTAINER_HEADER_SIZE 12 |
81 | #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) | ||
82 | #define DWSIZE (sizeof(u32)) | ||
83 | /* For now we support a fixed ucode total size only */ | ||
84 | #define get_totalsize(mc) \ | ||
85 | ((((struct microcode_amd *)mc)->hdr.mc_patch_data_len * 28) \ | ||
86 | + MC_HEADER_SIZE) | ||
87 | 82 | ||
88 | /* serialize access to the physical write */ | 83 | /* serialize access to the physical write */ |
89 | static DEFINE_SPINLOCK(microcode_update_lock); | 84 | static DEFINE_SPINLOCK(microcode_update_lock); |
@@ -93,31 +88,24 @@ static struct equiv_cpu_entry *equiv_cpu_table; | |||
93 | static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) | 88 | static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) |
94 | { | 89 | { |
95 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 90 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
91 | u32 dummy; | ||
96 | 92 | ||
97 | memset(csig, 0, sizeof(*csig)); | 93 | memset(csig, 0, sizeof(*csig)); |
98 | |||
99 | if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { | 94 | if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { |
100 | printk(KERN_ERR "microcode: CPU%d not a capable AMD processor\n", | 95 | printk(KERN_WARNING "microcode: CPU%d: AMD CPU family 0x%x not " |
101 | cpu); | 96 | "supported\n", cpu, c->x86); |
102 | return -1; | 97 | return -1; |
103 | } | 98 | } |
104 | 99 | rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy); | |
105 | asm volatile("movl %1, %%ecx; rdmsr" | 100 | printk(KERN_INFO "microcode: CPU%d: patch_level=0x%x\n", cpu, csig->rev); |
106 | : "=a" (csig->rev) | ||
107 | : "i" (0x0000008B) : "ecx"); | ||
108 | |||
109 | printk(KERN_INFO "microcode: collect_cpu_info_amd : patch_id=0x%x\n", | ||
110 | csig->rev); | ||
111 | |||
112 | return 0; | 101 | return 0; |
113 | } | 102 | } |
114 | 103 | ||
115 | static int get_matching_microcode(int cpu, void *mc, int rev) | 104 | static int get_matching_microcode(int cpu, void *mc, int rev) |
116 | { | 105 | { |
117 | struct microcode_header_amd *mc_header = mc; | 106 | struct microcode_header_amd *mc_header = mc; |
118 | struct pci_dev *nb_pci_dev, *sb_pci_dev; | ||
119 | unsigned int current_cpu_id; | 107 | unsigned int current_cpu_id; |
120 | unsigned int equiv_cpu_id = 0x00; | 108 | u16 equiv_cpu_id = 0; |
121 | unsigned int i = 0; | 109 | unsigned int i = 0; |
122 | 110 | ||
123 | BUG_ON(equiv_cpu_table == NULL); | 111 | BUG_ON(equiv_cpu_table == NULL); |
@@ -132,57 +120,25 @@ static int get_matching_microcode(int cpu, void *mc, int rev) | |||
132 | } | 120 | } |
133 | 121 | ||
134 | if (!equiv_cpu_id) { | 122 | if (!equiv_cpu_id) { |
135 | printk(KERN_ERR "microcode: CPU%d cpu_id " | 123 | printk(KERN_WARNING "microcode: CPU%d: cpu revision " |
136 | "not found in equivalent cpu table \n", cpu); | 124 | "not listed in equivalent cpu table\n", cpu); |
137 | return 0; | 125 | return 0; |
138 | } | 126 | } |
139 | 127 | ||
140 | if ((mc_header->processor_rev_id[0]) != (equiv_cpu_id & 0xff)) { | 128 | if (mc_header->processor_rev_id != equiv_cpu_id) { |
141 | printk(KERN_ERR | 129 | printk(KERN_ERR "microcode: CPU%d: patch mismatch " |
142 | "microcode: CPU%d patch does not match " | 130 | "(processor_rev_id: %x, equiv_cpu_id: %x)\n", |
143 | "(patch is %x, cpu extended is %x) \n", | 131 | cpu, mc_header->processor_rev_id, equiv_cpu_id); |
144 | cpu, mc_header->processor_rev_id[0], | ||
145 | (equiv_cpu_id & 0xff)); | ||
146 | return 0; | 132 | return 0; |
147 | } | 133 | } |
148 | 134 | ||
149 | if ((mc_header->processor_rev_id[1]) != ((equiv_cpu_id >> 16) & 0xff)) { | 135 | /* ucode might be chipset specific -- currently we don't support this */ |
150 | printk(KERN_ERR "microcode: CPU%d patch does not match " | 136 | if (mc_header->nb_dev_id || mc_header->sb_dev_id) { |
151 | "(patch is %x, cpu base id is %x) \n", | 137 | printk(KERN_ERR "microcode: CPU%d: loading of chipset " |
152 | cpu, mc_header->processor_rev_id[1], | 138 | "specific code not yet supported\n", cpu); |
153 | ((equiv_cpu_id >> 16) & 0xff)); | ||
154 | |||
155 | return 0; | 139 | return 0; |
156 | } | 140 | } |
157 | 141 | ||
158 | /* ucode may be northbridge specific */ | ||
159 | if (mc_header->nb_dev_id) { | ||
160 | nb_pci_dev = pci_get_device(PCI_VENDOR_ID_AMD, | ||
161 | (mc_header->nb_dev_id & 0xff), | ||
162 | NULL); | ||
163 | if ((!nb_pci_dev) || | ||
164 | (mc_header->nb_rev_id != nb_pci_dev->revision)) { | ||
165 | printk(KERN_ERR "microcode: CPU%d NB mismatch \n", cpu); | ||
166 | pci_dev_put(nb_pci_dev); | ||
167 | return 0; | ||
168 | } | ||
169 | pci_dev_put(nb_pci_dev); | ||
170 | } | ||
171 | |||
172 | /* ucode may be southbridge specific */ | ||
173 | if (mc_header->sb_dev_id) { | ||
174 | sb_pci_dev = pci_get_device(PCI_VENDOR_ID_AMD, | ||
175 | (mc_header->sb_dev_id & 0xff), | ||
176 | NULL); | ||
177 | if ((!sb_pci_dev) || | ||
178 | (mc_header->sb_rev_id != sb_pci_dev->revision)) { | ||
179 | printk(KERN_ERR "microcode: CPU%d SB mismatch \n", cpu); | ||
180 | pci_dev_put(sb_pci_dev); | ||
181 | return 0; | ||
182 | } | ||
183 | pci_dev_put(sb_pci_dev); | ||
184 | } | ||
185 | |||
186 | if (mc_header->patch_id <= rev) | 142 | if (mc_header->patch_id <= rev) |
187 | return 0; | 143 | return 0; |
188 | 144 | ||
@@ -192,12 +148,10 @@ static int get_matching_microcode(int cpu, void *mc, int rev) | |||
192 | static void apply_microcode_amd(int cpu) | 148 | static void apply_microcode_amd(int cpu) |
193 | { | 149 | { |
194 | unsigned long flags; | 150 | unsigned long flags; |
195 | unsigned int eax, edx; | 151 | u32 rev, dummy; |
196 | unsigned int rev; | ||
197 | int cpu_num = raw_smp_processor_id(); | 152 | int cpu_num = raw_smp_processor_id(); |
198 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; | 153 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; |
199 | struct microcode_amd *mc_amd = uci->mc; | 154 | struct microcode_amd *mc_amd = uci->mc; |
200 | unsigned long addr; | ||
201 | 155 | ||
202 | /* We should bind the task to the CPU */ | 156 | /* We should bind the task to the CPU */ |
203 | BUG_ON(cpu_num != cpu); | 157 | BUG_ON(cpu_num != cpu); |
@@ -206,42 +160,34 @@ static void apply_microcode_amd(int cpu) | |||
206 | return; | 160 | return; |
207 | 161 | ||
208 | spin_lock_irqsave(&microcode_update_lock, flags); | 162 | spin_lock_irqsave(&microcode_update_lock, flags); |
209 | 163 | wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code); | |
210 | addr = (unsigned long)&mc_amd->hdr.data_code; | ||
211 | edx = (unsigned int)(((unsigned long)upper_32_bits(addr))); | ||
212 | eax = (unsigned int)(((unsigned long)lower_32_bits(addr))); | ||
213 | |||
214 | asm volatile("movl %0, %%ecx; wrmsr" : | ||
215 | : "i" (0xc0010020), "a" (eax), "d" (edx) : "ecx"); | ||
216 | |||
217 | /* get patch id after patching */ | 164 | /* get patch id after patching */ |
218 | asm volatile("movl %1, %%ecx; rdmsr" | 165 | rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); |
219 | : "=a" (rev) | ||
220 | : "i" (0x0000008B) : "ecx"); | ||
221 | |||
222 | spin_unlock_irqrestore(&microcode_update_lock, flags); | 166 | spin_unlock_irqrestore(&microcode_update_lock, flags); |
223 | 167 | ||
224 | /* check current patch id and patch's id for match */ | 168 | /* check current patch id and patch's id for match */ |
225 | if (rev != mc_amd->hdr.patch_id) { | 169 | if (rev != mc_amd->hdr.patch_id) { |
226 | printk(KERN_ERR "microcode: CPU%d update from revision " | 170 | printk(KERN_ERR "microcode: CPU%d: update failed " |
227 | "0x%x to 0x%x failed\n", cpu_num, | 171 | "(for patch_level=0x%x)\n", cpu, mc_amd->hdr.patch_id); |
228 | mc_amd->hdr.patch_id, rev); | ||
229 | return; | 172 | return; |
230 | } | 173 | } |
231 | 174 | ||
232 | printk(KERN_INFO "microcode: CPU%d updated from revision " | 175 | printk(KERN_INFO "microcode: CPU%d: updated (new patch_level=0x%x)\n", |
233 | "0x%x to 0x%x \n", | 176 | cpu, rev); |
234 | cpu_num, uci->cpu_sig.rev, mc_amd->hdr.patch_id); | ||
235 | 177 | ||
236 | uci->cpu_sig.rev = rev; | 178 | uci->cpu_sig.rev = rev; |
237 | } | 179 | } |
238 | 180 | ||
239 | static void * get_next_ucode(u8 *buf, unsigned int size, | 181 | static int get_ucode_data(void *to, const u8 *from, size_t n) |
240 | int (*get_ucode_data)(void *, const void *, size_t), | 182 | { |
241 | unsigned int *mc_size) | 183 | memcpy(to, from, n); |
184 | return 0; | ||
185 | } | ||
186 | |||
187 | static void *get_next_ucode(const u8 *buf, unsigned int size, | ||
188 | unsigned int *mc_size) | ||
242 | { | 189 | { |
243 | unsigned int total_size; | 190 | unsigned int total_size; |
244 | #define UCODE_CONTAINER_SECTION_HDR 8 | ||
245 | u8 section_hdr[UCODE_CONTAINER_SECTION_HDR]; | 191 | u8 section_hdr[UCODE_CONTAINER_SECTION_HDR]; |
246 | void *mc; | 192 | void *mc; |
247 | 193 | ||
@@ -249,39 +195,37 @@ static void * get_next_ucode(u8 *buf, unsigned int size, | |||
249 | return NULL; | 195 | return NULL; |
250 | 196 | ||
251 | if (section_hdr[0] != UCODE_UCODE_TYPE) { | 197 | if (section_hdr[0] != UCODE_UCODE_TYPE) { |
252 | printk(KERN_ERR "microcode: error! " | 198 | printk(KERN_ERR "microcode: error: invalid type field in " |
253 | "Wrong microcode payload type field\n"); | 199 | "container file section header\n"); |
254 | return NULL; | 200 | return NULL; |
255 | } | 201 | } |
256 | 202 | ||
257 | total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8)); | 203 | total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8)); |
258 | 204 | ||
259 | printk(KERN_INFO "microcode: size %u, total_size %u\n", | 205 | printk(KERN_DEBUG "microcode: size %u, total_size %u\n", |
260 | size, total_size); | 206 | size, total_size); |
261 | 207 | ||
262 | if (total_size > size || total_size > UCODE_MAX_SIZE) { | 208 | if (total_size > size || total_size > UCODE_MAX_SIZE) { |
263 | printk(KERN_ERR "microcode: error! Bad data in microcode data file\n"); | 209 | printk(KERN_ERR "microcode: error: size mismatch\n"); |
264 | return NULL; | 210 | return NULL; |
265 | } | 211 | } |
266 | 212 | ||
267 | mc = vmalloc(UCODE_MAX_SIZE); | 213 | mc = vmalloc(UCODE_MAX_SIZE); |
268 | if (mc) { | 214 | if (mc) { |
269 | memset(mc, 0, UCODE_MAX_SIZE); | 215 | memset(mc, 0, UCODE_MAX_SIZE); |
270 | if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size)) { | 216 | if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, |
217 | total_size)) { | ||
271 | vfree(mc); | 218 | vfree(mc); |
272 | mc = NULL; | 219 | mc = NULL; |
273 | } else | 220 | } else |
274 | *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR; | 221 | *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR; |
275 | } | 222 | } |
276 | #undef UCODE_CONTAINER_SECTION_HDR | ||
277 | return mc; | 223 | return mc; |
278 | } | 224 | } |
279 | 225 | ||
280 | 226 | ||
281 | static int install_equiv_cpu_table(u8 *buf, | 227 | static int install_equiv_cpu_table(const u8 *buf) |
282 | int (*get_ucode_data)(void *, const void *, size_t)) | ||
283 | { | 228 | { |
284 | #define UCODE_CONTAINER_HEADER_SIZE 12 | ||
285 | u8 *container_hdr[UCODE_CONTAINER_HEADER_SIZE]; | 229 | u8 *container_hdr[UCODE_CONTAINER_HEADER_SIZE]; |
286 | unsigned int *buf_pos = (unsigned int *)container_hdr; | 230 | unsigned int *buf_pos = (unsigned int *)container_hdr; |
287 | unsigned long size; | 231 | unsigned long size; |
@@ -292,14 +236,15 @@ static int install_equiv_cpu_table(u8 *buf, | |||
292 | size = buf_pos[2]; | 236 | size = buf_pos[2]; |
293 | 237 | ||
294 | if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) { | 238 | if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) { |
295 | printk(KERN_ERR "microcode: error! " | 239 | printk(KERN_ERR "microcode: error: invalid type field in " |
296 | "Wrong microcode equivalnet cpu table\n"); | 240 | "container file section header\n"); |
297 | return 0; | 241 | return 0; |
298 | } | 242 | } |
299 | 243 | ||
300 | equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size); | 244 | equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size); |
301 | if (!equiv_cpu_table) { | 245 | if (!equiv_cpu_table) { |
302 | printk(KERN_ERR "microcode: error, can't allocate memory for equiv CPU table\n"); | 246 | printk(KERN_ERR "microcode: failed to allocate " |
247 | "equivalent CPU table\n"); | ||
303 | return 0; | 248 | return 0; |
304 | } | 249 | } |
305 | 250 | ||
@@ -310,7 +255,6 @@ static int install_equiv_cpu_table(u8 *buf, | |||
310 | } | 255 | } |
311 | 256 | ||
312 | return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */ | 257 | return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */ |
313 | #undef UCODE_CONTAINER_HEADER_SIZE | ||
314 | } | 258 | } |
315 | 259 | ||
316 | static void free_equiv_cpu_table(void) | 260 | static void free_equiv_cpu_table(void) |
@@ -321,18 +265,20 @@ static void free_equiv_cpu_table(void) | |||
321 | } | 265 | } |
322 | } | 266 | } |
323 | 267 | ||
324 | static int generic_load_microcode(int cpu, void *data, size_t size, | 268 | static int generic_load_microcode(int cpu, const u8 *data, size_t size) |
325 | int (*get_ucode_data)(void *, const void *, size_t)) | ||
326 | { | 269 | { |
327 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 270 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
328 | u8 *ucode_ptr = data, *new_mc = NULL, *mc; | 271 | const u8 *ucode_ptr = data; |
272 | void *new_mc = NULL; | ||
273 | void *mc; | ||
329 | int new_rev = uci->cpu_sig.rev; | 274 | int new_rev = uci->cpu_sig.rev; |
330 | unsigned int leftover; | 275 | unsigned int leftover; |
331 | unsigned long offset; | 276 | unsigned long offset; |
332 | 277 | ||
333 | offset = install_equiv_cpu_table(ucode_ptr, get_ucode_data); | 278 | offset = install_equiv_cpu_table(ucode_ptr); |
334 | if (!offset) { | 279 | if (!offset) { |
335 | printk(KERN_ERR "microcode: installing equivalent cpu table failed\n"); | 280 | printk(KERN_ERR "microcode: failed to create " |
281 | "equivalent cpu table\n"); | ||
336 | return -EINVAL; | 282 | return -EINVAL; |
337 | } | 283 | } |
338 | 284 | ||
@@ -343,7 +289,7 @@ static int generic_load_microcode(int cpu, void *data, size_t size, | |||
343 | unsigned int uninitialized_var(mc_size); | 289 | unsigned int uninitialized_var(mc_size); |
344 | struct microcode_header_amd *mc_header; | 290 | struct microcode_header_amd *mc_header; |
345 | 291 | ||
346 | mc = get_next_ucode(ucode_ptr, leftover, get_ucode_data, &mc_size); | 292 | mc = get_next_ucode(ucode_ptr, leftover, &mc_size); |
347 | if (!mc) | 293 | if (!mc) |
348 | break; | 294 | break; |
349 | 295 | ||
@@ -353,7 +299,7 @@ static int generic_load_microcode(int cpu, void *data, size_t size, | |||
353 | vfree(new_mc); | 299 | vfree(new_mc); |
354 | new_rev = mc_header->patch_id; | 300 | new_rev = mc_header->patch_id; |
355 | new_mc = mc; | 301 | new_mc = mc; |
356 | } else | 302 | } else |
357 | vfree(mc); | 303 | vfree(mc); |
358 | 304 | ||
359 | ucode_ptr += mc_size; | 305 | ucode_ptr += mc_size; |
@@ -365,9 +311,9 @@ static int generic_load_microcode(int cpu, void *data, size_t size, | |||
365 | if (uci->mc) | 311 | if (uci->mc) |
366 | vfree(uci->mc); | 312 | vfree(uci->mc); |
367 | uci->mc = new_mc; | 313 | uci->mc = new_mc; |
368 | pr_debug("microcode: CPU%d found a matching microcode update with" | 314 | pr_debug("microcode: CPU%d found a matching microcode " |
369 | " version 0x%x (current=0x%x)\n", | 315 | "update with version 0x%x (current=0x%x)\n", |
370 | cpu, new_rev, uci->cpu_sig.rev); | 316 | cpu, new_rev, uci->cpu_sig.rev); |
371 | } else | 317 | } else |
372 | vfree(new_mc); | 318 | vfree(new_mc); |
373 | } | 319 | } |
@@ -377,12 +323,6 @@ static int generic_load_microcode(int cpu, void *data, size_t size, | |||
377 | return (int)leftover; | 323 | return (int)leftover; |
378 | } | 324 | } |
379 | 325 | ||
380 | static int get_ucode_fw(void *to, const void *from, size_t n) | ||
381 | { | ||
382 | memcpy(to, from, n); | ||
383 | return 0; | ||
384 | } | ||
385 | |||
386 | static int request_microcode_fw(int cpu, struct device *device) | 326 | static int request_microcode_fw(int cpu, struct device *device) |
387 | { | 327 | { |
388 | const char *fw_name = "amd-ucode/microcode_amd.bin"; | 328 | const char *fw_name = "amd-ucode/microcode_amd.bin"; |
@@ -394,12 +334,11 @@ static int request_microcode_fw(int cpu, struct device *device) | |||
394 | 334 | ||
395 | ret = request_firmware(&firmware, fw_name, device); | 335 | ret = request_firmware(&firmware, fw_name, device); |
396 | if (ret) { | 336 | if (ret) { |
397 | printk(KERN_ERR "microcode: ucode data file %s load failed\n", fw_name); | 337 | printk(KERN_ERR "microcode: failed to load file %s\n", fw_name); |
398 | return ret; | 338 | return ret; |
399 | } | 339 | } |
400 | 340 | ||
401 | ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size, | 341 | ret = generic_load_microcode(cpu, firmware->data, firmware->size); |
402 | &get_ucode_fw); | ||
403 | 342 | ||
404 | release_firmware(firmware); | 343 | release_firmware(firmware); |
405 | 344 | ||
@@ -408,8 +347,8 @@ static int request_microcode_fw(int cpu, struct device *device) | |||
408 | 347 | ||
409 | static int request_microcode_user(int cpu, const void __user *buf, size_t size) | 348 | static int request_microcode_user(int cpu, const void __user *buf, size_t size) |
410 | { | 349 | { |
411 | printk(KERN_WARNING "microcode: AMD microcode update via /dev/cpu/microcode" | 350 | printk(KERN_INFO "microcode: AMD microcode update via " |
412 | "is not supported\n"); | 351 | "/dev/cpu/microcode not supported\n"); |
413 | return -1; | 352 | return -1; |
414 | } | 353 | } |
415 | 354 | ||
@@ -433,3 +372,4 @@ struct microcode_ops * __init init_amd_microcode(void) | |||
433 | { | 372 | { |
434 | return &microcode_amd_ops; | 373 | return &microcode_amd_ops; |
435 | } | 374 | } |
375 | |||
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index c4b5b24e0217..c9b721ba968c 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c | |||
@@ -99,7 +99,7 @@ MODULE_LICENSE("GPL"); | |||
99 | 99 | ||
100 | #define MICROCODE_VERSION "2.00" | 100 | #define MICROCODE_VERSION "2.00" |
101 | 101 | ||
102 | struct microcode_ops *microcode_ops; | 102 | static struct microcode_ops *microcode_ops; |
103 | 103 | ||
104 | /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ | 104 | /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ |
105 | static DEFINE_MUTEX(microcode_mutex); | 105 | static DEFINE_MUTEX(microcode_mutex); |
@@ -203,7 +203,7 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); | |||
203 | #endif | 203 | #endif |
204 | 204 | ||
205 | /* fake device for request_firmware */ | 205 | /* fake device for request_firmware */ |
206 | struct platform_device *microcode_pdev; | 206 | static struct platform_device *microcode_pdev; |
207 | 207 | ||
208 | static ssize_t reload_store(struct sys_device *dev, | 208 | static ssize_t reload_store(struct sys_device *dev, |
209 | struct sysdev_attribute *attr, | 209 | struct sysdev_attribute *attr, |
@@ -328,7 +328,7 @@ static int microcode_resume_cpu(int cpu) | |||
328 | return 0; | 328 | return 0; |
329 | } | 329 | } |
330 | 330 | ||
331 | void microcode_update_cpu(int cpu) | 331 | static void microcode_update_cpu(int cpu) |
332 | { | 332 | { |
333 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 333 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
334 | int err = 0; | 334 | int err = 0; |
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index a8e62792d171..b7f4c929e615 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c | |||
@@ -471,7 +471,7 @@ static void microcode_fini_cpu(int cpu) | |||
471 | uci->mc = NULL; | 471 | uci->mc = NULL; |
472 | } | 472 | } |
473 | 473 | ||
474 | struct microcode_ops microcode_intel_ops = { | 474 | static struct microcode_ops microcode_intel_ops = { |
475 | .request_microcode_user = request_microcode_user, | 475 | .request_microcode_user = request_microcode_user, |
476 | .request_microcode_fw = request_microcode_fw, | 476 | .request_microcode_fw = request_microcode_fw, |
477 | .collect_cpu_info = collect_cpu_info, | 477 | .collect_cpu_info = collect_cpu_info, |
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 0f4c1fd5a1f4..45e3b69808ba 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
@@ -586,26 +586,23 @@ static void __init __get_smp_config(unsigned int early) | |||
586 | { | 586 | { |
587 | struct intel_mp_floating *mpf = mpf_found; | 587 | struct intel_mp_floating *mpf = mpf_found; |
588 | 588 | ||
589 | if (x86_quirks->mach_get_smp_config) { | 589 | if (!mpf) |
590 | if (x86_quirks->mach_get_smp_config(early)) | 590 | return; |
591 | return; | 591 | |
592 | } | ||
593 | if (acpi_lapic && early) | 592 | if (acpi_lapic && early) |
594 | return; | 593 | return; |
594 | |||
595 | /* | 595 | /* |
596 | * ACPI supports both logical (e.g. Hyper-Threading) and physical | 596 | * MPS doesn't support hyperthreading, aka only have |
597 | * processors, where MPS only supports physical. | 597 | * thread 0 apic id in MPS table |
598 | */ | 598 | */ |
599 | if (acpi_lapic && acpi_ioapic) { | 599 | if (acpi_lapic && acpi_ioapic) |
600 | printk(KERN_INFO "Using ACPI (MADT) for SMP configuration " | ||
601 | "information\n"); | ||
602 | return; | 600 | return; |
603 | } else if (acpi_lapic) | ||
604 | printk(KERN_INFO "Using ACPI for processor (LAPIC) " | ||
605 | "configuration information\n"); | ||
606 | 601 | ||
607 | if (!mpf) | 602 | if (x86_quirks->mach_get_smp_config) { |
608 | return; | 603 | if (x86_quirks->mach_get_smp_config(early)) |
604 | return; | ||
605 | } | ||
609 | 606 | ||
610 | printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", | 607 | printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", |
611 | mpf->mpf_specification); | 608 | mpf->mpf_specification); |
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 2c97f07f1c2c..8bd1bf9622a7 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c | |||
@@ -131,6 +131,11 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count) | |||
131 | atomic_dec(&nmi_active); | 131 | atomic_dec(&nmi_active); |
132 | } | 132 | } |
133 | 133 | ||
134 | static void __acpi_nmi_disable(void *__unused) | ||
135 | { | ||
136 | apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); | ||
137 | } | ||
138 | |||
134 | int __init check_nmi_watchdog(void) | 139 | int __init check_nmi_watchdog(void) |
135 | { | 140 | { |
136 | unsigned int *prev_nmi_count; | 141 | unsigned int *prev_nmi_count; |
@@ -179,8 +184,12 @@ int __init check_nmi_watchdog(void) | |||
179 | kfree(prev_nmi_count); | 184 | kfree(prev_nmi_count); |
180 | return 0; | 185 | return 0; |
181 | error: | 186 | error: |
182 | if (nmi_watchdog == NMI_IO_APIC && !timer_through_8259) | 187 | if (nmi_watchdog == NMI_IO_APIC) { |
183 | disable_8259A_irq(0); | 188 | if (!timer_through_8259) |
189 | disable_8259A_irq(0); | ||
190 | on_each_cpu(__acpi_nmi_disable, NULL, 1); | ||
191 | } | ||
192 | |||
184 | #ifdef CONFIG_X86_32 | 193 | #ifdef CONFIG_X86_32 |
185 | timer_ack = 0; | 194 | timer_ack = 0; |
186 | #endif | 195 | #endif |
@@ -199,12 +208,17 @@ static int __init setup_nmi_watchdog(char *str) | |||
199 | ++str; | 208 | ++str; |
200 | } | 209 | } |
201 | 210 | ||
202 | get_option(&str, &nmi); | 211 | if (!strncmp(str, "lapic", 5)) |
203 | 212 | nmi_watchdog = NMI_LOCAL_APIC; | |
204 | if (nmi >= NMI_INVALID) | 213 | else if (!strncmp(str, "ioapic", 6)) |
205 | return 0; | 214 | nmi_watchdog = NMI_IO_APIC; |
215 | else { | ||
216 | get_option(&str, &nmi); | ||
217 | if (nmi >= NMI_INVALID) | ||
218 | return 0; | ||
219 | nmi_watchdog = nmi; | ||
220 | } | ||
206 | 221 | ||
207 | nmi_watchdog = nmi; | ||
208 | return 1; | 222 | return 1; |
209 | } | 223 | } |
210 | __setup("nmi_watchdog=", setup_nmi_watchdog); | 224 | __setup("nmi_watchdog=", setup_nmi_watchdog); |
@@ -285,11 +299,6 @@ void acpi_nmi_enable(void) | |||
285 | on_each_cpu(__acpi_nmi_enable, NULL, 1); | 299 | on_each_cpu(__acpi_nmi_enable, NULL, 1); |
286 | } | 300 | } |
287 | 301 | ||
288 | static void __acpi_nmi_disable(void *__unused) | ||
289 | { | ||
290 | apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); | ||
291 | } | ||
292 | |||
293 | /* | 302 | /* |
294 | * Disable timer based NMIs on all CPUs: | 303 | * Disable timer based NMIs on all CPUs: |
295 | */ | 304 | */ |
@@ -340,6 +349,8 @@ void stop_apic_nmi_watchdog(void *unused) | |||
340 | return; | 349 | return; |
341 | if (nmi_watchdog == NMI_LOCAL_APIC) | 350 | if (nmi_watchdog == NMI_LOCAL_APIC) |
342 | lapic_watchdog_stop(); | 351 | lapic_watchdog_stop(); |
352 | else | ||
353 | __acpi_nmi_disable(NULL); | ||
343 | __get_cpu_var(wd_enabled) = 0; | 354 | __get_cpu_var(wd_enabled) = 0; |
344 | atomic_dec(&nmi_active); | 355 | atomic_dec(&nmi_active); |
345 | } | 356 | } |
@@ -465,6 +476,24 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) | |||
465 | 476 | ||
466 | #ifdef CONFIG_SYSCTL | 477 | #ifdef CONFIG_SYSCTL |
467 | 478 | ||
479 | static void enable_ioapic_nmi_watchdog_single(void *unused) | ||
480 | { | ||
481 | __get_cpu_var(wd_enabled) = 1; | ||
482 | atomic_inc(&nmi_active); | ||
483 | __acpi_nmi_enable(NULL); | ||
484 | } | ||
485 | |||
486 | static void enable_ioapic_nmi_watchdog(void) | ||
487 | { | ||
488 | on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1); | ||
489 | touch_nmi_watchdog(); | ||
490 | } | ||
491 | |||
492 | static void disable_ioapic_nmi_watchdog(void) | ||
493 | { | ||
494 | on_each_cpu(stop_apic_nmi_watchdog, NULL, 1); | ||
495 | } | ||
496 | |||
468 | static int __init setup_unknown_nmi_panic(char *str) | 497 | static int __init setup_unknown_nmi_panic(char *str) |
469 | { | 498 | { |
470 | unknown_nmi_panic = 1; | 499 | unknown_nmi_panic = 1; |
@@ -507,6 +536,11 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, | |||
507 | enable_lapic_nmi_watchdog(); | 536 | enable_lapic_nmi_watchdog(); |
508 | else | 537 | else |
509 | disable_lapic_nmi_watchdog(); | 538 | disable_lapic_nmi_watchdog(); |
539 | } else if (nmi_watchdog == NMI_IO_APIC) { | ||
540 | if (nmi_watchdog_enabled) | ||
541 | enable_ioapic_nmi_watchdog(); | ||
542 | else | ||
543 | disable_ioapic_nmi_watchdog(); | ||
510 | } else { | 544 | } else { |
511 | printk(KERN_WARNING | 545 | printk(KERN_WARNING |
512 | "NMI watchdog doesn't know what hardware to touch\n"); | 546 | "NMI watchdog doesn't know what hardware to touch\n"); |
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index 4caff39078e0..0deea37a53cf 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c | |||
@@ -31,7 +31,7 @@ | |||
31 | #include <asm/numaq.h> | 31 | #include <asm/numaq.h> |
32 | #include <asm/topology.h> | 32 | #include <asm/topology.h> |
33 | #include <asm/processor.h> | 33 | #include <asm/processor.h> |
34 | #include <asm/mpspec.h> | 34 | #include <asm/genapic.h> |
35 | #include <asm/e820.h> | 35 | #include <asm/e820.h> |
36 | #include <asm/setup.h> | 36 | #include <asm/setup.h> |
37 | 37 | ||
@@ -235,6 +235,13 @@ static int __init numaq_setup_ioapic_ids(void) | |||
235 | return 1; | 235 | return 1; |
236 | } | 236 | } |
237 | 237 | ||
238 | static int __init numaq_update_genapic(void) | ||
239 | { | ||
240 | genapic->wakeup_cpu = wakeup_secondary_cpu_via_nmi; | ||
241 | |||
242 | return 0; | ||
243 | } | ||
244 | |||
238 | static struct x86_quirks numaq_x86_quirks __initdata = { | 245 | static struct x86_quirks numaq_x86_quirks __initdata = { |
239 | .arch_pre_time_init = numaq_pre_time_init, | 246 | .arch_pre_time_init = numaq_pre_time_init, |
240 | .arch_time_init = NULL, | 247 | .arch_time_init = NULL, |
@@ -250,6 +257,7 @@ static struct x86_quirks numaq_x86_quirks __initdata = { | |||
250 | .mpc_oem_pci_bus = mpc_oem_pci_bus, | 257 | .mpc_oem_pci_bus = mpc_oem_pci_bus, |
251 | .smp_read_mpc_oem = smp_read_mpc_oem, | 258 | .smp_read_mpc_oem = smp_read_mpc_oem, |
252 | .setup_ioapic_ids = numaq_setup_ioapic_ids, | 259 | .setup_ioapic_ids = numaq_setup_ioapic_ids, |
260 | .update_genapic = numaq_update_genapic, | ||
253 | }; | 261 | }; |
254 | 262 | ||
255 | void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, | 263 | void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, |
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 192624820217..7a3dfceb90e4 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -6,6 +6,7 @@ | |||
6 | #include <asm/proto.h> | 6 | #include <asm/proto.h> |
7 | #include <asm/dma.h> | 7 | #include <asm/dma.h> |
8 | #include <asm/iommu.h> | 8 | #include <asm/iommu.h> |
9 | #include <asm/gart.h> | ||
9 | #include <asm/calgary.h> | 10 | #include <asm/calgary.h> |
10 | #include <asm/amd_iommu.h> | 11 | #include <asm/amd_iommu.h> |
11 | 12 | ||
@@ -30,11 +31,6 @@ int no_iommu __read_mostly; | |||
30 | /* Set this to 1 if there is a HW IOMMU in the system */ | 31 | /* Set this to 1 if there is a HW IOMMU in the system */ |
31 | int iommu_detected __read_mostly = 0; | 32 | int iommu_detected __read_mostly = 0; |
32 | 33 | ||
33 | /* This tells the BIO block layer to assume merging. Default to off | ||
34 | because we cannot guarantee merging later. */ | ||
35 | int iommu_bio_merge __read_mostly = 0; | ||
36 | EXPORT_SYMBOL(iommu_bio_merge); | ||
37 | |||
38 | dma_addr_t bad_dma_address __read_mostly = 0; | 34 | dma_addr_t bad_dma_address __read_mostly = 0; |
39 | EXPORT_SYMBOL(bad_dma_address); | 35 | EXPORT_SYMBOL(bad_dma_address); |
40 | 36 | ||
@@ -188,7 +184,6 @@ static __init int iommu_setup(char *p) | |||
188 | } | 184 | } |
189 | 185 | ||
190 | if (!strncmp(p, "biomerge", 8)) { | 186 | if (!strncmp(p, "biomerge", 8)) { |
191 | iommu_bio_merge = 4096; | ||
192 | iommu_merge = 1; | 187 | iommu_merge = 1; |
193 | force_iommu = 1; | 188 | force_iommu = 1; |
194 | } | 189 | } |
@@ -300,8 +295,8 @@ fs_initcall(pci_iommu_init); | |||
300 | static __devinit void via_no_dac(struct pci_dev *dev) | 295 | static __devinit void via_no_dac(struct pci_dev *dev) |
301 | { | 296 | { |
302 | if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { | 297 | if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { |
303 | printk(KERN_INFO "PCI: VIA PCI bridge detected." | 298 | printk(KERN_INFO |
304 | "Disabling DAC.\n"); | 299 | "PCI: VIA PCI bridge detected. Disabling DAC.\n"); |
305 | forbid_dac = 1; | 300 | forbid_dac = 1; |
306 | } | 301 | } |
307 | } | 302 | } |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c622772744d8..b8f3e9dbabd7 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -1,6 +1,7 @@ | |||
1 | #include <linux/errno.h> | 1 | #include <linux/errno.h> |
2 | #include <linux/kernel.h> | 2 | #include <linux/kernel.h> |
3 | #include <linux/mm.h> | 3 | #include <linux/mm.h> |
4 | #include <asm/idle.h> | ||
4 | #include <linux/smp.h> | 5 | #include <linux/smp.h> |
5 | #include <linux/slab.h> | 6 | #include <linux/slab.h> |
6 | #include <linux/sched.h> | 7 | #include <linux/sched.h> |
@@ -8,6 +9,7 @@ | |||
8 | #include <linux/pm.h> | 9 | #include <linux/pm.h> |
9 | #include <linux/clockchips.h> | 10 | #include <linux/clockchips.h> |
10 | #include <asm/system.h> | 11 | #include <asm/system.h> |
12 | #include <asm/apic.h> | ||
11 | 13 | ||
12 | unsigned long idle_halt; | 14 | unsigned long idle_halt; |
13 | EXPORT_SYMBOL(idle_halt); | 15 | EXPORT_SYMBOL(idle_halt); |
@@ -122,6 +124,21 @@ void default_idle(void) | |||
122 | EXPORT_SYMBOL(default_idle); | 124 | EXPORT_SYMBOL(default_idle); |
123 | #endif | 125 | #endif |
124 | 126 | ||
127 | void stop_this_cpu(void *dummy) | ||
128 | { | ||
129 | local_irq_disable(); | ||
130 | /* | ||
131 | * Remove this CPU: | ||
132 | */ | ||
133 | cpu_clear(smp_processor_id(), cpu_online_map); | ||
134 | disable_local_APIC(); | ||
135 | |||
136 | for (;;) { | ||
137 | if (hlt_works(smp_processor_id())) | ||
138 | halt(); | ||
139 | } | ||
140 | } | ||
141 | |||
125 | static void do_nothing(void *unused) | 142 | static void do_nothing(void *unused) |
126 | { | 143 | { |
127 | } | 144 | } |
@@ -270,7 +287,7 @@ static void c1e_idle(void) | |||
270 | rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); | 287 | rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); |
271 | if (lo & K8_INTP_C1E_ACTIVE_MASK) { | 288 | if (lo & K8_INTP_C1E_ACTIVE_MASK) { |
272 | c1e_detected = 1; | 289 | c1e_detected = 1; |
273 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) | 290 | if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) |
274 | mark_tsc_unstable("TSC halt in AMD C1E"); | 291 | mark_tsc_unstable("TSC halt in AMD C1E"); |
275 | printk(KERN_INFO "System has AMD C1E enabled\n"); | 292 | printk(KERN_INFO "System has AMD C1E enabled\n"); |
276 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E); | 293 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E); |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 0a6d8c12e10d..06180dff5b2e 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -929,17 +929,16 @@ void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c) | |||
929 | switch (c->x86) { | 929 | switch (c->x86) { |
930 | case 0x6: | 930 | case 0x6: |
931 | switch (c->x86_model) { | 931 | switch (c->x86_model) { |
932 | case 0 ... 0xC: | ||
933 | /* sorry, don't know about them */ | ||
934 | break; | ||
932 | case 0xD: | 935 | case 0xD: |
933 | case 0xE: /* Pentium M */ | 936 | case 0xE: /* Pentium M */ |
934 | bts_configure(&bts_cfg_pentium_m); | 937 | bts_configure(&bts_cfg_pentium_m); |
935 | break; | 938 | break; |
936 | case 0xF: /* Core2 */ | 939 | default: /* Core2, Atom, ... */ |
937 | case 0x1C: /* Atom */ | ||
938 | bts_configure(&bts_cfg_core2); | 940 | bts_configure(&bts_cfg_core2); |
939 | break; | 941 | break; |
940 | default: | ||
941 | /* sorry, don't know about them */ | ||
942 | break; | ||
943 | } | 942 | } |
944 | break; | 943 | break; |
945 | case 0xF: | 944 | case 0xF: |
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index cc5a2545dd41..61f718df6eec 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -21,6 +21,9 @@ | |||
21 | # include <asm/iommu.h> | 21 | # include <asm/iommu.h> |
22 | #endif | 22 | #endif |
23 | 23 | ||
24 | #include <mach_ipi.h> | ||
25 | |||
26 | |||
24 | /* | 27 | /* |
25 | * Power off function, if any | 28 | * Power off function, if any |
26 | */ | 29 | */ |
@@ -36,7 +39,10 @@ int reboot_force; | |||
36 | static int reboot_cpu = -1; | 39 | static int reboot_cpu = -1; |
37 | #endif | 40 | #endif |
38 | 41 | ||
39 | /* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | 42 | /* This is set by the PCI code if either type 1 or type 2 PCI is detected */ |
43 | bool port_cf9_safe = false; | ||
44 | |||
45 | /* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci] | ||
40 | warm Don't set the cold reboot flag | 46 | warm Don't set the cold reboot flag |
41 | cold Set the cold reboot flag | 47 | cold Set the cold reboot flag |
42 | bios Reboot by jumping through the BIOS (only for X86_32) | 48 | bios Reboot by jumping through the BIOS (only for X86_32) |
@@ -45,6 +51,7 @@ static int reboot_cpu = -1; | |||
45 | kbd Use the keyboard controller. cold reset (default) | 51 | kbd Use the keyboard controller. cold reset (default) |
46 | acpi Use the RESET_REG in the FADT | 52 | acpi Use the RESET_REG in the FADT |
47 | efi Use efi reset_system runtime service | 53 | efi Use efi reset_system runtime service |
54 | pci Use the so-called "PCI reset register", CF9 | ||
48 | force Avoid anything that could hang. | 55 | force Avoid anything that could hang. |
49 | */ | 56 | */ |
50 | static int __init reboot_setup(char *str) | 57 | static int __init reboot_setup(char *str) |
@@ -79,6 +86,7 @@ static int __init reboot_setup(char *str) | |||
79 | case 'k': | 86 | case 'k': |
80 | case 't': | 87 | case 't': |
81 | case 'e': | 88 | case 'e': |
89 | case 'p': | ||
82 | reboot_type = *str; | 90 | reboot_type = *str; |
83 | break; | 91 | break; |
84 | 92 | ||
@@ -404,12 +412,27 @@ static void native_machine_emergency_restart(void) | |||
404 | reboot_type = BOOT_KBD; | 412 | reboot_type = BOOT_KBD; |
405 | break; | 413 | break; |
406 | 414 | ||
407 | |||
408 | case BOOT_EFI: | 415 | case BOOT_EFI: |
409 | if (efi_enabled) | 416 | if (efi_enabled) |
410 | efi.reset_system(reboot_mode ? EFI_RESET_WARM : EFI_RESET_COLD, | 417 | efi.reset_system(reboot_mode ? |
418 | EFI_RESET_WARM : | ||
419 | EFI_RESET_COLD, | ||
411 | EFI_SUCCESS, 0, NULL); | 420 | EFI_SUCCESS, 0, NULL); |
421 | reboot_type = BOOT_KBD; | ||
422 | break; | ||
412 | 423 | ||
424 | case BOOT_CF9: | ||
425 | port_cf9_safe = true; | ||
426 | /* fall through */ | ||
427 | |||
428 | case BOOT_CF9_COND: | ||
429 | if (port_cf9_safe) { | ||
430 | u8 cf9 = inb(0xcf9) & ~6; | ||
431 | outb(cf9|2, 0xcf9); /* Request hard reset */ | ||
432 | udelay(50); | ||
433 | outb(cf9|6, 0xcf9); /* Actually do the reset */ | ||
434 | udelay(50); | ||
435 | } | ||
413 | reboot_type = BOOT_KBD; | 436 | reboot_type = BOOT_KBD; |
414 | break; | 437 | break; |
415 | } | 438 | } |
@@ -470,6 +493,11 @@ static void native_machine_restart(char *__unused) | |||
470 | 493 | ||
471 | static void native_machine_halt(void) | 494 | static void native_machine_halt(void) |
472 | { | 495 | { |
496 | /* stop other cpus and apics */ | ||
497 | machine_shutdown(); | ||
498 | |||
499 | /* stop this cpu */ | ||
500 | stop_this_cpu(NULL); | ||
473 | } | 501 | } |
474 | 502 | ||
475 | static void native_machine_power_off(void) | 503 | static void native_machine_power_off(void) |
@@ -523,3 +551,95 @@ void machine_crash_shutdown(struct pt_regs *regs) | |||
523 | machine_ops.crash_shutdown(regs); | 551 | machine_ops.crash_shutdown(regs); |
524 | } | 552 | } |
525 | #endif | 553 | #endif |
554 | |||
555 | |||
556 | #if defined(CONFIG_SMP) | ||
557 | |||
558 | /* This keeps a track of which one is crashing cpu. */ | ||
559 | static int crashing_cpu; | ||
560 | static nmi_shootdown_cb shootdown_callback; | ||
561 | |||
562 | static atomic_t waiting_for_crash_ipi; | ||
563 | |||
564 | static int crash_nmi_callback(struct notifier_block *self, | ||
565 | unsigned long val, void *data) | ||
566 | { | ||
567 | int cpu; | ||
568 | |||
569 | if (val != DIE_NMI_IPI) | ||
570 | return NOTIFY_OK; | ||
571 | |||
572 | cpu = raw_smp_processor_id(); | ||
573 | |||
574 | /* Don't do anything if this handler is invoked on crashing cpu. | ||
575 | * Otherwise, system will completely hang. Crashing cpu can get | ||
576 | * an NMI if system was initially booted with nmi_watchdog parameter. | ||
577 | */ | ||
578 | if (cpu == crashing_cpu) | ||
579 | return NOTIFY_STOP; | ||
580 | local_irq_disable(); | ||
581 | |||
582 | shootdown_callback(cpu, (struct die_args *)data); | ||
583 | |||
584 | atomic_dec(&waiting_for_crash_ipi); | ||
585 | /* Assume hlt works */ | ||
586 | halt(); | ||
587 | for (;;) | ||
588 | cpu_relax(); | ||
589 | |||
590 | return 1; | ||
591 | } | ||
592 | |||
593 | static void smp_send_nmi_allbutself(void) | ||
594 | { | ||
595 | cpumask_t mask = cpu_online_map; | ||
596 | cpu_clear(safe_smp_processor_id(), mask); | ||
597 | if (!cpus_empty(mask)) | ||
598 | send_IPI_mask(mask, NMI_VECTOR); | ||
599 | } | ||
600 | |||
601 | static struct notifier_block crash_nmi_nb = { | ||
602 | .notifier_call = crash_nmi_callback, | ||
603 | }; | ||
604 | |||
605 | /* Halt all other CPUs, calling the specified function on each of them | ||
606 | * | ||
607 | * This function can be used to halt all other CPUs on crash | ||
608 | * or emergency reboot time. The function passed as parameter | ||
609 | * will be called inside a NMI handler on all CPUs. | ||
610 | */ | ||
611 | void nmi_shootdown_cpus(nmi_shootdown_cb callback) | ||
612 | { | ||
613 | unsigned long msecs; | ||
614 | local_irq_disable(); | ||
615 | |||
616 | /* Make a note of crashing cpu. Will be used in NMI callback.*/ | ||
617 | crashing_cpu = safe_smp_processor_id(); | ||
618 | |||
619 | shootdown_callback = callback; | ||
620 | |||
621 | atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); | ||
622 | /* Would it be better to replace the trap vector here? */ | ||
623 | if (register_die_notifier(&crash_nmi_nb)) | ||
624 | return; /* return what? */ | ||
625 | /* Ensure the new callback function is set before sending | ||
626 | * out the NMI | ||
627 | */ | ||
628 | wmb(); | ||
629 | |||
630 | smp_send_nmi_allbutself(); | ||
631 | |||
632 | msecs = 1000; /* Wait at most a second for the other cpus to stop */ | ||
633 | while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) { | ||
634 | mdelay(1); | ||
635 | msecs--; | ||
636 | } | ||
637 | |||
638 | /* Leave the nmi callback set */ | ||
639 | } | ||
640 | #else /* !CONFIG_SMP */ | ||
641 | void nmi_shootdown_cpus(nmi_shootdown_cb callback) | ||
642 | { | ||
643 | /* No other CPUs to shoot down */ | ||
644 | } | ||
645 | #endif | ||
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index 6f50664b2ba5..a160f3119725 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S | |||
@@ -10,15 +10,12 @@ | |||
10 | #include <asm/page.h> | 10 | #include <asm/page.h> |
11 | #include <asm/kexec.h> | 11 | #include <asm/kexec.h> |
12 | #include <asm/processor-flags.h> | 12 | #include <asm/processor-flags.h> |
13 | #include <asm/pgtable.h> | ||
14 | 13 | ||
15 | /* | 14 | /* |
16 | * Must be relocatable PIC code callable as a C function | 15 | * Must be relocatable PIC code callable as a C function |
17 | */ | 16 | */ |
18 | 17 | ||
19 | #define PTR(x) (x << 2) | 18 | #define PTR(x) (x << 2) |
20 | #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) | ||
21 | #define PAE_PGD_ATTR (_PAGE_PRESENT) | ||
22 | 19 | ||
23 | /* control_page + KEXEC_CONTROL_CODE_MAX_SIZE | 20 | /* control_page + KEXEC_CONTROL_CODE_MAX_SIZE |
24 | * ~ control_page + PAGE_SIZE are used as data storage and stack for | 21 | * ~ control_page + PAGE_SIZE are used as data storage and stack for |
@@ -39,7 +36,6 @@ | |||
39 | #define CP_PA_BACKUP_PAGES_MAP DATA(0x1c) | 36 | #define CP_PA_BACKUP_PAGES_MAP DATA(0x1c) |
40 | 37 | ||
41 | .text | 38 | .text |
42 | .align PAGE_SIZE | ||
43 | .globl relocate_kernel | 39 | .globl relocate_kernel |
44 | relocate_kernel: | 40 | relocate_kernel: |
45 | /* Save the CPU context, used for jumping back */ | 41 | /* Save the CPU context, used for jumping back */ |
@@ -60,117 +56,6 @@ relocate_kernel: | |||
60 | movl %cr4, %eax | 56 | movl %cr4, %eax |
61 | movl %eax, CR4(%edi) | 57 | movl %eax, CR4(%edi) |
62 | 58 | ||
63 | #ifdef CONFIG_X86_PAE | ||
64 | /* map the control page at its virtual address */ | ||
65 | |||
66 | movl PTR(VA_PGD)(%ebp), %edi | ||
67 | movl PTR(VA_CONTROL_PAGE)(%ebp), %eax | ||
68 | andl $0xc0000000, %eax | ||
69 | shrl $27, %eax | ||
70 | addl %edi, %eax | ||
71 | |||
72 | movl PTR(PA_PMD_0)(%ebp), %edx | ||
73 | orl $PAE_PGD_ATTR, %edx | ||
74 | movl %edx, (%eax) | ||
75 | |||
76 | movl PTR(VA_PMD_0)(%ebp), %edi | ||
77 | movl PTR(VA_CONTROL_PAGE)(%ebp), %eax | ||
78 | andl $0x3fe00000, %eax | ||
79 | shrl $18, %eax | ||
80 | addl %edi, %eax | ||
81 | |||
82 | movl PTR(PA_PTE_0)(%ebp), %edx | ||
83 | orl $PAGE_ATTR, %edx | ||
84 | movl %edx, (%eax) | ||
85 | |||
86 | movl PTR(VA_PTE_0)(%ebp), %edi | ||
87 | movl PTR(VA_CONTROL_PAGE)(%ebp), %eax | ||
88 | andl $0x001ff000, %eax | ||
89 | shrl $9, %eax | ||
90 | addl %edi, %eax | ||
91 | |||
92 | movl PTR(PA_CONTROL_PAGE)(%ebp), %edx | ||
93 | orl $PAGE_ATTR, %edx | ||
94 | movl %edx, (%eax) | ||
95 | |||
96 | /* identity map the control page at its physical address */ | ||
97 | |||
98 | movl PTR(VA_PGD)(%ebp), %edi | ||
99 | movl PTR(PA_CONTROL_PAGE)(%ebp), %eax | ||
100 | andl $0xc0000000, %eax | ||
101 | shrl $27, %eax | ||
102 | addl %edi, %eax | ||
103 | |||
104 | movl PTR(PA_PMD_1)(%ebp), %edx | ||
105 | orl $PAE_PGD_ATTR, %edx | ||
106 | movl %edx, (%eax) | ||
107 | |||
108 | movl PTR(VA_PMD_1)(%ebp), %edi | ||
109 | movl PTR(PA_CONTROL_PAGE)(%ebp), %eax | ||
110 | andl $0x3fe00000, %eax | ||
111 | shrl $18, %eax | ||
112 | addl %edi, %eax | ||
113 | |||
114 | movl PTR(PA_PTE_1)(%ebp), %edx | ||
115 | orl $PAGE_ATTR, %edx | ||
116 | movl %edx, (%eax) | ||
117 | |||
118 | movl PTR(VA_PTE_1)(%ebp), %edi | ||
119 | movl PTR(PA_CONTROL_PAGE)(%ebp), %eax | ||
120 | andl $0x001ff000, %eax | ||
121 | shrl $9, %eax | ||
122 | addl %edi, %eax | ||
123 | |||
124 | movl PTR(PA_CONTROL_PAGE)(%ebp), %edx | ||
125 | orl $PAGE_ATTR, %edx | ||
126 | movl %edx, (%eax) | ||
127 | #else | ||
128 | /* map the control page at its virtual address */ | ||
129 | |||
130 | movl PTR(VA_PGD)(%ebp), %edi | ||
131 | movl PTR(VA_CONTROL_PAGE)(%ebp), %eax | ||
132 | andl $0xffc00000, %eax | ||
133 | shrl $20, %eax | ||
134 | addl %edi, %eax | ||
135 | |||
136 | movl PTR(PA_PTE_0)(%ebp), %edx | ||
137 | orl $PAGE_ATTR, %edx | ||
138 | movl %edx, (%eax) | ||
139 | |||
140 | movl PTR(VA_PTE_0)(%ebp), %edi | ||
141 | movl PTR(VA_CONTROL_PAGE)(%ebp), %eax | ||
142 | andl $0x003ff000, %eax | ||
143 | shrl $10, %eax | ||
144 | addl %edi, %eax | ||
145 | |||
146 | movl PTR(PA_CONTROL_PAGE)(%ebp), %edx | ||
147 | orl $PAGE_ATTR, %edx | ||
148 | movl %edx, (%eax) | ||
149 | |||
150 | /* identity map the control page at its physical address */ | ||
151 | |||
152 | movl PTR(VA_PGD)(%ebp), %edi | ||
153 | movl PTR(PA_CONTROL_PAGE)(%ebp), %eax | ||
154 | andl $0xffc00000, %eax | ||
155 | shrl $20, %eax | ||
156 | addl %edi, %eax | ||
157 | |||
158 | movl PTR(PA_PTE_1)(%ebp), %edx | ||
159 | orl $PAGE_ATTR, %edx | ||
160 | movl %edx, (%eax) | ||
161 | |||
162 | movl PTR(VA_PTE_1)(%ebp), %edi | ||
163 | movl PTR(PA_CONTROL_PAGE)(%ebp), %eax | ||
164 | andl $0x003ff000, %eax | ||
165 | shrl $10, %eax | ||
166 | addl %edi, %eax | ||
167 | |||
168 | movl PTR(PA_CONTROL_PAGE)(%ebp), %edx | ||
169 | orl $PAGE_ATTR, %edx | ||
170 | movl %edx, (%eax) | ||
171 | #endif | ||
172 | |||
173 | relocate_new_kernel: | ||
174 | /* read the arguments and say goodbye to the stack */ | 59 | /* read the arguments and say goodbye to the stack */ |
175 | movl 20+4(%esp), %ebx /* page_list */ | 60 | movl 20+4(%esp), %ebx /* page_list */ |
176 | movl 20+8(%esp), %ebp /* list of pages */ | 61 | movl 20+8(%esp), %ebp /* list of pages */ |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index bdec76e55594..08e02e8453c9 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -93,11 +93,13 @@ | |||
93 | #include <asm/desc.h> | 93 | #include <asm/desc.h> |
94 | #include <asm/dma.h> | 94 | #include <asm/dma.h> |
95 | #include <asm/iommu.h> | 95 | #include <asm/iommu.h> |
96 | #include <asm/gart.h> | ||
96 | #include <asm/mmu_context.h> | 97 | #include <asm/mmu_context.h> |
97 | #include <asm/proto.h> | 98 | #include <asm/proto.h> |
98 | 99 | ||
99 | #include <mach_apic.h> | 100 | #include <mach_apic.h> |
100 | #include <asm/paravirt.h> | 101 | #include <asm/paravirt.h> |
102 | #include <asm/hypervisor.h> | ||
101 | 103 | ||
102 | #include <asm/percpu.h> | 104 | #include <asm/percpu.h> |
103 | #include <asm/topology.h> | 105 | #include <asm/topology.h> |
@@ -448,6 +450,7 @@ static void __init reserve_early_setup_data(void) | |||
448 | * @size: Size of the crashkernel memory to reserve. | 450 | * @size: Size of the crashkernel memory to reserve. |
449 | * Returns the base address on success, and -1ULL on failure. | 451 | * Returns the base address on success, and -1ULL on failure. |
450 | */ | 452 | */ |
453 | static | ||
451 | unsigned long long __init find_and_reserve_crashkernel(unsigned long long size) | 454 | unsigned long long __init find_and_reserve_crashkernel(unsigned long long size) |
452 | { | 455 | { |
453 | const unsigned long long alignment = 16<<20; /* 16M */ | 456 | const unsigned long long alignment = 16<<20; /* 16M */ |
@@ -583,161 +586,24 @@ static int __init setup_elfcorehdr(char *arg) | |||
583 | early_param("elfcorehdr", setup_elfcorehdr); | 586 | early_param("elfcorehdr", setup_elfcorehdr); |
584 | #endif | 587 | #endif |
585 | 588 | ||
586 | static struct x86_quirks default_x86_quirks __initdata; | 589 | static int __init default_update_genapic(void) |
587 | |||
588 | struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; | ||
589 | |||
590 | /* | ||
591 | * Some BIOSes seem to corrupt the low 64k of memory during events | ||
592 | * like suspend/resume and unplugging an HDMI cable. Reserve all | ||
593 | * remaining free memory in that area and fill it with a distinct | ||
594 | * pattern. | ||
595 | */ | ||
596 | #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION | ||
597 | #define MAX_SCAN_AREAS 8 | ||
598 | |||
599 | static int __read_mostly memory_corruption_check = -1; | ||
600 | |||
601 | static unsigned __read_mostly corruption_check_size = 64*1024; | ||
602 | static unsigned __read_mostly corruption_check_period = 60; /* seconds */ | ||
603 | |||
604 | static struct e820entry scan_areas[MAX_SCAN_AREAS]; | ||
605 | static int num_scan_areas; | ||
606 | |||
607 | |||
608 | static int set_corruption_check(char *arg) | ||
609 | { | ||
610 | char *end; | ||
611 | |||
612 | memory_corruption_check = simple_strtol(arg, &end, 10); | ||
613 | |||
614 | return (*end == 0) ? 0 : -EINVAL; | ||
615 | } | ||
616 | early_param("memory_corruption_check", set_corruption_check); | ||
617 | |||
618 | static int set_corruption_check_period(char *arg) | ||
619 | { | ||
620 | char *end; | ||
621 | |||
622 | corruption_check_period = simple_strtoul(arg, &end, 10); | ||
623 | |||
624 | return (*end == 0) ? 0 : -EINVAL; | ||
625 | } | ||
626 | early_param("memory_corruption_check_period", set_corruption_check_period); | ||
627 | |||
628 | static int set_corruption_check_size(char *arg) | ||
629 | { | 590 | { |
630 | char *end; | 591 | #ifdef CONFIG_X86_SMP |
631 | unsigned size; | 592 | # if defined(CONFIG_X86_GENERICARCH) || defined(CONFIG_X86_64) |
632 | 593 | genapic->wakeup_cpu = wakeup_secondary_cpu_via_init; | |
633 | size = memparse(arg, &end); | 594 | # endif |
634 | |||
635 | if (*end == '\0') | ||
636 | corruption_check_size = size; | ||
637 | |||
638 | return (size == corruption_check_size) ? 0 : -EINVAL; | ||
639 | } | ||
640 | early_param("memory_corruption_check_size", set_corruption_check_size); | ||
641 | |||
642 | |||
643 | static void __init setup_bios_corruption_check(void) | ||
644 | { | ||
645 | u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */ | ||
646 | |||
647 | if (memory_corruption_check == -1) { | ||
648 | memory_corruption_check = | ||
649 | #ifdef CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK | ||
650 | 1 | ||
651 | #else | ||
652 | 0 | ||
653 | #endif | 595 | #endif |
654 | ; | ||
655 | } | ||
656 | |||
657 | if (corruption_check_size == 0) | ||
658 | memory_corruption_check = 0; | ||
659 | |||
660 | if (!memory_corruption_check) | ||
661 | return; | ||
662 | |||
663 | corruption_check_size = round_up(corruption_check_size, PAGE_SIZE); | ||
664 | 596 | ||
665 | while(addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) { | 597 | return 0; |
666 | u64 size; | ||
667 | addr = find_e820_area_size(addr, &size, PAGE_SIZE); | ||
668 | |||
669 | if (addr == 0) | ||
670 | break; | ||
671 | |||
672 | if ((addr + size) > corruption_check_size) | ||
673 | size = corruption_check_size - addr; | ||
674 | |||
675 | if (size == 0) | ||
676 | break; | ||
677 | |||
678 | e820_update_range(addr, size, E820_RAM, E820_RESERVED); | ||
679 | scan_areas[num_scan_areas].addr = addr; | ||
680 | scan_areas[num_scan_areas].size = size; | ||
681 | num_scan_areas++; | ||
682 | |||
683 | /* Assume we've already mapped this early memory */ | ||
684 | memset(__va(addr), 0, size); | ||
685 | |||
686 | addr += size; | ||
687 | } | ||
688 | |||
689 | printk(KERN_INFO "Scanning %d areas for low memory corruption\n", | ||
690 | num_scan_areas); | ||
691 | update_e820(); | ||
692 | } | ||
693 | |||
694 | static struct timer_list periodic_check_timer; | ||
695 | |||
696 | void check_for_bios_corruption(void) | ||
697 | { | ||
698 | int i; | ||
699 | int corruption = 0; | ||
700 | |||
701 | if (!memory_corruption_check) | ||
702 | return; | ||
703 | |||
704 | for(i = 0; i < num_scan_areas; i++) { | ||
705 | unsigned long *addr = __va(scan_areas[i].addr); | ||
706 | unsigned long size = scan_areas[i].size; | ||
707 | |||
708 | for(; size; addr++, size -= sizeof(unsigned long)) { | ||
709 | if (!*addr) | ||
710 | continue; | ||
711 | printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n", | ||
712 | addr, __pa(addr), *addr); | ||
713 | corruption = 1; | ||
714 | *addr = 0; | ||
715 | } | ||
716 | } | ||
717 | |||
718 | WARN(corruption, KERN_ERR "Memory corruption detected in low memory\n"); | ||
719 | } | ||
720 | |||
721 | static void periodic_check_for_corruption(unsigned long data) | ||
722 | { | ||
723 | check_for_bios_corruption(); | ||
724 | mod_timer(&periodic_check_timer, round_jiffies(jiffies + corruption_check_period*HZ)); | ||
725 | } | 598 | } |
726 | 599 | ||
727 | void start_periodic_check_for_corruption(void) | 600 | static struct x86_quirks default_x86_quirks __initdata = { |
728 | { | 601 | .update_genapic = default_update_genapic, |
729 | if (!memory_corruption_check || corruption_check_period == 0) | 602 | }; |
730 | return; | ||
731 | |||
732 | printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n", | ||
733 | corruption_check_period); | ||
734 | 603 | ||
735 | init_timer(&periodic_check_timer); | 604 | struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; |
736 | periodic_check_timer.function = &periodic_check_for_corruption; | ||
737 | periodic_check_for_corruption(0); | ||
738 | } | ||
739 | #endif | ||
740 | 605 | ||
606 | #ifdef CONFIG_X86_RESERVE_LOW_64K | ||
741 | static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) | 607 | static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) |
742 | { | 608 | { |
743 | printk(KERN_NOTICE | 609 | printk(KERN_NOTICE |
@@ -749,6 +615,7 @@ static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) | |||
749 | 615 | ||
750 | return 0; | 616 | return 0; |
751 | } | 617 | } |
618 | #endif | ||
752 | 619 | ||
753 | /* List of systems that have known low memory corruption BIOS problems */ | 620 | /* List of systems that have known low memory corruption BIOS problems */ |
754 | static struct dmi_system_id __initdata bad_bios_dmi_table[] = { | 621 | static struct dmi_system_id __initdata bad_bios_dmi_table[] = { |
@@ -907,6 +774,12 @@ void __init setup_arch(char **cmdline_p) | |||
907 | 774 | ||
908 | dmi_check_system(bad_bios_dmi_table); | 775 | dmi_check_system(bad_bios_dmi_table); |
909 | 776 | ||
777 | /* | ||
778 | * VMware detection requires dmi to be available, so this | ||
779 | * needs to be done after dmi_scan_machine, for the BP. | ||
780 | */ | ||
781 | init_hypervisor(&boot_cpu_data); | ||
782 | |||
910 | #ifdef CONFIG_X86_32 | 783 | #ifdef CONFIG_X86_32 |
911 | probe_roms(); | 784 | probe_roms(); |
912 | #endif | 785 | #endif |
diff --git a/arch/x86/kernel/sigframe.h b/arch/x86/kernel/sigframe.h deleted file mode 100644 index cc673aa55ce4..000000000000 --- a/arch/x86/kernel/sigframe.h +++ /dev/null | |||
@@ -1,42 +0,0 @@ | |||
1 | #ifdef CONFIG_X86_32 | ||
2 | struct sigframe { | ||
3 | char __user *pretcode; | ||
4 | int sig; | ||
5 | struct sigcontext sc; | ||
6 | /* | ||
7 | * fpstate is unused. fpstate is moved/allocated after | ||
8 | * retcode[] below. This movement allows to have the FP state and the | ||
9 | * future state extensions (xsave) stay together. | ||
10 | * And at the same time retaining the unused fpstate, prevents changing | ||
11 | * the offset of extramask[] in the sigframe and thus prevent any | ||
12 | * legacy application accessing/modifying it. | ||
13 | */ | ||
14 | struct _fpstate fpstate_unused; | ||
15 | unsigned long extramask[_NSIG_WORDS-1]; | ||
16 | char retcode[8]; | ||
17 | /* fp state follows here */ | ||
18 | }; | ||
19 | |||
20 | struct rt_sigframe { | ||
21 | char __user *pretcode; | ||
22 | int sig; | ||
23 | struct siginfo __user *pinfo; | ||
24 | void __user *puc; | ||
25 | struct siginfo info; | ||
26 | struct ucontext uc; | ||
27 | char retcode[8]; | ||
28 | /* fp state follows here */ | ||
29 | }; | ||
30 | #else | ||
31 | struct rt_sigframe { | ||
32 | char __user *pretcode; | ||
33 | struct ucontext uc; | ||
34 | struct siginfo info; | ||
35 | /* fp state follows here */ | ||
36 | }; | ||
37 | |||
38 | int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | ||
39 | sigset_t *set, struct pt_regs *regs); | ||
40 | int ia32_setup_frame(int sig, struct k_sigaction *ka, | ||
41 | sigset_t *set, struct pt_regs *regs); | ||
42 | #endif | ||
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal.c index d6dd057d0f22..89bb7668041d 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal.c | |||
@@ -1,36 +1,41 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 1991, 1992 Linus Torvalds | 2 | * Copyright (C) 1991, 1992 Linus Torvalds |
3 | * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs | ||
3 | * | 4 | * |
4 | * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson | 5 | * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson |
5 | * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes | 6 | * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes |
7 | * 2000-2002 x86-64 support by Andi Kleen | ||
6 | */ | 8 | */ |
7 | #include <linux/list.h> | ||
8 | 9 | ||
9 | #include <linux/personality.h> | 10 | #include <linux/sched.h> |
10 | #include <linux/binfmts.h> | 11 | #include <linux/mm.h> |
11 | #include <linux/suspend.h> | 12 | #include <linux/smp.h> |
12 | #include <linux/kernel.h> | 13 | #include <linux/kernel.h> |
13 | #include <linux/ptrace.h> | ||
14 | #include <linux/signal.h> | 14 | #include <linux/signal.h> |
15 | #include <linux/stddef.h> | ||
16 | #include <linux/unistd.h> | ||
17 | #include <linux/errno.h> | 15 | #include <linux/errno.h> |
18 | #include <linux/sched.h> | ||
19 | #include <linux/wait.h> | 16 | #include <linux/wait.h> |
17 | #include <linux/ptrace.h> | ||
20 | #include <linux/tracehook.h> | 18 | #include <linux/tracehook.h> |
21 | #include <linux/elf.h> | 19 | #include <linux/unistd.h> |
22 | #include <linux/smp.h> | 20 | #include <linux/stddef.h> |
23 | #include <linux/mm.h> | 21 | #include <linux/personality.h> |
22 | #include <linux/uaccess.h> | ||
24 | 23 | ||
25 | #include <asm/processor.h> | 24 | #include <asm/processor.h> |
26 | #include <asm/ucontext.h> | 25 | #include <asm/ucontext.h> |
27 | #include <asm/uaccess.h> | ||
28 | #include <asm/i387.h> | 26 | #include <asm/i387.h> |
29 | #include <asm/vdso.h> | 27 | #include <asm/vdso.h> |
28 | |||
29 | #ifdef CONFIG_X86_64 | ||
30 | #include <asm/proto.h> | ||
31 | #include <asm/ia32_unistd.h> | ||
32 | #include <asm/mce.h> | ||
33 | #endif /* CONFIG_X86_64 */ | ||
34 | |||
30 | #include <asm/syscall.h> | 35 | #include <asm/syscall.h> |
31 | #include <asm/syscalls.h> | 36 | #include <asm/syscalls.h> |
32 | 37 | ||
33 | #include "sigframe.h" | 38 | #include <asm/sigframe.h> |
34 | 39 | ||
35 | #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) | 40 | #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) |
36 | 41 | ||
@@ -45,74 +50,6 @@ | |||
45 | # define FIX_EFLAGS __FIX_EFLAGS | 50 | # define FIX_EFLAGS __FIX_EFLAGS |
46 | #endif | 51 | #endif |
47 | 52 | ||
48 | /* | ||
49 | * Atomically swap in the new signal mask, and wait for a signal. | ||
50 | */ | ||
51 | asmlinkage int | ||
52 | sys_sigsuspend(int history0, int history1, old_sigset_t mask) | ||
53 | { | ||
54 | mask &= _BLOCKABLE; | ||
55 | spin_lock_irq(&current->sighand->siglock); | ||
56 | current->saved_sigmask = current->blocked; | ||
57 | siginitset(&current->blocked, mask); | ||
58 | recalc_sigpending(); | ||
59 | spin_unlock_irq(&current->sighand->siglock); | ||
60 | |||
61 | current->state = TASK_INTERRUPTIBLE; | ||
62 | schedule(); | ||
63 | set_restore_sigmask(); | ||
64 | |||
65 | return -ERESTARTNOHAND; | ||
66 | } | ||
67 | |||
68 | asmlinkage int | ||
69 | sys_sigaction(int sig, const struct old_sigaction __user *act, | ||
70 | struct old_sigaction __user *oact) | ||
71 | { | ||
72 | struct k_sigaction new_ka, old_ka; | ||
73 | int ret; | ||
74 | |||
75 | if (act) { | ||
76 | old_sigset_t mask; | ||
77 | |||
78 | if (!access_ok(VERIFY_READ, act, sizeof(*act)) || | ||
79 | __get_user(new_ka.sa.sa_handler, &act->sa_handler) || | ||
80 | __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) | ||
81 | return -EFAULT; | ||
82 | |||
83 | __get_user(new_ka.sa.sa_flags, &act->sa_flags); | ||
84 | __get_user(mask, &act->sa_mask); | ||
85 | siginitset(&new_ka.sa.sa_mask, mask); | ||
86 | } | ||
87 | |||
88 | ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); | ||
89 | |||
90 | if (!ret && oact) { | ||
91 | if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || | ||
92 | __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || | ||
93 | __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) | ||
94 | return -EFAULT; | ||
95 | |||
96 | __put_user(old_ka.sa.sa_flags, &oact->sa_flags); | ||
97 | __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); | ||
98 | } | ||
99 | |||
100 | return ret; | ||
101 | } | ||
102 | |||
103 | asmlinkage int sys_sigaltstack(unsigned long bx) | ||
104 | { | ||
105 | /* | ||
106 | * This is needed to make gcc realize it doesn't own the | ||
107 | * "struct pt_regs" | ||
108 | */ | ||
109 | struct pt_regs *regs = (struct pt_regs *)&bx; | ||
110 | const stack_t __user *uss = (const stack_t __user *)bx; | ||
111 | stack_t __user *uoss = (stack_t __user *)regs->cx; | ||
112 | |||
113 | return do_sigaltstack(uss, uoss, regs->sp); | ||
114 | } | ||
115 | |||
116 | #define COPY(x) { \ | 53 | #define COPY(x) { \ |
117 | err |= __get_user(regs->x, &sc->x); \ | 54 | err |= __get_user(regs->x, &sc->x); \ |
118 | } | 55 | } |
@@ -123,7 +60,7 @@ asmlinkage int sys_sigaltstack(unsigned long bx) | |||
123 | regs->seg = tmp; \ | 60 | regs->seg = tmp; \ |
124 | } | 61 | } |
125 | 62 | ||
126 | #define COPY_SEG_STRICT(seg) { \ | 63 | #define COPY_SEG_CPL3(seg) { \ |
127 | unsigned short tmp; \ | 64 | unsigned short tmp; \ |
128 | err |= __get_user(tmp, &sc->seg); \ | 65 | err |= __get_user(tmp, &sc->seg); \ |
129 | regs->seg = tmp | 3; \ | 66 | regs->seg = tmp | 3; \ |
@@ -135,9 +72,6 @@ asmlinkage int sys_sigaltstack(unsigned long bx) | |||
135 | loadsegment(seg, tmp); \ | 72 | loadsegment(seg, tmp); \ |
136 | } | 73 | } |
137 | 74 | ||
138 | /* | ||
139 | * Do a signal return; undo the signal stack. | ||
140 | */ | ||
141 | static int | 75 | static int |
142 | restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, | 76 | restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, |
143 | unsigned long *pax) | 77 | unsigned long *pax) |
@@ -149,14 +83,36 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, | |||
149 | /* Always make any pending restarted system calls return -EINTR */ | 83 | /* Always make any pending restarted system calls return -EINTR */ |
150 | current_thread_info()->restart_block.fn = do_no_restart_syscall; | 84 | current_thread_info()->restart_block.fn = do_no_restart_syscall; |
151 | 85 | ||
86 | #ifdef CONFIG_X86_32 | ||
152 | GET_SEG(gs); | 87 | GET_SEG(gs); |
153 | COPY_SEG(fs); | 88 | COPY_SEG(fs); |
154 | COPY_SEG(es); | 89 | COPY_SEG(es); |
155 | COPY_SEG(ds); | 90 | COPY_SEG(ds); |
91 | #endif /* CONFIG_X86_32 */ | ||
92 | |||
156 | COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); | 93 | COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); |
157 | COPY(dx); COPY(cx); COPY(ip); | 94 | COPY(dx); COPY(cx); COPY(ip); |
158 | COPY_SEG_STRICT(cs); | 95 | |
159 | COPY_SEG_STRICT(ss); | 96 | #ifdef CONFIG_X86_64 |
97 | COPY(r8); | ||
98 | COPY(r9); | ||
99 | COPY(r10); | ||
100 | COPY(r11); | ||
101 | COPY(r12); | ||
102 | COPY(r13); | ||
103 | COPY(r14); | ||
104 | COPY(r15); | ||
105 | #endif /* CONFIG_X86_64 */ | ||
106 | |||
107 | #ifdef CONFIG_X86_32 | ||
108 | COPY_SEG_CPL3(cs); | ||
109 | COPY_SEG_CPL3(ss); | ||
110 | #else /* !CONFIG_X86_32 */ | ||
111 | /* Kernel saves and restores only the CS segment register on signals, | ||
112 | * which is the bare minimum needed to allow mixed 32/64-bit code. | ||
113 | * App's signal handler can save/restore other segments if needed. */ | ||
114 | COPY_SEG_CPL3(cs); | ||
115 | #endif /* CONFIG_X86_32 */ | ||
160 | 116 | ||
161 | err |= __get_user(tmpflags, &sc->flags); | 117 | err |= __get_user(tmpflags, &sc->flags); |
162 | regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); | 118 | regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); |
@@ -169,102 +125,24 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, | |||
169 | return err; | 125 | return err; |
170 | } | 126 | } |
171 | 127 | ||
172 | asmlinkage unsigned long sys_sigreturn(unsigned long __unused) | ||
173 | { | ||
174 | struct sigframe __user *frame; | ||
175 | struct pt_regs *regs; | ||
176 | unsigned long ax; | ||
177 | sigset_t set; | ||
178 | |||
179 | regs = (struct pt_regs *) &__unused; | ||
180 | frame = (struct sigframe __user *)(regs->sp - 8); | ||
181 | |||
182 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) | ||
183 | goto badframe; | ||
184 | if (__get_user(set.sig[0], &frame->sc.oldmask) || (_NSIG_WORDS > 1 | ||
185 | && __copy_from_user(&set.sig[1], &frame->extramask, | ||
186 | sizeof(frame->extramask)))) | ||
187 | goto badframe; | ||
188 | |||
189 | sigdelsetmask(&set, ~_BLOCKABLE); | ||
190 | spin_lock_irq(&current->sighand->siglock); | ||
191 | current->blocked = set; | ||
192 | recalc_sigpending(); | ||
193 | spin_unlock_irq(&current->sighand->siglock); | ||
194 | |||
195 | if (restore_sigcontext(regs, &frame->sc, &ax)) | ||
196 | goto badframe; | ||
197 | return ax; | ||
198 | |||
199 | badframe: | ||
200 | if (show_unhandled_signals && printk_ratelimit()) { | ||
201 | printk("%s%s[%d] bad frame in sigreturn frame:" | ||
202 | "%p ip:%lx sp:%lx oeax:%lx", | ||
203 | task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG, | ||
204 | current->comm, task_pid_nr(current), frame, regs->ip, | ||
205 | regs->sp, regs->orig_ax); | ||
206 | print_vma_addr(" in ", regs->ip); | ||
207 | printk(KERN_CONT "\n"); | ||
208 | } | ||
209 | |||
210 | force_sig(SIGSEGV, current); | ||
211 | |||
212 | return 0; | ||
213 | } | ||
214 | |||
215 | static long do_rt_sigreturn(struct pt_regs *regs) | ||
216 | { | ||
217 | struct rt_sigframe __user *frame; | ||
218 | unsigned long ax; | ||
219 | sigset_t set; | ||
220 | |||
221 | frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); | ||
222 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) | ||
223 | goto badframe; | ||
224 | if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) | ||
225 | goto badframe; | ||
226 | |||
227 | sigdelsetmask(&set, ~_BLOCKABLE); | ||
228 | spin_lock_irq(&current->sighand->siglock); | ||
229 | current->blocked = set; | ||
230 | recalc_sigpending(); | ||
231 | spin_unlock_irq(&current->sighand->siglock); | ||
232 | |||
233 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) | ||
234 | goto badframe; | ||
235 | |||
236 | if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT) | ||
237 | goto badframe; | ||
238 | |||
239 | return ax; | ||
240 | |||
241 | badframe: | ||
242 | signal_fault(regs, frame, "rt_sigreturn"); | ||
243 | return 0; | ||
244 | } | ||
245 | |||
246 | asmlinkage int sys_rt_sigreturn(unsigned long __unused) | ||
247 | { | ||
248 | struct pt_regs *regs = (struct pt_regs *)&__unused; | ||
249 | |||
250 | return do_rt_sigreturn(regs); | ||
251 | } | ||
252 | |||
253 | /* | ||
254 | * Set up a signal frame. | ||
255 | */ | ||
256 | static int | 128 | static int |
257 | setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, | 129 | setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, |
258 | struct pt_regs *regs, unsigned long mask) | 130 | struct pt_regs *regs, unsigned long mask) |
259 | { | 131 | { |
260 | int tmp, err = 0; | 132 | int err = 0; |
261 | 133 | ||
262 | err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs); | 134 | #ifdef CONFIG_X86_32 |
263 | savesegment(gs, tmp); | 135 | { |
264 | err |= __put_user(tmp, (unsigned int __user *)&sc->gs); | 136 | unsigned int tmp; |
265 | 137 | ||
138 | savesegment(gs, tmp); | ||
139 | err |= __put_user(tmp, (unsigned int __user *)&sc->gs); | ||
140 | } | ||
141 | err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs); | ||
266 | err |= __put_user(regs->es, (unsigned int __user *)&sc->es); | 142 | err |= __put_user(regs->es, (unsigned int __user *)&sc->es); |
267 | err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds); | 143 | err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds); |
144 | #endif /* CONFIG_X86_32 */ | ||
145 | |||
268 | err |= __put_user(regs->di, &sc->di); | 146 | err |= __put_user(regs->di, &sc->di); |
269 | err |= __put_user(regs->si, &sc->si); | 147 | err |= __put_user(regs->si, &sc->si); |
270 | err |= __put_user(regs->bp, &sc->bp); | 148 | err |= __put_user(regs->bp, &sc->bp); |
@@ -273,19 +151,33 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, | |||
273 | err |= __put_user(regs->dx, &sc->dx); | 151 | err |= __put_user(regs->dx, &sc->dx); |
274 | err |= __put_user(regs->cx, &sc->cx); | 152 | err |= __put_user(regs->cx, &sc->cx); |
275 | err |= __put_user(regs->ax, &sc->ax); | 153 | err |= __put_user(regs->ax, &sc->ax); |
154 | #ifdef CONFIG_X86_64 | ||
155 | err |= __put_user(regs->r8, &sc->r8); | ||
156 | err |= __put_user(regs->r9, &sc->r9); | ||
157 | err |= __put_user(regs->r10, &sc->r10); | ||
158 | err |= __put_user(regs->r11, &sc->r11); | ||
159 | err |= __put_user(regs->r12, &sc->r12); | ||
160 | err |= __put_user(regs->r13, &sc->r13); | ||
161 | err |= __put_user(regs->r14, &sc->r14); | ||
162 | err |= __put_user(regs->r15, &sc->r15); | ||
163 | #endif /* CONFIG_X86_64 */ | ||
164 | |||
276 | err |= __put_user(current->thread.trap_no, &sc->trapno); | 165 | err |= __put_user(current->thread.trap_no, &sc->trapno); |
277 | err |= __put_user(current->thread.error_code, &sc->err); | 166 | err |= __put_user(current->thread.error_code, &sc->err); |
278 | err |= __put_user(regs->ip, &sc->ip); | 167 | err |= __put_user(regs->ip, &sc->ip); |
168 | #ifdef CONFIG_X86_32 | ||
279 | err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs); | 169 | err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs); |
280 | err |= __put_user(regs->flags, &sc->flags); | 170 | err |= __put_user(regs->flags, &sc->flags); |
281 | err |= __put_user(regs->sp, &sc->sp_at_signal); | 171 | err |= __put_user(regs->sp, &sc->sp_at_signal); |
282 | err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); | 172 | err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); |
173 | #else /* !CONFIG_X86_32 */ | ||
174 | err |= __put_user(regs->flags, &sc->flags); | ||
175 | err |= __put_user(regs->cs, &sc->cs); | ||
176 | err |= __put_user(0, &sc->gs); | ||
177 | err |= __put_user(0, &sc->fs); | ||
178 | #endif /* CONFIG_X86_32 */ | ||
283 | 179 | ||
284 | tmp = save_i387_xstate(fpstate); | 180 | err |= __put_user(fpstate, &sc->fpstate); |
285 | if (tmp < 0) | ||
286 | err = 1; | ||
287 | else | ||
288 | err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate); | ||
289 | 181 | ||
290 | /* non-iBCS2 extensions.. */ | 182 | /* non-iBCS2 extensions.. */ |
291 | err |= __put_user(mask, &sc->oldmask); | 183 | err |= __put_user(mask, &sc->oldmask); |
@@ -295,6 +187,32 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, | |||
295 | } | 187 | } |
296 | 188 | ||
297 | /* | 189 | /* |
190 | * Set up a signal frame. | ||
191 | */ | ||
192 | #ifdef CONFIG_X86_32 | ||
193 | static const struct { | ||
194 | u16 poplmovl; | ||
195 | u32 val; | ||
196 | u16 int80; | ||
197 | } __attribute__((packed)) retcode = { | ||
198 | 0xb858, /* popl %eax; movl $..., %eax */ | ||
199 | __NR_sigreturn, | ||
200 | 0x80cd, /* int $0x80 */ | ||
201 | }; | ||
202 | |||
203 | static const struct { | ||
204 | u8 movl; | ||
205 | u32 val; | ||
206 | u16 int80; | ||
207 | u8 pad; | ||
208 | } __attribute__((packed)) rt_retcode = { | ||
209 | 0xb8, /* movl $..., %eax */ | ||
210 | __NR_rt_sigreturn, | ||
211 | 0x80cd, /* int $0x80 */ | ||
212 | 0 | ||
213 | }; | ||
214 | |||
215 | /* | ||
298 | * Determine which stack to use.. | 216 | * Determine which stack to use.. |
299 | */ | 217 | */ |
300 | static inline void __user * | 218 | static inline void __user * |
@@ -328,6 +246,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, | |||
328 | if (used_math()) { | 246 | if (used_math()) { |
329 | sp = sp - sig_xstate_size; | 247 | sp = sp - sig_xstate_size; |
330 | *fpstate = (struct _fpstate *) sp; | 248 | *fpstate = (struct _fpstate *) sp; |
249 | if (save_i387_xstate(*fpstate) < 0) | ||
250 | return (void __user *)-1L; | ||
331 | } | 251 | } |
332 | 252 | ||
333 | sp -= frame_size; | 253 | sp -= frame_size; |
@@ -383,9 +303,7 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, | |||
383 | * reasons and because gdb uses it as a signature to notice | 303 | * reasons and because gdb uses it as a signature to notice |
384 | * signal handler stack frames. | 304 | * signal handler stack frames. |
385 | */ | 305 | */ |
386 | err |= __put_user(0xb858, (short __user *)(frame->retcode+0)); | 306 | err |= __put_user(*((u64 *)&retcode), (u64 *)frame->retcode); |
387 | err |= __put_user(__NR_sigreturn, (int __user *)(frame->retcode+2)); | ||
388 | err |= __put_user(0x80cd, (short __user *)(frame->retcode+6)); | ||
389 | 307 | ||
390 | if (err) | 308 | if (err) |
391 | return -EFAULT; | 309 | return -EFAULT; |
@@ -454,9 +372,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
454 | * reasons and because gdb uses it as a signature to notice | 372 | * reasons and because gdb uses it as a signature to notice |
455 | * signal handler stack frames. | 373 | * signal handler stack frames. |
456 | */ | 374 | */ |
457 | err |= __put_user(0xb8, (char __user *)(frame->retcode+0)); | 375 | err |= __put_user(*((u64 *)&rt_retcode), (u64 *)frame->retcode); |
458 | err |= __put_user(__NR_rt_sigreturn, (int __user *)(frame->retcode+1)); | ||
459 | err |= __put_user(0x80cd, (short __user *)(frame->retcode+5)); | ||
460 | 376 | ||
461 | if (err) | 377 | if (err) |
462 | return -EFAULT; | 378 | return -EFAULT; |
@@ -475,23 +391,293 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
475 | 391 | ||
476 | return 0; | 392 | return 0; |
477 | } | 393 | } |
394 | #else /* !CONFIG_X86_32 */ | ||
395 | /* | ||
396 | * Determine which stack to use.. | ||
397 | */ | ||
398 | static void __user * | ||
399 | get_stack(struct k_sigaction *ka, unsigned long sp, unsigned long size) | ||
400 | { | ||
401 | /* Default to using normal stack - redzone*/ | ||
402 | sp -= 128; | ||
403 | |||
404 | /* This is the X/Open sanctioned signal stack switching. */ | ||
405 | if (ka->sa.sa_flags & SA_ONSTACK) { | ||
406 | if (sas_ss_flags(sp) == 0) | ||
407 | sp = current->sas_ss_sp + current->sas_ss_size; | ||
408 | } | ||
409 | |||
410 | return (void __user *)round_down(sp - size, 64); | ||
411 | } | ||
412 | |||
413 | static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | ||
414 | sigset_t *set, struct pt_regs *regs) | ||
415 | { | ||
416 | struct rt_sigframe __user *frame; | ||
417 | void __user *fp = NULL; | ||
418 | int err = 0; | ||
419 | struct task_struct *me = current; | ||
420 | |||
421 | if (used_math()) { | ||
422 | fp = get_stack(ka, regs->sp, sig_xstate_size); | ||
423 | frame = (void __user *)round_down( | ||
424 | (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; | ||
425 | |||
426 | if (save_i387_xstate(fp) < 0) | ||
427 | return -EFAULT; | ||
428 | } else | ||
429 | frame = get_stack(ka, regs->sp, sizeof(struct rt_sigframe)) - 8; | ||
430 | |||
431 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) | ||
432 | return -EFAULT; | ||
433 | |||
434 | if (ka->sa.sa_flags & SA_SIGINFO) { | ||
435 | if (copy_siginfo_to_user(&frame->info, info)) | ||
436 | return -EFAULT; | ||
437 | } | ||
438 | |||
439 | /* Create the ucontext. */ | ||
440 | if (cpu_has_xsave) | ||
441 | err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); | ||
442 | else | ||
443 | err |= __put_user(0, &frame->uc.uc_flags); | ||
444 | err |= __put_user(0, &frame->uc.uc_link); | ||
445 | err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); | ||
446 | err |= __put_user(sas_ss_flags(regs->sp), | ||
447 | &frame->uc.uc_stack.ss_flags); | ||
448 | err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size); | ||
449 | err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]); | ||
450 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); | ||
451 | |||
452 | /* Set up to return from userspace. If provided, use a stub | ||
453 | already in userspace. */ | ||
454 | /* x86-64 should always use SA_RESTORER. */ | ||
455 | if (ka->sa.sa_flags & SA_RESTORER) { | ||
456 | err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); | ||
457 | } else { | ||
458 | /* could use a vstub here */ | ||
459 | return -EFAULT; | ||
460 | } | ||
461 | |||
462 | if (err) | ||
463 | return -EFAULT; | ||
464 | |||
465 | /* Set up registers for signal handler */ | ||
466 | regs->di = sig; | ||
467 | /* In case the signal handler was declared without prototypes */ | ||
468 | regs->ax = 0; | ||
469 | |||
470 | /* This also works for non SA_SIGINFO handlers because they expect the | ||
471 | next argument after the signal number on the stack. */ | ||
472 | regs->si = (unsigned long)&frame->info; | ||
473 | regs->dx = (unsigned long)&frame->uc; | ||
474 | regs->ip = (unsigned long) ka->sa.sa_handler; | ||
475 | |||
476 | regs->sp = (unsigned long)frame; | ||
477 | |||
478 | /* Set up the CS register to run signal handlers in 64-bit mode, | ||
479 | even if the handler happens to be interrupting 32-bit code. */ | ||
480 | regs->cs = __USER_CS; | ||
481 | |||
482 | return 0; | ||
483 | } | ||
484 | #endif /* CONFIG_X86_32 */ | ||
485 | |||
486 | #ifdef CONFIG_X86_32 | ||
487 | /* | ||
488 | * Atomically swap in the new signal mask, and wait for a signal. | ||
489 | */ | ||
490 | asmlinkage int | ||
491 | sys_sigsuspend(int history0, int history1, old_sigset_t mask) | ||
492 | { | ||
493 | mask &= _BLOCKABLE; | ||
494 | spin_lock_irq(&current->sighand->siglock); | ||
495 | current->saved_sigmask = current->blocked; | ||
496 | siginitset(&current->blocked, mask); | ||
497 | recalc_sigpending(); | ||
498 | spin_unlock_irq(&current->sighand->siglock); | ||
499 | |||
500 | current->state = TASK_INTERRUPTIBLE; | ||
501 | schedule(); | ||
502 | set_restore_sigmask(); | ||
503 | |||
504 | return -ERESTARTNOHAND; | ||
505 | } | ||
506 | |||
507 | asmlinkage int | ||
508 | sys_sigaction(int sig, const struct old_sigaction __user *act, | ||
509 | struct old_sigaction __user *oact) | ||
510 | { | ||
511 | struct k_sigaction new_ka, old_ka; | ||
512 | int ret; | ||
513 | |||
514 | if (act) { | ||
515 | old_sigset_t mask; | ||
516 | |||
517 | if (!access_ok(VERIFY_READ, act, sizeof(*act)) || | ||
518 | __get_user(new_ka.sa.sa_handler, &act->sa_handler) || | ||
519 | __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) | ||
520 | return -EFAULT; | ||
521 | |||
522 | __get_user(new_ka.sa.sa_flags, &act->sa_flags); | ||
523 | __get_user(mask, &act->sa_mask); | ||
524 | siginitset(&new_ka.sa.sa_mask, mask); | ||
525 | } | ||
526 | |||
527 | ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); | ||
528 | |||
529 | if (!ret && oact) { | ||
530 | if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || | ||
531 | __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || | ||
532 | __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) | ||
533 | return -EFAULT; | ||
534 | |||
535 | __put_user(old_ka.sa.sa_flags, &oact->sa_flags); | ||
536 | __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); | ||
537 | } | ||
538 | |||
539 | return ret; | ||
540 | } | ||
541 | #endif /* CONFIG_X86_32 */ | ||
542 | |||
543 | #ifdef CONFIG_X86_32 | ||
544 | asmlinkage int sys_sigaltstack(unsigned long bx) | ||
545 | { | ||
546 | /* | ||
547 | * This is needed to make gcc realize it doesn't own the | ||
548 | * "struct pt_regs" | ||
549 | */ | ||
550 | struct pt_regs *regs = (struct pt_regs *)&bx; | ||
551 | const stack_t __user *uss = (const stack_t __user *)bx; | ||
552 | stack_t __user *uoss = (stack_t __user *)regs->cx; | ||
553 | |||
554 | return do_sigaltstack(uss, uoss, regs->sp); | ||
555 | } | ||
556 | #else /* !CONFIG_X86_32 */ | ||
557 | asmlinkage long | ||
558 | sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, | ||
559 | struct pt_regs *regs) | ||
560 | { | ||
561 | return do_sigaltstack(uss, uoss, regs->sp); | ||
562 | } | ||
563 | #endif /* CONFIG_X86_32 */ | ||
564 | |||
565 | /* | ||
566 | * Do a signal return; undo the signal stack. | ||
567 | */ | ||
568 | #ifdef CONFIG_X86_32 | ||
569 | asmlinkage unsigned long sys_sigreturn(unsigned long __unused) | ||
570 | { | ||
571 | struct sigframe __user *frame; | ||
572 | struct pt_regs *regs; | ||
573 | unsigned long ax; | ||
574 | sigset_t set; | ||
575 | |||
576 | regs = (struct pt_regs *) &__unused; | ||
577 | frame = (struct sigframe __user *)(regs->sp - 8); | ||
578 | |||
579 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) | ||
580 | goto badframe; | ||
581 | if (__get_user(set.sig[0], &frame->sc.oldmask) || (_NSIG_WORDS > 1 | ||
582 | && __copy_from_user(&set.sig[1], &frame->extramask, | ||
583 | sizeof(frame->extramask)))) | ||
584 | goto badframe; | ||
585 | |||
586 | sigdelsetmask(&set, ~_BLOCKABLE); | ||
587 | spin_lock_irq(&current->sighand->siglock); | ||
588 | current->blocked = set; | ||
589 | recalc_sigpending(); | ||
590 | spin_unlock_irq(&current->sighand->siglock); | ||
591 | |||
592 | if (restore_sigcontext(regs, &frame->sc, &ax)) | ||
593 | goto badframe; | ||
594 | return ax; | ||
595 | |||
596 | badframe: | ||
597 | signal_fault(regs, frame, "sigreturn"); | ||
598 | |||
599 | return 0; | ||
600 | } | ||
601 | #endif /* CONFIG_X86_32 */ | ||
602 | |||
603 | static long do_rt_sigreturn(struct pt_regs *regs) | ||
604 | { | ||
605 | struct rt_sigframe __user *frame; | ||
606 | unsigned long ax; | ||
607 | sigset_t set; | ||
608 | |||
609 | frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); | ||
610 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) | ||
611 | goto badframe; | ||
612 | if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) | ||
613 | goto badframe; | ||
614 | |||
615 | sigdelsetmask(&set, ~_BLOCKABLE); | ||
616 | spin_lock_irq(¤t->sighand->siglock); | ||
617 | current->blocked = set; | ||
618 | recalc_sigpending(); | ||
619 | spin_unlock_irq(&current->sighand->siglock); | ||
620 | |||
621 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) | ||
622 | goto badframe; | ||
623 | |||
624 | if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT) | ||
625 | goto badframe; | ||
626 | |||
627 | return ax; | ||
628 | |||
629 | badframe: | ||
630 | signal_fault(regs, frame, "rt_sigreturn"); | ||
631 | return 0; | ||
632 | } | ||
633 | |||
634 | #ifdef CONFIG_X86_32 | ||
635 | asmlinkage int sys_rt_sigreturn(struct pt_regs regs) | ||
636 | { | ||
637 | return do_rt_sigreturn(&regs); | ||
638 | } | ||
639 | #else /* !CONFIG_X86_32 */ | ||
640 | asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) | ||
641 | { | ||
642 | return do_rt_sigreturn(regs); | ||
643 | } | ||
644 | #endif /* CONFIG_X86_32 */ | ||
478 | 645 | ||
479 | /* | 646 | /* |
480 | * OK, we're invoking a handler: | 647 | * OK, we're invoking a handler: |
481 | */ | 648 | */ |
482 | static int signr_convert(int sig) | 649 | static int signr_convert(int sig) |
483 | { | 650 | { |
651 | #ifdef CONFIG_X86_32 | ||
484 | struct thread_info *info = current_thread_info(); | 652 | struct thread_info *info = current_thread_info(); |
485 | 653 | ||
486 | if (info->exec_domain && info->exec_domain->signal_invmap && sig < 32) | 654 | if (info->exec_domain && info->exec_domain->signal_invmap && sig < 32) |
487 | return info->exec_domain->signal_invmap[sig]; | 655 | return info->exec_domain->signal_invmap[sig]; |
656 | #endif /* CONFIG_X86_32 */ | ||
488 | return sig; | 657 | return sig; |
489 | } | 658 | } |
490 | 659 | ||
660 | #ifdef CONFIG_X86_32 | ||
661 | |||
491 | #define is_ia32 1 | 662 | #define is_ia32 1 |
492 | #define ia32_setup_frame __setup_frame | 663 | #define ia32_setup_frame __setup_frame |
493 | #define ia32_setup_rt_frame __setup_rt_frame | 664 | #define ia32_setup_rt_frame __setup_rt_frame |
494 | 665 | ||
666 | #else /* !CONFIG_X86_32 */ | ||
667 | |||
668 | #ifdef CONFIG_IA32_EMULATION | ||
669 | #define is_ia32 test_thread_flag(TIF_IA32) | ||
670 | #else /* !CONFIG_IA32_EMULATION */ | ||
671 | #define is_ia32 0 | ||
672 | #endif /* CONFIG_IA32_EMULATION */ | ||
673 | |||
674 | int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | ||
675 | sigset_t *set, struct pt_regs *regs); | ||
676 | int ia32_setup_frame(int sig, struct k_sigaction *ka, | ||
677 | sigset_t *set, struct pt_regs *regs); | ||
678 | |||
679 | #endif /* CONFIG_X86_32 */ | ||
680 | |||
495 | static int | 681 | static int |
496 | setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | 682 | setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, |
497 | sigset_t *set, struct pt_regs *regs) | 683 | sigset_t *set, struct pt_regs *regs) |
@@ -592,7 +778,13 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
592 | return 0; | 778 | return 0; |
593 | } | 779 | } |
594 | 780 | ||
781 | #ifdef CONFIG_X86_32 | ||
595 | #define NR_restart_syscall __NR_restart_syscall | 782 | #define NR_restart_syscall __NR_restart_syscall |
783 | #else /* !CONFIG_X86_32 */ | ||
784 | #define NR_restart_syscall \ | ||
785 | test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall | ||
786 | #endif /* CONFIG_X86_32 */ | ||
787 | |||
596 | /* | 788 | /* |
597 | * Note that 'init' is a special process: it doesn't get signals it doesn't | 789 | * Note that 'init' is a special process: it doesn't get signals it doesn't |
598 | * want to handle. Thus you cannot kill init even with a SIGKILL even by | 790 | * want to handle. Thus you cannot kill init even with a SIGKILL even by |
@@ -704,8 +896,9 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where) | |||
704 | struct task_struct *me = current; | 896 | struct task_struct *me = current; |
705 | 897 | ||
706 | if (show_unhandled_signals && printk_ratelimit()) { | 898 | if (show_unhandled_signals && printk_ratelimit()) { |
707 | printk(KERN_INFO | 899 | printk("%s" |
708 | "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", | 900 | "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", |
901 | task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG, | ||
709 | me->comm, me->pid, where, frame, | 902 | me->comm, me->pid, where, frame, |
710 | regs->ip, regs->sp, regs->orig_ax); | 903 | regs->ip, regs->sp, regs->orig_ax); |
711 | print_vma_addr(" in ", regs->ip); | 904 | print_vma_addr(" in ", regs->ip); |
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c deleted file mode 100644 index a5c9627f4db9..000000000000 --- a/arch/x86/kernel/signal_64.c +++ /dev/null | |||
@@ -1,516 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
3 | * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs | ||
4 | * | ||
5 | * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson | ||
6 | * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes | ||
7 | * 2000-2002 x86-64 support by Andi Kleen | ||
8 | */ | ||
9 | |||
10 | #include <linux/sched.h> | ||
11 | #include <linux/mm.h> | ||
12 | #include <linux/smp.h> | ||
13 | #include <linux/kernel.h> | ||
14 | #include <linux/signal.h> | ||
15 | #include <linux/errno.h> | ||
16 | #include <linux/wait.h> | ||
17 | #include <linux/ptrace.h> | ||
18 | #include <linux/tracehook.h> | ||
19 | #include <linux/unistd.h> | ||
20 | #include <linux/stddef.h> | ||
21 | #include <linux/personality.h> | ||
22 | #include <linux/compiler.h> | ||
23 | #include <linux/uaccess.h> | ||
24 | |||
25 | #include <asm/processor.h> | ||
26 | #include <asm/ucontext.h> | ||
27 | #include <asm/i387.h> | ||
28 | #include <asm/proto.h> | ||
29 | #include <asm/ia32_unistd.h> | ||
30 | #include <asm/mce.h> | ||
31 | #include <asm/syscall.h> | ||
32 | #include <asm/syscalls.h> | ||
33 | #include "sigframe.h" | ||
34 | |||
35 | #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) | ||
36 | |||
37 | #define __FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \ | ||
38 | X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \ | ||
39 | X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \ | ||
40 | X86_EFLAGS_CF) | ||
41 | |||
42 | #ifdef CONFIG_X86_32 | ||
43 | # define FIX_EFLAGS (__FIX_EFLAGS | X86_EFLAGS_RF) | ||
44 | #else | ||
45 | # define FIX_EFLAGS __FIX_EFLAGS | ||
46 | #endif | ||
47 | |||
48 | asmlinkage long | ||
49 | sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, | ||
50 | struct pt_regs *regs) | ||
51 | { | ||
52 | return do_sigaltstack(uss, uoss, regs->sp); | ||
53 | } | ||
54 | |||
55 | #define COPY(x) { \ | ||
56 | err |= __get_user(regs->x, &sc->x); \ | ||
57 | } | ||
58 | |||
59 | #define COPY_SEG_STRICT(seg) { \ | ||
60 | unsigned short tmp; \ | ||
61 | err |= __get_user(tmp, &sc->seg); \ | ||
62 | regs->seg = tmp | 3; \ | ||
63 | } | ||
64 | |||
65 | /* | ||
66 | * Do a signal return; undo the signal stack. | ||
67 | */ | ||
68 | static int | ||
69 | restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, | ||
70 | unsigned long *pax) | ||
71 | { | ||
72 | void __user *buf; | ||
73 | unsigned int tmpflags; | ||
74 | unsigned int err = 0; | ||
75 | |||
76 | /* Always make any pending restarted system calls return -EINTR */ | ||
77 | current_thread_info()->restart_block.fn = do_no_restart_syscall; | ||
78 | |||
79 | COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); | ||
80 | COPY(dx); COPY(cx); COPY(ip); | ||
81 | COPY(r8); | ||
82 | COPY(r9); | ||
83 | COPY(r10); | ||
84 | COPY(r11); | ||
85 | COPY(r12); | ||
86 | COPY(r13); | ||
87 | COPY(r14); | ||
88 | COPY(r15); | ||
89 | |||
90 | /* Kernel saves and restores only the CS segment register on signals, | ||
91 | * which is the bare minimum needed to allow mixed 32/64-bit code. | ||
92 | * App's signal handler can save/restore other segments if needed. */ | ||
93 | COPY_SEG_STRICT(cs); | ||
94 | |||
95 | err |= __get_user(tmpflags, &sc->flags); | ||
96 | regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); | ||
97 | regs->orig_ax = -1; /* disable syscall checks */ | ||
98 | |||
99 | err |= __get_user(buf, &sc->fpstate); | ||
100 | err |= restore_i387_xstate(buf); | ||
101 | |||
102 | err |= __get_user(*pax, &sc->ax); | ||
103 | return err; | ||
104 | } | ||
105 | |||
106 | static long do_rt_sigreturn(struct pt_regs *regs) | ||
107 | { | ||
108 | struct rt_sigframe __user *frame; | ||
109 | unsigned long ax; | ||
110 | sigset_t set; | ||
111 | |||
112 | frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); | ||
113 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) | ||
114 | goto badframe; | ||
115 | if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) | ||
116 | goto badframe; | ||
117 | |||
118 | sigdelsetmask(&set, ~_BLOCKABLE); | ||
119 | spin_lock_irq(&current->sighand->siglock); | ||
120 | current->blocked = set; | ||
121 | recalc_sigpending(); | ||
122 | spin_unlock_irq(&current->sighand->siglock); | ||
123 | |||
124 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) | ||
125 | goto badframe; | ||
126 | |||
127 | if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT) | ||
128 | goto badframe; | ||
129 | |||
130 | return ax; | ||
131 | |||
132 | badframe: | ||
133 | signal_fault(regs, frame, "rt_sigreturn"); | ||
134 | return 0; | ||
135 | } | ||
136 | |||
137 | asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) | ||
138 | { | ||
139 | return do_rt_sigreturn(regs); | ||
140 | } | ||
141 | |||
142 | /* | ||
143 | * Set up a signal frame. | ||
144 | */ | ||
145 | |||
146 | static inline int | ||
147 | setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, | ||
148 | unsigned long mask, struct task_struct *me) | ||
149 | { | ||
150 | int err = 0; | ||
151 | |||
152 | err |= __put_user(regs->cs, &sc->cs); | ||
153 | err |= __put_user(0, &sc->gs); | ||
154 | err |= __put_user(0, &sc->fs); | ||
155 | |||
156 | err |= __put_user(regs->di, &sc->di); | ||
157 | err |= __put_user(regs->si, &sc->si); | ||
158 | err |= __put_user(regs->bp, &sc->bp); | ||
159 | err |= __put_user(regs->sp, &sc->sp); | ||
160 | err |= __put_user(regs->bx, &sc->bx); | ||
161 | err |= __put_user(regs->dx, &sc->dx); | ||
162 | err |= __put_user(regs->cx, &sc->cx); | ||
163 | err |= __put_user(regs->ax, &sc->ax); | ||
164 | err |= __put_user(regs->r8, &sc->r8); | ||
165 | err |= __put_user(regs->r9, &sc->r9); | ||
166 | err |= __put_user(regs->r10, &sc->r10); | ||
167 | err |= __put_user(regs->r11, &sc->r11); | ||
168 | err |= __put_user(regs->r12, &sc->r12); | ||
169 | err |= __put_user(regs->r13, &sc->r13); | ||
170 | err |= __put_user(regs->r14, &sc->r14); | ||
171 | err |= __put_user(regs->r15, &sc->r15); | ||
172 | err |= __put_user(me->thread.trap_no, &sc->trapno); | ||
173 | err |= __put_user(me->thread.error_code, &sc->err); | ||
174 | err |= __put_user(regs->ip, &sc->ip); | ||
175 | err |= __put_user(regs->flags, &sc->flags); | ||
176 | err |= __put_user(mask, &sc->oldmask); | ||
177 | err |= __put_user(me->thread.cr2, &sc->cr2); | ||
178 | |||
179 | return err; | ||
180 | } | ||
181 | |||
182 | /* | ||
183 | * Determine which stack to use.. | ||
184 | */ | ||
185 | |||
186 | static void __user * | ||
187 | get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size) | ||
188 | { | ||
189 | unsigned long sp; | ||
190 | |||
191 | /* Default to using normal stack - redzone*/ | ||
192 | sp = regs->sp - 128; | ||
193 | |||
194 | /* This is the X/Open sanctioned signal stack switching. */ | ||
195 | if (ka->sa.sa_flags & SA_ONSTACK) { | ||
196 | if (sas_ss_flags(sp) == 0) | ||
197 | sp = current->sas_ss_sp + current->sas_ss_size; | ||
198 | } | ||
199 | |||
200 | return (void __user *)round_down(sp - size, 64); | ||
201 | } | ||
202 | |||
203 | static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | ||
204 | sigset_t *set, struct pt_regs *regs) | ||
205 | { | ||
206 | struct rt_sigframe __user *frame; | ||
207 | void __user *fp = NULL; | ||
208 | int err = 0; | ||
209 | struct task_struct *me = current; | ||
210 | |||
211 | if (used_math()) { | ||
212 | fp = get_stack(ka, regs, sig_xstate_size); | ||
213 | frame = (void __user *)round_down( | ||
214 | (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; | ||
215 | |||
216 | if (save_i387_xstate(fp) < 0) | ||
217 | return -EFAULT; | ||
218 | } else | ||
219 | frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8; | ||
220 | |||
221 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) | ||
222 | return -EFAULT; | ||
223 | |||
224 | if (ka->sa.sa_flags & SA_SIGINFO) { | ||
225 | if (copy_siginfo_to_user(&frame->info, info)) | ||
226 | return -EFAULT; | ||
227 | } | ||
228 | |||
229 | /* Create the ucontext. */ | ||
230 | if (cpu_has_xsave) | ||
231 | err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); | ||
232 | else | ||
233 | err |= __put_user(0, &frame->uc.uc_flags); | ||
234 | err |= __put_user(0, &frame->uc.uc_link); | ||
235 | err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); | ||
236 | err |= __put_user(sas_ss_flags(regs->sp), | ||
237 | &frame->uc.uc_stack.ss_flags); | ||
238 | err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size); | ||
239 | err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me); | ||
240 | err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate); | ||
241 | if (sizeof(*set) == 16) { | ||
242 | __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]); | ||
243 | __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]); | ||
244 | } else | ||
245 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); | ||
246 | |||
247 | /* Set up to return from userspace. If provided, use a stub | ||
248 | already in userspace. */ | ||
249 | /* x86-64 should always use SA_RESTORER. */ | ||
250 | if (ka->sa.sa_flags & SA_RESTORER) { | ||
251 | err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); | ||
252 | } else { | ||
253 | /* could use a vstub here */ | ||
254 | return -EFAULT; | ||
255 | } | ||
256 | |||
257 | if (err) | ||
258 | return -EFAULT; | ||
259 | |||
260 | /* Set up registers for signal handler */ | ||
261 | regs->di = sig; | ||
262 | /* In case the signal handler was declared without prototypes */ | ||
263 | regs->ax = 0; | ||
264 | |||
265 | /* This also works for non SA_SIGINFO handlers because they expect the | ||
266 | next argument after the signal number on the stack. */ | ||
267 | regs->si = (unsigned long)&frame->info; | ||
268 | regs->dx = (unsigned long)&frame->uc; | ||
269 | regs->ip = (unsigned long) ka->sa.sa_handler; | ||
270 | |||
271 | regs->sp = (unsigned long)frame; | ||
272 | |||
273 | /* Set up the CS register to run signal handlers in 64-bit mode, | ||
274 | even if the handler happens to be interrupting 32-bit code. */ | ||
275 | regs->cs = __USER_CS; | ||
276 | |||
277 | return 0; | ||
278 | } | ||
279 | |||
280 | /* | ||
281 | * OK, we're invoking a handler | ||
282 | */ | ||
283 | static int signr_convert(int sig) | ||
284 | { | ||
285 | return sig; | ||
286 | } | ||
287 | |||
288 | #ifdef CONFIG_IA32_EMULATION | ||
289 | #define is_ia32 test_thread_flag(TIF_IA32) | ||
290 | #else | ||
291 | #define is_ia32 0 | ||
292 | #endif | ||
293 | |||
294 | static int | ||
295 | setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | ||
296 | sigset_t *set, struct pt_regs *regs) | ||
297 | { | ||
298 | int usig = signr_convert(sig); | ||
299 | int ret; | ||
300 | |||
301 | /* Set up the stack frame */ | ||
302 | if (is_ia32) { | ||
303 | if (ka->sa.sa_flags & SA_SIGINFO) | ||
304 | ret = ia32_setup_rt_frame(usig, ka, info, set, regs); | ||
305 | else | ||
306 | ret = ia32_setup_frame(usig, ka, set, regs); | ||
307 | } else | ||
308 | ret = __setup_rt_frame(sig, ka, info, set, regs); | ||
309 | |||
310 | if (ret) { | ||
311 | force_sigsegv(sig, current); | ||
312 | return -EFAULT; | ||
313 | } | ||
314 | |||
315 | return ret; | ||
316 | } | ||
317 | |||
318 | static int | ||
319 | handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | ||
320 | sigset_t *oldset, struct pt_regs *regs) | ||
321 | { | ||
322 | int ret; | ||
323 | |||
324 | /* Are we from a system call? */ | ||
325 | if (syscall_get_nr(current, regs) >= 0) { | ||
326 | /* If so, check system call restarting.. */ | ||
327 | switch (syscall_get_error(current, regs)) { | ||
328 | case -ERESTART_RESTARTBLOCK: | ||
329 | case -ERESTARTNOHAND: | ||
330 | regs->ax = -EINTR; | ||
331 | break; | ||
332 | |||
333 | case -ERESTARTSYS: | ||
334 | if (!(ka->sa.sa_flags & SA_RESTART)) { | ||
335 | regs->ax = -EINTR; | ||
336 | break; | ||
337 | } | ||
338 | /* fallthrough */ | ||
339 | case -ERESTARTNOINTR: | ||
340 | regs->ax = regs->orig_ax; | ||
341 | regs->ip -= 2; | ||
342 | break; | ||
343 | } | ||
344 | } | ||
345 | |||
346 | /* | ||
347 | * If TF is set due to a debugger (TIF_FORCED_TF), clear the TF | ||
348 | * flag so that register information in the sigcontext is correct. | ||
349 | */ | ||
350 | if (unlikely(regs->flags & X86_EFLAGS_TF) && | ||
351 | likely(test_and_clear_thread_flag(TIF_FORCED_TF))) | ||
352 | regs->flags &= ~X86_EFLAGS_TF; | ||
353 | |||
354 | ret = setup_rt_frame(sig, ka, info, oldset, regs); | ||
355 | |||
356 | if (ret) | ||
357 | return ret; | ||
358 | |||
359 | #ifdef CONFIG_X86_64 | ||
360 | /* | ||
361 | * This has nothing to do with segment registers, | ||
362 | * despite the name. This magic affects uaccess.h | ||
363 | * macros' behavior. Reset it to the normal setting. | ||
364 | */ | ||
365 | set_fs(USER_DS); | ||
366 | #endif | ||
367 | |||
368 | /* | ||
369 | * Clear the direction flag as per the ABI for function entry. | ||
370 | */ | ||
371 | regs->flags &= ~X86_EFLAGS_DF; | ||
372 | |||
373 | /* | ||
374 | * Clear TF when entering the signal handler, but | ||
375 | * notify any tracer that was single-stepping it. | ||
376 | * The tracer may want to single-step inside the | ||
377 | * handler too. | ||
378 | */ | ||
379 | regs->flags &= ~X86_EFLAGS_TF; | ||
380 | |||
381 | spin_lock_irq(&current->sighand->siglock); | ||
382 | sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask); | ||
383 | if (!(ka->sa.sa_flags & SA_NODEFER)) | ||
384 | sigaddset(&current->blocked, sig); | ||
385 | recalc_sigpending(); | ||
386 | spin_unlock_irq(&current->sighand->siglock); | ||
387 | |||
388 | tracehook_signal_handler(sig, info, ka, regs, | ||
389 | test_thread_flag(TIF_SINGLESTEP)); | ||
390 | |||
391 | return 0; | ||
392 | } | ||
393 | |||
394 | #define NR_restart_syscall \ | ||
395 | test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall | ||
396 | /* | ||
397 | * Note that 'init' is a special process: it doesn't get signals it doesn't | ||
398 | * want to handle. Thus you cannot kill init even with a SIGKILL even by | ||
399 | * mistake. | ||
400 | */ | ||
401 | static void do_signal(struct pt_regs *regs) | ||
402 | { | ||
403 | struct k_sigaction ka; | ||
404 | siginfo_t info; | ||
405 | int signr; | ||
406 | sigset_t *oldset; | ||
407 | |||
408 | /* | ||
409 | * We want the common case to go fast, which is why we may in certain | ||
410 | * cases get here from kernel mode. Just return without doing anything | ||
411 | * if so. | ||
412 | * X86_32: vm86 regs switched out by assembly code before reaching | ||
413 | * here, so testing against kernel CS suffices. | ||
414 | */ | ||
415 | if (!user_mode(regs)) | ||
416 | return; | ||
417 | |||
418 | if (current_thread_info()->status & TS_RESTORE_SIGMASK) | ||
419 | oldset = &current->saved_sigmask; | ||
420 | else | ||
421 | oldset = &current->blocked; | ||
422 | |||
423 | signr = get_signal_to_deliver(&info, &ka, regs, NULL); | ||
424 | if (signr > 0) { | ||
425 | /* | ||
426 | * Re-enable any watchpoints before delivering the | ||
427 | * signal to user space. The processor register will | ||
428 | * have been cleared if the watchpoint triggered | ||
429 | * inside the kernel. | ||
430 | */ | ||
431 | if (current->thread.debugreg7) | ||
432 | set_debugreg(current->thread.debugreg7, 7); | ||
433 | |||
434 | /* Whee! Actually deliver the signal. */ | ||
435 | if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { | ||
436 | /* | ||
437 | * A signal was successfully delivered; the saved | ||
438 | * sigmask will have been stored in the signal frame, | ||
439 | * and will be restored by sigreturn, so we can simply | ||
440 | * clear the TS_RESTORE_SIGMASK flag. | ||
441 | */ | ||
442 | current_thread_info()->status &= ~TS_RESTORE_SIGMASK; | ||
443 | } | ||
444 | return; | ||
445 | } | ||
446 | |||
447 | /* Did we come from a system call? */ | ||
448 | if (syscall_get_nr(current, regs) >= 0) { | ||
449 | /* Restart the system call - no handlers present */ | ||
450 | switch (syscall_get_error(current, regs)) { | ||
451 | case -ERESTARTNOHAND: | ||
452 | case -ERESTARTSYS: | ||
453 | case -ERESTARTNOINTR: | ||
454 | regs->ax = regs->orig_ax; | ||
455 | regs->ip -= 2; | ||
456 | break; | ||
457 | |||
458 | case -ERESTART_RESTARTBLOCK: | ||
459 | regs->ax = NR_restart_syscall; | ||
460 | regs->ip -= 2; | ||
461 | break; | ||
462 | } | ||
463 | } | ||
464 | |||
465 | /* | ||
466 | * If there's no signal to deliver, we just put the saved sigmask | ||
467 | * back. | ||
468 | */ | ||
469 | if (current_thread_info()->status & TS_RESTORE_SIGMASK) { | ||
470 | current_thread_info()->status &= ~TS_RESTORE_SIGMASK; | ||
471 | sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL); | ||
472 | } | ||
473 | } | ||
474 | |||
475 | /* | ||
476 | * notification of userspace execution resumption | ||
477 | * - triggered by the TIF_WORK_MASK flags | ||
478 | */ | ||
479 | void | ||
480 | do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | ||
481 | { | ||
482 | #if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE) | ||
483 | /* notify userspace of pending MCEs */ | ||
484 | if (thread_info_flags & _TIF_MCE_NOTIFY) | ||
485 | mce_notify_user(); | ||
486 | #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */ | ||
487 | |||
488 | /* deal with pending signal delivery */ | ||
489 | if (thread_info_flags & _TIF_SIGPENDING) | ||
490 | do_signal(regs); | ||
491 | |||
492 | if (thread_info_flags & _TIF_NOTIFY_RESUME) { | ||
493 | clear_thread_flag(TIF_NOTIFY_RESUME); | ||
494 | tracehook_notify_resume(regs); | ||
495 | } | ||
496 | |||
497 | #ifdef CONFIG_X86_32 | ||
498 | clear_thread_flag(TIF_IRET); | ||
499 | #endif /* CONFIG_X86_32 */ | ||
500 | } | ||
501 | |||
502 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where) | ||
503 | { | ||
504 | struct task_struct *me = current; | ||
505 | |||
506 | if (show_unhandled_signals && printk_ratelimit()) { | ||
507 | printk(KERN_INFO | ||
508 | "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", | ||
509 | me->comm, me->pid, where, frame, | ||
510 | regs->ip, regs->sp, regs->orig_ax); | ||
511 | print_vma_addr(" in ", regs->ip); | ||
512 | printk(KERN_CONT "\n"); | ||
513 | } | ||
514 | |||
515 | force_sig(SIGSEGV, me); | ||
516 | } | ||
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 18f9b19f5f8f..7e558db362c1 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c | |||
@@ -140,19 +140,6 @@ void native_send_call_func_ipi(cpumask_t mask) | |||
140 | send_IPI_mask(mask, CALL_FUNCTION_VECTOR); | 140 | send_IPI_mask(mask, CALL_FUNCTION_VECTOR); |
141 | } | 141 | } |
142 | 142 | ||
143 | static void stop_this_cpu(void *dummy) | ||
144 | { | ||
145 | local_irq_disable(); | ||
146 | /* | ||
147 | * Remove this CPU: | ||
148 | */ | ||
149 | cpu_clear(smp_processor_id(), cpu_online_map); | ||
150 | disable_local_APIC(); | ||
151 | if (hlt_works(smp_processor_id())) | ||
152 | for (;;) halt(); | ||
153 | for (;;); | ||
154 | } | ||
155 | |||
156 | /* | 143 | /* |
157 | * this function calls the 'stop' function on all other CPUs in the system. | 144 | * this function calls the 'stop' function on all other CPUs in the system. |
158 | */ | 145 | */ |
@@ -178,11 +165,7 @@ static void native_smp_send_stop(void) | |||
178 | void smp_reschedule_interrupt(struct pt_regs *regs) | 165 | void smp_reschedule_interrupt(struct pt_regs *regs) |
179 | { | 166 | { |
180 | ack_APIC_irq(); | 167 | ack_APIC_irq(); |
181 | #ifdef CONFIG_X86_32 | 168 | inc_irq_stat(irq_resched_count); |
182 | __get_cpu_var(irq_stat).irq_resched_count++; | ||
183 | #else | ||
184 | add_pda(irq_resched_count, 1); | ||
185 | #endif | ||
186 | } | 169 | } |
187 | 170 | ||
188 | void smp_call_function_interrupt(struct pt_regs *regs) | 171 | void smp_call_function_interrupt(struct pt_regs *regs) |
@@ -190,11 +173,7 @@ void smp_call_function_interrupt(struct pt_regs *regs) | |||
190 | ack_APIC_irq(); | 173 | ack_APIC_irq(); |
191 | irq_enter(); | 174 | irq_enter(); |
192 | generic_smp_call_function_interrupt(); | 175 | generic_smp_call_function_interrupt(); |
193 | #ifdef CONFIG_X86_32 | 176 | inc_irq_stat(irq_call_count); |
194 | __get_cpu_var(irq_stat).irq_call_count++; | ||
195 | #else | ||
196 | add_pda(irq_call_count, 1); | ||
197 | #endif | ||
198 | irq_exit(); | 177 | irq_exit(); |
199 | } | 178 | } |
200 | 179 | ||
@@ -203,11 +182,7 @@ void smp_call_function_single_interrupt(struct pt_regs *regs) | |||
203 | ack_APIC_irq(); | 182 | ack_APIC_irq(); |
204 | irq_enter(); | 183 | irq_enter(); |
205 | generic_smp_call_function_single_interrupt(); | 184 | generic_smp_call_function_single_interrupt(); |
206 | #ifdef CONFIG_X86_32 | 185 | inc_irq_stat(irq_call_count); |
207 | __get_cpu_var(irq_stat).irq_call_count++; | ||
208 | #else | ||
209 | add_pda(irq_call_count, 1); | ||
210 | #endif | ||
211 | irq_exit(); | 186 | irq_exit(); |
212 | } | 187 | } |
213 | 188 | ||
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f71f96fc9e62..7a430c4d1551 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -62,6 +62,7 @@ | |||
62 | #include <asm/mtrr.h> | 62 | #include <asm/mtrr.h> |
63 | #include <asm/vmi.h> | 63 | #include <asm/vmi.h> |
64 | #include <asm/genapic.h> | 64 | #include <asm/genapic.h> |
65 | #include <asm/setup.h> | ||
65 | #include <linux/mc146818rtc.h> | 66 | #include <linux/mc146818rtc.h> |
66 | 67 | ||
67 | #include <mach_apic.h> | 68 | #include <mach_apic.h> |
@@ -534,7 +535,7 @@ static void impress_friends(void) | |||
534 | pr_debug("Before bogocount - setting activated=1.\n"); | 535 | pr_debug("Before bogocount - setting activated=1.\n"); |
535 | } | 536 | } |
536 | 537 | ||
537 | static inline void __inquire_remote_apic(int apicid) | 538 | void __inquire_remote_apic(int apicid) |
538 | { | 539 | { |
539 | unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; | 540 | unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; |
540 | char *names[] = { "ID", "VERSION", "SPIV" }; | 541 | char *names[] = { "ID", "VERSION", "SPIV" }; |
@@ -573,14 +574,13 @@ static inline void __inquire_remote_apic(int apicid) | |||
573 | } | 574 | } |
574 | } | 575 | } |
575 | 576 | ||
576 | #ifdef WAKE_SECONDARY_VIA_NMI | ||
577 | /* | 577 | /* |
578 | * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal | 578 | * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal |
579 | * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this | 579 | * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this |
580 | * won't ... remember to clear down the APIC, etc later. | 580 | * won't ... remember to clear down the APIC, etc later. |
581 | */ | 581 | */ |
582 | static int __devinit | 582 | int __devinit |
583 | wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) | 583 | wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) |
584 | { | 584 | { |
585 | unsigned long send_status, accept_status = 0; | 585 | unsigned long send_status, accept_status = 0; |
586 | int maxlvt; | 586 | int maxlvt; |
@@ -597,7 +597,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) | |||
597 | * Give the other CPU some time to accept the IPI. | 597 | * Give the other CPU some time to accept the IPI. |
598 | */ | 598 | */ |
599 | udelay(200); | 599 | udelay(200); |
600 | if (APIC_INTEGRATED(apic_version[phys_apicid])) { | 600 | if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { |
601 | maxlvt = lapic_get_maxlvt(); | 601 | maxlvt = lapic_get_maxlvt(); |
602 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ | 602 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ |
603 | apic_write(APIC_ESR, 0); | 603 | apic_write(APIC_ESR, 0); |
@@ -612,11 +612,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) | |||
612 | 612 | ||
613 | return (send_status | accept_status); | 613 | return (send_status | accept_status); |
614 | } | 614 | } |
615 | #endif /* WAKE_SECONDARY_VIA_NMI */ | ||
616 | 615 | ||
617 | #ifdef WAKE_SECONDARY_VIA_INIT | 616 | int __devinit |
618 | static int __devinit | 617 | wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) |
619 | wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) | ||
620 | { | 618 | { |
621 | unsigned long send_status, accept_status = 0; | 619 | unsigned long send_status, accept_status = 0; |
622 | int maxlvt, num_starts, j; | 620 | int maxlvt, num_starts, j; |
@@ -735,7 +733,6 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) | |||
735 | 733 | ||
736 | return (send_status | accept_status); | 734 | return (send_status | accept_status); |
737 | } | 735 | } |
738 | #endif /* WAKE_SECONDARY_VIA_INIT */ | ||
739 | 736 | ||
740 | struct create_idle { | 737 | struct create_idle { |
741 | struct work_struct work; | 738 | struct work_struct work; |
@@ -1084,8 +1081,10 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
1084 | #endif | 1081 | #endif |
1085 | 1082 | ||
1086 | if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { | 1083 | if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { |
1087 | printk(KERN_WARNING "weird, boot CPU (#%d) not listed" | 1084 | printk(KERN_WARNING |
1088 | "by the BIOS.\n", hard_smp_processor_id()); | 1085 | "weird, boot CPU (#%d) not listed by the BIOS.\n", |
1086 | hard_smp_processor_id()); | ||
1087 | |||
1089 | physid_set(hard_smp_processor_id(), phys_cpu_present_map); | 1088 | physid_set(hard_smp_processor_id(), phys_cpu_present_map); |
1090 | } | 1089 | } |
1091 | 1090 | ||
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index 77b400f06ea2..65309e4cb1c0 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c | |||
@@ -75,7 +75,7 @@ EXPORT_SYMBOL(profile_pc); | |||
75 | irqreturn_t timer_interrupt(int irq, void *dev_id) | 75 | irqreturn_t timer_interrupt(int irq, void *dev_id) |
76 | { | 76 | { |
77 | /* Keep nmi watchdog up to date */ | 77 | /* Keep nmi watchdog up to date */ |
78 | per_cpu(irq_stat, smp_processor_id()).irq0_irqs++; | 78 | inc_irq_stat(irq0_irqs); |
79 | 79 | ||
80 | #ifdef CONFIG_X86_IO_APIC | 80 | #ifdef CONFIG_X86_IO_APIC |
81 | if (timer_ack) { | 81 | if (timer_ack) { |
diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c index cb19d650c216..891e7a7c4334 100644 --- a/arch/x86/kernel/time_64.c +++ b/arch/x86/kernel/time_64.c | |||
@@ -49,9 +49,9 @@ unsigned long profile_pc(struct pt_regs *regs) | |||
49 | } | 49 | } |
50 | EXPORT_SYMBOL(profile_pc); | 50 | EXPORT_SYMBOL(profile_pc); |
51 | 51 | ||
52 | irqreturn_t timer_interrupt(int irq, void *dev_id) | 52 | static irqreturn_t timer_interrupt(int irq, void *dev_id) |
53 | { | 53 | { |
54 | add_pda(irq0_irqs, 1); | 54 | inc_irq_stat(irq0_irqs); |
55 | 55 | ||
56 | global_clock_event->event_handler(global_clock_event); | 56 | global_clock_event->event_handler(global_clock_event); |
57 | 57 | ||
@@ -80,6 +80,8 @@ unsigned long __init calibrate_cpu(void) | |||
80 | break; | 80 | break; |
81 | no_ctr_free = (i == 4); | 81 | no_ctr_free = (i == 4); |
82 | if (no_ctr_free) { | 82 | if (no_ctr_free) { |
83 | WARN(1, KERN_WARNING "Warning: AMD perfctrs busy ... " | ||
84 | "cpu_khz value may be incorrect.\n"); | ||
83 | i = 3; | 85 | i = 3; |
84 | rdmsrl(MSR_K7_EVNTSEL3, evntsel3); | 86 | rdmsrl(MSR_K7_EVNTSEL3, evntsel3); |
85 | wrmsrl(MSR_K7_EVNTSEL3, 0); | 87 | wrmsrl(MSR_K7_EVNTSEL3, 0); |
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index f4049f3513b6..8da059f949be 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c | |||
@@ -34,9 +34,8 @@ static DEFINE_SPINLOCK(tlbstate_lock); | |||
34 | */ | 34 | */ |
35 | void leave_mm(int cpu) | 35 | void leave_mm(int cpu) |
36 | { | 36 | { |
37 | if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) | 37 | BUG_ON(x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK); |
38 | BUG(); | 38 | cpu_clear(cpu, x86_read_percpu(cpu_tlbstate.active_mm)->cpu_vm_mask); |
39 | cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask); | ||
40 | load_cr3(swapper_pg_dir); | 39 | load_cr3(swapper_pg_dir); |
41 | } | 40 | } |
42 | EXPORT_SYMBOL_GPL(leave_mm); | 41 | EXPORT_SYMBOL_GPL(leave_mm); |
@@ -104,8 +103,8 @@ void smp_invalidate_interrupt(struct pt_regs *regs) | |||
104 | * BUG(); | 103 | * BUG(); |
105 | */ | 104 | */ |
106 | 105 | ||
107 | if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) { | 106 | if (flush_mm == x86_read_percpu(cpu_tlbstate.active_mm)) { |
108 | if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) { | 107 | if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) { |
109 | if (flush_va == TLB_FLUSH_ALL) | 108 | if (flush_va == TLB_FLUSH_ALL) |
110 | local_flush_tlb(); | 109 | local_flush_tlb(); |
111 | else | 110 | else |
@@ -119,7 +118,7 @@ void smp_invalidate_interrupt(struct pt_regs *regs) | |||
119 | smp_mb__after_clear_bit(); | 118 | smp_mb__after_clear_bit(); |
120 | out: | 119 | out: |
121 | put_cpu_no_resched(); | 120 | put_cpu_no_resched(); |
122 | __get_cpu_var(irq_stat).irq_tlb_count++; | 121 | inc_irq_stat(irq_tlb_count); |
123 | } | 122 | } |
124 | 123 | ||
125 | void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, | 124 | void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, |
@@ -238,7 +237,7 @@ static void do_flush_tlb_all(void *info) | |||
238 | unsigned long cpu = smp_processor_id(); | 237 | unsigned long cpu = smp_processor_id(); |
239 | 238 | ||
240 | __flush_tlb_all(); | 239 | __flush_tlb_all(); |
241 | if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY) | 240 | if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_LAZY) |
242 | leave_mm(cpu); | 241 | leave_mm(cpu); |
243 | } | 242 | } |
244 | 243 | ||
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index 8f919ca69494..29887d7081a9 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c | |||
@@ -154,7 +154,7 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) | |||
154 | out: | 154 | out: |
155 | ack_APIC_irq(); | 155 | ack_APIC_irq(); |
156 | cpu_clear(cpu, f->flush_cpumask); | 156 | cpu_clear(cpu, f->flush_cpumask); |
157 | add_pda(irq_tlb_count, 1); | 157 | inc_irq_stat(irq_tlb_count); |
158 | } | 158 | } |
159 | 159 | ||
160 | void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, | 160 | void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, |
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 04431f34fd16..6a00e5faaa74 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c | |||
@@ -566,14 +566,10 @@ static int __init uv_ptc_init(void) | |||
566 | if (!is_uv_system()) | 566 | if (!is_uv_system()) |
567 | return 0; | 567 | return 0; |
568 | 568 | ||
569 | if (!proc_mkdir("sgi_uv", NULL)) | ||
570 | return -EINVAL; | ||
571 | |||
572 | proc_uv_ptc = create_proc_entry(UV_PTC_BASENAME, 0444, NULL); | 569 | proc_uv_ptc = create_proc_entry(UV_PTC_BASENAME, 0444, NULL); |
573 | if (!proc_uv_ptc) { | 570 | if (!proc_uv_ptc) { |
574 | printk(KERN_ERR "unable to create %s proc entry\n", | 571 | printk(KERN_ERR "unable to create %s proc entry\n", |
575 | UV_PTC_BASENAME); | 572 | UV_PTC_BASENAME); |
576 | remove_proc_entry("sgi_uv", NULL); | ||
577 | return -EINVAL; | 573 | return -EINVAL; |
578 | } | 574 | } |
579 | proc_uv_ptc->proc_fops = &proc_uv_ptc_operations; | 575 | proc_uv_ptc->proc_fops = &proc_uv_ptc_operations; |
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c index 1106fac6024d..808031a5ba19 100644 --- a/arch/x86/kernel/trampoline.c +++ b/arch/x86/kernel/trampoline.c | |||
@@ -1,10 +1,26 @@ | |||
1 | #include <linux/io.h> | 1 | #include <linux/io.h> |
2 | 2 | ||
3 | #include <asm/trampoline.h> | 3 | #include <asm/trampoline.h> |
4 | #include <asm/e820.h> | ||
4 | 5 | ||
5 | /* ready for x86_64 and x86 */ | 6 | /* ready for x86_64 and x86 */ |
6 | unsigned char *trampoline_base = __va(TRAMPOLINE_BASE); | 7 | unsigned char *trampoline_base = __va(TRAMPOLINE_BASE); |
7 | 8 | ||
9 | void __init reserve_trampoline_memory(void) | ||
10 | { | ||
11 | #ifdef CONFIG_X86_32 | ||
12 | /* | ||
13 | * But first pinch a few for the stack/trampoline stuff | ||
14 | * FIXME: Don't need the extra page at 4K, but need to fix | ||
15 | * trampoline before removing it. (see the GDT stuff) | ||
16 | */ | ||
17 | reserve_early(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE"); | ||
18 | #endif | ||
19 | /* Has to be in very low memory so we can execute real-mode AP code. */ | ||
20 | reserve_early(TRAMPOLINE_BASE, TRAMPOLINE_BASE + TRAMPOLINE_SIZE, | ||
21 | "TRAMPOLINE"); | ||
22 | } | ||
23 | |||
8 | /* | 24 | /* |
9 | * Currently trivial. Write the real->protected mode | 25 | * Currently trivial. Write the real->protected mode |
10 | * bootstrap into the page concerned. The caller | 26 | * bootstrap into the page concerned. The caller |
@@ -12,7 +28,6 @@ unsigned char *trampoline_base = __va(TRAMPOLINE_BASE); | |||
12 | */ | 28 | */ |
13 | unsigned long setup_trampoline(void) | 29 | unsigned long setup_trampoline(void) |
14 | { | 30 | { |
15 | memcpy(trampoline_base, trampoline_data, | 31 | memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE); |
16 | trampoline_end - trampoline_data); | ||
17 | return virt_to_phys(trampoline_base); | 32 | return virt_to_phys(trampoline_base); |
18 | } | 33 | } |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 04d242ab0161..141907ab6e22 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -481,11 +481,7 @@ do_nmi(struct pt_regs *regs, long error_code) | |||
481 | { | 481 | { |
482 | nmi_enter(); | 482 | nmi_enter(); |
483 | 483 | ||
484 | #ifdef CONFIG_X86_32 | 484 | inc_irq_stat(__nmi_count); |
485 | { int cpu; cpu = smp_processor_id(); ++nmi_count(cpu); } | ||
486 | #else | ||
487 | add_pda(__nmi_count, 1); | ||
488 | #endif | ||
489 | 485 | ||
490 | if (!ignore_nmis) | 486 | if (!ignore_nmis) |
491 | default_do_nmi(regs); | 487 | default_do_nmi(regs); |
@@ -664,7 +660,7 @@ void math_error(void __user *ip) | |||
664 | { | 660 | { |
665 | struct task_struct *task; | 661 | struct task_struct *task; |
666 | siginfo_t info; | 662 | siginfo_t info; |
667 | unsigned short cwd, swd; | 663 | unsigned short cwd, swd, err; |
668 | 664 | ||
669 | /* | 665 | /* |
670 | * Save the info for the exception handler and clear the error. | 666 | * Save the info for the exception handler and clear the error. |
@@ -675,7 +671,6 @@ void math_error(void __user *ip) | |||
675 | task->thread.error_code = 0; | 671 | task->thread.error_code = 0; |
676 | info.si_signo = SIGFPE; | 672 | info.si_signo = SIGFPE; |
677 | info.si_errno = 0; | 673 | info.si_errno = 0; |
678 | info.si_code = __SI_FAULT; | ||
679 | info.si_addr = ip; | 674 | info.si_addr = ip; |
680 | /* | 675 | /* |
681 | * (~cwd & swd) will mask out exceptions that are not set to unmasked | 676 | * (~cwd & swd) will mask out exceptions that are not set to unmasked |
@@ -689,34 +684,31 @@ void math_error(void __user *ip) | |||
689 | */ | 684 | */ |
690 | cwd = get_fpu_cwd(task); | 685 | cwd = get_fpu_cwd(task); |
691 | swd = get_fpu_swd(task); | 686 | swd = get_fpu_swd(task); |
692 | switch (swd & ~cwd & 0x3f) { | 687 | |
693 | case 0x000: /* No unmasked exception */ | 688 | err = swd & ~cwd & 0x3f; |
689 | |||
694 | #ifdef CONFIG_X86_32 | 690 | #ifdef CONFIG_X86_32 |
691 | if (!err) | ||
695 | return; | 692 | return; |
696 | #endif | 693 | #endif |
697 | default: /* Multiple exceptions */ | 694 | |
698 | break; | 695 | if (err & 0x001) { /* Invalid op */ |
699 | case 0x001: /* Invalid Op */ | ||
700 | /* | 696 | /* |
701 | * swd & 0x240 == 0x040: Stack Underflow | 697 | * swd & 0x240 == 0x040: Stack Underflow |
702 | * swd & 0x240 == 0x240: Stack Overflow | 698 | * swd & 0x240 == 0x240: Stack Overflow |
703 | * User must clear the SF bit (0x40) if set | 699 | * User must clear the SF bit (0x40) if set |
704 | */ | 700 | */ |
705 | info.si_code = FPE_FLTINV; | 701 | info.si_code = FPE_FLTINV; |
706 | break; | 702 | } else if (err & 0x004) { /* Divide by Zero */ |
707 | case 0x002: /* Denormalize */ | ||
708 | case 0x010: /* Underflow */ | ||
709 | info.si_code = FPE_FLTUND; | ||
710 | break; | ||
711 | case 0x004: /* Zero Divide */ | ||
712 | info.si_code = FPE_FLTDIV; | 703 | info.si_code = FPE_FLTDIV; |
713 | break; | 704 | } else if (err & 0x008) { /* Overflow */ |
714 | case 0x008: /* Overflow */ | ||
715 | info.si_code = FPE_FLTOVF; | 705 | info.si_code = FPE_FLTOVF; |
716 | break; | 706 | } else if (err & 0x012) { /* Denormal, Underflow */ |
717 | case 0x020: /* Precision */ | 707 | info.si_code = FPE_FLTUND; |
708 | } else if (err & 0x020) { /* Precision */ | ||
718 | info.si_code = FPE_FLTRES; | 709 | info.si_code = FPE_FLTRES; |
719 | break; | 710 | } else { |
711 | info.si_code = __SI_FAULT|SI_KERNEL; /* WTF? */ | ||
720 | } | 712 | } |
721 | force_sig_info(SIGFPE, &info, task); | 713 | force_sig_info(SIGFPE, &info, task); |
722 | } | 714 | } |
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 424093b157d3..599e58168631 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <asm/vgtod.h> | 15 | #include <asm/vgtod.h> |
16 | #include <asm/time.h> | 16 | #include <asm/time.h> |
17 | #include <asm/delay.h> | 17 | #include <asm/delay.h> |
18 | #include <asm/hypervisor.h> | ||
18 | 19 | ||
19 | unsigned int cpu_khz; /* TSC clocks / usec, not used here */ | 20 | unsigned int cpu_khz; /* TSC clocks / usec, not used here */ |
20 | EXPORT_SYMBOL(cpu_khz); | 21 | EXPORT_SYMBOL(cpu_khz); |
@@ -31,6 +32,7 @@ static int tsc_unstable; | |||
31 | erroneous rdtsc usage on !cpu_has_tsc processors */ | 32 | erroneous rdtsc usage on !cpu_has_tsc processors */ |
32 | static int tsc_disabled = -1; | 33 | static int tsc_disabled = -1; |
33 | 34 | ||
35 | static int tsc_clocksource_reliable; | ||
34 | /* | 36 | /* |
35 | * Scheduler clock - returns current time in nanosec units. | 37 | * Scheduler clock - returns current time in nanosec units. |
36 | */ | 38 | */ |
@@ -98,6 +100,15 @@ int __init notsc_setup(char *str) | |||
98 | 100 | ||
99 | __setup("notsc", notsc_setup); | 101 | __setup("notsc", notsc_setup); |
100 | 102 | ||
103 | static int __init tsc_setup(char *str) | ||
104 | { | ||
105 | if (!strcmp(str, "reliable")) | ||
106 | tsc_clocksource_reliable = 1; | ||
107 | return 1; | ||
108 | } | ||
109 | |||
110 | __setup("tsc=", tsc_setup); | ||
111 | |||
101 | #define MAX_RETRIES 5 | 112 | #define MAX_RETRIES 5 |
102 | #define SMI_TRESHOLD 50000 | 113 | #define SMI_TRESHOLD 50000 |
103 | 114 | ||
@@ -352,9 +363,15 @@ unsigned long native_calibrate_tsc(void) | |||
352 | { | 363 | { |
353 | u64 tsc1, tsc2, delta, ref1, ref2; | 364 | u64 tsc1, tsc2, delta, ref1, ref2; |
354 | unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; | 365 | unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; |
355 | unsigned long flags, latch, ms, fast_calibrate; | 366 | unsigned long flags, latch, ms, fast_calibrate, tsc_khz; |
356 | int hpet = is_hpet_enabled(), i, loopmin; | 367 | int hpet = is_hpet_enabled(), i, loopmin; |
357 | 368 | ||
369 | tsc_khz = get_hypervisor_tsc_freq(); | ||
370 | if (tsc_khz) { | ||
371 | printk(KERN_INFO "TSC: Frequency read from the hypervisor\n"); | ||
372 | return tsc_khz; | ||
373 | } | ||
374 | |||
358 | local_irq_save(flags); | 375 | local_irq_save(flags); |
359 | fast_calibrate = quick_pit_calibrate(); | 376 | fast_calibrate = quick_pit_calibrate(); |
360 | local_irq_restore(flags); | 377 | local_irq_restore(flags); |
@@ -731,24 +748,21 @@ static struct dmi_system_id __initdata bad_tsc_dmi_table[] = { | |||
731 | {} | 748 | {} |
732 | }; | 749 | }; |
733 | 750 | ||
734 | /* | 751 | static void __init check_system_tsc_reliable(void) |
735 | * Geode_LX - the OLPC CPU has a possibly a very reliable TSC | 752 | { |
736 | */ | ||
737 | #ifdef CONFIG_MGEODE_LX | 753 | #ifdef CONFIG_MGEODE_LX |
738 | /* RTSC counts during suspend */ | 754 | /* RTSC counts during suspend */ |
739 | #define RTSC_SUSP 0x100 | 755 | #define RTSC_SUSP 0x100 |
740 | |||
741 | static void __init check_geode_tsc_reliable(void) | ||
742 | { | ||
743 | unsigned long res_low, res_high; | 756 | unsigned long res_low, res_high; |
744 | 757 | ||
745 | rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); | 758 | rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); |
759 | /* Geode_LX - the OLPC CPU has a possibly a very reliable TSC */ | ||
746 | if (res_low & RTSC_SUSP) | 760 | if (res_low & RTSC_SUSP) |
747 | clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; | 761 | tsc_clocksource_reliable = 1; |
748 | } | ||
749 | #else | ||
750 | static inline void check_geode_tsc_reliable(void) { } | ||
751 | #endif | 762 | #endif |
763 | if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) | ||
764 | tsc_clocksource_reliable = 1; | ||
765 | } | ||
752 | 766 | ||
753 | /* | 767 | /* |
754 | * Make an educated guess if the TSC is trustworthy and synchronized | 768 | * Make an educated guess if the TSC is trustworthy and synchronized |
@@ -783,6 +797,8 @@ static void __init init_tsc_clocksource(void) | |||
783 | { | 797 | { |
784 | clocksource_tsc.mult = clocksource_khz2mult(tsc_khz, | 798 | clocksource_tsc.mult = clocksource_khz2mult(tsc_khz, |
785 | clocksource_tsc.shift); | 799 | clocksource_tsc.shift); |
800 | if (tsc_clocksource_reliable) | ||
801 | clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; | ||
786 | /* lower the rating if we already know its unstable: */ | 802 | /* lower the rating if we already know its unstable: */ |
787 | if (check_tsc_unstable()) { | 803 | if (check_tsc_unstable()) { |
788 | clocksource_tsc.rating = 0; | 804 | clocksource_tsc.rating = 0; |
@@ -843,7 +859,7 @@ void __init tsc_init(void) | |||
843 | if (unsynchronized_tsc()) | 859 | if (unsynchronized_tsc()) |
844 | mark_tsc_unstable("TSCs unsynchronized"); | 860 | mark_tsc_unstable("TSCs unsynchronized"); |
845 | 861 | ||
846 | check_geode_tsc_reliable(); | 862 | check_system_tsc_reliable(); |
847 | init_tsc_clocksource(); | 863 | init_tsc_clocksource(); |
848 | } | 864 | } |
849 | 865 | ||
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 1c0dfbca87c1..bf36328f6ef9 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c | |||
@@ -112,6 +112,12 @@ void __cpuinit check_tsc_sync_source(int cpu) | |||
112 | if (unsynchronized_tsc()) | 112 | if (unsynchronized_tsc()) |
113 | return; | 113 | return; |
114 | 114 | ||
115 | if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { | ||
116 | printk(KERN_INFO | ||
117 | "Skipping synchronization checks as TSC is reliable.\n"); | ||
118 | return; | ||
119 | } | ||
120 | |||
115 | printk(KERN_INFO "checking TSC synchronization [CPU#%d -> CPU#%d]:", | 121 | printk(KERN_INFO "checking TSC synchronization [CPU#%d -> CPU#%d]:", |
116 | smp_processor_id(), cpu); | 122 | smp_processor_id(), cpu); |
117 | 123 | ||
@@ -165,7 +171,7 @@ void __cpuinit check_tsc_sync_target(void) | |||
165 | { | 171 | { |
166 | int cpus = 2; | 172 | int cpus = 2; |
167 | 173 | ||
168 | if (unsynchronized_tsc()) | 174 | if (unsynchronized_tsc() || boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) |
169 | return; | 175 | return; |
170 | 176 | ||
171 | /* | 177 | /* |
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 22fd6577156a..23206ba16874 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c | |||
@@ -266,109 +266,6 @@ static void vmi_nop(void) | |||
266 | { | 266 | { |
267 | } | 267 | } |
268 | 268 | ||
269 | #ifdef CONFIG_DEBUG_PAGE_TYPE | ||
270 | |||
271 | #ifdef CONFIG_X86_PAE | ||
272 | #define MAX_BOOT_PTS (2048+4+1) | ||
273 | #else | ||
274 | #define MAX_BOOT_PTS (1024+1) | ||
275 | #endif | ||
276 | |||
277 | /* | ||
278 | * During boot, mem_map is not yet available in paging_init, so stash | ||
279 | * all the boot page allocations here. | ||
280 | */ | ||
281 | static struct { | ||
282 | u32 pfn; | ||
283 | int type; | ||
284 | } boot_page_allocations[MAX_BOOT_PTS]; | ||
285 | static int num_boot_page_allocations; | ||
286 | static int boot_allocations_applied; | ||
287 | |||
288 | void vmi_apply_boot_page_allocations(void) | ||
289 | { | ||
290 | int i; | ||
291 | BUG_ON(!mem_map); | ||
292 | for (i = 0; i < num_boot_page_allocations; i++) { | ||
293 | struct page *page = pfn_to_page(boot_page_allocations[i].pfn); | ||
294 | page->type = boot_page_allocations[i].type; | ||
295 | page->type = boot_page_allocations[i].type & | ||
296 | ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE); | ||
297 | } | ||
298 | boot_allocations_applied = 1; | ||
299 | } | ||
300 | |||
301 | static void record_page_type(u32 pfn, int type) | ||
302 | { | ||
303 | BUG_ON(num_boot_page_allocations >= MAX_BOOT_PTS); | ||
304 | boot_page_allocations[num_boot_page_allocations].pfn = pfn; | ||
305 | boot_page_allocations[num_boot_page_allocations].type = type; | ||
306 | num_boot_page_allocations++; | ||
307 | } | ||
308 | |||
309 | static void check_zeroed_page(u32 pfn, int type, struct page *page) | ||
310 | { | ||
311 | u32 *ptr; | ||
312 | int i; | ||
313 | int limit = PAGE_SIZE / sizeof(int); | ||
314 | |||
315 | if (page_address(page)) | ||
316 | ptr = (u32 *)page_address(page); | ||
317 | else | ||
318 | ptr = (u32 *)__va(pfn << PAGE_SHIFT); | ||
319 | /* | ||
320 | * When cloning the root in non-PAE mode, only the userspace | ||
321 | * pdes need to be zeroed. | ||
322 | */ | ||
323 | if (type & VMI_PAGE_CLONE) | ||
324 | limit = KERNEL_PGD_BOUNDARY; | ||
325 | for (i = 0; i < limit; i++) | ||
326 | BUG_ON(ptr[i]); | ||
327 | } | ||
328 | |||
329 | /* | ||
330 | * We stash the page type into struct page so we can verify the page | ||
331 | * types are used properly. | ||
332 | */ | ||
333 | static void vmi_set_page_type(u32 pfn, int type) | ||
334 | { | ||
335 | /* PAE can have multiple roots per page - don't track */ | ||
336 | if (PTRS_PER_PMD > 1 && (type & VMI_PAGE_PDP)) | ||
337 | return; | ||
338 | |||
339 | if (boot_allocations_applied) { | ||
340 | struct page *page = pfn_to_page(pfn); | ||
341 | if (type != VMI_PAGE_NORMAL) | ||
342 | BUG_ON(page->type); | ||
343 | else | ||
344 | BUG_ON(page->type == VMI_PAGE_NORMAL); | ||
345 | page->type = type & ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE); | ||
346 | if (type & VMI_PAGE_ZEROED) | ||
347 | check_zeroed_page(pfn, type, page); | ||
348 | } else { | ||
349 | record_page_type(pfn, type); | ||
350 | } | ||
351 | } | ||
352 | |||
353 | static void vmi_check_page_type(u32 pfn, int type) | ||
354 | { | ||
355 | /* PAE can have multiple roots per page - skip checks */ | ||
356 | if (PTRS_PER_PMD > 1 && (type & VMI_PAGE_PDP)) | ||
357 | return; | ||
358 | |||
359 | type &= ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE); | ||
360 | if (boot_allocations_applied) { | ||
361 | struct page *page = pfn_to_page(pfn); | ||
362 | BUG_ON((page->type ^ type) & VMI_PAGE_PAE); | ||
363 | BUG_ON(type == VMI_PAGE_NORMAL && page->type); | ||
364 | BUG_ON((type & page->type) == 0); | ||
365 | } | ||
366 | } | ||
367 | #else | ||
368 | #define vmi_set_page_type(p,t) do { } while (0) | ||
369 | #define vmi_check_page_type(p,t) do { } while (0) | ||
370 | #endif | ||
371 | |||
372 | #ifdef CONFIG_HIGHPTE | 269 | #ifdef CONFIG_HIGHPTE |
373 | static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type) | 270 | static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type) |
374 | { | 271 | { |
@@ -395,7 +292,6 @@ static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type) | |||
395 | 292 | ||
396 | static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn) | 293 | static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn) |
397 | { | 294 | { |
398 | vmi_set_page_type(pfn, VMI_PAGE_L1); | ||
399 | vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); | 295 | vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); |
400 | } | 296 | } |
401 | 297 | ||
@@ -406,27 +302,22 @@ static void vmi_allocate_pmd(struct mm_struct *mm, unsigned long pfn) | |||
406 | * It is called only for swapper_pg_dir, which already has | 302 | * It is called only for swapper_pg_dir, which already has |
407 | * data on it. | 303 | * data on it. |
408 | */ | 304 | */ |
409 | vmi_set_page_type(pfn, VMI_PAGE_L2); | ||
410 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0); | 305 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0); |
411 | } | 306 | } |
412 | 307 | ||
413 | static void vmi_allocate_pmd_clone(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count) | 308 | static void vmi_allocate_pmd_clone(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count) |
414 | { | 309 | { |
415 | vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE); | ||
416 | vmi_check_page_type(clonepfn, VMI_PAGE_L2); | ||
417 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count); | 310 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count); |
418 | } | 311 | } |
419 | 312 | ||
420 | static void vmi_release_pte(unsigned long pfn) | 313 | static void vmi_release_pte(unsigned long pfn) |
421 | { | 314 | { |
422 | vmi_ops.release_page(pfn, VMI_PAGE_L1); | 315 | vmi_ops.release_page(pfn, VMI_PAGE_L1); |
423 | vmi_set_page_type(pfn, VMI_PAGE_NORMAL); | ||
424 | } | 316 | } |
425 | 317 | ||
426 | static void vmi_release_pmd(unsigned long pfn) | 318 | static void vmi_release_pmd(unsigned long pfn) |
427 | { | 319 | { |
428 | vmi_ops.release_page(pfn, VMI_PAGE_L2); | 320 | vmi_ops.release_page(pfn, VMI_PAGE_L2); |
429 | vmi_set_page_type(pfn, VMI_PAGE_NORMAL); | ||
430 | } | 321 | } |
431 | 322 | ||
432 | /* | 323 | /* |
@@ -450,26 +341,22 @@ static void vmi_release_pmd(unsigned long pfn) | |||
450 | 341 | ||
451 | static void vmi_update_pte(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | 342 | static void vmi_update_pte(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
452 | { | 343 | { |
453 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | ||
454 | vmi_ops.update_pte(ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); | 344 | vmi_ops.update_pte(ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); |
455 | } | 345 | } |
456 | 346 | ||
457 | static void vmi_update_pte_defer(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | 347 | static void vmi_update_pte_defer(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
458 | { | 348 | { |
459 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | ||
460 | vmi_ops.update_pte(ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 0)); | 349 | vmi_ops.update_pte(ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 0)); |
461 | } | 350 | } |
462 | 351 | ||
463 | static void vmi_set_pte(pte_t *ptep, pte_t pte) | 352 | static void vmi_set_pte(pte_t *ptep, pte_t pte) |
464 | { | 353 | { |
465 | /* XXX because of set_pmd_pte, this can be called on PT or PD layers */ | 354 | /* XXX because of set_pmd_pte, this can be called on PT or PD layers */ |
466 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE | VMI_PAGE_PD); | ||
467 | vmi_ops.set_pte(pte, ptep, VMI_PAGE_PT); | 355 | vmi_ops.set_pte(pte, ptep, VMI_PAGE_PT); |
468 | } | 356 | } |
469 | 357 | ||
470 | static void vmi_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) | 358 | static void vmi_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) |
471 | { | 359 | { |
472 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | ||
473 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); | 360 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); |
474 | } | 361 | } |
475 | 362 | ||
@@ -477,10 +364,8 @@ static void vmi_set_pmd(pmd_t *pmdp, pmd_t pmdval) | |||
477 | { | 364 | { |
478 | #ifdef CONFIG_X86_PAE | 365 | #ifdef CONFIG_X86_PAE |
479 | const pte_t pte = { .pte = pmdval.pmd }; | 366 | const pte_t pte = { .pte = pmdval.pmd }; |
480 | vmi_check_page_type(__pa(pmdp) >> PAGE_SHIFT, VMI_PAGE_PMD); | ||
481 | #else | 367 | #else |
482 | const pte_t pte = { pmdval.pud.pgd.pgd }; | 368 | const pte_t pte = { pmdval.pud.pgd.pgd }; |
483 | vmi_check_page_type(__pa(pmdp) >> PAGE_SHIFT, VMI_PAGE_PGD); | ||
484 | #endif | 369 | #endif |
485 | vmi_ops.set_pte(pte, (pte_t *)pmdp, VMI_PAGE_PD); | 370 | vmi_ops.set_pte(pte, (pte_t *)pmdp, VMI_PAGE_PD); |
486 | } | 371 | } |
@@ -502,7 +387,6 @@ static void vmi_set_pte_atomic(pte_t *ptep, pte_t pteval) | |||
502 | 387 | ||
503 | static void vmi_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) | 388 | static void vmi_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) |
504 | { | 389 | { |
505 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | ||
506 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 1)); | 390 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 1)); |
507 | } | 391 | } |
508 | 392 | ||
@@ -510,21 +394,18 @@ static void vmi_set_pud(pud_t *pudp, pud_t pudval) | |||
510 | { | 394 | { |
511 | /* Um, eww */ | 395 | /* Um, eww */ |
512 | const pte_t pte = { .pte = pudval.pgd.pgd }; | 396 | const pte_t pte = { .pte = pudval.pgd.pgd }; |
513 | vmi_check_page_type(__pa(pudp) >> PAGE_SHIFT, VMI_PAGE_PGD); | ||
514 | vmi_ops.set_pte(pte, (pte_t *)pudp, VMI_PAGE_PDP); | 397 | vmi_ops.set_pte(pte, (pte_t *)pudp, VMI_PAGE_PDP); |
515 | } | 398 | } |
516 | 399 | ||
517 | static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | 400 | static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
518 | { | 401 | { |
519 | const pte_t pte = { .pte = 0 }; | 402 | const pte_t pte = { .pte = 0 }; |
520 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | ||
521 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); | 403 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); |
522 | } | 404 | } |
523 | 405 | ||
524 | static void vmi_pmd_clear(pmd_t *pmd) | 406 | static void vmi_pmd_clear(pmd_t *pmd) |
525 | { | 407 | { |
526 | const pte_t pte = { .pte = 0 }; | 408 | const pte_t pte = { .pte = 0 }; |
527 | vmi_check_page_type(__pa(pmd) >> PAGE_SHIFT, VMI_PAGE_PMD); | ||
528 | vmi_ops.set_pte(pte, (pte_t *)pmd, VMI_PAGE_PD); | 409 | vmi_ops.set_pte(pte, (pte_t *)pmd, VMI_PAGE_PD); |
529 | } | 410 | } |
530 | #endif | 411 | #endif |
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 0b8b6690a86d..ebf2f12900f5 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -128,7 +128,16 @@ static __always_inline void do_vgettimeofday(struct timeval * tv) | |||
128 | gettimeofday(tv,NULL); | 128 | gettimeofday(tv,NULL); |
129 | return; | 129 | return; |
130 | } | 130 | } |
131 | |||
132 | /* | ||
133 | * Surround the RDTSC by barriers, to make sure it's not | ||
134 | * speculated to outside the seqlock critical section and | ||
135 | * does not cause time warps: | ||
136 | */ | ||
137 | rdtsc_barrier(); | ||
131 | now = vread(); | 138 | now = vread(); |
139 | rdtsc_barrier(); | ||
140 | |||
132 | base = __vsyscall_gtod_data.clock.cycle_last; | 141 | base = __vsyscall_gtod_data.clock.cycle_last; |
133 | mask = __vsyscall_gtod_data.clock.mask; | 142 | mask = __vsyscall_gtod_data.clock.mask; |
134 | mult = __vsyscall_gtod_data.clock.mult; | 143 | mult = __vsyscall_gtod_data.clock.mult; |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index a5d8e1ace1cf..50a779264bb1 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -590,7 +590,8 @@ static void __init lguest_init_IRQ(void) | |||
590 | * a straightforward 1 to 1 mapping, so force that here. */ | 590 | * a straightforward 1 to 1 mapping, so force that here. */ |
591 | __get_cpu_var(vector_irq)[vector] = i; | 591 | __get_cpu_var(vector_irq)[vector] = i; |
592 | if (vector != SYSCALL_VECTOR) { | 592 | if (vector != SYSCALL_VECTOR) { |
593 | set_intr_gate(vector, interrupt[vector]); | 593 | set_intr_gate(vector, |
594 | interrupt[vector-FIRST_EXTERNAL_VECTOR]); | ||
594 | set_irq_chip_and_handler_name(i, &lguest_irq_controller, | 595 | set_irq_chip_and_handler_name(i, &lguest_irq_controller, |
595 | handle_level_irq, | 596 | handle_level_irq, |
596 | "level"); | 597 | "level"); |
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 3c3b471ea496..3624a364b7f3 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <asm/bigsmp/apic.h> | 17 | #include <asm/bigsmp/apic.h> |
18 | #include <asm/bigsmp/ipi.h> | 18 | #include <asm/bigsmp/ipi.h> |
19 | #include <asm/mach-default/mach_mpparse.h> | 19 | #include <asm/mach-default/mach_mpparse.h> |
20 | #include <asm/mach-default/mach_wakecpu.h> | ||
20 | 21 | ||
21 | static int dmi_bigsmp; /* can be set by dmi scanners */ | 22 | static int dmi_bigsmp; /* can be set by dmi scanners */ |
22 | 23 | ||
diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 9e835a11a13a..e63a4a76d8cd 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <asm/mach-default/mach_apic.h> | 16 | #include <asm/mach-default/mach_apic.h> |
17 | #include <asm/mach-default/mach_ipi.h> | 17 | #include <asm/mach-default/mach_ipi.h> |
18 | #include <asm/mach-default/mach_mpparse.h> | 18 | #include <asm/mach-default/mach_mpparse.h> |
19 | #include <asm/mach-default/mach_wakecpu.h> | ||
19 | 20 | ||
20 | /* should be called last. */ | 21 | /* should be called last. */ |
21 | static int probe_default(void) | 22 | static int probe_default(void) |
diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 28459cab3ddb..7b4e6d0d1690 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c | |||
@@ -16,7 +16,19 @@ | |||
16 | #include <asm/es7000/apic.h> | 16 | #include <asm/es7000/apic.h> |
17 | #include <asm/es7000/ipi.h> | 17 | #include <asm/es7000/ipi.h> |
18 | #include <asm/es7000/mpparse.h> | 18 | #include <asm/es7000/mpparse.h> |
19 | #include <asm/es7000/wakecpu.h> | 19 | #include <asm/mach-default/mach_wakecpu.h> |
20 | |||
21 | void __init es7000_update_genapic_to_cluster(void) | ||
22 | { | ||
23 | genapic->target_cpus = target_cpus_cluster; | ||
24 | genapic->int_delivery_mode = INT_DELIVERY_MODE_CLUSTER; | ||
25 | genapic->int_dest_mode = INT_DEST_MODE_CLUSTER; | ||
26 | genapic->no_balance_irq = NO_BALANCE_IRQ_CLUSTER; | ||
27 | |||
28 | genapic->init_apic_ldr = init_apic_ldr_cluster; | ||
29 | |||
30 | genapic->cpu_mask_to_apicid = cpu_mask_to_apicid_cluster; | ||
31 | } | ||
20 | 32 | ||
21 | static int probe_es7000(void) | 33 | static int probe_es7000(void) |
22 | { | 34 | { |
diff --git a/arch/x86/mach-generic/probe.c b/arch/x86/mach-generic/probe.c index 5a7e4619e1c4..c346d9d0226f 100644 --- a/arch/x86/mach-generic/probe.c +++ b/arch/x86/mach-generic/probe.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <asm/mpspec.h> | 15 | #include <asm/mpspec.h> |
16 | #include <asm/apicdef.h> | 16 | #include <asm/apicdef.h> |
17 | #include <asm/genapic.h> | 17 | #include <asm/genapic.h> |
18 | #include <asm/setup.h> | ||
18 | 19 | ||
19 | extern struct genapic apic_numaq; | 20 | extern struct genapic apic_numaq; |
20 | extern struct genapic apic_summit; | 21 | extern struct genapic apic_summit; |
@@ -57,6 +58,9 @@ static int __init parse_apic(char *arg) | |||
57 | } | 58 | } |
58 | } | 59 | } |
59 | 60 | ||
61 | if (x86_quirks->update_genapic) | ||
62 | x86_quirks->update_genapic(); | ||
63 | |||
60 | /* Parsed again by __setup for debug/verbose */ | 64 | /* Parsed again by __setup for debug/verbose */ |
61 | return 0; | 65 | return 0; |
62 | } | 66 | } |
@@ -72,12 +76,15 @@ void __init generic_bigsmp_probe(void) | |||
72 | * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support | 76 | * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support |
73 | */ | 77 | */ |
74 | 78 | ||
75 | if (!cmdline_apic && genapic == &apic_default) | 79 | if (!cmdline_apic && genapic == &apic_default) { |
76 | if (apic_bigsmp.probe()) { | 80 | if (apic_bigsmp.probe()) { |
77 | genapic = &apic_bigsmp; | 81 | genapic = &apic_bigsmp; |
82 | if (x86_quirks->update_genapic) | ||
83 | x86_quirks->update_genapic(); | ||
78 | printk(KERN_INFO "Overriding APIC driver with %s\n", | 84 | printk(KERN_INFO "Overriding APIC driver with %s\n", |
79 | genapic->name); | 85 | genapic->name); |
80 | } | 86 | } |
87 | } | ||
81 | #endif | 88 | #endif |
82 | } | 89 | } |
83 | 90 | ||
@@ -94,6 +101,9 @@ void __init generic_apic_probe(void) | |||
94 | /* Not visible without early console */ | 101 | /* Not visible without early console */ |
95 | if (!apic_probe[i]) | 102 | if (!apic_probe[i]) |
96 | panic("Didn't find an APIC driver"); | 103 | panic("Didn't find an APIC driver"); |
104 | |||
105 | if (x86_quirks->update_genapic) | ||
106 | x86_quirks->update_genapic(); | ||
97 | } | 107 | } |
98 | printk(KERN_INFO "Using APIC driver %s\n", genapic->name); | 108 | printk(KERN_INFO "Using APIC driver %s\n", genapic->name); |
99 | } | 109 | } |
@@ -108,6 +118,8 @@ int __init mps_oem_check(struct mp_config_table *mpc, char *oem, | |||
108 | if (apic_probe[i]->mps_oem_check(mpc, oem, productid)) { | 118 | if (apic_probe[i]->mps_oem_check(mpc, oem, productid)) { |
109 | if (!cmdline_apic) { | 119 | if (!cmdline_apic) { |
110 | genapic = apic_probe[i]; | 120 | genapic = apic_probe[i]; |
121 | if (x86_quirks->update_genapic) | ||
122 | x86_quirks->update_genapic(); | ||
111 | printk(KERN_INFO "Switched to APIC driver `%s'.\n", | 123 | printk(KERN_INFO "Switched to APIC driver `%s'.\n", |
112 | genapic->name); | 124 | genapic->name); |
113 | } | 125 | } |
@@ -124,6 +136,8 @@ int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) | |||
124 | if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { | 136 | if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { |
125 | if (!cmdline_apic) { | 137 | if (!cmdline_apic) { |
126 | genapic = apic_probe[i]; | 138 | genapic = apic_probe[i]; |
139 | if (x86_quirks->update_genapic) | ||
140 | x86_quirks->update_genapic(); | ||
127 | printk(KERN_INFO "Switched to APIC driver `%s'.\n", | 141 | printk(KERN_INFO "Switched to APIC driver `%s'.\n", |
128 | genapic->name); | 142 | genapic->name); |
129 | } | 143 | } |
diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 6272b5e69da6..2c6d234e0009 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <asm/summit/apic.h> | 16 | #include <asm/summit/apic.h> |
17 | #include <asm/summit/ipi.h> | 17 | #include <asm/summit/ipi.h> |
18 | #include <asm/summit/mpparse.h> | 18 | #include <asm/summit/mpparse.h> |
19 | #include <asm/mach-default/mach_wakecpu.h> | ||
19 | 20 | ||
20 | static int probe_summit(void) | 21 | static int probe_summit(void) |
21 | { | 22 | { |
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 3a1b6ef4f05d..46b5f753ff81 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -413,6 +413,7 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, | |||
413 | unsigned long error_code) | 413 | unsigned long error_code) |
414 | { | 414 | { |
415 | unsigned long flags = oops_begin(); | 415 | unsigned long flags = oops_begin(); |
416 | int sig = SIGKILL; | ||
416 | struct task_struct *tsk; | 417 | struct task_struct *tsk; |
417 | 418 | ||
418 | printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", | 419 | printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", |
@@ -423,8 +424,8 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, | |||
423 | tsk->thread.trap_no = 14; | 424 | tsk->thread.trap_no = 14; |
424 | tsk->thread.error_code = error_code; | 425 | tsk->thread.error_code = error_code; |
425 | if (__die("Bad pagetable", regs, error_code)) | 426 | if (__die("Bad pagetable", regs, error_code)) |
426 | regs = NULL; | 427 | sig = 0; |
427 | oops_end(flags, regs, SIGKILL); | 428 | oops_end(flags, regs, sig); |
428 | } | 429 | } |
429 | #endif | 430 | #endif |
430 | 431 | ||
@@ -590,6 +591,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
590 | int fault; | 591 | int fault; |
591 | #ifdef CONFIG_X86_64 | 592 | #ifdef CONFIG_X86_64 |
592 | unsigned long flags; | 593 | unsigned long flags; |
594 | int sig; | ||
593 | #endif | 595 | #endif |
594 | 596 | ||
595 | tsk = current; | 597 | tsk = current; |
@@ -849,11 +851,12 @@ no_context: | |||
849 | bust_spinlocks(0); | 851 | bust_spinlocks(0); |
850 | do_exit(SIGKILL); | 852 | do_exit(SIGKILL); |
851 | #else | 853 | #else |
854 | sig = SIGKILL; | ||
852 | if (__die("Oops", regs, error_code)) | 855 | if (__die("Oops", regs, error_code)) |
853 | regs = NULL; | 856 | sig = 0; |
854 | /* Executive summary in case the body of the oops scrolled away */ | 857 | /* Executive summary in case the body of the oops scrolled away */ |
855 | printk(KERN_EMERG "CR2: %016lx\n", address); | 858 | printk(KERN_EMERG "CR2: %016lx\n", address); |
856 | oops_end(flags, regs, SIGKILL); | 859 | oops_end(flags, regs, sig); |
857 | #endif | 860 | #endif |
858 | 861 | ||
859 | /* | 862 | /* |
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index c483f4242079..800e1d94c1b5 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -67,7 +67,7 @@ static unsigned long __meminitdata table_top; | |||
67 | 67 | ||
68 | static int __initdata after_init_bootmem; | 68 | static int __initdata after_init_bootmem; |
69 | 69 | ||
70 | static __init void *alloc_low_page(unsigned long *phys) | 70 | static __init void *alloc_low_page(void) |
71 | { | 71 | { |
72 | unsigned long pfn = table_end++; | 72 | unsigned long pfn = table_end++; |
73 | void *adr; | 73 | void *adr; |
@@ -77,7 +77,6 @@ static __init void *alloc_low_page(unsigned long *phys) | |||
77 | 77 | ||
78 | adr = __va(pfn * PAGE_SIZE); | 78 | adr = __va(pfn * PAGE_SIZE); |
79 | memset(adr, 0, PAGE_SIZE); | 79 | memset(adr, 0, PAGE_SIZE); |
80 | *phys = pfn * PAGE_SIZE; | ||
81 | return adr; | 80 | return adr; |
82 | } | 81 | } |
83 | 82 | ||
@@ -92,16 +91,17 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd) | |||
92 | pmd_t *pmd_table; | 91 | pmd_t *pmd_table; |
93 | 92 | ||
94 | #ifdef CONFIG_X86_PAE | 93 | #ifdef CONFIG_X86_PAE |
95 | unsigned long phys; | ||
96 | if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { | 94 | if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { |
97 | if (after_init_bootmem) | 95 | if (after_init_bootmem) |
98 | pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); | 96 | pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); |
99 | else | 97 | else |
100 | pmd_table = (pmd_t *)alloc_low_page(&phys); | 98 | pmd_table = (pmd_t *)alloc_low_page(); |
101 | paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); | 99 | paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); |
102 | set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); | 100 | set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); |
103 | pud = pud_offset(pgd, 0); | 101 | pud = pud_offset(pgd, 0); |
104 | BUG_ON(pmd_table != pmd_offset(pud, 0)); | 102 | BUG_ON(pmd_table != pmd_offset(pud, 0)); |
103 | |||
104 | return pmd_table; | ||
105 | } | 105 | } |
106 | #endif | 106 | #endif |
107 | pud = pud_offset(pgd, 0); | 107 | pud = pud_offset(pgd, 0); |
@@ -126,10 +126,8 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) | |||
126 | if (!page_table) | 126 | if (!page_table) |
127 | page_table = | 127 | page_table = |
128 | (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); | 128 | (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); |
129 | } else { | 129 | } else |
130 | unsigned long phys; | 130 | page_table = (pte_t *)alloc_low_page(); |
131 | page_table = (pte_t *)alloc_low_page(&phys); | ||
132 | } | ||
133 | 131 | ||
134 | paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT); | 132 | paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT); |
135 | set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); | 133 | set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); |
@@ -969,8 +967,6 @@ void __init mem_init(void) | |||
969 | int codesize, reservedpages, datasize, initsize; | 967 | int codesize, reservedpages, datasize, initsize; |
970 | int tmp; | 968 | int tmp; |
971 | 969 | ||
972 | start_periodic_check_for_corruption(); | ||
973 | |||
974 | #ifdef CONFIG_FLATMEM | 970 | #ifdef CONFIG_FLATMEM |
975 | BUG_ON(!mem_map); | 971 | BUG_ON(!mem_map); |
976 | #endif | 972 | #endif |
@@ -1040,11 +1036,25 @@ void __init mem_init(void) | |||
1040 | (unsigned long)&_text, (unsigned long)&_etext, | 1036 | (unsigned long)&_text, (unsigned long)&_etext, |
1041 | ((unsigned long)&_etext - (unsigned long)&_text) >> 10); | 1037 | ((unsigned long)&_etext - (unsigned long)&_text) >> 10); |
1042 | 1038 | ||
1039 | /* | ||
1040 | * Check boundaries twice: Some fundamental inconsistencies can | ||
1041 | * be detected at build time already. | ||
1042 | */ | ||
1043 | #define __FIXADDR_TOP (-PAGE_SIZE) | ||
1044 | #ifdef CONFIG_HIGHMEM | ||
1045 | BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START); | ||
1046 | BUILD_BUG_ON(VMALLOC_END > PKMAP_BASE); | ||
1047 | #endif | ||
1048 | #define high_memory (-128UL << 20) | ||
1049 | BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END); | ||
1050 | #undef high_memory | ||
1051 | #undef __FIXADDR_TOP | ||
1052 | |||
1043 | #ifdef CONFIG_HIGHMEM | 1053 | #ifdef CONFIG_HIGHMEM |
1044 | BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START); | 1054 | BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START); |
1045 | BUG_ON(VMALLOC_END > PKMAP_BASE); | 1055 | BUG_ON(VMALLOC_END > PKMAP_BASE); |
1046 | #endif | 1056 | #endif |
1047 | BUG_ON(VMALLOC_START > VMALLOC_END); | 1057 | BUG_ON(VMALLOC_START >= VMALLOC_END); |
1048 | BUG_ON((unsigned long)high_memory > VMALLOC_START); | 1058 | BUG_ON((unsigned long)high_memory > VMALLOC_START); |
1049 | 1059 | ||
1050 | if (boot_cpu_data.wp_works_ok < 0) | 1060 | if (boot_cpu_data.wp_works_ok < 0) |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 9db01db6e3cd..9f7a0d24d42a 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -902,8 +902,6 @@ void __init mem_init(void) | |||
902 | long codesize, reservedpages, datasize, initsize; | 902 | long codesize, reservedpages, datasize, initsize; |
903 | unsigned long absent_pages; | 903 | unsigned long absent_pages; |
904 | 904 | ||
905 | start_periodic_check_for_corruption(); | ||
906 | |||
907 | pci_iommu_alloc(); | 905 | pci_iommu_alloc(); |
908 | 906 | ||
909 | /* clear_bss() already clear the empty_zero_page */ | 907 | /* clear_bss() already clear the empty_zero_page */ |
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index d4c4307ff3e0..bd85d42819e1 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c | |||
@@ -223,7 +223,8 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, | |||
223 | * Check if the request spans more than any BAR in the iomem resource | 223 | * Check if the request spans more than any BAR in the iomem resource |
224 | * tree. | 224 | * tree. |
225 | */ | 225 | */ |
226 | WARN_ON(iomem_map_sanity_check(phys_addr, size)); | 226 | WARN_ONCE(iomem_map_sanity_check(phys_addr, size), |
227 | KERN_INFO "Info: mapping multiple BARs. Your kernel is fine."); | ||
227 | 228 | ||
228 | /* | 229 | /* |
229 | * Don't allow anybody to remap normal RAM that we're using.. | 230 | * Don't allow anybody to remap normal RAM that we're using.. |
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index eb1bf000d12e..85cbd3cd3723 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c | |||
@@ -596,6 +596,242 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot) | |||
596 | free_memtype(addr, addr + size); | 596 | free_memtype(addr, addr + size); |
597 | } | 597 | } |
598 | 598 | ||
599 | /* | ||
600 | * Internal interface to reserve a range of physical memory with prot. | ||
601 | * Reserved non RAM regions only and after successful reserve_memtype, | ||
602 | * this func also keeps identity mapping (if any) in sync with this new prot. | ||
603 | */ | ||
604 | static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t vma_prot) | ||
605 | { | ||
606 | int is_ram = 0; | ||
607 | int id_sz, ret; | ||
608 | unsigned long flags; | ||
609 | unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK); | ||
610 | |||
611 | is_ram = pagerange_is_ram(paddr, paddr + size); | ||
612 | |||
613 | if (is_ram != 0) { | ||
614 | /* | ||
615 | * For mapping RAM pages, drivers need to call | ||
616 | * set_memory_[uc|wc|wb] directly, for reserve and free, before | ||
617 | * setting up the PTE. | ||
618 | */ | ||
619 | WARN_ON_ONCE(1); | ||
620 | return 0; | ||
621 | } | ||
622 | |||
623 | ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); | ||
624 | if (ret) | ||
625 | return ret; | ||
626 | |||
627 | if (flags != want_flags) { | ||
628 | free_memtype(paddr, paddr + size); | ||
629 | printk(KERN_ERR | ||
630 | "%s:%d map pfn expected mapping type %s for %Lx-%Lx, got %s\n", | ||
631 | current->comm, current->pid, | ||
632 | cattr_name(want_flags), | ||
633 | (unsigned long long)paddr, | ||
634 | (unsigned long long)(paddr + size), | ||
635 | cattr_name(flags)); | ||
636 | return -EINVAL; | ||
637 | } | ||
638 | |||
639 | /* Need to keep identity mapping in sync */ | ||
640 | if (paddr >= __pa(high_memory)) | ||
641 | return 0; | ||
642 | |||
643 | id_sz = (__pa(high_memory) < paddr + size) ? | ||
644 | __pa(high_memory) - paddr : | ||
645 | size; | ||
646 | |||
647 | if (ioremap_change_attr((unsigned long)__va(paddr), id_sz, flags) < 0) { | ||
648 | free_memtype(paddr, paddr + size); | ||
649 | printk(KERN_ERR | ||
650 | "%s:%d reserve_pfn_range ioremap_change_attr failed %s " | ||
651 | "for %Lx-%Lx\n", | ||
652 | current->comm, current->pid, | ||
653 | cattr_name(flags), | ||
654 | (unsigned long long)paddr, | ||
655 | (unsigned long long)(paddr + size)); | ||
656 | return -EINVAL; | ||
657 | } | ||
658 | return 0; | ||
659 | } | ||
660 | |||
661 | /* | ||
662 | * Internal interface to free a range of physical memory. | ||
663 | * Frees non RAM regions only. | ||
664 | */ | ||
665 | static void free_pfn_range(u64 paddr, unsigned long size) | ||
666 | { | ||
667 | int is_ram; | ||
668 | |||
669 | is_ram = pagerange_is_ram(paddr, paddr + size); | ||
670 | if (is_ram == 0) | ||
671 | free_memtype(paddr, paddr + size); | ||
672 | } | ||
673 | |||
674 | /* | ||
675 | * track_pfn_vma_copy is called when vma that is covering the pfnmap gets | ||
676 | * copied through copy_page_range(). | ||
677 | * | ||
678 | * If the vma has a linear pfn mapping for the entire range, we get the prot | ||
679 | * from pte and reserve the entire vma range with single reserve_pfn_range call. | ||
680 | * Otherwise, we reserve the entire vma range, by going through the PTEs page | ||
681 | * by page to get physical address and protection. | ||
682 | */ | ||
683 | int track_pfn_vma_copy(struct vm_area_struct *vma) | ||
684 | { | ||
685 | int retval = 0; | ||
686 | unsigned long i, j; | ||
687 | resource_size_t paddr; | ||
688 | unsigned long prot; | ||
689 | unsigned long vma_start = vma->vm_start; | ||
690 | unsigned long vma_end = vma->vm_end; | ||
691 | unsigned long vma_size = vma_end - vma_start; | ||
692 | |||
693 | if (!pat_enabled) | ||
694 | return 0; | ||
695 | |||
696 | if (is_linear_pfn_mapping(vma)) { | ||
697 | /* | ||
698 | * reserve the whole chunk covered by vma. We need the | ||
699 | * starting address and protection from pte. | ||
700 | */ | ||
701 | if (follow_phys(vma, vma_start, 0, &prot, &paddr)) { | ||
702 | WARN_ON_ONCE(1); | ||
703 | return -EINVAL; | ||
704 | } | ||
705 | return reserve_pfn_range(paddr, vma_size, __pgprot(prot)); | ||
706 | } | ||
707 | |||
708 | /* reserve entire vma page by page, using pfn and prot from pte */ | ||
709 | for (i = 0; i < vma_size; i += PAGE_SIZE) { | ||
710 | if (follow_phys(vma, vma_start + i, 0, &prot, &paddr)) | ||
711 | continue; | ||
712 | |||
713 | retval = reserve_pfn_range(paddr, PAGE_SIZE, __pgprot(prot)); | ||
714 | if (retval) | ||
715 | goto cleanup_ret; | ||
716 | } | ||
717 | return 0; | ||
718 | |||
719 | cleanup_ret: | ||
720 | /* Reserve error: Cleanup partial reservation and return error */ | ||
721 | for (j = 0; j < i; j += PAGE_SIZE) { | ||
722 | if (follow_phys(vma, vma_start + j, 0, &prot, &paddr)) | ||
723 | continue; | ||
724 | |||
725 | free_pfn_range(paddr, PAGE_SIZE); | ||
726 | } | ||
727 | |||
728 | return retval; | ||
729 | } | ||
730 | |||
731 | /* | ||
732 | * track_pfn_vma_new is called when a _new_ pfn mapping is being established | ||
733 | * for physical range indicated by pfn and size. | ||
734 | * | ||
735 | * prot is passed in as a parameter for the new mapping. If the vma has a | ||
736 | * linear pfn mapping for the entire range reserve the entire vma range with | ||
737 | * single reserve_pfn_range call. | ||
738 | * Otherwise, we look at the pfn and size and reserve only the specified range | ||
739 | * page by page. | ||
740 | * | ||
741 | * Note that this function can be called with caller trying to map only a | ||
742 | * subrange/page inside the vma. | ||
743 | */ | ||
744 | int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot, | ||
745 | unsigned long pfn, unsigned long size) | ||
746 | { | ||
747 | int retval = 0; | ||
748 | unsigned long i, j; | ||
749 | resource_size_t base_paddr; | ||
750 | resource_size_t paddr; | ||
751 | unsigned long vma_start = vma->vm_start; | ||
752 | unsigned long vma_end = vma->vm_end; | ||
753 | unsigned long vma_size = vma_end - vma_start; | ||
754 | |||
755 | if (!pat_enabled) | ||
756 | return 0; | ||
757 | |||
758 | if (is_linear_pfn_mapping(vma)) { | ||
759 | /* reserve the whole chunk starting from vm_pgoff */ | ||
760 | paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; | ||
761 | return reserve_pfn_range(paddr, vma_size, prot); | ||
762 | } | ||
763 | |||
764 | /* reserve page by page using pfn and size */ | ||
765 | base_paddr = (resource_size_t)pfn << PAGE_SHIFT; | ||
766 | for (i = 0; i < size; i += PAGE_SIZE) { | ||
767 | paddr = base_paddr + i; | ||
768 | retval = reserve_pfn_range(paddr, PAGE_SIZE, prot); | ||
769 | if (retval) | ||
770 | goto cleanup_ret; | ||
771 | } | ||
772 | return 0; | ||
773 | |||
774 | cleanup_ret: | ||
775 | /* Reserve error: Cleanup partial reservation and return error */ | ||
776 | for (j = 0; j < i; j += PAGE_SIZE) { | ||
777 | paddr = base_paddr + j; | ||
778 | free_pfn_range(paddr, PAGE_SIZE); | ||
779 | } | ||
780 | |||
781 | return retval; | ||
782 | } | ||
783 | |||
784 | /* | ||
785 | * untrack_pfn_vma is called while unmapping a pfnmap for a region. | ||
786 | * untrack can be called for a specific region indicated by pfn and size or | ||
787 | * can be for the entire vma (in which case size can be zero). | ||
788 | */ | ||
789 | void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn, | ||
790 | unsigned long size) | ||
791 | { | ||
792 | unsigned long i; | ||
793 | resource_size_t paddr; | ||
794 | unsigned long prot; | ||
795 | unsigned long vma_start = vma->vm_start; | ||
796 | unsigned long vma_end = vma->vm_end; | ||
797 | unsigned long vma_size = vma_end - vma_start; | ||
798 | |||
799 | if (!pat_enabled) | ||
800 | return; | ||
801 | |||
802 | if (is_linear_pfn_mapping(vma)) { | ||
803 | /* free the whole chunk starting from vm_pgoff */ | ||
804 | paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; | ||
805 | free_pfn_range(paddr, vma_size); | ||
806 | return; | ||
807 | } | ||
808 | |||
809 | if (size != 0 && size != vma_size) { | ||
810 | /* free page by page, using pfn and size */ | ||
811 | paddr = (resource_size_t)pfn << PAGE_SHIFT; | ||
812 | for (i = 0; i < size; i += PAGE_SIZE) { | ||
813 | paddr = paddr + i; | ||
814 | free_pfn_range(paddr, PAGE_SIZE); | ||
815 | } | ||
816 | } else { | ||
817 | /* free entire vma, page by page, using the pfn from pte */ | ||
818 | for (i = 0; i < vma_size; i += PAGE_SIZE) { | ||
819 | if (follow_phys(vma, vma_start + i, 0, &prot, &paddr)) | ||
820 | continue; | ||
821 | |||
822 | free_pfn_range(paddr, PAGE_SIZE); | ||
823 | } | ||
824 | } | ||
825 | } | ||
826 | |||
827 | pgprot_t pgprot_writecombine(pgprot_t prot) | ||
828 | { | ||
829 | if (pat_enabled) | ||
830 | return __pgprot(pgprot_val(prot) | _PAGE_CACHE_WC); | ||
831 | else | ||
832 | return pgprot_noncached(prot); | ||
833 | } | ||
834 | |||
599 | #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) | 835 | #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) |
600 | 836 | ||
601 | /* get Nth element of the linked list */ | 837 | /* get Nth element of the linked list */ |
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index b67732bbb85a..bb1a01f089e2 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c | |||
@@ -23,6 +23,12 @@ unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 | | |||
23 | unsigned int pci_early_dump_regs; | 23 | unsigned int pci_early_dump_regs; |
24 | static int pci_bf_sort; | 24 | static int pci_bf_sort; |
25 | int pci_routeirq; | 25 | int pci_routeirq; |
26 | int noioapicquirk; | ||
27 | #ifdef CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS | ||
28 | int noioapicreroute = 0; | ||
29 | #else | ||
30 | int noioapicreroute = 1; | ||
31 | #endif | ||
26 | int pcibios_last_bus = -1; | 32 | int pcibios_last_bus = -1; |
27 | unsigned long pirq_table_addr; | 33 | unsigned long pirq_table_addr; |
28 | struct pci_bus *pci_root_bus; | 34 | struct pci_bus *pci_root_bus; |
@@ -519,6 +525,17 @@ char * __devinit pcibios_setup(char *str) | |||
519 | } else if (!strcmp(str, "skip_isa_align")) { | 525 | } else if (!strcmp(str, "skip_isa_align")) { |
520 | pci_probe |= PCI_CAN_SKIP_ISA_ALIGN; | 526 | pci_probe |= PCI_CAN_SKIP_ISA_ALIGN; |
521 | return NULL; | 527 | return NULL; |
528 | } else if (!strcmp(str, "noioapicquirk")) { | ||
529 | noioapicquirk = 1; | ||
530 | return NULL; | ||
531 | } else if (!strcmp(str, "ioapicreroute")) { | ||
532 | if (noioapicreroute != -1) | ||
533 | noioapicreroute = 0; | ||
534 | return NULL; | ||
535 | } else if (!strcmp(str, "noioapicreroute")) { | ||
536 | if (noioapicreroute != -1) | ||
537 | noioapicreroute = 1; | ||
538 | return NULL; | ||
522 | } | 539 | } |
523 | return str; | 540 | return str; |
524 | } | 541 | } |
diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c index 9915293500fb..9a5af6c8fbe9 100644 --- a/arch/x86/pci/direct.c +++ b/arch/x86/pci/direct.c | |||
@@ -173,7 +173,7 @@ static int pci_conf2_write(unsigned int seg, unsigned int bus, | |||
173 | 173 | ||
174 | #undef PCI_CONF2_ADDRESS | 174 | #undef PCI_CONF2_ADDRESS |
175 | 175 | ||
176 | static struct pci_raw_ops pci_direct_conf2 = { | 176 | struct pci_raw_ops pci_direct_conf2 = { |
177 | .read = pci_conf2_read, | 177 | .read = pci_conf2_read, |
178 | .write = pci_conf2_write, | 178 | .write = pci_conf2_write, |
179 | }; | 179 | }; |
@@ -289,6 +289,7 @@ int __init pci_direct_probe(void) | |||
289 | 289 | ||
290 | if (pci_check_type1()) { | 290 | if (pci_check_type1()) { |
291 | raw_pci_ops = &pci_direct_conf1; | 291 | raw_pci_ops = &pci_direct_conf1; |
292 | port_cf9_safe = true; | ||
292 | return 1; | 293 | return 1; |
293 | } | 294 | } |
294 | release_resource(region); | 295 | release_resource(region); |
@@ -305,6 +306,7 @@ int __init pci_direct_probe(void) | |||
305 | 306 | ||
306 | if (pci_check_type2()) { | 307 | if (pci_check_type2()) { |
307 | raw_pci_ops = &pci_direct_conf2; | 308 | raw_pci_ops = &pci_direct_conf2; |
309 | port_cf9_safe = true; | ||
308 | return 2; | 310 | return 2; |
309 | } | 311 | } |
310 | 312 | ||
diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h index 15b9cf6be729..1959018aac02 100644 --- a/arch/x86/pci/pci.h +++ b/arch/x86/pci/pci.h | |||
@@ -96,6 +96,7 @@ extern struct pci_raw_ops *raw_pci_ops; | |||
96 | extern struct pci_raw_ops *raw_pci_ext_ops; | 96 | extern struct pci_raw_ops *raw_pci_ext_ops; |
97 | 97 | ||
98 | extern struct pci_raw_ops pci_direct_conf1; | 98 | extern struct pci_raw_ops pci_direct_conf1; |
99 | extern bool port_cf9_safe; | ||
99 | 100 | ||
100 | /* arch_initcall level */ | 101 | /* arch_initcall level */ |
101 | extern int pci_direct_probe(void); | 102 | extern int pci_direct_probe(void); |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 5e4686d70f62..bea215230b20 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/console.h> | 28 | #include <linux/console.h> |
29 | 29 | ||
30 | #include <xen/interface/xen.h> | 30 | #include <xen/interface/xen.h> |
31 | #include <xen/interface/version.h> | ||
31 | #include <xen/interface/physdev.h> | 32 | #include <xen/interface/physdev.h> |
32 | #include <xen/interface/vcpu.h> | 33 | #include <xen/interface/vcpu.h> |
33 | #include <xen/features.h> | 34 | #include <xen/features.h> |
@@ -793,7 +794,7 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) | |||
793 | 794 | ||
794 | ret = 0; | 795 | ret = 0; |
795 | 796 | ||
796 | switch(msr) { | 797 | switch (msr) { |
797 | #ifdef CONFIG_X86_64 | 798 | #ifdef CONFIG_X86_64 |
798 | unsigned which; | 799 | unsigned which; |
799 | u64 base; | 800 | u64 base; |
@@ -1453,7 +1454,7 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) | |||
1453 | 1454 | ||
1454 | ident_pte = 0; | 1455 | ident_pte = 0; |
1455 | pfn = 0; | 1456 | pfn = 0; |
1456 | for(pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) { | 1457 | for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) { |
1457 | pte_t *pte_page; | 1458 | pte_t *pte_page; |
1458 | 1459 | ||
1459 | /* Reuse or allocate a page of ptes */ | 1460 | /* Reuse or allocate a page of ptes */ |
@@ -1471,7 +1472,7 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) | |||
1471 | } | 1472 | } |
1472 | 1473 | ||
1473 | /* Install mappings */ | 1474 | /* Install mappings */ |
1474 | for(pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { | 1475 | for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { |
1475 | pte_t pte; | 1476 | pte_t pte; |
1476 | 1477 | ||
1477 | if (pfn > max_pfn_mapped) | 1478 | if (pfn > max_pfn_mapped) |
@@ -1485,7 +1486,7 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) | |||
1485 | } | 1486 | } |
1486 | } | 1487 | } |
1487 | 1488 | ||
1488 | for(pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE) | 1489 | for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE) |
1489 | set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO); | 1490 | set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO); |
1490 | 1491 | ||
1491 | set_page_prot(pmd, PAGE_KERNEL_RO); | 1492 | set_page_prot(pmd, PAGE_KERNEL_RO); |
@@ -1499,7 +1500,7 @@ static void convert_pfn_mfn(void *v) | |||
1499 | 1500 | ||
1500 | /* All levels are converted the same way, so just treat them | 1501 | /* All levels are converted the same way, so just treat them |
1501 | as ptes. */ | 1502 | as ptes. */ |
1502 | for(i = 0; i < PTRS_PER_PTE; i++) | 1503 | for (i = 0; i < PTRS_PER_PTE; i++) |
1503 | pte[i] = xen_make_pte(pte[i].pte); | 1504 | pte[i] = xen_make_pte(pte[i].pte); |
1504 | } | 1505 | } |
1505 | 1506 | ||
@@ -1514,7 +1515,8 @@ static void convert_pfn_mfn(void *v) | |||
1514 | * of the physical mapping once some sort of allocator has been set | 1515 | * of the physical mapping once some sort of allocator has been set |
1515 | * up. | 1516 | * up. |
1516 | */ | 1517 | */ |
1517 | static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) | 1518 | static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, |
1519 | unsigned long max_pfn) | ||
1518 | { | 1520 | { |
1519 | pud_t *l3; | 1521 | pud_t *l3; |
1520 | pmd_t *l2; | 1522 | pmd_t *l2; |
@@ -1577,7 +1579,8 @@ static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pf | |||
1577 | #else /* !CONFIG_X86_64 */ | 1579 | #else /* !CONFIG_X86_64 */ |
1578 | static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss; | 1580 | static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss; |
1579 | 1581 | ||
1580 | static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) | 1582 | static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, |
1583 | unsigned long max_pfn) | ||
1581 | { | 1584 | { |
1582 | pmd_t *kernel_pmd; | 1585 | pmd_t *kernel_pmd; |
1583 | 1586 | ||
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 636ef4caa52d..773d68d3e912 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -154,13 +154,13 @@ void xen_setup_mfn_list_list(void) | |||
154 | { | 154 | { |
155 | unsigned pfn, idx; | 155 | unsigned pfn, idx; |
156 | 156 | ||
157 | for(pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) { | 157 | for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) { |
158 | unsigned topidx = p2m_top_index(pfn); | 158 | unsigned topidx = p2m_top_index(pfn); |
159 | 159 | ||
160 | p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]); | 160 | p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]); |
161 | } | 161 | } |
162 | 162 | ||
163 | for(idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) { | 163 | for (idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) { |
164 | unsigned topidx = idx * P2M_ENTRIES_PER_PAGE; | 164 | unsigned topidx = idx * P2M_ENTRIES_PER_PAGE; |
165 | p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]); | 165 | p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]); |
166 | } | 166 | } |
@@ -179,7 +179,7 @@ void __init xen_build_dynamic_phys_to_machine(void) | |||
179 | unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); | 179 | unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); |
180 | unsigned pfn; | 180 | unsigned pfn; |
181 | 181 | ||
182 | for(pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) { | 182 | for (pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) { |
183 | unsigned topidx = p2m_top_index(pfn); | 183 | unsigned topidx = p2m_top_index(pfn); |
184 | 184 | ||
185 | p2m_top[topidx] = &mfn_list[pfn]; | 185 | p2m_top[topidx] = &mfn_list[pfn]; |
@@ -207,7 +207,7 @@ static void alloc_p2m(unsigned long **pp, unsigned long *mfnp) | |||
207 | p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL); | 207 | p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL); |
208 | BUG_ON(p == NULL); | 208 | BUG_ON(p == NULL); |
209 | 209 | ||
210 | for(i = 0; i < P2M_ENTRIES_PER_PAGE; i++) | 210 | for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++) |
211 | p[i] = INVALID_P2M_ENTRY; | 211 | p[i] = INVALID_P2M_ENTRY; |
212 | 212 | ||
213 | if (cmpxchg(pp, p2m_missing, p) != p2m_missing) | 213 | if (cmpxchg(pp, p2m_missing, p) != p2m_missing) |
@@ -407,7 +407,8 @@ out: | |||
407 | preempt_enable(); | 407 | preempt_enable(); |
408 | } | 408 | } |
409 | 409 | ||
410 | pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | 410 | pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, |
411 | unsigned long addr, pte_t *ptep) | ||
411 | { | 412 | { |
412 | /* Just return the pte as-is. We preserve the bits on commit */ | 413 | /* Just return the pte as-is. We preserve the bits on commit */ |
413 | return *ptep; | 414 | return *ptep; |
@@ -878,7 +879,8 @@ static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd) | |||
878 | 879 | ||
879 | if (user_pgd) { | 880 | if (user_pgd) { |
880 | xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD); | 881 | xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD); |
881 | xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(user_pgd))); | 882 | xen_do_pin(MMUEXT_PIN_L4_TABLE, |
883 | PFN_DOWN(__pa(user_pgd))); | ||
882 | } | 884 | } |
883 | } | 885 | } |
884 | #else /* CONFIG_X86_32 */ | 886 | #else /* CONFIG_X86_32 */ |
@@ -993,7 +995,8 @@ static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd) | |||
993 | pgd_t *user_pgd = xen_get_user_pgd(pgd); | 995 | pgd_t *user_pgd = xen_get_user_pgd(pgd); |
994 | 996 | ||
995 | if (user_pgd) { | 997 | if (user_pgd) { |
996 | xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(user_pgd))); | 998 | xen_do_pin(MMUEXT_UNPIN_TABLE, |
999 | PFN_DOWN(__pa(user_pgd))); | ||
997 | xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD); | 1000 | xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD); |
998 | } | 1001 | } |
999 | } | 1002 | } |
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index 8ea8a0d0b0de..c738644b5435 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c | |||
@@ -154,7 +154,7 @@ void xen_mc_flush(void) | |||
154 | ret, smp_processor_id()); | 154 | ret, smp_processor_id()); |
155 | dump_stack(); | 155 | dump_stack(); |
156 | for (i = 0; i < b->mcidx; i++) { | 156 | for (i = 0; i < b->mcidx; i++) { |
157 | printk(" call %2d/%d: op=%lu arg=[%lx] result=%ld\n", | 157 | printk(KERN_DEBUG " call %2d/%d: op=%lu arg=[%lx] result=%ld\n", |
158 | i+1, b->mcidx, | 158 | i+1, b->mcidx, |
159 | b->debug[i].op, | 159 | b->debug[i].op, |
160 | b->debug[i].args[0], | 160 | b->debug[i].args[0], |
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index d67901083888..15c6c68db6a2 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -28,6 +28,9 @@ | |||
28 | /* These are code, but not functions. Defined in entry.S */ | 28 | /* These are code, but not functions. Defined in entry.S */ |
29 | extern const char xen_hypervisor_callback[]; | 29 | extern const char xen_hypervisor_callback[]; |
30 | extern const char xen_failsafe_callback[]; | 30 | extern const char xen_failsafe_callback[]; |
31 | extern void xen_sysenter_target(void); | ||
32 | extern void xen_syscall_target(void); | ||
33 | extern void xen_syscall32_target(void); | ||
31 | 34 | ||
32 | 35 | ||
33 | /** | 36 | /** |
@@ -110,7 +113,6 @@ static __cpuinit int register_callback(unsigned type, const void *func) | |||
110 | 113 | ||
111 | void __cpuinit xen_enable_sysenter(void) | 114 | void __cpuinit xen_enable_sysenter(void) |
112 | { | 115 | { |
113 | extern void xen_sysenter_target(void); | ||
114 | int ret; | 116 | int ret; |
115 | unsigned sysenter_feature; | 117 | unsigned sysenter_feature; |
116 | 118 | ||
@@ -132,8 +134,6 @@ void __cpuinit xen_enable_syscall(void) | |||
132 | { | 134 | { |
133 | #ifdef CONFIG_X86_64 | 135 | #ifdef CONFIG_X86_64 |
134 | int ret; | 136 | int ret; |
135 | extern void xen_syscall_target(void); | ||
136 | extern void xen_syscall32_target(void); | ||
137 | 137 | ||
138 | ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target); | 138 | ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target); |
139 | if (ret != 0) { | 139 | if (ret != 0) { |
@@ -160,7 +160,8 @@ void __init xen_arch_setup(void) | |||
160 | HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); | 160 | HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); |
161 | 161 | ||
162 | if (!xen_feature(XENFEAT_auto_translated_physmap)) | 162 | if (!xen_feature(XENFEAT_auto_translated_physmap)) |
163 | HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_pae_extended_cr3); | 163 | HYPERVISOR_vm_assist(VMASST_CMD_enable, |
164 | VMASST_TYPE_pae_extended_cr3); | ||
164 | 165 | ||
165 | if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) || | 166 | if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) || |
166 | register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback)) | 167 | register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback)) |