Diffstat (limited to 'arch/x86')
146 files changed, 3745 insertions, 3156 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index fe361ae7ef2f..5d2858119930 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -26,17 +26,10 @@ config X86
 	select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
 	select HAVE_ARCH_KGDB if !X86_VOYAGER
 
-config DEFCONFIG_LIST
+config ARCH_DEFCONFIG
 	string
-	depends on X86_32
-	option defconfig_list
-	default "arch/x86/configs/i386_defconfig"
-
-config DEFCONFIG_LIST
-	string
-	depends on X86_64
-	option defconfig_list
-	default "arch/x86/configs/x86_64_defconfig"
+	default "arch/x86/configs/i386_defconfig" if X86_32
+	default "arch/x86/configs/x86_64_defconfig" if X86_64
 
 
 config GENERIC_LOCKBREAK
@@ -258,7 +251,7 @@ config X86_ELAN
 
 config X86_VOYAGER
 	bool "Voyager (NCR)"
-	depends on X86_32 && (SMP || BROKEN)
+	depends on X86_32 && (SMP || BROKEN) && !PCI
 	help
 	  Voyager is an MCA-based 32-way capable SMP architecture proprietary
 	  to NCR Corp. Machine classes 345x/35xx/4100/51xx are Voyager-based.
@@ -270,7 +263,7 @@ config X86_VOYAGER
 
 config X86_NUMAQ
 	bool "NUMAQ (IBM/Sequent)"
-	depends on SMP && X86_32
+	depends on SMP && X86_32 && PCI
 	select NUMA
 	help
 	  This option is used for getting Linux to run on a (IBM/Sequent) NUMA
@@ -300,7 +293,7 @@ config X86_BIGSMP
 
 config X86_VISWS
 	bool "SGI 320/540 (Visual Workstation)"
-	depends on X86_32
+	depends on X86_32 && !PCI
 	help
 	  The SGI Visual Workstation series is an IA32-based workstation
 	  based on SGI systems chips with some legacy PC hardware attached.
@@ -344,7 +337,7 @@ config X86_RDC321X
 config X86_VSMP
 	bool "Support for ScaleMP vSMP"
 	select PARAVIRT
-	depends on X86_64
+	depends on X86_64 && !PCI
 	help
 	  Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is
 	  supposed to run on these EM64T-based machines. Only choose this option
@@ -390,6 +383,7 @@ config VMI
 config KVM_CLOCK
 	bool "KVM paravirtualized clock"
 	select PARAVIRT
+	select PARAVIRT_CLOCK
 	depends on !(X86_VISWS || X86_VOYAGER)
 	help
 	  Turning on this option will allow you to run a paravirtualized clock
@@ -417,37 +411,25 @@ config PARAVIRT
 	  over full virtualization.  However, when run without a hypervisor
 	  the kernel is theoretically slower and slightly larger.
 
+config PARAVIRT_CLOCK
+	bool
+	default n
+
 endif
 
-config MEMTEST_BOOTPARAM
-	bool "Memtest boot parameter"
+config MEMTEST
+	bool "Memtest"
 	depends on X86_64
 	default y
 	help
 	  This option adds a kernel parameter 'memtest', which allows memtest
-	  to be disabled at boot.  If this option is selected, memtest
-	  functionality can be disabled with memtest=0 on the kernel
-	  command line.  The purpose of this option is to allow a single
-	  kernel image to be distributed with memtest built in, but not
-	  necessarily enabled.
-
+	  to be set.
+	        memtest=0, mean disabled; -- default
+	        memtest=1, mean do 1 test pattern;
+	        ...
+	        memtest=4, mean do 4 test patterns.
 	  If you are unsure how to answer this question, answer Y.
 
-config MEMTEST_BOOTPARAM_VALUE
-	int "Memtest boot parameter default value (0-4)"
-	depends on MEMTEST_BOOTPARAM
-	range 0 4
-	default 0
-	help
-	  This option sets the default value for the kernel parameter
-	  'memtest', which allows memtest to be disabled at boot.  If this
-	  option is set to 0 (zero), the memtest kernel parameter will
-	  default to 0, disabling memtest at bootup.  If this option is
-	  set to 4, the memtest kernel parameter will default to 4,
-	  enabling memtest at bootup, and use that as pattern number.
-
-	  If you are unsure how to answer this question, answer 0.
-
 config ACPI_SRAT
 	def_bool y
 	depends on X86_32 && ACPI && NUMA && (X86_SUMMIT || X86_GENERICARCH)
@@ -565,18 +547,18 @@ config IOMMU_HELPER
 	def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB)
 
 config NR_CPUS
-	int "Maximum number of CPUs (2-255)"
-	range 2 255
+	int "Maximum number of CPUs (2-4096)"
+	range 2 4096
 	depends on SMP
 	default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000
 	default "8"
 	help
 	  This allows you to specify the maximum number of CPUs which this
-	  kernel will support.  The maximum supported value is 255 and the
+	  kernel will support.  The maximum supported value is 4096 and the
 	  minimum value which makes sense is 2.
 
 	  This is purely to save memory - each supported CPU adds
-	  approximately eight kilobytes to the kernel image.
+	  approximately one kilobyte to the kernel image.
 
 config SCHED_SMT
 	bool "SMT (Hyperthreading) scheduler support"
@@ -968,8 +950,8 @@ config NUMA_EMU
 	  number of nodes. This is only useful for debugging.
 
 config NODES_SHIFT
-	int "Max num nodes shift(1-15)"
-	range 1 15 if X86_64
+	int "Max num nodes shift(1-9)"
+	range 1 9 if X86_64
 	default "6" if X86_64
 	default "4" if X86_NUMAQ
 	default "3"
@@ -1477,8 +1459,7 @@ endmenu
 menu "Bus options (PCI etc.)"
 
 config PCI
-	bool "PCI support" if !X86_VISWS && !X86_VSMP
-	depends on !X86_VOYAGER
+	bool "PCI support"
 	default y
 	select ARCH_SUPPORTS_MSI if (X86_LOCAL_APIC && X86_IO_APIC)
 	help
@@ -1515,13 +1496,13 @@ config PCI_GOMMCONFIG
 config PCI_GODIRECT
 	bool "Direct"
 
-config PCI_GOANY
-	bool "Any"
-
 config PCI_GOOLPC
 	bool "OLPC"
 	depends on OLPC
 
+config PCI_GOANY
+	bool "Any"
+
 endchoice
 
 config PCI_BIOS
@@ -1538,9 +1519,8 @@ config PCI_MMCONFIG
 	depends on X86_32 && PCI && ACPI && (PCI_GOMMCONFIG || PCI_GOANY)
 
 config PCI_OLPC
-	bool
-	depends on PCI && PCI_GOOLPC
-	default y
+	def_bool y
+	depends on PCI && OLPC && (PCI_GOOLPC || PCI_GOANY)
 
 config PCI_DOMAINS
 	def_bool y
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 2ad6301849a1..3d22bb8175b4 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -399,6 +399,10 @@ config X86_TSC
 	def_bool y
 	depends on ((MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64
 
+config X86_CMPXCHG64
+	def_bool y
+	depends on X86_PAE || X86_64
+
 # this should be set for all -march=.. options where the compiler
 # generates cmov.
 config X86_CMOV
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index ac1e31ba4795..18363374d51a 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -6,15 +6,19 @@ config TRACE_IRQFLAGS_SUPPORT
 source "lib/Kconfig.debug"
 
 config NONPROMISC_DEVMEM
-	bool "Disable promiscuous /dev/mem"
+	bool "Filter access to /dev/mem"
 	help
-	  The /dev/mem file by default only allows userspace access to PCI
-	  space and the BIOS code and data regions. This is sufficient for
-	  dosemu and X and all common users of /dev/mem. With this config
-	  option, you allow userspace access to all of memory, including
-	  kernel and userspace memory. Accidental access to this is
-	  obviously disasterous, but specific access can be used by people
-	  debugging the kernel.
+	  If this option is left off, you allow userspace access to all
+	  of memory, including kernel and userspace memory. Accidental
+	  access to this is obviously disastrous, but specific access can
+	  be used by people debugging the kernel.
+
+	  If this option is switched on, the /dev/mem file only allows
+	  userspace access to PCI space and the BIOS code and data regions.
+	  This is sufficient for dosemu and X and all common users of
+	  /dev/mem.
+
+	  If in doubt, say Y.
 
 config EARLY_PRINTK
 	bool "Early printk" if EMBEDDED
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 3cff3c894cf3..5df0d1e330b1 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -210,12 +210,12 @@ all: bzImage
 
 # KBUILD_IMAGE specify target image being built
 KBUILD_IMAGE := $(boot)/bzImage
-zImage zlilo zdisk: KBUILD_IMAGE := arch/x86/boot/zImage
+zImage zlilo zdisk: KBUILD_IMAGE := $(boot)/zImage
 
 zImage bzImage: vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE)
 	$(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot
-	$(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/bzImage
+	$(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@
 
 compressed: zImage
 
diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c
index 90943f83e84d..e01aafd03bde 100644
--- a/arch/x86/boot/a20.c
+++ b/arch/x86/boot/a20.c
@@ -115,8 +115,6 @@ static void enable_a20_fast(void)
 
 int enable_a20(void)
 {
-	int loops = A20_ENABLE_LOOPS;
-
 #if defined(CONFIG_X86_ELAN)
 	/* Elan croaks if we try to touch the KBC */
 	enable_a20_fast();
@@ -128,6 +126,7 @@ int enable_a20(void)
 	enable_a20_kbc();
 	return 0;
 #else
+	int loops = A20_ENABLE_LOOPS;
 	while (loops--) {
 		/* First, check to see if A20 is already enabled
 		   (legacy free, etc.) */
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index d8819efac81d..1d5dff4123e1 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -30,6 +30,7 @@
 #include <asm/page.h>
 #include <asm/boot.h>
 #include <asm/msr.h>
+#include <asm/processor-flags.h>
 #include <asm/asm-offsets.h>
 
 .section ".text.head"
@@ -109,7 +110,7 @@ startup_32:
 
 	/* Enable PAE mode */
 	xorl	%eax, %eax
-	orl	$(1 << 5), %eax
+	orl	$(X86_CR4_PAE), %eax
 	movl	%eax, %cr4
 
 /*
@@ -170,7 +171,7 @@ startup_32:
 	pushl	%eax
 
 	/* Enter paged protected Mode, activating Long Mode */
-	movl	$0x80000001, %eax /* Enable Paging and Protected mode */
+	movl	$(X86_CR0_PG | X86_CR0_PE), %eax /* Enable Paging and Protected mode */
 	movl	%eax, %cr0
 
 	/* Jump from 32bit compatibility mode into 64bit mode. */
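Editor's aside: the hunk above replaces magic constants with the named control-register flags from asm/processor-flags.h. A standalone C sketch showing the architectural bit values behind those names (the values are fixed by the x86 architecture, not by this patch):

/* Standalone sketch: bit positions behind the names the patch
 * switches to, matching asm/processor-flags.h. */
#include <stdio.h>

#define X86_CR0_PE  0x00000001	/* Protection Enable */
#define X86_CR0_PG  0x80000000	/* Paging */
#define X86_CR4_PAE 0x00000020	/* Physical Address Extension */

int main(void)
{
	/* the literal the old code wrote to %cr4, and its replacement */
	printf("cr4: 1<<5 = %#x, X86_CR4_PAE = %#x\n", 1 << 5, X86_CR4_PAE);
	/* the literal the old code wrote to %cr0, and its replacement */
	printf("cr0: 0x80000001 = %#x, PG|PE = %#x\n",
	       0x80000001, X86_CR0_PG | X86_CR0_PE);
	return 0;
}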
diff --git a/arch/x86/boot/printf.c b/arch/x86/boot/printf.c
index c1d00c0274c4..50e47cdbdddd 100644
--- a/arch/x86/boot/printf.c
+++ b/arch/x86/boot/printf.c
@@ -56,7 +56,7 @@ static char *number(char *str, long num, int base, int size, int precision,
 	if (type & LEFT)
 		type &= ~ZEROPAD;
 	if (base < 2 || base > 36)
-		return 0;
+		return NULL;
 	c = (type & ZEROPAD) ? '0' : ' ';
 	sign = 0;
 	if (type & SIGN) {
diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c
index 40ecb8d7688c..b939cb476dec 100644
--- a/arch/x86/boot/video-vga.c
+++ b/arch/x86/boot/video-vga.c
@@ -259,8 +259,7 @@ static int vga_probe(void)
 	return mode_count[adapter];
 }
 
-__videocard video_vga =
-{
+__videocard video_vga = {
 	.card_name	= "VGA",
 	.probe		= vga_probe,
 	.set_mode	= vga_set_mode,
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index b5e329da166c..3aefbce2de48 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -370,13 +370,11 @@ quiet_ni_syscall:
 	PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi
 	PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi
 	PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx
-	PTREGSCALL stub32_sigsuspend, sys32_sigsuspend, %rcx
 	PTREGSCALL stub32_execve, sys32_execve, %rcx
 	PTREGSCALL stub32_fork, sys_fork, %rdi
 	PTREGSCALL stub32_clone, sys32_clone, %rdx
 	PTREGSCALL stub32_vfork, sys_vfork, %rdi
 	PTREGSCALL stub32_iopl, sys_iopl, %rsi
-	PTREGSCALL stub32_rt_sigsuspend, sys_rt_sigsuspend, %rdx
 
 ENTRY(ia32_ptregs_common)
 	popq %r11
@@ -476,7 +474,7 @@ ia32_sys_call_table:
 	.quad sys_ssetmask
 	.quad sys_setreuid16	/* 70 */
 	.quad sys_setregid16
-	.quad stub32_sigsuspend
+	.quad sys32_sigsuspend
 	.quad compat_sys_sigpending
 	.quad sys_sethostname
 	.quad compat_sys_setrlimit	/* 75 */
@@ -583,7 +581,7 @@ ia32_sys_call_table:
 	.quad sys32_rt_sigpending
 	.quad compat_sys_rt_sigtimedwait
 	.quad sys32_rt_sigqueueinfo
-	.quad stub32_rt_sigsuspend
+	.quad sys_rt_sigsuspend
 	.quad sys32_pread	/* 180 */
 	.quad sys32_pwrite
 	.quad sys_chown16
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5e618c3b4720..8a42b797cd6b 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -18,14 +18,13 @@ CFLAGS_tsc_64.o	:= $(nostackp)
 obj-y			:= process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
 obj-y			+= traps_$(BITS).o irq_$(BITS).o
 obj-y			+= time_$(BITS).o ioport.o ldt.o
-obj-y			+= setup_$(BITS).o i8259_$(BITS).o setup.o
+obj-y			+= setup_$(BITS).o i8259.o irqinit_$(BITS).o setup.o
 obj-$(CONFIG_X86_32)	+= sys_i386_32.o i386_ksyms_32.o
 obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
 obj-$(CONFIG_X86_64)	+= syscall_64.o vsyscall_64.o setup64.o
 obj-y			+= bootflag.o e820_$(BITS).o
 obj-y			+= pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
 obj-y			+= alternative.o i8253.o pci-nommu.o
-obj-$(CONFIG_X86_64)	+= bugs_64.o
 obj-y			+= tsc_$(BITS).o io_delay.o rtc.o
 
 obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline.o
@@ -82,6 +81,7 @@ obj-$(CONFIG_VMI)		+= vmi_32.o vmiclock_32.o
 obj-$(CONFIG_KVM_GUEST)		+= kvm.o
 obj-$(CONFIG_KVM_CLOCK)		+= kvmclock.o
 obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_$(BITS).o
+obj-$(CONFIG_PARAVIRT_CLOCK)	+= pvclock.o
 
 obj-$(CONFIG_PCSPKR_PLATFORM)	+= pcspeaker.o
 
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index c49ebcc6c41e..33c5216fd3e1 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -242,12 +242,19 @@ static int __init acpi_parse_madt(struct acpi_table_header *table)
 
 static void __cpuinit acpi_register_lapic(int id, u8 enabled)
 {
+	unsigned int ver = 0;
+
 	if (!enabled) {
 		++disabled_cpus;
 		return;
 	}
 
-	generic_processor_info(id, 0);
+#ifdef CONFIG_X86_32
+	if (boot_cpu_physical_apicid != -1U)
+		ver = apic_version[boot_cpu_physical_apicid];
+#endif
+
+	generic_processor_info(id, ver);
 }
 
 static int __init
@@ -767,8 +774,13 @@ static void __init acpi_register_lapic_address(unsigned long address)
 	mp_lapic_addr = address;
 
 	set_fixmap_nocache(FIX_APIC_BASE, address);
-	if (boot_cpu_physical_apicid == -1U)
+	if (boot_cpu_physical_apicid == -1U) {
 		boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+#ifdef CONFIG_X86_32
+		apic_version[boot_cpu_physical_apicid] =
+			GET_APIC_VERSION(apic_read(APIC_LVR));
+#endif
+	}
 }
 
 static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.S b/arch/x86/kernel/acpi/realmode/wakeup.S
index f9b77fb37e5b..3355973b12ac 100644
--- a/arch/x86/kernel/acpi/realmode/wakeup.S
+++ b/arch/x86/kernel/acpi/realmode/wakeup.S
@@ -5,6 +5,7 @@
 #include <asm/msr-index.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
+#include <asm/processor-flags.h>
 
 	.code16
 	.section ".header", "a"
@@ -24,6 +25,11 @@ pmode_gdt:	.quad	0
 realmode_flags:	.long	0
 real_magic:	.long	0
 trampoline_segment:	.word 0
+_pad1:		.byte	0
+wakeup_jmp:	.byte	0xea	/* ljmpw */
+wakeup_jmp_off:	.word	3f
+wakeup_jmp_seg:	.word	0
+wakeup_gdt:	.quad	0, 0, 0
 signature:	.long	0x51ee1111
 
 	.text
@@ -34,11 +40,34 @@ _start:
 	cli
 	cld
 
+	/* Apparently some dimwit BIOS programmers don't know how to
+	   program a PM to RM transition, and we might end up here with
+	   junk in the data segment descriptor registers.  The only way
+	   to repair that is to go into PM and fix it ourselves... */
+	movw	$16, %cx
+	lgdtl	%cs:wakeup_gdt
+	movl	%cr0, %eax
+	orb	$X86_CR0_PE, %al
+	movl	%eax, %cr0
+	jmp	1f
+1:	ljmpw	$8, $2f
+2:
+	movw	%cx, %ds
+	movw	%cx, %es
+	movw	%cx, %ss
+	movw	%cx, %fs
+	movw	%cx, %gs
+
+	andb	$~X86_CR0_PE, %al
+	movl	%eax, %cr0
+	jmp	wakeup_jmp
+3:
 	/* Set up segments */
 	movw	%cs, %ax
 	movw	%ax, %ds
 	movw	%ax, %es
 	movw	%ax, %ss
+	lidtl	wakeup_idt
 
 	movl	$wakeup_stack_end, %esp
 
@@ -98,7 +127,14 @@ bogus_real_magic:
 	jmp	1b
 
 	.data
-	.balign	4
+	.balign	8
+
+	/* This is the standard real-mode IDT */
+wakeup_idt:
+	.word	0xffff		/* limit */
+	.long	0		/* address */
+	.word	0
+
 	.globl HEAP, heap_end
 HEAP:
 	.long	wakeup_heap
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.h b/arch/x86/kernel/acpi/realmode/wakeup.h
index ef8166fe8020..69d38d0b2b64 100644
--- a/arch/x86/kernel/acpi/realmode/wakeup.h
+++ b/arch/x86/kernel/acpi/realmode/wakeup.h
@@ -24,6 +24,11 @@ struct wakeup_header {
 	u32 realmode_flags;
 	u32 real_magic;
 	u16 trampoline_segment;	/* segment with trampoline code, 64-bit only */
+	u8  _pad1;
+	u8  wakeup_jmp;
+	u16 wakeup_jmp_off;
+	u16 wakeup_jmp_seg;
+	u64 wakeup_gdt[3];
 	u32 signature;		/* To check we have correct structure */
 } __attribute__((__packed__));
 
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.lds.S b/arch/x86/kernel/acpi/realmode/wakeup.lds.S
index 22fab6c4be15..7da00b799cda 100644
--- a/arch/x86/kernel/acpi/realmode/wakeup.lds.S
+++ b/arch/x86/kernel/acpi/realmode/wakeup.lds.S
@@ -12,11 +12,6 @@ ENTRY(_start)
 
 SECTIONS
 {
-	. = HEADER_OFFSET;
-	.header : {
-		*(.header)
-	}
-
 	. = 0;
 	.text : {
 		 *(.text*)
@@ -50,6 +45,11 @@ SECTIONS
 		__bss_end = .;
 	}
 
+	. = HEADER_OFFSET;
+	.header : {
+		*(.header)
+	}
+
 	. = ALIGN(16);
 	_end = . ;
 
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index afc25ee9964b..36af01f029ed 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -50,6 +50,20 @@ int acpi_save_state_mem(void)
 
 	header->video_mode = saved_video_mode;
 
+	header->wakeup_jmp_seg = acpi_wakeup_address >> 4;
+	/* GDT[0]: GDT self-pointer */
+	header->wakeup_gdt[0] =
+		(u64)(sizeof(header->wakeup_gdt) - 1) +
+		((u64)(acpi_wakeup_address +
+			((char *)&header->wakeup_gdt - (char *)acpi_realmode))
+				<< 16);
+	/* GDT[1]: real-mode-like code segment */
+	header->wakeup_gdt[1] = (0x009bULL << 40) +
+		((u64)acpi_wakeup_address << 16) + 0xffff;
+	/* GDT[2]: real-mode-like data segment */
+	header->wakeup_gdt[2] = (0x0093ULL << 40) +
+		((u64)acpi_wakeup_address << 16) + 0xffff;
+
 #ifndef CONFIG_64BIT
 	store_gdt((struct desc_ptr *)&header->pmode_gdt);
 
@@ -111,7 +125,7 @@ void __init acpi_reserve_bootmem(void)
 		return;
 	}
 
-	acpi_wakeup_address = acpi_realmode;
+	acpi_wakeup_address = virt_to_phys((void *)acpi_realmode);
 }
 
 
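Editor's aside: the wakeup_gdt entries built above follow the standard x86 segment-descriptor layout — limit in bits 0-15, base[23:0] in bits 16-39, access byte in bits 40-47 (0x9b = 16-bit code, 0x93 = 16-bit data). A hedged sketch of the same packing, assuming the base fits in 24 bits, which holds for a low-memory wakeup address:

/* Sketch of the descriptor packing used for wakeup_gdt[1]/[2] above.
 * Assumes base fits in 24 bits (true for a low-memory wakeup address);
 * the base value below is hypothetical. */
#include <stdint.h>
#include <stdio.h>

static uint64_t realmode_seg(uint64_t base, uint64_t access)
{
	/* access byte in bits 40-47, base[23:0] in bits 16-39,
	 * limit 0xffff in bits 0-15 */
	return (access << 40) + (base << 16) + 0xffff;
}

int main(void)
{
	uint64_t base = 0x9a000;	/* hypothetical acpi_wakeup_address */

	printf("code: %#llx\n", (unsigned long long)realmode_seg(base, 0x9b));
	printf("data: %#llx\n", (unsigned long long)realmode_seg(base, 0x93));
	return 0;
}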
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 479926d9e004..e819362c7068 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -35,6 +35,18 @@ int fallback_aper_force __initdata;
 
 int fix_aperture __initdata = 1;
 
+struct bus_dev_range {
+	int bus;
+	int dev_base;
+	int dev_limit;
+};
+
+static struct bus_dev_range bus_dev_ranges[] __initdata = {
+	{ 0x00, 0x18, 0x20},
+	{ 0xff, 0x00, 0x20},
+	{ 0xfe, 0x00, 0x20}
+};
+
 static struct resource gart_resource = {
 	.name	= "GART",
 	.flags	= IORESOURCE_MEM,
@@ -55,8 +67,9 @@ static u32 __init allocate_aperture(void)
 	u32 aper_size;
 	void *p;
 
-	if (fallback_aper_order > 7)
-		fallback_aper_order = 7;
+	/* aper_size should <= 1G */
+	if (fallback_aper_order > 5)
+		fallback_aper_order = 5;
 	aper_size = (32 * 1024 * 1024) << fallback_aper_order;
 
 	/*
@@ -65,7 +78,20 @@ static u32 __init allocate_aperture(void)
 	 * memory. Unfortunately we cannot move it up because that would
 	 * make the IOMMU useless.
 	 */
-	p = __alloc_bootmem_nopanic(aper_size, aper_size, 0);
+	/*
+	 * using 512M as goal, in case kexec will load kernel_big
+	 * that will do the on position decompress, and could overlap with
+	 * that positon with gart that is used.
+	 * sequende:
+	 * kernel_small
+	 * ==> kexec (with kdump trigger path or previous doesn't shutdown gart)
+	 * ==> kernel_small(gart area become e820_reserved)
+	 * ==> kexec (with kdump trigger path or previous doesn't shutdown gart)
+	 * ==> kerne_big (uncompressed size will be big than 64M or 128M)
+	 * so don't use 512M below as gart iommu, leave the space for kernel
+	 * code for safe
+	 */
+	p = __alloc_bootmem_nopanic(aper_size, aper_size, 512ULL<<20);
 	if (!p || __pa(p)+aper_size > 0xffffffff) {
 		printk(KERN_ERR
 			"Cannot allocate aperture memory hole (%p,%uK)\n",
@@ -83,69 +109,53 @@ static u32 __init allocate_aperture(void)
 	return (u32)__pa(p);
 }
 
-static int __init aperture_valid(u64 aper_base, u32 aper_size)
-{
-	if (!aper_base)
-		return 0;
-
-	if (aper_base + aper_size > 0x100000000UL) {
-		printk(KERN_ERR "Aperture beyond 4GB. Ignoring.\n");
-		return 0;
-	}
-	if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) {
-		printk(KERN_ERR "Aperture pointing to e820 RAM. Ignoring.\n");
-		return 0;
-	}
-	if (aper_size < 64*1024*1024) {
-		printk(KERN_ERR "Aperture too small (%d MB)\n", aper_size>>20);
-		return 0;
-	}
-
-	return 1;
-}
 
 /* Find a PCI capability */
-static __u32 __init find_cap(int num, int slot, int func, int cap)
+static u32 __init find_cap(int bus, int slot, int func, int cap)
 {
 	int bytes;
 	u8 pos;
 
-	if (!(read_pci_config_16(num, slot, func, PCI_STATUS) &
+	if (!(read_pci_config_16(bus, slot, func, PCI_STATUS) &
 						PCI_STATUS_CAP_LIST))
 		return 0;
 
-	pos = read_pci_config_byte(num, slot, func, PCI_CAPABILITY_LIST);
+	pos = read_pci_config_byte(bus, slot, func, PCI_CAPABILITY_LIST);
 	for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) {
 		u8 id;
 
 		pos &= ~3;
-		id = read_pci_config_byte(num, slot, func, pos+PCI_CAP_LIST_ID);
+		id = read_pci_config_byte(bus, slot, func, pos+PCI_CAP_LIST_ID);
 		if (id == 0xff)
 			break;
 		if (id == cap)
 			return pos;
-		pos = read_pci_config_byte(num, slot, func,
+		pos = read_pci_config_byte(bus, slot, func,
 						pos+PCI_CAP_LIST_NEXT);
 	}
 	return 0;
 }
 
 /* Read a standard AGPv3 bridge header */
-static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order)
+static u32 __init read_agp(int bus, int slot, int func, int cap, u32 *order)
 {
 	u32 apsize;
 	u32 apsizereg;
 	int nbits;
 	u32 aper_low, aper_hi;
 	u64 aper;
+	u32 old_order;
 
-	printk(KERN_INFO "AGP bridge at %02x:%02x:%02x\n", num, slot, func);
-	apsizereg = read_pci_config_16(num, slot, func, cap + 0x14);
+	printk(KERN_INFO "AGP bridge at %02x:%02x:%02x\n", bus, slot, func);
+	apsizereg = read_pci_config_16(bus, slot, func, cap + 0x14);
 	if (apsizereg == 0xffffffff) {
 		printk(KERN_ERR "APSIZE in AGP bridge unreadable\n");
 		return 0;
 	}
 
+	/* old_order could be the value from NB gart setting */
+	old_order = *order;
+
 	apsize = apsizereg & 0xfff;
 	/* Some BIOS use weird encodings not in the AGPv3 table. */
 	if (apsize & 0xff)
@@ -155,14 +165,26 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order)
 	if ((int)*order < 0) /* < 32MB */
 		*order = 0;
 
-	aper_low = read_pci_config(num, slot, func, 0x10);
-	aper_hi = read_pci_config(num, slot, func, 0x14);
+	aper_low = read_pci_config(bus, slot, func, 0x10);
+	aper_hi = read_pci_config(bus, slot, func, 0x14);
 	aper = (aper_low & ~((1<<22)-1)) | ((u64)aper_hi << 32);
 
+	/*
+	 * On some sick chips, APSIZE is 0. It means it wants 4G
+	 * so let double check that order, and lets trust AMD NB settings:
+	 */
+	printk(KERN_INFO "Aperture from AGP @ %Lx old size %u MB\n",
+			aper, 32 << old_order);
+	if (aper + (32ULL<<(20 + *order)) > 0x100000000ULL) {
+		printk(KERN_INFO "Aperture size %u MB (APSIZE %x) is not right, using settings from NB\n",
+				32 << *order, apsizereg);
+		*order = old_order;
+	}
+
 	printk(KERN_INFO "Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n",
 			aper, 32 << *order, apsizereg);
 
-	if (!aperture_valid(aper, (32*1024*1024) << *order))
+	if (!aperture_valid(aper, (32*1024*1024) << *order, 32<<20))
 		return 0;
 	return (u32)aper;
 }
@@ -180,17 +202,17 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order)
  * the AGP bridges should be always an own bus on the HT hierarchy,
  * but do it here for future safety.
  */
-static __u32 __init search_agp_bridge(u32 *order, int *valid_agp)
+static u32 __init search_agp_bridge(u32 *order, int *valid_agp)
 {
-	int num, slot, func;
+	int bus, slot, func;
 
 	/* Poor man's PCI discovery */
-	for (num = 0; num < 256; num++) {
+	for (bus = 0; bus < 256; bus++) {
 		for (slot = 0; slot < 32; slot++) {
 			for (func = 0; func < 8; func++) {
 				u32 class, cap;
 				u8 type;
-				class = read_pci_config(num, slot, func,
+				class = read_pci_config(bus, slot, func,
 							PCI_CLASS_REVISION);
 				if (class == 0xffffffff)
 					break;
@@ -199,17 +221,17 @@ static __u32 __init search_agp_bridge(u32 *order, int *valid_agp)
 				case PCI_CLASS_BRIDGE_HOST:
 				case PCI_CLASS_BRIDGE_OTHER: /* needed? */
 					/* AGP bridge? */
-					cap = find_cap(num, slot, func,
+					cap = find_cap(bus, slot, func,
 							PCI_CAP_ID_AGP);
 					if (!cap)
 						break;
 					*valid_agp = 1;
-					return read_agp(num, slot, func, cap,
+					return read_agp(bus, slot, func, cap,
 							order);
 				}
 
 				/* No multi-function device? */
-				type = read_pci_config_byte(num, slot, func,
+				type = read_pci_config_byte(bus, slot, func,
 							PCI_HEADER_TYPE);
 				if (!(type & 0x80))
 					break;
@@ -249,36 +271,50 @@ void __init early_gart_iommu_check(void)
 	 * or BIOS forget to put that in reserved.
 	 * try to update e820 to make that region as reserved.
 	 */
-	int fix, num;
+	int i, fix, slot;
 	u32 ctl;
 	u32 aper_size = 0, aper_order = 0, last_aper_order = 0;
 	u64 aper_base = 0, last_aper_base = 0;
-	int aper_enabled = 0, last_aper_enabled = 0;
+	int aper_enabled = 0, last_aper_enabled = 0, last_valid = 0;
 
 	if (!early_pci_allowed())
 		return;
 
+	/* This is mostly duplicate of iommu_hole_init */
 	fix = 0;
-	for (num = 24; num < 32; num++) {
-		if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
-			continue;
+	for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
+		int bus;
+		int dev_base, dev_limit;
 
-		ctl = read_pci_config(0, num, 3, 0x90);
-		aper_enabled = ctl & 1;
-		aper_order = (ctl >> 1) & 7;
-		aper_size = (32 * 1024 * 1024) << aper_order;
-		aper_base = read_pci_config(0, num, 3, 0x94) & 0x7fff;
-		aper_base <<= 25;
-
-		if ((last_aper_order && aper_order != last_aper_order) ||
-		    (last_aper_base && aper_base != last_aper_base) ||
-		    (last_aper_enabled && aper_enabled != last_aper_enabled)) {
-			fix = 1;
-			break;
+		bus = bus_dev_ranges[i].bus;
+		dev_base = bus_dev_ranges[i].dev_base;
+		dev_limit = bus_dev_ranges[i].dev_limit;
+
+		for (slot = dev_base; slot < dev_limit; slot++) {
+			if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
+				continue;
+
+			ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
+			aper_enabled = ctl & AMD64_GARTEN;
+			aper_order = (ctl >> 1) & 7;
+			aper_size = (32 * 1024 * 1024) << aper_order;
+			aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff;
+			aper_base <<= 25;
+
+			if (last_valid) {
+				if ((aper_order != last_aper_order) ||
+				    (aper_base != last_aper_base) ||
+				    (aper_enabled != last_aper_enabled)) {
+					fix = 1;
+					break;
+				}
+			}
+
+			last_aper_order = aper_order;
+			last_aper_base = aper_base;
+			last_aper_enabled = aper_enabled;
+			last_valid = 1;
 		}
-		last_aper_order = aper_order;
-		last_aper_base = aper_base;
-		last_aper_enabled = aper_enabled;
 	}
 
 	if (!fix && !aper_enabled)
@@ -290,32 +326,46 @@ void __init early_gart_iommu_check(void)
 	if (gart_fix_e820 && !fix && aper_enabled) {
 		if (e820_any_mapped(aper_base, aper_base + aper_size,
 				    E820_RAM)) {
-			/* reserved it, so we can resuse it in second kernel */
+			/* reserve it, so we can reuse it in second kernel */
 			printk(KERN_INFO "update e820 for GART\n");
 			add_memory_region(aper_base, aper_size, E820_RESERVED);
 			update_e820();
 		}
-		return;
 	}
 
+	if (!fix)
+		return;
+
 	/* different nodes have different setting, disable them all at first*/
-	for (num = 24; num < 32; num++) {
-		if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
-			continue;
+	for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
+		int bus;
+		int dev_base, dev_limit;
+
+		bus = bus_dev_ranges[i].bus;
+		dev_base = bus_dev_ranges[i].dev_base;
+		dev_limit = bus_dev_ranges[i].dev_limit;
+
+		for (slot = dev_base; slot < dev_limit; slot++) {
+			if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
+				continue;
 
-		ctl = read_pci_config(0, num, 3, 0x90);
-		ctl &= ~1;
-		write_pci_config(0, num, 3, 0x90, ctl);
+			ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
+			ctl &= ~AMD64_GARTEN;
+			write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl);
+		}
 	}
 
 }
 
+static int __initdata printed_gart_size_msg;
+
 void __init gart_iommu_hole_init(void)
 {
+	u32 agp_aper_base = 0, agp_aper_order = 0;
 	u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0;
 	u64 aper_base, last_aper_base = 0;
-	int fix, num, valid_agp = 0;
-	int node;
+	int fix, slot, valid_agp = 0;
+	int i, node;
 
 	if (gart_iommu_aperture_disabled || !fix_aperture ||
 	    !early_pci_allowed())
@@ -323,38 +373,63 @@ void __init gart_iommu_hole_init(void)
 
 	printk(KERN_INFO  "Checking aperture...\n");
 
+	if (!fallback_aper_force)
+		agp_aper_base = search_agp_bridge(&agp_aper_order, &valid_agp);
+
 	fix = 0;
 	node = 0;
-	for (num = 24; num < 32; num++) {
-		if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
-			continue;
+	for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
+		int bus;
+		int dev_base, dev_limit;
 
-		iommu_detected = 1;
-		gart_iommu_aperture = 1;
-
-		aper_order = (read_pci_config(0, num, 3, 0x90) >> 1) & 7;
-		aper_size = (32 * 1024 * 1024) << aper_order;
-		aper_base = read_pci_config(0, num, 3, 0x94) & 0x7fff;
-		aper_base <<= 25;
+		bus = bus_dev_ranges[i].bus;
+		dev_base = bus_dev_ranges[i].dev_base;
+		dev_limit = bus_dev_ranges[i].dev_limit;
 
-		printk(KERN_INFO "Node %d: aperture @ %Lx size %u MB\n",
-				node, aper_base, aper_size >> 20);
-		node++;
+		for (slot = dev_base; slot < dev_limit; slot++) {
+			if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
+				continue;
 
-		if (!aperture_valid(aper_base, aper_size)) {
-			fix = 1;
-			break;
-		}
+			iommu_detected = 1;
+			gart_iommu_aperture = 1;
+
+			aper_order = (read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL) >> 1) & 7;
+			aper_size = (32 * 1024 * 1024) << aper_order;
+			aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff;
+			aper_base <<= 25;
+
+			printk(KERN_INFO "Node %d: aperture @ %Lx size %u MB\n",
+					node, aper_base, aper_size >> 20);
+			node++;
+
+			if (!aperture_valid(aper_base, aper_size, 64<<20)) {
+				if (valid_agp && agp_aper_base &&
+				    agp_aper_base == aper_base &&
+				    agp_aper_order == aper_order) {
+					/* the same between two setting from NB and agp */
+					if (!no_iommu && end_pfn > MAX_DMA32_PFN && !printed_gart_size_msg) {
+						printk(KERN_ERR "you are using iommu with agp, but GART size is less than 64M\n");
+						printk(KERN_ERR "please increase GART size in your BIOS setup\n");
+						printk(KERN_ERR "if BIOS doesn't have that option, contact your HW vendor!\n");
+						printed_gart_size_msg = 1;
+					}
+				} else {
+					fix = 1;
+					goto out;
+				}
+			}
 
-		if ((last_aper_order && aper_order != last_aper_order) ||
-		    (last_aper_base && aper_base != last_aper_base)) {
-			fix = 1;
-			break;
+			if ((last_aper_order && aper_order != last_aper_order) ||
+			    (last_aper_base && aper_base != last_aper_base)) {
+				fix = 1;
+				goto out;
+			}
+			last_aper_order = aper_order;
+			last_aper_base = aper_base;
 		}
-		last_aper_order = aper_order;
-		last_aper_base = aper_base;
 	}
 
+out:
 	if (!fix && !fallback_aper_force) {
 		if (last_aper_base) {
 			unsigned long n = (32 * 1024 * 1024) << last_aper_order;
@@ -364,8 +439,10 @@ void __init gart_iommu_hole_init(void)
 		return;
 	}
 
-	if (!fallback_aper_force)
-		aper_alloc = search_agp_bridge(&aper_order, &valid_agp);
+	if (!fallback_aper_force) {
+		aper_alloc = agp_aper_base;
+		aper_order = agp_aper_order;
+	}
 
 	if (aper_alloc) {
 		/* Got the aperture from the AGP bridge */
@@ -401,16 +478,24 @@ void __init gart_iommu_hole_init(void)
 	}
 
 	/* Fix up the north bridges */
-	for (num = 24; num < 32; num++) {
-		if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
-			continue;
+	for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
+		int bus;
+		int dev_base, dev_limit;
 
-		/*
-		 * Don't enable translation yet. That is done later.
-		 * Assume this BIOS didn't initialise the GART so
-		 * just overwrite all previous bits
-		 */
-		write_pci_config(0, num, 3, 0x90, aper_order<<1);
-		write_pci_config(0, num, 3, 0x94, aper_alloc>>25);
+		bus = bus_dev_ranges[i].bus;
+		dev_base = bus_dev_ranges[i].dev_base;
+		dev_limit = bus_dev_ranges[i].dev_limit;
+		for (slot = dev_base; slot < dev_limit; slot++) {
+			if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
+				continue;
+
+			/* Don't enable translation yet. That is done later.
+			   Assume this BIOS didn't initialise the GART so
+			   just overwrite all previous bits */
+			write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, aper_order << 1);
+			write_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE, aper_alloc >> 25);
+		}
 	}
+
+	set_up_gart_resume(aper_order, aper_alloc);
 }
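Editor's aside: throughout this file the aperture geometry is recovered from two northbridge registers — bits [3:1] of the control register give the order (size = 32MB << order), bit 0 is the enable bit, and the base register holds the physical base shifted right by 25. A small standalone sketch of that decoding, with made-up register values:

/* Sketch of the GART register decoding used throughout this file:
 * CTL bits [3:1] = order (size = 32MB << order), CTL bit 0 = enable,
 * BASE register = physical base >> 25. Register values are made up. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t ctl = 0x07;	  /* hypothetical AMD64_GARTAPERTURECTL value */
	uint32_t base_reg = 0x40; /* hypothetical AMD64_GARTAPERTUREBASE value */

	uint32_t order = (ctl >> 1) & 7;
	uint32_t size = (32u * 1024 * 1024) << order;
	uint64_t base = (uint64_t)(base_reg & 0x7fff) << 25;

	printf("enabled=%u order=%u size=%u MB base=%#llx\n",
	       ctl & 1, order, size >> 20, (unsigned long long)base);
	return 0;	/* prints: enabled=1 order=3 size=256 MB base=0x80000000 */
}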
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 4b99b1bdeb6c..45d8da405ad9 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -64,9 +64,8 @@ static int enable_local_apic __initdata;
 
 /* Local APIC timer verification ok */
 static int local_apic_timer_verify_ok;
-/* Disable local APIC timer from the kernel commandline or via dmi quirk
-   or using CPU MSR check */
-int local_apic_timer_disabled;
+/* Disable local APIC timer from the kernel commandline or via dmi quirk */
+static int local_apic_timer_disabled;
 /* Local APIC timer works in C2 */
 int local_apic_timer_c2_ok;
 EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
@@ -1154,9 +1153,6 @@ static int __init detect_init_APIC(void)
 	if (l & MSR_IA32_APICBASE_ENABLE)
 		mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
 
-	if (nmi_watchdog != NMI_NONE && nmi_watchdog != NMI_DISABLED)
-		nmi_watchdog = NMI_LOCAL_APIC;
-
 	printk(KERN_INFO "Found and enabled local APIC!\n");
 
 	apic_pm_activate();
@@ -1269,6 +1265,10 @@ int __init APIC_init_uniprocessor(void)
 
 	setup_local_APIC();
 
+#ifdef CONFIG_X86_IO_APIC
+	if (!smp_found_config || skip_ioapic_setup || !nr_ioapics)
+#endif
+		localise_nmi_watchdog();
 	end_local_APIC_setup();
 #ifdef CONFIG_X86_IO_APIC
 	if (smp_found_config)
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 5910020c3f24..3ef7752aa8e5 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -43,7 +43,7 @@
 #include <mach_ipi.h>
 #include <mach_apic.h>
 
-int disable_apic_timer __cpuinitdata;
+static int disable_apic_timer __cpuinitdata;
 static int apic_calibrate_pmtmr __initdata;
 int disable_apic;
 
@@ -422,32 +422,8 @@ void __init setup_boot_APIC_clock(void)
 	setup_APIC_timer();
 }
 
-/*
- * AMD C1E enabled CPUs have a real nasty problem: Some BIOSes set the
- * C1E flag only in the secondary CPU, so when we detect the wreckage
- * we already have enabled the boot CPU local apic timer. Check, if
- * disable_apic_timer is set and the DUMMY flag is cleared. If yes,
- * set the DUMMY flag again and force the broadcast mode in the
- * clockevents layer.
- */
-static void __cpuinit check_boot_apic_timer_broadcast(void)
-{
-	if (!disable_apic_timer ||
-	    (lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY))
-		return;
-
-	printk(KERN_INFO "AMD C1E detected late. Force timer broadcast.\n");
-	lapic_clockevent.features |= CLOCK_EVT_FEAT_DUMMY;
-
-	local_irq_enable();
-	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
-			   &boot_cpu_physical_apicid);
-	local_irq_disable();
-}
-
 void __cpuinit setup_secondary_APIC_clock(void)
 {
-	check_boot_apic_timer_broadcast();
 	setup_APIC_timer();
 }
 
@@ -534,7 +510,7 @@ int setup_profiling_timer(unsigned int multiplier)
  */
 void clear_local_APIC(void)
 {
-	int maxlvt = lapic_get_maxlvt();
+	int maxlvt;
 	u32 v;
 
 	/* APIC hasn't been mapped yet */
@@ -875,7 +851,7 @@ static int __init detect_init_APIC(void)
 
 void __init early_init_lapic_mapping(void)
 {
-	unsigned long apic_phys;
+	unsigned long phys_addr;
 
 	/*
 	 * If no local APIC can be found then go out
@@ -884,11 +860,11 @@ void __init early_init_lapic_mapping(void)
 	if (!smp_found_config)
 		return;
 
-	apic_phys = mp_lapic_addr;
+	phys_addr = mp_lapic_addr;
 
-	set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
+	set_fixmap_nocache(FIX_APIC_BASE, phys_addr);
 	apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
-		    APIC_BASE, apic_phys);
+		    APIC_BASE, phys_addr);
 
 	/*
 	 * Fetch the APIC ID of the BSP in case we have a
@@ -954,6 +930,8 @@ int __init APIC_init_uniprocessor(void)
 	if (!skip_ioapic_setup && nr_ioapics)
 		enable_IO_APIC();
 
+	if (!smp_found_config || skip_ioapic_setup || !nr_ioapics)
+		localise_nmi_watchdog();
 	end_local_APIC_setup();
 
 	if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index bf9290e29013..00e6d1370954 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
@@ -228,6 +228,7 @@ | |||
228 | #include <linux/suspend.h> | 228 | #include <linux/suspend.h> |
229 | #include <linux/kthread.h> | 229 | #include <linux/kthread.h> |
230 | #include <linux/jiffies.h> | 230 | #include <linux/jiffies.h> |
231 | #include <linux/smp_lock.h> | ||
231 | 232 | ||
232 | #include <asm/system.h> | 233 | #include <asm/system.h> |
233 | #include <asm/uaccess.h> | 234 | #include <asm/uaccess.h> |
@@ -1149,7 +1150,7 @@ static void queue_event(apm_event_t event, struct apm_user *sender) | |||
1149 | as->event_tail = 0; | 1150 | as->event_tail = 0; |
1150 | } | 1151 | } |
1151 | as->events[as->event_head] = event; | 1152 | as->events[as->event_head] = event; |
1152 | if ((!as->suser) || (!as->writer)) | 1153 | if (!as->suser || !as->writer) |
1153 | continue; | 1154 | continue; |
1154 | switch (event) { | 1155 | switch (event) { |
1155 | case APM_SYS_SUSPEND: | 1156 | case APM_SYS_SUSPEND: |
@@ -1396,7 +1397,7 @@ static void apm_mainloop(void) | |||
1396 | 1397 | ||
1397 | static int check_apm_user(struct apm_user *as, const char *func) | 1398 | static int check_apm_user(struct apm_user *as, const char *func) |
1398 | { | 1399 | { |
1399 | if ((as == NULL) || (as->magic != APM_BIOS_MAGIC)) { | 1400 | if (as == NULL || as->magic != APM_BIOS_MAGIC) { |
1400 | printk(KERN_ERR "apm: %s passed bad filp\n", func); | 1401 | printk(KERN_ERR "apm: %s passed bad filp\n", func); |
1401 | return 1; | 1402 | return 1; |
1402 | } | 1403 | } |
@@ -1459,18 +1460,19 @@ static unsigned int do_poll(struct file *fp, poll_table *wait) | |||
1459 | return 0; | 1460 | return 0; |
1460 | } | 1461 | } |
1461 | 1462 | ||
1462 | static int do_ioctl(struct inode *inode, struct file *filp, | 1463 | static long do_ioctl(struct file *filp, u_int cmd, u_long arg) |
1463 | u_int cmd, u_long arg) | ||
1464 | { | 1464 | { |
1465 | struct apm_user *as; | 1465 | struct apm_user *as; |
1466 | int ret; | ||
1466 | 1467 | ||
1467 | as = filp->private_data; | 1468 | as = filp->private_data; |
1468 | if (check_apm_user(as, "ioctl")) | 1469 | if (check_apm_user(as, "ioctl")) |
1469 | return -EIO; | 1470 | return -EIO; |
1470 | if ((!as->suser) || (!as->writer)) | 1471 | if (!as->suser || !as->writer) |
1471 | return -EPERM; | 1472 | return -EPERM; |
1472 | switch (cmd) { | 1473 | switch (cmd) { |
1473 | case APM_IOC_STANDBY: | 1474 | case APM_IOC_STANDBY: |
1475 | lock_kernel(); | ||
1474 | if (as->standbys_read > 0) { | 1476 | if (as->standbys_read > 0) { |
1475 | as->standbys_read--; | 1477 | as->standbys_read--; |
1476 | as->standbys_pending--; | 1478 | as->standbys_pending--; |
@@ -1479,8 +1481,10 @@ static int do_ioctl(struct inode *inode, struct file *filp, | |||
1479 | queue_event(APM_USER_STANDBY, as); | 1481 | queue_event(APM_USER_STANDBY, as); |
1480 | if (standbys_pending <= 0) | 1482 | if (standbys_pending <= 0) |
1481 | standby(); | 1483 | standby(); |
1484 | unlock_kernel(); | ||
1482 | break; | 1485 | break; |
1483 | case APM_IOC_SUSPEND: | 1486 | case APM_IOC_SUSPEND: |
1487 | lock_kernel(); | ||
1484 | if (as->suspends_read > 0) { | 1488 | if (as->suspends_read > 0) { |
1485 | as->suspends_read--; | 1489 | as->suspends_read--; |
1486 | as->suspends_pending--; | 1490 | as->suspends_pending--; |
@@ -1488,16 +1492,17 @@ static int do_ioctl(struct inode *inode, struct file *filp, | |||
1488 | } else | 1492 | } else |
1489 | queue_event(APM_USER_SUSPEND, as); | 1493 | queue_event(APM_USER_SUSPEND, as); |
1490 | if (suspends_pending <= 0) { | 1494 | if (suspends_pending <= 0) { |
1491 | return suspend(1); | 1495 | ret = suspend(1); |
1492 | } else { | 1496 | } else { |
1493 | as->suspend_wait = 1; | 1497 | as->suspend_wait = 1; |
1494 | wait_event_interruptible(apm_suspend_waitqueue, | 1498 | wait_event_interruptible(apm_suspend_waitqueue, |
1495 | as->suspend_wait == 0); | 1499 | as->suspend_wait == 0); |
1496 | return as->suspend_result; | 1500 | ret = as->suspend_result; |
1497 | } | 1501 | } |
1498 | break; | 1502 | unlock_kernel(); |
1503 | return ret; | ||
1499 | default: | 1504 | default: |
1500 | return -EINVAL; | 1505 | return -ENOTTY; |
1501 | } | 1506 | } |
1502 | return 0; | 1507 | return 0; |
1503 | } | 1508 | } |
@@ -1860,7 +1865,7 @@ static const struct file_operations apm_bios_fops = { | |||
1860 | .owner = THIS_MODULE, | 1865 | .owner = THIS_MODULE, |
1861 | .read = do_read, | 1866 | .read = do_read, |
1862 | .poll = do_poll, | 1867 | .poll = do_poll, |
1863 | .ioctl = do_ioctl, | 1868 | .unlocked_ioctl = do_ioctl, |
1864 | .open = do_open, | 1869 | .open = do_open, |
1865 | .release = do_release, | 1870 | .release = do_release, |
1866 | }; | 1871 | }; |
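The apm_32.c hunk above is the standard BKL push-down recipe for moving a driver from .ioctl to .unlocked_ioctl: drop the inode argument, widen the return type to long, take lock_kernel()/unlock_kernel() inside the handler around only the paths that still need BKL serialization, and return -ENOTTY (the conventional errno for an unrecognized ioctl) instead of -EINVAL. A minimal sketch of the same pattern, using hypothetical names (my_ioctl, my_fops, MY_IOC_DOIT, do_something) rather than the APM ones:

	#include <linux/fs.h>
	#include <linux/module.h>
	#include <linux/smp_lock.h>

	static long my_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
	{
		long ret = 0;

		switch (cmd) {
		case MY_IOC_DOIT:			/* hypothetical command */
			lock_kernel();			/* BKL now scoped to this path only */
			ret = do_something(filp);	/* hypothetical helper */
			unlock_kernel();
			break;
		default:
			return -ENOTTY;
		}
		return ret;
	}

	static const struct file_operations my_fops = {
		.owner		= THIS_MODULE,
		.unlocked_ioctl	= my_ioctl,
	};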
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index a0c6f8190887..65b1be5fe9ce 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
@@ -6,11 +6,15 @@ obj-y := intel_cacheinfo.o addon_cpuid_features.o | |||
6 | obj-y += proc.o feature_names.o | 6 | obj-y += proc.o feature_names.o |
7 | 7 | ||
8 | obj-$(CONFIG_X86_32) += common.o bugs.o | 8 | obj-$(CONFIG_X86_32) += common.o bugs.o |
9 | obj-$(CONFIG_X86_64) += bugs_64.o | ||
9 | obj-$(CONFIG_X86_32) += amd.o | 10 | obj-$(CONFIG_X86_32) += amd.o |
11 | obj-$(CONFIG_X86_64) += amd_64.o | ||
10 | obj-$(CONFIG_X86_32) += cyrix.o | 12 | obj-$(CONFIG_X86_32) += cyrix.o |
11 | obj-$(CONFIG_X86_32) += centaur.o | 13 | obj-$(CONFIG_X86_32) += centaur.o |
14 | obj-$(CONFIG_X86_64) += centaur_64.o | ||
12 | obj-$(CONFIG_X86_32) += transmeta.o | 15 | obj-$(CONFIG_X86_32) += transmeta.o |
13 | obj-$(CONFIG_X86_32) += intel.o | 16 | obj-$(CONFIG_X86_32) += intel.o |
17 | obj-$(CONFIG_X86_64) += intel_64.o | ||
14 | obj-$(CONFIG_X86_32) += umc.o | 18 | obj-$(CONFIG_X86_32) += umc.o |
15 | 19 | ||
16 | obj-$(CONFIG_X86_MCE) += mcheck/ | 20 | obj-$(CONFIG_X86_MCE) += mcheck/ |
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index c2e1ce33c7cb..84a8220a6072 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c | |||
@@ -1,9 +1,7 @@ | |||
1 | |||
2 | /* | 1 | /* |
3 | * Routines to identify additional cpu features that are scattered in | 2 | * Routines to identify additional cpu features that are scattered in |
4 | * cpuid space. | 3 | * cpuid space. |
5 | */ | 4 | */ |
6 | |||
7 | #include <linux/cpu.h> | 5 | #include <linux/cpu.h> |
8 | 6 | ||
9 | #include <asm/pat.h> | 7 | #include <asm/pat.h> |
@@ -53,19 +51,20 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) | |||
53 | #ifdef CONFIG_X86_PAT | 51 | #ifdef CONFIG_X86_PAT |
54 | void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) | 52 | void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) |
55 | { | 53 | { |
54 | if (!cpu_has_pat) | ||
55 | pat_disable("PAT not supported by CPU."); | ||
56 | |||
56 | switch (c->x86_vendor) { | 57 | switch (c->x86_vendor) { |
57 | case X86_VENDOR_AMD: | ||
58 | if (c->x86 >= 0xf && c->x86 <= 0x11) | ||
59 | return; | ||
60 | break; | ||
61 | case X86_VENDOR_INTEL: | 58 | case X86_VENDOR_INTEL: |
62 | if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15)) | 59 | if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15)) |
63 | return; | 60 | return; |
64 | break; | 61 | break; |
62 | case X86_VENDOR_AMD: | ||
63 | case X86_VENDOR_CENTAUR: | ||
64 | case X86_VENDOR_TRANSMETA: | ||
65 | return; | ||
65 | } | 66 | } |
66 | 67 | ||
67 | pat_disable(cpu_has_pat ? | 68 | pat_disable("PAT disabled. Not yet verified on this CPU type."); |
68 | "PAT disabled. Not yet verified on this CPU type." : | ||
69 | "PAT not supported by CPU."); | ||
70 | } | 69 | } |
71 | #endif | 70 | #endif |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 245866828294..81a07ca65d44 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -24,43 +24,6 @@ | |||
24 | extern void vide(void); | 24 | extern void vide(void); |
25 | __asm__(".align 4\nvide: ret"); | 25 | __asm__(".align 4\nvide: ret"); |
26 | 26 | ||
27 | #ifdef CONFIG_X86_LOCAL_APIC | ||
28 | #define ENABLE_C1E_MASK 0x18000000 | ||
29 | #define CPUID_PROCESSOR_SIGNATURE 1 | ||
30 | #define CPUID_XFAM 0x0ff00000 | ||
31 | #define CPUID_XFAM_K8 0x00000000 | ||
32 | #define CPUID_XFAM_10H 0x00100000 | ||
33 | #define CPUID_XFAM_11H 0x00200000 | ||
34 | #define CPUID_XMOD 0x000f0000 | ||
35 | #define CPUID_XMOD_REV_F 0x00040000 | ||
36 | |||
37 | /* AMD systems with C1E don't have a working lAPIC timer. Check for that. */ | ||
38 | static __cpuinit int amd_apic_timer_broken(void) | ||
39 | { | ||
40 | u32 lo, hi; | ||
41 | u32 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); | ||
42 | switch (eax & CPUID_XFAM) { | ||
43 | case CPUID_XFAM_K8: | ||
44 | if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F) | ||
45 | break; | ||
46 | case CPUID_XFAM_10H: | ||
47 | case CPUID_XFAM_11H: | ||
48 | rdmsr(MSR_K8_ENABLE_C1E, lo, hi); | ||
49 | if (lo & ENABLE_C1E_MASK) { | ||
50 | if (smp_processor_id() != boot_cpu_physical_apicid) | ||
51 | printk(KERN_INFO "AMD C1E detected late. " | ||
52 | " Force timer broadcast.\n"); | ||
53 | return 1; | ||
54 | } | ||
55 | break; | ||
56 | default: | ||
57 | /* err on the side of caution */ | ||
58 | return 1; | ||
59 | } | ||
60 | return 0; | ||
61 | } | ||
62 | #endif | ||
63 | |||
64 | int force_mwait __cpuinitdata; | 27 | int force_mwait __cpuinitdata; |
65 | 28 | ||
66 | static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) | 29 | static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) |
@@ -297,11 +260,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
297 | num_cache_leaves = 3; | 260 | num_cache_leaves = 3; |
298 | } | 261 | } |
299 | 262 | ||
300 | #ifdef CONFIG_X86_LOCAL_APIC | ||
301 | if (amd_apic_timer_broken()) | ||
302 | local_apic_timer_disabled = 1; | ||
303 | #endif | ||
304 | |||
305 | /* K6s report MCEs but don't actually have all the MSRs */ | 263 | /* K6s report MCEs but don't actually have all the MSRs */ |
306 | if (c->x86 < 6) | 264 | if (c->x86 < 6) |
307 | clear_cpu_cap(c, X86_FEATURE_MCE); | 265 | clear_cpu_cap(c, X86_FEATURE_MCE); |
diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c new file mode 100644 index 000000000000..30b7557c9641 --- /dev/null +++ b/arch/x86/kernel/cpu/amd_64.c | |||
@@ -0,0 +1,211 @@ | |||
1 | #include <linux/init.h> | ||
2 | #include <linux/mm.h> | ||
3 | |||
4 | #include <asm/numa_64.h> | ||
5 | #include <asm/mmconfig.h> | ||
6 | #include <asm/cacheflush.h> | ||
7 | |||
8 | #include <mach_apic.h> | ||
9 | |||
10 | #include "cpu.h" | ||
11 | |||
12 | int force_mwait __cpuinitdata; | ||
13 | |||
14 | #ifdef CONFIG_NUMA | ||
15 | static int __cpuinit nearby_node(int apicid) | ||
16 | { | ||
17 | int i, node; | ||
18 | |||
19 | for (i = apicid - 1; i >= 0; i--) { | ||
20 | node = apicid_to_node[i]; | ||
21 | if (node != NUMA_NO_NODE && node_online(node)) | ||
22 | return node; | ||
23 | } | ||
24 | for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { | ||
25 | node = apicid_to_node[i]; | ||
26 | if (node != NUMA_NO_NODE && node_online(node)) | ||
27 | return node; | ||
28 | } | ||
29 | return first_node(node_online_map); /* Shouldn't happen */ | ||
30 | } | ||
31 | #endif | ||
32 | |||
33 | /* | ||
34 | * On an AMD dual core setup the lower bits of the APIC id distinguish the cores. | ||
35 | * Assumes number of cores is a power of two. | ||
36 | */ | ||
37 | static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) | ||
38 | { | ||
39 | #ifdef CONFIG_SMP | ||
40 | unsigned bits; | ||
41 | #ifdef CONFIG_NUMA | ||
42 | int cpu = smp_processor_id(); | ||
43 | int node = 0; | ||
44 | unsigned apicid = hard_smp_processor_id(); | ||
45 | #endif | ||
46 | bits = c->x86_coreid_bits; | ||
47 | |||
48 | /* Low order bits define the core id (index of core in socket) */ | ||
49 | c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); | ||
50 | /* Convert the initial APIC ID into the socket ID */ | ||
51 | c->phys_proc_id = c->initial_apicid >> bits; | ||
52 | |||
53 | #ifdef CONFIG_NUMA | ||
54 | node = c->phys_proc_id; | ||
55 | if (apicid_to_node[apicid] != NUMA_NO_NODE) | ||
56 | node = apicid_to_node[apicid]; | ||
57 | if (!node_online(node)) { | ||
58 | /* Two possibilities here: | ||
59 | - The CPU is missing memory and no node was created. | ||
60 | In that case try picking one from a nearby CPU | ||
61 | - The APIC IDs differ from the HyperTransport node IDs | ||
62 | which the K8 northbridge parsing fills in. | ||
63 | Assume they are all increased by a constant offset, | ||
64 | but in the same order as the HT nodeids. | ||
65 | If that doesn't result in a usable node fall back to the | ||
66 | path for the previous case. */ | ||
67 | |||
68 | int ht_nodeid = c->initial_apicid; | ||
69 | |||
70 | if (ht_nodeid >= 0 && | ||
71 | apicid_to_node[ht_nodeid] != NUMA_NO_NODE) | ||
72 | node = apicid_to_node[ht_nodeid]; | ||
73 | /* Pick a nearby node */ | ||
74 | if (!node_online(node)) | ||
75 | node = nearby_node(apicid); | ||
76 | } | ||
77 | numa_set_node(cpu, node); | ||
78 | |||
79 | printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); | ||
80 | #endif | ||
81 | #endif | ||
82 | } | ||
83 | |||
84 | static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) | ||
85 | { | ||
86 | #ifdef CONFIG_SMP | ||
87 | unsigned bits, ecx; | ||
88 | |||
89 | /* Multi core CPU? */ | ||
90 | if (c->extended_cpuid_level < 0x80000008) | ||
91 | return; | ||
92 | |||
93 | ecx = cpuid_ecx(0x80000008); | ||
94 | |||
95 | c->x86_max_cores = (ecx & 0xff) + 1; | ||
96 | |||
97 | /* CPU telling us the core id bits shift? */ | ||
98 | bits = (ecx >> 12) & 0xF; | ||
99 | |||
100 | /* Otherwise recompute */ | ||
101 | if (bits == 0) { | ||
102 | while ((1 << bits) < c->x86_max_cores) | ||
103 | bits++; | ||
104 | } | ||
105 | |||
106 | c->x86_coreid_bits = bits; | ||
107 | |||
108 | #endif | ||
109 | } | ||
110 | |||
111 | static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) | ||
112 | { | ||
113 | early_init_amd_mc(c); | ||
114 | |||
115 | /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ | ||
116 | if (c->x86_power & (1<<8)) | ||
117 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | ||
118 | } | ||
119 | |||
120 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) | ||
121 | { | ||
122 | unsigned level; | ||
123 | |||
124 | #ifdef CONFIG_SMP | ||
125 | unsigned long value; | ||
126 | |||
127 | /* | ||
128 | * Disable TLB flush filter by setting HWCR.FFDIS on K8 | ||
129 | * bit 6 of msr C001_0015 | ||
130 | * | ||
131 | * Errata 63 for SH-B3 steppings | ||
132 | * Errata 122 for all steppings (F+ have it disabled by default) | ||
133 | */ | ||
134 | if (c->x86 == 15) { | ||
135 | rdmsrl(MSR_K8_HWCR, value); | ||
136 | value |= 1 << 6; | ||
137 | wrmsrl(MSR_K8_HWCR, value); | ||
138 | } | ||
139 | #endif | ||
140 | |||
141 | /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; | ||
142 | 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ | ||
143 | clear_cpu_cap(c, 0*32+31); | ||
144 | |||
145 | /* On C+ stepping K8 rep microcode works well for copy/memset */ | ||
146 | level = cpuid_eax(1); | ||
147 | if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || | ||
148 | level >= 0x0f58)) | ||
149 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | ||
150 | if (c->x86 == 0x10 || c->x86 == 0x11) | ||
151 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | ||
152 | |||
153 | /* Enable workaround for FXSAVE leak */ | ||
154 | if (c->x86 >= 6) | ||
155 | set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); | ||
156 | |||
157 | level = get_model_name(c); | ||
158 | if (!level) { | ||
159 | switch (c->x86) { | ||
160 | case 15: | ||
161 | /* Should distinguish Models here, but this is only | ||
162 | a fallback anyways. */ | ||
163 | strcpy(c->x86_model_id, "Hammer"); | ||
164 | break; | ||
165 | } | ||
166 | } | ||
167 | display_cacheinfo(c); | ||
168 | |||
169 | /* Multi core CPU? */ | ||
170 | if (c->extended_cpuid_level >= 0x80000008) | ||
171 | amd_detect_cmp(c); | ||
172 | |||
173 | if (c->extended_cpuid_level >= 0x80000006 && | ||
174 | (cpuid_edx(0x80000006) & 0xf000)) | ||
175 | num_cache_leaves = 4; | ||
176 | else | ||
177 | num_cache_leaves = 3; | ||
178 | |||
179 | if (c->x86 == 0xf || c->x86 == 0x10 || c->x86 == 0x11) | ||
180 | set_cpu_cap(c, X86_FEATURE_K8); | ||
181 | |||
182 | /* MFENCE stops RDTSC speculation */ | ||
183 | set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); | ||
184 | |||
185 | if (c->x86 == 0x10) | ||
186 | fam10h_check_enable_mmcfg(); | ||
187 | |||
188 | if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { | ||
189 | unsigned long long tseg; | ||
190 | |||
191 | /* | ||
192 | * Split up direct mapping around the TSEG SMM area. | ||
193 | * Don't do it for gbpages because there seems very little | ||
194 | * benefit in doing so. | ||
195 | */ | ||
196 | if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg) && | ||
197 | (tseg >> PMD_SHIFT) < | ||
198 | (max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT))) | ||
199 | set_memory_4k((unsigned long)__va(tseg), 1); | ||
200 | } | ||
201 | } | ||
202 | |||
203 | static struct cpu_dev amd_cpu_dev __cpuinitdata = { | ||
204 | .c_vendor = "AMD", | ||
205 | .c_ident = { "AuthenticAMD" }, | ||
206 | .c_early_init = early_init_amd, | ||
207 | .c_init = init_amd, | ||
208 | }; | ||
209 | |||
210 | cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev); | ||
211 | |||
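The topology decode in amd_detect_cmp() above is a plain shift-and-mask on the initial APIC ID. A toy userspace illustration, with the core count assumed for the example (4 cores per socket, so x86_coreid_bits == 2):

	#include <stdio.h>

	int main(void)
	{
		unsigned apicid = 0x7, bits = 2;		/* assumed values */
		unsigned core   = apicid & ((1u << bits) - 1);	/* low bits  -> core 3 */
		unsigned socket = apicid >> bits;		/* high bits -> socket 1 */

		printf("APIC id 0x%x -> core %u, socket %u\n", apicid, core, socket);
		return 0;
	}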
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 170d2f5523b2..1b1c56bb338f 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c | |||
@@ -59,8 +59,12 @@ static void __init check_fpu(void) | |||
59 | return; | 59 | return; |
60 | } | 60 | } |
61 | 61 | ||
62 | /* trap_init() enabled FXSR and company _before_ testing for FP problems here. */ | 62 | /* |
63 | /* Test for the divl bug.. */ | 63 | * trap_init() enabled FXSR and company _before_ testing for FP |
64 | * problems here. | ||
65 | * | ||
66 | * Test for the divl bug.. | ||
67 | */ | ||
64 | __asm__("fninit\n\t" | 68 | __asm__("fninit\n\t" |
65 | "fldl %1\n\t" | 69 | "fldl %1\n\t" |
66 | "fdivl %2\n\t" | 70 | "fdivl %2\n\t" |
@@ -108,10 +112,15 @@ static void __init check_popad(void) | |||
108 | "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx " | 112 | "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx " |
109 | : "=&a" (res) | 113 | : "=&a" (res) |
110 | : "d" (inp) | 114 | : "d" (inp) |
111 | : "ecx", "edi" ); | 115 | : "ecx", "edi"); |
112 | /* If this fails, it means that any user program may lock the CPU hard. Too bad. */ | 116 | /* |
113 | if (res != 12345678) printk( "Buggy.\n" ); | 117 | * If this fails, it means that any user program may lock the |
114 | else printk( "OK.\n" ); | 118 | * CPU hard. Too bad. |
119 | */ | ||
120 | if (res != 12345678) | ||
121 | printk("Buggy.\n"); | ||
122 | else | ||
123 | printk("OK.\n"); | ||
115 | #endif | 124 | #endif |
116 | } | 125 | } |
117 | 126 | ||
@@ -137,7 +146,8 @@ static void __init check_config(void) | |||
137 | * i486+ only features! (WP works in supervisor mode and the | 146 | * i486+ only features! (WP works in supervisor mode and the |
138 | * new "invlpg" and "bswap" instructions) | 147 | * new "invlpg" and "bswap" instructions) |
139 | */ | 148 | */ |
140 | #if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_BSWAP) | 149 | #if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || \ |
150 | defined(CONFIG_X86_BSWAP) | ||
141 | if (boot_cpu_data.x86 == 3) | 151 | if (boot_cpu_data.x86 == 3) |
142 | panic("Kernel requires i486+ for 'invlpg' and other features"); | 152 | panic("Kernel requires i486+ for 'invlpg' and other features"); |
143 | #endif | 153 | #endif |
@@ -170,6 +180,7 @@ void __init check_bugs(void) | |||
170 | check_fpu(); | 180 | check_fpu(); |
171 | check_hlt(); | 181 | check_hlt(); |
172 | check_popad(); | 182 | check_popad(); |
173 | init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); | 183 | init_utsname()->machine[1] = |
184 | '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); | ||
174 | alternative_instructions(); | 185 | alternative_instructions(); |
175 | } | 186 | } |
diff --git a/arch/x86/kernel/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c index 9a3ed0649d4e..9a3ed0649d4e 100644 --- a/arch/x86/kernel/bugs_64.c +++ b/arch/x86/kernel/cpu/bugs_64.c | |||
diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c new file mode 100644 index 000000000000..13526fd5cce1 --- /dev/null +++ b/arch/x86/kernel/cpu/centaur_64.c | |||
@@ -0,0 +1,43 @@ | |||
1 | #include <linux/init.h> | ||
2 | #include <linux/smp.h> | ||
3 | |||
4 | #include <asm/cpufeature.h> | ||
5 | #include <asm/processor.h> | ||
6 | |||
7 | #include "cpu.h" | ||
8 | |||
9 | static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) | ||
10 | { | ||
11 | if (c->x86 == 0x6 && c->x86_model >= 0xf) | ||
12 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | ||
13 | } | ||
14 | |||
15 | static void __cpuinit init_centaur(struct cpuinfo_x86 *c) | ||
16 | { | ||
17 | /* Cache sizes */ | ||
18 | unsigned n; | ||
19 | |||
20 | n = c->extended_cpuid_level; | ||
21 | if (n >= 0x80000008) { | ||
22 | unsigned eax = cpuid_eax(0x80000008); | ||
23 | c->x86_virt_bits = (eax >> 8) & 0xff; | ||
24 | c->x86_phys_bits = eax & 0xff; | ||
25 | } | ||
26 | |||
27 | if (c->x86 == 0x6 && c->x86_model >= 0xf) { | ||
28 | c->x86_cache_alignment = c->x86_clflush_size * 2; | ||
29 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | ||
30 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | ||
31 | } | ||
32 | set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); | ||
33 | } | ||
34 | |||
35 | static struct cpu_dev centaur_cpu_dev __cpuinitdata = { | ||
36 | .c_vendor = "Centaur", | ||
37 | .c_ident = { "CentaurHauls" }, | ||
38 | .c_early_init = early_init_centaur, | ||
39 | .c_init = init_centaur, | ||
40 | }; | ||
41 | |||
42 | cpu_vendor_dev_register(X86_VENDOR_CENTAUR, ¢aur_cpu_dev); | ||
43 | |||
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 783691b2a738..4d894e8565fe 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h | |||
@@ -1,3 +1,6 @@ | |||
1 | #ifndef ARCH_X86_CPU_H | ||
2 | |||
3 | #define ARCH_X86_CPU_H | ||
1 | 4 | ||
2 | struct cpu_model_info { | 5 | struct cpu_model_info { |
3 | int vendor; | 6 | int vendor; |
@@ -36,3 +39,5 @@ extern struct cpu_vendor_dev __x86cpuvendor_start[], __x86cpuvendor_end[]; | |||
36 | 39 | ||
37 | extern int get_model_name(struct cpuinfo_x86 *c); | 40 | extern int get_model_name(struct cpuinfo_x86 *c); |
38 | extern void display_cacheinfo(struct cpuinfo_x86 *c); | 41 | extern void display_cacheinfo(struct cpuinfo_x86 *c); |
42 | |||
43 | #endif | ||
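For reference, the include-guard idiom the cpu.h hunk introduces normally reads as below; the blank line the patch leaves between the #ifndef and the #define is legal but unconventional:

	#ifndef ARCH_X86_CPU_H
	#define ARCH_X86_CPU_H

	/* declarations ... */

	#endif /* ARCH_X86_CPU_H */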
diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c index f03e9153618e..965ea52767ac 100644 --- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c +++ b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c | |||
@@ -26,9 +26,10 @@ | |||
26 | #define NFORCE2_SAFE_DISTANCE 50 | 26 | #define NFORCE2_SAFE_DISTANCE 50 |
27 | 27 | ||
28 | /* Delay in ms between FSB changes */ | 28 | /* Delay in ms between FSB changes */ |
29 | //#define NFORCE2_DELAY 10 | 29 | /* #define NFORCE2_DELAY 10 */ |
30 | 30 | ||
31 | /* nforce2_chipset: | 31 | /* |
32 | * nforce2_chipset: | ||
32 | * FSB is changed using the chipset | 33 | * FSB is changed using the chipset |
33 | */ | 34 | */ |
34 | static struct pci_dev *nforce2_chipset_dev; | 35 | static struct pci_dev *nforce2_chipset_dev; |
@@ -36,13 +37,13 @@ static struct pci_dev *nforce2_chipset_dev; | |||
36 | /* fid: | 37 | /* fid: |
37 | * multiplier * 10 | 38 | * multiplier * 10 |
38 | */ | 39 | */ |
39 | static int fid = 0; | 40 | static int fid; |
40 | 41 | ||
41 | /* min_fsb, max_fsb: | 42 | /* min_fsb, max_fsb: |
42 | * minimum and maximum FSB (= FSB at boot time) | 43 | * minimum and maximum FSB (= FSB at boot time) |
43 | */ | 44 | */ |
44 | static int min_fsb = 0; | 45 | static int min_fsb; |
45 | static int max_fsb = 0; | 46 | static int max_fsb; |
46 | 47 | ||
47 | MODULE_AUTHOR("Sebastian Witt <se.witt@gmx.net>"); | 48 | MODULE_AUTHOR("Sebastian Witt <se.witt@gmx.net>"); |
48 | MODULE_DESCRIPTION("nForce2 FSB changing cpufreq driver"); | 49 | MODULE_DESCRIPTION("nForce2 FSB changing cpufreq driver"); |
@@ -53,7 +54,7 @@ module_param(min_fsb, int, 0444); | |||
53 | 54 | ||
54 | MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)"); | 55 | MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)"); |
55 | MODULE_PARM_DESC(min_fsb, | 56 | MODULE_PARM_DESC(min_fsb, |
56 | "Minimum FSB to use, if not defined: current FSB - 50"); | 57 | "Minimum FSB to use, if not defined: current FSB - 50"); |
57 | 58 | ||
58 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "cpufreq-nforce2", msg) | 59 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "cpufreq-nforce2", msg) |
59 | 60 | ||
@@ -139,7 +140,7 @@ static unsigned int nforce2_fsb_read(int bootfsb) | |||
139 | 140 | ||
140 | /* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */ | 141 | /* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */ |
141 | nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, | 142 | nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, |
142 | 0x01EF,PCI_ANY_ID,PCI_ANY_ID,NULL); | 143 | 0x01EF, PCI_ANY_ID, PCI_ANY_ID, NULL); |
143 | if (!nforce2_sub5) | 144 | if (!nforce2_sub5) |
144 | return 0; | 145 | return 0; |
145 | 146 | ||
@@ -147,13 +148,13 @@ static unsigned int nforce2_fsb_read(int bootfsb) | |||
147 | fsb /= 1000000; | 148 | fsb /= 1000000; |
148 | 149 | ||
149 | /* Check if PLL register is already set */ | 150 | /* Check if PLL register is already set */ |
150 | pci_read_config_byte(nforce2_chipset_dev,NFORCE2_PLLENABLE, (u8 *)&temp); | 151 | pci_read_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8 *)&temp); |
151 | 152 | ||
152 | if(bootfsb || !temp) | 153 | if (bootfsb || !temp) |
153 | return fsb; | 154 | return fsb; |
154 | 155 | ||
155 | /* Use PLL register FSB value */ | 156 | /* Use PLL register FSB value */ |
156 | pci_read_config_dword(nforce2_chipset_dev,NFORCE2_PLLREG, &temp); | 157 | pci_read_config_dword(nforce2_chipset_dev, NFORCE2_PLLREG, &temp); |
157 | fsb = nforce2_calc_fsb(temp); | 158 | fsb = nforce2_calc_fsb(temp); |
158 | 159 | ||
159 | return fsb; | 160 | return fsb; |
@@ -184,7 +185,7 @@ static int nforce2_set_fsb(unsigned int fsb) | |||
184 | } | 185 | } |
185 | 186 | ||
186 | /* First write? Then set actual value */ | 187 | /* First write? Then set actual value */ |
187 | pci_read_config_byte(nforce2_chipset_dev,NFORCE2_PLLENABLE, (u8 *)&temp); | 188 | pci_read_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8 *)&temp); |
188 | if (!temp) { | 189 | if (!temp) { |
189 | pll = nforce2_calc_pll(tfsb); | 190 | pll = nforce2_calc_pll(tfsb); |
190 | 191 | ||
@@ -210,7 +211,8 @@ static int nforce2_set_fsb(unsigned int fsb) | |||
210 | tfsb--; | 211 | tfsb--; |
211 | 212 | ||
212 | /* Calculate the PLL reg. value */ | 213 | /* Calculate the PLL reg. value */ |
213 | if ((pll = nforce2_calc_pll(tfsb)) == -1) | 214 | pll = nforce2_calc_pll(tfsb); |
215 | if (pll == -1) | ||
214 | return -EINVAL; | 216 | return -EINVAL; |
215 | 217 | ||
216 | nforce2_write_pll(pll); | 218 | nforce2_write_pll(pll); |
@@ -249,7 +251,7 @@ static unsigned int nforce2_get(unsigned int cpu) | |||
249 | static int nforce2_target(struct cpufreq_policy *policy, | 251 | static int nforce2_target(struct cpufreq_policy *policy, |
250 | unsigned int target_freq, unsigned int relation) | 252 | unsigned int target_freq, unsigned int relation) |
251 | { | 253 | { |
252 | // unsigned long flags; | 254 | /* unsigned long flags; */ |
253 | struct cpufreq_freqs freqs; | 255 | struct cpufreq_freqs freqs; |
254 | unsigned int target_fsb; | 256 | unsigned int target_fsb; |
255 | 257 | ||
@@ -271,17 +273,17 @@ static int nforce2_target(struct cpufreq_policy *policy, | |||
271 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 273 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
272 | 274 | ||
273 | /* Disable IRQs */ | 275 | /* Disable IRQs */ |
274 | //local_irq_save(flags); | 276 | /* local_irq_save(flags); */ |
275 | 277 | ||
276 | if (nforce2_set_fsb(target_fsb) < 0) | 278 | if (nforce2_set_fsb(target_fsb) < 0) |
277 | printk(KERN_ERR "cpufreq: Changing FSB to %d failed\n", | 279 | printk(KERN_ERR "cpufreq: Changing FSB to %d failed\n", |
278 | target_fsb); | 280 | target_fsb); |
279 | else | 281 | else |
280 | dprintk("Changed FSB successfully to %d\n", | 282 | dprintk("Changed FSB successfully to %d\n", |
281 | target_fsb); | 283 | target_fsb); |
282 | 284 | ||
283 | /* Enable IRQs */ | 285 | /* Enable IRQs */ |
284 | //local_irq_restore(flags); | 286 | /* local_irq_restore(flags); */ |
285 | 287 | ||
286 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 288 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
287 | 289 | ||
@@ -302,8 +304,8 @@ static int nforce2_verify(struct cpufreq_policy *policy) | |||
302 | policy->max = (fsb_pol_max + 1) * fid * 100; | 304 | policy->max = (fsb_pol_max + 1) * fid * 100; |
303 | 305 | ||
304 | cpufreq_verify_within_limits(policy, | 306 | cpufreq_verify_within_limits(policy, |
305 | policy->cpuinfo.min_freq, | 307 | policy->cpuinfo.min_freq, |
306 | policy->cpuinfo.max_freq); | 308 | policy->cpuinfo.max_freq); |
307 | return 0; | 309 | return 0; |
308 | } | 310 | } |
309 | 311 | ||
@@ -347,7 +349,7 @@ static int nforce2_cpu_init(struct cpufreq_policy *policy) | |||
347 | /* Set maximum FSB to FSB at boot time */ | 349 | /* Set maximum FSB to FSB at boot time */ |
348 | max_fsb = nforce2_fsb_read(1); | 350 | max_fsb = nforce2_fsb_read(1); |
349 | 351 | ||
350 | if(!max_fsb) | 352 | if (!max_fsb) |
351 | return -EIO; | 353 | return -EIO; |
352 | 354 | ||
353 | if (!min_fsb) | 355 | if (!min_fsb) |
diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c index af4a867a097c..777a7ff075de 100644 --- a/arch/x86/kernel/cpu/cpufreq/longrun.c +++ b/arch/x86/kernel/cpu/cpufreq/longrun.c | |||
@@ -245,7 +245,7 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq, | |||
245 | if ((ecx > 95) || (ecx == 0) || (eax < ebx)) | 245 | if ((ecx > 95) || (ecx == 0) || (eax < ebx)) |
246 | return -EIO; | 246 | return -EIO; |
247 | 247 | ||
248 | edx = (eax - ebx) / (100 - ecx); | 248 | edx = ((eax - ebx) * 100) / (100 - ecx); |
249 | *low_freq = edx * 1000; /* back to kHz */ | 249 | *low_freq = edx * 1000; /* back to kHz */ |
250 | 250 | ||
251 | dprintk("low frequency is %u kHz\n", *low_freq); | 251 | dprintk("low frequency is %u kHz\n", *low_freq); |
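The longrun.c change is an arithmetic fix: ecx holds a performance level expressed as a percentage, so the numerator has to be scaled by 100 before dividing by (100 - ecx); without that, the integer division is off by a factor of 100. Illustrative values only (the real eax/ebx/ecx come from the Transmeta CPUID/MSR interface):

	#include <stdio.h>

	int main(void)
	{
		unsigned eax = 600, ebx = 300, ecx = 50;	/* assumed sample values */

		unsigned old_khz = ((eax - ebx) / (100 - ecx)) * 1000;		/*   6000 kHz */
		unsigned new_khz = (((eax - ebx) * 100) / (100 - ecx)) * 1000;	/* 600000 kHz */

		printf("before: %u kHz, after: %u kHz\n", old_khz, new_khz);
		return 0;
	}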
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 46d4034d9f37..206791eb46e3 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c | |||
@@ -1127,12 +1127,23 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | |||
1127 | * an UP version, and is deprecated by AMD. | 1127 | * an UP version, and is deprecated by AMD. |
1128 | */ | 1128 | */ |
1129 | if (num_online_cpus() != 1) { | 1129 | if (num_online_cpus() != 1) { |
1130 | printk(KERN_ERR PFX "MP systems not supported by PSB BIOS structure\n"); | 1130 | #ifndef CONFIG_ACPI_PROCESSOR |
1131 | printk(KERN_ERR PFX "ACPI Processor support is required " | ||
1132 | "for SMP systems but is absent. Please load the " | ||
1133 | "ACPI Processor module before starting this " | ||
1134 | "driver.\n"); | ||
1135 | #else | ||
1136 | printk(KERN_ERR PFX "Your BIOS does not provide ACPI " | ||
1137 | "_PSS objects in a way that Linux understands. " | ||
1138 | "Please report this to the Linux ACPI maintainers" | ||
1139 | " and complain to your BIOS vendor.\n"); | ||
1140 | #endif | ||
1131 | kfree(data); | 1141 | kfree(data); |
1132 | return -ENODEV; | 1142 | return -ENODEV; |
1133 | } | 1143 | } |
1134 | if (pol->cpu != 0) { | 1144 | if (pol->cpu != 0) { |
1135 | printk(KERN_ERR PFX "No _PSS objects for CPU other than CPU0\n"); | 1145 | printk(KERN_ERR PFX "No ACPI _PSS objects for CPU other than " |
1146 | "CPU0. Complain to your BIOS vendor.\n"); | ||
1136 | kfree(data); | 1147 | kfree(data); |
1137 | return -ENODEV; | 1148 | return -ENODEV; |
1138 | } | 1149 | } |
diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c new file mode 100644 index 000000000000..fcb1cc9d75ca --- /dev/null +++ b/arch/x86/kernel/cpu/intel_64.c | |||
@@ -0,0 +1,103 @@ | |||
1 | #include <linux/init.h> | ||
2 | #include <linux/smp.h> | ||
3 | #include <asm/processor.h> | ||
4 | #include <asm/ptrace.h> | ||
5 | #include <asm/topology.h> | ||
6 | #include <asm/numa_64.h> | ||
7 | |||
8 | #include "cpu.h" | ||
9 | |||
10 | static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | ||
11 | { | ||
12 | if ((c->x86 == 0xf && c->x86_model >= 0x03) || | ||
13 | (c->x86 == 0x6 && c->x86_model >= 0x0e)) | ||
14 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | ||
15 | } | ||
16 | |||
17 | /* | ||
18 | * find out the number of processor cores on the die | ||
19 | */ | ||
20 | static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) | ||
21 | { | ||
22 | unsigned int eax, t; | ||
23 | |||
24 | if (c->cpuid_level < 4) | ||
25 | return 1; | ||
26 | |||
27 | cpuid_count(4, 0, &eax, &t, &t, &t); | ||
28 | |||
29 | if (eax & 0x1f) | ||
30 | return ((eax >> 26) + 1); | ||
31 | else | ||
32 | return 1; | ||
33 | } | ||
34 | |||
35 | static void __cpuinit srat_detect_node(void) | ||
36 | { | ||
37 | #ifdef CONFIG_NUMA | ||
38 | unsigned node; | ||
39 | int cpu = smp_processor_id(); | ||
40 | int apicid = hard_smp_processor_id(); | ||
41 | |||
42 | /* Don't do the funky fallback heuristics the AMD version employs | ||
43 | for now. */ | ||
44 | node = apicid_to_node[apicid]; | ||
45 | if (node == NUMA_NO_NODE || !node_online(node)) | ||
46 | node = first_node(node_online_map); | ||
47 | numa_set_node(cpu, node); | ||
48 | |||
49 | printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); | ||
50 | #endif | ||
51 | } | ||
52 | |||
53 | static void __cpuinit init_intel(struct cpuinfo_x86 *c) | ||
54 | { | ||
55 | /* Cache sizes */ | ||
56 | unsigned n; | ||
57 | |||
58 | init_intel_cacheinfo(c); | ||
59 | if (c->cpuid_level > 9) { | ||
60 | unsigned eax = cpuid_eax(10); | ||
61 | /* Check for version and the number of counters */ | ||
62 | if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) | ||
63 | set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); | ||
64 | } | ||
65 | |||
66 | if (cpu_has_ds) { | ||
67 | unsigned int l1, l2; | ||
68 | rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); | ||
69 | if (!(l1 & (1<<11))) | ||
70 | set_cpu_cap(c, X86_FEATURE_BTS); | ||
71 | if (!(l1 & (1<<12))) | ||
72 | set_cpu_cap(c, X86_FEATURE_PEBS); | ||
73 | } | ||
74 | |||
75 | |||
76 | if (cpu_has_bts) | ||
77 | ds_init_intel(c); | ||
78 | |||
79 | n = c->extended_cpuid_level; | ||
80 | if (n >= 0x80000008) { | ||
81 | unsigned eax = cpuid_eax(0x80000008); | ||
82 | c->x86_virt_bits = (eax >> 8) & 0xff; | ||
83 | c->x86_phys_bits = eax & 0xff; | ||
84 | } | ||
85 | |||
86 | if (c->x86 == 15) | ||
87 | c->x86_cache_alignment = c->x86_clflush_size * 2; | ||
88 | if (c->x86 == 6) | ||
89 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | ||
90 | set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); | ||
91 | c->x86_max_cores = intel_num_cpu_cores(c); | ||
92 | |||
93 | srat_detect_node(); | ||
94 | } | ||
95 | |||
96 | static struct cpu_dev intel_cpu_dev __cpuinitdata = { | ||
97 | .c_vendor = "Intel", | ||
98 | .c_ident = { "GenuineIntel" }, | ||
99 | .c_early_init = early_init_intel, | ||
100 | .c_init = init_intel, | ||
101 | }; | ||
102 | cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev); | ||
103 | |||
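init_intel() above gates X86_FEATURE_ARCH_PERFMON on CPUID leaf 10 (0xA), where EAX bits 7:0 report the architectural-perfmon version and bits 15:8 the number of general-purpose counters. A toy decode with an assumed raw EAX value:

	#include <stdio.h>

	int main(void)
	{
		unsigned eax = 0x07300403;		/* assumed CPUID.0xA EAX value */
		unsigned version  = eax & 0xff;		/* -> 3 */
		unsigned counters = (eax >> 8) & 0xff;	/* -> 4 */

		if (version && counters > 1)		/* same test as init_intel() */
			printf("arch perfmon v%u, %u counters\n", version, counters);
		return 0;
	}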
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 26d615dcb149..2c8afafa18e8 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -62,6 +62,7 @@ static struct _cache_table cache_table[] __cpuinitdata = | |||
62 | { 0x4b, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ | 62 | { 0x4b, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ |
63 | { 0x4c, LVL_3, 12288 }, /* 12-way set assoc, 64 byte line size */ | 63 | { 0x4c, LVL_3, 12288 }, /* 12-way set assoc, 64 byte line size */ |
64 | { 0x4d, LVL_3, 16384 }, /* 16-way set assoc, 64 byte line size */ | 64 | { 0x4d, LVL_3, 16384 }, /* 16-way set assoc, 64 byte line size */ |
65 | { 0x4e, LVL_2, 6144 }, /* 24-way set assoc, 64 byte line size */ | ||
65 | { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */ | 66 | { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */ |
66 | { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */ | 67 | { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */ |
67 | { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */ | 68 | { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */ |
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c index e633c9c2b764..f390c9f66351 100644 --- a/arch/x86/kernel/cpu/mcheck/k7.c +++ b/arch/x86/kernel/cpu/mcheck/k7.c | |||
@@ -9,23 +9,23 @@ | |||
9 | #include <linux/interrupt.h> | 9 | #include <linux/interrupt.h> |
10 | #include <linux/smp.h> | 10 | #include <linux/smp.h> |
11 | 11 | ||
12 | #include <asm/processor.h> | 12 | #include <asm/processor.h> |
13 | #include <asm/system.h> | 13 | #include <asm/system.h> |
14 | #include <asm/msr.h> | 14 | #include <asm/msr.h> |
15 | 15 | ||
16 | #include "mce.h" | 16 | #include "mce.h" |
17 | 17 | ||
18 | /* Machine Check Handler For AMD Athlon/Duron */ | 18 | /* Machine Check Handler For AMD Athlon/Duron */ |
19 | static void k7_machine_check(struct pt_regs * regs, long error_code) | 19 | static void k7_machine_check(struct pt_regs *regs, long error_code) |
20 | { | 20 | { |
21 | int recover=1; | 21 | int recover = 1; |
22 | u32 alow, ahigh, high, low; | 22 | u32 alow, ahigh, high, low; |
23 | u32 mcgstl, mcgsth; | 23 | u32 mcgstl, mcgsth; |
24 | int i; | 24 | int i; |
25 | 25 | ||
26 | rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | 26 | rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); |
27 | if (mcgstl & (1<<0)) /* Recoverable ? */ | 27 | if (mcgstl & (1<<0)) /* Recoverable ? */ |
28 | recover=0; | 28 | recover = 0; |
29 | 29 | ||
30 | printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", | 30 | printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", |
31 | smp_processor_id(), mcgsth, mcgstl); | 31 | smp_processor_id(), mcgsth, mcgstl); |
@@ -60,12 +60,12 @@ static void k7_machine_check(struct pt_regs * regs, long error_code) | |||
60 | } | 60 | } |
61 | 61 | ||
62 | if (recover&2) | 62 | if (recover&2) |
63 | panic ("CPU context corrupt"); | 63 | panic("CPU context corrupt"); |
64 | if (recover&1) | 64 | if (recover&1) |
65 | panic ("Unable to continue"); | 65 | panic("Unable to continue"); |
66 | printk (KERN_EMERG "Attempting to continue.\n"); | 66 | printk(KERN_EMERG "Attempting to continue.\n"); |
67 | mcgstl &= ~(1<<2); | 67 | mcgstl &= ~(1<<2); |
68 | wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth); | 68 | wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); |
69 | } | 69 | } |
70 | 70 | ||
71 | 71 | ||
@@ -81,25 +81,25 @@ void amd_mcheck_init(struct cpuinfo_x86 *c) | |||
81 | machine_check_vector = k7_machine_check; | 81 | machine_check_vector = k7_machine_check; |
82 | wmb(); | 82 | wmb(); |
83 | 83 | ||
84 | printk (KERN_INFO "Intel machine check architecture supported.\n"); | 84 | printk(KERN_INFO "Intel machine check architecture supported.\n"); |
85 | rdmsr (MSR_IA32_MCG_CAP, l, h); | 85 | rdmsr(MSR_IA32_MCG_CAP, l, h); |
86 | if (l & (1<<8)) /* Control register present ? */ | 86 | if (l & (1<<8)) /* Control register present ? */ |
87 | wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); | 87 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); |
88 | nr_mce_banks = l & 0xff; | 88 | nr_mce_banks = l & 0xff; |
89 | 89 | ||
90 | /* Clear status for MC index 0 separately, we don't touch CTL, | 90 | /* Clear status for MC index 0 separately, we don't touch CTL, |
91 | * as some K7 Athlons cause spurious MCEs when it's enabled. */ | 91 | * as some K7 Athlons cause spurious MCEs when it's enabled. */ |
92 | if (boot_cpu_data.x86 == 6) { | 92 | if (boot_cpu_data.x86 == 6) { |
93 | wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0); | 93 | wrmsr(MSR_IA32_MC0_STATUS, 0x0, 0x0); |
94 | i = 1; | 94 | i = 1; |
95 | } else | 95 | } else |
96 | i = 0; | 96 | i = 0; |
97 | for (; i<nr_mce_banks; i++) { | 97 | for (; i < nr_mce_banks; i++) { |
98 | wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); | 98 | wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); |
99 | wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); | 99 | wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); |
100 | } | 100 | } |
101 | 101 | ||
102 | set_in_cr4 (X86_CR4_MCE); | 102 | set_in_cr4(X86_CR4_MCE); |
103 | printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", | 103 | printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", |
104 | smp_processor_id()); | 104 | smp_processor_id()); |
105 | } | 105 | } |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index e07e8c068ae0..501ca1cea27d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c | |||
@@ -31,7 +31,7 @@ | |||
31 | #include <asm/idle.h> | 31 | #include <asm/idle.h> |
32 | 32 | ||
33 | #define MISC_MCELOG_MINOR 227 | 33 | #define MISC_MCELOG_MINOR 227 |
34 | #define NR_BANKS 6 | 34 | #define NR_SYSFS_BANKS 6 |
35 | 35 | ||
36 | atomic_t mce_entry; | 36 | atomic_t mce_entry; |
37 | 37 | ||
@@ -46,7 +46,7 @@ static int mce_dont_init; | |||
46 | */ | 46 | */ |
47 | static int tolerant = 1; | 47 | static int tolerant = 1; |
48 | static int banks; | 48 | static int banks; |
49 | static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL }; | 49 | static unsigned long bank[NR_SYSFS_BANKS] = { [0 ... NR_SYSFS_BANKS-1] = ~0UL }; |
50 | static unsigned long notify_user; | 50 | static unsigned long notify_user; |
51 | static int rip_msr; | 51 | static int rip_msr; |
52 | static int mce_bootlog = -1; | 52 | static int mce_bootlog = -1; |
@@ -209,7 +209,7 @@ void do_machine_check(struct pt_regs * regs, long error_code) | |||
209 | barrier(); | 209 | barrier(); |
210 | 210 | ||
211 | for (i = 0; i < banks; i++) { | 211 | for (i = 0; i < banks; i++) { |
212 | if (!bank[i]) | 212 | if (i < NR_SYSFS_BANKS && !bank[i]) |
213 | continue; | 213 | continue; |
214 | 214 | ||
215 | m.misc = 0; | 215 | m.misc = 0; |
@@ -444,9 +444,10 @@ static void mce_init(void *dummy) | |||
444 | 444 | ||
445 | rdmsrl(MSR_IA32_MCG_CAP, cap); | 445 | rdmsrl(MSR_IA32_MCG_CAP, cap); |
446 | banks = cap & 0xff; | 446 | banks = cap & 0xff; |
447 | if (banks > NR_BANKS) { | 447 | if (banks > MCE_EXTENDED_BANK) { |
448 | printk(KERN_INFO "MCE: warning: using only %d banks\n", banks); | 448 | banks = MCE_EXTENDED_BANK; |
449 | banks = NR_BANKS; | 449 | printk(KERN_INFO "MCE: warning: using only %d banks\n", |
450 | MCE_EXTENDED_BANK); | ||
450 | } | 451 | } |
451 | /* Use accurate RIP reporting if available. */ | 452 | /* Use accurate RIP reporting if available. */ |
452 | if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9) | 453 | if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9) |
@@ -462,7 +463,11 @@ static void mce_init(void *dummy) | |||
462 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); | 463 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); |
463 | 464 | ||
464 | for (i = 0; i < banks; i++) { | 465 | for (i = 0; i < banks; i++) { |
465 | wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); | 466 | if (i < NR_SYSFS_BANKS) |
467 | wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); | ||
468 | else | ||
469 | wrmsrl(MSR_IA32_MC0_CTL+4*i, ~0UL); | ||
470 | |||
466 | wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | 471 | wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); |
467 | } | 472 | } |
468 | } | 473 | } |
@@ -766,7 +771,10 @@ DEFINE_PER_CPU(struct sys_device, device_mce); | |||
766 | } \ | 771 | } \ |
767 | static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name); | 772 | static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name); |
768 | 773 | ||
769 | /* TBD should generate these dynamically based on number of available banks */ | 774 | /* |
775 | * TBD should generate these dynamically based on number of available banks. | ||
776 | * Have only 6 control banks in /sysfs until then. | ||
777 | */ | ||
770 | ACCESSOR(bank0ctl,bank[0],mce_restart()) | 778 | ACCESSOR(bank0ctl,bank[0],mce_restart()) |
771 | ACCESSOR(bank1ctl,bank[1],mce_restart()) | 779 | ACCESSOR(bank1ctl,bank[1],mce_restart()) |
772 | ACCESSOR(bank2ctl,bank[2],mce_restart()) | 780 | ACCESSOR(bank2ctl,bank[2],mce_restart()) |
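Net effect of the mce_64.c change: the number of hardware banks is no longer clamped to the six that get sysfs control files. Condensed, the per-bank setup now behaves like this sketch (not the literal patch text); banks without a sysfs knob are enabled for every error type:

	for (i = 0; i < banks; i++) {
		/* bank[] only covers the NR_SYSFS_BANKS user-tunable banks */
		u64 ctl = (i < NR_SYSFS_BANKS) ? bank[i] : ~0UL;

		wrmsrl(MSR_IA32_MC0_CTL + 4*i, ctl);
		wrmsrl(MSR_IA32_MC0_STATUS + 4*i, 0);
	}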
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index cb03345554a5..eef001ad3bde 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c | |||
@@ -8,7 +8,7 @@ | |||
8 | #include <linux/interrupt.h> | 8 | #include <linux/interrupt.h> |
9 | #include <linux/smp.h> | 9 | #include <linux/smp.h> |
10 | 10 | ||
11 | #include <asm/processor.h> | 11 | #include <asm/processor.h> |
12 | #include <asm/system.h> | 12 | #include <asm/system.h> |
13 | #include <asm/msr.h> | 13 | #include <asm/msr.h> |
14 | #include <asm/apic.h> | 14 | #include <asm/apic.h> |
@@ -32,12 +32,12 @@ struct intel_mce_extended_msrs { | |||
32 | /* u32 *reserved[]; */ | 32 | /* u32 *reserved[]; */ |
33 | }; | 33 | }; |
34 | 34 | ||
35 | static int mce_num_extended_msrs = 0; | 35 | static int mce_num_extended_msrs; |
36 | 36 | ||
37 | 37 | ||
38 | #ifdef CONFIG_X86_MCE_P4THERMAL | 38 | #ifdef CONFIG_X86_MCE_P4THERMAL |
39 | static void unexpected_thermal_interrupt(struct pt_regs *regs) | 39 | static void unexpected_thermal_interrupt(struct pt_regs *regs) |
40 | { | 40 | { |
41 | printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", | 41 | printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", |
42 | smp_processor_id()); | 42 | smp_processor_id()); |
43 | add_taint(TAINT_MACHINE_CHECK); | 43 | add_taint(TAINT_MACHINE_CHECK); |
@@ -83,7 +83,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) | |||
83 | * be some SMM goo which handles it, so we can't even put a handler | 83 | * be some SMM goo which handles it, so we can't even put a handler |
84 | * since it might be delivered via SMI already -zwanem. | 84 | * since it might be delivered via SMI already -zwanem. |
85 | */ | 85 | */ |
86 | rdmsr (MSR_IA32_MISC_ENABLE, l, h); | 86 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); |
87 | h = apic_read(APIC_LVTTHMR); | 87 | h = apic_read(APIC_LVTTHMR); |
88 | if ((l & (1<<3)) && (h & APIC_DM_SMI)) { | 88 | if ((l & (1<<3)) && (h & APIC_DM_SMI)) { |
89 | printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", | 89 | printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", |
@@ -91,7 +91,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) | |||
91 | return; /* -EBUSY */ | 91 | return; /* -EBUSY */ |
92 | } | 92 | } |
93 | 93 | ||
94 | /* check whether a vector already exists, temporarily masked? */ | 94 | /* check whether a vector already exists, temporarily masked? */ |
95 | if (h & APIC_VECTOR_MASK) { | 95 | if (h & APIC_VECTOR_MASK) { |
96 | printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already " | 96 | printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already " |
97 | "installed\n", | 97 | "installed\n", |
@@ -104,18 +104,18 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) | |||
104 | h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ | 104 | h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ |
105 | apic_write_around(APIC_LVTTHMR, h); | 105 | apic_write_around(APIC_LVTTHMR, h); |
106 | 106 | ||
107 | rdmsr (MSR_IA32_THERM_INTERRUPT, l, h); | 107 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); |
108 | wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); | 108 | wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); |
109 | 109 | ||
110 | /* ok we're good to go... */ | 110 | /* ok we're good to go... */ |
111 | vendor_thermal_interrupt = intel_thermal_interrupt; | 111 | vendor_thermal_interrupt = intel_thermal_interrupt; |
112 | |||
113 | rdmsr (MSR_IA32_MISC_ENABLE, l, h); | ||
114 | wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h); | ||
115 | 112 | ||
116 | l = apic_read (APIC_LVTTHMR); | 113 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); |
117 | apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED); | 114 | wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h); |
118 | printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); | 115 | |
116 | l = apic_read(APIC_LVTTHMR); | ||
117 | apic_write_around(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); | ||
118 | printk(KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); | ||
119 | 119 | ||
120 | /* enable thermal throttle processing */ | 120 | /* enable thermal throttle processing */ |
121 | atomic_set(&therm_throt_en, 1); | 121 | atomic_set(&therm_throt_en, 1); |
@@ -129,28 +129,28 @@ static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) | |||
129 | { | 129 | { |
130 | u32 h; | 130 | u32 h; |
131 | 131 | ||
132 | rdmsr (MSR_IA32_MCG_EAX, r->eax, h); | 132 | rdmsr(MSR_IA32_MCG_EAX, r->eax, h); |
133 | rdmsr (MSR_IA32_MCG_EBX, r->ebx, h); | 133 | rdmsr(MSR_IA32_MCG_EBX, r->ebx, h); |
134 | rdmsr (MSR_IA32_MCG_ECX, r->ecx, h); | 134 | rdmsr(MSR_IA32_MCG_ECX, r->ecx, h); |
135 | rdmsr (MSR_IA32_MCG_EDX, r->edx, h); | 135 | rdmsr(MSR_IA32_MCG_EDX, r->edx, h); |
136 | rdmsr (MSR_IA32_MCG_ESI, r->esi, h); | 136 | rdmsr(MSR_IA32_MCG_ESI, r->esi, h); |
137 | rdmsr (MSR_IA32_MCG_EDI, r->edi, h); | 137 | rdmsr(MSR_IA32_MCG_EDI, r->edi, h); |
138 | rdmsr (MSR_IA32_MCG_EBP, r->ebp, h); | 138 | rdmsr(MSR_IA32_MCG_EBP, r->ebp, h); |
139 | rdmsr (MSR_IA32_MCG_ESP, r->esp, h); | 139 | rdmsr(MSR_IA32_MCG_ESP, r->esp, h); |
140 | rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h); | 140 | rdmsr(MSR_IA32_MCG_EFLAGS, r->eflags, h); |
141 | rdmsr (MSR_IA32_MCG_EIP, r->eip, h); | 141 | rdmsr(MSR_IA32_MCG_EIP, r->eip, h); |
142 | } | 142 | } |
143 | 143 | ||
144 | static void intel_machine_check(struct pt_regs * regs, long error_code) | 144 | static void intel_machine_check(struct pt_regs *regs, long error_code) |
145 | { | 145 | { |
146 | int recover=1; | 146 | int recover = 1; |
147 | u32 alow, ahigh, high, low; | 147 | u32 alow, ahigh, high, low; |
148 | u32 mcgstl, mcgsth; | 148 | u32 mcgstl, mcgsth; |
149 | int i; | 149 | int i; |
150 | 150 | ||
151 | rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | 151 | rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); |
152 | if (mcgstl & (1<<0)) /* Recoverable ? */ | 152 | if (mcgstl & (1<<0)) /* Recoverable ? */ |
153 | recover=0; | 153 | recover = 0; |
154 | 154 | ||
155 | printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", | 155 | printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", |
156 | smp_processor_id(), mcgsth, mcgstl); | 156 | smp_processor_id(), mcgsth, mcgstl); |
@@ -191,20 +191,20 @@ static void intel_machine_check(struct pt_regs * regs, long error_code) | |||
191 | } | 191 | } |
192 | 192 | ||
193 | if (recover & 2) | 193 | if (recover & 2) |
194 | panic ("CPU context corrupt"); | 194 | panic("CPU context corrupt"); |
195 | if (recover & 1) | 195 | if (recover & 1) |
196 | panic ("Unable to continue"); | 196 | panic("Unable to continue"); |
197 | 197 | ||
198 | printk(KERN_EMERG "Attempting to continue.\n"); | 198 | printk(KERN_EMERG "Attempting to continue.\n"); |
199 | /* | 199 | /* |
200 | * Do not clear the MSR_IA32_MCi_STATUS if the error is not | 200 | * Do not clear the MSR_IA32_MCi_STATUS if the error is not |
201 | * recoverable/continuable. This will allow BIOS to look at the MSRs | 201 | * recoverable/continuable. This will allow BIOS to look at the MSRs |
202 | * for errors if the OS could not log the error. | 202 | * for errors if the OS could not log the error. |
203 | */ | 203 | */ |
204 | for (i=0; i<nr_mce_banks; i++) { | 204 | for (i = 0; i < nr_mce_banks; i++) { |
205 | u32 msr; | 205 | u32 msr; |
206 | msr = MSR_IA32_MC0_STATUS+i*4; | 206 | msr = MSR_IA32_MC0_STATUS+i*4; |
207 | rdmsr (msr, low, high); | 207 | rdmsr(msr, low, high); |
208 | if (high&(1<<31)) { | 208 | if (high&(1<<31)) { |
209 | /* Clear it */ | 209 | /* Clear it */ |
210 | wrmsr(msr, 0UL, 0UL); | 210 | wrmsr(msr, 0UL, 0UL); |
@@ -214,7 +214,7 @@ static void intel_machine_check(struct pt_regs * regs, long error_code) | |||
214 | } | 214 | } |
215 | } | 215 | } |
216 | mcgstl &= ~(1<<2); | 216 | mcgstl &= ~(1<<2); |
217 | wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth); | 217 | wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); |
218 | } | 218 | } |
219 | 219 | ||
220 | 220 | ||
@@ -222,30 +222,30 @@ void intel_p4_mcheck_init(struct cpuinfo_x86 *c) | |||
222 | { | 222 | { |
223 | u32 l, h; | 223 | u32 l, h; |
224 | int i; | 224 | int i; |
225 | 225 | ||
226 | machine_check_vector = intel_machine_check; | 226 | machine_check_vector = intel_machine_check; |
227 | wmb(); | 227 | wmb(); |
228 | 228 | ||
229 | printk (KERN_INFO "Intel machine check architecture supported.\n"); | 229 | printk(KERN_INFO "Intel machine check architecture supported.\n"); |
230 | rdmsr (MSR_IA32_MCG_CAP, l, h); | 230 | rdmsr(MSR_IA32_MCG_CAP, l, h); |
231 | if (l & (1<<8)) /* Control register present ? */ | 231 | if (l & (1<<8)) /* Control register present ? */ |
232 | wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); | 232 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); |
233 | nr_mce_banks = l & 0xff; | 233 | nr_mce_banks = l & 0xff; |
234 | 234 | ||
235 | for (i=0; i<nr_mce_banks; i++) { | 235 | for (i = 0; i < nr_mce_banks; i++) { |
236 | wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); | 236 | wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); |
237 | wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); | 237 | wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); |
238 | } | 238 | } |
239 | 239 | ||
240 | set_in_cr4 (X86_CR4_MCE); | 240 | set_in_cr4(X86_CR4_MCE); |
241 | printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", | 241 | printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", |
242 | smp_processor_id()); | 242 | smp_processor_id()); |
243 | 243 | ||
244 | /* Check for P4/Xeon extended MCE MSRs */ | 244 | /* Check for P4/Xeon extended MCE MSRs */ |
245 | rdmsr (MSR_IA32_MCG_CAP, l, h); | 245 | rdmsr(MSR_IA32_MCG_CAP, l, h); |
246 | if (l & (1<<9)) {/* MCG_EXT_P */ | 246 | if (l & (1<<9)) {/* MCG_EXT_P */ |
247 | mce_num_extended_msrs = (l >> 16) & 0xff; | 247 | mce_num_extended_msrs = (l >> 16) & 0xff; |
248 | printk (KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)" | 248 | printk(KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)" |
249 | " available\n", | 249 | " available\n", |
250 | smp_processor_id(), mce_num_extended_msrs); | 250 | smp_processor_id(), mce_num_extended_msrs); |
251 | 251 | ||
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 9f51e1ea9e82..84fd9f2a28ff 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c | |||
@@ -98,17 +98,6 @@ static void __init nvidia_bugs(int num, int slot, int func) | |||
98 | 98 | ||
99 | } | 99 | } |
100 | 100 | ||
101 | static void __init ati_bugs(int num, int slot, int func) | ||
102 | { | ||
103 | #ifdef CONFIG_X86_IO_APIC | ||
104 | if (timer_over_8254 == 1) { | ||
105 | timer_over_8254 = 0; | ||
106 | printk(KERN_INFO | ||
107 | "ATI board detected. Disabling timer routing over 8254.\n"); | ||
108 | } | ||
109 | #endif | ||
110 | } | ||
111 | |||
112 | #define QFLAG_APPLY_ONCE 0x1 | 101 | #define QFLAG_APPLY_ONCE 0x1 |
113 | #define QFLAG_APPLIED 0x2 | 102 | #define QFLAG_APPLIED 0x2 |
114 | #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) | 103 | #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) |
@@ -126,8 +115,6 @@ static struct chipset early_qrk[] __initdata = { | |||
126 | PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, nvidia_bugs }, | 115 | PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, nvidia_bugs }, |
127 | { PCI_VENDOR_ID_VIA, PCI_ANY_ID, | 116 | { PCI_VENDOR_ID_VIA, PCI_ANY_ID, |
128 | PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs }, | 117 | PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs }, |
129 | { PCI_VENDOR_ID_ATI, PCI_ANY_ID, | ||
130 | PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, ati_bugs }, | ||
131 | { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, | 118 | { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, |
132 | PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config }, | 119 | PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config }, |
133 | {} | 120 | {} |
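Dropping ati_bugs leaves the scan discipline of early_qrk[] unchanged. A toy model of that discipline; field order and QFLAG semantics follow the table above, the matching loop paraphrases the corresponding check_dev_quirk() logic, and the device IDs are illustrative:

#include <stdio.h>
#include <stdint.h>

#define PCI_ANY_ID		0xffff
#define QFLAG_APPLY_ONCE	0x1
#define QFLAG_APPLIED		0x2
#define QFLAG_DONE		(QFLAG_APPLY_ONCE|QFLAG_APPLIED)

struct chipset {
	uint16_t vendor, device;
	uint32_t class, class_mask, flags;
	void (*f)(int num, int slot, int func);
};

static void nvidia_bugs(int num, int slot, int func)
{
	printf("nvidia quirk at %02x:%02x.%x\n", num, slot, func);
}

static struct chipset early_qrk[] = {
	{ 0x10de, PCI_ANY_ID, 0x0604, PCI_ANY_ID, QFLAG_APPLY_ONCE,
	  nvidia_bugs },
	{}
};

static void check_dev(int num, int slot, int func,
		      uint16_t vendor, uint16_t device, uint32_t class)
{
	struct chipset *q;

	for (q = early_qrk; q->f; q++) {
		if (q->vendor != PCI_ANY_ID && q->vendor != vendor)
			continue;
		if (q->device != PCI_ANY_ID && q->device != device)
			continue;
		if ((q->class ^ class) & q->class_mask)
			continue;
		if ((q->flags & QFLAG_DONE) != QFLAG_DONE)
			q->f(num, slot, func);
		if (q->flags & QFLAG_APPLY_ONCE)
			q->flags |= QFLAG_APPLIED;
	}
}

int main(void)
{
	check_dev(0, 1, 0, 0x10de, 0x005e, 0x0604);	/* fires */
	check_dev(0, 2, 0, 0x10de, 0x005e, 0x0604);	/* suppressed */
	return 0;
}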
diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c index 5d23d85624d4..4b63c8e1f13b 100644 --- a/arch/x86/kernel/efi_32.c +++ b/arch/x86/kernel/efi_32.c | |||
@@ -49,13 +49,13 @@ void efi_call_phys_prelog(void) | |||
49 | local_irq_save(efi_rt_eflags); | 49 | local_irq_save(efi_rt_eflags); |
50 | 50 | ||
51 | /* | 51 | /* |
52 | * If I don't have PSE, I should just duplicate two entries in page | 52 | * If I don't have PAE, I should just duplicate two entries in page |
53 | * directory. If I have PSE, I just need to duplicate one entry in | 53 | * directory. If I have PAE, I just need to duplicate one entry in |
54 | * page directory. | 54 | * page directory. |
55 | */ | 55 | */ |
56 | cr4 = read_cr4(); | 56 | cr4 = read_cr4(); |
57 | 57 | ||
58 | if (cr4 & X86_CR4_PSE) { | 58 | if (cr4 & X86_CR4_PAE) { |
59 | efi_bak_pg_dir_pointer[0].pgd = | 59 | efi_bak_pg_dir_pointer[0].pgd = |
60 | swapper_pg_dir[pgd_index(0)].pgd; | 60 | swapper_pg_dir[pgd_index(0)].pgd; |
61 | swapper_pg_dir[0].pgd = | 61 | swapper_pg_dir[0].pgd = |
@@ -93,7 +93,7 @@ void efi_call_phys_epilog(void) | |||
93 | 93 | ||
94 | cr4 = read_cr4(); | 94 | cr4 = read_cr4(); |
95 | 95 | ||
96 | if (cr4 & X86_CR4_PSE) { | 96 | if (cr4 & X86_CR4_PAE) { |
97 | swapper_pg_dir[pgd_index(0)].pgd = | 97 | swapper_pg_dir[pgd_index(0)].pgd = |
98 | efi_bak_pg_dir_pointer[0].pgd; | 98 | efi_bak_pg_dir_pointer[0].pgd; |
99 | } else { | 99 | } else { |
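The predicate swap matters because 32-bit page-directory geometry is decided by PAE, not PSE: with PAE the pgd has 4 entries of 1 GiB each, without it 1024 entries of 4 MiB. A sanity check of the index arithmetic behind the duplicated entries, assuming the usual PAGE_OFFSET of 0xC0000000:

#include <stdio.h>

int main(void)
{
	unsigned long off = 0xC0000000UL;	/* assumed PAGE_OFFSET */

	/* PAE: 4 pgd entries of 1 GiB; kernel base lands in entry 3 */
	printf("PAE    pgd_index(PAGE_OFFSET) = %lu\n", off >> 30);
	/* non-PAE: 1024 entries of 4 MiB; kernel base is entry 768 */
	printf("no-PAE pgd_index(PAGE_OFFSET) = %lu\n", off >> 22);
	return 0;
}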
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 2a609dc3271c..c778e4fa55a2 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -248,6 +248,7 @@ ENTRY(resume_userspace) | |||
248 | DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt | 248 | DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt |
249 | # setting need_resched or sigpending | 249 | # setting need_resched or sigpending |
250 | # between sampling and the iret | 250 | # between sampling and the iret |
251 | TRACE_IRQS_OFF | ||
251 | movl TI_flags(%ebp), %ecx | 252 | movl TI_flags(%ebp), %ecx |
252 | andl $_TIF_WORK_MASK, %ecx # is there any work to be done on | 253 | andl $_TIF_WORK_MASK, %ecx # is there any work to be done on |
253 | # int/exception return? | 254 | # int/exception return? |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 556a8df522a7..5cf0aa993f4f 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -420,7 +420,6 @@ END(\label) | |||
420 | PTREGSCALL stub_clone, sys_clone, %r8 | 420 | PTREGSCALL stub_clone, sys_clone, %r8 |
421 | PTREGSCALL stub_fork, sys_fork, %rdi | 421 | PTREGSCALL stub_fork, sys_fork, %rdi |
422 | PTREGSCALL stub_vfork, sys_vfork, %rdi | 422 | PTREGSCALL stub_vfork, sys_vfork, %rdi |
423 | PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx | ||
424 | PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx | 423 | PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx |
425 | PTREGSCALL stub_iopl, sys_iopl, %rsi | 424 | PTREGSCALL stub_iopl, sys_iopl, %rsi |
426 | 425 | ||
@@ -1120,10 +1119,6 @@ ENTRY(coprocessor_segment_overrun) | |||
1120 | zeroentry do_coprocessor_segment_overrun | 1119 | zeroentry do_coprocessor_segment_overrun |
1121 | END(coprocessor_segment_overrun) | 1120 | END(coprocessor_segment_overrun) |
1122 | 1121 | ||
1123 | ENTRY(reserved) | ||
1124 | zeroentry do_reserved | ||
1125 | END(reserved) | ||
1126 | |||
1127 | /* runs on exception stack */ | 1122 | /* runs on exception stack */ |
1128 | ENTRY(double_fault) | 1123 | ENTRY(double_fault) |
1129 | XCPT_FRAME | 1124 | XCPT_FRAME |
diff --git a/arch/x86/kernel/geode_32.c b/arch/x86/kernel/geode_32.c index e8edd63ab000..9b08e852fd1a 100644 --- a/arch/x86/kernel/geode_32.c +++ b/arch/x86/kernel/geode_32.c | |||
@@ -166,6 +166,8 @@ int geode_has_vsa2(void) | |||
166 | static int has_vsa2 = -1; | 166 | static int has_vsa2 = -1; |
167 | 167 | ||
168 | if (has_vsa2 == -1) { | 168 | if (has_vsa2 == -1) { |
169 | u16 val; | ||
170 | |||
169 | /* | 171 | /* |
170 | * The VSA has virtual registers that we can query for a | 172 | * The VSA has virtual registers that we can query for a |
171 | * signature. | 173 | * signature. |
@@ -173,7 +175,8 @@ int geode_has_vsa2(void) | |||
173 | outw(VSA_VR_UNLOCK, VSA_VRC_INDEX); | 175 | outw(VSA_VR_UNLOCK, VSA_VRC_INDEX); |
174 | outw(VSA_VR_SIGNATURE, VSA_VRC_INDEX); | 176 | outw(VSA_VR_SIGNATURE, VSA_VRC_INDEX); |
175 | 177 | ||
176 | has_vsa2 = (inw(VSA_VRC_DATA) == VSA_SIG); | 178 | val = inw(VSA_VRC_DATA); |
179 | has_vsa2 = (val == AMD_VSA_SIG || val == GSW_VSA_SIG); | ||
177 | } | 180 | } |
178 | 181 | ||
179 | return has_vsa2; | 182 | return has_vsa2; |
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index b2cc73768a9d..f7357cc0162c 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -189,7 +189,7 @@ default_entry: | |||
189 | * this stage. | 189 | * this stage. |
190 | */ | 190 | */ |
191 | 191 | ||
192 | #define KPMDS ((0x100000000-__PAGE_OFFSET) >> 30) /* Number of kernel PMDs */ | 192 | #define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */ |
193 | 193 | ||
194 | xorl %ebx,%ebx /* %ebx is kept at zero */ | 194 | xorl %ebx,%ebx /* %ebx is kept at zero */ |
195 | 195 | ||
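Both KPMDS spellings agree: -__PAGE_OFFSET wraps modulo 2^32 to 0x100000000 - __PAGE_OFFSET, the shift counts 1 GiB units, and the new & 3 keeps the arithmetic safe for the assembler's word size. A worked check for the usual 32-bit PAGE_OFFSET choices (values assumed, not taken from this patch):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t offs[] = { 0xC0000000u, 0x80000000u, 0x40000000u };
	int i;

	for (i = 0; i < 3; i++) {
		/* -__PAGE_OFFSET wraps mod 2^32; >> 30 counts 1 GiB PMDs */
		uint32_t kpmds = ((uint32_t)(0u - offs[i]) >> 30) & 3;

		printf("PAGE_OFFSET %#x -> KPMDS %u\n",
		       (unsigned)offs[i], (unsigned)kpmds);
	}
	return 0;
}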
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 10a1955bb1d1..263b9d14753e 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <asm/page.h> | 18 | #include <asm/page.h> |
19 | #include <asm/msr.h> | 19 | #include <asm/msr.h> |
20 | #include <asm/cache.h> | 20 | #include <asm/cache.h> |
21 | #include <asm/processor-flags.h> | ||
21 | 22 | ||
22 | #ifdef CONFIG_PARAVIRT | 23 | #ifdef CONFIG_PARAVIRT |
23 | #include <asm/asm-offsets.h> | 24 | #include <asm/asm-offsets.h> |
@@ -128,7 +129,7 @@ ident_complete: | |||
128 | /* Fixup phys_base */ | 129 | /* Fixup phys_base */ |
129 | addq %rbp, phys_base(%rip) | 130 | addq %rbp, phys_base(%rip) |
130 | 131 | ||
131 | #ifdef CONFIG_SMP | 132 | #ifdef CONFIG_X86_TRAMPOLINE |
132 | addq %rbp, trampoline_level4_pgt + 0(%rip) | 133 | addq %rbp, trampoline_level4_pgt + 0(%rip) |
133 | addq %rbp, trampoline_level4_pgt + (511*8)(%rip) | 134 | addq %rbp, trampoline_level4_pgt + (511*8)(%rip) |
134 | #endif | 135 | #endif |
@@ -154,9 +155,7 @@ ENTRY(secondary_startup_64) | |||
154 | */ | 155 | */ |
155 | 156 | ||
156 | /* Enable PAE mode and PGE */ | 157 | /* Enable PAE mode and PGE */ |
157 | xorq %rax, %rax | 158 | movl $(X86_CR4_PAE | X86_CR4_PGE), %eax |
158 | btsq $5, %rax | ||
159 | btsq $7, %rax | ||
160 | movq %rax, %cr4 | 159 | movq %rax, %cr4 |
161 | 160 | ||
162 | /* Setup early boot stage 4 level pagetables. */ | 161 | /* Setup early boot stage 4 level pagetables. */ |
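The open-coded btsq pair set bits 5 and 7, which is exactly what the named flags from the newly included <asm/processor-flags.h> encode (X86_CR4_PAE = 1<<5, X86_CR4_PGE = 1<<7), so the loaded value is unchanged. A one-line check:

#include <stdio.h>

#define X86_CR4_PAE 0x00000020	/* bit 5 */
#define X86_CR4_PGE 0x00000080	/* bit 7 */

int main(void)
{
	/* old code: btsq $5 + btsq $7 on a zeroed register -> 0xa0 */
	printf("%#x\n", X86_CR4_PAE | X86_CR4_PGE);
	return 0;
}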
@@ -184,14 +183,10 @@ ENTRY(secondary_startup_64) | |||
184 | 1: wrmsr /* Make changes effective */ | 183 | 1: wrmsr /* Make changes effective */ |
185 | 184 | ||
186 | /* Setup cr0 */ | 185 | /* Setup cr0 */ |
187 | #define CR0_PM 1 /* protected mode */ | 186 | #define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \ |
188 | #define CR0_MP (1<<1) | 187 | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \ |
189 | #define CR0_ET (1<<4) | 188 | X86_CR0_PG) |
190 | #define CR0_NE (1<<5) | 189 | movl $CR0_STATE, %eax |
191 | #define CR0_WP (1<<16) | ||
192 | #define CR0_AM (1<<18) | ||
193 | #define CR0_PAGING (1<<31) | ||
194 | movl $CR0_PM|CR0_MP|CR0_ET|CR0_NE|CR0_WP|CR0_AM|CR0_PAGING,%eax | ||
195 | /* Make changes effective */ | 190 | /* Make changes effective */ |
196 | movq %rax, %cr0 | 191 | movq %rax, %cr0 |
197 | 192 | ||
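Same idea for cr0: the ad-hoc CR0_* defines are dropped in favour of the shared X86_CR0_* flags, and both spellings produce 0x80050033. Expanding the bits:

#include <stdio.h>

int main(void)
{
	unsigned int cr0 = (1u << 0)  |	/* PE: protected mode      */
			   (1u << 1)  |	/* MP: monitor coprocessor */
			   (1u << 4)  |	/* ET: extension type      */
			   (1u << 5)  |	/* NE: numeric error       */
			   (1u << 16) |	/* WP: write protect       */
			   (1u << 18) |	/* AM: alignment mask      */
			   (1u << 31);	/* PG: paging              */

	printf("CR0_STATE = %#x\n", cr0);	/* 0x80050033 */
	return 0;
}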
@@ -327,11 +322,11 @@ early_idt_ripmsg: | |||
327 | ENTRY(name) | 322 | ENTRY(name) |
328 | 323 | ||
329 | /* Automate the creation of 1 to 1 mapping pmd entries */ | 324 | /* Automate the creation of 1 to 1 mapping pmd entries */ |
330 | #define PMDS(START, PERM, COUNT) \ | 325 | #define PMDS(START, PERM, COUNT) \ |
331 | i = 0 ; \ | 326 | i = 0 ; \ |
332 | .rept (COUNT) ; \ | 327 | .rept (COUNT) ; \ |
333 | .quad (START) + (i << 21) + (PERM) ; \ | 328 | .quad (START) + (i << PMD_SHIFT) + (PERM) ; \ |
334 | i = i + 1 ; \ | 329 | i = i + 1 ; \ |
335 | .endr | 330 | .endr |
336 | 331 | ||
337 | /* | 332 | /* |
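Replacing the literal 21 with PMD_SHIFT changes nothing numerically on x86_64, where a PMD entry maps 2 MiB; it just ties the macro to the paging constants. The stride PMDS() generates, assuming PMD_SHIFT = 21:

#include <stdio.h>

#define PMD_SHIFT 21	/* x86_64: each PMD entry maps 2 MiB */

int main(void)
{
	int i;

	/* first entries of a 1:1 PMD mapping starting at physical 0 */
	for (i = 0; i < 4; i++)
		printf("pmd[%d] -> %#lx\n", i, (unsigned long)i << PMD_SHIFT);
	return 0;
}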
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 9b5cfcdfc426..ea230ec69057 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -17,7 +17,7 @@ | |||
17 | 17 | ||
18 | /* FSEC = 10^-15 | 18 | /* FSEC = 10^-15 |
19 | NSEC = 10^-9 */ | 19 | NSEC = 10^-9 */ |
20 | #define FSEC_PER_NSEC 1000000 | 20 | #define FSEC_PER_NSEC 1000000L |
21 | 21 | ||
22 | /* | 22 | /* |
23 | * HPET address is set in acpi/boot.c, when an ACPI entry exists | 23 | * HPET address is set in acpi/boot.c, when an ACPI entry exists |
@@ -206,20 +206,19 @@ static void hpet_enable_legacy_int(void) | |||
206 | 206 | ||
207 | static void hpet_legacy_clockevent_register(void) | 207 | static void hpet_legacy_clockevent_register(void) |
208 | { | 208 | { |
209 | uint64_t hpet_freq; | ||
210 | |||
211 | /* Start HPET legacy interrupts */ | 209 | /* Start HPET legacy interrupts */ |
212 | hpet_enable_legacy_int(); | 210 | hpet_enable_legacy_int(); |
213 | 211 | ||
214 | /* | 212 | /* |
215 | * The period is a femto seconds value. We need to calculate the | 213 | * The mult factor is defined as (include/linux/clockchips.h) |
216 | * scaled math multiplication factor for nanosecond to hpet tick | 214 | * mult/2^shift = cyc/ns (in contrast to ns/cyc in clocksource.h) |
217 | * conversion. | 215 | * hpet_period is in units of femtoseconds (per cycle), so |
216 | * mult/2^shift = cyc/ns = 10^6/hpet_period | ||
217 | * mult = (10^6 * 2^shift)/hpet_period | ||
218 | * mult = (FSEC_PER_NSEC << hpet_clockevent.shift)/hpet_period | ||
218 | */ | 219 | */ |
219 | hpet_freq = 1000000000000000ULL; | 220 | hpet_clockevent.mult = div_sc((unsigned long) FSEC_PER_NSEC, |
220 | do_div(hpet_freq, hpet_period); | 221 | hpet_period, hpet_clockevent.shift); |
221 | hpet_clockevent.mult = div_sc((unsigned long) hpet_freq, | ||
222 | NSEC_PER_SEC, hpet_clockevent.shift); | ||
223 | /* Calculate the min / max delta */ | 222 | /* Calculate the min / max delta */ |
224 | hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, | 223 | hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, |
225 | &hpet_clockevent); | 224 | &hpet_clockevent); |
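div_sc(a, b, shift) evaluates (a << shift) / b in 64 bits, so the new call is a literal transcription of the formula in the comment. A standalone rendition with a plausible 14.318 MHz HPET (period about 69841279 fs/cycle); the shift value of 32 is assumed from this era's hpet_clockevent definition:

#include <stdio.h>
#include <stdint.h>

#define FSEC_PER_NSEC 1000000L

/* same shape as the kernel's div_sc() helper */
static unsigned long div_sc(unsigned long a, unsigned long b, int shift)
{
	return (unsigned long)(((uint64_t)a << shift) / b);
}

int main(void)
{
	unsigned long hpet_period = 69841279;	/* fs/cycle, ~14.318 MHz */
	int shift = 32;				/* assumed clockevent shift */

	/* mult/2^shift = cyc/ns = 10^6 / hpet_period */
	printf("clockevent mult = %lu\n",
	       div_sc(FSEC_PER_NSEC, hpet_period, shift));
	return 0;
}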
@@ -324,7 +323,7 @@ static struct clocksource clocksource_hpet = { | |||
324 | 323 | ||
325 | static int hpet_clocksource_register(void) | 324 | static int hpet_clocksource_register(void) |
326 | { | 325 | { |
327 | u64 tmp, start, now; | 326 | u64 start, now; |
328 | cycle_t t1; | 327 | cycle_t t1; |
329 | 328 | ||
330 | /* Start the counter */ | 329 | /* Start the counter */ |
@@ -351,21 +350,15 @@ static int hpet_clocksource_register(void) | |||
351 | return -ENODEV; | 350 | return -ENODEV; |
352 | } | 351 | } |
353 | 352 | ||
354 | /* Initialize and register HPET clocksource | 353 | /* |
355 | * | 354 | * The definition of mult is (include/linux/clocksource.h) |
356 | * hpet period is in femto seconds per cycle | 355 | * mult/2^shift = ns/cyc and hpet_period is in units of fsec/cyc |
357 | * so we need to convert this to ns/cyc units | 356 | * so we first need to convert hpet_period to ns/cyc units: |
358 | * approximated by mult/2^shift | 357 | * mult/2^shift = ns/cyc = hpet_period/10^6 |
359 | * | 358 | * mult = (hpet_period * 2^shift)/10^6 |
360 | * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift | 359 | * mult = (hpet_period << shift)/FSEC_PER_NSEC |
361 | * fsec/cyc * 1ns/1000000fsec * 2^shift = mult | ||
362 | * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult | ||
363 | * (fsec/cyc << shift)/1000000 = mult | ||
364 | * (hpet_period << shift)/FSEC_PER_NSEC = mult | ||
365 | */ | 360 | */ |
366 | tmp = (u64)hpet_period << HPET_SHIFT; | 361 | clocksource_hpet.mult = div_sc(hpet_period, FSEC_PER_NSEC, HPET_SHIFT); |
367 | do_div(tmp, FSEC_PER_NSEC); | ||
368 | clocksource_hpet.mult = (u32)tmp; | ||
369 | 362 | ||
370 | clocksource_register(&clocksource_hpet); | 363 | clocksource_register(&clocksource_hpet); |
371 | 364 | ||
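The clocksource conversion runs the other way (ns per cycle instead of cycles per ns), so hpet_period and FSEC_PER_NSEC simply swap places. Continuing the same example, with HPET_SHIFT assumed to be 22 as defined in this file:

#include <stdio.h>
#include <stdint.h>

#define FSEC_PER_NSEC 1000000L
#define HPET_SHIFT 22				/* assumed from this file */

int main(void)
{
	uint64_t hpet_period = 69841279;	/* fs/cycle, ~14.318 MHz */

	/* mult/2^shift = ns/cyc = hpet_period / 10^6 */
	printf("clocksource mult = %lu\n",
	       (unsigned long)((hpet_period << HPET_SHIFT) / FSEC_PER_NSEC));
	return 0;
}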
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index e03cc952f233..95e80e5033c3 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
@@ -56,6 +56,11 @@ void __cpuinit mxcsr_feature_mask_init(void) | |||
56 | 56 | ||
57 | void __init init_thread_xstate(void) | 57 | void __init init_thread_xstate(void) |
58 | { | 58 | { |
59 | if (!HAVE_HWFP) { | ||
60 | xstate_size = sizeof(struct i387_soft_struct); | ||
61 | return; | ||
62 | } | ||
63 | |||
59 | if (cpu_has_fxsr) | 64 | if (cpu_has_fxsr) |
60 | xstate_size = sizeof(struct i387_fxsave_struct); | 65 | xstate_size = sizeof(struct i387_fxsave_struct); |
61 | #ifdef CONFIG_X86_32 | 66 | #ifdef CONFIG_X86_32 |
@@ -94,7 +99,7 @@ void __cpuinit fpu_init(void) | |||
94 | int init_fpu(struct task_struct *tsk) | 99 | int init_fpu(struct task_struct *tsk) |
95 | { | 100 | { |
96 | if (tsk_used_math(tsk)) { | 101 | if (tsk_used_math(tsk)) { |
97 | if (tsk == current) | 102 | if (HAVE_HWFP && tsk == current) |
98 | unlazy_fpu(tsk); | 103 | unlazy_fpu(tsk); |
99 | return 0; | 104 | return 0; |
100 | } | 105 | } |
@@ -109,6 +114,15 @@ int init_fpu(struct task_struct *tsk) | |||
109 | return -ENOMEM; | 114 | return -ENOMEM; |
110 | } | 115 | } |
111 | 116 | ||
117 | #ifdef CONFIG_X86_32 | ||
118 | if (!HAVE_HWFP) { | ||
119 | memset(tsk->thread.xstate, 0, xstate_size); | ||
120 | finit(); | ||
121 | set_stopped_child_used_math(tsk); | ||
122 | return 0; | ||
123 | } | ||
124 | #endif | ||
125 | |||
112 | if (cpu_has_fxsr) { | 126 | if (cpu_has_fxsr) { |
113 | struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; | 127 | struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; |
114 | 128 | ||
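The ordering here is deliberate: the xstate buffer must be allocated before the soft-FPU path can memset it, which is also why the fpregs_get()/fpregs_set() hunks below move their HAVE_HWFP tests after init_fpu(). A toy model of the resulting flow; all names are illustrative stand-ins, and HAVE_HWFP is hard-wired to 0 to exercise the math-emulation path:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define HAVE_HWFP 0	/* pretend: math emulation, no hardware FPU */

struct task { void *xstate; int used_math; };

static size_t xstate_size = 128;	/* i387_soft_struct stand-in */

static int init_fpu(struct task *tsk)
{
	if (tsk->used_math)		/* already initialised */
		return 0;

	tsk->xstate = malloc(xstate_size);	/* allocate first, always */
	if (!tsk->xstate)
		return -ENOMEM;

	if (!HAVE_HWFP) {		/* soft FPU: zeroed state is valid */
		memset(tsk->xstate, 0, xstate_size);
		tsk->used_math = 1;
		return 0;
	}

	tsk->used_math = 1;		/* hardware path would finit here */
	return 0;
}

int main(void)
{
	struct task t = { 0 };

	printf("init_fpu -> %d\n", init_fpu(&t));
	return 0;
}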
@@ -148,7 +162,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, | |||
148 | int ret; | 162 | int ret; |
149 | 163 | ||
150 | if (!cpu_has_fxsr) | 164 | if (!cpu_has_fxsr) |
151 | return -ENODEV; | 165 | return -EIO; |
152 | 166 | ||
153 | ret = init_fpu(target); | 167 | ret = init_fpu(target); |
154 | if (ret) | 168 | if (ret) |
@@ -165,7 +179,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
165 | int ret; | 179 | int ret; |
166 | 180 | ||
167 | if (!cpu_has_fxsr) | 181 | if (!cpu_has_fxsr) |
168 | return -ENODEV; | 182 | return -EIO; |
169 | 183 | ||
170 | ret = init_fpu(target); | 184 | ret = init_fpu(target); |
171 | if (ret) | 185 | if (ret) |
@@ -330,13 +344,13 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, | |||
330 | struct user_i387_ia32_struct env; | 344 | struct user_i387_ia32_struct env; |
331 | int ret; | 345 | int ret; |
332 | 346 | ||
333 | if (!HAVE_HWFP) | ||
334 | return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); | ||
335 | |||
336 | ret = init_fpu(target); | 347 | ret = init_fpu(target); |
337 | if (ret) | 348 | if (ret) |
338 | return ret; | 349 | return ret; |
339 | 350 | ||
351 | if (!HAVE_HWFP) | ||
352 | return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); | ||
353 | |||
340 | if (!cpu_has_fxsr) { | 354 | if (!cpu_has_fxsr) { |
341 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, | 355 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, |
342 | &target->thread.xstate->fsave, 0, | 356 | &target->thread.xstate->fsave, 0, |
@@ -360,15 +374,15 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
360 | struct user_i387_ia32_struct env; | 374 | struct user_i387_ia32_struct env; |
361 | int ret; | 375 | int ret; |
362 | 376 | ||
363 | if (!HAVE_HWFP) | ||
364 | return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); | ||
365 | |||
366 | ret = init_fpu(target); | 377 | ret = init_fpu(target); |
367 | if (ret) | 378 | if (ret) |
368 | return ret; | 379 | return ret; |
369 | 380 | ||
370 | set_stopped_child_used_math(target); | 381 | set_stopped_child_used_math(target); |
371 | 382 | ||
383 | if (!HAVE_HWFP) | ||
384 | return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); | ||
385 | |||
372 | if (!cpu_has_fxsr) { | 386 | if (!cpu_has_fxsr) { |
373 | return user_regset_copyin(&pos, &count, &kbuf, &ubuf, | 387 | return user_regset_copyin(&pos, &count, &kbuf, &ubuf, |
374 | &target->thread.xstate->fsave, 0, -1); | 388 | &target->thread.xstate->fsave, 0, -1); |
@@ -474,18 +488,18 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf) | |||
474 | int restore_i387_ia32(struct _fpstate_ia32 __user *buf) | 488 | int restore_i387_ia32(struct _fpstate_ia32 __user *buf) |
475 | { | 489 | { |
476 | int err; | 490 | int err; |
491 | struct task_struct *tsk = current; | ||
477 | 492 | ||
478 | if (HAVE_HWFP) { | 493 | if (HAVE_HWFP) |
479 | struct task_struct *tsk = current; | ||
480 | |||
481 | clear_fpu(tsk); | 494 | clear_fpu(tsk); |
482 | 495 | ||
483 | if (!used_math()) { | 496 | if (!used_math()) { |
484 | err = init_fpu(tsk); | 497 | err = init_fpu(tsk); |
485 | if (err) | 498 | if (err) |
486 | return err; | 499 | return err; |
487 | } | 500 | } |
488 | 501 | ||
502 | if (HAVE_HWFP) { | ||
489 | if (cpu_has_fxsr) | 503 | if (cpu_has_fxsr) |
490 | err = restore_i387_fxsave(buf); | 504 | err = restore_i387_fxsave(buf); |
491 | else | 505 | else |
diff --git a/arch/x86/kernel/i8259_32.c b/arch/x86/kernel/i8259.c index fe631967d625..7a0fda8f01b5 100644 --- a/arch/x86/kernel/i8259_32.c +++ b/arch/x86/kernel/i8259.c | |||
@@ -1,8 +1,10 @@ | |||
1 | #include <linux/linkage.h> | ||
1 | #include <linux/errno.h> | 2 | #include <linux/errno.h> |
2 | #include <linux/signal.h> | 3 | #include <linux/signal.h> |
3 | #include <linux/sched.h> | 4 | #include <linux/sched.h> |
4 | #include <linux/ioport.h> | 5 | #include <linux/ioport.h> |
5 | #include <linux/interrupt.h> | 6 | #include <linux/interrupt.h> |
7 | #include <linux/timex.h> | ||
6 | #include <linux/slab.h> | 8 | #include <linux/slab.h> |
7 | #include <linux/random.h> | 9 | #include <linux/random.h> |
8 | #include <linux/init.h> | 10 | #include <linux/init.h> |
@@ -10,10 +12,12 @@ | |||
10 | #include <linux/sysdev.h> | 12 | #include <linux/sysdev.h> |
11 | #include <linux/bitops.h> | 13 | #include <linux/bitops.h> |
12 | 14 | ||
15 | #include <asm/acpi.h> | ||
13 | #include <asm/atomic.h> | 16 | #include <asm/atomic.h> |
14 | #include <asm/system.h> | 17 | #include <asm/system.h> |
15 | #include <asm/io.h> | 18 | #include <asm/io.h> |
16 | #include <asm/timer.h> | 19 | #include <asm/timer.h> |
20 | #include <asm/hw_irq.h> | ||
17 | #include <asm/pgtable.h> | 21 | #include <asm/pgtable.h> |
18 | #include <asm/delay.h> | 22 | #include <asm/delay.h> |
19 | #include <asm/desc.h> | 23 | #include <asm/desc.h> |
@@ -32,7 +36,7 @@ static int i8259A_auto_eoi; | |||
32 | DEFINE_SPINLOCK(i8259A_lock); | 36 | DEFINE_SPINLOCK(i8259A_lock); |
33 | static void mask_and_ack_8259A(unsigned int); | 37 | static void mask_and_ack_8259A(unsigned int); |
34 | 38 | ||
35 | static struct irq_chip i8259A_chip = { | 39 | struct irq_chip i8259A_chip = { |
36 | .name = "XT-PIC", | 40 | .name = "XT-PIC", |
37 | .mask = disable_8259A_irq, | 41 | .mask = disable_8259A_irq, |
38 | .disable = disable_8259A_irq, | 42 | .disable = disable_8259A_irq, |
@@ -125,14 +129,14 @@ static inline int i8259A_irq_real(unsigned int irq) | |||
125 | int irqmask = 1<<irq; | 129 | int irqmask = 1<<irq; |
126 | 130 | ||
127 | if (irq < 8) { | 131 | if (irq < 8) { |
128 | outb(0x0B,PIC_MASTER_CMD); /* ISR register */ | 132 | outb(0x0B, PIC_MASTER_CMD); /* ISR register */ |
129 | value = inb(PIC_MASTER_CMD) & irqmask; | 133 | value = inb(PIC_MASTER_CMD) & irqmask; |
130 | outb(0x0A,PIC_MASTER_CMD); /* back to the IRR register */ | 134 | outb(0x0A, PIC_MASTER_CMD); /* back to the IRR register */ |
131 | return value; | 135 | return value; |
132 | } | 136 | } |
133 | outb(0x0B,PIC_SLAVE_CMD); /* ISR register */ | 137 | outb(0x0B, PIC_SLAVE_CMD); /* ISR register */ |
134 | value = inb(PIC_SLAVE_CMD) & (irqmask >> 8); | 138 | value = inb(PIC_SLAVE_CMD) & (irqmask >> 8); |
135 | outb(0x0A,PIC_SLAVE_CMD); /* back to the IRR register */ | 139 | outb(0x0A, PIC_SLAVE_CMD); /* back to the IRR register */ |
136 | return value; | 140 | return value; |
137 | } | 141 | } |
138 | 142 | ||
@@ -171,12 +175,14 @@ handle_real_irq: | |||
171 | if (irq & 8) { | 175 | if (irq & 8) { |
172 | inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */ | 176 | inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */ |
173 | outb(cached_slave_mask, PIC_SLAVE_IMR); | 177 | outb(cached_slave_mask, PIC_SLAVE_IMR); |
174 | outb(0x60+(irq&7),PIC_SLAVE_CMD);/* 'Specific EOI' to slave */ | 178 | /* 'Specific EOI' to slave */ |
175 | outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); /* 'Specific EOI' to master-IRQ2 */ | 179 | outb(0x60+(irq&7), PIC_SLAVE_CMD); |
180 | /* 'Specific EOI' to master-IRQ2 */ | ||
181 | outb(0x60+PIC_CASCADE_IR, PIC_MASTER_CMD); | ||
176 | } else { | 182 | } else { |
177 | inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */ | 183 | inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */ |
178 | outb(cached_master_mask, PIC_MASTER_IMR); | 184 | outb(cached_master_mask, PIC_MASTER_IMR); |
179 | outb(0x60+irq,PIC_MASTER_CMD); /* 'Specific EOI' to master */ | 185 | outb(0x60+irq, PIC_MASTER_CMD); /* 'Specific EOI' to master */ |
180 | } | 186 | } |
181 | spin_unlock_irqrestore(&i8259A_lock, flags); | 187 | spin_unlock_irqrestore(&i8259A_lock, flags); |
182 | return; | 188 | return; |
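The reflowed comments describe standard 8259A OCW2 'specific EOI' writes: command byte 0x60 plus the IR level (0-7) to the owning PIC, and for slave IRQs a second EOI for the master's cascade input (IR2). Worked command bytes for a slave interrupt, say IRQ 12:

#include <stdio.h>

#define PIC_CASCADE_IR 2

int main(void)
{
	unsigned int irq = 12;		/* lives on the slave 8259A */

	printf("slave EOI:  %#x\n", 0x60 + (irq & 7));		/* 0x64 */
	printf("master EOI: %#x\n", 0x60 + PIC_CASCADE_IR);	/* 0x62 */
	return 0;
}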
@@ -199,7 +205,8 @@ spurious_8259A_irq: | |||
199 | * lets ACK and report it. [once per IRQ] | 205 | * lets ACK and report it. [once per IRQ] |
200 | */ | 206 | */ |
201 | if (!(spurious_irq_mask & irqmask)) { | 207 | if (!(spurious_irq_mask & irqmask)) { |
202 | printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq); | 208 | printk(KERN_DEBUG |
209 | "spurious 8259A interrupt: IRQ%d.\n", irq); | ||
203 | spurious_irq_mask |= irqmask; | 210 | spurious_irq_mask |= irqmask; |
204 | } | 211 | } |
205 | atomic_inc(&irq_err_count); | 212 | atomic_inc(&irq_err_count); |
@@ -290,17 +297,34 @@ void init_8259A(int auto_eoi) | |||
290 | * outb_pic - this has to work on a wide range of PC hardware. | 297 | * outb_pic - this has to work on a wide range of PC hardware. |
291 | */ | 298 | */ |
292 | outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ | 299 | outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ |
300 | #ifndef CONFIG_X86_64 | ||
293 | outb_pic(0x20 + 0, PIC_MASTER_IMR); /* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */ | 301 | outb_pic(0x20 + 0, PIC_MASTER_IMR); /* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */ |
294 | outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); /* 8259A-1 (the master) has a slave on IR2 */ | 302 | outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); /* 8259A-1 (the master) has a slave on IR2 */ |
303 | #else /* CONFIG_X86_64 */ | ||
304 | /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */ | ||
305 | outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR); | ||
306 | /* 8259A-1 (the master) has a slave on IR2 */ | ||
307 | outb_pic(0x04, PIC_MASTER_IMR); | ||
308 | #endif /* CONFIG_X86_64 */ | ||
295 | if (auto_eoi) /* master does Auto EOI */ | 309 | if (auto_eoi) /* master does Auto EOI */ |
296 | outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); | 310 | outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); |
297 | else /* master expects normal EOI */ | 311 | else /* master expects normal EOI */ |
298 | outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); | 312 | outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); |
299 | 313 | ||
300 | outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ | 314 | outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ |
315 | #ifndef CONFIG_X86_64 | ||
301 | outb_pic(0x20 + 8, PIC_SLAVE_IMR); /* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */ | 316 | outb_pic(0x20 + 8, PIC_SLAVE_IMR); /* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */ |
302 | outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); /* 8259A-2 is a slave on master's IR2 */ | 317 | outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); /* 8259A-2 is a slave on master's IR2 */ |
303 | outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */ | 318 | outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */ |
319 | #else /* CONFIG_X86_64 */ | ||
320 | /* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */ | ||
321 | outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR); | ||
322 | /* 8259A-2 is a slave on master's IR2 */ | ||
323 | outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); | ||
324 | /* (slave's support for AEOI in flat mode is to be investigated) */ | ||
325 | outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); | ||
326 | |||
327 | #endif /* CONFIG_X86_64 */ | ||
304 | if (auto_eoi) | 328 | if (auto_eoi) |
305 | /* | 329 | /* |
306 | * In AEOI mode we just have to mask the interrupt | 330 | * In AEOI mode we just have to mask the interrupt |
@@ -317,93 +341,3 @@ void init_8259A(int auto_eoi) | |||
317 | 341 | ||
318 | spin_unlock_irqrestore(&i8259A_lock, flags); | 342 | spin_unlock_irqrestore(&i8259A_lock, flags); |
319 | } | 343 | } |
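The #ifdef pair above isolates the one real divergence between the former 32- and 64-bit copies: the ICW2 vector bases. 32-bit keeps the legacy IRQs at vectors 0x20-0x2f, 64-bit (via IRQ0_VECTOR/IRQ8_VECTOR) at 0x30-0x3f, as the comments state. A quick enumeration of the resulting mapping:

#include <stdio.h>

#define IRQ0_VECTOR 0x30	/* 64-bit base, per the comments above */

int main(void)
{
	int irq;

	for (irq = 0; irq < 16; irq++)
		printf("IRQ%-2d -> vector %#x (32-bit) / %#x (64-bit)\n",
		       irq, 0x20 + irq, IRQ0_VECTOR + irq);
	return 0;
}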
320 | |||
321 | /* | ||
322 | * Note that on a 486, we don't want to do a SIGFPE on an irq13 | ||
323 | * as the irq is unreliable, and exception 16 works correctly | ||
324 | * (ie as explained in the intel literature). On a 386, you | ||
325 | * can't use exception 16 due to bad IBM design, so we have to | ||
326 | * rely on the less exact irq13. | ||
327 | * | ||
328 | * Careful.. Not only is IRQ13 unreliable, but it also | ||
329 | * leads to races. IBM designers who came up with it should | ||
330 | * be shot. | ||
331 | */ | ||
332 | |||
333 | |||
334 | static irqreturn_t math_error_irq(int cpl, void *dev_id) | ||
335 | { | ||
336 | extern void math_error(void __user *); | ||
337 | outb(0,0xF0); | ||
338 | if (ignore_fpu_irq || !boot_cpu_data.hard_math) | ||
339 | return IRQ_NONE; | ||
340 | math_error((void __user *)get_irq_regs()->ip); | ||
341 | return IRQ_HANDLED; | ||
342 | } | ||
343 | |||
344 | /* | ||
345 | * New motherboards sometimes make IRQ 13 be a PCI interrupt, | ||
346 | * so allow interrupt sharing. | ||
347 | */ | ||
348 | static struct irqaction fpu_irq = { | ||
349 | .handler = math_error_irq, | ||
350 | .mask = CPU_MASK_NONE, | ||
351 | .name = "fpu", | ||
352 | }; | ||
353 | |||
354 | void __init init_ISA_irqs (void) | ||
355 | { | ||
356 | int i; | ||
357 | |||
358 | #ifdef CONFIG_X86_LOCAL_APIC | ||
359 | init_bsp_APIC(); | ||
360 | #endif | ||
361 | init_8259A(0); | ||
362 | |||
363 | /* | ||
364 | * 16 old-style INTA-cycle interrupts: | ||
365 | */ | ||
366 | for (i = 0; i < 16; i++) { | ||
367 | set_irq_chip_and_handler_name(i, &i8259A_chip, | ||
368 | handle_level_irq, "XT"); | ||
369 | } | ||
370 | } | ||
371 | |||
372 | /* Overridden in paravirt.c */ | ||
373 | void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); | ||
374 | |||
375 | void __init native_init_IRQ(void) | ||
376 | { | ||
377 | int i; | ||
378 | |||
379 | /* all the set up before the call gates are initialised */ | ||
380 | pre_intr_init_hook(); | ||
381 | |||
382 | /* | ||
383 | * Cover the whole vector space, no vector can escape | ||
384 | * us. (some of these will be overridden and become | ||
385 | * 'special' SMP interrupts) | ||
386 | */ | ||
387 | for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { | ||
388 | int vector = FIRST_EXTERNAL_VECTOR + i; | ||
389 | if (i >= NR_IRQS) | ||
390 | break; | ||
391 | /* SYSCALL_VECTOR was reserved in trap_init. */ | ||
392 | if (!test_bit(vector, used_vectors)) | ||
393 | set_intr_gate(vector, interrupt[i]); | ||
394 | } | ||
395 | |||
396 | /* setup after call gates are initialised (usually add in | ||
397 | * the architecture specific gates) | ||
398 | */ | ||
399 | intr_init_hook(); | ||
400 | |||
401 | /* | ||
402 | * External FPU? Set up irq13 if so, for | ||
403 | * original braindamaged IBM FERR coupling. | ||
404 | */ | ||
405 | if (boot_cpu_data.hard_math && !cpu_has_fpu) | ||
406 | setup_irq(FPU_IRQ, &fpu_irq); | ||
407 | |||
408 | irq_ctx_init(smp_processor_id()); | ||
409 | } | ||
diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c deleted file mode 100644 index fa57a1568508..000000000000 --- a/arch/x86/kernel/i8259_64.c +++ /dev/null | |||
@@ -1,512 +0,0 @@ | |||
1 | #include <linux/linkage.h> | ||
2 | #include <linux/errno.h> | ||
3 | #include <linux/signal.h> | ||
4 | #include <linux/sched.h> | ||
5 | #include <linux/ioport.h> | ||
6 | #include <linux/interrupt.h> | ||
7 | #include <linux/timex.h> | ||
8 | #include <linux/slab.h> | ||
9 | #include <linux/random.h> | ||
10 | #include <linux/init.h> | ||
11 | #include <linux/kernel_stat.h> | ||
12 | #include <linux/sysdev.h> | ||
13 | #include <linux/bitops.h> | ||
14 | |||
15 | #include <asm/acpi.h> | ||
16 | #include <asm/atomic.h> | ||
17 | #include <asm/system.h> | ||
18 | #include <asm/io.h> | ||
19 | #include <asm/hw_irq.h> | ||
20 | #include <asm/pgtable.h> | ||
21 | #include <asm/delay.h> | ||
22 | #include <asm/desc.h> | ||
23 | #include <asm/apic.h> | ||
24 | #include <asm/i8259.h> | ||
25 | |||
26 | /* | ||
27 | * Common place to define all x86 IRQ vectors | ||
28 | * | ||
29 | * This builds up the IRQ handler stubs using some ugly macros in irq.h | ||
30 | * | ||
31 | * These macros create the low-level assembly IRQ routines that save | ||
32 | * register context and call do_IRQ(). do_IRQ() then does all the | ||
33 | * operations that are needed to keep the AT (or SMP IOAPIC) | ||
34 | * interrupt-controller happy. | ||
35 | */ | ||
36 | |||
37 | #define BI(x,y) \ | ||
38 | BUILD_IRQ(x##y) | ||
39 | |||
40 | #define BUILD_16_IRQS(x) \ | ||
41 | BI(x,0) BI(x,1) BI(x,2) BI(x,3) \ | ||
42 | BI(x,4) BI(x,5) BI(x,6) BI(x,7) \ | ||
43 | BI(x,8) BI(x,9) BI(x,a) BI(x,b) \ | ||
44 | BI(x,c) BI(x,d) BI(x,e) BI(x,f) | ||
45 | |||
46 | /* | ||
47 | * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: | ||
48 | * (these are usually mapped to vectors 0x30-0x3f) | ||
49 | */ | ||
50 | |||
51 | /* | ||
52 | * The IO-APIC gives us many more interrupt sources. Most of these | ||
53 | * are unused but an SMP system is supposed to have enough memory ... | ||
54 | * sometimes (mostly wrt. hw bugs) we get corrupted vectors all | ||
55 | * across the spectrum, so we really want to be prepared to get all | ||
56 | * of these. Plus, more powerful systems might have more than 64 | ||
57 | * IO-APIC registers. | ||
58 | * | ||
59 | * (these are usually mapped into the 0x30-0xff vector range) | ||
60 | */ | ||
61 | BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3) | ||
62 | BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7) | ||
63 | BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb) | ||
64 | BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf) | ||
65 | |||
66 | #undef BUILD_16_IRQS | ||
67 | #undef BI | ||
68 | |||
69 | |||
70 | #define IRQ(x,y) \ | ||
71 | IRQ##x##y##_interrupt | ||
72 | |||
73 | #define IRQLIST_16(x) \ | ||
74 | IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \ | ||
75 | IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \ | ||
76 | IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ | ||
77 | IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f) | ||
78 | |||
79 | /* for the irq vectors */ | ||
80 | static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = { | ||
81 | IRQLIST_16(0x2), IRQLIST_16(0x3), | ||
82 | IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7), | ||
83 | IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb), | ||
84 | IRQLIST_16(0xc), IRQLIST_16(0xd), IRQLIST_16(0xe), IRQLIST_16(0xf) | ||
85 | }; | ||
86 | |||
87 | #undef IRQ | ||
88 | #undef IRQLIST_16 | ||
89 | |||
90 | /* | ||
91 | * This is the 'legacy' 8259A Programmable Interrupt Controller, | ||
92 | * present in the majority of PC/AT boxes. | ||
93 | * plus some generic x86 specific things if generic specifics makes | ||
94 | * any sense at all. | ||
95 | * this file should become arch/i386/kernel/irq.c when the old irq.c | ||
96 | * moves to arch independent land | ||
97 | */ | ||
98 | |||
99 | static int i8259A_auto_eoi; | ||
100 | DEFINE_SPINLOCK(i8259A_lock); | ||
101 | static void mask_and_ack_8259A(unsigned int); | ||
102 | |||
103 | static struct irq_chip i8259A_chip = { | ||
104 | .name = "XT-PIC", | ||
105 | .mask = disable_8259A_irq, | ||
106 | .disable = disable_8259A_irq, | ||
107 | .unmask = enable_8259A_irq, | ||
108 | .mask_ack = mask_and_ack_8259A, | ||
109 | }; | ||
110 | |||
111 | /* | ||
112 | * 8259A PIC functions to handle ISA devices: | ||
113 | */ | ||
114 | |||
115 | /* | ||
116 | * This contains the irq mask for both 8259A irq controllers, | ||
117 | */ | ||
118 | unsigned int cached_irq_mask = 0xffff; | ||
119 | |||
120 | /* | ||
121 | * Not all IRQs can be routed through the IO-APIC, eg. on certain (older) | ||
122 | * boards the timer interrupt is not really connected to any IO-APIC pin, | ||
123 | * it's fed to the master 8259A's IR0 line only. | ||
124 | * | ||
125 | * Any '1' bit in this mask means the IRQ is routed through the IO-APIC. | ||
126 | * this 'mixed mode' IRQ handling costs nothing because it's only used | ||
127 | * at IRQ setup time. | ||
128 | */ | ||
129 | unsigned long io_apic_irqs; | ||
130 | |||
131 | void disable_8259A_irq(unsigned int irq) | ||
132 | { | ||
133 | unsigned int mask = 1 << irq; | ||
134 | unsigned long flags; | ||
135 | |||
136 | spin_lock_irqsave(&i8259A_lock, flags); | ||
137 | cached_irq_mask |= mask; | ||
138 | if (irq & 8) | ||
139 | outb(cached_slave_mask, PIC_SLAVE_IMR); | ||
140 | else | ||
141 | outb(cached_master_mask, PIC_MASTER_IMR); | ||
142 | spin_unlock_irqrestore(&i8259A_lock, flags); | ||
143 | } | ||
144 | |||
145 | void enable_8259A_irq(unsigned int irq) | ||
146 | { | ||
147 | unsigned int mask = ~(1 << irq); | ||
148 | unsigned long flags; | ||
149 | |||
150 | spin_lock_irqsave(&i8259A_lock, flags); | ||
151 | cached_irq_mask &= mask; | ||
152 | if (irq & 8) | ||
153 | outb(cached_slave_mask, PIC_SLAVE_IMR); | ||
154 | else | ||
155 | outb(cached_master_mask, PIC_MASTER_IMR); | ||
156 | spin_unlock_irqrestore(&i8259A_lock, flags); | ||
157 | } | ||
158 | |||
159 | int i8259A_irq_pending(unsigned int irq) | ||
160 | { | ||
161 | unsigned int mask = 1<<irq; | ||
162 | unsigned long flags; | ||
163 | int ret; | ||
164 | |||
165 | spin_lock_irqsave(&i8259A_lock, flags); | ||
166 | if (irq < 8) | ||
167 | ret = inb(PIC_MASTER_CMD) & mask; | ||
168 | else | ||
169 | ret = inb(PIC_SLAVE_CMD) & (mask >> 8); | ||
170 | spin_unlock_irqrestore(&i8259A_lock, flags); | ||
171 | |||
172 | return ret; | ||
173 | } | ||
174 | |||
175 | void make_8259A_irq(unsigned int irq) | ||
176 | { | ||
177 | disable_irq_nosync(irq); | ||
178 | io_apic_irqs &= ~(1<<irq); | ||
179 | set_irq_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq, | ||
180 | "XT"); | ||
181 | enable_irq(irq); | ||
182 | } | ||
183 | |||
184 | /* | ||
185 | * This function assumes to be called rarely. Switching between | ||
186 | * 8259A registers is slow. | ||
187 | * This has to be protected by the irq controller spinlock | ||
188 | * before being called. | ||
189 | */ | ||
190 | static inline int i8259A_irq_real(unsigned int irq) | ||
191 | { | ||
192 | int value; | ||
193 | int irqmask = 1<<irq; | ||
194 | |||
195 | if (irq < 8) { | ||
196 | outb(0x0B,PIC_MASTER_CMD); /* ISR register */ | ||
197 | value = inb(PIC_MASTER_CMD) & irqmask; | ||
198 | outb(0x0A,PIC_MASTER_CMD); /* back to the IRR register */ | ||
199 | return value; | ||
200 | } | ||
201 | outb(0x0B,PIC_SLAVE_CMD); /* ISR register */ | ||
202 | value = inb(PIC_SLAVE_CMD) & (irqmask >> 8); | ||
203 | outb(0x0A,PIC_SLAVE_CMD); /* back to the IRR register */ | ||
204 | return value; | ||
205 | } | ||
206 | |||
207 | /* | ||
208 | * Careful! The 8259A is a fragile beast, it pretty | ||
209 | * much _has_ to be done exactly like this (mask it | ||
210 | * first, _then_ send the EOI, and the order of EOI | ||
211 | * to the two 8259s is important! | ||
212 | */ | ||
213 | static void mask_and_ack_8259A(unsigned int irq) | ||
214 | { | ||
215 | unsigned int irqmask = 1 << irq; | ||
216 | unsigned long flags; | ||
217 | |||
218 | spin_lock_irqsave(&i8259A_lock, flags); | ||
219 | /* | ||
220 | * Lightweight spurious IRQ detection. We do not want | ||
221 | * to overdo spurious IRQ handling - it's usually a sign | ||
222 | * of hardware problems, so we only do the checks we can | ||
223 | * do without slowing down good hardware unnecessarily. | ||
224 | * | ||
225 | * Note that IRQ7 and IRQ15 (the two spurious IRQs | ||
226 | * usually resulting from the 8259A-1|2 PICs) occur | ||
227 | * even if the IRQ is masked in the 8259A. Thus we | ||
228 | * can check spurious 8259A IRQs without doing the | ||
229 | * quite slow i8259A_irq_real() call for every IRQ. | ||
230 | * This does not cover 100% of spurious interrupts, | ||
231 | * but should be enough to warn the user that there | ||
232 | * is something bad going on ... | ||
233 | */ | ||
234 | if (cached_irq_mask & irqmask) | ||
235 | goto spurious_8259A_irq; | ||
236 | cached_irq_mask |= irqmask; | ||
237 | |||
238 | handle_real_irq: | ||
239 | if (irq & 8) { | ||
240 | inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */ | ||
241 | outb(cached_slave_mask, PIC_SLAVE_IMR); | ||
242 | /* 'Specific EOI' to slave */ | ||
243 | outb(0x60+(irq&7),PIC_SLAVE_CMD); | ||
244 | /* 'Specific EOI' to master-IRQ2 */ | ||
245 | outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); | ||
246 | } else { | ||
247 | inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */ | ||
248 | outb(cached_master_mask, PIC_MASTER_IMR); | ||
249 | /* 'Specific EOI' to master */ | ||
250 | outb(0x60+irq,PIC_MASTER_CMD); | ||
251 | } | ||
252 | spin_unlock_irqrestore(&i8259A_lock, flags); | ||
253 | return; | ||
254 | |||
255 | spurious_8259A_irq: | ||
256 | /* | ||
257 | * this is the slow path - should happen rarely. | ||
258 | */ | ||
259 | if (i8259A_irq_real(irq)) | ||
260 | /* | ||
261 | * oops, the IRQ _is_ in service according to the | ||
262 | * 8259A - not spurious, go handle it. | ||
263 | */ | ||
264 | goto handle_real_irq; | ||
265 | |||
266 | { | ||
267 | static int spurious_irq_mask; | ||
268 | /* | ||
269 | * At this point we can be sure the IRQ is spurious, | ||
270 | * lets ACK and report it. [once per IRQ] | ||
271 | */ | ||
272 | if (!(spurious_irq_mask & irqmask)) { | ||
273 | printk(KERN_DEBUG | ||
274 | "spurious 8259A interrupt: IRQ%d.\n", irq); | ||
275 | spurious_irq_mask |= irqmask; | ||
276 | } | ||
277 | atomic_inc(&irq_err_count); | ||
278 | /* | ||
279 | * Theoretically we do not have to handle this IRQ, | ||
280 | * but in Linux this does not cause problems and is | ||
281 | * simpler for us. | ||
282 | */ | ||
283 | goto handle_real_irq; | ||
284 | } | ||
285 | } | ||
286 | |||
287 | static char irq_trigger[2]; | ||
288 | /** | ||
289 | * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ | ||
290 | */ | ||
291 | static void restore_ELCR(char *trigger) | ||
292 | { | ||
293 | outb(trigger[0], 0x4d0); | ||
294 | outb(trigger[1], 0x4d1); | ||
295 | } | ||
296 | |||
297 | static void save_ELCR(char *trigger) | ||
298 | { | ||
299 | /* IRQ 0,1,2,8,13 are marked as reserved */ | ||
300 | trigger[0] = inb(0x4d0) & 0xF8; | ||
301 | trigger[1] = inb(0x4d1) & 0xDE; | ||
302 | } | ||
303 | |||
304 | static int i8259A_resume(struct sys_device *dev) | ||
305 | { | ||
306 | init_8259A(i8259A_auto_eoi); | ||
307 | restore_ELCR(irq_trigger); | ||
308 | return 0; | ||
309 | } | ||
310 | |||
311 | static int i8259A_suspend(struct sys_device *dev, pm_message_t state) | ||
312 | { | ||
313 | save_ELCR(irq_trigger); | ||
314 | return 0; | ||
315 | } | ||
316 | |||
317 | static int i8259A_shutdown(struct sys_device *dev) | ||
318 | { | ||
319 | /* Put the i8259A into a quiescent state that | ||
320 | * the kernel initialization code can get it | ||
321 | * out of. | ||
322 | */ | ||
323 | outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ | ||
324 | outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */ | ||
325 | return 0; | ||
326 | } | ||
327 | |||
328 | static struct sysdev_class i8259_sysdev_class = { | ||
329 | .name = "i8259", | ||
330 | .suspend = i8259A_suspend, | ||
331 | .resume = i8259A_resume, | ||
332 | .shutdown = i8259A_shutdown, | ||
333 | }; | ||
334 | |||
335 | static struct sys_device device_i8259A = { | ||
336 | .id = 0, | ||
337 | .cls = &i8259_sysdev_class, | ||
338 | }; | ||
339 | |||
340 | static int __init i8259A_init_sysfs(void) | ||
341 | { | ||
342 | int error = sysdev_class_register(&i8259_sysdev_class); | ||
343 | if (!error) | ||
344 | error = sysdev_register(&device_i8259A); | ||
345 | return error; | ||
346 | } | ||
347 | |||
348 | device_initcall(i8259A_init_sysfs); | ||
349 | |||
350 | void init_8259A(int auto_eoi) | ||
351 | { | ||
352 | unsigned long flags; | ||
353 | |||
354 | i8259A_auto_eoi = auto_eoi; | ||
355 | |||
356 | spin_lock_irqsave(&i8259A_lock, flags); | ||
357 | |||
358 | outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ | ||
359 | outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ | ||
360 | |||
361 | /* | ||
362 | * outb_pic - this has to work on a wide range of PC hardware. | ||
363 | */ | ||
364 | outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ | ||
365 | /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 */ | ||
366 | outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR); | ||
367 | /* 8259A-1 (the master) has a slave on IR2 */ | ||
368 | outb_pic(0x04, PIC_MASTER_IMR); | ||
369 | if (auto_eoi) /* master does Auto EOI */ | ||
370 | outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); | ||
371 | else /* master expects normal EOI */ | ||
372 | outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); | ||
373 | |||
374 | outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ | ||
375 | /* ICW2: 8259A-2 IR0-7 mapped to 0x38-0x3f */ | ||
376 | outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR); | ||
377 | /* 8259A-2 is a slave on master's IR2 */ | ||
378 | outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); | ||
379 | /* (slave's support for AEOI in flat mode is to be investigated) */ | ||
380 | outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); | ||
381 | |||
382 | if (auto_eoi) | ||
383 | /* | ||
384 | * In AEOI mode we just have to mask the interrupt | ||
385 | * when acking. | ||
386 | */ | ||
387 | i8259A_chip.mask_ack = disable_8259A_irq; | ||
388 | else | ||
389 | i8259A_chip.mask_ack = mask_and_ack_8259A; | ||
390 | |||
391 | udelay(100); /* wait for 8259A to initialize */ | ||
392 | |||
393 | outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ | ||
394 | outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ | ||
395 | |||
396 | spin_unlock_irqrestore(&i8259A_lock, flags); | ||
397 | } | ||
398 | |||
399 | |||
400 | |||
401 | |||
402 | /* | ||
403 | * IRQ2 is cascade interrupt to second interrupt controller | ||
404 | */ | ||
405 | |||
406 | static struct irqaction irq2 = { | ||
407 | .handler = no_action, | ||
408 | .mask = CPU_MASK_NONE, | ||
409 | .name = "cascade", | ||
410 | }; | ||
411 | DEFINE_PER_CPU(vector_irq_t, vector_irq) = { | ||
412 | [0 ... IRQ0_VECTOR - 1] = -1, | ||
413 | [IRQ0_VECTOR] = 0, | ||
414 | [IRQ1_VECTOR] = 1, | ||
415 | [IRQ2_VECTOR] = 2, | ||
416 | [IRQ3_VECTOR] = 3, | ||
417 | [IRQ4_VECTOR] = 4, | ||
418 | [IRQ5_VECTOR] = 5, | ||
419 | [IRQ6_VECTOR] = 6, | ||
420 | [IRQ7_VECTOR] = 7, | ||
421 | [IRQ8_VECTOR] = 8, | ||
422 | [IRQ9_VECTOR] = 9, | ||
423 | [IRQ10_VECTOR] = 10, | ||
424 | [IRQ11_VECTOR] = 11, | ||
425 | [IRQ12_VECTOR] = 12, | ||
426 | [IRQ13_VECTOR] = 13, | ||
427 | [IRQ14_VECTOR] = 14, | ||
428 | [IRQ15_VECTOR] = 15, | ||
429 | [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 | ||
430 | }; | ||
431 | |||
432 | void __init init_ISA_irqs (void) | ||
433 | { | ||
434 | int i; | ||
435 | |||
436 | init_bsp_APIC(); | ||
437 | init_8259A(0); | ||
438 | |||
439 | for (i = 0; i < NR_IRQS; i++) { | ||
440 | irq_desc[i].status = IRQ_DISABLED; | ||
441 | irq_desc[i].action = NULL; | ||
442 | irq_desc[i].depth = 1; | ||
443 | |||
444 | if (i < 16) { | ||
445 | /* | ||
446 | * 16 old-style INTA-cycle interrupts: | ||
447 | */ | ||
448 | set_irq_chip_and_handler_name(i, &i8259A_chip, | ||
449 | handle_level_irq, "XT"); | ||
450 | } else { | ||
451 | /* | ||
452 | * 'high' PCI IRQs filled in on demand | ||
453 | */ | ||
454 | irq_desc[i].chip = &no_irq_chip; | ||
455 | } | ||
456 | } | ||
457 | } | ||
458 | |||
459 | void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); | ||
460 | |||
461 | void __init native_init_IRQ(void) | ||
462 | { | ||
463 | int i; | ||
464 | |||
465 | init_ISA_irqs(); | ||
466 | /* | ||
467 | * Cover the whole vector space, no vector can escape | ||
468 | * us. (some of these will be overridden and become | ||
469 | * 'special' SMP interrupts) | ||
470 | */ | ||
471 | for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { | ||
472 | int vector = FIRST_EXTERNAL_VECTOR + i; | ||
473 | if (vector != IA32_SYSCALL_VECTOR) | ||
474 | set_intr_gate(vector, interrupt[i]); | ||
475 | } | ||
476 | |||
477 | #ifdef CONFIG_SMP | ||
478 | /* | ||
479 | * The reschedule interrupt is a CPU-to-CPU reschedule-helper | ||
480 | * IPI, driven by wakeup. | ||
481 | */ | ||
482 | set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); | ||
483 | |||
484 | /* IPIs for invalidation */ | ||
485 | set_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0); | ||
486 | set_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1); | ||
487 | set_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2); | ||
488 | set_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3); | ||
489 | set_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4); | ||
490 | set_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5); | ||
491 | set_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6); | ||
492 | set_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7); | ||
493 | |||
494 | /* IPI for generic function call */ | ||
495 | set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); | ||
496 | |||
497 | /* Low priority IPI to cleanup after moving an irq */ | ||
498 | set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); | ||
499 | #endif | ||
500 | set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); | ||
501 | set_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); | ||
502 | |||
503 | /* self generated IPI for local APIC timer */ | ||
504 | set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); | ||
505 | |||
506 | /* IPI vectors for APIC spurious and error interrupts */ | ||
507 | set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); | ||
508 | set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); | ||
509 | |||
510 | if (!acpi_ioapic) | ||
511 | setup_irq(2, &irq2); | ||
512 | } | ||
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c index 3d01e47777db..a4f93b4120c1 100644 --- a/arch/x86/kernel/init_task.c +++ b/arch/x86/kernel/init_task.c | |||
@@ -11,7 +11,6 @@ | |||
11 | #include <asm/desc.h> | 11 | #include <asm/desc.h> |
12 | 12 | ||
13 | static struct fs_struct init_fs = INIT_FS; | 13 | static struct fs_struct init_fs = INIT_FS; |
14 | static struct files_struct init_files = INIT_FILES; | ||
15 | static struct signal_struct init_signals = INIT_SIGNALS(init_signals); | 14 | static struct signal_struct init_signals = INIT_SIGNALS(init_signals); |
16 | static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); | 15 | static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); |
17 | struct mm_struct init_mm = INIT_MM(init_mm); | 16 | struct mm_struct init_mm = INIT_MM(init_mm); |
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index a40d54fc1fdd..d4f9df2b022a 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c | |||
@@ -58,7 +58,7 @@ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; | |||
58 | static DEFINE_SPINLOCK(ioapic_lock); | 58 | static DEFINE_SPINLOCK(ioapic_lock); |
59 | static DEFINE_SPINLOCK(vector_lock); | 59 | static DEFINE_SPINLOCK(vector_lock); |
60 | 60 | ||
61 | int timer_over_8254 __initdata = 1; | 61 | int timer_through_8259 __initdata; |
62 | 62 | ||
63 | /* | 63 | /* |
64 | * Is the SiS APIC rmw bug present ? | 64 | * Is the SiS APIC rmw bug present ? |
@@ -239,7 +239,7 @@ static void __init replace_pin_at_irq(unsigned int irq, | |||
239 | } | 239 | } |
240 | } | 240 | } |
241 | 241 | ||
242 | static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable) | 242 | static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable) |
243 | { | 243 | { |
244 | struct irq_pin_list *entry = irq_2_pin + irq; | 244 | struct irq_pin_list *entry = irq_2_pin + irq; |
245 | unsigned int pin, reg; | 245 | unsigned int pin, reg; |
@@ -259,30 +259,32 @@ static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsign | |||
259 | } | 259 | } |
260 | 260 | ||
261 | /* mask = 1 */ | 261 | /* mask = 1 */ |
262 | static void __mask_IO_APIC_irq (unsigned int irq) | 262 | static void __mask_IO_APIC_irq(unsigned int irq) |
263 | { | 263 | { |
264 | __modify_IO_APIC_irq(irq, 0x00010000, 0); | 264 | __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0); |
265 | } | 265 | } |
266 | 266 | ||
267 | /* mask = 0 */ | 267 | /* mask = 0 */ |
268 | static void __unmask_IO_APIC_irq (unsigned int irq) | 268 | static void __unmask_IO_APIC_irq(unsigned int irq) |
269 | { | 269 | { |
270 | __modify_IO_APIC_irq(irq, 0, 0x00010000); | 270 | __modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED); |
271 | } | 271 | } |
272 | 272 | ||
273 | /* mask = 1, trigger = 0 */ | 273 | /* mask = 1, trigger = 0 */ |
274 | static void __mask_and_edge_IO_APIC_irq (unsigned int irq) | 274 | static void __mask_and_edge_IO_APIC_irq(unsigned int irq) |
275 | { | 275 | { |
276 | __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000); | 276 | __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, |
277 | IO_APIC_REDIR_LEVEL_TRIGGER); | ||
277 | } | 278 | } |
278 | 279 | ||
279 | /* mask = 0, trigger = 1 */ | 280 | /* mask = 0, trigger = 1 */ |
280 | static void __unmask_and_level_IO_APIC_irq (unsigned int irq) | 281 | static void __unmask_and_level_IO_APIC_irq(unsigned int irq) |
281 | { | 282 | { |
282 | __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000); | 283 | __modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER, |
284 | IO_APIC_REDIR_MASKED); | ||
283 | } | 285 | } |
284 | 286 | ||
285 | static void mask_IO_APIC_irq (unsigned int irq) | 287 | static void mask_IO_APIC_irq(unsigned int irq) |
286 | { | 288 | { |
287 | unsigned long flags; | 289 | unsigned long flags; |
288 | 290 | ||
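The literals being replaced double as documentation: bit 16 of an I/O APIC redirection entry is the mask bit and bit 15 selects level trigger, so IO_APIC_REDIR_MASKED must equal the old 0x00010000 and IO_APIC_REDIR_LEVEL_TRIGGER the old 0x00008000. Spelled out:

#include <stdio.h>

#define IO_APIC_REDIR_MASKED		(1u << 16)	/* 0x00010000 */
#define IO_APIC_REDIR_LEVEL_TRIGGER	(1u << 15)	/* 0x00008000 */

int main(void)
{
	/* matches the literals in the pre-patch __modify_IO_APIC_irq() calls */
	printf("%#x %#x\n",
	       IO_APIC_REDIR_MASKED, IO_APIC_REDIR_LEVEL_TRIGGER);
	return 0;
}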
@@ -291,7 +293,7 @@ static void mask_IO_APIC_irq (unsigned int irq) | |||
291 | spin_unlock_irqrestore(&ioapic_lock, flags); | 293 | spin_unlock_irqrestore(&ioapic_lock, flags); |
292 | } | 294 | } |
293 | 295 | ||
294 | static void unmask_IO_APIC_irq (unsigned int irq) | 296 | static void unmask_IO_APIC_irq(unsigned int irq) |
295 | { | 297 | { |
296 | unsigned long flags; | 298 | unsigned long flags; |
297 | 299 | ||
@@ -303,7 +305,7 @@ static void unmask_IO_APIC_irq (unsigned int irq) | |||
303 | static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) | 305 | static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) |
304 | { | 306 | { |
305 | struct IO_APIC_route_entry entry; | 307 | struct IO_APIC_route_entry entry; |
306 | 308 | ||
307 | /* Check delivery_mode to be sure we're not clearing an SMI pin */ | 309 | /* Check delivery_mode to be sure we're not clearing an SMI pin */ |
308 | entry = ioapic_read_entry(apic, pin); | 310 | entry = ioapic_read_entry(apic, pin); |
309 | if (entry.delivery_mode == dest_SMI) | 311 | if (entry.delivery_mode == dest_SMI) |
@@ -315,7 +317,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) | |||
315 | ioapic_mask_entry(apic, pin); | 317 | ioapic_mask_entry(apic, pin); |
316 | } | 318 | } |
317 | 319 | ||
318 | static void clear_IO_APIC (void) | 320 | static void clear_IO_APIC(void) |
319 | { | 321 | { |
320 | int apic, pin; | 322 | int apic, pin; |
321 | 323 | ||
@@ -332,7 +334,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) | |||
332 | struct irq_pin_list *entry = irq_2_pin + irq; | 334 | struct irq_pin_list *entry = irq_2_pin + irq; |
333 | unsigned int apicid_value; | 335 | unsigned int apicid_value; |
334 | cpumask_t tmp; | 336 | cpumask_t tmp; |
335 | 337 | ||
336 | cpus_and(tmp, cpumask, cpu_online_map); | 338 | cpus_and(tmp, cpumask, cpu_online_map); |
337 | if (cpus_empty(tmp)) | 339 | if (cpus_empty(tmp)) |
338 | tmp = TARGET_CPUS; | 340 | tmp = TARGET_CPUS; |
@@ -361,7 +363,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) | |||
361 | # include <linux/kernel_stat.h> /* kstat */ | 363 | # include <linux/kernel_stat.h> /* kstat */ |
362 | # include <linux/slab.h> /* kmalloc() */ | 364 | # include <linux/slab.h> /* kmalloc() */ |
363 | # include <linux/timer.h> | 365 | # include <linux/timer.h> |
364 | 366 | ||
365 | #define IRQBALANCE_CHECK_ARCH -999 | 367 | #define IRQBALANCE_CHECK_ARCH -999 |
366 | #define MAX_BALANCED_IRQ_INTERVAL (5*HZ) | 368 | #define MAX_BALANCED_IRQ_INTERVAL (5*HZ) |
367 | #define MIN_BALANCED_IRQ_INTERVAL (HZ/2) | 369 | #define MIN_BALANCED_IRQ_INTERVAL (HZ/2) |
@@ -373,14 +375,14 @@ static int physical_balance __read_mostly; | |||
373 | static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL; | 375 | static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL; |
374 | 376 | ||
375 | static struct irq_cpu_info { | 377 | static struct irq_cpu_info { |
376 | unsigned long * last_irq; | 378 | unsigned long *last_irq; |
377 | unsigned long * irq_delta; | 379 | unsigned long *irq_delta; |
378 | unsigned long irq; | 380 | unsigned long irq; |
379 | } irq_cpu_data[NR_CPUS]; | 381 | } irq_cpu_data[NR_CPUS]; |
380 | 382 | ||
381 | #define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq) | 383 | #define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq) |
382 | #define LAST_CPU_IRQ(cpu,irq) (irq_cpu_data[cpu].last_irq[irq]) | 384 | #define LAST_CPU_IRQ(cpu, irq) (irq_cpu_data[cpu].last_irq[irq]) |
383 | #define IRQ_DELTA(cpu,irq) (irq_cpu_data[cpu].irq_delta[irq]) | 385 | #define IRQ_DELTA(cpu, irq) (irq_cpu_data[cpu].irq_delta[irq]) |
384 | 386 | ||
385 | #define IDLE_ENOUGH(cpu,now) \ | 387 | #define IDLE_ENOUGH(cpu,now) \ |
386 | (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1)) | 388 | (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1)) |
@@ -419,8 +421,8 @@ inside: | |||
419 | if (cpu == -1) | 421 | if (cpu == -1) |
420 | cpu = NR_CPUS-1; | 422 | cpu = NR_CPUS-1; |
421 | } | 423 | } |
422 | } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) || | 424 | } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu, allowed_mask) || |
423 | (search_idle && !IDLE_ENOUGH(cpu,now))); | 425 | (search_idle && !IDLE_ENOUGH(cpu, now))); |
424 | 426 | ||
425 | return cpu; | 427 | return cpu; |
426 | } | 428 | } |
@@ -430,15 +432,14 @@ static inline void balance_irq(int cpu, int irq) | |||
430 | unsigned long now = jiffies; | 432 | unsigned long now = jiffies; |
431 | cpumask_t allowed_mask; | 433 | cpumask_t allowed_mask; |
432 | unsigned int new_cpu; | 434 | unsigned int new_cpu; |
433 | 435 | ||
434 | if (irqbalance_disabled) | 436 | if (irqbalance_disabled) |
435 | return; | 437 | return; |
436 | 438 | ||
437 | cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]); | 439 | cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]); |
438 | new_cpu = move(cpu, allowed_mask, now, 1); | 440 | new_cpu = move(cpu, allowed_mask, now, 1); |
439 | if (cpu != new_cpu) { | 441 | if (cpu != new_cpu) |
440 | set_pending_irq(irq, cpumask_of_cpu(new_cpu)); | 442 | set_pending_irq(irq, cpumask_of_cpu(new_cpu)); |
441 | } | ||
442 | } | 443 | } |
443 | 444 | ||
444 | static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold) | 445 | static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold) |
@@ -450,14 +451,14 @@ static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold) | |||
450 | if (!irq_desc[j].action) | 451 | if (!irq_desc[j].action) |
451 | continue; | 452 | continue; |
452 | /* Is it a significant load ? */ | 453 | /* Is it a significant load ? */ |
453 | if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) < | 454 | if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i), j) < |
454 | useful_load_threshold) | 455 | useful_load_threshold) |
455 | continue; | 456 | continue; |
456 | balance_irq(i, j); | 457 | balance_irq(i, j); |
457 | } | 458 | } |
458 | } | 459 | } |
459 | balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, | 460 | balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, |
460 | balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); | 461 | balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); |
461 | return; | 462 | return; |
462 | } | 463 | } |
463 | 464 | ||
@@ -486,22 +487,22 @@ static void do_irq_balance(void) | |||
486 | /* Is this an active IRQ or balancing disabled ? */ | 487 | /* Is this an active IRQ or balancing disabled ? */ |
487 | if (!irq_desc[j].action || irq_balancing_disabled(j)) | 488 | if (!irq_desc[j].action || irq_balancing_disabled(j)) |
488 | continue; | 489 | continue; |
489 | if ( package_index == i ) | 490 | if (package_index == i) |
490 | IRQ_DELTA(package_index,j) = 0; | 491 | IRQ_DELTA(package_index, j) = 0; |
491 | /* Determine the total count per processor per IRQ */ | 492 | /* Determine the total count per processor per IRQ */ |
492 | value_now = (unsigned long) kstat_cpu(i).irqs[j]; | 493 | value_now = (unsigned long) kstat_cpu(i).irqs[j]; |
493 | 494 | ||
494 | /* Determine the activity per processor per IRQ */ | 495 | /* Determine the activity per processor per IRQ */ |
495 | delta = value_now - LAST_CPU_IRQ(i,j); | 496 | delta = value_now - LAST_CPU_IRQ(i, j); |
496 | 497 | ||
497 | /* Update last_cpu_irq[][] for the next time */ | 498 | /* Update last_cpu_irq[][] for the next time */ |
498 | LAST_CPU_IRQ(i,j) = value_now; | 499 | LAST_CPU_IRQ(i, j) = value_now; |
499 | 500 | ||
500 | /* Ignore IRQs whose rate is less than the clock */ | 501 | /* Ignore IRQs whose rate is less than the clock */ |
501 | if (delta < useful_load_threshold) | 502 | if (delta < useful_load_threshold) |
502 | continue; | 503 | continue; |
503 | /* update the load for the processor or package total */ | 504 | /* update the load for the processor or package total */ |
504 | IRQ_DELTA(package_index,j) += delta; | 505 | IRQ_DELTA(package_index, j) += delta; |
505 | 506 | ||
506 | /* Keep track of the higher numbered sibling as well */ | 507 | /* Keep track of the higher numbered sibling as well */ |
507 | if (i != package_index) | 508 | if (i != package_index) |
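
Taken together, this hunk is a plain rate estimator over the kernel's interrupt counters: sample, diff against the previous sample, drop anything below the noise floor, accumulate the rest into the package's slot. A standalone sketch of that bookkeeping (simplified; kstat_count() is a hypothetical stand-in for kstat_cpu(cpu).irqs[irq]):

	/* Simplified model of the delta accounting in do_irq_balance().
	 * Both siblings of a package accumulate into the package slot. */
	static unsigned long last_count[NR_CPUS][NR_IRQS];
	static unsigned long pkg_delta[NR_CPUS][NR_IRQS];

	static void sample_irq(int cpu, int pkg, int irq, unsigned long threshold)
	{
		unsigned long now = kstat_count(cpu, irq);	/* monotonic counter */
		unsigned long delta = now - last_count[cpu][irq];

		last_count[cpu][irq] = now;		/* remember for the next pass */
		if (delta < threshold)			/* rate below the clock: ignore */
			return;
		pkg_delta[pkg][irq] += delta;		/* shared slot for HT siblings */
	}
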
@@ -527,7 +528,8 @@ static void do_irq_balance(void) | |||
527 | max_cpu_irq = ULONG_MAX; | 528 | max_cpu_irq = ULONG_MAX; |
528 | 529 | ||
529 | tryanothercpu: | 530 | tryanothercpu: |
530 | /* Look for heaviest loaded processor. | 531 | /* |
532 | * Look for heaviest loaded processor. | ||
531 | * We may come back to get the next heaviest loaded processor. | 533 | * We may come back to get the next heaviest loaded processor. |
532 | * Skip processors with trivial loads. | 534 | * Skip processors with trivial loads. |
533 | */ | 535 | */ |
@@ -536,7 +538,7 @@ tryanothercpu: | |||
536 | for_each_online_cpu(i) { | 538 | for_each_online_cpu(i) { |
537 | if (i != CPU_TO_PACKAGEINDEX(i)) | 539 | if (i != CPU_TO_PACKAGEINDEX(i)) |
538 | continue; | 540 | continue; |
539 | if (max_cpu_irq <= CPU_IRQ(i)) | 541 | if (max_cpu_irq <= CPU_IRQ(i)) |
540 | continue; | 542 | continue; |
541 | if (tmp_cpu_irq < CPU_IRQ(i)) { | 543 | if (tmp_cpu_irq < CPU_IRQ(i)) { |
542 | tmp_cpu_irq = CPU_IRQ(i); | 544 | tmp_cpu_irq = CPU_IRQ(i); |
@@ -545,8 +547,9 @@ tryanothercpu: | |||
545 | } | 547 | } |
546 | 548 | ||
547 | if (tmp_loaded == -1) { | 549 | if (tmp_loaded == -1) { |
548 | /* In the case of small number of heavy interrupt sources, | 550 | /* |
549 | * loading some of the cpus too much. We use Ingo's original | 551 | * In the case of small number of heavy interrupt sources, |
552 | * loading some of the cpus too much. We use Ingo's original | ||
550 | * approach to rotate them around. | 553 | * approach to rotate them around. |
551 | */ | 554 | */ |
552 | if (!first_attempt && imbalance >= useful_load_threshold) { | 555 | if (!first_attempt && imbalance >= useful_load_threshold) { |
@@ -555,13 +558,14 @@ tryanothercpu: | |||
555 | } | 558 | } |
556 | goto not_worth_the_effort; | 559 | goto not_worth_the_effort; |
557 | } | 560 | } |
558 | 561 | ||
559 | first_attempt = 0; /* heaviest search */ | 562 | first_attempt = 0; /* heaviest search */ |
560 | max_cpu_irq = tmp_cpu_irq; /* load */ | 563 | max_cpu_irq = tmp_cpu_irq; /* load */ |
561 | max_loaded = tmp_loaded; /* processor */ | 564 | max_loaded = tmp_loaded; /* processor */ |
562 | imbalance = (max_cpu_irq - min_cpu_irq) / 2; | 565 | imbalance = (max_cpu_irq - min_cpu_irq) / 2; |
563 | 566 | ||
564 | /* if imbalance is less than approx 10% of max load, then | 567 | /* |
568 | * if imbalance is less than approx 10% of max load, then | ||
565 | * observe diminishing returns action. - quit | 569 | * observe diminishing returns action. - quit |
566 | */ | 570 | */ |
567 | if (imbalance < (max_cpu_irq >> 3)) | 571 | if (imbalance < (max_cpu_irq >> 3)) |
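
A small reading aid for this hunk: the comment promises "approx 10%" but the test uses a shift, and max_cpu_irq >> 3 is max_cpu_irq / 8, i.e. a 12.5% cut-off. The shift is just the cheap integer approximation of that percentage:

	/* >> 3 divides by 8, so the diminishing-returns threshold is really
	 * 12.5% of the heaviest load, not exactly 10% as the comment says */
	unsigned long quit_threshold = max_cpu_irq >> 3;
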
@@ -577,26 +581,25 @@ tryanotherirq: | |||
577 | /* Is this an active IRQ? */ | 581 | /* Is this an active IRQ? */ |
578 | if (!irq_desc[j].action) | 582 | if (!irq_desc[j].action) |
579 | continue; | 583 | continue; |
580 | if (imbalance <= IRQ_DELTA(max_loaded,j)) | 584 | if (imbalance <= IRQ_DELTA(max_loaded, j)) |
581 | continue; | 585 | continue; |
582 | /* Try to find the IRQ that is closest to the imbalance | 586 | /* Try to find the IRQ that is closest to the imbalance |
583 | * without going over. | 587 | * without going over. |
584 | */ | 588 | */ |
585 | if (move_this_load < IRQ_DELTA(max_loaded,j)) { | 589 | if (move_this_load < IRQ_DELTA(max_loaded, j)) { |
586 | move_this_load = IRQ_DELTA(max_loaded,j); | 590 | move_this_load = IRQ_DELTA(max_loaded, j); |
587 | selected_irq = j; | 591 | selected_irq = j; |
588 | } | 592 | } |
589 | } | 593 | } |
590 | if (selected_irq == -1) { | 594 | if (selected_irq == -1) |
591 | goto tryanothercpu; | 595 | goto tryanothercpu; |
592 | } | ||
593 | 596 | ||
594 | imbalance = move_this_load; | 597 | imbalance = move_this_load; |
595 | 598 | ||
596 | /* For physical_balance case, we accumulated both load | 599 | /* For physical_balance case, we accumulated both load |
597 | * values in the one of the siblings cpu_irq[], | 600 | * values in the one of the siblings cpu_irq[], |
598 | * to use the same code for physical and logical processors | 601 | * to use the same code for physical and logical processors |
599 | * as much as possible. | 602 | * as much as possible. |
600 | * | 603 | * |
601 | * NOTE: the cpu_irq[] array holds the sum of the load for | 604 | * NOTE: the cpu_irq[] array holds the sum of the load for |
602 | * sibling A and sibling B in the slot for the lowest numbered | 605 | * sibling A and sibling B in the slot for the lowest numbered |
@@ -625,11 +628,11 @@ tryanotherirq: | |||
625 | /* mark for change destination */ | 628 | /* mark for change destination */ |
626 | set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded)); | 629 | set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded)); |
627 | 630 | ||
628 | /* Since we made a change, come back sooner to | 631 | /* Since we made a change, come back sooner to |
629 | * check for more variation. | 632 | * check for more variation. |
630 | */ | 633 | */ |
631 | balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, | 634 | balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, |
632 | balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); | 635 | balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); |
633 | return; | 636 | return; |
634 | } | 637 | } |
635 | goto tryanotherirq; | 638 | goto tryanotherirq; |
@@ -640,7 +643,7 @@ not_worth_the_effort: | |||
640 | * upward | 643 | * upward |
641 | */ | 644 | */ |
642 | balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL, | 645 | balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL, |
643 | balanced_irq_interval + BALANCED_IRQ_MORE_DELTA); | 646 | balanced_irq_interval + BALANCED_IRQ_MORE_DELTA); |
644 | return; | 647 | return; |
645 | } | 648 | } |
646 | 649 | ||
@@ -679,13 +682,13 @@ static int __init balanced_irq_init(void) | |||
679 | cpumask_t tmp; | 682 | cpumask_t tmp; |
680 | 683 | ||
681 | cpus_shift_right(tmp, cpu_online_map, 2); | 684 | cpus_shift_right(tmp, cpu_online_map, 2); |
682 | c = &boot_cpu_data; | 685 | c = &boot_cpu_data; |
683 | /* When not overwritten by the command line ask subarchitecture. */ | 686 | /* When not overwritten by the command line ask subarchitecture. */ |
684 | if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH) | 687 | if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH) |
685 | irqbalance_disabled = NO_BALANCE_IRQ; | 688 | irqbalance_disabled = NO_BALANCE_IRQ; |
686 | if (irqbalance_disabled) | 689 | if (irqbalance_disabled) |
687 | return 0; | 690 | return 0; |
688 | 691 | ||
689 | /* disable irqbalance completely if there is only one processor online */ | 692 | /* disable irqbalance completely if there is only one processor online */ |
690 | if (num_online_cpus() < 2) { | 693 | if (num_online_cpus() < 2) { |
691 | irqbalance_disabled = 1; | 694 | irqbalance_disabled = 1; |
@@ -699,16 +702,14 @@ static int __init balanced_irq_init(void) | |||
699 | physical_balance = 1; | 702 | physical_balance = 1; |
700 | 703 | ||
701 | for_each_online_cpu(i) { | 704 | for_each_online_cpu(i) { |
702 | irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); | 705 | irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); |
703 | irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); | 706 | irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); |
704 | if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) { | 707 | if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) { |
705 | printk(KERN_ERR "balanced_irq_init: out of memory"); | 708 | printk(KERN_ERR "balanced_irq_init: out of memory"); |
706 | goto failed; | 709 | goto failed; |
707 | } | 710 | } |
708 | memset(irq_cpu_data[i].irq_delta,0,sizeof(unsigned long) * NR_IRQS); | ||
709 | memset(irq_cpu_data[i].last_irq,0,sizeof(unsigned long) * NR_IRQS); | ||
710 | } | 711 | } |
711 | 712 | ||
712 | printk(KERN_INFO "Starting balanced_irq\n"); | 713 | printk(KERN_INFO "Starting balanced_irq\n"); |
713 | if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd"))) | 714 | if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd"))) |
714 | return 0; | 715 | return 0; |
@@ -843,7 +844,7 @@ static int __init find_isa_irq_apic(int irq, int type) | |||
843 | } | 844 | } |
844 | if (i < mp_irq_entries) { | 845 | if (i < mp_irq_entries) { |
845 | int apic; | 846 | int apic; |
846 | for(apic = 0; apic < nr_ioapics; apic++) { | 847 | for (apic = 0; apic < nr_ioapics; apic++) { |
847 | if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic) | 848 | if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic) |
848 | return apic; | 849 | return apic; |
849 | } | 850 | } |
@@ -880,7 +881,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) | |||
880 | !mp_irqs[i].mpc_irqtype && | 881 | !mp_irqs[i].mpc_irqtype && |
881 | (bus == lbus) && | 882 | (bus == lbus) && |
882 | (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { | 883 | (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { |
883 | int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq); | 884 | int irq = pin_2_irq(i, apic, mp_irqs[i].mpc_dstirq); |
884 | 885 | ||
885 | if (!(apic || IO_APIC_IRQ(irq))) | 886 | if (!(apic || IO_APIC_IRQ(irq))) |
886 | continue; | 887 | continue; |
@@ -900,7 +901,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) | |||
900 | EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); | 901 | EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); |
901 | 902 | ||
902 | /* | 903 | /* |
903 | * This function currently is only a helper for the i386 smp boot process where | 904 | * This function currently is only a helper for the i386 smp boot process where |
904 | * we need to reprogram the ioredtbls to cater for the cpus which have come online | 905 | * we need to reprogram the ioredtbls to cater for the cpus which have come online |
905 | * so mask in all cases should simply be TARGET_CPUS | 906 | * so mask in all cases should simply be TARGET_CPUS |
906 | */ | 907 | */ |
@@ -975,37 +976,36 @@ static int MPBIOS_polarity(int idx) | |||
975 | /* | 976 | /* |
976 | * Determine IRQ line polarity (high active or low active): | 977 | * Determine IRQ line polarity (high active or low active): |
977 | */ | 978 | */ |
978 | switch (mp_irqs[idx].mpc_irqflag & 3) | 979 | switch (mp_irqs[idx].mpc_irqflag & 3) { |
980 | case 0: /* conforms, ie. bus-type dependent polarity */ | ||
979 | { | 981 | { |
980 | case 0: /* conforms, ie. bus-type dependent polarity */ | 982 | polarity = test_bit(bus, mp_bus_not_pci)? |
981 | { | 983 | default_ISA_polarity(idx): |
982 | polarity = test_bit(bus, mp_bus_not_pci)? | 984 | default_PCI_polarity(idx); |
983 | default_ISA_polarity(idx): | 985 | break; |
984 | default_PCI_polarity(idx); | 986 | } |
985 | break; | 987 | case 1: /* high active */ |
986 | } | 988 | { |
987 | case 1: /* high active */ | 989 | polarity = 0; |
988 | { | 990 | break; |
989 | polarity = 0; | 991 | } |
990 | break; | 992 | case 2: /* reserved */ |
991 | } | 993 | { |
992 | case 2: /* reserved */ | 994 | printk(KERN_WARNING "broken BIOS!!\n"); |
993 | { | 995 | polarity = 1; |
994 | printk(KERN_WARNING "broken BIOS!!\n"); | 996 | break; |
995 | polarity = 1; | 997 | } |
996 | break; | 998 | case 3: /* low active */ |
997 | } | 999 | { |
998 | case 3: /* low active */ | 1000 | polarity = 1; |
999 | { | 1001 | break; |
1000 | polarity = 1; | 1002 | } |
1001 | break; | 1003 | default: /* invalid */ |
1002 | } | 1004 | { |
1003 | default: /* invalid */ | 1005 | printk(KERN_WARNING "broken BIOS!!\n"); |
1004 | { | 1006 | polarity = 1; |
1005 | printk(KERN_WARNING "broken BIOS!!\n"); | 1007 | break; |
1006 | polarity = 1; | 1008 | } |
1007 | break; | ||
1008 | } | ||
1009 | } | 1009 | } |
1010 | return polarity; | 1010 | return polarity; |
1011 | } | 1011 | } |
@@ -1018,69 +1018,67 @@ static int MPBIOS_trigger(int idx) | |||
1018 | /* | 1018 | /* |
1019 | * Determine IRQ trigger mode (edge or level sensitive): | 1019 | * Determine IRQ trigger mode (edge or level sensitive): |
1020 | */ | 1020 | */ |
1021 | switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) | 1021 | switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) { |
1022 | case 0: /* conforms, ie. bus-type dependent */ | ||
1022 | { | 1023 | { |
1023 | case 0: /* conforms, ie. bus-type dependent */ | 1024 | trigger = test_bit(bus, mp_bus_not_pci)? |
1024 | { | 1025 | default_ISA_trigger(idx): |
1025 | trigger = test_bit(bus, mp_bus_not_pci)? | 1026 | default_PCI_trigger(idx); |
1026 | default_ISA_trigger(idx): | ||
1027 | default_PCI_trigger(idx); | ||
1028 | #if defined(CONFIG_EISA) || defined(CONFIG_MCA) | 1027 | #if defined(CONFIG_EISA) || defined(CONFIG_MCA) |
1029 | switch (mp_bus_id_to_type[bus]) | 1028 | switch (mp_bus_id_to_type[bus]) { |
1030 | { | 1029 | case MP_BUS_ISA: /* ISA pin */ |
1031 | case MP_BUS_ISA: /* ISA pin */ | 1030 | { |
1032 | { | 1031 | /* set before the switch */ |
1033 | /* set before the switch */ | ||
1034 | break; | ||
1035 | } | ||
1036 | case MP_BUS_EISA: /* EISA pin */ | ||
1037 | { | ||
1038 | trigger = default_EISA_trigger(idx); | ||
1039 | break; | ||
1040 | } | ||
1041 | case MP_BUS_PCI: /* PCI pin */ | ||
1042 | { | ||
1043 | /* set before the switch */ | ||
1044 | break; | ||
1045 | } | ||
1046 | case MP_BUS_MCA: /* MCA pin */ | ||
1047 | { | ||
1048 | trigger = default_MCA_trigger(idx); | ||
1049 | break; | ||
1050 | } | ||
1051 | default: | ||
1052 | { | ||
1053 | printk(KERN_WARNING "broken BIOS!!\n"); | ||
1054 | trigger = 1; | ||
1055 | break; | ||
1056 | } | ||
1057 | } | ||
1058 | #endif | ||
1059 | break; | 1032 | break; |
1060 | } | 1033 | } |
1061 | case 1: /* edge */ | 1034 | case MP_BUS_EISA: /* EISA pin */ |
1062 | { | 1035 | { |
1063 | trigger = 0; | 1036 | trigger = default_EISA_trigger(idx); |
1064 | break; | 1037 | break; |
1065 | } | 1038 | } |
1066 | case 2: /* reserved */ | 1039 | case MP_BUS_PCI: /* PCI pin */ |
1067 | { | 1040 | { |
1068 | printk(KERN_WARNING "broken BIOS!!\n"); | 1041 | /* set before the switch */ |
1069 | trigger = 1; | ||
1070 | break; | 1042 | break; |
1071 | } | 1043 | } |
1072 | case 3: /* level */ | 1044 | case MP_BUS_MCA: /* MCA pin */ |
1073 | { | 1045 | { |
1074 | trigger = 1; | 1046 | trigger = default_MCA_trigger(idx); |
1075 | break; | 1047 | break; |
1076 | } | 1048 | } |
1077 | default: /* invalid */ | 1049 | default: |
1078 | { | 1050 | { |
1079 | printk(KERN_WARNING "broken BIOS!!\n"); | 1051 | printk(KERN_WARNING "broken BIOS!!\n"); |
1080 | trigger = 0; | 1052 | trigger = 1; |
1081 | break; | 1053 | break; |
1082 | } | 1054 | } |
1083 | } | 1055 | } |
1056 | #endif | ||
1057 | break; | ||
1058 | } | ||
1059 | case 1: /* edge */ | ||
1060 | { | ||
1061 | trigger = 0; | ||
1062 | break; | ||
1063 | } | ||
1064 | case 2: /* reserved */ | ||
1065 | { | ||
1066 | printk(KERN_WARNING "broken BIOS!!\n"); | ||
1067 | trigger = 1; | ||
1068 | break; | ||
1069 | } | ||
1070 | case 3: /* level */ | ||
1071 | { | ||
1072 | trigger = 1; | ||
1073 | break; | ||
1074 | } | ||
1075 | default: /* invalid */ | ||
1076 | { | ||
1077 | printk(KERN_WARNING "broken BIOS!!\n"); | ||
1078 | trigger = 0; | ||
1079 | break; | ||
1080 | } | ||
1081 | } | ||
1084 | return trigger; | 1082 | return trigger; |
1085 | } | 1083 | } |
1086 | 1084 | ||
@@ -1148,8 +1146,8 @@ static inline int IO_APIC_irq_trigger(int irq) | |||
1148 | 1146 | ||
1149 | for (apic = 0; apic < nr_ioapics; apic++) { | 1147 | for (apic = 0; apic < nr_ioapics; apic++) { |
1150 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { | 1148 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { |
1151 | idx = find_irq_entry(apic,pin,mp_INT); | 1149 | idx = find_irq_entry(apic, pin, mp_INT); |
1152 | if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin))) | 1150 | if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) |
1153 | return irq_trigger(idx); | 1151 | return irq_trigger(idx); |
1154 | } | 1152 | } |
1155 | } | 1153 | } |
@@ -1164,7 +1162,7 @@ static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 } | |||
1164 | 1162 | ||
1165 | static int __assign_irq_vector(int irq) | 1163 | static int __assign_irq_vector(int irq) |
1166 | { | 1164 | { |
1167 | static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; | 1165 | static int current_vector = FIRST_DEVICE_VECTOR, current_offset; |
1168 | int vector, offset; | 1166 | int vector, offset; |
1169 | 1167 | ||
1170 | BUG_ON((unsigned)irq >= NR_IRQ_VECTORS); | 1168 | BUG_ON((unsigned)irq >= NR_IRQ_VECTORS); |
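
Dropping the "= 0" from current_offset is a pure cleanup, not a behaviour change: C guarantees that objects with static storage duration start out zeroed, so the explicit initializer was redundant (and checkpatch flags it):

	static int current_offset;	/* implicitly 0: static storage is zeroed */
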
@@ -1237,15 +1235,15 @@ static void __init setup_IO_APIC_irqs(void) | |||
1237 | /* | 1235 | /* |
1238 | * add it to the IO-APIC irq-routing table: | 1236 | * add it to the IO-APIC irq-routing table: |
1239 | */ | 1237 | */ |
1240 | memset(&entry,0,sizeof(entry)); | 1238 | memset(&entry, 0, sizeof(entry)); |
1241 | 1239 | ||
1242 | entry.delivery_mode = INT_DELIVERY_MODE; | 1240 | entry.delivery_mode = INT_DELIVERY_MODE; |
1243 | entry.dest_mode = INT_DEST_MODE; | 1241 | entry.dest_mode = INT_DEST_MODE; |
1244 | entry.mask = 0; /* enable IRQ */ | 1242 | entry.mask = 0; /* enable IRQ */ |
1245 | entry.dest.logical.logical_dest = | 1243 | entry.dest.logical.logical_dest = |
1246 | cpu_mask_to_apicid(TARGET_CPUS); | 1244 | cpu_mask_to_apicid(TARGET_CPUS); |
1247 | 1245 | ||
1248 | idx = find_irq_entry(apic,pin,mp_INT); | 1246 | idx = find_irq_entry(apic, pin, mp_INT); |
1249 | if (idx == -1) { | 1247 | if (idx == -1) { |
1250 | if (first_notcon) { | 1248 | if (first_notcon) { |
1251 | apic_printk(APIC_VERBOSE, KERN_DEBUG | 1249 | apic_printk(APIC_VERBOSE, KERN_DEBUG |
@@ -1289,7 +1287,7 @@ static void __init setup_IO_APIC_irqs(void) | |||
1289 | vector = assign_irq_vector(irq); | 1287 | vector = assign_irq_vector(irq); |
1290 | entry.vector = vector; | 1288 | entry.vector = vector; |
1291 | ioapic_register_intr(irq, vector, IOAPIC_AUTO); | 1289 | ioapic_register_intr(irq, vector, IOAPIC_AUTO); |
1292 | 1290 | ||
1293 | if (!apic && (irq < 16)) | 1291 | if (!apic && (irq < 16)) |
1294 | disable_8259A_irq(irq); | 1292 | disable_8259A_irq(irq); |
1295 | } | 1293 | } |
@@ -1302,25 +1300,21 @@ static void __init setup_IO_APIC_irqs(void) | |||
1302 | } | 1300 | } |
1303 | 1301 | ||
1304 | /* | 1302 | /* |
1305 | * Set up the 8259A-master output pin: | 1303 | * Set up the timer pin, possibly with the 8259A-master behind. |
1306 | */ | 1304 | */ |
1307 | static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector) | 1305 | static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, |
1306 | int vector) | ||
1308 | { | 1307 | { |
1309 | struct IO_APIC_route_entry entry; | 1308 | struct IO_APIC_route_entry entry; |
1310 | 1309 | ||
1311 | memset(&entry,0,sizeof(entry)); | 1310 | memset(&entry, 0, sizeof(entry)); |
1312 | |||
1313 | disable_8259A_irq(0); | ||
1314 | |||
1315 | /* mask LVT0 */ | ||
1316 | apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); | ||
1317 | 1311 | ||
1318 | /* | 1312 | /* |
1319 | * We use logical delivery to get the timer IRQ | 1313 | * We use logical delivery to get the timer IRQ |
1320 | * to the first CPU. | 1314 | * to the first CPU. |
1321 | */ | 1315 | */ |
1322 | entry.dest_mode = INT_DEST_MODE; | 1316 | entry.dest_mode = INT_DEST_MODE; |
1323 | entry.mask = 0; /* unmask IRQ now */ | 1317 | entry.mask = 1; /* mask IRQ now */ |
1324 | entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); | 1318 | entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); |
1325 | entry.delivery_mode = INT_DELIVERY_MODE; | 1319 | entry.delivery_mode = INT_DELIVERY_MODE; |
1326 | entry.polarity = 0; | 1320 | entry.polarity = 0; |
@@ -1329,17 +1323,14 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in | |||
1329 | 1323 | ||
1330 | /* | 1324 | /* |
1331 | * The timer IRQ doesn't have to know that behind the | 1325 | * The timer IRQ doesn't have to know that behind the |
1332 | * scene we have a 8259A-master in AEOI mode ... | 1326 | * scene we may have a 8259A-master in AEOI mode ... |
1333 | */ | 1327 | */ |
1334 | irq_desc[0].chip = &ioapic_chip; | 1328 | ioapic_register_intr(0, vector, IOAPIC_EDGE); |
1335 | set_irq_handler(0, handle_edge_irq); | ||
1336 | 1329 | ||
1337 | /* | 1330 | /* |
1338 | * Add it to the IO-APIC irq-routing table: | 1331 | * Add it to the IO-APIC irq-routing table: |
1339 | */ | 1332 | */ |
1340 | ioapic_write_entry(apic, pin, entry); | 1333 | ioapic_write_entry(apic, pin, entry); |
1341 | |||
1342 | enable_8259A_irq(0); | ||
1343 | } | 1334 | } |
1344 | 1335 | ||
1345 | void __init print_IO_APIC(void) | 1336 | void __init print_IO_APIC(void) |
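
The renamed setup_timer_IRQ0_pin() is also narrower in scope: it now only builds and writes the redirection entry, and writes it masked, while all the 8259A and LVT0 juggling that used to live here moves into check_timer(), which unmasks the pin once it decides which route to probe. Roughly (a sketch of the resulting split, not a verbatim copy):

	/* helper: program a masked, logical-delivery entry for the timer pin */
	entry.mask = 1;				/* keep the pin masked for now */
	ioapic_write_entry(apic, pin, entry);

	/* caller (check_timer): unmask only when actually testing this route */
	unmask_IO_APIC_irq(0);
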
@@ -1354,7 +1345,7 @@ void __init print_IO_APIC(void) | |||
1354 | if (apic_verbosity == APIC_QUIET) | 1345 | if (apic_verbosity == APIC_QUIET) |
1355 | return; | 1346 | return; |
1356 | 1347 | ||
1357 | printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); | 1348 | printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); |
1358 | for (i = 0; i < nr_ioapics; i++) | 1349 | for (i = 0; i < nr_ioapics; i++) |
1359 | printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", | 1350 | printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", |
1360 | mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]); | 1351 | mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]); |
@@ -1459,7 +1450,7 @@ void __init print_IO_APIC(void) | |||
1459 | 1450 | ||
1460 | #if 0 | 1451 | #if 0 |
1461 | 1452 | ||
1462 | static void print_APIC_bitfield (int base) | 1453 | static void print_APIC_bitfield(int base) |
1463 | { | 1454 | { |
1464 | unsigned int v; | 1455 | unsigned int v; |
1465 | int i, j; | 1456 | int i, j; |
@@ -1480,7 +1471,7 @@ static void print_APIC_bitfield (int base) | |||
1480 | } | 1471 | } |
1481 | } | 1472 | } |
1482 | 1473 | ||
1483 | void /*__init*/ print_local_APIC(void * dummy) | 1474 | void /*__init*/ print_local_APIC(void *dummy) |
1484 | { | 1475 | { |
1485 | unsigned int v, ver, maxlvt; | 1476 | unsigned int v, ver, maxlvt; |
1486 | 1477 | ||
@@ -1489,6 +1480,7 @@ void /*__init*/ print_local_APIC(void * dummy) | |||
1489 | 1480 | ||
1490 | printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", | 1481 | printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", |
1491 | smp_processor_id(), hard_smp_processor_id()); | 1482 | smp_processor_id(), hard_smp_processor_id()); |
1483 | v = apic_read(APIC_ID); | ||
1492 | printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, | 1484 | printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, |
1493 | GET_APIC_ID(read_apic_id())); | 1485 | GET_APIC_ID(read_apic_id())); |
1494 | v = apic_read(APIC_LVR); | 1486 | v = apic_read(APIC_LVR); |
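
The one-line addition of apic_read(APIC_ID) is a real fix rather than churn: in the old code nothing had assigned v before the "... APIC ID:" printk consumed it, so that line printed an uninitialized value:

	v = apic_read(APIC_ID);		/* previously missing: v was printed uninitialized */
	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v,
	       GET_APIC_ID(read_apic_id()));
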
@@ -1563,7 +1555,7 @@ void /*__init*/ print_local_APIC(void * dummy) | |||
1563 | printk("\n"); | 1555 | printk("\n"); |
1564 | } | 1556 | } |
1565 | 1557 | ||
1566 | void print_all_local_APICs (void) | 1558 | void print_all_local_APICs(void) |
1567 | { | 1559 | { |
1568 | on_each_cpu(print_local_APIC, NULL, 1, 1); | 1560 | on_each_cpu(print_local_APIC, NULL, 1, 1); |
1569 | } | 1561 | } |
@@ -1586,11 +1578,11 @@ void /*__init*/ print_PIC(void) | |||
1586 | v = inb(0xa0) << 8 | inb(0x20); | 1578 | v = inb(0xa0) << 8 | inb(0x20); |
1587 | printk(KERN_DEBUG "... PIC IRR: %04x\n", v); | 1579 | printk(KERN_DEBUG "... PIC IRR: %04x\n", v); |
1588 | 1580 | ||
1589 | outb(0x0b,0xa0); | 1581 | outb(0x0b, 0xa0); |
1590 | outb(0x0b,0x20); | 1582 | outb(0x0b, 0x20); |
1591 | v = inb(0xa0) << 8 | inb(0x20); | 1583 | v = inb(0xa0) << 8 | inb(0x20); |
1592 | outb(0x0a,0xa0); | 1584 | outb(0x0a, 0xa0); |
1593 | outb(0x0a,0x20); | 1585 | outb(0x0a, 0x20); |
1594 | 1586 | ||
1595 | spin_unlock_irqrestore(&i8259A_lock, flags); | 1587 | spin_unlock_irqrestore(&i8259A_lock, flags); |
1596 | 1588 | ||
@@ -1626,7 +1618,7 @@ static void __init enable_IO_APIC(void) | |||
1626 | spin_unlock_irqrestore(&ioapic_lock, flags); | 1618 | spin_unlock_irqrestore(&ioapic_lock, flags); |
1627 | nr_ioapic_registers[apic] = reg_01.bits.entries+1; | 1619 | nr_ioapic_registers[apic] = reg_01.bits.entries+1; |
1628 | } | 1620 | } |
1629 | for(apic = 0; apic < nr_ioapics; apic++) { | 1621 | for (apic = 0; apic < nr_ioapics; apic++) { |
1630 | int pin; | 1622 | int pin; |
1631 | /* See if any of the pins is in ExtINT mode */ | 1623 | /* See if any of the pins is in ExtINT mode */ |
1632 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { | 1624 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { |
@@ -1748,7 +1740,7 @@ static void __init setup_ioapic_ids_from_mpc(void) | |||
1748 | spin_lock_irqsave(&ioapic_lock, flags); | 1740 | spin_lock_irqsave(&ioapic_lock, flags); |
1749 | reg_00.raw = io_apic_read(apic, 0); | 1741 | reg_00.raw = io_apic_read(apic, 0); |
1750 | spin_unlock_irqrestore(&ioapic_lock, flags); | 1742 | spin_unlock_irqrestore(&ioapic_lock, flags); |
1751 | 1743 | ||
1752 | old_id = mp_ioapics[apic].mpc_apicid; | 1744 | old_id = mp_ioapics[apic].mpc_apicid; |
1753 | 1745 | ||
1754 | if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) { | 1746 | if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) { |
@@ -1800,7 +1792,7 @@ static void __init setup_ioapic_ids_from_mpc(void) | |||
1800 | /* | 1792 | /* |
1801 | * Read the right value from the MPC table and | 1793 | * Read the right value from the MPC table and |
1802 | * write it into the ID register. | 1794 | * write it into the ID register. |
1803 | */ | 1795 | */ |
1804 | apic_printk(APIC_VERBOSE, KERN_INFO | 1796 | apic_printk(APIC_VERBOSE, KERN_INFO |
1805 | "...changing IO-APIC physical APIC ID to %d ...", | 1797 | "...changing IO-APIC physical APIC ID to %d ...", |
1806 | mp_ioapics[apic].mpc_apicid); | 1798 | mp_ioapics[apic].mpc_apicid); |
@@ -2020,7 +2012,7 @@ static void ack_apic(unsigned int irq) | |||
2020 | ack_APIC_irq(); | 2012 | ack_APIC_irq(); |
2021 | } | 2013 | } |
2022 | 2014 | ||
2023 | static void mask_lapic_irq (unsigned int irq) | 2015 | static void mask_lapic_irq(unsigned int irq) |
2024 | { | 2016 | { |
2025 | unsigned long v; | 2017 | unsigned long v; |
2026 | 2018 | ||
@@ -2028,7 +2020,7 @@ static void mask_lapic_irq (unsigned int irq) | |||
2028 | apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); | 2020 | apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); |
2029 | } | 2021 | } |
2030 | 2022 | ||
2031 | static void unmask_lapic_irq (unsigned int irq) | 2023 | static void unmask_lapic_irq(unsigned int irq) |
2032 | { | 2024 | { |
2033 | unsigned long v; | 2025 | unsigned long v; |
2034 | 2026 | ||
@@ -2037,7 +2029,7 @@ static void unmask_lapic_irq (unsigned int irq) | |||
2037 | } | 2029 | } |
2038 | 2030 | ||
2039 | static struct irq_chip lapic_chip __read_mostly = { | 2031 | static struct irq_chip lapic_chip __read_mostly = { |
2040 | .name = "local-APIC-edge", | 2032 | .name = "local-APIC", |
2041 | .mask = mask_lapic_irq, | 2033 | .mask = mask_lapic_irq, |
2042 | .unmask = unmask_lapic_irq, | 2034 | .unmask = unmask_lapic_irq, |
2043 | .eoi = ack_apic, | 2035 | .eoi = ack_apic, |
@@ -2046,14 +2038,14 @@ static struct irq_chip lapic_chip __read_mostly = { | |||
2046 | static void __init setup_nmi(void) | 2038 | static void __init setup_nmi(void) |
2047 | { | 2039 | { |
2048 | /* | 2040 | /* |
2049 | * Dirty trick to enable the NMI watchdog ... | 2041 | * Dirty trick to enable the NMI watchdog ... |
2050 | * We put the 8259A master into AEOI mode and | 2042 | * We put the 8259A master into AEOI mode and |
2051 | * unmask on all local APICs LVT0 as NMI. | 2043 | * unmask on all local APICs LVT0 as NMI. |
2052 | * | 2044 | * |
2053 | * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') | 2045 | * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') |
2054 | * is from Maciej W. Rozycki - so we do not have to EOI from | 2046 | * is from Maciej W. Rozycki - so we do not have to EOI from |
2055 | * the NMI handler or the timer interrupt. | 2047 | * the NMI handler or the timer interrupt. |
2056 | */ | 2048 | */ |
2057 | apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); | 2049 | apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); |
2058 | 2050 | ||
2059 | enable_NMI_through_LVT0(); | 2051 | enable_NMI_through_LVT0(); |
@@ -2129,6 +2121,7 @@ static inline void __init unlock_ExtINT_logic(void) | |||
2129 | static inline void __init check_timer(void) | 2121 | static inline void __init check_timer(void) |
2130 | { | 2122 | { |
2131 | int apic1, pin1, apic2, pin2; | 2123 | int apic1, pin1, apic2, pin2; |
2124 | int no_pin1 = 0; | ||
2132 | int vector; | 2125 | int vector; |
2133 | unsigned int ver; | 2126 | unsigned int ver; |
2134 | unsigned long flags; | 2127 | unsigned long flags; |
@@ -2146,21 +2139,17 @@ static inline void __init check_timer(void) | |||
2146 | set_intr_gate(vector, interrupt[0]); | 2139 | set_intr_gate(vector, interrupt[0]); |
2147 | 2140 | ||
2148 | /* | 2141 | /* |
2149 | * Subtle, code in do_timer_interrupt() expects an AEOI | 2142 | * As IRQ0 is to be enabled in the 8259A, the virtual |
2150 | * mode for the 8259A whenever interrupts are routed | 2143 | * wire has to be disabled in the local APIC. Also |
2151 | * through I/O APICs. Also IRQ0 has to be enabled in | 2144 | * timer interrupts need to be acknowledged manually in |
2152 | * the 8259A which implies the virtual wire has to be | 2145 | * the 8259A for the i82489DX when using the NMI |
2153 | * disabled in the local APIC. Finally timer interrupts | 2146 | * watchdog as that APIC treats NMIs as level-triggered. |
2154 | * need to be acknowledged manually in the 8259A for | 2147 | * The AEOI mode will finish them in the 8259A |
2155 | * timer_interrupt() and for the i82489DX when using | 2148 | * automatically. |
2156 | * the NMI watchdog. | ||
2157 | */ | 2149 | */ |
2158 | apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); | 2150 | apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); |
2159 | init_8259A(1); | 2151 | init_8259A(1); |
2160 | timer_ack = !cpu_has_tsc; | 2152 | timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); |
2161 | timer_ack |= (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); | ||
2162 | if (timer_over_8254 > 0) | ||
2163 | enable_8259A_irq(0); | ||
2164 | 2153 | ||
2165 | pin1 = find_isa_irq_pin(0, mp_INT); | 2154 | pin1 = find_isa_irq_pin(0, mp_INT); |
2166 | apic1 = find_isa_irq_apic(0, mp_INT); | 2155 | apic1 = find_isa_irq_apic(0, mp_INT); |
@@ -2170,14 +2159,33 @@ static inline void __init check_timer(void) | |||
2170 | printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", | 2159 | printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", |
2171 | vector, apic1, pin1, apic2, pin2); | 2160 | vector, apic1, pin1, apic2, pin2); |
2172 | 2161 | ||
2162 | /* | ||
2163 | * Some BIOS writers are clueless and report the ExtINTA | ||
2164 | * I/O APIC input from the cascaded 8259A as the timer | ||
2165 | * interrupt input. So just in case, if only one pin | ||
2166 | * was found above, try it both directly and through the | ||
2167 | * 8259A. | ||
2168 | */ | ||
2169 | if (pin1 == -1) { | ||
2170 | pin1 = pin2; | ||
2171 | apic1 = apic2; | ||
2172 | no_pin1 = 1; | ||
2173 | } else if (pin2 == -1) { | ||
2174 | pin2 = pin1; | ||
2175 | apic2 = apic1; | ||
2176 | } | ||
2177 | |||
2173 | if (pin1 != -1) { | 2178 | if (pin1 != -1) { |
2174 | /* | 2179 | /* |
2175 | * Ok, does IRQ0 through the IOAPIC work? | 2180 | * Ok, does IRQ0 through the IOAPIC work? |
2176 | */ | 2181 | */ |
2182 | if (no_pin1) { | ||
2183 | add_pin_to_irq(0, apic1, pin1); | ||
2184 | setup_timer_IRQ0_pin(apic1, pin1, vector); | ||
2185 | } | ||
2177 | unmask_IO_APIC_irq(0); | 2186 | unmask_IO_APIC_irq(0); |
2178 | if (timer_irq_works()) { | 2187 | if (timer_irq_works()) { |
2179 | if (nmi_watchdog == NMI_IO_APIC) { | 2188 | if (nmi_watchdog == NMI_IO_APIC) { |
2180 | disable_8259A_irq(0); | ||
2181 | setup_nmi(); | 2189 | setup_nmi(); |
2182 | enable_8259A_irq(0); | 2190 | enable_8259A_irq(0); |
2183 | } | 2191 | } |
@@ -2186,43 +2194,46 @@ static inline void __init check_timer(void) | |||
2186 | goto out; | 2194 | goto out; |
2187 | } | 2195 | } |
2188 | clear_IO_APIC_pin(apic1, pin1); | 2196 | clear_IO_APIC_pin(apic1, pin1); |
2189 | printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to " | 2197 | if (!no_pin1) |
2190 | "IO-APIC\n"); | 2198 | printk(KERN_ERR "..MP-BIOS bug: " |
2191 | } | 2199 | "8254 timer not connected to IO-APIC\n"); |
2192 | 2200 | ||
2193 | printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... "); | 2201 | printk(KERN_INFO "...trying to set up timer (IRQ0) " |
2194 | if (pin2 != -1) { | 2202 | "through the 8259A ... "); |
2195 | printk("\n..... (found pin %d) ...", pin2); | 2203 | printk("\n..... (found pin %d) ...", pin2); |
2196 | /* | 2204 | /* |
2197 | * legacy devices should be connected to IO APIC #0 | 2205 | * legacy devices should be connected to IO APIC #0 |
2198 | */ | 2206 | */ |
2199 | setup_ExtINT_IRQ0_pin(apic2, pin2, vector); | 2207 | replace_pin_at_irq(0, apic1, pin1, apic2, pin2); |
2208 | setup_timer_IRQ0_pin(apic2, pin2, vector); | ||
2209 | unmask_IO_APIC_irq(0); | ||
2210 | enable_8259A_irq(0); | ||
2200 | if (timer_irq_works()) { | 2211 | if (timer_irq_works()) { |
2201 | printk("works.\n"); | 2212 | printk("works.\n"); |
2202 | if (pin1 != -1) | 2213 | timer_through_8259 = 1; |
2203 | replace_pin_at_irq(0, apic1, pin1, apic2, pin2); | ||
2204 | else | ||
2205 | add_pin_to_irq(0, apic2, pin2); | ||
2206 | if (nmi_watchdog == NMI_IO_APIC) { | 2214 | if (nmi_watchdog == NMI_IO_APIC) { |
2215 | disable_8259A_irq(0); | ||
2207 | setup_nmi(); | 2216 | setup_nmi(); |
2217 | enable_8259A_irq(0); | ||
2208 | } | 2218 | } |
2209 | goto out; | 2219 | goto out; |
2210 | } | 2220 | } |
2211 | /* | 2221 | /* |
2212 | * Cleanup, just in case ... | 2222 | * Cleanup, just in case ... |
2213 | */ | 2223 | */ |
2224 | disable_8259A_irq(0); | ||
2214 | clear_IO_APIC_pin(apic2, pin2); | 2225 | clear_IO_APIC_pin(apic2, pin2); |
2226 | printk(" failed.\n"); | ||
2215 | } | 2227 | } |
2216 | printk(" failed.\n"); | ||
2217 | 2228 | ||
2218 | if (nmi_watchdog == NMI_IO_APIC) { | 2229 | if (nmi_watchdog == NMI_IO_APIC) { |
2219 | printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); | 2230 | printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); |
2220 | nmi_watchdog = 0; | 2231 | nmi_watchdog = NMI_NONE; |
2221 | } | 2232 | } |
2233 | timer_ack = 0; | ||
2222 | 2234 | ||
2223 | printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); | 2235 | printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); |
2224 | 2236 | ||
2225 | disable_8259A_irq(0); | ||
2226 | set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq, | 2237 | set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq, |
2227 | "fasteoi"); | 2238 | "fasteoi"); |
2228 | apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ | 2239 | apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ |
@@ -2232,12 +2243,12 @@ static inline void __init check_timer(void) | |||
2232 | printk(" works.\n"); | 2243 | printk(" works.\n"); |
2233 | goto out; | 2244 | goto out; |
2234 | } | 2245 | } |
2246 | disable_8259A_irq(0); | ||
2235 | apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); | 2247 | apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); |
2236 | printk(" failed.\n"); | 2248 | printk(" failed.\n"); |
2237 | 2249 | ||
2238 | printk(KERN_INFO "...trying to set up timer as ExtINT IRQ..."); | 2250 | printk(KERN_INFO "...trying to set up timer as ExtINT IRQ..."); |
2239 | 2251 | ||
2240 | timer_ack = 0; | ||
2241 | init_8259A(0); | 2252 | init_8259A(0); |
2242 | make_8259A_irq(0); | 2253 | make_8259A_irq(0); |
2243 | apic_write_around(APIC_LVT0, APIC_DM_EXTINT); | 2254 | apic_write_around(APIC_LVT0, APIC_DM_EXTINT); |
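
The restructured check_timer() normalizes the broken-BIOS cases before probing: if the MP table reported only one of the two candidate pins, both (apic, pin) pairs are pointed at it, with no_pin1 recording that the direct leg was borrowed. The probe order then reads roughly:

	/* sketch of the normalization and fall-back order in check_timer() */
	if (pin1 == -1) {		/* BIOS only reported the 8259A route */
		pin1 = pin2;
		apic1 = apic2;
		no_pin1 = 1;		/* pin1 is borrowed from pin2 */
	} else if (pin2 == -1) {	/* only the direct route was reported */
		pin2 = pin1;
		apic2 = apic1;
	}
	/* then: 1) IRQ0 straight through the IO-APIC (setting the pin up
	 * first if it was borrowed), 2) through the 8259A, recording
	 * timer_through_8259 on success, 3) local APIC virtual wire,
	 * 4) ExtINT via the 8259A. */
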
@@ -2294,28 +2305,14 @@ void __init setup_IO_APIC(void) | |||
2294 | print_IO_APIC(); | 2305 | print_IO_APIC(); |
2295 | } | 2306 | } |
2296 | 2307 | ||
2297 | static int __init setup_disable_8254_timer(char *s) | ||
2298 | { | ||
2299 | timer_over_8254 = -1; | ||
2300 | return 1; | ||
2301 | } | ||
2302 | static int __init setup_enable_8254_timer(char *s) | ||
2303 | { | ||
2304 | timer_over_8254 = 2; | ||
2305 | return 1; | ||
2306 | } | ||
2307 | |||
2308 | __setup("disable_8254_timer", setup_disable_8254_timer); | ||
2309 | __setup("enable_8254_timer", setup_enable_8254_timer); | ||
2310 | |||
2311 | /* | 2308 | /* |
2312 | * Called after all the initialization is done. If we didnt find any | 2309 | * Called after all the initialization is done. If we didnt find any |
2313 | * APIC bugs then we can allow the modify fast path | 2310 | * APIC bugs then we can allow the modify fast path |
2314 | */ | 2311 | */ |
2315 | 2312 | ||
2316 | static int __init io_apic_bug_finalize(void) | 2313 | static int __init io_apic_bug_finalize(void) |
2317 | { | 2314 | { |
2318 | if(sis_apic_bug == -1) | 2315 | if (sis_apic_bug == -1) |
2319 | sis_apic_bug = 0; | 2316 | sis_apic_bug = 0; |
2320 | return 0; | 2317 | return 0; |
2321 | } | 2318 | } |
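
With check_timer() probing both routes on its own, the disable_8254_timer and enable_8254_timer switches deleted above (and the timer_over_8254 knob behind them) have nothing left to control, so the __setup() handlers are removed outright rather than kept as no-ops; kernels built from this tree no longer recognize those command-line options.
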
@@ -2326,17 +2323,17 @@ struct sysfs_ioapic_data { | |||
2326 | struct sys_device dev; | 2323 | struct sys_device dev; |
2327 | struct IO_APIC_route_entry entry[0]; | 2324 | struct IO_APIC_route_entry entry[0]; |
2328 | }; | 2325 | }; |
2329 | static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS]; | 2326 | static struct sysfs_ioapic_data *mp_ioapic_data[MAX_IO_APICS]; |
2330 | 2327 | ||
2331 | static int ioapic_suspend(struct sys_device *dev, pm_message_t state) | 2328 | static int ioapic_suspend(struct sys_device *dev, pm_message_t state) |
2332 | { | 2329 | { |
2333 | struct IO_APIC_route_entry *entry; | 2330 | struct IO_APIC_route_entry *entry; |
2334 | struct sysfs_ioapic_data *data; | 2331 | struct sysfs_ioapic_data *data; |
2335 | int i; | 2332 | int i; |
2336 | 2333 | ||
2337 | data = container_of(dev, struct sysfs_ioapic_data, dev); | 2334 | data = container_of(dev, struct sysfs_ioapic_data, dev); |
2338 | entry = data->entry; | 2335 | entry = data->entry; |
2339 | for (i = 0; i < nr_ioapic_registers[dev->id]; i ++) | 2336 | for (i = 0; i < nr_ioapic_registers[dev->id]; i++) |
2340 | entry[i] = ioapic_read_entry(dev->id, i); | 2337 | entry[i] = ioapic_read_entry(dev->id, i); |
2341 | 2338 | ||
2342 | return 0; | 2339 | return 0; |
@@ -2349,7 +2346,7 @@ static int ioapic_resume(struct sys_device *dev) | |||
2349 | unsigned long flags; | 2346 | unsigned long flags; |
2350 | union IO_APIC_reg_00 reg_00; | 2347 | union IO_APIC_reg_00 reg_00; |
2351 | int i; | 2348 | int i; |
2352 | 2349 | ||
2353 | data = container_of(dev, struct sysfs_ioapic_data, dev); | 2350 | data = container_of(dev, struct sysfs_ioapic_data, dev); |
2354 | entry = data->entry; | 2351 | entry = data->entry; |
2355 | 2352 | ||
@@ -2360,7 +2357,7 @@ static int ioapic_resume(struct sys_device *dev) | |||
2360 | io_apic_write(dev->id, 0, reg_00.raw); | 2357 | io_apic_write(dev->id, 0, reg_00.raw); |
2361 | } | 2358 | } |
2362 | spin_unlock_irqrestore(&ioapic_lock, flags); | 2359 | spin_unlock_irqrestore(&ioapic_lock, flags); |
2363 | for (i = 0; i < nr_ioapic_registers[dev->id]; i ++) | 2360 | for (i = 0; i < nr_ioapic_registers[dev->id]; i++) |
2364 | ioapic_write_entry(dev->id, i, entry[i]); | 2361 | ioapic_write_entry(dev->id, i, entry[i]); |
2365 | 2362 | ||
2366 | return 0; | 2363 | return 0; |
@@ -2374,24 +2371,23 @@ static struct sysdev_class ioapic_sysdev_class = { | |||
2374 | 2371 | ||
2375 | static int __init ioapic_init_sysfs(void) | 2372 | static int __init ioapic_init_sysfs(void) |
2376 | { | 2373 | { |
2377 | struct sys_device * dev; | 2374 | struct sys_device *dev; |
2378 | int i, size, error = 0; | 2375 | int i, size, error = 0; |
2379 | 2376 | ||
2380 | error = sysdev_class_register(&ioapic_sysdev_class); | 2377 | error = sysdev_class_register(&ioapic_sysdev_class); |
2381 | if (error) | 2378 | if (error) |
2382 | return error; | 2379 | return error; |
2383 | 2380 | ||
2384 | for (i = 0; i < nr_ioapics; i++ ) { | 2381 | for (i = 0; i < nr_ioapics; i++) { |
2385 | size = sizeof(struct sys_device) + nr_ioapic_registers[i] | 2382 | size = sizeof(struct sys_device) + nr_ioapic_registers[i] |
2386 | * sizeof(struct IO_APIC_route_entry); | 2383 | * sizeof(struct IO_APIC_route_entry); |
2387 | mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL); | 2384 | mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL); |
2388 | if (!mp_ioapic_data[i]) { | 2385 | if (!mp_ioapic_data[i]) { |
2389 | printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); | 2386 | printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); |
2390 | continue; | 2387 | continue; |
2391 | } | 2388 | } |
2392 | memset(mp_ioapic_data[i], 0, size); | ||
2393 | dev = &mp_ioapic_data[i]->dev; | 2389 | dev = &mp_ioapic_data[i]->dev; |
2394 | dev->id = i; | 2390 | dev->id = i; |
2395 | dev->cls = &ioapic_sysdev_class; | 2391 | dev->cls = &ioapic_sysdev_class; |
2396 | error = sysdev_register(dev); | 2392 | error = sysdev_register(dev); |
2397 | if (error) { | 2393 | if (error) { |
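
Same conversion as in balanced_irq_init() above: kzalloc() replaces the kmalloc()+memset() pair, since it returns already-zeroed memory. The two forms are equivalent (buf is a scratch name for illustration):

	/* before: allocate, then zero by hand */
	buf = kmalloc(size, GFP_KERNEL);
	if (buf)
		memset(buf, 0, size);

	/* after: one call, memory comes back zeroed */
	buf = kzalloc(size, GFP_KERNEL);
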
@@ -2466,7 +2462,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms | |||
2466 | msg->address_lo = | 2462 | msg->address_lo = |
2467 | MSI_ADDR_BASE_LO | | 2463 | MSI_ADDR_BASE_LO | |
2468 | ((INT_DEST_MODE == 0) ? | 2464 | ((INT_DEST_MODE == 0) ? |
2469 | MSI_ADDR_DEST_MODE_PHYSICAL: | 2465 | MSI_ADDR_DEST_MODE_PHYSICAL: |
2470 | MSI_ADDR_DEST_MODE_LOGICAL) | | 2466 | MSI_ADDR_DEST_MODE_LOGICAL) | |
2471 | ((INT_DELIVERY_MODE != dest_LowestPrio) ? | 2467 | ((INT_DELIVERY_MODE != dest_LowestPrio) ? |
2472 | MSI_ADDR_REDIRECTION_CPU: | 2468 | MSI_ADDR_REDIRECTION_CPU: |
@@ -2477,7 +2473,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms | |||
2477 | MSI_DATA_TRIGGER_EDGE | | 2473 | MSI_DATA_TRIGGER_EDGE | |
2478 | MSI_DATA_LEVEL_ASSERT | | 2474 | MSI_DATA_LEVEL_ASSERT | |
2479 | ((INT_DELIVERY_MODE != dest_LowestPrio) ? | 2475 | ((INT_DELIVERY_MODE != dest_LowestPrio) ? |
2480 | MSI_DATA_DELIVERY_FIXED: | 2476 | MSI_DATA_DELIVERY_FIXED: |
2481 | MSI_DATA_DELIVERY_LOWPRI) | | 2477 | MSI_DATA_DELIVERY_LOWPRI) | |
2482 | MSI_DATA_VECTOR(vector); | 2478 | MSI_DATA_VECTOR(vector); |
2483 | } | 2479 | } |
@@ -2648,12 +2644,12 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) | |||
2648 | #endif /* CONFIG_HT_IRQ */ | 2644 | #endif /* CONFIG_HT_IRQ */ |
2649 | 2645 | ||
2650 | /* -------------------------------------------------------------------------- | 2646 | /* -------------------------------------------------------------------------- |
2651 | ACPI-based IOAPIC Configuration | 2647 | ACPI-based IOAPIC Configuration |
2652 | -------------------------------------------------------------------------- */ | 2648 | -------------------------------------------------------------------------- */ |
2653 | 2649 | ||
2654 | #ifdef CONFIG_ACPI | 2650 | #ifdef CONFIG_ACPI |
2655 | 2651 | ||
2656 | int __init io_apic_get_unique_id (int ioapic, int apic_id) | 2652 | int __init io_apic_get_unique_id(int ioapic, int apic_id) |
2657 | { | 2653 | { |
2658 | union IO_APIC_reg_00 reg_00; | 2654 | union IO_APIC_reg_00 reg_00; |
2659 | static physid_mask_t apic_id_map = PHYSID_MASK_NONE; | 2655 | static physid_mask_t apic_id_map = PHYSID_MASK_NONE; |
@@ -2662,10 +2658,10 @@ int __init io_apic_get_unique_id (int ioapic, int apic_id) | |||
2662 | int i = 0; | 2658 | int i = 0; |
2663 | 2659 | ||
2664 | /* | 2660 | /* |
2665 | * The P4 platform supports up to 256 APIC IDs on two separate APIC | 2661 | * The P4 platform supports up to 256 APIC IDs on two separate APIC |
2666 | * buses (one for LAPICs, one for IOAPICs), where predecessors only | 2662 | * buses (one for LAPICs, one for IOAPICs), where predecessors only |
2667 | * supports up to 16 on one shared APIC bus. | 2663 | * supports up to 16 on one shared APIC bus. |
2668 | * | 2664 | * |
2669 | * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full | 2665 | * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full |
2670 | * advantage of new APIC bus architecture. | 2666 | * advantage of new APIC bus architecture. |
2671 | */ | 2667 | */ |
@@ -2684,7 +2680,7 @@ int __init io_apic_get_unique_id (int ioapic, int apic_id) | |||
2684 | } | 2680 | } |
2685 | 2681 | ||
2686 | /* | 2682 | /* |
2687 | * Every APIC in a system must have a unique ID or we get lots of nice | 2683 | * Every APIC in a system must have a unique ID or we get lots of nice |
2688 | * 'stuck on smp_invalidate_needed IPI wait' messages. | 2684 | * 'stuck on smp_invalidate_needed IPI wait' messages. |
2689 | */ | 2685 | */ |
2690 | if (check_apicid_used(apic_id_map, apic_id)) { | 2686 | if (check_apicid_used(apic_id_map, apic_id)) { |
@@ -2701,7 +2697,7 @@ int __init io_apic_get_unique_id (int ioapic, int apic_id) | |||
2701 | "trying %d\n", ioapic, apic_id, i); | 2697 | "trying %d\n", ioapic, apic_id, i); |
2702 | 2698 | ||
2703 | apic_id = i; | 2699 | apic_id = i; |
2704 | } | 2700 | } |
2705 | 2701 | ||
2706 | tmp = apicid_to_cpu_present(apic_id); | 2702 | tmp = apicid_to_cpu_present(apic_id); |
2707 | physids_or(apic_id_map, apic_id_map, tmp); | 2703 | physids_or(apic_id_map, apic_id_map, tmp); |
@@ -2728,7 +2724,7 @@ int __init io_apic_get_unique_id (int ioapic, int apic_id) | |||
2728 | } | 2724 | } |
2729 | 2725 | ||
2730 | 2726 | ||
2731 | int __init io_apic_get_version (int ioapic) | 2727 | int __init io_apic_get_version(int ioapic) |
2732 | { | 2728 | { |
2733 | union IO_APIC_reg_01 reg_01; | 2729 | union IO_APIC_reg_01 reg_01; |
2734 | unsigned long flags; | 2730 | unsigned long flags; |
@@ -2741,7 +2737,7 @@ int __init io_apic_get_version (int ioapic) | |||
2741 | } | 2737 | } |
2742 | 2738 | ||
2743 | 2739 | ||
2744 | int __init io_apic_get_redir_entries (int ioapic) | 2740 | int __init io_apic_get_redir_entries(int ioapic) |
2745 | { | 2741 | { |
2746 | union IO_APIC_reg_01 reg_01; | 2742 | union IO_APIC_reg_01 reg_01; |
2747 | unsigned long flags; | 2743 | unsigned long flags; |
@@ -2754,7 +2750,7 @@ int __init io_apic_get_redir_entries (int ioapic) | |||
2754 | } | 2750 | } |
2755 | 2751 | ||
2756 | 2752 | ||
2757 | int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low) | 2753 | int io_apic_set_pci_routing(int ioapic, int pin, int irq, int edge_level, int active_high_low) |
2758 | { | 2754 | { |
2759 | struct IO_APIC_route_entry entry; | 2755 | struct IO_APIC_route_entry entry; |
2760 | 2756 | ||
@@ -2770,7 +2766,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a | |||
2770 | * corresponding device driver registers for this IRQ. | 2766 | * corresponding device driver registers for this IRQ. |
2771 | */ | 2767 | */ |
2772 | 2768 | ||
2773 | memset(&entry,0,sizeof(entry)); | 2769 | memset(&entry, 0, sizeof(entry)); |
2774 | 2770 | ||
2775 | entry.delivery_mode = INT_DELIVERY_MODE; | 2771 | entry.delivery_mode = INT_DELIVERY_MODE; |
2776 | entry.dest_mode = INT_DEST_MODE; | 2772 | entry.dest_mode = INT_DEST_MODE; |
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index ef1a8dfcc529..e5ef60303562 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c | |||
@@ -61,7 +61,7 @@ struct irq_cfg { | |||
61 | }; | 61 | }; |
62 | 62 | ||
63 | /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ | 63 | /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ |
64 | struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = { | 64 | static struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = { |
65 | [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, | 65 | [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, |
66 | [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, | 66 | [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, |
67 | [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, | 67 | [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, |
@@ -90,7 +90,7 @@ static int no_timer_check; | |||
90 | 90 | ||
91 | static int disable_timer_pin_1 __initdata; | 91 | static int disable_timer_pin_1 __initdata; |
92 | 92 | ||
93 | int timer_over_8254 __initdata = 1; | 93 | int timer_through_8259 __initdata; |
94 | 94 | ||
95 | /* Where if anywhere is the i8259 connect in external int mode */ | 95 | /* Where if anywhere is the i8259 connect in external int mode */ |
96 | static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; | 96 | static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; |
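
Note the semantic flip here: timer_over_8254 was a user-tunable policy knob defaulting to 1, while its replacement timer_through_8259 is pure status, zero until check_timer() actually observes the timer working via the 8259A path (as the 32-bit version above sets it).
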
@@ -183,7 +183,7 @@ static bool io_apic_level_ack_pending(unsigned int irq) | |||
183 | break; | 183 | break; |
184 | reg = io_apic_read(entry->apic, 0x10 + pin*2); | 184 | reg = io_apic_read(entry->apic, 0x10 + pin*2); |
185 | /* Is the remote IRR bit set? */ | 185 | /* Is the remote IRR bit set? */ |
186 | if ((reg >> 14) & 1) { | 186 | if (reg & IO_APIC_REDIR_REMOTE_IRR) { |
187 | spin_unlock_irqrestore(&ioapic_lock, flags); | 187 | spin_unlock_irqrestore(&ioapic_lock, flags); |
188 | return true; | 188 | return true; |
189 | } | 189 | } |
@@ -298,7 +298,7 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) | |||
298 | break; | 298 | break; |
299 | io_apic_write(apic, 0x11 + pin*2, dest); | 299 | io_apic_write(apic, 0x11 + pin*2, dest); |
300 | reg = io_apic_read(apic, 0x10 + pin*2); | 300 | reg = io_apic_read(apic, 0x10 + pin*2); |
301 | reg &= ~0x000000ff; | 301 | reg &= ~IO_APIC_REDIR_VECTOR_MASK; |
302 | reg |= vector; | 302 | reg |= vector; |
303 | io_apic_modify(apic, reg); | 303 | io_apic_modify(apic, reg); |
304 | if (!entry->next) | 304 | if (!entry->next) |
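
These two hunks (the remote-IRR test and the vector update) swap magic numbers for named bits of the IO-APIC redirection entry. Judging from the literals they replace, the definitions amount to (a sketch; the real ones live in the io_apic header):

	/* redirection-table entry bits, inferred from the old literals: */
	#define IO_APIC_REDIR_VECTOR_MASK	0x000FF		/* was ~0x000000ff */
	#define IO_APIC_REDIR_REMOTE_IRR	(1 << 14)	/* was (reg >> 14) & 1 */
	#define IO_APIC_REDIR_MASKED		(1 << 16)	/* was |= 0x00010000, see DO_ACTION below */
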
@@ -360,16 +360,37 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) | |||
360 | entry->pin = pin; | 360 | entry->pin = pin; |
361 | } | 361 | } |
362 | 362 | ||
363 | /* | ||
364 | * Reroute an IRQ to a different pin. | ||
365 | */ | ||
366 | static void __init replace_pin_at_irq(unsigned int irq, | ||
367 | int oldapic, int oldpin, | ||
368 | int newapic, int newpin) | ||
369 | { | ||
370 | struct irq_pin_list *entry = irq_2_pin + irq; | ||
371 | |||
372 | while (1) { | ||
373 | if (entry->apic == oldapic && entry->pin == oldpin) { | ||
374 | entry->apic = newapic; | ||
375 | entry->pin = newpin; | ||
376 | } | ||
377 | if (!entry->next) | ||
378 | break; | ||
379 | entry = irq_2_pin + entry->next; | ||
380 | } | ||
381 | } | ||
382 | |||
363 | 383 | ||
364 | #define DO_ACTION(name,R,ACTION, FINAL) \ | 384 | #define DO_ACTION(name,R,ACTION, FINAL) \ |
365 | \ | 385 | \ |
366 | static void name##_IO_APIC_irq (unsigned int irq) \ | 386 | static void name##_IO_APIC_irq (unsigned int irq) \ |
367 | __DO_ACTION(R, ACTION, FINAL) | 387 | __DO_ACTION(R, ACTION, FINAL) |
368 | 388 | ||
369 | DO_ACTION( __mask, 0, |= 0x00010000, io_apic_sync(entry->apic) ) | 389 | /* mask = 1 */ |
370 | /* mask = 1 */ | 390 | DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic)) |
371 | DO_ACTION( __unmask, 0, &= 0xfffeffff, ) | 391 | |
372 | /* mask = 0 */ | 392 | /* mask = 0 */ |
393 | DO_ACTION(__unmask, 0, &= ~IO_APIC_REDIR_MASKED, ) | ||
373 | 394 | ||
374 | static void mask_IO_APIC_irq (unsigned int irq) | 395 | static void mask_IO_APIC_irq (unsigned int irq) |
375 | { | 396 | { |
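
replace_pin_at_irq() gives the 64-bit side the helper the reworked check_timer() depends on: it walks the irq_2_pin chain for an IRQ and rewrites any matching (oldapic, oldpin) entry in place. Typical use, as on the 32-bit side:

	/* retarget IRQ0's pin record from the direct route to the 8259A pin */
	replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
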
@@ -430,20 +451,6 @@ static int __init disable_timer_pin_setup(char *arg) | |||
430 | } | 451 | } |
431 | __setup("disable_timer_pin_1", disable_timer_pin_setup); | 452 | __setup("disable_timer_pin_1", disable_timer_pin_setup); |
432 | 453 | ||
433 | static int __init setup_disable_8254_timer(char *s) | ||
434 | { | ||
435 | timer_over_8254 = -1; | ||
436 | return 1; | ||
437 | } | ||
438 | static int __init setup_enable_8254_timer(char *s) | ||
439 | { | ||
440 | timer_over_8254 = 2; | ||
441 | return 1; | ||
442 | } | ||
443 | |||
444 | __setup("disable_8254_timer", setup_disable_8254_timer); | ||
445 | __setup("enable_8254_timer", setup_enable_8254_timer); | ||
446 | |||
447 | 454 | ||
448 | /* | 455 | /* |
449 | * Find the IRQ entry number of a certain pin. | 456 | * Find the IRQ entry number of a certain pin. |
@@ -911,26 +918,21 @@ static void __init setup_IO_APIC_irqs(void) | |||
911 | } | 918 | } |
912 | 919 | ||
913 | /* | 920 | /* |
914 | * Set up the 8259A-master output pin as broadcast to all | 921 | * Set up the timer pin, possibly with the 8259A-master behind. |
915 | * CPUs. | ||
916 | */ | 922 | */ |
917 | static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector) | 923 | static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, |
924 | int vector) | ||
918 | { | 925 | { |
919 | struct IO_APIC_route_entry entry; | 926 | struct IO_APIC_route_entry entry; |
920 | 927 | ||
921 | memset(&entry, 0, sizeof(entry)); | 928 | memset(&entry, 0, sizeof(entry)); |
922 | 929 | ||
923 | disable_8259A_irq(0); | ||
924 | |||
925 | /* mask LVT0 */ | ||
926 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); | ||
927 | |||
928 | /* | 930 | /* |
929 | * We use logical delivery to get the timer IRQ | 931 | * We use logical delivery to get the timer IRQ |
930 | * to the first CPU. | 932 | * to the first CPU. |
931 | */ | 933 | */ |
932 | entry.dest_mode = INT_DEST_MODE; | 934 | entry.dest_mode = INT_DEST_MODE; |
933 | entry.mask = 0; /* unmask IRQ now */ | 935 | entry.mask = 1; /* mask IRQ now */ |
934 | entry.dest = cpu_mask_to_apicid(TARGET_CPUS); | 936 | entry.dest = cpu_mask_to_apicid(TARGET_CPUS); |
935 | entry.delivery_mode = INT_DELIVERY_MODE; | 937 | entry.delivery_mode = INT_DELIVERY_MODE; |
936 | entry.polarity = 0; | 938 | entry.polarity = 0; |
@@ -939,7 +941,7 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in | |||
939 | 941 | ||
940 | /* | 942 | /* |
941 | * The timer IRQ doesn't have to know that behind the | 943 | * The timer IRQ doesn't have to know that behind the |
942 | * scene we have a 8259A-master in AEOI mode ... | 944 | * scene we may have a 8259A-master in AEOI mode ... |
943 | */ | 945 | */ |
944 | set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); | 946 | set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); |
945 | 947 | ||
@@ -947,8 +949,6 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in | |||
947 | * Add it to the IO-APIC irq-routing table: | 949 | * Add it to the IO-APIC irq-routing table: |
948 | */ | 950 | */ |
949 | ioapic_write_entry(apic, pin, entry); | 951 | ioapic_write_entry(apic, pin, entry); |
950 | |||
951 | enable_8259A_irq(0); | ||
952 | } | 952 | } |
953 | 953 | ||
954 | void __apicdebuginit print_IO_APIC(void) | 954 | void __apicdebuginit print_IO_APIC(void) |
@@ -1077,6 +1077,7 @@ void __apicdebuginit print_local_APIC(void * dummy) | |||
1077 | 1077 | ||
1078 | printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", | 1078 | printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", |
1079 | smp_processor_id(), hard_smp_processor_id()); | 1079 | smp_processor_id(), hard_smp_processor_id()); |
1080 | v = apic_read(APIC_ID); | ||
1080 | printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id())); | 1081 | printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id())); |
1081 | v = apic_read(APIC_LVR); | 1082 | v = apic_read(APIC_LVR); |
1082 | printk(KERN_INFO "... APIC VERSION: %08x\n", v); | 1083 | printk(KERN_INFO "... APIC VERSION: %08x\n", v); |
@@ -1659,6 +1660,7 @@ static inline void __init check_timer(void) | |||
1659 | struct irq_cfg *cfg = irq_cfg + 0; | 1660 | struct irq_cfg *cfg = irq_cfg + 0; |
1660 | int apic1, pin1, apic2, pin2; | 1661 | int apic1, pin1, apic2, pin2; |
1661 | unsigned long flags; | 1662 | unsigned long flags; |
1663 | int no_pin1 = 0; | ||
1662 | 1664 | ||
1663 | local_irq_save(flags); | 1665 | local_irq_save(flags); |
1664 | 1666 | ||
@@ -1669,16 +1671,11 @@ static inline void __init check_timer(void) | |||
1669 | assign_irq_vector(0, TARGET_CPUS); | 1671 | assign_irq_vector(0, TARGET_CPUS); |
1670 | 1672 | ||
1671 | /* | 1673 | /* |
1672 | * Subtle, code in do_timer_interrupt() expects an AEOI | 1674 | * As IRQ0 is to be enabled in the 8259A, the virtual |
1673 | * mode for the 8259A whenever interrupts are routed | 1675 | * wire has to be disabled in the local APIC. |
1674 | * through I/O APICs. Also IRQ0 has to be enabled in | ||
1675 | * the 8259A which implies the virtual wire has to be | ||
1676 | * disabled in the local APIC. | ||
1677 | */ | 1676 | */ |
1678 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); | 1677 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); |
1679 | init_8259A(1); | 1678 | init_8259A(1); |
1680 | if (timer_over_8254 > 0) | ||
1681 | enable_8259A_irq(0); | ||
1682 | 1679 | ||
1683 | pin1 = find_isa_irq_pin(0, mp_INT); | 1680 | pin1 = find_isa_irq_pin(0, mp_INT); |
1684 | apic1 = find_isa_irq_apic(0, mp_INT); | 1681 | apic1 = find_isa_irq_apic(0, mp_INT); |
@@ -1688,15 +1685,39 @@ static inline void __init check_timer(void) | |||
1688 | apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", | 1685 | apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", |
1689 | cfg->vector, apic1, pin1, apic2, pin2); | 1686 | cfg->vector, apic1, pin1, apic2, pin2); |
1690 | 1687 | ||
1688 | /* | ||
1689 | * Some BIOS writers are clueless and report the ExtINTA | ||
1690 | * I/O APIC input from the cascaded 8259A as the timer | ||
1691 | * interrupt input. So just in case, if only one pin | ||
1692 | * was found above, try it both directly and through the | ||
1693 | * 8259A. | ||
1694 | */ | ||
1695 | if (pin1 == -1) { | ||
1696 | pin1 = pin2; | ||
1697 | apic1 = apic2; | ||
1698 | no_pin1 = 1; | ||
1699 | } else if (pin2 == -1) { | ||
1700 | pin2 = pin1; | ||
1701 | apic2 = apic1; | ||
1702 | } | ||
1703 | |||
1704 | replace_pin_at_irq(0, 0, 0, apic1, pin1); | ||
1705 | apic1 = 0; | ||
1706 | pin1 = 0; | ||
1707 | setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); | ||
1708 | |||
1691 | if (pin1 != -1) { | 1709 | if (pin1 != -1) { |
1692 | /* | 1710 | /* |
1693 | * Ok, does IRQ0 through the IOAPIC work? | 1711 | * Ok, does IRQ0 through the IOAPIC work? |
1694 | */ | 1712 | */ |
1713 | if (no_pin1) { | ||
1714 | add_pin_to_irq(0, apic1, pin1); | ||
1715 | setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); | ||
1716 | } | ||
1695 | unmask_IO_APIC_irq(0); | 1717 | unmask_IO_APIC_irq(0); |
1696 | if (!no_timer_check && timer_irq_works()) { | 1718 | if (!no_timer_check && timer_irq_works()) { |
1697 | nmi_watchdog_default(); | 1719 | nmi_watchdog_default(); |
1698 | if (nmi_watchdog == NMI_IO_APIC) { | 1720 | if (nmi_watchdog == NMI_IO_APIC) { |
1699 | disable_8259A_irq(0); | ||
1700 | setup_nmi(); | 1721 | setup_nmi(); |
1701 | enable_8259A_irq(0); | 1722 | enable_8259A_irq(0); |
1702 | } | 1723 | } |
@@ -1705,42 +1726,48 @@ static inline void __init check_timer(void) | |||
1705 | goto out; | 1726 | goto out; |
1706 | } | 1727 | } |
1707 | clear_IO_APIC_pin(apic1, pin1); | 1728 | clear_IO_APIC_pin(apic1, pin1); |
1708 | apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: 8254 timer not " | 1729 | if (!no_pin1) |
1709 | "connected to IO-APIC\n"); | 1730 | apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: " |
1710 | } | 1731 | "8254 timer not connected to IO-APIC\n"); |
1711 | 1732 | ||
1712 | apic_printk(APIC_VERBOSE,KERN_INFO "...trying to set up timer (IRQ0) " | 1733 | apic_printk(APIC_VERBOSE,KERN_INFO |
1713 | "through the 8259A ... "); | 1734 | "...trying to set up timer (IRQ0) " |
1714 | if (pin2 != -1) { | 1735 | "through the 8259A ... "); |
1715 | apic_printk(APIC_VERBOSE,"\n..... (found apic %d pin %d) ...", | 1736 | apic_printk(APIC_VERBOSE,"\n..... (found apic %d pin %d) ...", |
1716 | apic2, pin2); | 1737 | apic2, pin2); |
1717 | /* | 1738 | /* |
1718 | * legacy devices should be connected to IO APIC #0 | 1739 | * legacy devices should be connected to IO APIC #0 |
1719 | */ | 1740 | */ |
1720 | setup_ExtINT_IRQ0_pin(apic2, pin2, cfg->vector); | 1741 | replace_pin_at_irq(0, apic1, pin1, apic2, pin2); |
1742 | setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); | ||
1743 | unmask_IO_APIC_irq(0); | ||
1744 | enable_8259A_irq(0); | ||
1721 | if (timer_irq_works()) { | 1745 | if (timer_irq_works()) { |
1722 | apic_printk(APIC_VERBOSE," works.\n"); | 1746 | apic_printk(APIC_VERBOSE," works.\n"); |
1747 | timer_through_8259 = 1; | ||
1723 | nmi_watchdog_default(); | 1748 | nmi_watchdog_default(); |
1724 | if (nmi_watchdog == NMI_IO_APIC) { | 1749 | if (nmi_watchdog == NMI_IO_APIC) { |
1750 | disable_8259A_irq(0); | ||
1725 | setup_nmi(); | 1751 | setup_nmi(); |
1752 | enable_8259A_irq(0); | ||
1726 | } | 1753 | } |
1727 | goto out; | 1754 | goto out; |
1728 | } | 1755 | } |
1729 | /* | 1756 | /* |
1730 | * Cleanup, just in case ... | 1757 | * Cleanup, just in case ... |
1731 | */ | 1758 | */ |
1759 | disable_8259A_irq(0); | ||
1732 | clear_IO_APIC_pin(apic2, pin2); | 1760 | clear_IO_APIC_pin(apic2, pin2); |
1761 | apic_printk(APIC_VERBOSE," failed.\n"); | ||
1733 | } | 1762 | } |
1734 | apic_printk(APIC_VERBOSE," failed.\n"); | ||
1735 | 1763 | ||
1736 | if (nmi_watchdog == NMI_IO_APIC) { | 1764 | if (nmi_watchdog == NMI_IO_APIC) { |
1737 | printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); | 1765 | printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); |
1738 | nmi_watchdog = 0; | 1766 | nmi_watchdog = NMI_NONE; |
1739 | } | 1767 | } |
1740 | 1768 | ||
1741 | apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); | 1769 | apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); |
1742 | 1770 | ||
1743 | disable_8259A_irq(0); | ||
1744 | irq_desc[0].chip = &lapic_irq_type; | 1771 | irq_desc[0].chip = &lapic_irq_type; |
1745 | apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ | 1772 | apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ |
1746 | enable_8259A_irq(0); | 1773 | enable_8259A_irq(0); |
@@ -1749,6 +1776,7 @@ static inline void __init check_timer(void) | |||
1749 | apic_printk(APIC_VERBOSE," works.\n"); | 1776 | apic_printk(APIC_VERBOSE," works.\n"); |
1750 | goto out; | 1777 | goto out; |
1751 | } | 1778 | } |
1779 | disable_8259A_irq(0); | ||
1752 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); | 1780 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); |
1753 | apic_printk(APIC_VERBOSE," failed.\n"); | 1781 | apic_printk(APIC_VERBOSE," failed.\n"); |
1754 | 1782 | ||
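With these changes check_timer() tries three routes in order: IRQ0 through IO-APIC pin1 directly, then pin2 behind the 8259A (recording timer_through_8259), then the local-APIC virtual wire, with each attempt judged by timer_irq_works() before falling through to the next. A sketch of the probe that gates each fallback, assuming jiffies-based timekeeping; the in-tree version differs in delay length and tolerance:

/* Sketch: enable interrupts, let the 8254 tick, and check whether
 * the tick handler actually advanced jiffies. Kernel context assumed. */
static int __init timer_probe_works(void)
{
	unsigned long t1 = jiffies;
	unsigned long flags;

	local_save_flags(flags);
	local_irq_enable();
	mdelay(40);			/* long enough for several ticks */
	local_irq_restore(flags);

	return time_after(jiffies, t1 + 2);	/* saw at least a few ticks? */
}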
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index c0df7b89ca23..9d98cda39ad9 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c | |||
@@ -8,7 +8,6 @@ | |||
8 | #include <linux/kernel_stat.h> | 8 | #include <linux/kernel_stat.h> |
9 | #include <linux/mc146818rtc.h> | 9 | #include <linux/mc146818rtc.h> |
10 | #include <linux/cache.h> | 10 | #include <linux/cache.h> |
11 | #include <linux/interrupt.h> | ||
12 | #include <linux/cpu.h> | 11 | #include <linux/cpu.h> |
13 | #include <linux/module.h> | 12 | #include <linux/module.h> |
14 | 13 | ||
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 147352df28b9..468acd04aa2e 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c | |||
@@ -313,16 +313,20 @@ skip: | |||
313 | per_cpu(irq_stat,j).irq_tlb_count); | 313 | per_cpu(irq_stat,j).irq_tlb_count); |
314 | seq_printf(p, " TLB shootdowns\n"); | 314 | seq_printf(p, " TLB shootdowns\n"); |
315 | #endif | 315 | #endif |
316 | #ifdef CONFIG_X86_MCE | ||
316 | seq_printf(p, "TRM: "); | 317 | seq_printf(p, "TRM: "); |
317 | for_each_online_cpu(j) | 318 | for_each_online_cpu(j) |
318 | seq_printf(p, "%10u ", | 319 | seq_printf(p, "%10u ", |
319 | per_cpu(irq_stat,j).irq_thermal_count); | 320 | per_cpu(irq_stat,j).irq_thermal_count); |
320 | seq_printf(p, " Thermal event interrupts\n"); | 321 | seq_printf(p, " Thermal event interrupts\n"); |
322 | #endif | ||
323 | #ifdef CONFIG_X86_LOCAL_APIC | ||
321 | seq_printf(p, "SPU: "); | 324 | seq_printf(p, "SPU: "); |
322 | for_each_online_cpu(j) | 325 | for_each_online_cpu(j) |
323 | seq_printf(p, "%10u ", | 326 | seq_printf(p, "%10u ", |
324 | per_cpu(irq_stat,j).irq_spurious_count); | 327 | per_cpu(irq_stat,j).irq_spurious_count); |
325 | seq_printf(p, " Spurious interrupts\n"); | 328 | seq_printf(p, " Spurious interrupts\n"); |
329 | #endif | ||
326 | seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); | 330 | seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); |
327 | #if defined(CONFIG_X86_IO_APIC) | 331 | #if defined(CONFIG_X86_IO_APIC) |
328 | seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); | 332 | seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); |
@@ -331,6 +335,40 @@ skip: | |||
331 | return 0; | 335 | return 0; |
332 | } | 336 | } |
333 | 337 | ||
338 | /* | ||
339 | * /proc/stat helpers | ||
340 | */ | ||
341 | u64 arch_irq_stat_cpu(unsigned int cpu) | ||
342 | { | ||
343 | u64 sum = nmi_count(cpu); | ||
344 | |||
345 | #ifdef CONFIG_X86_LOCAL_APIC | ||
346 | sum += per_cpu(irq_stat, cpu).apic_timer_irqs; | ||
347 | #endif | ||
348 | #ifdef CONFIG_SMP | ||
349 | sum += per_cpu(irq_stat, cpu).irq_resched_count; | ||
350 | sum += per_cpu(irq_stat, cpu).irq_call_count; | ||
351 | sum += per_cpu(irq_stat, cpu).irq_tlb_count; | ||
352 | #endif | ||
353 | #ifdef CONFIG_X86_MCE | ||
354 | sum += per_cpu(irq_stat, cpu).irq_thermal_count; | ||
355 | #endif | ||
356 | #ifdef CONFIG_X86_LOCAL_APIC | ||
357 | sum += per_cpu(irq_stat, cpu).irq_spurious_count; | ||
358 | #endif | ||
359 | return sum; | ||
360 | } | ||
361 | |||
362 | u64 arch_irq_stat(void) | ||
363 | { | ||
364 | u64 sum = atomic_read(&irq_err_count); | ||
365 | |||
366 | #ifdef CONFIG_X86_IO_APIC | ||
367 | sum += atomic_read(&irq_mis_count); | ||
368 | #endif | ||
369 | return sum; | ||
370 | } | ||
371 | |||
334 | #ifdef CONFIG_HOTPLUG_CPU | 372 | #ifdef CONFIG_HOTPLUG_CPU |
335 | #include <mach_apic.h> | 373 | #include <mach_apic.h> |
336 | 374 | ||
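The new arch_irq_stat_cpu() and arch_irq_stat() hooks give generic code a way to fold the x86-only counters into the interrupt total without reaching into irq_stat directly. A sketch of the intended consumer (the exact call site in fs/proc is assumed):

/* Sketch: how a /proc/stat style reader would combine the hooks. */
static u64 total_interrupts(void)
{
	unsigned int cpu;
	u64 sum = arch_irq_stat();		/* global counts: ERR, MIS */

	for_each_possible_cpu(cpu)
		sum += arch_irq_stat_cpu(cpu);	/* NMI, LOC, RES, CAL, ... */
	return sum;
}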
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 3aac15466a91..1f78b238d8d2 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c | |||
@@ -135,6 +135,7 @@ skip: | |||
135 | seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count); | 135 | seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count); |
136 | seq_printf(p, " TLB shootdowns\n"); | 136 | seq_printf(p, " TLB shootdowns\n"); |
137 | #endif | 137 | #endif |
138 | #ifdef CONFIG_X86_MCE | ||
138 | seq_printf(p, "TRM: "); | 139 | seq_printf(p, "TRM: "); |
139 | for_each_online_cpu(j) | 140 | for_each_online_cpu(j) |
140 | seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count); | 141 | seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count); |
@@ -143,6 +144,7 @@ skip: | |||
143 | for_each_online_cpu(j) | 144 | for_each_online_cpu(j) |
144 | seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count); | 145 | seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count); |
145 | seq_printf(p, " Threshold APIC interrupts\n"); | 146 | seq_printf(p, " Threshold APIC interrupts\n"); |
147 | #endif | ||
146 | seq_printf(p, "SPU: "); | 148 | seq_printf(p, "SPU: "); |
147 | for_each_online_cpu(j) | 149 | for_each_online_cpu(j) |
148 | seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count); | 150 | seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count); |
@@ -153,6 +155,32 @@ skip: | |||
153 | } | 155 | } |
154 | 156 | ||
155 | /* | 157 | /* |
158 | * /proc/stat helpers | ||
159 | */ | ||
160 | u64 arch_irq_stat_cpu(unsigned int cpu) | ||
161 | { | ||
162 | u64 sum = cpu_pda(cpu)->__nmi_count; | ||
163 | |||
164 | sum += cpu_pda(cpu)->apic_timer_irqs; | ||
165 | #ifdef CONFIG_SMP | ||
166 | sum += cpu_pda(cpu)->irq_resched_count; | ||
167 | sum += cpu_pda(cpu)->irq_call_count; | ||
168 | sum += cpu_pda(cpu)->irq_tlb_count; | ||
169 | #endif | ||
170 | #ifdef CONFIG_X86_MCE | ||
171 | sum += cpu_pda(cpu)->irq_thermal_count; | ||
172 | sum += cpu_pda(cpu)->irq_threshold_count; | ||
173 | #endif | ||
174 | sum += cpu_pda(cpu)->irq_spurious_count; | ||
175 | return sum; | ||
176 | } | ||
177 | |||
178 | u64 arch_irq_stat(void) | ||
179 | { | ||
180 | return atomic_read(&irq_err_count); | ||
181 | } | ||
182 | |||
183 | /* | ||
156 | * do_IRQ handles all normal device IRQ's (the special | 184 | * do_IRQ handles all normal device IRQ's (the special |
157 | * SMP cross-CPU interrupts have their own specific | 185 | * SMP cross-CPU interrupts have their own specific |
158 | * handlers). | 186 | * handlers). |
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c new file mode 100644 index 000000000000..d66914287ee1 --- /dev/null +++ b/arch/x86/kernel/irqinit_32.c | |||
@@ -0,0 +1,114 @@ | |||
1 | #include <linux/errno.h> | ||
2 | #include <linux/signal.h> | ||
3 | #include <linux/sched.h> | ||
4 | #include <linux/ioport.h> | ||
5 | #include <linux/interrupt.h> | ||
6 | #include <linux/slab.h> | ||
7 | #include <linux/random.h> | ||
8 | #include <linux/init.h> | ||
9 | #include <linux/kernel_stat.h> | ||
10 | #include <linux/sysdev.h> | ||
11 | #include <linux/bitops.h> | ||
12 | |||
13 | #include <asm/atomic.h> | ||
14 | #include <asm/system.h> | ||
15 | #include <asm/io.h> | ||
16 | #include <asm/timer.h> | ||
17 | #include <asm/pgtable.h> | ||
18 | #include <asm/delay.h> | ||
19 | #include <asm/desc.h> | ||
20 | #include <asm/apic.h> | ||
21 | #include <asm/arch_hooks.h> | ||
22 | #include <asm/i8259.h> | ||
23 | |||
24 | |||
25 | |||
26 | /* | ||
27 | * Note that on a 486, we don't want to do a SIGFPE on an irq13 | ||
28 | * as the irq is unreliable, and exception 16 works correctly | ||
29 | * (i.e. as explained in the Intel literature). On a 386, you | ||
30 | * can't use exception 16 due to bad IBM design, so we have to | ||
31 | * rely on the less exact irq13. | ||
32 | * | ||
33 | * Careful.. Not only is IRQ13 unreliable, but it also | ||
34 | * leads to races. IBM designers who came up with it should | ||
35 | * be shot. | ||
36 | */ | ||
37 | |||
38 | |||
39 | static irqreturn_t math_error_irq(int cpl, void *dev_id) | ||
40 | { | ||
41 | extern void math_error(void __user *); | ||
42 | outb(0,0xF0); | ||
43 | if (ignore_fpu_irq || !boot_cpu_data.hard_math) | ||
44 | return IRQ_NONE; | ||
45 | math_error((void __user *)get_irq_regs()->ip); | ||
46 | return IRQ_HANDLED; | ||
47 | } | ||
48 | |||
49 | /* | ||
50 | * New motherboards sometimes make IRQ 13 be a PCI interrupt, | ||
51 | * so allow interrupt sharing. | ||
52 | */ | ||
53 | static struct irqaction fpu_irq = { | ||
54 | .handler = math_error_irq, | ||
55 | .mask = CPU_MASK_NONE, | ||
56 | .name = "fpu", | ||
57 | }; | ||
58 | |||
59 | void __init init_ISA_irqs (void) | ||
60 | { | ||
61 | int i; | ||
62 | |||
63 | #ifdef CONFIG_X86_LOCAL_APIC | ||
64 | init_bsp_APIC(); | ||
65 | #endif | ||
66 | init_8259A(0); | ||
67 | |||
68 | /* | ||
69 | * 16 old-style INTA-cycle interrupts: | ||
70 | */ | ||
71 | for (i = 0; i < 16; i++) { | ||
72 | set_irq_chip_and_handler_name(i, &i8259A_chip, | ||
73 | handle_level_irq, "XT"); | ||
74 | } | ||
75 | } | ||
76 | |||
77 | /* Overridden in paravirt.c */ | ||
78 | void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); | ||
79 | |||
80 | void __init native_init_IRQ(void) | ||
81 | { | ||
82 | int i; | ||
83 | |||
84 | /* all the set up before the call gates are initialised */ | ||
85 | pre_intr_init_hook(); | ||
86 | |||
87 | /* | ||
88 | * Cover the whole vector space, no vector can escape | ||
89 | * us. (some of these will be overridden and become | ||
90 | * 'special' SMP interrupts) | ||
91 | */ | ||
92 | for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { | ||
93 | int vector = FIRST_EXTERNAL_VECTOR + i; | ||
94 | if (i >= NR_IRQS) | ||
95 | break; | ||
96 | /* SYSCALL_VECTOR was reserved in trap_init. */ | ||
97 | if (!test_bit(vector, used_vectors)) | ||
98 | set_intr_gate(vector, interrupt[i]); | ||
99 | } | ||
100 | |||
101 | /* setup after call gates are initialised (usually add in | ||
102 | * the architecture specific gates) | ||
103 | */ | ||
104 | intr_init_hook(); | ||
105 | |||
106 | /* | ||
107 | * External FPU? Set up irq13 if so, for | ||
108 | * original braindamaged IBM FERR coupling. | ||
109 | */ | ||
110 | if (boot_cpu_data.hard_math && !cpu_has_fpu) | ||
111 | setup_irq(FPU_IRQ, &fpu_irq); | ||
112 | |||
113 | irq_ctx_init(smp_processor_id()); | ||
114 | } | ||
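The `void init_IRQ(void) __attribute__((weak, alias(...)))` line above relies on GCC's weak-alias mechanism: init_IRQ binds to native_init_IRQ unless some other object file (here, paravirt.c) supplies a strong definition, which then wins at link time. A stand-alone demonstration of the same pattern:

#include <stdio.h>

void native_hello(void)
{
	printf("native path\n");
}

/* Weak alias: callers bind to hello(); a strong hello() provided by
 * another translation unit would override this one at link time. */
void hello(void) __attribute__((weak, alias("native_hello")));

int main(void)
{
	hello();	/* prints "native path" in the absence of an override */
	return 0;
}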
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c new file mode 100644 index 000000000000..64bc0f14285f --- /dev/null +++ b/arch/x86/kernel/irqinit_64.c | |||
@@ -0,0 +1,203 @@ | |||
1 | #include <linux/linkage.h> | ||
2 | #include <linux/errno.h> | ||
3 | #include <linux/signal.h> | ||
4 | #include <linux/sched.h> | ||
5 | #include <linux/ioport.h> | ||
6 | #include <linux/interrupt.h> | ||
7 | #include <linux/timex.h> | ||
8 | #include <linux/slab.h> | ||
9 | #include <linux/random.h> | ||
10 | #include <linux/init.h> | ||
11 | #include <linux/kernel_stat.h> | ||
12 | #include <linux/sysdev.h> | ||
13 | #include <linux/bitops.h> | ||
14 | |||
15 | #include <asm/acpi.h> | ||
16 | #include <asm/atomic.h> | ||
17 | #include <asm/system.h> | ||
18 | #include <asm/io.h> | ||
19 | #include <asm/hw_irq.h> | ||
20 | #include <asm/pgtable.h> | ||
21 | #include <asm/delay.h> | ||
22 | #include <asm/desc.h> | ||
23 | #include <asm/apic.h> | ||
24 | #include <asm/i8259.h> | ||
25 | |||
26 | /* | ||
27 | * Common place to define all x86 IRQ vectors | ||
28 | * | ||
29 | * This builds up the IRQ handler stubs using some ugly macros in irq.h | ||
30 | * | ||
31 | * These macros create the low-level assembly IRQ routines that save | ||
32 | * register context and call do_IRQ(). do_IRQ() then does all the | ||
33 | * operations that are needed to keep the AT (or SMP IOAPIC) | ||
34 | * interrupt-controller happy. | ||
35 | */ | ||
36 | |||
37 | #define BI(x,y) \ | ||
38 | BUILD_IRQ(x##y) | ||
39 | |||
40 | #define BUILD_16_IRQS(x) \ | ||
41 | BI(x,0) BI(x,1) BI(x,2) BI(x,3) \ | ||
42 | BI(x,4) BI(x,5) BI(x,6) BI(x,7) \ | ||
43 | BI(x,8) BI(x,9) BI(x,a) BI(x,b) \ | ||
44 | BI(x,c) BI(x,d) BI(x,e) BI(x,f) | ||
45 | |||
46 | /* | ||
47 | * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: | ||
48 | * (these are usually mapped to vectors 0x30-0x3f) | ||
49 | */ | ||
50 | |||
51 | /* | ||
52 | * The IO-APIC gives us many more interrupt sources. Most of these | ||
53 | * are unused but an SMP system is supposed to have enough memory ... | ||
54 | * sometimes (mostly wrt. hw bugs) we get corrupted vectors all | ||
55 | * across the spectrum, so we really want to be prepared to get all | ||
56 | * of these. Plus, more powerful systems might have more than 64 | ||
57 | * IO-APIC registers. | ||
58 | * | ||
59 | * (these are usually mapped into the 0x30-0xff vector range) | ||
60 | */ | ||
61 | BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3) | ||
62 | BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7) | ||
63 | BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb) | ||
64 | BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf) | ||
65 | |||
66 | #undef BUILD_16_IRQS | ||
67 | #undef BI | ||
68 | |||
69 | |||
70 | #define IRQ(x,y) \ | ||
71 | IRQ##x##y##_interrupt | ||
72 | |||
73 | #define IRQLIST_16(x) \ | ||
74 | IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \ | ||
75 | IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \ | ||
76 | IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ | ||
77 | IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f) | ||
78 | |||
79 | /* for the irq vectors */ | ||
80 | static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = { | ||
81 | IRQLIST_16(0x2), IRQLIST_16(0x3), | ||
82 | IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7), | ||
83 | IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb), | ||
84 | IRQLIST_16(0xc), IRQLIST_16(0xd), IRQLIST_16(0xe), IRQLIST_16(0xf) | ||
85 | }; | ||
86 | |||
87 | #undef IRQ | ||
88 | #undef IRQLIST_16 | ||
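These stub tables lean entirely on token pasting: BI(x,y) glues the two hex digits onto a BUILD_IRQ() invocation, and IRQ(x,y) builds the matching symbol name. Expanded by hand for one vector (illustrative, not literal preprocessor output):

/*
 *   BI(0x2, 7)   ->  BUILD_IRQ(0x27)       emits the asm stub
 *   IRQ(0x2, 7)  ->  IRQ0x27_interrupt     names its entry point
 *
 * so BUILD_16_IRQS(0x2) defines the 16 stubs for vectors 0x20-0x2f
 * and IRQLIST_16(0x2) lists their addresses for the interrupt[] table.
 */
#define STUB_NAME(x, y) IRQ##x##y##_interrupt
extern void STUB_NAME(0x2, 7)(void);	/* declares IRQ0x27_interrupt */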
89 | |||
90 | |||
91 | |||
92 | |||
93 | /* | ||
94 | * IRQ2 is cascade interrupt to second interrupt controller | ||
95 | */ | ||
96 | |||
97 | static struct irqaction irq2 = { | ||
98 | .handler = no_action, | ||
99 | .mask = CPU_MASK_NONE, | ||
100 | .name = "cascade", | ||
101 | }; | ||
102 | DEFINE_PER_CPU(vector_irq_t, vector_irq) = { | ||
103 | [0 ... IRQ0_VECTOR - 1] = -1, | ||
104 | [IRQ0_VECTOR] = 0, | ||
105 | [IRQ1_VECTOR] = 1, | ||
106 | [IRQ2_VECTOR] = 2, | ||
107 | [IRQ3_VECTOR] = 3, | ||
108 | [IRQ4_VECTOR] = 4, | ||
109 | [IRQ5_VECTOR] = 5, | ||
110 | [IRQ6_VECTOR] = 6, | ||
111 | [IRQ7_VECTOR] = 7, | ||
112 | [IRQ8_VECTOR] = 8, | ||
113 | [IRQ9_VECTOR] = 9, | ||
114 | [IRQ10_VECTOR] = 10, | ||
115 | [IRQ11_VECTOR] = 11, | ||
116 | [IRQ12_VECTOR] = 12, | ||
117 | [IRQ13_VECTOR] = 13, | ||
118 | [IRQ14_VECTOR] = 14, | ||
119 | [IRQ15_VECTOR] = 15, | ||
120 | [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 | ||
121 | }; | ||
122 | |||
123 | static void __init init_ISA_irqs (void) | ||
124 | { | ||
125 | int i; | ||
126 | |||
127 | init_bsp_APIC(); | ||
128 | init_8259A(0); | ||
129 | |||
130 | for (i = 0; i < NR_IRQS; i++) { | ||
131 | irq_desc[i].status = IRQ_DISABLED; | ||
132 | irq_desc[i].action = NULL; | ||
133 | irq_desc[i].depth = 1; | ||
134 | |||
135 | if (i < 16) { | ||
136 | /* | ||
137 | * 16 old-style INTA-cycle interrupts: | ||
138 | */ | ||
139 | set_irq_chip_and_handler_name(i, &i8259A_chip, | ||
140 | handle_level_irq, "XT"); | ||
141 | } else { | ||
142 | /* | ||
143 | * 'high' PCI IRQs filled in on demand | ||
144 | */ | ||
145 | irq_desc[i].chip = &no_irq_chip; | ||
146 | } | ||
147 | } | ||
148 | } | ||
149 | |||
150 | void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); | ||
151 | |||
152 | void __init native_init_IRQ(void) | ||
153 | { | ||
154 | int i; | ||
155 | |||
156 | init_ISA_irqs(); | ||
157 | /* | ||
158 | * Cover the whole vector space, no vector can escape | ||
159 | * us. (some of these will be overridden and become | ||
160 | * 'special' SMP interrupts) | ||
161 | */ | ||
162 | for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { | ||
163 | int vector = FIRST_EXTERNAL_VECTOR + i; | ||
164 | if (vector != IA32_SYSCALL_VECTOR) | ||
165 | set_intr_gate(vector, interrupt[i]); | ||
166 | } | ||
167 | |||
168 | #ifdef CONFIG_SMP | ||
169 | /* | ||
170 | * The reschedule interrupt is a CPU-to-CPU reschedule-helper | ||
171 | * IPI, driven by wakeup. | ||
172 | */ | ||
173 | set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); | ||
174 | |||
175 | /* IPIs for invalidation */ | ||
176 | set_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0); | ||
177 | set_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1); | ||
178 | set_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2); | ||
179 | set_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3); | ||
180 | set_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4); | ||
181 | set_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5); | ||
182 | set_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6); | ||
183 | set_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7); | ||
184 | |||
185 | /* IPI for generic function call */ | ||
186 | set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); | ||
187 | |||
188 | /* Low priority IPI to cleanup after moving an irq */ | ||
189 | set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); | ||
190 | #endif | ||
191 | set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); | ||
192 | set_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); | ||
193 | |||
194 | /* self generated IPI for local APIC timer */ | ||
195 | set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); | ||
196 | |||
197 | /* IPI vectors for APIC spurious and error interrupts */ | ||
198 | set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); | ||
199 | set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); | ||
200 | |||
201 | if (!acpi_ioapic) | ||
202 | setup_irq(2, &irq2); | ||
203 | } | ||
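The per-CPU vector_irq[] table initialized above is the reverse map used at interrupt time: the low-level stub pushes the hardware vector, and do_IRQ() (in irq_64.c) translates it back into a Linux IRQ number, roughly as follows (a sketch, assuming kernel per-CPU context):

/* Entries initialised to -1 mean no IRQ is bound to that vector yet. */
static int vector_to_irq(unsigned int vector)
{
	int irq = __get_cpu_var(vector_irq)[vector];

	if (irq < 0)
		pr_debug("no IRQ bound to vector 0x%02x\n", vector);
	return irq;
}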
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 4bc1be5d5472..87edf1ceb1df 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -18,6 +18,7 @@ | |||
18 | 18 | ||
19 | #include <linux/clocksource.h> | 19 | #include <linux/clocksource.h> |
20 | #include <linux/kvm_para.h> | 20 | #include <linux/kvm_para.h> |
21 | #include <asm/pvclock.h> | ||
21 | #include <asm/arch_hooks.h> | 22 | #include <asm/arch_hooks.h> |
22 | #include <asm/msr.h> | 23 | #include <asm/msr.h> |
23 | #include <asm/apic.h> | 24 | #include <asm/apic.h> |
@@ -36,83 +37,47 @@ static int parse_no_kvmclock(char *arg) | |||
36 | early_param("no-kvmclock", parse_no_kvmclock); | 37 | early_param("no-kvmclock", parse_no_kvmclock); |
37 | 38 | ||
38 | /* The hypervisor will put information about time periodically here */ | 39 | /* The hypervisor will put information about time periodically here */ |
39 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock); | 40 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct pvclock_vcpu_time_info, hv_clock); |
40 | #define get_clock(cpu, field) per_cpu(hv_clock, cpu).field | 41 | static struct pvclock_wall_clock wall_clock; |
41 | 42 | ||
42 | static inline u64 kvm_get_delta(u64 last_tsc) | ||
43 | { | ||
44 | int cpu = smp_processor_id(); | ||
45 | u64 delta = native_read_tsc() - last_tsc; | ||
46 | return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE; | ||
47 | } | ||
48 | |||
49 | static struct kvm_wall_clock wall_clock; | ||
50 | static cycle_t kvm_clock_read(void); | ||
51 | /* | 43 | /* |
52 | * The wallclock is the time of day when we booted. Since then, some time may | 44 | * The wallclock is the time of day when we booted. Since then, some time may |
53 | * have elapsed since the hypervisor wrote the data. So we try to account for | 45 | * have elapsed since the hypervisor wrote the data. So we try to account for |
54 | * that with system time | 46 | * that with system time |
55 | */ | 47 | */ |
56 | unsigned long kvm_get_wallclock(void) | 48 | static unsigned long kvm_get_wallclock(void) |
57 | { | 49 | { |
58 | u32 wc_sec, wc_nsec; | 50 | struct pvclock_vcpu_time_info *vcpu_time; |
59 | u64 delta; | ||
60 | struct timespec ts; | 51 | struct timespec ts; |
61 | int version, nsec; | ||
62 | int low, high; | 52 | int low, high; |
63 | 53 | ||
64 | low = (int)__pa(&wall_clock); | 54 | low = (int)__pa(&wall_clock); |
65 | high = ((u64)__pa(&wall_clock) >> 32); | 55 | high = ((u64)__pa(&wall_clock) >> 32); |
56 | native_write_msr(MSR_KVM_WALL_CLOCK, low, high); | ||
66 | 57 | ||
67 | delta = kvm_clock_read(); | 58 | vcpu_time = &get_cpu_var(hv_clock); |
59 | pvclock_read_wallclock(&wall_clock, vcpu_time, &ts); | ||
60 | put_cpu_var(hv_clock); | ||
68 | 61 | ||
69 | native_write_msr(MSR_KVM_WALL_CLOCK, low, high); | 62 | return ts.tv_sec; |
70 | do { | ||
71 | version = wall_clock.wc_version; | ||
72 | rmb(); | ||
73 | wc_sec = wall_clock.wc_sec; | ||
74 | wc_nsec = wall_clock.wc_nsec; | ||
75 | rmb(); | ||
76 | } while ((wall_clock.wc_version != version) || (version & 1)); | ||
77 | |||
78 | delta = kvm_clock_read() - delta; | ||
79 | delta += wc_nsec; | ||
80 | nsec = do_div(delta, NSEC_PER_SEC); | ||
81 | set_normalized_timespec(&ts, wc_sec + delta, nsec); | ||
82 | /* | ||
83 | * Of all mechanisms of time adjustment I've tested, this one | ||
84 | * was the champion! | ||
85 | */ | ||
86 | return ts.tv_sec + 1; | ||
87 | } | 63 | } |
88 | 64 | ||
89 | int kvm_set_wallclock(unsigned long now) | 65 | static int kvm_set_wallclock(unsigned long now) |
90 | { | 66 | { |
91 | return 0; | 67 | return -1; |
92 | } | 68 | } |
93 | 69 | ||
94 | /* | ||
95 | * This is our read_clock function. The host puts a tsc timestamp each time | ||
96 | * it updates a new time. Without the tsc adjustment, we can have a situation | ||
97 | * in which a vcpu starts to run earlier (smaller system_time), but probes | ||
98 | * time later (compared to another vcpu), leading to backwards time | ||
99 | */ | ||
100 | static cycle_t kvm_clock_read(void) | 70 | static cycle_t kvm_clock_read(void) |
101 | { | 71 | { |
102 | u64 last_tsc, now; | 72 | struct pvclock_vcpu_time_info *src; |
103 | int cpu; | 73 | cycle_t ret; |
104 | 74 | ||
105 | preempt_disable(); | 75 | src = &get_cpu_var(hv_clock); |
106 | cpu = smp_processor_id(); | 76 | ret = pvclock_clocksource_read(src); |
107 | 77 | put_cpu_var(hv_clock); | |
108 | last_tsc = get_clock(cpu, tsc_timestamp); | 78 | return ret; |
109 | now = get_clock(cpu, system_time); | ||
110 | |||
111 | now += kvm_get_delta(last_tsc); | ||
112 | preempt_enable(); | ||
113 | |||
114 | return now; | ||
115 | } | 79 | } |
80 | |||
116 | static struct clocksource kvm_clock = { | 81 | static struct clocksource kvm_clock = { |
117 | .name = "kvm-clock", | 82 | .name = "kvm-clock", |
118 | .read = kvm_clock_read, | 83 | .read = kvm_clock_read, |
@@ -123,13 +88,14 @@ static struct clocksource kvm_clock = { | |||
123 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | 88 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
124 | }; | 89 | }; |
125 | 90 | ||
126 | static int kvm_register_clock(void) | 91 | static int kvm_register_clock(char *txt) |
127 | { | 92 | { |
128 | int cpu = smp_processor_id(); | 93 | int cpu = smp_processor_id(); |
129 | int low, high; | 94 | int low, high; |
130 | low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1; | 95 | low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1; |
131 | high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); | 96 | high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); |
132 | 97 | printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n", | |
98 | cpu, high, low, txt); | ||
133 | return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high); | 99 | return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high); |
134 | } | 100 | } |
135 | 101 | ||
@@ -140,12 +106,20 @@ static void kvm_setup_secondary_clock(void) | |||
140 | * Now that the first cpu already had this clocksource initialized, | 106 | * Now that the first cpu already had this clocksource initialized, |
141 | * we shouldn't fail. | 107 | * we shouldn't fail. |
142 | */ | 108 | */ |
143 | WARN_ON(kvm_register_clock()); | 109 | WARN_ON(kvm_register_clock("secondary cpu clock")); |
144 | /* ok, done with our trickery, call native */ | 110 | /* ok, done with our trickery, call native */ |
145 | setup_secondary_APIC_clock(); | 111 | setup_secondary_APIC_clock(); |
146 | } | 112 | } |
147 | #endif | 113 | #endif |
148 | 114 | ||
115 | #ifdef CONFIG_SMP | ||
116 | void __init kvm_smp_prepare_boot_cpu(void) | ||
117 | { | ||
118 | WARN_ON(kvm_register_clock("primary cpu clock")); | ||
119 | native_smp_prepare_boot_cpu(); | ||
120 | } | ||
121 | #endif | ||
122 | |||
149 | /* | 123 | /* |
150 | * After the clock is registered, the host will keep writing to the | 124 | * After the clock is registered, the host will keep writing to the |
151 | * registered memory location. If the guest happens to shutdown, this memory | 125 | * registered memory location. If the guest happens to shutdown, this memory |
@@ -174,7 +148,7 @@ void __init kvmclock_init(void) | |||
174 | return; | 148 | return; |
175 | 149 | ||
176 | if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) { | 150 | if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) { |
177 | if (kvm_register_clock()) | 151 | if (kvm_register_clock("boot clock")) |
178 | return; | 152 | return; |
179 | pv_time_ops.get_wallclock = kvm_get_wallclock; | 153 | pv_time_ops.get_wallclock = kvm_get_wallclock; |
180 | pv_time_ops.set_wallclock = kvm_set_wallclock; | 154 | pv_time_ops.set_wallclock = kvm_set_wallclock; |
@@ -182,6 +156,9 @@ void __init kvmclock_init(void) | |||
182 | #ifdef CONFIG_X86_LOCAL_APIC | 156 | #ifdef CONFIG_X86_LOCAL_APIC |
183 | pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock; | 157 | pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock; |
184 | #endif | 158 | #endif |
159 | #ifdef CONFIG_SMP | ||
160 | smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; | ||
161 | #endif | ||
185 | machine_ops.shutdown = kvm_shutdown; | 162 | machine_ops.shutdown = kvm_shutdown; |
186 | #ifdef CONFIG_KEXEC | 163 | #ifdef CONFIG_KEXEC |
187 | machine_ops.crash_shutdown = kvm_crash_shutdown; | 164 | machine_ops.crash_shutdown = kvm_crash_shutdown; |
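The open-coded wc_version loop deleted above moved into the shared pvclock helpers; both pvclock_read_wallclock() and pvclock_clocksource_read() follow the same even/odd version protocol, where an odd version means the host is mid-update and a changed version means the payload was torn. A sketch of the reader side; tsc_delta_scaled() is a hypothetical stand-in for the TSC scaling the real helper performs:

static u64 pvclock_read_sketch(struct pvclock_vcpu_time_info *src)
{
	u32 version;
	u64 time;

	do {
		version = src->version;
		rmb();				/* version first, then payload */
		time = src->system_time + tsc_delta_scaled(src);
		rmb();				/* payload, then version again */
	} while ((src->version & 1) || version != src->version);

	return time;
}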
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 0224c3637c73..21f2bae98c15 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c | |||
@@ -20,9 +20,9 @@ | |||
20 | #include <asm/mmu_context.h> | 20 | #include <asm/mmu_context.h> |
21 | 21 | ||
22 | #ifdef CONFIG_SMP | 22 | #ifdef CONFIG_SMP |
23 | static void flush_ldt(void *null) | 23 | static void flush_ldt(void *current_mm) |
24 | { | 24 | { |
25 | if (current->active_mm) | 25 | if (current->active_mm == current_mm) |
26 | load_LDT(&current->active_mm->context); | 26 | load_LDT(&current->active_mm->context); |
27 | } | 27 | } |
28 | #endif | 28 | #endif |
@@ -68,7 +68,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) | |||
68 | load_LDT(pc); | 68 | load_LDT(pc); |
69 | mask = cpumask_of_cpu(smp_processor_id()); | 69 | mask = cpumask_of_cpu(smp_processor_id()); |
70 | if (!cpus_equal(current->mm->cpu_vm_mask, mask)) | 70 | if (!cpus_equal(current->mm->cpu_vm_mask, mask)) |
71 | smp_call_function(flush_ldt, NULL, 1, 1); | 71 | smp_call_function(flush_ldt, current->mm, 1, 1); |
72 | preempt_enable(); | 72 | preempt_enable(); |
73 | #else | 73 | #else |
74 | load_LDT(pc); | 74 | load_LDT(pc); |
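Passing current->mm through smp_call_function() turns flush_ldt() from "reload whatever LDT this CPU has active" into "reload only if this CPU is running the mm whose LDT just changed"; other CPUs, including lazy-TLB kernel threads that merely borrowed the mm pointer, now take the IPI as a no-op. The same broadcast-but-filter shape in general form (a sketch of the pattern, not additional in-tree code):

/* IPI every CPU, but only CPUs using the changed object act on it. */
static void refresh_if_mine(void *changed_mm)
{
	if (current->active_mm == changed_mm)
		load_LDT(&current->active_mm->context);
}
/* caller: smp_call_function(refresh_if_mine, mm, 1, 1); */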
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index d0b234c9fc31..f4960171bc66 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c | |||
@@ -39,7 +39,7 @@ static void set_idt(void *newidt, __u16 limit) | |||
39 | curidt.address = (unsigned long)newidt; | 39 | curidt.address = (unsigned long)newidt; |
40 | 40 | ||
41 | load_idt(&curidt); | 41 | load_idt(&curidt); |
42 | }; | 42 | } |
43 | 43 | ||
44 | 44 | ||
45 | static void set_gdt(void *newgdt, __u16 limit) | 45 | static void set_gdt(void *newgdt, __u16 limit) |
@@ -51,7 +51,7 @@ static void set_gdt(void *newgdt, __u16 limit) | |||
51 | curgdt.address = (unsigned long)newgdt; | 51 | curgdt.address = (unsigned long)newgdt; |
52 | 52 | ||
53 | load_gdt(&curgdt); | 53 | load_gdt(&curgdt); |
54 | }; | 54 | } |
55 | 55 | ||
56 | static void load_segments(void) | 56 | static void load_segments(void) |
57 | { | 57 | { |
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c index 3cad17fe026b..07c0f828f488 100644 --- a/arch/x86/kernel/mfgpt_32.c +++ b/arch/x86/kernel/mfgpt_32.c | |||
@@ -155,6 +155,7 @@ int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable) | |||
155 | wrmsr(msr, value, dummy); | 155 | wrmsr(msr, value, dummy); |
156 | return 0; | 156 | return 0; |
157 | } | 157 | } |
158 | EXPORT_SYMBOL_GPL(geode_mfgpt_toggle_event); | ||
158 | 159 | ||
159 | int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable) | 160 | int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable) |
160 | { | 161 | { |
@@ -222,6 +223,7 @@ int geode_mfgpt_alloc_timer(int timer, int domain) | |||
222 | /* No timers available - too bad */ | 223 | /* No timers available - too bad */ |
223 | return -1; | 224 | return -1; |
224 | } | 225 | } |
226 | EXPORT_SYMBOL_GPL(geode_mfgpt_alloc_timer); | ||
225 | 227 | ||
226 | 228 | ||
227 | #ifdef CONFIG_GEODE_MFGPT_TIMER | 229 | #ifdef CONFIG_GEODE_MFGPT_TIMER |
diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 69729e38b78a..9758fea87c5b 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c | |||
@@ -5,13 +5,14 @@ | |||
5 | * 2006 Shaohua Li <shaohua.li@intel.com> | 5 | * 2006 Shaohua Li <shaohua.li@intel.com> |
6 | * | 6 | * |
7 | * This driver allows to upgrade microcode on Intel processors | 7 | * This driver allows to upgrade microcode on Intel processors |
8 | * belonging to IA-32 family - PentiumPro, Pentium II, | 8 | * belonging to IA-32 family - PentiumPro, Pentium II, |
9 | * Pentium III, Xeon, Pentium 4, etc. | 9 | * Pentium III, Xeon, Pentium 4, etc. |
10 | * | 10 | * |
11 | * Reference: Section 8.10 of Volume III, Intel Pentium 4 Manual, | 11 | * Reference: Section 8.11 of Volume 3a, IA-32 Intel® Architecture |
12 | * Order Number 245472 or free download from: | 12 | * Software Developer's Manual |
13 | * | 13 | * Order Number 253668 or free download from: |
14 | * http://developer.intel.com/design/pentium4/manuals/245472.htm | 14 | * |
15 | * http://developer.intel.com/design/pentium4/manuals/253668.htm | ||
15 | * | 16 | * |
16 | * For more information, go to http://www.urbanmyth.org/microcode | 17 | * For more information, go to http://www.urbanmyth.org/microcode |
17 | * | 18 | * |
@@ -58,12 +59,12 @@ | |||
58 | * nature of implementation. | 59 | * nature of implementation. |
59 | * 1.11 22 Mar 2002 Tigran Aivazian <tigran@veritas.com> | 60 | * 1.11 22 Mar 2002 Tigran Aivazian <tigran@veritas.com> |
60 | * Fix the panic when writing zero-length microcode chunk. | 61 | * Fix the panic when writing zero-length microcode chunk. |
61 | * 1.12 29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>, | 62 | * 1.12 29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>, |
62 | * Jun Nakajima <jun.nakajima@intel.com> | 63 | * Jun Nakajima <jun.nakajima@intel.com> |
63 | * Support for the microcode updates in the new format. | 64 | * Support for the microcode updates in the new format. |
64 | * 1.13 10 Oct 2003 Tigran Aivazian <tigran@veritas.com> | 65 | * 1.13 10 Oct 2003 Tigran Aivazian <tigran@veritas.com> |
65 | * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl | 66 | * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl |
66 | * because we no longer hold a copy of applied microcode | 67 | * because we no longer hold a copy of applied microcode |
67 | * in kernel memory. | 68 | * in kernel memory. |
68 | * 1.14 25 Jun 2004 Tigran Aivazian <tigran@veritas.com> | 69 | * 1.14 25 Jun 2004 Tigran Aivazian <tigran@veritas.com> |
69 | * Fix sigmatch() macro to handle old CPUs with pf == 0. | 70 | * Fix sigmatch() macro to handle old CPUs with pf == 0. |
@@ -320,11 +321,11 @@ static void apply_microcode(int cpu) | |||
320 | return; | 321 | return; |
321 | 322 | ||
322 | /* serialize access to the physical write to MSR 0x79 */ | 323 | /* serialize access to the physical write to MSR 0x79 */ |
323 | spin_lock_irqsave(&microcode_update_lock, flags); | 324 | spin_lock_irqsave(&microcode_update_lock, flags); |
324 | 325 | ||
325 | /* write microcode via MSR 0x79 */ | 326 | /* write microcode via MSR 0x79 */ |
326 | wrmsr(MSR_IA32_UCODE_WRITE, | 327 | wrmsr(MSR_IA32_UCODE_WRITE, |
327 | (unsigned long) uci->mc->bits, | 328 | (unsigned long) uci->mc->bits, |
328 | (unsigned long) uci->mc->bits >> 16 >> 16); | 329 | (unsigned long) uci->mc->bits >> 16 >> 16); |
329 | wrmsr(MSR_IA32_UCODE_REV, 0, 0); | 330 | wrmsr(MSR_IA32_UCODE_REV, 0, 0); |
330 | 331 | ||
@@ -341,7 +342,7 @@ static void apply_microcode(int cpu) | |||
341 | return; | 342 | return; |
342 | } | 343 | } |
343 | printk(KERN_INFO "microcode: CPU%d updated from revision " | 344 | printk(KERN_INFO "microcode: CPU%d updated from revision " |
344 | "0x%x to 0x%x, date = %08x \n", | 345 | "0x%x to 0x%x, date = %08x \n", |
345 | cpu_num, uci->rev, val[1], uci->mc->hdr.date); | 346 | cpu_num, uci->rev, val[1], uci->mc->hdr.date); |
346 | uci->rev = val[1]; | 347 | uci->rev = val[1]; |
347 | } | 348 | } |
@@ -534,7 +535,7 @@ static int cpu_request_microcode(int cpu) | |||
534 | c->x86, c->x86_model, c->x86_mask); | 535 | c->x86, c->x86_model, c->x86_mask); |
535 | error = request_firmware(&firmware, name, &microcode_pdev->dev); | 536 | error = request_firmware(&firmware, name, &microcode_pdev->dev); |
536 | if (error) { | 537 | if (error) { |
537 | pr_debug("microcode: ucode data file %s load failed\n", name); | 538 | pr_debug("microcode: data file %s load failed\n", name); |
538 | return error; | 539 | return error; |
539 | } | 540 | } |
540 | buf = firmware->data; | 541 | buf = firmware->data; |
@@ -805,6 +806,9 @@ static int __init microcode_init (void) | |||
805 | { | 806 | { |
806 | int error; | 807 | int error; |
807 | 808 | ||
809 | printk(KERN_INFO | ||
810 | "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@aivazian.fsnet.co.uk>\n"); | ||
811 | |||
808 | error = microcode_dev_init(); | 812 | error = microcode_dev_init(); |
809 | if (error) | 813 | if (error) |
810 | return error; | 814 | return error; |
@@ -825,9 +829,6 @@ static int __init microcode_init (void) | |||
825 | } | 829 | } |
826 | 830 | ||
827 | register_hotcpu_notifier(&mc_cpu_notifier); | 831 | register_hotcpu_notifier(&mc_cpu_notifier); |
828 | |||
829 | printk(KERN_INFO | ||
830 | "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@aivazian.fsnet.co.uk>\n"); | ||
831 | return 0; | 832 | return 0; |
832 | } | 833 | } |
833 | 834 | ||
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index edc5fbfe85c0..fdfdc550b366 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <asm/io.h> | 12 | #include <asm/io.h> |
13 | #include <asm/msr.h> | 13 | #include <asm/msr.h> |
14 | #include <asm/acpi.h> | 14 | #include <asm/acpi.h> |
15 | #include <asm/mmconfig.h> | ||
15 | 16 | ||
16 | #include "../pci/pci.h" | 17 | #include "../pci/pci.h" |
17 | 18 | ||
diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c index 11b14bbaa61e..6580dae46277 100644 --- a/arch/x86/kernel/nmi_32.c +++ b/arch/x86/kernel/nmi_32.c | |||
@@ -24,6 +24,8 @@ | |||
24 | #include <linux/kdebug.h> | 24 | #include <linux/kdebug.h> |
25 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
26 | 26 | ||
27 | #include <asm/i8259.h> | ||
28 | #include <asm/io_apic.h> | ||
27 | #include <asm/smp.h> | 29 | #include <asm/smp.h> |
28 | #include <asm/nmi.h> | 30 | #include <asm/nmi.h> |
29 | #include <asm/timer.h> | 31 | #include <asm/timer.h> |
@@ -131,7 +133,9 @@ int __init check_nmi_watchdog(void) | |||
131 | kfree(prev_nmi_count); | 133 | kfree(prev_nmi_count); |
132 | return 0; | 134 | return 0; |
133 | error: | 135 | error: |
134 | timer_ack = !cpu_has_tsc; | 136 | if (nmi_watchdog == NMI_IO_APIC && !timer_through_8259) |
137 | disable_8259A_irq(0); | ||
138 | timer_ack = 0; | ||
135 | 139 | ||
136 | return -1; | 140 | return -1; |
137 | } | 141 | } |
diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi_64.c index 5a29ded994fa..0060e44e8989 100644 --- a/arch/x86/kernel/nmi_64.c +++ b/arch/x86/kernel/nmi_64.c | |||
@@ -21,6 +21,8 @@ | |||
21 | #include <linux/cpumask.h> | 21 | #include <linux/cpumask.h> |
22 | #include <linux/kdebug.h> | 22 | #include <linux/kdebug.h> |
23 | 23 | ||
24 | #include <asm/i8259.h> | ||
25 | #include <asm/io_apic.h> | ||
24 | #include <asm/smp.h> | 26 | #include <asm/smp.h> |
25 | #include <asm/nmi.h> | 27 | #include <asm/nmi.h> |
26 | #include <asm/proto.h> | 28 | #include <asm/proto.h> |
@@ -90,7 +92,7 @@ int __init check_nmi_watchdog(void) | |||
90 | 92 | ||
91 | prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); | 93 | prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); |
92 | if (!prev_nmi_count) | 94 | if (!prev_nmi_count) |
93 | return -1; | 95 | goto error; |
94 | 96 | ||
95 | printk(KERN_INFO "Testing NMI watchdog ... "); | 97 | printk(KERN_INFO "Testing NMI watchdog ... "); |
96 | 98 | ||
@@ -121,7 +123,7 @@ int __init check_nmi_watchdog(void) | |||
121 | if (!atomic_read(&nmi_active)) { | 123 | if (!atomic_read(&nmi_active)) { |
122 | kfree(prev_nmi_count); | 124 | kfree(prev_nmi_count); |
123 | atomic_set(&nmi_active, -1); | 125 | atomic_set(&nmi_active, -1); |
124 | return -1; | 126 | goto error; |
125 | } | 127 | } |
126 | printk("OK.\n"); | 128 | printk("OK.\n"); |
127 | 129 | ||
@@ -132,6 +134,11 @@ int __init check_nmi_watchdog(void) | |||
132 | 134 | ||
133 | kfree(prev_nmi_count); | 135 | kfree(prev_nmi_count); |
134 | return 0; | 136 | return 0; |
137 | error: | ||
138 | if (nmi_watchdog == NMI_IO_APIC && !timer_through_8259) | ||
139 | disable_8259A_irq(0); | ||
140 | |||
141 | return -1; | ||
135 | } | 142 | } |
136 | 143 | ||
137 | static int __init setup_nmi_watchdog(char *str) | 144 | static int __init setup_nmi_watchdog(char *str) |
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 74f0c5ea2a03..f1ab0f727007 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -380,6 +380,9 @@ struct pv_mmu_ops pv_mmu_ops = { | |||
380 | .pte_update = paravirt_nop, | 380 | .pte_update = paravirt_nop, |
381 | .pte_update_defer = paravirt_nop, | 381 | .pte_update_defer = paravirt_nop, |
382 | 382 | ||
383 | .ptep_modify_prot_start = __ptep_modify_prot_start, | ||
384 | .ptep_modify_prot_commit = __ptep_modify_prot_commit, | ||
385 | |||
383 | #ifdef CONFIG_HIGHPTE | 386 | #ifdef CONFIG_HIGHPTE |
384 | .kmap_atomic_pte = kmap_atomic, | 387 | .kmap_atomic_pte = kmap_atomic, |
385 | #endif | 388 | #endif |
@@ -403,6 +406,7 @@ struct pv_mmu_ops pv_mmu_ops = { | |||
403 | #endif /* PAGETABLE_LEVELS >= 3 */ | 406 | #endif /* PAGETABLE_LEVELS >= 3 */ |
404 | 407 | ||
405 | .pte_val = native_pte_val, | 408 | .pte_val = native_pte_val, |
409 | .pte_flags = native_pte_val, | ||
406 | .pgd_val = native_pgd_val, | 410 | .pgd_val = native_pgd_val, |
407 | 411 | ||
408 | .make_pte = native_make_pte, | 412 | .make_pte = native_make_pte, |
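The new pte_flags hook exists so callers that only need a pte's flag bits can skip whatever address translation pte_val() may incur under a hypervisor; natively the two are the same operation, hence the reuse of native_pte_val. A sketch of the kind of caller it is for, assuming the usual _PAGE_RW flag bit:

/* Predicates that only test flag bits can use pte_flags() and avoid
 * the (potentially translated) pte_val() path under paravirt. */
static int sketch_pte_write(pte_t pte)
{
	return pte_flags(pte) & _PAGE_RW;	/* writable bit only */
}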
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 0c37f16b6950..3c43109ba054 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -77,10 +77,14 @@ void __init dma32_reserve_bootmem(void) | |||
77 | if (end_pfn <= MAX_DMA32_PFN) | 77 | if (end_pfn <= MAX_DMA32_PFN) |
78 | return; | 78 | return; |
79 | 79 | ||
80 | /* | ||
81 | * see aperture_64.c allocate_aperture() for the reason why | ||
82 | * 512M is used as the allocation goal | ||
83 | */ | ||
80 | align = 64ULL<<20; | 84 | align = 64ULL<<20; |
81 | size = round_up(dma32_bootmem_size, align); | 85 | size = round_up(dma32_bootmem_size, align); |
82 | dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, | 86 | dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, |
83 | __pa(MAX_DMA_ADDRESS)); | 87 | 512ULL<<20); |
84 | if (dma32_bootmem_ptr) | 88 | if (dma32_bootmem_ptr) |
85 | dma32_bootmem_size = size; | 89 | dma32_bootmem_size = size; |
86 | else | 90 | else |
@@ -88,7 +92,6 @@ void __init dma32_reserve_bootmem(void) | |||
88 | } | 92 | } |
89 | static void __init dma32_free_bootmem(void) | 93 | static void __init dma32_free_bootmem(void) |
90 | { | 94 | { |
91 | int node; | ||
92 | 95 | ||
93 | if (end_pfn <= MAX_DMA32_PFN) | 96 | if (end_pfn <= MAX_DMA32_PFN) |
94 | return; | 97 | return; |
@@ -96,9 +99,7 @@ static void __init dma32_free_bootmem(void) | |||
96 | if (!dma32_bootmem_ptr) | 99 | if (!dma32_bootmem_ptr) |
97 | return; | 100 | return; |
98 | 101 | ||
99 | for_each_online_node(node) | 102 | free_bootmem(__pa(dma32_bootmem_ptr), dma32_bootmem_size); |
100 | free_bootmem_node(NODE_DATA(node), __pa(dma32_bootmem_ptr), | ||
101 | dma32_bootmem_size); | ||
102 | 103 | ||
103 | dma32_bootmem_ptr = NULL; | 104 | dma32_bootmem_ptr = NULL; |
104 | dma32_bootmem_size = 0; | 105 | dma32_bootmem_size = 0; |
@@ -357,7 +358,7 @@ int dma_supported(struct device *dev, u64 mask) | |||
357 | EXPORT_SYMBOL(dma_supported); | 358 | EXPORT_SYMBOL(dma_supported); |
358 | 359 | ||
359 | /* Allocate DMA memory on node near device */ | 360 | /* Allocate DMA memory on node near device */ |
360 | noinline struct page * | 361 | static noinline struct page * |
361 | dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) | 362 | dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) |
362 | { | 363 | { |
363 | int node; | 364 | int node; |
@@ -378,6 +379,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, | |||
378 | struct page *page; | 379 | struct page *page; |
379 | unsigned long dma_mask = 0; | 380 | unsigned long dma_mask = 0; |
380 | dma_addr_t bus; | 381 | dma_addr_t bus; |
382 | int noretry = 0; | ||
381 | 383 | ||
382 | /* ignore region specifiers */ | 384 | /* ignore region specifiers */ |
383 | gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); | 385 | gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); |
@@ -385,30 +387,37 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, | |||
385 | if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory)) | 387 | if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory)) |
386 | return memory; | 388 | return memory; |
387 | 389 | ||
388 | if (!dev) | 390 | if (!dev) { |
389 | dev = &fallback_dev; | 391 | dev = &fallback_dev; |
392 | gfp |= GFP_DMA; | ||
393 | } | ||
390 | dma_mask = dev->coherent_dma_mask; | 394 | dma_mask = dev->coherent_dma_mask; |
391 | if (dma_mask == 0) | 395 | if (dma_mask == 0) |
392 | dma_mask = DMA_32BIT_MASK; | 396 | dma_mask = (gfp & GFP_DMA) ? DMA_24BIT_MASK : DMA_32BIT_MASK; |
393 | 397 | ||
394 | /* Device not DMA able */ | 398 | /* Device not DMA able */ |
395 | if (dev->dma_mask == NULL) | 399 | if (dev->dma_mask == NULL) |
396 | return NULL; | 400 | return NULL; |
397 | 401 | ||
398 | /* Don't invoke OOM killer */ | 402 | /* Don't invoke OOM killer or retry in lower 16MB DMA zone */ |
399 | gfp |= __GFP_NORETRY; | 403 | if (gfp & __GFP_DMA) |
404 | noretry = 1; | ||
400 | 405 | ||
401 | #ifdef CONFIG_X86_64 | 406 | #ifdef CONFIG_X86_64 |
402 | /* Why <=? Even when the mask is smaller than 4GB it is often | 407 | /* Why <=? Even when the mask is smaller than 4GB it is often |
403 | larger than 16MB and in this case we have a chance of | 408 | larger than 16MB and in this case we have a chance of |
404 | finding fitting memory in the next higher zone first. If | 409 | finding fitting memory in the next higher zone first. If |
405 | not retry with true GFP_DMA. -AK */ | 410 | not retry with true GFP_DMA. -AK */ |
406 | if (dma_mask <= DMA_32BIT_MASK) | 411 | if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) { |
407 | gfp |= GFP_DMA32; | 412 | gfp |= GFP_DMA32; |
413 | if (dma_mask < DMA_32BIT_MASK) | ||
414 | noretry = 1; | ||
415 | } | ||
408 | #endif | 416 | #endif |
409 | 417 | ||
410 | again: | 418 | again: |
411 | page = dma_alloc_pages(dev, gfp, get_order(size)); | 419 | page = dma_alloc_pages(dev, |
420 | noretry ? gfp | __GFP_NORETRY : gfp, get_order(size)); | ||
412 | if (page == NULL) | 421 | if (page == NULL) |
413 | return NULL; | 422 | return NULL; |
414 | 423 | ||
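The reworked mask handling above boils down to a small decision table: an explicit GFP_DMA request means the tiny 16MB zone, where retrying or invoking the OOM killer is pointless; on 64-bit, a mask at or below 32 bits first tries GFP_DMA32, disabling retry when the mask is strictly smaller so the caller can still fall back to true GFP_DMA. The same logic isolated as a helper (a sketch, not the in-tree code):

static gfp_t pick_dma_gfp(u64 dma_mask, gfp_t gfp, int *noretry)
{
	*noretry = 0;

	if (gfp & __GFP_DMA) {			/* caller forced the ISA zone */
		*noretry = 1;			/* 16MB pool: fail fast */
	} else if (dma_mask <= DMA_32BIT_MASK) {
		gfp |= GFP_DMA32;		/* prefer the <4GB zone first */
		if (dma_mask < DMA_32BIT_MASK)
			*noretry = 1;		/* may still retry in GFP_DMA */
	}
	return gfp;
}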
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index c07455d1695f..021f3c684a62 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/kdebug.h> | 26 | #include <linux/kdebug.h> |
27 | #include <linux/scatterlist.h> | 27 | #include <linux/scatterlist.h> |
28 | #include <linux/iommu-helper.h> | 28 | #include <linux/iommu-helper.h> |
29 | #include <linux/sysdev.h> | ||
29 | #include <asm/atomic.h> | 30 | #include <asm/atomic.h> |
30 | #include <asm/io.h> | 31 | #include <asm/io.h> |
31 | #include <asm/mtrr.h> | 32 | #include <asm/mtrr.h> |
@@ -103,7 +104,6 @@ static unsigned long alloc_iommu(struct device *dev, int size) | |||
103 | size, base_index, boundary_size, 0); | 104 | size, base_index, boundary_size, 0); |
104 | } | 105 | } |
105 | if (offset != -1) { | 106 | if (offset != -1) { |
106 | set_bit_string(iommu_gart_bitmap, offset, size); | ||
107 | next_bit = offset+size; | 107 | next_bit = offset+size; |
108 | if (next_bit >= iommu_pages) { | 108 | if (next_bit >= iommu_pages) { |
109 | next_bit = 0; | 109 | next_bit = 0; |
@@ -533,8 +533,8 @@ static __init unsigned read_aperture(struct pci_dev *dev, u32 *size) | |||
533 | unsigned aper_size = 0, aper_base_32, aper_order; | 533 | unsigned aper_size = 0, aper_base_32, aper_order; |
534 | u64 aper_base; | 534 | u64 aper_base; |
535 | 535 | ||
536 | pci_read_config_dword(dev, 0x94, &aper_base_32); | 536 | pci_read_config_dword(dev, AMD64_GARTAPERTUREBASE, &aper_base_32); |
537 | pci_read_config_dword(dev, 0x90, &aper_order); | 537 | pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &aper_order); |
538 | aper_order = (aper_order >> 1) & 7; | 538 | aper_order = (aper_order >> 1) & 7; |
539 | 539 | ||
540 | aper_base = aper_base_32 & 0x7fff; | 540 | aper_base = aper_base_32 & 0x7fff; |
@@ -548,6 +548,77 @@ static __init unsigned read_aperture(struct pci_dev *dev, u32 *size) | |||
548 | return aper_base; | 548 | return aper_base; |
549 | } | 549 | } |
550 | 550 | ||
551 | static void enable_gart_translations(void) | ||
552 | { | ||
553 | int i; | ||
554 | |||
555 | for (i = 0; i < num_k8_northbridges; i++) { | ||
556 | struct pci_dev *dev = k8_northbridges[i]; | ||
557 | |||
558 | enable_gart_translation(dev, __pa(agp_gatt_table)); | ||
559 | } | ||
560 | } | ||
561 | |||
562 | /* | ||
563 | * If fix_up_north_bridges is set, the north bridges have to be fixed up on | ||
564 | * resume in the same way as they are handled in gart_iommu_hole_init(). | ||
565 | */ | ||
566 | static bool fix_up_north_bridges; | ||
567 | static u32 aperture_order; | ||
568 | static u32 aperture_alloc; | ||
569 | |||
570 | void set_up_gart_resume(u32 aper_order, u32 aper_alloc) | ||
571 | { | ||
572 | fix_up_north_bridges = true; | ||
573 | aperture_order = aper_order; | ||
574 | aperture_alloc = aper_alloc; | ||
575 | } | ||
576 | |||
577 | static int gart_resume(struct sys_device *dev) | ||
578 | { | ||
579 | printk(KERN_INFO "PCI-DMA: Resuming GART IOMMU\n"); | ||
580 | |||
581 | if (fix_up_north_bridges) { | ||
582 | int i; | ||
583 | |||
584 | printk(KERN_INFO "PCI-DMA: Restoring GART aperture settings\n"); | ||
585 | |||
586 | for (i = 0; i < num_k8_northbridges; i++) { | ||
587 | struct pci_dev *dev = k8_northbridges[i]; | ||
588 | |||
589 | /* | ||
590 | * Don't enable translations just yet. That is the next | ||
591 | * step. Restore the pre-suspend aperture settings. | ||
592 | */ | ||
593 | pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, | ||
594 | aperture_order << 1); | ||
595 | pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, | ||
596 | aperture_alloc >> 25); | ||
597 | } | ||
598 | } | ||
599 | |||
600 | enable_gart_translations(); | ||
601 | |||
602 | return 0; | ||
603 | } | ||
604 | |||
605 | static int gart_suspend(struct sys_device *dev, pm_message_t state) | ||
606 | { | ||
607 | return 0; | ||
608 | } | ||
609 | |||
610 | static struct sysdev_class gart_sysdev_class = { | ||
611 | .name = "gart", | ||
612 | .suspend = gart_suspend, | ||
613 | .resume = gart_resume, | ||
614 | |||
615 | }; | ||
616 | |||
617 | static struct sys_device device_gart = { | ||
618 | .id = 0, | ||
619 | .cls = &gart_sysdev_class, | ||
620 | }; | ||
621 | |||
551 | /* | 622 | /* |
552 | * Private Northbridge GATT initialization in case we cannot use the | 623 | * Private Northbridge GATT initialization in case we cannot use the |
553 | * AGP driver for some reason. | 624 | * AGP driver for some reason. |
@@ -558,7 +629,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
558 | unsigned aper_base, new_aper_base; | 629 | unsigned aper_base, new_aper_base; |
559 | struct pci_dev *dev; | 630 | struct pci_dev *dev; |
560 | void *gatt; | 631 | void *gatt; |
561 | int i; | 632 | int i, error; |
562 | 633 | ||
563 | printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); | 634 | printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); |
564 | aper_size = aper_base = info->aper_size = 0; | 635 | aper_size = aper_base = info->aper_size = 0; |
@@ -591,21 +662,14 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
591 | memset(gatt, 0, gatt_size); | 662 | memset(gatt, 0, gatt_size); |
592 | agp_gatt_table = gatt; | 663 | agp_gatt_table = gatt; |
593 | 664 | ||
594 | for (i = 0; i < num_k8_northbridges; i++) { | 665 | enable_gart_translations(); |
595 | u32 gatt_reg; | ||
596 | u32 ctl; | ||
597 | 666 | ||
598 | dev = k8_northbridges[i]; | 667 | error = sysdev_class_register(&gart_sysdev_class); |
599 | gatt_reg = __pa(gatt) >> 12; | 668 | if (!error) |
600 | gatt_reg <<= 4; | 669 | error = sysdev_register(&device_gart); |
601 | pci_write_config_dword(dev, 0x98, gatt_reg); | 670 | if (error) |
602 | pci_read_config_dword(dev, 0x90, &ctl); | 671 | panic("Could not register gart_sysdev -- would corrupt data on next suspend"); |
603 | 672 | ||
604 | ctl |= 1; | ||
605 | ctl &= ~((1<<4) | (1<<5)); | ||
606 | |||
607 | pci_write_config_dword(dev, 0x90, ctl); | ||
608 | } | ||
609 | flush_gart(); | 673 | flush_gart(); |
610 | 674 | ||
611 | printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", | 675 | printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", |
@@ -648,11 +712,11 @@ void gart_iommu_shutdown(void) | |||
648 | u32 ctl; | 712 | u32 ctl; |
649 | 713 | ||
650 | dev = k8_northbridges[i]; | 714 | dev = k8_northbridges[i]; |
651 | pci_read_config_dword(dev, 0x90, &ctl); | 715 | pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl); |
652 | 716 | ||
653 | ctl &= ~1; | 717 | ctl &= ~GARTEN; |
654 | 718 | ||
655 | pci_write_config_dword(dev, 0x90, ctl); | 719 | pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl); |
656 | } | 720 | } |
657 | } | 721 | } |
658 | 722 | ||
@@ -759,10 +823,10 @@ void __init gart_iommu_init(void) | |||
759 | wbinvd(); | 823 | wbinvd(); |
760 | 824 | ||
761 | /* | 825 | /* |
762 | * Try to work around a bug (thanks to BenH) | 826 | * Try to work around a bug (thanks to BenH): |
763 | * Set unmapped entries to a scratch page instead of 0. | 827 | * Set unmapped entries to a scratch page instead of 0. |
764 | * Any prefetches that hit unmapped entries won't get a bus abort | 828 | * Any prefetches that hit unmapped entries won't get a bus abort |
765 | * then. | 829 | * then. (P2P bridge may be prefetching on DMA reads). |
766 | */ | 830 | */ |
767 | scratch = get_zeroed_page(GFP_KERNEL); | 831 | scratch = get_zeroed_page(GFP_KERNEL); |
768 | if (!scratch) | 832 | if (!scratch) |
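The shifts in gart_resume() follow directly from the K8 aperture register encoding that read_aperture() decodes: AMD64_GARTAPERTURECTL keeps the aperture order in bits 3:1 (aperture size = 32 MB << order), and AMD64_GARTAPERTUREBASE stores the physical base in 32 MB units, i.e. shifted right by 25. A standalone sketch of that encoding (illustrative only, not part of the patch; the helper names are hypothetical):

    #include <stdint.h>
    #include <stdio.h>

    /* Encode aperture settings the way gart_resume() writes them back:
     * the order goes into APERTURECTL bits 3:1, the base into
     * APERTUREBASE as physical address >> 25 (32 MB units).
     */
    static uint32_t aperture_ctl(uint32_t aper_order)
    {
            return aper_order << 1;
    }

    static uint32_t aperture_base_reg(uint64_t phys_base)
    {
            return (uint32_t)(phys_base >> 25);
    }

    int main(void)
    {
            uint32_t order = 1;               /* 64 MB aperture */
            uint64_t base  = 0x80000000ULL;   /* sample physical base */

            printf("ctl=0x%x base=0x%x size=%u MB\n",
                   aperture_ctl(order), aperture_base_reg(base),
                   32u << order);
            return 0;
    }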
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 67e9b4a1e89d..4061d63aabe7 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -6,6 +6,7 @@ | |||
6 | #include <linux/sched.h> | 6 | #include <linux/sched.h> |
7 | #include <linux/module.h> | 7 | #include <linux/module.h> |
8 | #include <linux/pm.h> | 8 | #include <linux/pm.h> |
9 | #include <linux/clockchips.h> | ||
9 | 10 | ||
10 | struct kmem_cache *task_xstate_cachep; | 11 | struct kmem_cache *task_xstate_cachep; |
11 | 12 | ||
@@ -45,6 +46,76 @@ void arch_task_cache_init(void) | |||
45 | SLAB_PANIC, NULL); | 46 | SLAB_PANIC, NULL); |
46 | } | 47 | } |
47 | 48 | ||
49 | /* | ||
50 | * Idle related variables and functions | ||
51 | */ | ||
52 | unsigned long boot_option_idle_override = 0; | ||
53 | EXPORT_SYMBOL(boot_option_idle_override); | ||
54 | |||
55 | /* | ||
56 | * Power management idle function, if any. | ||
57 | */ | ||
58 | void (*pm_idle)(void); | ||
59 | EXPORT_SYMBOL(pm_idle); | ||
60 | |||
61 | #ifdef CONFIG_X86_32 | ||
62 | /* | ||
63 | * This halt magic was a workaround for ancient floppy DMA | ||
64 | * wreckage. It should be safe to remove. | ||
65 | */ | ||
66 | static int hlt_counter; | ||
67 | void disable_hlt(void) | ||
68 | { | ||
69 | hlt_counter++; | ||
70 | } | ||
71 | EXPORT_SYMBOL(disable_hlt); | ||
72 | |||
73 | void enable_hlt(void) | ||
74 | { | ||
75 | hlt_counter--; | ||
76 | } | ||
77 | EXPORT_SYMBOL(enable_hlt); | ||
78 | |||
79 | static inline int hlt_use_halt(void) | ||
80 | { | ||
81 | return (!hlt_counter && boot_cpu_data.hlt_works_ok); | ||
82 | } | ||
83 | #else | ||
84 | static inline int hlt_use_halt(void) | ||
85 | { | ||
86 | return 1; | ||
87 | } | ||
88 | #endif | ||
89 | |||
90 | /* | ||
91 | * We use this if we don't have any better | ||
92 | * idle routine.. | ||
93 | */ | ||
94 | void default_idle(void) | ||
95 | { | ||
96 | if (hlt_use_halt()) { | ||
97 | current_thread_info()->status &= ~TS_POLLING; | ||
98 | /* | ||
99 | * TS_POLLING-cleared state must be visible before we | ||
100 | * test NEED_RESCHED: | ||
101 | */ | ||
102 | smp_mb(); | ||
103 | |||
104 | if (!need_resched()) | ||
105 | safe_halt(); /* enables interrupts racelessly */ | ||
106 | else | ||
107 | local_irq_enable(); | ||
108 | current_thread_info()->status |= TS_POLLING; | ||
109 | } else { | ||
110 | local_irq_enable(); | ||
111 | /* loop is done by the caller */ | ||
112 | cpu_relax(); | ||
113 | } | ||
114 | } | ||
115 | #ifdef CONFIG_APM_MODULE | ||
116 | EXPORT_SYMBOL(default_idle); | ||
117 | #endif | ||
118 | |||
48 | static void do_nothing(void *unused) | 119 | static void do_nothing(void *unused) |
49 | { | 120 | { |
50 | } | 121 | } |
@@ -99,49 +170,152 @@ static void mwait_idle(void) | |||
99 | local_irq_enable(); | 170 | local_irq_enable(); |
100 | } | 171 | } |
101 | 172 | ||
173 | /* | ||
174 | * On SMP it's slightly faster (but much more power-consuming!) | ||
175 | * to poll the ->work.need_resched flag instead of waiting for the | ||
176 | * cross-CPU IPI to arrive. Use this option with caution. | ||
177 | */ | ||
178 | static void poll_idle(void) | ||
179 | { | ||
180 | local_irq_enable(); | ||
181 | cpu_relax(); | ||
182 | } | ||
183 | |||
184 | /* | ||
185 | * mwait selection logic: | ||
186 | * | ||
187 | * It depends on the CPU. For AMD CPUs that support MWAIT this is | ||
188 | * wrong. Family 0x10 and 0x11 CPUs will enter C1 on HLT. Power savings | ||
189 | * then depend on a clock divisor and current Pstate of the core. If | ||
190 | * all cores of a processor are in halt state (C1) the processor can | ||
191 | * enter the C1E (C1 enhanced) state. If mwait is used this will never | ||
192 | * happen. | ||
193 | * | ||
194 | * idle=mwait overrides this decision and forces the usage of mwait. | ||
195 | */ | ||
196 | |||
197 | #define MWAIT_INFO 0x05 | ||
198 | #define MWAIT_ECX_EXTENDED_INFO 0x01 | ||
199 | #define MWAIT_EDX_C1 0xf0 | ||
102 | 200 | ||
103 | static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c) | 201 | static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c) |
104 | { | 202 | { |
203 | u32 eax, ebx, ecx, edx; | ||
204 | |||
105 | if (force_mwait) | 205 | if (force_mwait) |
106 | return 1; | 206 | return 1; |
107 | /* Any C1 states supported? */ | 207 | |
108 | return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0; | 208 | if (c->cpuid_level < MWAIT_INFO) |
209 | return 0; | ||
210 | |||
211 | cpuid(MWAIT_INFO, &eax, &ebx, &ecx, &edx); | ||
212 | /* Check whether EDX has extended info about MWAIT */ | ||
213 | if (!(ecx & MWAIT_ECX_EXTENDED_INFO)) | ||
214 | return 1; | ||
215 | |||
216 | /* | ||
217 | * edx enumerates MONITOR/MWAIT extensions. Check whether | ||
218 | * C1 supports MWAIT | ||
219 | */ | ||
220 | return (edx & MWAIT_EDX_C1); | ||
109 | } | 221 | } |
110 | 222 | ||
111 | /* | 223 | /* |
112 | * On SMP it's slightly faster (but much more power-consuming!) | 224 | * Check for AMD CPUs, which potentially have C1E support |
113 | * to poll the ->work.need_resched flag instead of waiting for the | ||
114 | * cross-CPU IPI to arrive. Use this option with caution. | ||
115 | */ | 225 | */ |
116 | static void poll_idle(void) | 226 | static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c) |
117 | { | 227 | { |
118 | local_irq_enable(); | 228 | if (c->x86_vendor != X86_VENDOR_AMD) |
119 | cpu_relax(); | 229 | return 0; |
230 | |||
231 | if (c->x86 < 0x0F) | ||
232 | return 0; | ||
233 | |||
234 | /* Family 0x0f models < rev F do not have C1E */ | ||
235 | if (c->x86 == 0x0f && c->x86_model < 0x40) | ||
236 | return 0; | ||
237 | |||
238 | return 1; | ||
120 | } | 239 | } |
121 | 240 | ||
122 | void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) | 241 | /* |
242 | * C1E aware idle routine. We check for C1E active in the interrupt | ||
243 | * pending message MSR. If we detect C1E, then we handle it the same | ||
244 | * way as C3 power states (local apic timer and TSC stop) | ||
245 | */ | ||
246 | static void c1e_idle(void) | ||
123 | { | 247 | { |
124 | static int selected; | 248 | static cpumask_t c1e_mask = CPU_MASK_NONE; |
249 | static int c1e_detected; | ||
125 | 250 | ||
126 | if (selected) | 251 | if (need_resched()) |
127 | return; | 252 | return; |
253 | |||
254 | if (!c1e_detected) { | ||
255 | u32 lo, hi; | ||
256 | |||
257 | rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); | ||
258 | if (lo & K8_INTP_C1E_ACTIVE_MASK) { | ||
259 | c1e_detected = 1; | ||
260 | mark_tsc_unstable("TSC halt in C1E"); | ||
261 | printk(KERN_INFO "System has C1E enabled\n"); | ||
262 | } | ||
263 | } | ||
264 | |||
265 | if (c1e_detected) { | ||
266 | int cpu = smp_processor_id(); | ||
267 | |||
268 | if (!cpu_isset(cpu, c1e_mask)) { | ||
269 | cpu_set(cpu, c1e_mask); | ||
270 | /* | ||
271 | * Force broadcast so ACPI cannot interfere. Needs | ||
272 | * to run with interrupts enabled as it uses | ||
273 | * smp_call_function. | ||
274 | */ | ||
275 | local_irq_enable(); | ||
276 | clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, | ||
277 | &cpu); | ||
278 | printk(KERN_INFO "Switch to broadcast mode on CPU%d\n", | ||
279 | cpu); | ||
280 | local_irq_disable(); | ||
281 | } | ||
282 | clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); | ||
283 | |||
284 | default_idle(); | ||
285 | |||
286 | /* | ||
287 | * The switch back from broadcast mode needs to be | ||
288 | * called with interrupts disabled. | ||
289 | */ | ||
290 | local_irq_disable(); | ||
291 | clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); | ||
292 | local_irq_enable(); | ||
293 | } else | ||
294 | default_idle(); | ||
295 | } | ||
296 | |||
297 | void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) | ||
298 | { | ||
128 | #ifdef CONFIG_X86_SMP | 299 | #ifdef CONFIG_X86_SMP |
129 | if (pm_idle == poll_idle && smp_num_siblings > 1) { | 300 | if (pm_idle == poll_idle && smp_num_siblings > 1) { |
130 | printk(KERN_WARNING "WARNING: polling idle and HT enabled," | 301 | printk(KERN_WARNING "WARNING: polling idle and HT enabled," |
131 | " performance may degrade.\n"); | 302 | " performance may degrade.\n"); |
132 | } | 303 | } |
133 | #endif | 304 | #endif |
305 | if (pm_idle) | ||
306 | return; | ||
307 | |||
134 | if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) { | 308 | if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) { |
135 | /* | 309 | /* |
136 | * Skip, if setup has overridden idle. | ||
137 | * One CPU supports mwait => all CPUs support mwait | 310 | * One CPU supports mwait => all CPUs support mwait |
138 | */ | 311 | */ |
139 | if (!pm_idle) { | 312 | printk(KERN_INFO "using mwait in idle threads.\n"); |
140 | printk(KERN_INFO "using mwait in idle threads.\n"); | 313 | pm_idle = mwait_idle; |
141 | pm_idle = mwait_idle; | 314 | } else if (check_c1e_idle(c)) { |
142 | } | 315 | printk(KERN_INFO "using C1E aware idle routine\n"); |
143 | } | 316 | pm_idle = c1e_idle; |
144 | selected = 1; | 317 | } else |
318 | pm_idle = default_idle; | ||
145 | } | 319 | } |
146 | 320 | ||
147 | static int __init idle_setup(char *str) | 321 | static int __init idle_setup(char *str) |
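The reworked mwait_usable() keys off CPUID leaf 5: ECX bit 0 says whether the extended MWAIT enumeration exists at all, and EDX bits 7:4 count the C1 sub-states reachable through MWAIT, which is the check the idle selection above relies on. The same probe works from user space; a minimal sketch using GCC's <cpuid.h> (illustrative only, not from the patch):

    #include <cpuid.h>
    #include <stdio.h>

    #define MWAIT_INFO              0x05
    #define MWAIT_ECX_EXTENDED_INFO 0x01
    #define MWAIT_EDX_C1            0xf0   /* EDX[7:4]: C1 sub-states */

    int main(void)
    {
            unsigned int eax, ebx, ecx, edx;

            if (!__get_cpuid(MWAIT_INFO, &eax, &ebx, &ecx, &edx)) {
                    puts("CPUID leaf 5 not available");
                    return 0;
            }
            if (!(ecx & MWAIT_ECX_EXTENDED_INFO)) {
                    puts("no extended MWAIT info; treated as usable");
                    return 0;
            }
            printf("C1 sub-states with MWAIT: %u (%s)\n",
                   (edx & MWAIT_EDX_C1) >> 4,
                   (edx & MWAIT_EDX_C1) ? "mwait_idle eligible"
                                        : "fall back to hlt/C1E idle");
            return 0;
    }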
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index f8476dfbb60d..c2a11d77b1b5 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -58,11 +58,6 @@ | |||
58 | 58 | ||
59 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | 59 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); |
60 | 60 | ||
61 | static int hlt_counter; | ||
62 | |||
63 | unsigned long boot_option_idle_override = 0; | ||
64 | EXPORT_SYMBOL(boot_option_idle_override); | ||
65 | |||
66 | DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; | 61 | DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; |
67 | EXPORT_PER_CPU_SYMBOL(current_task); | 62 | EXPORT_PER_CPU_SYMBOL(current_task); |
68 | 63 | ||
@@ -77,55 +72,6 @@ unsigned long thread_saved_pc(struct task_struct *tsk) | |||
77 | return ((unsigned long *)tsk->thread.sp)[3]; | 72 | return ((unsigned long *)tsk->thread.sp)[3]; |
78 | } | 73 | } |
79 | 74 | ||
80 | /* | ||
81 | * Powermanagement idle function, if any.. | ||
82 | */ | ||
83 | void (*pm_idle)(void); | ||
84 | EXPORT_SYMBOL(pm_idle); | ||
85 | |||
86 | void disable_hlt(void) | ||
87 | { | ||
88 | hlt_counter++; | ||
89 | } | ||
90 | |||
91 | EXPORT_SYMBOL(disable_hlt); | ||
92 | |||
93 | void enable_hlt(void) | ||
94 | { | ||
95 | hlt_counter--; | ||
96 | } | ||
97 | |||
98 | EXPORT_SYMBOL(enable_hlt); | ||
99 | |||
100 | /* | ||
101 | * We use this if we don't have any better | ||
102 | * idle routine.. | ||
103 | */ | ||
104 | void default_idle(void) | ||
105 | { | ||
106 | if (!hlt_counter && boot_cpu_data.hlt_works_ok) { | ||
107 | current_thread_info()->status &= ~TS_POLLING; | ||
108 | /* | ||
109 | * TS_POLLING-cleared state must be visible before we | ||
110 | * test NEED_RESCHED: | ||
111 | */ | ||
112 | smp_mb(); | ||
113 | |||
114 | if (!need_resched()) | ||
115 | safe_halt(); /* enables interrupts racelessly */ | ||
116 | else | ||
117 | local_irq_enable(); | ||
118 | current_thread_info()->status |= TS_POLLING; | ||
119 | } else { | ||
120 | local_irq_enable(); | ||
121 | /* loop is done by the caller */ | ||
122 | cpu_relax(); | ||
123 | } | ||
124 | } | ||
125 | #ifdef CONFIG_APM_MODULE | ||
126 | EXPORT_SYMBOL(default_idle); | ||
127 | #endif | ||
128 | |||
129 | #ifdef CONFIG_HOTPLUG_CPU | 75 | #ifdef CONFIG_HOTPLUG_CPU |
130 | #include <asm/nmi.h> | 76 | #include <asm/nmi.h> |
131 | /* We don't actually take CPU down, just spin without interrupts. */ | 77 | /* We don't actually take CPU down, just spin without interrupts. */ |
@@ -168,24 +114,19 @@ void cpu_idle(void) | |||
168 | while (1) { | 114 | while (1) { |
169 | tick_nohz_stop_sched_tick(); | 115 | tick_nohz_stop_sched_tick(); |
170 | while (!need_resched()) { | 116 | while (!need_resched()) { |
171 | void (*idle)(void); | ||
172 | 117 | ||
173 | check_pgt_cache(); | 118 | check_pgt_cache(); |
174 | rmb(); | 119 | rmb(); |
175 | idle = pm_idle; | ||
176 | 120 | ||
177 | if (rcu_pending(cpu)) | 121 | if (rcu_pending(cpu)) |
178 | rcu_check_callbacks(cpu, 0); | 122 | rcu_check_callbacks(cpu, 0); |
179 | 123 | ||
180 | if (!idle) | ||
181 | idle = default_idle; | ||
182 | |||
183 | if (cpu_is_offline(cpu)) | 124 | if (cpu_is_offline(cpu)) |
184 | play_dead(); | 125 | play_dead(); |
185 | 126 | ||
186 | local_irq_disable(); | 127 | local_irq_disable(); |
187 | __get_cpu_var(irq_stat).idle_timestamp = jiffies; | 128 | __get_cpu_var(irq_stat).idle_timestamp = jiffies; |
188 | idle(); | 129 | pm_idle(); |
189 | } | 130 | } |
190 | tick_nohz_restart_sched_tick(); | 131 | tick_nohz_restart_sched_tick(); |
191 | preempt_enable_no_resched(); | 132 | preempt_enable_no_resched(); |
@@ -333,6 +274,7 @@ void flush_thread(void) | |||
333 | /* | 274 | /* |
334 | * Forget coprocessor state.. | 275 | * Forget coprocessor state.. |
335 | */ | 276 | */ |
277 | tsk->fpu_counter = 0; | ||
336 | clear_fpu(tsk); | 278 | clear_fpu(tsk); |
337 | clear_used_math(); | 279 | clear_used_math(); |
338 | } | 280 | } |
@@ -649,8 +591,11 @@ struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct | |||
649 | /* If the task has used fpu the last 5 timeslices, just do a full | 591 | /* If the task has used fpu the last 5 timeslices, just do a full |
650 | * restore of the math state immediately to avoid the trap; the | 592 | * restore of the math state immediately to avoid the trap; the |
651 | * chances of needing FPU soon are obviously high now | 593 | * chances of needing FPU soon are obviously high now |
594 | * | ||
595 | * tsk_used_math() checks prevent calling math_state_restore(), | ||
596 | * which can sleep in the case of !tsk_used_math() | ||
652 | */ | 597 | */ |
653 | if (next_p->fpu_counter > 5) | 598 | if (tsk_used_math(next_p) && next_p->fpu_counter > 5) |
654 | math_state_restore(); | 599 | math_state_restore(); |
655 | 600 | ||
656 | /* | 601 | /* |
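The new tsk_used_math() guard in __switch_to() (mirrored in the identical process_64.c hunk below) is more than a tidy-up: math_state_restore() may sleep while allocating FPU state for a task that has never used math, and __switch_to() runs with preemption disabled, so the eager-restore fast path must only fire when the state already exists. Condensed, the invariant looks like this (a sketch, with names as in the patch):

    /* Only pre-load FPU state that is known to exist, so the atomic
     * context-switch path can never reach the sleeping allocation
     * inside math_state_restore().
     */
    if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
            math_state_restore();   /* state allocated => cannot sleep */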
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e2319f39988b..290183e9731a 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -56,15 +56,6 @@ asmlinkage extern void ret_from_fork(void); | |||
56 | 56 | ||
57 | unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; | 57 | unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; |
58 | 58 | ||
59 | unsigned long boot_option_idle_override = 0; | ||
60 | EXPORT_SYMBOL(boot_option_idle_override); | ||
61 | |||
62 | /* | ||
63 | * Powermanagement idle function, if any.. | ||
64 | */ | ||
65 | void (*pm_idle)(void); | ||
66 | EXPORT_SYMBOL(pm_idle); | ||
67 | |||
68 | static ATOMIC_NOTIFIER_HEAD(idle_notifier); | 59 | static ATOMIC_NOTIFIER_HEAD(idle_notifier); |
69 | 60 | ||
70 | void idle_notifier_register(struct notifier_block *n) | 61 | void idle_notifier_register(struct notifier_block *n) |
@@ -94,25 +85,6 @@ void exit_idle(void) | |||
94 | __exit_idle(); | 85 | __exit_idle(); |
95 | } | 86 | } |
96 | 87 | ||
97 | /* | ||
98 | * We use this if we don't have any better | ||
99 | * idle routine.. | ||
100 | */ | ||
101 | void default_idle(void) | ||
102 | { | ||
103 | current_thread_info()->status &= ~TS_POLLING; | ||
104 | /* | ||
105 | * TS_POLLING-cleared state must be visible before we | ||
106 | * test NEED_RESCHED: | ||
107 | */ | ||
108 | smp_mb(); | ||
109 | if (!need_resched()) | ||
110 | safe_halt(); /* enables interrupts racelessly */ | ||
111 | else | ||
112 | local_irq_enable(); | ||
113 | current_thread_info()->status |= TS_POLLING; | ||
114 | } | ||
115 | |||
116 | #ifdef CONFIG_HOTPLUG_CPU | 88 | #ifdef CONFIG_HOTPLUG_CPU |
117 | DECLARE_PER_CPU(int, cpu_state); | 89 | DECLARE_PER_CPU(int, cpu_state); |
118 | 90 | ||
@@ -150,12 +122,9 @@ void cpu_idle(void) | |||
150 | while (1) { | 122 | while (1) { |
151 | tick_nohz_stop_sched_tick(); | 123 | tick_nohz_stop_sched_tick(); |
152 | while (!need_resched()) { | 124 | while (!need_resched()) { |
153 | void (*idle)(void); | ||
154 | 125 | ||
155 | rmb(); | 126 | rmb(); |
156 | idle = pm_idle; | 127 | |
157 | if (!idle) | ||
158 | idle = default_idle; | ||
159 | if (cpu_is_offline(smp_processor_id())) | 128 | if (cpu_is_offline(smp_processor_id())) |
160 | play_dead(); | 129 | play_dead(); |
161 | /* | 130 | /* |
@@ -165,7 +134,7 @@ void cpu_idle(void) | |||
165 | */ | 134 | */ |
166 | local_irq_disable(); | 135 | local_irq_disable(); |
167 | enter_idle(); | 136 | enter_idle(); |
168 | idle(); | 137 | pm_idle(); |
169 | /* In many cases the interrupt that ended idle | 138 | /* In many cases the interrupt that ended idle |
170 | has already called exit_idle. But some idle | 139 | has already called exit_idle. But some idle |
171 | loops can be woken up without interrupt. */ | 140 | loops can be woken up without interrupt. */ |
@@ -294,6 +263,7 @@ void flush_thread(void) | |||
294 | /* | 263 | /* |
295 | * Forget coprocessor state.. | 264 | * Forget coprocessor state.. |
296 | */ | 265 | */ |
266 | tsk->fpu_counter = 0; | ||
297 | clear_fpu(tsk); | 267 | clear_fpu(tsk); |
298 | clear_used_math(); | 268 | clear_used_math(); |
299 | } | 269 | } |
@@ -658,8 +628,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
658 | /* If the task has used fpu the last 5 timeslices, just do a full | 628 | /* If the task has used fpu the last 5 timeslices, just do a full |
659 | * restore of the math state immediately to avoid the trap; the | 629 | * restore of the math state immediately to avoid the trap; the |
660 | * chances of needing FPU soon are obviously high now | 630 | * chances of needing FPU soon are obviously high now |
631 | * | ||
632 | * tsk_used_math() checks prevent calling math_state_restore(), | ||
633 | * which can sleep in the case of !tsk_used_math() | ||
661 | */ | 634 | */ |
662 | if (next_p->fpu_counter>5) | 635 | if (tsk_used_math(next_p) && next_p->fpu_counter > 5) |
663 | math_state_restore(); | 636 | math_state_restore(); |
664 | return prev_p; | 637 | return prev_p; |
665 | } | 638 | } |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index fb03ef380f0e..a7835f282936 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -1303,6 +1303,9 @@ static const struct user_regset_view user_x86_64_view = { | |||
1303 | #define genregs32_get genregs_get | 1303 | #define genregs32_get genregs_get |
1304 | #define genregs32_set genregs_set | 1304 | #define genregs32_set genregs_set |
1305 | 1305 | ||
1306 | #define user_i387_ia32_struct user_i387_struct | ||
1307 | #define user32_fxsr_struct user_fxsr_struct | ||
1308 | |||
1306 | #endif /* CONFIG_X86_64 */ | 1309 | #endif /* CONFIG_X86_64 */ |
1307 | 1310 | ||
1308 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION | 1311 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION |
@@ -1315,13 +1318,13 @@ static const struct user_regset x86_32_regsets[] = { | |||
1315 | }, | 1318 | }, |
1316 | [REGSET_FP] = { | 1319 | [REGSET_FP] = { |
1317 | .core_note_type = NT_PRFPREG, | 1320 | .core_note_type = NT_PRFPREG, |
1318 | .n = sizeof(struct user_i387_struct) / sizeof(u32), | 1321 | .n = sizeof(struct user_i387_ia32_struct) / sizeof(u32), |
1319 | .size = sizeof(u32), .align = sizeof(u32), | 1322 | .size = sizeof(u32), .align = sizeof(u32), |
1320 | .active = fpregs_active, .get = fpregs_get, .set = fpregs_set | 1323 | .active = fpregs_active, .get = fpregs_get, .set = fpregs_set |
1321 | }, | 1324 | }, |
1322 | [REGSET_XFP] = { | 1325 | [REGSET_XFP] = { |
1323 | .core_note_type = NT_PRXFPREG, | 1326 | .core_note_type = NT_PRXFPREG, |
1324 | .n = sizeof(struct user_i387_struct) / sizeof(u32), | 1327 | .n = sizeof(struct user32_fxsr_struct) / sizeof(u32), |
1325 | .size = sizeof(u32), .align = sizeof(u32), | 1328 | .size = sizeof(u32), .align = sizeof(u32), |
1326 | .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set | 1329 | .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set |
1327 | }, | 1330 | }, |
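The regset sizing fix is easy to sanity-check by arithmetic: the FXSAVE image behind user32_fxsr_struct is architecturally 512 bytes, while the classic i387 area behind user_i387_ia32_struct is 108 bytes, so the two .n fields must differ; the old code handed out the 64-bit user_i387_struct size for both 32-bit layouts. A quick check (illustrative; the byte counts assume the standard layouts):

    #include <stdio.h>

    int main(void)
    {
            unsigned int fxsr_bytes = 512; /* sizeof(struct user32_fxsr_struct) */
            unsigned int i387_bytes = 108; /* sizeof(struct user_i387_ia32_struct) */

            /* .n counts u32 words in the regset tables above */
            printf("REGSET_XFP .n = %u\n", fxsr_bytes / 4);  /* 128 */
            printf("REGSET_FP  .n = %u\n", i387_bytes / 4);  /* 27 */
            return 0;
    }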
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c new file mode 100644 index 000000000000..05fbe9a0325a --- /dev/null +++ b/arch/x86/kernel/pvclock.c | |||
@@ -0,0 +1,141 @@ | |||
1 | /* paravirtual clock -- common code used by kvm/xen | ||
2 | |||
3 | This program is free software; you can redistribute it and/or modify | ||
4 | it under the terms of the GNU General Public License as published by | ||
5 | the Free Software Foundation; either version 2 of the License, or | ||
6 | (at your option) any later version. | ||
7 | |||
8 | This program is distributed in the hope that it will be useful, | ||
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | GNU General Public License for more details. | ||
12 | |||
13 | You should have received a copy of the GNU General Public License | ||
14 | along with this program; if not, write to the Free Software | ||
15 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
16 | */ | ||
17 | |||
18 | #include <linux/kernel.h> | ||
19 | #include <linux/percpu.h> | ||
20 | #include <asm/pvclock.h> | ||
21 | |||
22 | /* | ||
23 | * These are periodically updated | ||
24 | * xen: magic shared_info page | ||
25 | * kvm: gpa registered via msr | ||
26 | * and then copied here. | ||
27 | */ | ||
28 | struct pvclock_shadow_time { | ||
29 | u64 tsc_timestamp; /* TSC at last update of time vals. */ | ||
30 | u64 system_timestamp; /* Time, in nanosecs, since boot. */ | ||
31 | u32 tsc_to_nsec_mul; | ||
32 | int tsc_shift; | ||
33 | u32 version; | ||
34 | }; | ||
35 | |||
36 | /* | ||
37 | * Scale a 64-bit delta by shifting and multiplying by a 32-bit fraction, | ||
38 | * yielding a 64-bit result. | ||
39 | */ | ||
40 | static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift) | ||
41 | { | ||
42 | u64 product; | ||
43 | #ifdef __i386__ | ||
44 | u32 tmp1, tmp2; | ||
45 | #endif | ||
46 | |||
47 | if (shift < 0) | ||
48 | delta >>= -shift; | ||
49 | else | ||
50 | delta <<= shift; | ||
51 | |||
52 | #ifdef __i386__ | ||
53 | __asm__ ( | ||
54 | "mul %5 ; " | ||
55 | "mov %4,%%eax ; " | ||
56 | "mov %%edx,%4 ; " | ||
57 | "mul %5 ; " | ||
58 | "xor %5,%5 ; " | ||
59 | "add %4,%%eax ; " | ||
60 | "adc %5,%%edx ; " | ||
61 | : "=A" (product), "=r" (tmp1), "=r" (tmp2) | ||
62 | : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) ); | ||
63 | #elif __x86_64__ | ||
64 | __asm__ ( | ||
65 | "mul %%rdx ; shrd $32,%%rdx,%%rax" | ||
66 | : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) ); | ||
67 | #else | ||
68 | #error implement me! | ||
69 | #endif | ||
70 | |||
71 | return product; | ||
72 | } | ||
73 | |||
74 | static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow) | ||
75 | { | ||
76 | u64 delta = native_read_tsc() - shadow->tsc_timestamp; | ||
77 | return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift); | ||
78 | } | ||
79 | |||
80 | /* | ||
81 | * Reads a consistent set of time-base values from the hypervisor | ||
82 | * into a shadow data area. | ||
83 | */ | ||
84 | static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst, | ||
85 | struct pvclock_vcpu_time_info *src) | ||
86 | { | ||
87 | do { | ||
88 | dst->version = src->version; | ||
89 | rmb(); /* fetch version before data */ | ||
90 | dst->tsc_timestamp = src->tsc_timestamp; | ||
91 | dst->system_timestamp = src->system_time; | ||
92 | dst->tsc_to_nsec_mul = src->tsc_to_system_mul; | ||
93 | dst->tsc_shift = src->tsc_shift; | ||
94 | rmb(); /* test version after fetching data */ | ||
95 | } while ((src->version & 1) || (dst->version != src->version)); | ||
96 | |||
97 | return dst->version; | ||
98 | } | ||
99 | |||
100 | cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) | ||
101 | { | ||
102 | struct pvclock_shadow_time shadow; | ||
103 | unsigned version; | ||
104 | cycle_t ret, offset; | ||
105 | |||
106 | do { | ||
107 | version = pvclock_get_time_values(&shadow, src); | ||
108 | barrier(); | ||
109 | offset = pvclock_get_nsec_offset(&shadow); | ||
110 | ret = shadow.system_timestamp + offset; | ||
111 | barrier(); | ||
112 | } while (version != src->version); | ||
113 | |||
114 | return ret; | ||
115 | } | ||
116 | |||
117 | void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, | ||
118 | struct pvclock_vcpu_time_info *vcpu_time, | ||
119 | struct timespec *ts) | ||
120 | { | ||
121 | u32 version; | ||
122 | u64 delta; | ||
123 | struct timespec now; | ||
124 | |||
125 | /* get wallclock at system boot */ | ||
126 | do { | ||
127 | version = wall_clock->version; | ||
128 | rmb(); /* fetch version before time */ | ||
129 | now.tv_sec = wall_clock->sec; | ||
130 | now.tv_nsec = wall_clock->nsec; | ||
131 | rmb(); /* fetch time before checking version */ | ||
132 | } while ((wall_clock->version & 1) || (version != wall_clock->version)); | ||
133 | |||
134 | delta = pvclock_clocksource_read(vcpu_time); /* time since system boot */ | ||
135 | delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec; | ||
136 | |||
137 | now.tv_nsec = do_div(delta, NSEC_PER_SEC); | ||
138 | now.tv_sec = delta; | ||
139 | |||
140 | set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); | ||
141 | } | ||
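Two details of pvclock.c deserve a note. The version fields behave like a seqlock: the hypervisor increments them before and after each update, so a reader that sees an odd value, or a value that changed across the copy, must retry; that is what both retry loops above implement. And the inline assembly in scale_delta() is nothing more than a shift followed by a 64x32 fixed-point multiply that keeps bits 32..95 of the product. A portable sketch (illustrative, requires a compiler with unsigned __int128):

    #include <stdint.h>

    /* Portable equivalent of scale_delta(): convert a TSC delta to
     * nanoseconds via a binary shift and a 32.32 fixed-point multiply.
     */
    static uint64_t scale_delta_portable(uint64_t delta, uint32_t mul_frac,
                                         int shift)
    {
            if (shift < 0)
                    delta >>= -shift;
            else
                    delta <<= shift;

            /* full 128-bit product, keep bits 32..95 as the asm does */
            return (uint64_t)(((unsigned __int128)delta * mul_frac) >> 32);
    }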
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index d89a648fe710..79bdcd11c66e 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c | |||
@@ -65,6 +65,7 @@ static enum { | |||
65 | ICH_FORCE_HPET_RESUME, | 65 | ICH_FORCE_HPET_RESUME, |
66 | VT8237_FORCE_HPET_RESUME, | 66 | VT8237_FORCE_HPET_RESUME, |
67 | NVIDIA_FORCE_HPET_RESUME, | 67 | NVIDIA_FORCE_HPET_RESUME, |
68 | ATI_FORCE_HPET_RESUME, | ||
68 | } force_hpet_resume_type; | 69 | } force_hpet_resume_type; |
69 | 70 | ||
70 | static void __iomem *rcba_base; | 71 | static void __iomem *rcba_base; |
@@ -158,6 +159,8 @@ static void ich_force_enable_hpet(struct pci_dev *dev) | |||
158 | 159 | ||
159 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB2_0, | 160 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB2_0, |
160 | ich_force_enable_hpet); | 161 | ich_force_enable_hpet); |
162 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_0, | ||
163 | ich_force_enable_hpet); | ||
161 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1, | 164 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1, |
162 | ich_force_enable_hpet); | 165 | ich_force_enable_hpet); |
163 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_0, | 166 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_0, |
@@ -174,6 +177,12 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7, | |||
174 | 177 | ||
175 | static struct pci_dev *cached_dev; | 178 | static struct pci_dev *cached_dev; |
176 | 179 | ||
180 | static void hpet_print_force_info(void) | ||
181 | { | ||
182 | printk(KERN_INFO "HPET not enabled in BIOS. " | ||
183 | "You might try hpet=force boot option\n"); | ||
184 | } | ||
185 | |||
177 | static void old_ich_force_hpet_resume(void) | 186 | static void old_ich_force_hpet_resume(void) |
178 | { | 187 | { |
179 | u32 val; | 188 | u32 val; |
@@ -253,6 +262,8 @@ static void old_ich_force_enable_hpet_user(struct pci_dev *dev) | |||
253 | { | 262 | { |
254 | if (hpet_force_user) | 263 | if (hpet_force_user) |
255 | old_ich_force_enable_hpet(dev); | 264 | old_ich_force_enable_hpet(dev); |
265 | else | ||
266 | hpet_print_force_info(); | ||
256 | } | 267 | } |
257 | 268 | ||
258 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0, | 269 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0, |
@@ -290,8 +301,13 @@ static void vt8237_force_enable_hpet(struct pci_dev *dev) | |||
290 | { | 301 | { |
291 | u32 uninitialized_var(val); | 302 | u32 uninitialized_var(val); |
292 | 303 | ||
293 | if (!hpet_force_user || hpet_address || force_hpet_address) | 304 | if (hpet_address || force_hpet_address) |
305 | return; | ||
306 | |||
307 | if (!hpet_force_user) { | ||
308 | hpet_print_force_info(); | ||
294 | return; | 309 | return; |
310 | } | ||
295 | 311 | ||
296 | pci_read_config_dword(dev, 0x68, &val); | 312 | pci_read_config_dword(dev, 0x68, &val); |
297 | /* | 313 | /* |
@@ -330,6 +346,36 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, | |||
330 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, | 346 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, |
331 | vt8237_force_enable_hpet); | 347 | vt8237_force_enable_hpet); |
332 | 348 | ||
349 | static void ati_force_hpet_resume(void) | ||
350 | { | ||
351 | pci_write_config_dword(cached_dev, 0x14, 0xfed00000); | ||
352 | printk(KERN_DEBUG "Force enabled HPET at resume\n"); | ||
353 | } | ||
354 | |||
355 | static void ati_force_enable_hpet(struct pci_dev *dev) | ||
356 | { | ||
357 | u32 uninitialized_var(val); | ||
358 | |||
359 | if (hpet_address || force_hpet_address) | ||
360 | return; | ||
361 | |||
362 | if (!hpet_force_user) { | ||
363 | hpet_print_force_info(); | ||
364 | return; | ||
365 | } | ||
366 | |||
367 | pci_write_config_dword(dev, 0x14, 0xfed00000); | ||
368 | pci_read_config_dword(dev, 0x14, &val); | ||
369 | force_hpet_address = val; | ||
370 | force_hpet_resume_type = ATI_FORCE_HPET_RESUME; | ||
371 | dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at 0x%lx\n", | ||
372 | force_hpet_address); | ||
373 | cached_dev = dev; | ||
374 | return; | ||
375 | } | ||
376 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP400_SMBUS, | ||
377 | ati_force_enable_hpet); | ||
378 | |||
333 | /* | 379 | /* |
334 | * Undocumented chipset feature taken from LinuxBIOS. | 380 | * Undocumented chipset feature taken from LinuxBIOS. |
335 | */ | 381 | */ |
@@ -343,8 +389,13 @@ static void nvidia_force_enable_hpet(struct pci_dev *dev) | |||
343 | { | 389 | { |
344 | u32 uninitialized_var(val); | 390 | u32 uninitialized_var(val); |
345 | 391 | ||
346 | if (!hpet_force_user || hpet_address || force_hpet_address) | 392 | if (hpet_address || force_hpet_address) |
393 | return; | ||
394 | |||
395 | if (!hpet_force_user) { | ||
396 | hpet_print_force_info(); | ||
347 | return; | 397 | return; |
398 | } | ||
348 | 399 | ||
349 | pci_write_config_dword(dev, 0x44, 0xfed00001); | 400 | pci_write_config_dword(dev, 0x44, 0xfed00001); |
350 | pci_read_config_dword(dev, 0x44, &val); | 401 | pci_read_config_dword(dev, 0x44, &val); |
@@ -397,6 +448,9 @@ void force_hpet_resume(void) | |||
397 | case NVIDIA_FORCE_HPET_RESUME: | 448 | case NVIDIA_FORCE_HPET_RESUME: |
398 | nvidia_force_hpet_resume(); | 449 | nvidia_force_hpet_resume(); |
399 | return; | 450 | return; |
451 | case ATI_FORCE_HPET_RESUME: | ||
452 | ati_force_hpet_resume(); | ||
453 | return; | ||
400 | default: | 454 | default: |
401 | break; | 455 | break; |
402 | } | 456 | } |
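After this change all three force-enable paths share one shape: return early if an HPET is already known, print the hpet=force hint when the user has not opted in, then program the chipset's HPET base register and cache the device so the write can be replayed at resume. A condensed sketch of that common skeleton (illustrative, not a function in the patch; only the config register and value differ per vendor, e.g. 0x14 and 0xfed00000 for the ATI IXP400 above):

    static void force_enable_hpet_common(struct pci_dev *dev, int reg, u32 val)
    {
            u32 readback;

            if (hpet_address || force_hpet_address)
                    return;                  /* already enabled or forced */

            if (!hpet_force_user) {
                    hpet_print_force_info(); /* suggest hpet=force instead */
                    return;
            }

            pci_write_config_dword(dev, reg, val);
            pci_read_config_dword(dev, reg, &readback);
            force_hpet_address = readback;
            cached_dev = dev;                /* replayed on resume */
    }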
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index f6be7d5f82f8..f8a62160e151 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -27,7 +27,7 @@ | |||
27 | void (*pm_power_off)(void); | 27 | void (*pm_power_off)(void); |
28 | EXPORT_SYMBOL(pm_power_off); | 28 | EXPORT_SYMBOL(pm_power_off); |
29 | 29 | ||
30 | static long no_idt[3]; | 30 | static const struct desc_ptr no_idt = {}; |
31 | static int reboot_mode; | 31 | static int reboot_mode; |
32 | enum reboot_type reboot_type = BOOT_KBD; | 32 | enum reboot_type reboot_type = BOOT_KBD; |
33 | int reboot_force; | 33 | int reboot_force; |
@@ -201,15 +201,15 @@ core_initcall(reboot_init); | |||
201 | controller to pulse the CPU reset line, which is more thorough, but | 201 | controller to pulse the CPU reset line, which is more thorough, but |
202 | doesn't work with at least one type of 486 motherboard. It is easy | 202 | doesn't work with at least one type of 486 motherboard. It is easy |
203 | to stop this code working; hence the copious comments. */ | 203 | to stop this code working; hence the copious comments. */ |
204 | static unsigned long long | 204 | static const unsigned long long |
205 | real_mode_gdt_entries [3] = | 205 | real_mode_gdt_entries [3] = |
206 | { | 206 | { |
207 | 0x0000000000000000ULL, /* Null descriptor */ | 207 | 0x0000000000000000ULL, /* Null descriptor */ |
208 | 0x00009a000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */ | 208 | 0x00009b000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */ |
209 | 0x000092000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ | 209 | 0x000093000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ |
210 | }; | 210 | }; |
211 | 211 | ||
212 | static struct desc_ptr | 212 | static const struct desc_ptr |
213 | real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, (long)real_mode_gdt_entries }, | 213 | real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, (long)real_mode_gdt_entries }, |
214 | real_mode_idt = { 0x3ff, 0 }; | 214 | real_mode_idt = { 0x3ff, 0 }; |
215 | 215 | ||
@@ -231,7 +231,7 @@ real_mode_idt = { 0x3ff, 0 }; | |||
231 | 231 | ||
232 | More could be done here to set up the registers as if a CPU reset had | 232 | More could be done here to set up the registers as if a CPU reset had |
233 | occurred; hopefully real BIOSs don't assume much. */ | 233 | occurred; hopefully real BIOSs don't assume much. */ |
234 | static unsigned char real_mode_switch [] = | 234 | static const unsigned char real_mode_switch [] = |
235 | { | 235 | { |
236 | 0x66, 0x0f, 0x20, 0xc0, /* movl %cr0,%eax */ | 236 | 0x66, 0x0f, 0x20, 0xc0, /* movl %cr0,%eax */ |
237 | 0x66, 0x83, 0xe0, 0x11, /* andl $0x00000011,%eax */ | 237 | 0x66, 0x83, 0xe0, 0x11, /* andl $0x00000011,%eax */ |
@@ -245,7 +245,7 @@ static unsigned char real_mode_switch [] = | |||
245 | 0x24, 0x10, /* f: andb $0x10,al */ | 245 | 0x24, 0x10, /* f: andb $0x10,al */ |
246 | 0x66, 0x0f, 0x22, 0xc0 /* movl %eax,%cr0 */ | 246 | 0x66, 0x0f, 0x22, 0xc0 /* movl %eax,%cr0 */ |
247 | }; | 247 | }; |
248 | static unsigned char jump_to_bios [] = | 248 | static const unsigned char jump_to_bios [] = |
249 | { | 249 | { |
250 | 0xea, 0x00, 0x00, 0xff, 0xff /* ljmp $0xffff,$0x0000 */ | 250 | 0xea, 0x00, 0x00, 0xff, 0xff /* ljmp $0xffff,$0x0000 */ |
251 | }; | 251 | }; |
@@ -255,7 +255,7 @@ static unsigned char jump_to_bios [] = | |||
255 | * specified by the code and length parameters. | 255 | * specified by the code and length parameters. |
256 | * We assume that length will always be less than 100! | 256 | * We assume that length will always be less than 100! |
257 | */ | 257 | */ |
258 | void machine_real_restart(unsigned char *code, int length) | 258 | void machine_real_restart(const unsigned char *code, int length) |
259 | { | 259 | { |
260 | local_irq_disable(); | 260 | local_irq_disable(); |
261 | 261 | ||
@@ -368,7 +368,7 @@ static void native_machine_emergency_restart(void) | |||
368 | } | 368 | } |
369 | 369 | ||
370 | case BOOT_TRIPLE: | 370 | case BOOT_TRIPLE: |
371 | load_idt((const struct desc_ptr *)&no_idt); | 371 | load_idt(&no_idt); |
372 | __asm__ __volatile__("int3"); | 372 | __asm__ __volatile__("int3"); |
373 | 373 | ||
374 | reboot_type = BOOT_KBD; | 374 | reboot_type = BOOT_KBD; |
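Two of the constifications above are worth spelling out. The empty no_idt works because loading a zero-limit IDT guarantees that the following int3 cannot be delivered, escalating through a double fault to a triple fault and thus a CPU reset. And the GDT access bytes change from 0x9a/0x92 to 0x9b/0x93 because the low bit is the descriptor's accessed flag: with the table now const, pre-setting it keeps the CPU from writing the flag back into the table when the selector is loaded. A worked decode of the code descriptor (illustrative):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t desc  = 0x00009b000000ffffULL; /* real-mode code segment */
            uint8_t access = (desc >> 40) & 0xff;   /* access byte, bits 40-47 */

            printf("present=%u dpl=%u code=%u readable=%u accessed=%u\n",
                   (access >> 7) & 1,   /* P */
                   (access >> 5) & 3,   /* DPL */
                   (access >> 3) & 1,   /* executable */
                   (access >> 1) & 1,   /* readable */
                   access & 1);         /* A: the 0x9a -> 0x9b change */
            return 0;
    }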
diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c index dec0b5ec25c2..61a837743fe5 100644 --- a/arch/x86/kernel/reboot_fixups_32.c +++ b/arch/x86/kernel/reboot_fixups_32.c | |||
@@ -49,7 +49,7 @@ struct device_fixup { | |||
49 | void (*reboot_fixup)(struct pci_dev *); | 49 | void (*reboot_fixup)(struct pci_dev *); |
50 | }; | 50 | }; |
51 | 51 | ||
52 | static struct device_fixup fixups_table[] = { | 52 | static const struct device_fixup fixups_table[] = { |
53 | { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset }, | 53 | { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset }, |
54 | { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset }, | 54 | { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset }, |
55 | { PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE, cs5530a_warm_reset }, | 55 | { PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE, cs5530a_warm_reset }, |
@@ -64,7 +64,7 @@ static struct device_fixup fixups_table[] = { | |||
64 | */ | 64 | */ |
65 | void mach_reboot_fixups(void) | 65 | void mach_reboot_fixups(void) |
66 | { | 66 | { |
67 | struct device_fixup *cur; | 67 | const struct device_fixup *cur; |
68 | struct pci_dev *dev; | 68 | struct pci_dev *dev; |
69 | int i; | 69 | int i; |
70 | 70 | ||
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 9615eee9b775..05191bbc68b8 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c | |||
@@ -4,6 +4,8 @@ | |||
4 | #include <linux/acpi.h> | 4 | #include <linux/acpi.h> |
5 | #include <linux/bcd.h> | 5 | #include <linux/bcd.h> |
6 | #include <linux/mc146818rtc.h> | 6 | #include <linux/mc146818rtc.h> |
7 | #include <linux/platform_device.h> | ||
8 | #include <linux/pnp.h> | ||
7 | 9 | ||
8 | #include <asm/time.h> | 10 | #include <asm/time.h> |
9 | #include <asm/vsyscall.h> | 11 | #include <asm/vsyscall.h> |
@@ -197,3 +199,35 @@ unsigned long long native_read_tsc(void) | |||
197 | } | 199 | } |
198 | EXPORT_SYMBOL(native_read_tsc); | 200 | EXPORT_SYMBOL(native_read_tsc); |
199 | 201 | ||
202 | |||
203 | static struct resource rtc_resources[] = { | ||
204 | [0] = { | ||
205 | .start = RTC_PORT(0), | ||
206 | .end = RTC_PORT(1), | ||
207 | .flags = IORESOURCE_IO, | ||
208 | }, | ||
209 | [1] = { | ||
210 | .start = RTC_IRQ, | ||
211 | .end = RTC_IRQ, | ||
212 | .flags = IORESOURCE_IRQ, | ||
213 | } | ||
214 | }; | ||
215 | |||
216 | static struct platform_device rtc_device = { | ||
217 | .name = "rtc_cmos", | ||
218 | .id = -1, | ||
219 | .resource = rtc_resources, | ||
220 | .num_resources = ARRAY_SIZE(rtc_resources), | ||
221 | }; | ||
222 | |||
223 | static __init int add_rtc_cmos(void) | ||
224 | { | ||
225 | #ifdef CONFIG_PNP | ||
226 | if (!pnp_platform_devices) | ||
227 | platform_device_register(&rtc_device); | ||
228 | #else | ||
229 | platform_device_register(&rtc_device); | ||
230 | #endif /* CONFIG_PNP */ | ||
231 | return 0; | ||
232 | } | ||
233 | device_initcall(add_rtc_cmos); | ||
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index c0c68c18a788..6f80b852a196 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <asm/mpspec.h> | 12 | #include <asm/mpspec.h> |
13 | #include <asm/apicdef.h> | 13 | #include <asm/apicdef.h> |
14 | 14 | ||
15 | #ifdef CONFIG_X86_LOCAL_APIC | ||
15 | unsigned int num_processors; | 16 | unsigned int num_processors; |
16 | unsigned disabled_cpus __cpuinitdata; | 17 | unsigned disabled_cpus __cpuinitdata; |
17 | /* Processor that is doing the boot up */ | 18 | /* Processor that is doing the boot up */ |
@@ -23,8 +24,9 @@ EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); | |||
23 | 24 | ||
24 | /* Bitmask of physically existing CPUs */ | 25 | /* Bitmask of physically existing CPUs */ |
25 | physid_mask_t phys_cpu_present_map; | 26 | physid_mask_t phys_cpu_present_map; |
27 | #endif | ||
26 | 28 | ||
27 | #if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_SMP) | 29 | #if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP) |
28 | /* | 30 | /* |
29 | * Copy data used in early init routines from the initial arrays to the | 31 | * Copy data used in early init routines from the initial arrays to the |
30 | * per cpu data areas. These arrays then become expendable and the | 32 | * per cpu data areas. These arrays then become expendable and the |
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 2c5f8b213e86..5a2f8e063887 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c | |||
@@ -532,10 +532,16 @@ static void __init reserve_crashkernel(void) | |||
532 | (unsigned long)(crash_size >> 20), | 532 | (unsigned long)(crash_size >> 20), |
533 | (unsigned long)(crash_base >> 20), | 533 | (unsigned long)(crash_base >> 20), |
534 | (unsigned long)(total_mem >> 20)); | 534 | (unsigned long)(total_mem >> 20)); |
535 | |||
536 | if (reserve_bootmem(crash_base, crash_size, | ||
537 | BOOTMEM_EXCLUSIVE) < 0) { | ||
538 | printk(KERN_INFO "crashkernel reservation " | ||
539 | "failed - memory is in use\n"); | ||
540 | return; | ||
541 | } | ||
542 | |||
535 | crashk_res.start = crash_base; | 543 | crashk_res.start = crash_base; |
536 | crashk_res.end = crash_base + crash_size - 1; | 544 | crashk_res.end = crash_base + crash_size - 1; |
537 | reserve_bootmem(crash_base, crash_size, | ||
538 | BOOTMEM_DEFAULT); | ||
539 | } else | 545 | } else |
540 | printk(KERN_INFO "crashkernel reservation failed - " | 546 | printk(KERN_INFO "crashkernel reservation failed - " |
541 | "you have to specify a base address\n"); | 547 | "you have to specify a base address\n"); |
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index f2fc8feb727d..545440e471b2 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c | |||
@@ -71,6 +71,7 @@ | |||
71 | #include <asm/topology.h> | 71 | #include <asm/topology.h> |
72 | #include <asm/trampoline.h> | 72 | #include <asm/trampoline.h> |
73 | #include <asm/pat.h> | 73 | #include <asm/pat.h> |
74 | #include <asm/mmconfig.h> | ||
74 | 75 | ||
75 | #include <mach_apic.h> | 76 | #include <mach_apic.h> |
76 | #ifdef CONFIG_PARAVIRT | 77 | #ifdef CONFIG_PARAVIRT |
@@ -79,6 +80,8 @@ | |||
79 | #define ARCH_SETUP | 80 | #define ARCH_SETUP |
80 | #endif | 81 | #endif |
81 | 82 | ||
83 | #include "cpu/cpu.h" | ||
84 | |||
82 | /* | 85 | /* |
83 | * Machine setup.. | 86 | * Machine setup.. |
84 | */ | 87 | */ |
@@ -95,8 +98,6 @@ int bootloader_type; | |||
95 | 98 | ||
96 | unsigned long saved_video_mode; | 99 | unsigned long saved_video_mode; |
97 | 100 | ||
98 | int force_mwait __cpuinitdata; | ||
99 | |||
100 | /* | 101 | /* |
101 | * Early DMI memory | 102 | * Early DMI memory |
102 | */ | 103 | */ |
@@ -118,7 +119,7 @@ EXPORT_SYMBOL_GPL(edid_info); | |||
118 | 119 | ||
119 | extern int root_mountflags; | 120 | extern int root_mountflags; |
120 | 121 | ||
121 | char __initdata command_line[COMMAND_LINE_SIZE]; | 122 | static char __initdata command_line[COMMAND_LINE_SIZE]; |
122 | 123 | ||
123 | static struct resource standard_io_resources[] = { | 124 | static struct resource standard_io_resources[] = { |
124 | { .name = "dma1", .start = 0x00, .end = 0x1f, | 125 | { .name = "dma1", .start = 0x00, .end = 0x1f, |
@@ -164,6 +165,7 @@ static struct resource bss_resource = { | |||
164 | .flags = IORESOURCE_RAM, | 165 | .flags = IORESOURCE_RAM, |
165 | }; | 166 | }; |
166 | 167 | ||
168 | static void __init early_cpu_init(void); | ||
167 | static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c); | 169 | static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c); |
168 | 170 | ||
169 | #ifdef CONFIG_PROC_VMCORE | 171 | #ifdef CONFIG_PROC_VMCORE |
@@ -293,18 +295,6 @@ static void __init parse_setup_data(void) | |||
293 | } | 295 | } |
294 | } | 296 | } |
295 | 297 | ||
296 | #ifdef CONFIG_PCI_MMCONFIG | ||
297 | extern void __cpuinit fam10h_check_enable_mmcfg(void); | ||
298 | extern void __init check_enable_amd_mmconf_dmi(void); | ||
299 | #else | ||
300 | void __cpuinit fam10h_check_enable_mmcfg(void) | ||
301 | { | ||
302 | } | ||
303 | void __init check_enable_amd_mmconf_dmi(void) | ||
304 | { | ||
305 | } | ||
306 | #endif | ||
307 | |||
308 | /* | 298 | /* |
309 | * setup_arch - architecture-specific boot-time initializations | 299 | * setup_arch - architecture-specific boot-time initializations |
310 | * | 300 | * |
@@ -352,6 +342,7 @@ void __init setup_arch(char **cmdline_p) | |||
352 | bss_resource.start = virt_to_phys(&__bss_start); | 342 | bss_resource.start = virt_to_phys(&__bss_start); |
353 | bss_resource.end = virt_to_phys(&__bss_stop)-1; | 343 | bss_resource.end = virt_to_phys(&__bss_stop)-1; |
354 | 344 | ||
345 | early_cpu_init(); | ||
355 | early_identify_cpu(&boot_cpu_data); | 346 | early_identify_cpu(&boot_cpu_data); |
356 | 347 | ||
357 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); | 348 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); |
@@ -537,7 +528,20 @@ void __init setup_arch(char **cmdline_p) | |||
537 | check_enable_amd_mmconf_dmi(); | 528 | check_enable_amd_mmconf_dmi(); |
538 | } | 529 | } |
539 | 530 | ||
540 | static int __cpuinit get_model_name(struct cpuinfo_x86 *c) | 531 | struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; |
532 | |||
533 | static void __cpuinit default_init(struct cpuinfo_x86 *c) | ||
534 | { | ||
535 | display_cacheinfo(c); | ||
536 | } | ||
537 | |||
538 | static struct cpu_dev __cpuinitdata default_cpu = { | ||
539 | .c_init = default_init, | ||
540 | .c_vendor = "Unknown", | ||
541 | }; | ||
542 | static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; | ||
543 | |||
544 | int __cpuinit get_model_name(struct cpuinfo_x86 *c) | ||
541 | { | 545 | { |
542 | unsigned int *v; | 546 | unsigned int *v; |
543 | 547 | ||
@@ -553,7 +557,7 @@ static int __cpuinit get_model_name(struct cpuinfo_x86 *c) | |||
553 | } | 557 | } |
554 | 558 | ||
555 | 559 | ||
556 | static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) | 560 | void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) |
557 | { | 561 | { |
558 | unsigned int n, dummy, eax, ebx, ecx, edx; | 562 | unsigned int n, dummy, eax, ebx, ecx, edx; |
559 | 563 | ||
@@ -585,228 +589,6 @@ static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) | |||
585 | } | 589 | } |
586 | } | 590 | } |
587 | 591 | ||
588 | #ifdef CONFIG_NUMA | ||
589 | static int __cpuinit nearby_node(int apicid) | ||
590 | { | ||
591 | int i, node; | ||
592 | |||
593 | for (i = apicid - 1; i >= 0; i--) { | ||
594 | node = apicid_to_node[i]; | ||
595 | if (node != NUMA_NO_NODE && node_online(node)) | ||
596 | return node; | ||
597 | } | ||
598 | for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { | ||
599 | node = apicid_to_node[i]; | ||
600 | if (node != NUMA_NO_NODE && node_online(node)) | ||
601 | return node; | ||
602 | } | ||
603 | return first_node(node_online_map); /* Shouldn't happen */ | ||
604 | } | ||
605 | #endif | ||
606 | |||
607 | /* | ||
608 | * On a AMD dual core setup the lower bits of the APIC id distingush the cores. | ||
609 | * Assumes number of cores is a power of two. | ||
610 | */ | ||
611 | static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) | ||
612 | { | ||
613 | #ifdef CONFIG_SMP | ||
614 | unsigned bits; | ||
615 | #ifdef CONFIG_NUMA | ||
616 | int cpu = smp_processor_id(); | ||
617 | int node = 0; | ||
618 | unsigned apicid = hard_smp_processor_id(); | ||
619 | #endif | ||
620 | bits = c->x86_coreid_bits; | ||
621 | |||
622 | /* Low order bits define the core id (index of core in socket) */ | ||
623 | c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); | ||
624 | /* Convert the initial APIC ID into the socket ID */ | ||
625 | c->phys_proc_id = c->initial_apicid >> bits; | ||
626 | |||
627 | #ifdef CONFIG_NUMA | ||
628 | node = c->phys_proc_id; | ||
629 | if (apicid_to_node[apicid] != NUMA_NO_NODE) | ||
630 | node = apicid_to_node[apicid]; | ||
631 | if (!node_online(node)) { | ||
632 | /* Two possibilities here: | ||
633 | - The CPU is missing memory and no node was created. | ||
634 | In that case try picking one from a nearby CPU | ||
635 | - The APIC IDs differ from the HyperTransport node IDs | ||
636 | which the K8 northbridge parsing fills in. | ||
637 | Assume they are all increased by a constant offset, | ||
638 | but in the same order as the HT nodeids. | ||
639 | If that doesn't result in a usable node fall back to the | ||
640 | path for the previous case. */ | ||
641 | |||
642 | int ht_nodeid = c->initial_apicid; | ||
643 | |||
644 | if (ht_nodeid >= 0 && | ||
645 | apicid_to_node[ht_nodeid] != NUMA_NO_NODE) | ||
646 | node = apicid_to_node[ht_nodeid]; | ||
647 | /* Pick a nearby node */ | ||
648 | if (!node_online(node)) | ||
649 | node = nearby_node(apicid); | ||
650 | } | ||
651 | numa_set_node(cpu, node); | ||
652 | |||
653 | printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); | ||
654 | #endif | ||
655 | #endif | ||
656 | } | ||
657 | |||
658 | static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) | ||
659 | { | ||
660 | #ifdef CONFIG_SMP | ||
661 | unsigned bits, ecx; | ||
662 | |||
663 | /* Multi core CPU? */ | ||
664 | if (c->extended_cpuid_level < 0x80000008) | ||
665 | return; | ||
666 | |||
667 | ecx = cpuid_ecx(0x80000008); | ||
668 | |||
669 | c->x86_max_cores = (ecx & 0xff) + 1; | ||
670 | |||
671 | /* CPU telling us the core id bits shift? */ | ||
672 | bits = (ecx >> 12) & 0xF; | ||
673 | |||
674 | /* Otherwise recompute */ | ||
675 | if (bits == 0) { | ||
676 | while ((1 << bits) < c->x86_max_cores) | ||
677 | bits++; | ||
678 | } | ||
679 | |||
680 | c->x86_coreid_bits = bits; | ||
681 | |||
682 | #endif | ||
683 | } | ||
684 | |||
685 | #define ENABLE_C1E_MASK 0x18000000 | ||
686 | #define CPUID_PROCESSOR_SIGNATURE 1 | ||
687 | #define CPUID_XFAM 0x0ff00000 | ||
688 | #define CPUID_XFAM_K8 0x00000000 | ||
689 | #define CPUID_XFAM_10H 0x00100000 | ||
690 | #define CPUID_XFAM_11H 0x00200000 | ||
691 | #define CPUID_XMOD 0x000f0000 | ||
692 | #define CPUID_XMOD_REV_F 0x00040000 | ||
693 | |||
694 | /* AMD systems with C1E don't have a working lAPIC timer. Check for that. */ | ||
695 | static __cpuinit int amd_apic_timer_broken(void) | ||
696 | { | ||
697 | u32 lo, hi, eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); | ||
698 | |||
699 | switch (eax & CPUID_XFAM) { | ||
700 | case CPUID_XFAM_K8: | ||
701 | if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F) | ||
702 | break; | ||
703 | case CPUID_XFAM_10H: | ||
704 | case CPUID_XFAM_11H: | ||
705 | rdmsr(MSR_K8_ENABLE_C1E, lo, hi); | ||
706 | if (lo & ENABLE_C1E_MASK) | ||
707 | return 1; | ||
708 | break; | ||
709 | default: | ||
710 | /* err on the side of caution */ | ||
711 | return 1; | ||
712 | } | ||
713 | return 0; | ||
714 | } | ||
715 | |||
716 | static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) | ||
717 | { | ||
718 | early_init_amd_mc(c); | ||
719 | |||
720 | /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ | ||
721 | if (c->x86_power & (1<<8)) | ||
722 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | ||
723 | } | ||
724 | |||
725 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) | ||
726 | { | ||
727 | unsigned level; | ||
728 | |||
729 | #ifdef CONFIG_SMP | ||
730 | unsigned long value; | ||
731 | |||
732 | /* | ||
733 | * Disable TLB flush filter by setting HWCR.FFDIS on K8 | ||
734 | * bit 6 of msr C001_0015 | ||
735 | * | ||
736 | * Errata 63 for SH-B3 steppings | ||
737 | * Errata 122 for all steppings (F+ have it disabled by default) | ||
738 | */ | ||
739 | if (c->x86 == 15) { | ||
740 | rdmsrl(MSR_K8_HWCR, value); | ||
741 | value |= 1 << 6; | ||
742 | wrmsrl(MSR_K8_HWCR, value); | ||
743 | } | ||
744 | #endif | ||
745 | |||
746 | /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; | ||
747 | 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ | ||
748 | clear_cpu_cap(c, 0*32+31); | ||
749 | |||
750 | /* On C+ stepping K8 rep microcode works well for copy/memset */ | ||
751 | level = cpuid_eax(1); | ||
752 | if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || | ||
753 | level >= 0x0f58)) | ||
754 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | ||
755 | if (c->x86 == 0x10 || c->x86 == 0x11) | ||
756 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | ||
757 | |||
758 | /* Enable workaround for FXSAVE leak */ | ||
759 | if (c->x86 >= 6) | ||
760 | set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); | ||
761 | |||
762 | level = get_model_name(c); | ||
763 | if (!level) { | ||
764 | switch (c->x86) { | ||
765 | case 15: | ||
766 | /* Should distinguish Models here, but this is only | ||
767 | a fallback anyways. */ | ||
768 | strcpy(c->x86_model_id, "Hammer"); | ||
769 | break; | ||
770 | } | ||
771 | } | ||
772 | display_cacheinfo(c); | ||
773 | |||
774 | /* Multi core CPU? */ | ||
775 | if (c->extended_cpuid_level >= 0x80000008) | ||
776 | amd_detect_cmp(c); | ||
777 | |||
778 | if (c->extended_cpuid_level >= 0x80000006 && | ||
779 | (cpuid_edx(0x80000006) & 0xf000)) | ||
780 | num_cache_leaves = 4; | ||
781 | else | ||
782 | num_cache_leaves = 3; | ||
783 | |||
784 | if (c->x86 == 0xf || c->x86 == 0x10 || c->x86 == 0x11) | ||
785 | set_cpu_cap(c, X86_FEATURE_K8); | ||
786 | |||
787 | /* MFENCE stops RDTSC speculation */ | ||
788 | set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); | ||
789 | |||
790 | if (c->x86 == 0x10) | ||
791 | fam10h_check_enable_mmcfg(); | ||
792 | |||
793 | if (amd_apic_timer_broken()) | ||
794 | disable_apic_timer = 1; | ||
795 | |||
796 | if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { | ||
797 | unsigned long long tseg; | ||
798 | |||
799 | /* | ||
800 | * Split up direct mapping around the TSEG SMM area. | ||
801 | * Don't do it for gbpages because there seems to be very little | ||
802 | * benefit in doing so. | ||
803 | */ | ||
804 | if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg) && | ||
805 | (tseg >> PMD_SHIFT) < (max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT))) | ||
806 | set_memory_4k((unsigned long)__va(tseg), 1); | ||
807 | } | ||
808 | } | ||
809 | |||
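The TSEG guard above compares the 2MB-page index of the TSEG base against the number of directly mapped 2MB pages. A hedged arithmetic sketch with invented sample values, assuming PMD_SHIFT = 21 and PAGE_SHIFT = 12 as on x86-64:

```c
#include <stdio.h>

#define PAGE_SHIFT 12			/* 4KB pages */
#define PMD_SHIFT  21			/* 2MB large pages on x86-64 */

int main(void)
{
	unsigned long long tseg = 0x7f000000ULL;	/* invented TSEG base */
	unsigned long max_pfn_mapped = 0x100000;	/* 4GB mapped, in 4KB pages */

	/* 2MB-page index of TSEG vs. number of directly mapped 2MB pages */
	unsigned long long tseg_pmd = tseg >> PMD_SHIFT;
	unsigned long mapped_pmds = max_pfn_mapped >> (PMD_SHIFT - PAGE_SHIFT);

	printf("TSEG pmd %llu is %s the %lu mapped pmds\n", tseg_pmd,
	       tseg_pmd < mapped_pmds ? "inside" : "outside", mapped_pmds);
	return 0;
}
```

Only when TSEG falls inside the mapped range does set_memory_4k() need to split the surrounding large page.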
810 | void __cpuinit detect_ht(struct cpuinfo_x86 *c) | 592 | void __cpuinit detect_ht(struct cpuinfo_x86 *c) |
811 | { | 593 | { |
812 | #ifdef CONFIG_SMP | 594 | #ifdef CONFIG_SMP |
@@ -857,135 +639,59 @@ out: | |||
857 | #endif | 639 | #endif |
858 | } | 640 | } |
859 | 641 | ||
860 | /* | 642 | static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) |
861 | * find out the number of processor cores on the die | ||
862 | */ | ||
863 | static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) | ||
864 | { | ||
865 | unsigned int eax, t; | ||
866 | |||
867 | if (c->cpuid_level < 4) | ||
868 | return 1; | ||
869 | |||
870 | cpuid_count(4, 0, &eax, &t, &t, &t); | ||
871 | |||
872 | if (eax & 0x1f) | ||
873 | return ((eax >> 26) + 1); | ||
874 | else | ||
875 | return 1; | ||
876 | } | ||
877 | |||
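intel_num_cpu_cores() reads CPUID leaf 4, where EAX bits 4:0 carry the cache type (zero meaning no valid subleaf) and bits 31:26 carry the maximum core count minus one. A small sketch of the same decode with an invented EAX value:

```c
#include <stdio.h>

int main(void)
{
	unsigned int eax = 0x0c000121;	/* invented CPUID.4 EAX value */

	if (eax & 0x1f)			/* bits 4:0 = cache type, 0 = invalid */
		printf("%u cores\n", (eax >> 26) + 1);	/* bits 31:26 = cores - 1 */
	else
		printf("1 core\n");
	return 0;
}
```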
878 | static void __cpuinit srat_detect_node(void) | ||
879 | { | ||
880 | #ifdef CONFIG_NUMA | ||
881 | unsigned node; | ||
882 | int cpu = smp_processor_id(); | ||
883 | int apicid = hard_smp_processor_id(); | ||
884 | |||
885 | /* For now, don't do the funky fallback heuristics that the | ||
886 | AMD version employs. */ | ||
887 | node = apicid_to_node[apicid]; | ||
888 | if (node == NUMA_NO_NODE || !node_online(node)) | ||
889 | node = first_node(node_online_map); | ||
890 | numa_set_node(cpu, node); | ||
891 | |||
892 | printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); | ||
893 | #endif | ||
894 | } | ||
895 | |||
896 | static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | ||
897 | { | ||
898 | if ((c->x86 == 0xf && c->x86_model >= 0x03) || | ||
899 | (c->x86 == 0x6 && c->x86_model >= 0x0e)) | ||
900 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | ||
901 | } | ||
902 | |||
903 | static void __cpuinit init_intel(struct cpuinfo_x86 *c) | ||
904 | { | 643 | { |
905 | /* Cache sizes */ | 644 | char *v = c->x86_vendor_id; |
906 | unsigned n; | 645 | int i; |
907 | 646 | static int printed; | |
908 | init_intel_cacheinfo(c); | 647 | |
909 | if (c->cpuid_level > 9) { | 648 | for (i = 0; i < X86_VENDOR_NUM; i++) { |
910 | unsigned eax = cpuid_eax(10); | 649 | if (cpu_devs[i]) { |
911 | /* Check for version and the number of counters */ | 650 | if (!strcmp(v, cpu_devs[i]->c_ident[0]) || |
912 | if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) | 651 | (cpu_devs[i]->c_ident[1] && |
913 | set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); | 652 | !strcmp(v, cpu_devs[i]->c_ident[1]))) { |
653 | c->x86_vendor = i; | ||
654 | this_cpu = cpu_devs[i]; | ||
655 | return; | ||
656 | } | ||
657 | } | ||
914 | } | 658 | } |
915 | 659 | if (!printed) { | |
916 | if (cpu_has_ds) { | 660 | printed++; |
917 | unsigned int l1, l2; | 661 | printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); |
918 | rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); | 662 | printk(KERN_ERR "CPU: Your system may be unstable.\n"); |
919 | if (!(l1 & (1<<11))) | ||
920 | set_cpu_cap(c, X86_FEATURE_BTS); | ||
921 | if (!(l1 & (1<<12))) | ||
922 | set_cpu_cap(c, X86_FEATURE_PEBS); | ||
923 | } | 663 | } |
924 | 664 | c->x86_vendor = X86_VENDOR_UNKNOWN; | |
925 | |||
926 | if (cpu_has_bts) | ||
927 | ds_init_intel(c); | ||
928 | |||
929 | n = c->extended_cpuid_level; | ||
930 | if (n >= 0x80000008) { | ||
931 | unsigned eax = cpuid_eax(0x80000008); | ||
932 | c->x86_virt_bits = (eax >> 8) & 0xff; | ||
933 | c->x86_phys_bits = eax & 0xff; | ||
934 | /* CPUID workaround for Intel 0F34 CPU */ | ||
935 | if (c->x86_vendor == X86_VENDOR_INTEL && | ||
936 | c->x86 == 0xF && c->x86_model == 0x3 && | ||
937 | c->x86_mask == 0x4) | ||
938 | c->x86_phys_bits = 36; | ||
939 | } | ||
940 | |||
941 | if (c->x86 == 15) | ||
942 | c->x86_cache_alignment = c->x86_clflush_size * 2; | ||
943 | if (c->x86 == 6) | ||
944 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | ||
945 | set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); | ||
946 | c->x86_max_cores = intel_num_cpu_cores(c); | ||
947 | |||
948 | srat_detect_node(); | ||
949 | } | ||
950 | |||
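The ARCH_PERFMON test in the removed init_intel() decodes CPUID leaf 0xA: EAX bits 7:0 hold the architectural perfmon version and bits 15:8 the number of general-purpose counters. A hedged sketch of that check, using an invented sample value:

```c
#include <stdio.h>

int main(void)
{
	unsigned int eax = 0x07300402;		/* invented CPUID.0xA EAX value */
	unsigned int version  = eax & 0xff;		/* bits 7:0 */
	unsigned int counters = (eax >> 8) & 0xff;	/* bits 15:8 */

	printf("perfmon v%u with %u counters: %sset ARCH_PERFMON\n",
	       version, counters, (version && counters > 1) ? "" : "don't ");
	return 0;
}
```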
951 | static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) | ||
952 | { | ||
953 | if (c->x86 == 0x6 && c->x86_model >= 0xf) | ||
954 | set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); | ||
955 | } | 665 | } |
956 | 666 | ||
957 | static void __cpuinit init_centaur(struct cpuinfo_x86 *c) | 667 | static void __init early_cpu_support_print(void) |
958 | { | 668 | { |
959 | /* Cache sizes */ | 669 | int i, j; |
960 | unsigned n; | 670 | struct cpu_dev *cpu_devx; |
961 | 671 | ||
962 | n = c->extended_cpuid_level; | 672 | printk("KERNEL supported cpus:\n"); |
963 | if (n >= 0x80000008) { | 673 | for (i = 0; i < X86_VENDOR_NUM; i++) { |
964 | unsigned eax = cpuid_eax(0x80000008); | 674 | cpu_devx = cpu_devs[i]; |
965 | c->x86_virt_bits = (eax >> 8) & 0xff; | 675 | if (!cpu_devx) |
966 | c->x86_phys_bits = eax & 0xff; | 676 | continue; |
967 | } | 677 | for (j = 0; j < 2; j++) { |
968 | 678 | if (!cpu_devx->c_ident[j]) | |
969 | if (c->x86 == 0x6 && c->x86_model >= 0xf) { | 679 | continue; |
970 | c->x86_cache_alignment = c->x86_clflush_size * 2; | 680 | printk(" %s %s\n", cpu_devx->c_vendor, |
971 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 681 | cpu_devx->c_ident[j]); |
972 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | 682 | } |
973 | } | 683 | } |
974 | set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); | ||
975 | } | 684 | } |
976 | 685 | ||
977 | static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) | 686 | static void __init early_cpu_init(void) |
978 | { | 687 | { |
979 | char *v = c->x86_vendor_id; | 688 | struct cpu_vendor_dev *cvdev; |
980 | 689 | ||
981 | if (!strcmp(v, "AuthenticAMD")) | 690 | for (cvdev = __x86cpuvendor_start ; |
982 | c->x86_vendor = X86_VENDOR_AMD; | 691 | cvdev < __x86cpuvendor_end ; |
983 | else if (!strcmp(v, "GenuineIntel")) | 692 | cvdev++) |
984 | c->x86_vendor = X86_VENDOR_INTEL; | 693 | cpu_devs[cvdev->vendor] = cvdev->cpu_dev; |
985 | else if (!strcmp(v, "CentaurHauls")) | 694 | early_cpu_support_print(); |
986 | c->x86_vendor = X86_VENDOR_CENTAUR; | ||
987 | else | ||
988 | c->x86_vendor = X86_VENDOR_UNKNOWN; | ||
989 | } | 695 | } |
990 | 696 | ||
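early_cpu_init() walks a linker-section array, __x86cpuvendor_start to __x86cpuvendor_end, instead of the old hard-coded vendor strcmp chain. A hedged sketch of the registration side that fills that section; the macro body is a plausible reconstruction, not quoted from this patch:

```c
struct cpu_dev;				/* per-vendor callbacks, elided here */

struct cpu_vendor_dev {
	int vendor;			/* X86_VENDOR_* index */
	struct cpu_dev *cpu_dev;
};

/* each vendor file drops one entry into .x86cpuvendor.init; 'used'
   keeps the otherwise-unreferenced static from being discarded */
#define cpu_vendor_dev_register(vid, cdev)			\
	static struct cpu_vendor_dev __cpu_vendor_dev_##vid	\
	__attribute__((used, section(".x86cpuvendor.init"))) =	\
	{ .vendor = (vid), .cpu_dev = (cdev) }
```

Walking the section as an array of structs only works if every entry is laid out back to back, which is what the `. = ALIGN(16);` added to vmlinux_64.lds.S further down guarantees.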
991 | /* Do some early cpuid on the boot CPU to get some parameters that are | 697 | /* Do some early cpuid on the boot CPU to get some parameters that are |
@@ -1066,17 +772,9 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) | |||
1066 | if (c->extended_cpuid_level >= 0x80000007) | 772 | if (c->extended_cpuid_level >= 0x80000007) |
1067 | c->x86_power = cpuid_edx(0x80000007); | 773 | c->x86_power = cpuid_edx(0x80000007); |
1068 | 774 | ||
1069 | switch (c->x86_vendor) { | 775 | if (c->x86_vendor != X86_VENDOR_UNKNOWN && |
1070 | case X86_VENDOR_AMD: | 776 | cpu_devs[c->x86_vendor]->c_early_init) |
1071 | early_init_amd(c); | 777 | cpu_devs[c->x86_vendor]->c_early_init(c); |
1072 | break; | ||
1073 | case X86_VENDOR_INTEL: | ||
1074 | early_init_intel(c); | ||
1075 | break; | ||
1076 | case X86_VENDOR_CENTAUR: | ||
1077 | early_init_centaur(c); | ||
1078 | break; | ||
1079 | } | ||
1080 | 778 | ||
1081 | validate_pat_support(c); | 779 | validate_pat_support(c); |
1082 | } | 780 | } |
@@ -1104,24 +802,8 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
1104 | * At the end of this section, c->x86_capability better | 802 | * At the end of this section, c->x86_capability better |
1105 | * indicate the features this CPU genuinely supports! | 803 | * indicate the features this CPU genuinely supports! |
1106 | */ | 804 | */ |
1107 | switch (c->x86_vendor) { | 805 | if (this_cpu->c_init) |
1108 | case X86_VENDOR_AMD: | 806 | this_cpu->c_init(c); |
1109 | init_amd(c); | ||
1110 | break; | ||
1111 | |||
1112 | case X86_VENDOR_INTEL: | ||
1113 | init_intel(c); | ||
1114 | break; | ||
1115 | |||
1116 | case X86_VENDOR_CENTAUR: | ||
1117 | init_centaur(c); | ||
1118 | break; | ||
1119 | |||
1120 | case X86_VENDOR_UNKNOWN: | ||
1121 | default: | ||
1122 | display_cacheinfo(c); | ||
1123 | break; | ||
1124 | } | ||
1125 | 807 | ||
1126 | detect_ht(c); | 808 | detect_ht(c); |
1127 | 809 | ||
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 8f75893a6467..0cb7aadc87cd 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c | |||
@@ -231,7 +231,8 @@ native_smp_call_function_mask(cpumask_t mask, | |||
231 | wmb(); | 231 | wmb(); |
232 | 232 | ||
233 | /* Send a message to other CPUs */ | 233 | /* Send a message to other CPUs */ |
234 | if (cpus_equal(mask, allbutself)) | 234 | if (cpus_equal(mask, allbutself) && |
235 | cpus_equal(cpu_online_map, cpu_callout_map)) | ||
235 | send_IPI_allbutself(CALL_FUNCTION_VECTOR); | 236 | send_IPI_allbutself(CALL_FUNCTION_VECTOR); |
236 | else | 237 | else |
237 | send_IPI_mask(mask, CALL_FUNCTION_VECTOR); | 238 | send_IPI_mask(mask, CALL_FUNCTION_VECTOR); |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 6b087ab6cd8f..f2b666756299 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -59,7 +59,6 @@ | |||
59 | #include <asm/pgtable.h> | 59 | #include <asm/pgtable.h> |
60 | #include <asm/tlbflush.h> | 60 | #include <asm/tlbflush.h> |
61 | #include <asm/mtrr.h> | 61 | #include <asm/mtrr.h> |
62 | #include <asm/nmi.h> | ||
63 | #include <asm/vmi.h> | 62 | #include <asm/vmi.h> |
64 | #include <asm/genapic.h> | 63 | #include <asm/genapic.h> |
65 | #include <linux/mc146818rtc.h> | 64 | #include <linux/mc146818rtc.h> |
@@ -86,6 +85,7 @@ void *x86_bios_cpu_apicid_early_ptr; | |||
86 | 85 | ||
87 | #ifdef CONFIG_X86_32 | 86 | #ifdef CONFIG_X86_32 |
88 | u8 apicid_2_node[MAX_APICID]; | 87 | u8 apicid_2_node[MAX_APICID]; |
88 | static int low_mappings; | ||
89 | #endif | 89 | #endif |
90 | 90 | ||
91 | /* State of each CPU */ | 91 | /* State of each CPU */ |
@@ -326,6 +326,12 @@ static void __cpuinit start_secondary(void *unused) | |||
326 | enable_8259A_irq(0); | 326 | enable_8259A_irq(0); |
327 | } | 327 | } |
328 | 328 | ||
329 | #ifdef CONFIG_X86_32 | ||
330 | while (low_mappings) | ||
331 | cpu_relax(); | ||
332 | __flush_tlb_all(); | ||
333 | #endif | ||
334 | |||
329 | /* This must be done before setting cpu_online_map */ | 335 | /* This must be done before setting cpu_online_map */ |
330 | set_cpu_sibling_map(raw_smp_processor_id()); | 336 | set_cpu_sibling_map(raw_smp_processor_id()); |
331 | wmb(); | 337 | wmb(); |
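The low_mappings flag added here pairs with the native_cpu_up() changes further down: the boot CPU keeps the low identity mappings alive only while an AP is being brought up, and the AP waits for them to be torn down before proceeding. A hedged sketch of the handshake, with barriers and the real mapping calls elided:

```c
volatile int low_mappings_sketch;

void boot_cpu_path(void)		/* native_cpu_up() side */
{
	low_mappings_sketch = 1;	/* low identity mappings installed */
	/* do_boot_cpu() starts the AP while those mappings exist */
	/* zap_low_mappings() then removes them on the boot CPU */
	low_mappings_sketch = 0;	/* release the waiting AP */
}

void secondary_cpu_path(void)		/* start_secondary() side */
{
	while (low_mappings_sketch)
		;			/* cpu_relax() in the real code */
	/* __flush_tlb_all() so no stale low-mapping TLB entries survive */
}
```

This is why the unconditional zap_low_mappings() call at the end of native_smp_cpus_done() can be removed below.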
@@ -989,7 +995,6 @@ do_rest: | |||
989 | #endif | 995 | #endif |
990 | cpu_clear(cpu, cpu_callout_map); /* was set by do_boot_cpu() */ | 996 | cpu_clear(cpu, cpu_callout_map); /* was set by do_boot_cpu() */ |
991 | cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ | 997 | cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ |
992 | cpu_clear(cpu, cpu_possible_map); | ||
993 | cpu_clear(cpu, cpu_present_map); | 998 | cpu_clear(cpu, cpu_present_map); |
994 | per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID; | 999 | per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID; |
995 | } | 1000 | } |
@@ -1040,14 +1045,20 @@ int __cpuinit native_cpu_up(unsigned int cpu) | |||
1040 | #ifdef CONFIG_X86_32 | 1045 | #ifdef CONFIG_X86_32 |
1041 | /* init low mem mapping */ | 1046 | /* init low mem mapping */ |
1042 | clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY, | 1047 | clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY, |
1043 | min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); | 1048 | min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); |
1044 | flush_tlb_all(); | 1049 | flush_tlb_all(); |
1045 | #endif | 1050 | low_mappings = 1; |
1051 | |||
1052 | err = do_boot_cpu(apicid, cpu); | ||
1046 | 1053 | ||
1054 | zap_low_mappings(); | ||
1055 | low_mappings = 0; | ||
1056 | #else | ||
1047 | err = do_boot_cpu(apicid, cpu); | 1057 | err = do_boot_cpu(apicid, cpu); |
1048 | if (err < 0) { | 1058 | #endif |
1059 | if (err) { | ||
1049 | Dprintk("do_boot_cpu failed %d\n", err); | 1060 | Dprintk("do_boot_cpu failed %d\n", err); |
1050 | return err; | 1061 | return -EIO; |
1051 | } | 1062 | } |
1052 | 1063 | ||
1053 | /* | 1064 | /* |
@@ -1145,9 +1156,11 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
1145 | * If SMP should be disabled, then really disable it! | 1156 | * If SMP should be disabled, then really disable it! |
1146 | */ | 1157 | */ |
1147 | if (!max_cpus) { | 1158 | if (!max_cpus) { |
1148 | printk(KERN_INFO "SMP mode deactivated," | 1159 | printk(KERN_INFO "SMP mode deactivated.\n"); |
1149 | "forcing use of dummy APIC emulation.\n"); | ||
1150 | smpboot_clear_io_apic(); | 1160 | smpboot_clear_io_apic(); |
1161 | |||
1162 | localise_nmi_watchdog(); | ||
1163 | |||
1151 | #ifdef CONFIG_X86_32 | 1164 | #ifdef CONFIG_X86_32 |
1152 | connect_bsp_APIC(); | 1165 | connect_bsp_APIC(); |
1153 | #endif | 1166 | #endif |
@@ -1177,6 +1190,7 @@ static void __init smp_cpu_index_default(void) | |||
1177 | */ | 1190 | */ |
1178 | void __init native_smp_prepare_cpus(unsigned int max_cpus) | 1191 | void __init native_smp_prepare_cpus(unsigned int max_cpus) |
1179 | { | 1192 | { |
1193 | preempt_disable(); | ||
1180 | nmi_watchdog_default(); | 1194 | nmi_watchdog_default(); |
1181 | smp_cpu_index_default(); | 1195 | smp_cpu_index_default(); |
1182 | current_cpu_data = boot_cpu_data; | 1196 | current_cpu_data = boot_cpu_data; |
@@ -1193,7 +1207,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
1193 | if (smp_sanity_check(max_cpus) < 0) { | 1207 | if (smp_sanity_check(max_cpus) < 0) { |
1194 | printk(KERN_INFO "SMP disabled\n"); | 1208 | printk(KERN_INFO "SMP disabled\n"); |
1195 | disable_smp(); | 1209 | disable_smp(); |
1196 | return; | 1210 | goto out; |
1197 | } | 1211 | } |
1198 | 1212 | ||
1199 | preempt_disable(); | 1213 | preempt_disable(); |
@@ -1233,6 +1247,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
1233 | printk(KERN_INFO "CPU%d: ", 0); | 1247 | printk(KERN_INFO "CPU%d: ", 0); |
1234 | print_cpu_info(&cpu_data(0)); | 1248 | print_cpu_info(&cpu_data(0)); |
1235 | setup_boot_clock(); | 1249 | setup_boot_clock(); |
1250 | out: | ||
1251 | preempt_enable(); | ||
1236 | } | 1252 | } |
1237 | /* | 1253 | /* |
1238 | * Early setup to make printk work. | 1254 | * Early setup to make printk work. |
@@ -1259,9 +1275,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus) | |||
1259 | setup_ioapic_dest(); | 1275 | setup_ioapic_dest(); |
1260 | #endif | 1276 | #endif |
1261 | check_nmi_watchdog(); | 1277 | check_nmi_watchdog(); |
1262 | #ifdef CONFIG_X86_32 | ||
1263 | zap_low_mappings(); | ||
1264 | #endif | ||
1265 | } | 1278 | } |
1266 | 1279 | ||
1267 | #ifdef CONFIG_HOTPLUG_CPU | 1280 | #ifdef CONFIG_HOTPLUG_CPU |
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index d2ab52cc1d6b..7066cb855a60 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c | |||
@@ -19,8 +19,8 @@ | |||
19 | #include <linux/utsname.h> | 19 | #include <linux/utsname.h> |
20 | #include <linux/ipc.h> | 20 | #include <linux/ipc.h> |
21 | 21 | ||
22 | #include <asm/uaccess.h> | 22 | #include <linux/uaccess.h> |
23 | #include <asm/unistd.h> | 23 | #include <linux/unistd.h> |
24 | 24 | ||
25 | asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, | 25 | asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, |
26 | unsigned long prot, unsigned long flags, | 26 | unsigned long prot, unsigned long flags, |
@@ -103,7 +103,7 @@ asmlinkage int old_select(struct sel_arg_struct __user *arg) | |||
103 | * | 103 | * |
104 | * This is really horribly ugly. | 104 | * This is really horribly ugly. |
105 | */ | 105 | */ |
106 | asmlinkage int sys_ipc (uint call, int first, int second, | 106 | asmlinkage int sys_ipc(uint call, int first, int second, |
107 | int third, void __user *ptr, long fifth) | 107 | int third, void __user *ptr, long fifth) |
108 | { | 108 | { |
109 | int version, ret; | 109 | int version, ret; |
@@ -113,24 +113,24 @@ asmlinkage int sys_ipc (uint call, int first, int second, | |||
113 | 113 | ||
114 | switch (call) { | 114 | switch (call) { |
115 | case SEMOP: | 115 | case SEMOP: |
116 | return sys_semtimedop (first, (struct sembuf __user *)ptr, second, NULL); | 116 | return sys_semtimedop(first, (struct sembuf __user *)ptr, second, NULL); |
117 | case SEMTIMEDOP: | 117 | case SEMTIMEDOP: |
118 | return sys_semtimedop(first, (struct sembuf __user *)ptr, second, | 118 | return sys_semtimedop(first, (struct sembuf __user *)ptr, second, |
119 | (const struct timespec __user *)fifth); | 119 | (const struct timespec __user *)fifth); |
120 | 120 | ||
121 | case SEMGET: | 121 | case SEMGET: |
122 | return sys_semget (first, second, third); | 122 | return sys_semget(first, second, third); |
123 | case SEMCTL: { | 123 | case SEMCTL: { |
124 | union semun fourth; | 124 | union semun fourth; |
125 | if (!ptr) | 125 | if (!ptr) |
126 | return -EINVAL; | 126 | return -EINVAL; |
127 | if (get_user(fourth.__pad, (void __user * __user *) ptr)) | 127 | if (get_user(fourth.__pad, (void __user * __user *) ptr)) |
128 | return -EFAULT; | 128 | return -EFAULT; |
129 | return sys_semctl (first, second, third, fourth); | 129 | return sys_semctl(first, second, third, fourth); |
130 | } | 130 | } |
131 | 131 | ||
132 | case MSGSND: | 132 | case MSGSND: |
133 | return sys_msgsnd (first, (struct msgbuf __user *) ptr, | 133 | return sys_msgsnd(first, (struct msgbuf __user *) ptr, |
134 | second, third); | 134 | second, third); |
135 | case MSGRCV: | 135 | case MSGRCV: |
136 | switch (version) { | 136 | switch (version) { |
@@ -138,45 +138,45 @@ asmlinkage int sys_ipc (uint call, int first, int second, | |||
138 | struct ipc_kludge tmp; | 138 | struct ipc_kludge tmp; |
139 | if (!ptr) | 139 | if (!ptr) |
140 | return -EINVAL; | 140 | return -EINVAL; |
141 | 141 | ||
142 | if (copy_from_user(&tmp, | 142 | if (copy_from_user(&tmp, |
143 | (struct ipc_kludge __user *) ptr, | 143 | (struct ipc_kludge __user *) ptr, |
144 | sizeof (tmp))) | 144 | sizeof(tmp))) |
145 | return -EFAULT; | 145 | return -EFAULT; |
146 | return sys_msgrcv (first, tmp.msgp, second, | 146 | return sys_msgrcv(first, tmp.msgp, second, |
147 | tmp.msgtyp, third); | 147 | tmp.msgtyp, third); |
148 | } | 148 | } |
149 | default: | 149 | default: |
150 | return sys_msgrcv (first, | 150 | return sys_msgrcv(first, |
151 | (struct msgbuf __user *) ptr, | 151 | (struct msgbuf __user *) ptr, |
152 | second, fifth, third); | 152 | second, fifth, third); |
153 | } | 153 | } |
154 | case MSGGET: | 154 | case MSGGET: |
155 | return sys_msgget ((key_t) first, second); | 155 | return sys_msgget((key_t) first, second); |
156 | case MSGCTL: | 156 | case MSGCTL: |
157 | return sys_msgctl (first, second, (struct msqid_ds __user *) ptr); | 157 | return sys_msgctl(first, second, (struct msqid_ds __user *) ptr); |
158 | 158 | ||
159 | case SHMAT: | 159 | case SHMAT: |
160 | switch (version) { | 160 | switch (version) { |
161 | default: { | 161 | default: { |
162 | ulong raddr; | 162 | ulong raddr; |
163 | ret = do_shmat (first, (char __user *) ptr, second, &raddr); | 163 | ret = do_shmat(first, (char __user *) ptr, second, &raddr); |
164 | if (ret) | 164 | if (ret) |
165 | return ret; | 165 | return ret; |
166 | return put_user (raddr, (ulong __user *) third); | 166 | return put_user(raddr, (ulong __user *) third); |
167 | } | 167 | } |
168 | case 1: /* iBCS2 emulator entry point */ | 168 | case 1: /* iBCS2 emulator entry point */ |
169 | if (!segment_eq(get_fs(), get_ds())) | 169 | if (!segment_eq(get_fs(), get_ds())) |
170 | return -EINVAL; | 170 | return -EINVAL; |
171 | /* The "(ulong *) third" is valid _only_ because of the kernel segment thing */ | 171 | /* The "(ulong *) third" is valid _only_ because of the kernel segment thing */ |
172 | return do_shmat (first, (char __user *) ptr, second, (ulong *) third); | 172 | return do_shmat(first, (char __user *) ptr, second, (ulong *) third); |
173 | } | 173 | } |
174 | case SHMDT: | 174 | case SHMDT: |
175 | return sys_shmdt ((char __user *)ptr); | 175 | return sys_shmdt((char __user *)ptr); |
176 | case SHMGET: | 176 | case SHMGET: |
177 | return sys_shmget (first, second, third); | 177 | return sys_shmget(first, second, third); |
178 | case SHMCTL: | 178 | case SHMCTL: |
179 | return sys_shmctl (first, second, | 179 | return sys_shmctl(first, second, |
180 | (struct shmid_ds __user *) ptr); | 180 | (struct shmid_ds __user *) ptr); |
181 | default: | 181 | default: |
182 | return -ENOSYS; | 182 | return -ENOSYS; |
@@ -186,28 +186,28 @@ asmlinkage int sys_ipc (uint call, int first, int second, | |||
186 | /* | 186 | /* |
187 | * Old cruft | 187 | * Old cruft |
188 | */ | 188 | */ |
189 | asmlinkage int sys_uname(struct old_utsname __user * name) | 189 | asmlinkage int sys_uname(struct old_utsname __user *name) |
190 | { | 190 | { |
191 | int err; | 191 | int err; |
192 | if (!name) | 192 | if (!name) |
193 | return -EFAULT; | 193 | return -EFAULT; |
194 | down_read(&uts_sem); | 194 | down_read(&uts_sem); |
195 | err = copy_to_user(name, utsname(), sizeof (*name)); | 195 | err = copy_to_user(name, utsname(), sizeof(*name)); |
196 | up_read(&uts_sem); | 196 | up_read(&uts_sem); |
197 | return err?-EFAULT:0; | 197 | return err ? -EFAULT : 0; |
198 | } | 198 | } |
199 | 199 | ||
200 | asmlinkage int sys_olduname(struct oldold_utsname __user * name) | 200 | asmlinkage int sys_olduname(struct oldold_utsname __user *name) |
201 | { | 201 | { |
202 | int error; | 202 | int error; |
203 | 203 | ||
204 | if (!name) | 204 | if (!name) |
205 | return -EFAULT; | 205 | return -EFAULT; |
206 | if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname))) | 206 | if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname))) |
207 | return -EFAULT; | 207 | return -EFAULT; |
208 | 208 | ||
209 | down_read(&uts_sem); | 209 | down_read(&uts_sem); |
210 | 210 | ||
211 | error = __copy_to_user(&name->sysname, &utsname()->sysname, | 211 | error = __copy_to_user(&name->sysname, &utsname()->sysname, |
212 | __OLD_UTS_LEN); | 212 | __OLD_UTS_LEN); |
213 | error |= __put_user(0, name->sysname + __OLD_UTS_LEN); | 213 | error |= __put_user(0, name->sysname + __OLD_UTS_LEN); |
@@ -223,9 +223,9 @@ asmlinkage int sys_olduname(struct oldold_utsname __user * name) | |||
223 | error |= __copy_to_user(&name->machine, &utsname()->machine, | 223 | error |= __copy_to_user(&name->machine, &utsname()->machine, |
224 | __OLD_UTS_LEN); | 224 | __OLD_UTS_LEN); |
225 | error |= __put_user(0, name->machine + __OLD_UTS_LEN); | 225 | error |= __put_user(0, name->machine + __OLD_UTS_LEN); |
226 | 226 | ||
227 | up_read(&uts_sem); | 227 | up_read(&uts_sem); |
228 | 228 | ||
229 | error = error ? -EFAULT : 0; | 229 | error = error ? -EFAULT : 0; |
230 | 230 | ||
231 | return error; | 231 | return error; |
@@ -241,6 +241,6 @@ int kernel_execve(const char *filename, char *const argv[], char *const envp[]) | |||
241 | long __res; | 241 | long __res; |
242 | asm volatile ("push %%ebx ; movl %2,%%ebx ; int $0x80 ; pop %%ebx" | 242 | asm volatile ("push %%ebx ; movl %2,%%ebx ; int $0x80 ; pop %%ebx" |
243 | : "=a" (__res) | 243 | : "=a" (__res) |
244 | : "0" (__NR_execve),"ri" (filename),"c" (argv), "d" (envp) : "memory"); | 244 | : "0" (__NR_execve), "ri" (filename), "c" (argv), "d" (envp) : "memory"); |
245 | return __res; | 245 | return __res; |
246 | } | 246 | } |
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index 2ff21f398934..5f29f12da50c 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c | |||
@@ -84,8 +84,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id) | |||
84 | if (timer_ack) { | 84 | if (timer_ack) { |
85 | /* | 85 | /* |
86 | * Subtle, when I/O APICs are used we have to ack timer IRQ | 86 | * Subtle, when I/O APICs are used we have to ack timer IRQ |
87 | * manually to reset the IRR bit for do_slow_gettimeoffset(). | 87 | * manually to deassert NMI lines for the watchdog if run |
88 | * This will also deassert NMI lines for the watchdog if run | ||
89 | * on an 82489DX-based system. | 88 | * on an 82489DX-based system. |
90 | */ | 89 | */ |
91 | spin_lock(&i8259A_lock); | 90 | spin_lock(&i8259A_lock); |
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index bde6f63e15d5..08d752de4eee 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c | |||
@@ -544,6 +544,7 @@ vm86_trap: | |||
544 | #define DO_ERROR(trapnr, signr, str, name) \ | 544 | #define DO_ERROR(trapnr, signr, str, name) \ |
545 | void do_##name(struct pt_regs *regs, long error_code) \ | 545 | void do_##name(struct pt_regs *regs, long error_code) \ |
546 | { \ | 546 | { \ |
547 | trace_hardirqs_fixup(); \ | ||
547 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | 548 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ |
548 | == NOTIFY_STOP) \ | 549 | == NOTIFY_STOP) \ |
549 | return; \ | 550 | return; \ |
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index adff76ea97c4..ec6d3b2130c4 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c | |||
@@ -71,7 +71,6 @@ asmlinkage void general_protection(void); | |||
71 | asmlinkage void page_fault(void); | 71 | asmlinkage void page_fault(void); |
72 | asmlinkage void coprocessor_error(void); | 72 | asmlinkage void coprocessor_error(void); |
73 | asmlinkage void simd_coprocessor_error(void); | 73 | asmlinkage void simd_coprocessor_error(void); |
74 | asmlinkage void reserved(void); | ||
75 | asmlinkage void alignment_check(void); | 74 | asmlinkage void alignment_check(void); |
76 | asmlinkage void machine_check(void); | 75 | asmlinkage void machine_check(void); |
77 | asmlinkage void spurious_interrupt_bug(void); | 76 | asmlinkage void spurious_interrupt_bug(void); |
@@ -702,12 +701,10 @@ DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) | |||
702 | DO_ERROR( 4, SIGSEGV, "overflow", overflow) | 701 | DO_ERROR( 4, SIGSEGV, "overflow", overflow) |
703 | DO_ERROR( 5, SIGSEGV, "bounds", bounds) | 702 | DO_ERROR( 5, SIGSEGV, "bounds", bounds) |
704 | DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip) | 703 | DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip) |
705 | DO_ERROR( 7, SIGSEGV, "device not available", device_not_available) | ||
706 | DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) | 704 | DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) |
707 | DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) | 705 | DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) |
708 | DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) | 706 | DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) |
709 | DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) | 707 | DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) |
710 | DO_ERROR(18, SIGSEGV, "reserved", reserved) | ||
711 | 708 | ||
712 | /* Runs on IST stack */ | 709 | /* Runs on IST stack */ |
713 | asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code) | 710 | asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code) |
diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c index e4790728b224..774a5a83c296 100644 --- a/arch/x86/kernel/tsc_32.c +++ b/arch/x86/kernel/tsc_32.c | |||
@@ -14,7 +14,10 @@ | |||
14 | 14 | ||
15 | #include "mach_timer.h" | 15 | #include "mach_timer.h" |
16 | 16 | ||
17 | static int tsc_enabled; | 17 | /* native_sched_clock() is called before tsc_init(), so |
18 | we must start with the TSC soft disabled to prevent | ||
19 | erroneous rdtsc usage on !cpu_has_tsc processors */ | ||
20 | static int tsc_disabled = -1; | ||
18 | 21 | ||
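The single flag thus becomes a tri-state. A hedged summary of the states as used by this patch; the enum labels are invented for illustration:

```c
/* values taken from the patch; the names are invented labels */
enum {
	TSC_NOT_YET_CALIBRATED = -1,	/* boot default: soft-disabled */
	TSC_USABLE             =  0,	/* set at the end of tsc_init() */
	TSC_USER_DISABLED      =  1,	/* set by the "notsc" option */
};

int may_use_rdtsc(int tsc_disabled)
{
	/* native_sched_clock() falls back to jiffies for any nonzero state */
	return tsc_disabled == TSC_USABLE;
}
```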
19 | /* | 22 | /* |
20 | * On some systems the TSC frequency does not | 23 | * On some systems the TSC frequency does not |
@@ -28,8 +31,8 @@ EXPORT_SYMBOL_GPL(tsc_khz); | |||
28 | static int __init tsc_setup(char *str) | 31 | static int __init tsc_setup(char *str) |
29 | { | 32 | { |
30 | printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " | 33 | printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " |
31 | "cannot disable TSC completely.\n"); | 34 | "cannot disable TSC completely.\n"); |
32 | mark_tsc_unstable("user disabled TSC"); | 35 | tsc_disabled = 1; |
33 | return 1; | 36 | return 1; |
34 | } | 37 | } |
35 | #else | 38 | #else |
@@ -120,7 +123,7 @@ unsigned long long native_sched_clock(void) | |||
120 | * very important for it to be as fast as the platform | 123 | * very important for it to be as fast as the platform |
121 | * can achieve it. ) | 124 | * can achieve it. ) |
122 | */ | 125 | */ |
123 | if (unlikely(!tsc_enabled && !tsc_unstable)) | 126 | if (unlikely(tsc_disabled)) |
124 | /* No locking but a rare wrong value is not a big deal: */ | 127 | /* No locking but a rare wrong value is not a big deal: */ |
125 | return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); | 128 | return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); |
126 | 129 | ||
@@ -283,7 +286,6 @@ core_initcall(cpufreq_tsc); | |||
283 | 286 | ||
284 | /* clock source code */ | 287 | /* clock source code */ |
285 | 288 | ||
286 | static unsigned long current_tsc_khz; | ||
287 | static struct clocksource clocksource_tsc; | 289 | static struct clocksource clocksource_tsc; |
288 | 290 | ||
289 | /* | 291 | /* |
@@ -322,7 +324,6 @@ void mark_tsc_unstable(char *reason) | |||
322 | { | 324 | { |
323 | if (!tsc_unstable) { | 325 | if (!tsc_unstable) { |
324 | tsc_unstable = 1; | 326 | tsc_unstable = 1; |
325 | tsc_enabled = 0; | ||
326 | printk("Marking TSC unstable due to: %s.\n", reason); | 327 | printk("Marking TSC unstable due to: %s.\n", reason); |
327 | /* Can be called before registration */ | 328 | /* Can be called before registration */ |
328 | if (clocksource_tsc.mult) | 329 | if (clocksource_tsc.mult) |
@@ -336,7 +337,7 @@ EXPORT_SYMBOL_GPL(mark_tsc_unstable); | |||
336 | static int __init dmi_mark_tsc_unstable(const struct dmi_system_id *d) | 337 | static int __init dmi_mark_tsc_unstable(const struct dmi_system_id *d) |
337 | { | 338 | { |
338 | printk(KERN_NOTICE "%s detected: marking TSC unstable.\n", | 339 | printk(KERN_NOTICE "%s detected: marking TSC unstable.\n", |
339 | d->ident); | 340 | d->ident); |
340 | tsc_unstable = 1; | 341 | tsc_unstable = 1; |
341 | return 0; | 342 | return 0; |
342 | } | 343 | } |
@@ -403,7 +404,7 @@ void __init tsc_init(void) | |||
403 | { | 404 | { |
404 | int cpu; | 405 | int cpu; |
405 | 406 | ||
406 | if (!cpu_has_tsc) | 407 | if (!cpu_has_tsc || tsc_disabled > 0) |
407 | return; | 408 | return; |
408 | 409 | ||
409 | cpu_khz = calculate_cpu_khz(); | 410 | cpu_khz = calculate_cpu_khz(); |
@@ -414,6 +415,9 @@ void __init tsc_init(void) | |||
414 | return; | 415 | return; |
415 | } | 416 | } |
416 | 417 | ||
418 | /* now allow native_sched_clock() to use rdtsc */ | ||
419 | tsc_disabled = 0; | ||
420 | |||
417 | printk("Detected %lu.%03lu MHz processor.\n", | 421 | printk("Detected %lu.%03lu MHz processor.\n", |
418 | (unsigned long)cpu_khz / 1000, | 422 | (unsigned long)cpu_khz / 1000, |
419 | (unsigned long)cpu_khz % 1000); | 423 | (unsigned long)cpu_khz % 1000); |
@@ -434,15 +438,12 @@ void __init tsc_init(void) | |||
434 | 438 | ||
435 | unsynchronized_tsc(); | 439 | unsynchronized_tsc(); |
436 | check_geode_tsc_reliable(); | 440 | check_geode_tsc_reliable(); |
437 | current_tsc_khz = tsc_khz; | 441 | clocksource_tsc.mult = clocksource_khz2mult(tsc_khz, |
438 | clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, | 442 | clocksource_tsc.shift); |
439 | clocksource_tsc.shift); | ||
440 | /* lower the rating if we already know it's unstable: */ | 443 | /* lower the rating if we already know it's unstable: */ |
441 | if (check_tsc_unstable()) { | 444 | if (check_tsc_unstable()) { |
442 | clocksource_tsc.rating = 0; | 445 | clocksource_tsc.rating = 0; |
443 | clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; | 446 | clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; |
444 | } else | 447 | } |
445 | tsc_enabled = 1; | ||
446 | |||
447 | clocksource_register(&clocksource_tsc); | 448 | clocksource_register(&clocksource_tsc); |
448 | } | 449 | } |
diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c index fcc16e58609e..9898fb01edfd 100644 --- a/arch/x86/kernel/tsc_64.c +++ b/arch/x86/kernel/tsc_64.c | |||
@@ -227,14 +227,14 @@ void __init tsc_calibrate(void) | |||
227 | /* hpet or pmtimer available ? */ | 227 | /* hpet or pmtimer available ? */ |
228 | if (!hpet && !pm1 && !pm2) { | 228 | if (!hpet && !pm1 && !pm2) { |
229 | printk(KERN_INFO "TSC calibrated against PIT\n"); | 229 | printk(KERN_INFO "TSC calibrated against PIT\n"); |
230 | return; | 230 | goto out; |
231 | } | 231 | } |
232 | 232 | ||
233 | /* Check whether the sampling was disturbed by an SMI */ | 233 | /* Check whether the sampling was disturbed by an SMI */ |
234 | if (tsc1 == ULONG_MAX || tsc2 == ULONG_MAX) { | 234 | if (tsc1 == ULONG_MAX || tsc2 == ULONG_MAX) { |
235 | printk(KERN_WARNING "TSC calibration disturbed by SMI, " | 235 | printk(KERN_WARNING "TSC calibration disturbed by SMI, " |
236 | "using PIT calibration result\n"); | 236 | "using PIT calibration result\n"); |
237 | return; | 237 | goto out; |
238 | } | 238 | } |
239 | 239 | ||
240 | tsc2 = (tsc2 - tsc1) * 1000000L; | 240 | tsc2 = (tsc2 - tsc1) * 1000000L; |
@@ -242,7 +242,7 @@ void __init tsc_calibrate(void) | |||
242 | if (hpet) { | 242 | if (hpet) { |
243 | printk(KERN_INFO "TSC calibrated against HPET\n"); | 243 | printk(KERN_INFO "TSC calibrated against HPET\n"); |
244 | if (hpet2 < hpet1) | 244 | if (hpet2 < hpet1) |
245 | hpet2 += 0x100000000; | 245 | hpet2 += 0x100000000UL; |
246 | hpet2 -= hpet1; | 246 | hpet2 -= hpet1; |
247 | tsc1 = (hpet2 * hpet_readl(HPET_PERIOD)) / 1000000; | 247 | tsc1 = (hpet2 * hpet_readl(HPET_PERIOD)) / 1000000; |
248 | } else { | 248 | } else { |
@@ -255,6 +255,7 @@ void __init tsc_calibrate(void) | |||
255 | 255 | ||
256 | tsc_khz = tsc2 / tsc1; | 256 | tsc_khz = tsc2 / tsc1; |
257 | 257 | ||
258 | out: | ||
258 | for_each_possible_cpu(cpu) | 259 | for_each_possible_cpu(cpu) |
259 | set_cyc2ns_scale(tsc_khz, cpu); | 260 | set_cyc2ns_scale(tsc_khz, cpu); |
260 | } | 261 | } |
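The `0x100000000UL` correction above handles the HPET main counter wrapping its 32-bit range between the two samples. A small sketch with invented values:

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t hpet1 = 0xfffffff0ULL;	/* invented: near the 32-bit limit */
	uint64_t hpet2 = 0x00000010ULL;	/* invented: read after a wraparound */

	if (hpet2 < hpet1)
		hpet2 += 0x100000000UL;	/* add 2^32, kept unsigned 64-bit */
	printf("elapsed: %llu ticks\n", (unsigned long long)(hpet2 - hpet1));
	return 0;
}
```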
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index ce5ed083a1e9..2674f5796275 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S | |||
@@ -60,13 +60,6 @@ SECTIONS | |||
60 | 60 | ||
61 | BUG_TABLE :text | 61 | BUG_TABLE :text |
62 | 62 | ||
63 | . = ALIGN(4); | ||
64 | .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) { | ||
65 | __tracedata_start = .; | ||
66 | *(.tracedata) | ||
67 | __tracedata_end = .; | ||
68 | } | ||
69 | |||
70 | RODATA | 63 | RODATA |
71 | 64 | ||
72 | /* writeable */ | 65 | /* writeable */ |
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index fad3674b06a5..fd246e22fe6b 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S | |||
@@ -53,13 +53,6 @@ SECTIONS | |||
53 | 53 | ||
54 | RODATA | 54 | RODATA |
55 | 55 | ||
56 | . = ALIGN(4); | ||
57 | .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) { | ||
58 | __tracedata_start = .; | ||
59 | *(.tracedata) | ||
60 | __tracedata_end = .; | ||
61 | } | ||
62 | |||
63 | . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */ | 56 | . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */ |
64 | /* Data */ | 57 | /* Data */ |
65 | .data : AT(ADDR(.data) - LOAD_OFFSET) { | 58 | .data : AT(ADDR(.data) - LOAD_OFFSET) { |
@@ -177,6 +170,7 @@ SECTIONS | |||
177 | *(.con_initcall.init) | 170 | *(.con_initcall.init) |
178 | } | 171 | } |
179 | __con_initcall_end = .; | 172 | __con_initcall_end = .; |
173 | . = ALIGN(16); | ||
180 | __x86cpuvendor_start = .; | 174 | __x86cpuvendor_start = .; |
181 | .x86cpuvendor.init : AT(ADDR(.x86cpuvendor.init) - LOAD_OFFSET) { | 175 | .x86cpuvendor.init : AT(ADDR(.x86cpuvendor.init) - LOAD_OFFSET) { |
182 | *(.x86cpuvendor.init) | 176 | *(.x86cpuvendor.init) |
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index ba8c0b75ab0a..0c029e8959c7 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c | |||
@@ -15,9 +15,12 @@ | |||
15 | #include <linux/init.h> | 15 | #include <linux/init.h> |
16 | #include <linux/pci_ids.h> | 16 | #include <linux/pci_ids.h> |
17 | #include <linux/pci_regs.h> | 17 | #include <linux/pci_regs.h> |
18 | |||
19 | #include <asm/apic.h> | ||
18 | #include <asm/pci-direct.h> | 20 | #include <asm/pci-direct.h> |
19 | #include <asm/io.h> | 21 | #include <asm/io.h> |
20 | #include <asm/paravirt.h> | 22 | #include <asm/paravirt.h> |
23 | #include <asm/setup.h> | ||
21 | 24 | ||
22 | #if defined CONFIG_PCI && defined CONFIG_PARAVIRT | 25 | #if defined CONFIG_PCI && defined CONFIG_PARAVIRT |
23 | /* | 26 | /* |
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 58882f9f2637..f6c05d0410fb 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c | |||
@@ -2,6 +2,7 @@ | |||
2 | All C exports should go in the respective C files. */ | 2 | All C exports should go in the respective C files. */ |
3 | 3 | ||
4 | #include <linux/module.h> | 4 | #include <linux/module.h> |
5 | #include <net/checksum.h> | ||
5 | #include <linux/smp.h> | 6 | #include <linux/smp.h> |
6 | 7 | ||
7 | #include <asm/processor.h> | 8 | #include <asm/processor.h> |
@@ -29,6 +30,8 @@ EXPORT_SYMBOL(__copy_from_user_inatomic); | |||
29 | EXPORT_SYMBOL(copy_page); | 30 | EXPORT_SYMBOL(copy_page); |
30 | EXPORT_SYMBOL(clear_page); | 31 | EXPORT_SYMBOL(clear_page); |
31 | 32 | ||
33 | EXPORT_SYMBOL(csum_partial); | ||
34 | |||
32 | /* | 35 | /* |
33 | * Export string functions. We normally rely on gcc builtin for most of these, | 36 | * Export string functions. We normally rely on gcc builtin for most of these, |
34 | * but gcc sometimes decides not to inline them. | 37 | * but gcc sometimes decides not to inline them. |
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 3324d90038e4..3829aa7b663f 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
@@ -200,10 +200,12 @@ int __pit_timer_fn(struct kvm_kpit_state *ps) | |||
200 | 200 | ||
201 | atomic_inc(&pt->pending); | 201 | atomic_inc(&pt->pending); |
202 | smp_mb__after_atomic_inc(); | 202 | smp_mb__after_atomic_inc(); |
203 | /* FIXME: handle case where the guest is in guest mode */ | 203 | if (vcpu0) { |
204 | if (vcpu0 && waitqueue_active(&vcpu0->wq)) { | 204 | set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests); |
205 | vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE; | 205 | if (waitqueue_active(&vcpu0->wq)) { |
206 | wake_up_interruptible(&vcpu0->wq); | 206 | vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
207 | wake_up_interruptible(&vcpu0->wq); | ||
208 | } | ||
207 | } | 209 | } |
208 | 210 | ||
209 | pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period); | 211 | pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period); |
@@ -216,7 +218,7 @@ int pit_has_pending_timer(struct kvm_vcpu *vcpu) | |||
216 | { | 218 | { |
217 | struct kvm_pit *pit = vcpu->kvm->arch.vpit; | 219 | struct kvm_pit *pit = vcpu->kvm->arch.vpit; |
218 | 220 | ||
219 | if (pit && vcpu->vcpu_id == 0) | 221 | if (pit && vcpu->vcpu_id == 0 && pit->pit_state.inject_pending) |
220 | return atomic_read(&pit->pit_state.pit_timer.pending); | 222 | return atomic_read(&pit->pit_state.pit_timer.pending); |
221 | 223 | ||
222 | return 0; | 224 | return 0; |
@@ -237,6 +239,19 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) | |||
237 | return HRTIMER_NORESTART; | 239 | return HRTIMER_NORESTART; |
238 | } | 240 | } |
239 | 241 | ||
242 | void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) | ||
243 | { | ||
244 | struct kvm_pit *pit = vcpu->kvm->arch.vpit; | ||
245 | struct hrtimer *timer; | ||
246 | |||
247 | if (vcpu->vcpu_id != 0 || !pit) | ||
248 | return; | ||
249 | |||
250 | timer = &pit->pit_state.pit_timer.timer; | ||
251 | if (hrtimer_cancel(timer)) | ||
252 | hrtimer_start(timer, timer->expires, HRTIMER_MODE_ABS); | ||
253 | } | ||
254 | |||
240 | static void destroy_pit_timer(struct kvm_kpit_timer *pt) | 255 | static void destroy_pit_timer(struct kvm_kpit_timer *pt) |
241 | { | 256 | { |
242 | pr_debug("pit: execute del timer!\n"); | 257 | pr_debug("pit: execute del timer!\n"); |
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index ce1f583459b1..76d736b5f664 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c | |||
@@ -94,3 +94,9 @@ void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec) | |||
94 | /* TODO: PIT, RTC etc. */ | 94 | /* TODO: PIT, RTC etc. */ |
95 | } | 95 | } |
96 | EXPORT_SYMBOL_GPL(kvm_timer_intr_post); | 96 | EXPORT_SYMBOL_GPL(kvm_timer_intr_post); |
97 | |||
98 | void __kvm_migrate_timers(struct kvm_vcpu *vcpu) | ||
99 | { | ||
100 | __kvm_migrate_apic_timer(vcpu); | ||
101 | __kvm_migrate_pit_timer(vcpu); | ||
102 | } | ||
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 1802134b836f..2a15be2275c0 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h | |||
@@ -84,6 +84,8 @@ void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec); | |||
84 | void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); | 84 | void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); |
85 | void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); | 85 | void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); |
86 | void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu); | 86 | void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu); |
87 | void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu); | ||
88 | void __kvm_migrate_timers(struct kvm_vcpu *vcpu); | ||
87 | 89 | ||
88 | int pit_has_pending_timer(struct kvm_vcpu *vcpu); | 90 | int pit_has_pending_timer(struct kvm_vcpu *vcpu); |
89 | int apic_has_pending_timer(struct kvm_vcpu *vcpu); | 91 | int apic_has_pending_timer(struct kvm_vcpu *vcpu); |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 36809d79788b..ebc03f5ae162 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -940,6 +940,7 @@ static int __apic_timer_fn(struct kvm_lapic *apic) | |||
940 | wait_queue_head_t *q = &apic->vcpu->wq; | 940 | wait_queue_head_t *q = &apic->vcpu->wq; |
941 | 941 | ||
942 | atomic_inc(&apic->timer.pending); | 942 | atomic_inc(&apic->timer.pending); |
943 | set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests); | ||
943 | if (waitqueue_active(q)) { | 944 | if (waitqueue_active(q)) { |
944 | apic->vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | 945 | apic->vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
945 | wake_up_interruptible(q); | 946 | wake_up_interruptible(q); |
@@ -957,7 +958,7 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu) | |||
957 | { | 958 | { |
958 | struct kvm_lapic *lapic = vcpu->arch.apic; | 959 | struct kvm_lapic *lapic = vcpu->arch.apic; |
959 | 960 | ||
960 | if (lapic) | 961 | if (lapic && apic_enabled(lapic) && apic_lvt_enabled(lapic, APIC_LVTT)) |
961 | return atomic_read(&lapic->timer.pending); | 962 | return atomic_read(&lapic->timer.pending); |
962 | 963 | ||
963 | return 0; | 964 | return 0; |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 36c5406b1813..7e7c3969f7a2 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -640,6 +640,7 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
640 | rmap_remove(kvm, spte); | 640 | rmap_remove(kvm, spte); |
641 | --kvm->stat.lpages; | 641 | --kvm->stat.lpages; |
642 | set_shadow_pte(spte, shadow_trap_nonpresent_pte); | 642 | set_shadow_pte(spte, shadow_trap_nonpresent_pte); |
643 | spte = NULL; | ||
643 | write_protected = 1; | 644 | write_protected = 1; |
644 | } | 645 | } |
645 | spte = rmap_next(kvm, rmapp, spte); | 646 | spte = rmap_next(kvm, rmapp, spte); |
@@ -658,7 +659,7 @@ static int is_empty_shadow_page(u64 *spt) | |||
658 | u64 *end; | 659 | u64 *end; |
659 | 660 | ||
660 | for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++) | 661 | for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++) |
661 | if (*pos != shadow_trap_nonpresent_pte) { | 662 | if (is_shadow_present_pte(*pos)) { |
662 | printk(KERN_ERR "%s: %p %llx\n", __func__, | 663 | printk(KERN_ERR "%s: %p %llx\n", __func__, |
663 | pos, *pos); | 664 | pos, *pos); |
664 | return 0; | 665 | return 0; |
@@ -1082,10 +1083,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | |||
1082 | struct kvm_mmu_page *shadow; | 1083 | struct kvm_mmu_page *shadow; |
1083 | 1084 | ||
1084 | spte |= PT_WRITABLE_MASK; | 1085 | spte |= PT_WRITABLE_MASK; |
1085 | if (user_fault) { | ||
1086 | mmu_unshadow(vcpu->kvm, gfn); | ||
1087 | goto unshadowed; | ||
1088 | } | ||
1089 | 1086 | ||
1090 | shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn); | 1087 | shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn); |
1091 | if (shadow || | 1088 | if (shadow || |
@@ -1102,8 +1099,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | |||
1102 | } | 1099 | } |
1103 | } | 1100 | } |
1104 | 1101 | ||
1105 | unshadowed: | ||
1106 | |||
1107 | if (pte_access & ACC_WRITE_MASK) | 1102 | if (pte_access & ACC_WRITE_MASK) |
1108 | mark_page_dirty(vcpu->kvm, gfn); | 1103 | mark_page_dirty(vcpu->kvm, gfn); |
1109 | 1104 | ||
@@ -1580,11 +1575,13 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, | |||
1580 | u64 *spte, | 1575 | u64 *spte, |
1581 | const void *new) | 1576 | const void *new) |
1582 | { | 1577 | { |
1583 | if ((sp->role.level != PT_PAGE_TABLE_LEVEL) | 1578 | if (sp->role.level != PT_PAGE_TABLE_LEVEL) { |
1584 | && !vcpu->arch.update_pte.largepage) { | 1579 | if (!vcpu->arch.update_pte.largepage || |
1585 | ++vcpu->kvm->stat.mmu_pde_zapped; | 1580 | sp->role.glevels == PT32_ROOT_LEVEL) { |
1586 | return; | 1581 | ++vcpu->kvm->stat.mmu_pde_zapped; |
1587 | } | 1582 | return; |
1583 | } | ||
1584 | } | ||
1588 | 1585 | ||
1589 | ++vcpu->kvm->stat.mmu_pte_updated; | 1586 | ++vcpu->kvm->stat.mmu_pte_updated; |
1590 | if (sp->role.glevels == PT32_ROOT_LEVEL) | 1587 | if (sp->role.glevels == PT32_ROOT_LEVEL) |
@@ -1858,6 +1855,7 @@ static void free_mmu_pages(struct kvm_vcpu *vcpu) | |||
1858 | sp = container_of(vcpu->kvm->arch.active_mmu_pages.next, | 1855 | sp = container_of(vcpu->kvm->arch.active_mmu_pages.next, |
1859 | struct kvm_mmu_page, link); | 1856 | struct kvm_mmu_page, link); |
1860 | kvm_mmu_zap_page(vcpu->kvm, sp); | 1857 | kvm_mmu_zap_page(vcpu->kvm, sp); |
1858 | cond_resched(); | ||
1861 | } | 1859 | } |
1862 | free_page((unsigned long)vcpu->arch.mmu.pae_root); | 1860 | free_page((unsigned long)vcpu->arch.mmu.pae_root); |
1863 | } | 1861 | } |
@@ -1996,7 +1994,7 @@ static struct shrinker mmu_shrinker = { | |||
1996 | .seeks = DEFAULT_SEEKS * 10, | 1994 | .seeks = DEFAULT_SEEKS * 10, |
1997 | }; | 1995 | }; |
1998 | 1996 | ||
1999 | void mmu_destroy_caches(void) | 1997 | static void mmu_destroy_caches(void) |
2000 | { | 1998 | { |
2001 | if (pte_chain_cache) | 1999 | if (pte_chain_cache) |
2002 | kmem_cache_destroy(pte_chain_cache); | 2000 | kmem_cache_destroy(pte_chain_cache); |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 156fe10288ae..934c7b619396 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -418,7 +418,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
418 | 418 | ||
419 | /* mmio */ | 419 | /* mmio */ |
420 | if (is_error_pfn(pfn)) { | 420 | if (is_error_pfn(pfn)) { |
421 | pgprintk("gfn %x is mmio\n", walker.gfn); | 421 | pgprintk("gfn %lx is mmio\n", walker.gfn); |
422 | kvm_release_pfn_clean(pfn); | 422 | kvm_release_pfn_clean(pfn); |
423 | return 1; | 423 | return 1; |
424 | } | 424 | } |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ab22615eee89..6b0d5fa5bab3 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -688,7 +688,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
688 | delta = vcpu->arch.host_tsc - tsc_this; | 688 | delta = vcpu->arch.host_tsc - tsc_this; |
689 | svm->vmcb->control.tsc_offset += delta; | 689 | svm->vmcb->control.tsc_offset += delta; |
690 | vcpu->cpu = cpu; | 690 | vcpu->cpu = cpu; |
691 | kvm_migrate_apic_timer(vcpu); | 691 | kvm_migrate_timers(vcpu); |
692 | } | 692 | } |
693 | 693 | ||
694 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) | 694 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bfe4db11989c..540e95179074 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -566,7 +566,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) | |||
566 | load_transition_efer(vmx); | 566 | load_transition_efer(vmx); |
567 | } | 567 | } |
568 | 568 | ||
569 | static void vmx_load_host_state(struct vcpu_vmx *vmx) | 569 | static void __vmx_load_host_state(struct vcpu_vmx *vmx) |
570 | { | 570 | { |
571 | unsigned long flags; | 571 | unsigned long flags; |
572 | 572 | ||
@@ -596,6 +596,13 @@ static void vmx_load_host_state(struct vcpu_vmx *vmx) | |||
596 | reload_host_efer(vmx); | 596 | reload_host_efer(vmx); |
597 | } | 597 | } |
598 | 598 | ||
599 | static void vmx_load_host_state(struct vcpu_vmx *vmx) | ||
600 | { | ||
601 | preempt_disable(); | ||
602 | __vmx_load_host_state(vmx); | ||
603 | preempt_enable(); | ||
604 | } | ||
605 | |||
599 | /* | 606 | /* |
600 | * Switches to specified vcpu, until a matching vcpu_put(), but assumes | 607 | * Switches to specified vcpu, until a matching vcpu_put(), but assumes |
601 | * vcpu mutex is already taken. | 608 | * vcpu mutex is already taken. |
@@ -608,7 +615,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
608 | 615 | ||
609 | if (vcpu->cpu != cpu) { | 616 | if (vcpu->cpu != cpu) { |
610 | vcpu_clear(vmx); | 617 | vcpu_clear(vmx); |
611 | kvm_migrate_apic_timer(vcpu); | 618 | kvm_migrate_timers(vcpu); |
612 | vpid_sync_vcpu_all(vmx); | 619 | vpid_sync_vcpu_all(vmx); |
613 | } | 620 | } |
614 | 621 | ||
@@ -654,7 +661,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
654 | 661 | ||
655 | static void vmx_vcpu_put(struct kvm_vcpu *vcpu) | 662 | static void vmx_vcpu_put(struct kvm_vcpu *vcpu) |
656 | { | 663 | { |
657 | vmx_load_host_state(to_vmx(vcpu)); | 664 | __vmx_load_host_state(to_vmx(vcpu)); |
658 | } | 665 | } |
659 | 666 | ||
660 | static void vmx_fpu_activate(struct kvm_vcpu *vcpu) | 667 | static void vmx_fpu_activate(struct kvm_vcpu *vcpu) |
@@ -884,11 +891,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
884 | switch (msr_index) { | 891 | switch (msr_index) { |
885 | #ifdef CONFIG_X86_64 | 892 | #ifdef CONFIG_X86_64 |
886 | case MSR_EFER: | 893 | case MSR_EFER: |
894 | vmx_load_host_state(vmx); | ||
887 | ret = kvm_set_msr_common(vcpu, msr_index, data); | 895 | ret = kvm_set_msr_common(vcpu, msr_index, data); |
888 | if (vmx->host_state.loaded) { | ||
889 | reload_host_efer(vmx); | ||
890 | load_transition_efer(vmx); | ||
891 | } | ||
892 | break; | 896 | break; |
893 | case MSR_FS_BASE: | 897 | case MSR_FS_BASE: |
894 | vmcs_writel(GUEST_FS_BASE, data); | 898 | vmcs_writel(GUEST_FS_BASE, data); |
@@ -910,11 +914,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
910 | guest_write_tsc(data); | 914 | guest_write_tsc(data); |
911 | break; | 915 | break; |
912 | default: | 916 | default: |
917 | vmx_load_host_state(vmx); | ||
913 | msr = find_msr_entry(vmx, msr_index); | 918 | msr = find_msr_entry(vmx, msr_index); |
914 | if (msr) { | 919 | if (msr) { |
915 | msr->data = data; | 920 | msr->data = data; |
916 | if (vmx->host_state.loaded) | ||
917 | load_msrs(vmx->guest_msrs, vmx->save_nmsrs); | ||
918 | break; | 921 | break; |
919 | } | 922 | } |
920 | ret = kvm_set_msr_common(vcpu, msr_index, data); | 923 | ret = kvm_set_msr_common(vcpu, msr_index, data); |
@@ -1036,6 +1039,7 @@ static void hardware_enable(void *garbage) | |||
1036 | static void hardware_disable(void *garbage) | 1039 | static void hardware_disable(void *garbage) |
1037 | { | 1040 | { |
1038 | asm volatile (ASM_VMX_VMXOFF : : : "cc"); | 1041 | asm volatile (ASM_VMX_VMXOFF : : : "cc"); |
1042 | write_cr4(read_cr4() & ~X86_CR4_VMXE); | ||
1039 | } | 1043 | } |
1040 | 1044 | ||
1041 | static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, | 1045 | static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 21338bdb28ff..63a77caa59f1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -492,8 +492,8 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) | |||
492 | static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) | 492 | static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) |
493 | { | 493 | { |
494 | static int version; | 494 | static int version; |
495 | struct kvm_wall_clock wc; | 495 | struct pvclock_wall_clock wc; |
496 | struct timespec wc_ts; | 496 | struct timespec now, sys, boot; |
497 | 497 | ||
498 | if (!wall_clock) | 498 | if (!wall_clock) |
499 | return; | 499 | return; |
@@ -502,10 +502,19 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) | |||
502 | 502 | ||
503 | kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); | 503 | kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); |
504 | 504 | ||
505 | wc_ts = current_kernel_time(); | 505 | /* |
506 | wc.wc_sec = wc_ts.tv_sec; | 506 | * The guest calculates current wall clock time by adding |
507 | wc.wc_nsec = wc_ts.tv_nsec; | 507 | * system time (updated by kvm_write_guest_time below) to the |
508 | wc.wc_version = version; | 508 | * wall clock specified here. Guest system time equals host |
509 | * system time for us, thus we must fill in host boot time here. | ||
510 | */ | ||
511 | now = current_kernel_time(); | ||
512 | ktime_get_ts(&sys); | ||
513 | boot = ns_to_timespec(timespec_to_ns(&now) - timespec_to_ns(&sys)); | ||
514 | |||
515 | wc.sec = boot.tv_sec; | ||
516 | wc.nsec = boot.tv_nsec; | ||
517 | wc.version = version; | ||
509 | 518 | ||
510 | kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc)); | 519 | kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc)); |
511 | 520 | ||
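The boot-epoch computation above is just wall time minus monotonic time. A hedged sketch with invented nanosecond values:

```c
#include <stdio.h>

int main(void)
{
	/* invented sample values, both in nanoseconds */
	long long now  = 1210000000000000000LL;	/* current_kernel_time() */
	long long sys  =       5000000000000LL;	/* ktime_get_ts(): since boot */
	long long boot = now - sys;		/* wall-clock time of host boot */

	printf("boot epoch: %lld.%09lld s\n",
	       boot / 1000000000LL, boot % 1000000000LL);
	return 0;
}
```

The guest then reconstructs current wall time as this epoch plus the per-vcpu system time, so the two values must share the same base.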
@@ -513,6 +522,45 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) | |||
513 | kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); | 522 | kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); |
514 | } | 523 | } |
515 | 524 | ||
525 | static uint32_t div_frac(uint32_t dividend, uint32_t divisor) | ||
526 | { | ||
527 | uint32_t quotient, remainder; | ||
528 | |||
529 | /* Don't try to replace with do_div(); this one calculates | ||
530 | * "(dividend << 32) / divisor" */ | ||
531 | __asm__ ( "divl %4" | ||
532 | : "=a" (quotient), "=d" (remainder) | ||
533 | : "0" (0), "1" (dividend), "r" (divisor) ); | ||
534 | return quotient; | ||
535 | } | ||
536 | |||
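A portable equivalent of div_frac(), useful for seeing what the inline divl computes; the sample numbers are invented:

```c
#include <stdint.h>
#include <stdio.h>

/* behaves like div_frac() above whenever dividend < divisor, which
   kvm_set_time_scale() guarantees; a larger dividend would overflow
   32 bits (divl would fault, this version silently truncates) */
static uint32_t div_frac_portable(uint32_t dividend, uint32_t divisor)
{
	return (uint32_t)(((uint64_t)dividend << 32) / divisor);
}

int main(void)
{
	/* 1e9 / 1.2e9 = 0.8333..., scaled by 2^32 */
	printf("%u\n", div_frac_portable(1000000000u, 1200000000u));
	return 0;
}
```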
537 | static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info *hv_clock) | ||
538 | { | ||
539 | uint64_t nsecs = 1000000000LL; | ||
540 | int32_t shift = 0; | ||
541 | uint64_t tps64; | ||
542 | uint32_t tps32; | ||
543 | |||
544 | tps64 = tsc_khz * 1000LL; | ||
545 | while (tps64 > nsecs*2) { | ||
546 | tps64 >>= 1; | ||
547 | shift--; | ||
548 | } | ||
549 | |||
550 | tps32 = (uint32_t)tps64; | ||
551 | while (tps32 <= (uint32_t)nsecs) { | ||
552 | tps32 <<= 1; | ||
553 | shift++; | ||
554 | } | ||
555 | |||
556 | hv_clock->tsc_shift = shift; | ||
557 | hv_clock->tsc_to_system_mul = div_frac(nsecs, tps32); | ||
558 | |||
559 | pr_debug("%s: tsc_khz %u, tsc_shift %d, tsc_mul %u\n", | ||
560 | __FUNCTION__, tsc_khz, hv_clock->tsc_shift, | ||
561 | hv_clock->tsc_to_system_mul); | ||
562 | } | ||
563 | |||
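The shift/multiplier pair produced above lets the guest turn a TSC delta into nanoseconds with one shift and one multiply. A hedged sketch of that consumer-side conversion, assuming a 2 GHz TSC, for which the loop above yields tsc_shift = 0 and tsc_to_system_mul = 0x80000000:

```c
#include <stdint.h>
#include <stdio.h>

static uint64_t scale_delta(uint64_t delta, int shift, uint32_t mul)
{
	if (shift < 0)
		delta >>= -shift;
	else
		delta <<= shift;
	/* truncated 64-bit multiply; real guests keep more intermediate bits */
	return (delta * mul) >> 32;
}

int main(void)
{
	/* 2,000,000 ticks at an assumed 2 GHz should be one millisecond */
	printf("%llu ns\n",
	       (unsigned long long)scale_delta(2000000, 0, 0x80000000u));
	return 0;
}
```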
516 | static void kvm_write_guest_time(struct kvm_vcpu *v) | 564 | static void kvm_write_guest_time(struct kvm_vcpu *v) |
517 | { | 565 | { |
518 | struct timespec ts; | 566 | struct timespec ts; |
@@ -523,6 +571,11 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) | |||
523 | if ((!vcpu->time_page)) | 571 | if ((!vcpu->time_page)) |
524 | return; | 572 | return; |
525 | 573 | ||
574 | if (unlikely(vcpu->hv_clock_tsc_khz != tsc_khz)) { | ||
575 | kvm_set_time_scale(tsc_khz, &vcpu->hv_clock); | ||
576 | vcpu->hv_clock_tsc_khz = tsc_khz; | ||
577 | } | ||
578 | |||
526 | /* Keep irq disabled to prevent changes to the clock */ | 579 | /* Keep irq disabled to prevent changes to the clock */ |
527 | local_irq_save(flags); | 580 | local_irq_save(flags); |
528 | kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER, | 581 | kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER, |
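
kvm_set_time_scale() above normalizes ticks-per-second into the range (10^9, 2*10^9] with a power-of-two shift, then div_frac() produces the 32.32 fixed-point ratio (10^9 << 32) / tps with a single divl. The guest can then convert a TSC delta to nanoseconds with one multiply. A portable sketch of both directions, using plain 64-bit arithmetic instead of the inline asm:

#include <stdint.h>
#include <stdio.h>

struct scale { int shift; uint32_t mul; };

/* Mirror kvm_set_time_scale(): shift tps into (1e9, 2e9], then take
 * (1e9 << 32) / tps, which is what div_frac() computes with divl. */
static struct scale time_scale(uint32_t tsc_khz)
{
	uint64_t tps = (uint64_t)tsc_khz * 1000;
	struct scale s = { 0, 0 };

	while (tps > 2000000000ULL) {
		tps >>= 1;
		s.shift--;
	}
	while (tps <= 1000000000ULL) {
		tps <<= 1;
		s.shift++;
	}
	s.mul = (uint32_t)((1000000000ULL << 32) / tps);
	return s;
}

/* Guest-side use of the pair: TSC delta -> nanoseconds. The real
 * pvclock code uses a widening 64x32 multiply; plain 64-bit math is
 * enough for the small delta in this sketch. */
static uint64_t tsc_to_ns(uint64_t delta, struct scale s)
{
	delta = s.shift >= 0 ? delta << s.shift : delta >> -s.shift;
	return (delta * s.mul) >> 32;
}

int main(void)
{
	struct scale s = time_scale(2400000);	/* 2.4 GHz, illustrative */

	/* 2400 ticks at 2.4 GHz is 1 us; truncation reports 999 ns. */
	printf("shift=%d mul=%u -> 2400 ticks = %llu ns\n",
	       s.shift, s.mul, (unsigned long long)tsc_to_ns(2400, s));
	return 0;
}
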
@@ -537,14 +590,14 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) | |||
537 | /* | 590 | /* |
538 | * The interface expects us to write an even number signaling that the | 591 | * The interface expects us to write an even number signaling that the |
539 | * update is finished. Since the guest won't see the intermediate | 592 | * update is finished. Since the guest won't see the intermediate |
540 | * state, we just write "2" at the end | 593 | * state, we just increase by 2 at the end. |
541 | */ | 594 | */ |
542 | vcpu->hv_clock.version = 2; | 595 | vcpu->hv_clock.version += 2; |
543 | 596 | ||
544 | shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0); | 597 | shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0); |
545 | 598 | ||
546 | memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock, | 599 | memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock, |
547 | sizeof(vcpu->hv_clock)); | 600 | sizeof(vcpu->hv_clock)); |
548 | 601 | ||
549 | kunmap_atomic(shared_kaddr, KM_USER0); | 602 | kunmap_atomic(shared_kaddr, KM_USER0); |
550 | 603 | ||
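
Switching from "version = 2" to "version += 2" turns the field into a seqlock-style counter: odd while the host rewrites the shared page, even when stable, and monotonically growing so a guest can also detect that a complete update happened between two of its reads. A sketch of the matching guest read loop, with a hypothetical payload field:

#include <stdint.h>

struct pv_time {
	volatile uint32_t version;
	volatile uint64_t system_time;	/* hypothetical payload */
};

/* Retry until the version is even and unchanged across the read;
 * the kernel uses explicit barriers where volatile stands in here. */
static uint64_t read_system_time(struct pv_time *t)
{
	uint32_t v;
	uint64_t val;

	do {
		v = t->version;
		val = t->system_time;
	} while ((v & 1) || v != t->version);

	return val;
}

int main(void)
{
	struct pv_time t = { 2, 123456789ULL };
	return read_system_time(&t) ? 0 : 1;
}
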
@@ -599,10 +652,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
599 | /* ...but clean it before doing the actual write */ | 652 | /* ...but clean it before doing the actual write */ |
600 | vcpu->arch.time_offset = data & ~(PAGE_MASK | 1); | 653 | vcpu->arch.time_offset = data & ~(PAGE_MASK | 1); |
601 | 654 | ||
602 | vcpu->arch.hv_clock.tsc_to_system_mul = | ||
603 | clocksource_khz2mult(tsc_khz, 22); | ||
604 | vcpu->arch.hv_clock.tsc_shift = 22; | ||
605 | |||
606 | down_read(¤t->mm->mmap_sem); | 655 | down_read(¤t->mm->mmap_sem); |
607 | vcpu->arch.time_page = | 656 | vcpu->arch.time_page = |
608 | gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT); | 657 | gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT); |
@@ -2758,7 +2807,9 @@ again: | |||
2758 | 2807 | ||
2759 | if (vcpu->requests) { | 2808 | if (vcpu->requests) { |
2760 | if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests)) | 2809 | if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests)) |
2761 | __kvm_migrate_apic_timer(vcpu); | 2810 | __kvm_migrate_timers(vcpu); |
2811 | if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) | ||
2812 | kvm_x86_ops->tlb_flush(vcpu); | ||
2762 | if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS, | 2813 | if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS, |
2763 | &vcpu->requests)) { | 2814 | &vcpu->requests)) { |
2764 | kvm_run->exit_reason = KVM_EXIT_TPR_ACCESS; | 2815 | kvm_run->exit_reason = KVM_EXIT_TPR_ACCESS; |
@@ -2772,6 +2823,7 @@ again: | |||
2772 | } | 2823 | } |
2773 | } | 2824 | } |
2774 | 2825 | ||
2826 | clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests); | ||
2775 | kvm_inject_pending_timer_irqs(vcpu); | 2827 | kvm_inject_pending_timer_irqs(vcpu); |
2776 | 2828 | ||
2777 | preempt_disable(); | 2829 | preempt_disable(); |
@@ -2781,21 +2833,13 @@ again: | |||
2781 | 2833 | ||
2782 | local_irq_disable(); | 2834 | local_irq_disable(); |
2783 | 2835 | ||
2784 | if (need_resched()) { | 2836 | if (vcpu->requests || need_resched()) { |
2785 | local_irq_enable(); | 2837 | local_irq_enable(); |
2786 | preempt_enable(); | 2838 | preempt_enable(); |
2787 | r = 1; | 2839 | r = 1; |
2788 | goto out; | 2840 | goto out; |
2789 | } | 2841 | } |
2790 | 2842 | ||
2791 | if (vcpu->requests) | ||
2792 | if (test_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) { | ||
2793 | local_irq_enable(); | ||
2794 | preempt_enable(); | ||
2795 | r = 1; | ||
2796 | goto out; | ||
2797 | } | ||
2798 | |||
2799 | if (signal_pending(current)) { | 2843 | if (signal_pending(current)) { |
2800 | local_irq_enable(); | 2844 | local_irq_enable(); |
2801 | preempt_enable(); | 2845 | preempt_enable(); |
@@ -2825,9 +2869,6 @@ again: | |||
2825 | 2869 | ||
2826 | kvm_guest_enter(); | 2870 | kvm_guest_enter(); |
2827 | 2871 | ||
2828 | if (vcpu->requests) | ||
2829 | if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) | ||
2830 | kvm_x86_ops->tlb_flush(vcpu); | ||
2831 | 2872 | ||
2832 | KVMTRACE_0D(VMENTRY, vcpu, entryexit); | 2873 | KVMTRACE_0D(VMENTRY, vcpu, entryexit); |
2833 | kvm_x86_ops->run(vcpu, kvm_run); | 2874 | kvm_x86_ops->run(vcpu, kvm_run); |
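
The x86.c hunks above reorder the vcpu entry path so every request bit (now including TLB flushes) is drained in one place, and vcpu->requests is re-checked after local_irq_disable(): a request raised in the gap loops back out instead of riding unserviced into guest mode. A condensed, compilable sketch of that structure, with stubbed helpers standing in for the real primitives:

#include <stdbool.h>
#include <stdio.h>

static unsigned long requests;		/* stand-in for vcpu->requests */
static bool resched;

static void local_irq_disable(void) { }	/* no-op stubs for the sketch */
static void local_irq_enable(void) { }
static void handle_requests(void) { requests = 0; }

static void vcpu_enter_guest(void)
{
again:
	handle_requests();		/* drain bits while IRQs are on */

	local_irq_disable();
	if (requests || resched) {	/* something slipped in: retry  */
		local_irq_enable();
		goto again;
	}
	puts("entering guest with no pending requests");
	local_irq_enable();
}

int main(void)
{
	vcpu_enter_guest();
	return 0;
}
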
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index f2a696d6a243..932f216d890c 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c | |||
@@ -677,8 +677,9 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, | |||
677 | c->use_modrm_ea = 1; | 677 | c->use_modrm_ea = 1; |
678 | 678 | ||
679 | if (c->modrm_mod == 3) { | 679 | if (c->modrm_mod == 3) { |
680 | c->modrm_val = *(unsigned long *) | 680 | c->modrm_ptr = decode_register(c->modrm_rm, |
681 | decode_register(c->modrm_rm, c->regs, c->d & ByteOp); | 681 | c->regs, c->d & ByteOp); |
682 | c->modrm_val = *(unsigned long *)c->modrm_ptr; | ||
682 | return rc; | 683 | return rc; |
683 | } | 684 | } |
684 | 685 | ||
@@ -1005,6 +1006,7 @@ done_prefixes: | |||
1005 | if ((c->d & ModRM) && c->modrm_mod == 3) { | 1006 | if ((c->d & ModRM) && c->modrm_mod == 3) { |
1006 | c->src.type = OP_REG; | 1007 | c->src.type = OP_REG; |
1007 | c->src.val = c->modrm_val; | 1008 | c->src.val = c->modrm_val; |
1009 | c->src.ptr = c->modrm_ptr; | ||
1008 | break; | 1010 | break; |
1009 | } | 1011 | } |
1010 | c->src.type = OP_MEM; | 1012 | c->src.type = OP_MEM; |
@@ -1049,6 +1051,7 @@ done_prefixes: | |||
1049 | if ((c->d & ModRM) && c->modrm_mod == 3) { | 1051 | if ((c->d & ModRM) && c->modrm_mod == 3) { |
1050 | c->dst.type = OP_REG; | 1052 | c->dst.type = OP_REG; |
1051 | c->dst.val = c->dst.orig_val = c->modrm_val; | 1053 | c->dst.val = c->dst.orig_val = c->modrm_val; |
1054 | c->dst.ptr = c->modrm_ptr; | ||
1052 | break; | 1055 | break; |
1053 | } | 1056 | } |
1054 | c->dst.type = OP_MEM; | 1057 | c->dst.type = OP_MEM; |
@@ -1724,7 +1727,8 @@ twobyte_insn: | |||
1724 | if (rc) | 1727 | if (rc) |
1725 | goto done; | 1728 | goto done; |
1726 | 1729 | ||
1727 | kvm_emulate_hypercall(ctxt->vcpu); | 1730 | /* Let the processor re-execute the fixed hypercall */ |
1731 | c->eip = ctxt->vcpu->arch.rip; | ||
1728 | /* Disable writeback. */ | 1732 | /* Disable writeback. */ |
1729 | c->dst.type = OP_NONE; | 1733 | c->dst.type = OP_NONE; |
1730 | break; | 1734 | break; |
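
Two fixes above: the decoder now remembers the register address it resolved (modrm_ptr) so later write-back stores through the same byte, and the patched-hypercall path rewinds eip so the CPU re-executes the now-correct instruction instead of emulating it. The pointer caching matters for ByteOp, where operand indices 4-7 select the high bytes AH/CH/DH/BH, whose addresses cannot be recomputed from the value alone. A sketch of that aliasing, assuming a little-endian register file:

#include <stdint.h>
#include <stdio.h>

static unsigned long regs[8];	/* RAX..RDI, illustrative layout */

/* Byte-op register decode: indexes 4-7 are AH/CH/DH/BH, i.e. byte 1
 * of regs[0..3] on little-endian, mirroring decode_register(). */
static void *decode_reg(int rm, int byteop)
{
	if (byteop && rm >= 4)
		return (uint8_t *)&regs[rm - 4] + 1;
	return &regs[rm];
}

int main(void)
{
	void *ptr = decode_reg(4, 1);	/* AH, cached like c->modrm_ptr */

	*(uint8_t *)ptr = 0x42;		/* write-back through the cache */
	printf("rax = %#lx\n", regs[0]);/* 0x4200: landed in AH, not SP */
	return 0;
}
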
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index af65b2da3ba0..5c7e2fd52075 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -582,8 +582,9 @@ static void __init lguest_init_IRQ(void) | |||
582 | int vector = FIRST_EXTERNAL_VECTOR + i; | 582 | int vector = FIRST_EXTERNAL_VECTOR + i; |
583 | if (vector != SYSCALL_VECTOR) { | 583 | if (vector != SYSCALL_VECTOR) { |
584 | set_intr_gate(vector, interrupt[i]); | 584 | set_intr_gate(vector, interrupt[i]); |
585 | set_irq_chip_and_handler(i, &lguest_irq_controller, | 585 | set_irq_chip_and_handler_name(i, &lguest_irq_controller, |
586 | handle_level_irq); | 586 | handle_level_irq, |
587 | "level"); | ||
587 | } | 588 | } |
588 | } | 589 | } |
589 | /* This call is required to set up for 4k stacks, where we have | 590 | /* This call is required to set up for 4k stacks, where we have |
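
Supplying the handler name here only affects diagnostics: /proc/interrupts prints the chip and flow-handler names together, so an lguest IRQ row should read roughly like the following (counts and device names illustrative):

	  1:        152   lguest-level   virtio console
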
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 70bebd310408..ee1c3f635157 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S | |||
@@ -217,19 +217,19 @@ ENTRY(copy_user_generic_unrolled) | |||
217 | /* table sorted by exception address */ | 217 | /* table sorted by exception address */ |
218 | .section __ex_table,"a" | 218 | .section __ex_table,"a" |
219 | .align 8 | 219 | .align 8 |
220 | .quad .Ls1,.Ls1e | 220 | .quad .Ls1,.Ls1e /* Ls1-Ls4 have copied zero bytes */ |
221 | .quad .Ls2,.Ls2e | 221 | .quad .Ls2,.Ls1e |
222 | .quad .Ls3,.Ls3e | 222 | .quad .Ls3,.Ls1e |
223 | .quad .Ls4,.Ls4e | 223 | .quad .Ls4,.Ls1e |
224 | .quad .Ld1,.Ls1e | 224 | .quad .Ld1,.Ls1e /* Ld1-Ld4 have copied 0-24 bytes */ |
225 | .quad .Ld2,.Ls2e | 225 | .quad .Ld2,.Ls2e |
226 | .quad .Ld3,.Ls3e | 226 | .quad .Ld3,.Ls3e |
227 | .quad .Ld4,.Ls4e | 227 | .quad .Ld4,.Ls4e |
228 | .quad .Ls5,.Ls5e | 228 | .quad .Ls5,.Ls5e /* Ls5-Ls8 have copied 32 bytes */ |
229 | .quad .Ls6,.Ls6e | 229 | .quad .Ls6,.Ls5e |
230 | .quad .Ls7,.Ls7e | 230 | .quad .Ls7,.Ls5e |
231 | .quad .Ls8,.Ls8e | 231 | .quad .Ls8,.Ls5e |
232 | .quad .Ld5,.Ls5e | 232 | .quad .Ld5,.Ls5e /* Ld5-Ld8 have copied 32-56 bytes */ |
233 | .quad .Ld6,.Ls6e | 233 | .quad .Ld6,.Ls6e |
234 | .quad .Ld7,.Ls7e | 234 | .quad .Ld7,.Ls7e |
235 | .quad .Ld8,.Ls8e | 235 | .quad .Ld8,.Ls8e |
@@ -244,11 +244,8 @@ ENTRY(copy_user_generic_unrolled) | |||
244 | .quad .Le5,.Le_zero | 244 | .quad .Le5,.Le_zero |
245 | .previous | 245 | .previous |
246 | 246 | ||
247 | /* compute 64-offset for main loop. 8 bytes accuracy with error on the | ||
248 | pessimistic side. this is gross. it would be better to fix the | ||
249 | interface. */ | ||
250 | /* eax: zero, ebx: 64 */ | 247 | /* eax: zero, ebx: 64 */ |
251 | .Ls1e: addl $8,%eax | 248 | .Ls1e: addl $8,%eax /* eax is bytes left uncopied within the loop (Ls1e: 64 .. Ls8e: 8) */ |
252 | .Ls2e: addl $8,%eax | 249 | .Ls2e: addl $8,%eax |
253 | .Ls3e: addl $8,%eax | 250 | .Ls3e: addl $8,%eax |
254 | .Ls4e: addl $8,%eax | 251 | .Ls4e: addl $8,%eax |
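
The rewritten table collapses the first four source-fault entries onto .Ls1e (nothing has been stored yet, so the whole 64-byte chunk is outstanding), and the fall-through chain of addl $8 instructions computes the remainder for the others. The arithmetic the chain encodes, as a sketch:

#include <stdio.h>

/* Entering the fixup at .Ls<k>e falls through (9 - k) "addl $8,%eax"
 * instructions, so eax reports 8 * (9 - k) bytes left uncopied
 * (.Ls1e: 64 ... .Ls8e: 8), matching the new table comments. */
static int bytes_left(int k)
{
	return 8 * (9 - k);
}

int main(void)
{
	for (int k = 1; k <= 8; k++)
		printf(".Ls%de -> %2d bytes left\n", k, bytes_left(k));
	return 0;
}
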
diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S index 5196762b3b0e..9d3d1ab83763 100644 --- a/arch/x86/lib/copy_user_nocache_64.S +++ b/arch/x86/lib/copy_user_nocache_64.S | |||
@@ -145,19 +145,19 @@ ENTRY(__copy_user_nocache) | |||
145 | /* table sorted by exception address */ | 145 | /* table sorted by exception address */ |
146 | .section __ex_table,"a" | 146 | .section __ex_table,"a" |
147 | .align 8 | 147 | .align 8 |
148 | .quad .Ls1,.Ls1e | 148 | .quad .Ls1,.Ls1e /* .Ls[1-4] - 0 bytes copied */ |
149 | .quad .Ls2,.Ls2e | 149 | .quad .Ls2,.Ls1e |
150 | .quad .Ls3,.Ls3e | 150 | .quad .Ls3,.Ls1e |
151 | .quad .Ls4,.Ls4e | 151 | .quad .Ls4,.Ls1e |
152 | .quad .Ld1,.Ls1e | 152 | .quad .Ld1,.Ls1e /* .Ld[1-4] - 0..24 bytes copied */ |
153 | .quad .Ld2,.Ls2e | 153 | .quad .Ld2,.Ls2e |
154 | .quad .Ld3,.Ls3e | 154 | .quad .Ld3,.Ls3e |
155 | .quad .Ld4,.Ls4e | 155 | .quad .Ld4,.Ls4e |
156 | .quad .Ls5,.Ls5e | 156 | .quad .Ls5,.Ls5e /* .Ls[5-8] - 32 bytes copied */ |
157 | .quad .Ls6,.Ls6e | 157 | .quad .Ls6,.Ls5e |
158 | .quad .Ls7,.Ls7e | 158 | .quad .Ls7,.Ls5e |
159 | .quad .Ls8,.Ls8e | 159 | .quad .Ls8,.Ls5e |
160 | .quad .Ld5,.Ls5e | 160 | .quad .Ld5,.Ls5e /* .Ld[5-8] - 32..56 bytes copied */ |
161 | .quad .Ld6,.Ls6e | 161 | .quad .Ld6,.Ls6e |
162 | .quad .Ld7,.Ls7e | 162 | .quad .Ld7,.Ls7e |
163 | .quad .Ld8,.Ls8e | 163 | .quad .Ld8,.Ls8e |
@@ -172,11 +172,8 @@ ENTRY(__copy_user_nocache) | |||
172 | .quad .Le5,.Le_zero | 172 | .quad .Le5,.Le_zero |
173 | .previous | 173 | .previous |
174 | 174 | ||
175 | /* compute 64-offset for main loop. 8 bytes accuracy with error on the | ||
176 | pessimistic side. this is gross. it would be better to fix the | ||
177 | interface. */ | ||
178 | /* eax: zero, ebx: 64 */ | 175 | /* eax: zero, ebx: 64 */ |
179 | .Ls1e: addl $8,%eax | 176 | .Ls1e: addl $8,%eax /* eax: bytes left uncopied: Ls1e: 64 .. Ls8e: 8 */ |
180 | .Ls2e: addl $8,%eax | 177 | .Ls2e: addl $8,%eax |
181 | .Ls3e: addl $8,%eax | 178 | .Ls3e: addl $8,%eax |
182 | .Ls4e: addl $8,%eax | 179 | .Ls4e: addl $8,%eax |
diff --git a/arch/x86/lib/csum-partial_64.c b/arch/x86/lib/csum-partial_64.c index bc503f506903..bf51144d97e1 100644 --- a/arch/x86/lib/csum-partial_64.c +++ b/arch/x86/lib/csum-partial_64.c | |||
@@ -136,8 +136,6 @@ __wsum csum_partial(const void *buff, int len, __wsum sum) | |||
136 | (__force u32)sum); | 136 | (__force u32)sum); |
137 | } | 137 | } |
138 | 138 | ||
139 | EXPORT_SYMBOL(csum_partial); | ||
140 | |||
141 | /* | 139 | /* |
142 | * this routine is used for miscellaneous IP-like checksums, mainly | 140 | * this routine is used for miscellaneous IP-like checksums, mainly |
143 | * in icmp.c | 141 | * in icmp.c |
diff --git a/arch/x86/lib/delay_32.c b/arch/x86/lib/delay_32.c index 4535e6d147ad..d710f2d167bb 100644 --- a/arch/x86/lib/delay_32.c +++ b/arch/x86/lib/delay_32.c | |||
@@ -44,13 +44,36 @@ static void delay_loop(unsigned long loops) | |||
44 | static void delay_tsc(unsigned long loops) | 44 | static void delay_tsc(unsigned long loops) |
45 | { | 45 | { |
46 | unsigned long bclock, now; | 46 | unsigned long bclock, now; |
47 | int cpu; | ||
47 | 48 | ||
48 | preempt_disable(); /* TSC's are per-cpu */ | 49 | preempt_disable(); |
50 | cpu = smp_processor_id(); | ||
49 | rdtscl(bclock); | 51 | rdtscl(bclock); |
50 | do { | 52 | for (;;) { |
51 | rep_nop(); | ||
52 | rdtscl(now); | 53 | rdtscl(now); |
53 | } while ((now-bclock) < loops); | 54 | if ((now - bclock) >= loops) |
55 | break; | ||
56 | |||
57 | /* Allow RT tasks to run */ | ||
58 | preempt_enable(); | ||
59 | rep_nop(); | ||
60 | preempt_disable(); | ||
61 | |||
62 | /* | ||
63 | * It is possible that we moved to another CPU, and | ||
64 | * since TSC's are per-cpu we need to calculate | ||
65 | * that. The delay must guarantee that we wait "at | ||
66 | * least" the amount of time. Being moved to another | ||
67 | * CPU could make the wait longer but we just need to | ||
68 | * make sure we waited long enough. Rebalance the | ||
69 | * counter for this CPU. | ||
70 | */ | ||
71 | if (unlikely(cpu != smp_processor_id())) { | ||
72 | loops -= (now - bclock); | ||
73 | cpu = smp_processor_id(); | ||
74 | rdtscl(bclock); | ||
75 | } | ||
76 | } | ||
54 | preempt_enable(); | 77 | preempt_enable(); |
55 | } | 78 | } |
56 | 79 | ||
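
Because preemption is now enabled around rep_nop(), the delaying task can migrate mid-wait, and TSC values read on different CPUs are not comparable. The rebalance credits the time already served on the old CPU and restarts the baseline on the new one, so the delay is always "at least" as long as requested. A worked example of that bookkeeping, with illustrative tick values:

#include <stdio.h>

int main(void)
{
	unsigned long loops = 10000;	/* ticks requested              */
	unsigned long bclock = 5000;	/* baseline TSC on the old CPU  */
	unsigned long now = 9000;	/* old-CPU TSC at migration     */

	/* As in the hunks above: credit what was already waited,
	 * then rebase onto the new CPU's (unrelated) TSC reading. */
	loops -= now - bclock;		/* 4000 ticks served, 6000 owed */
	bclock = 7654321;		/* rdtscl() on the new CPU      */

	printf("still owe %lu ticks from baseline %lu\n", loops, bclock);
	return 0;
}
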
diff --git a/arch/x86/lib/delay_64.c b/arch/x86/lib/delay_64.c index bbc610518516..4c441be92641 100644 --- a/arch/x86/lib/delay_64.c +++ b/arch/x86/lib/delay_64.c | |||
@@ -31,14 +31,36 @@ int __devinit read_current_timer(unsigned long *timer_value) | |||
31 | void __delay(unsigned long loops) | 31 | void __delay(unsigned long loops) |
32 | { | 32 | { |
33 | unsigned bclock, now; | 33 | unsigned bclock, now; |
34 | int cpu; | ||
34 | 35 | ||
35 | preempt_disable(); /* TSC's are pre-cpu */ | 36 | preempt_disable(); |
37 | cpu = smp_processor_id(); | ||
36 | rdtscl(bclock); | 38 | rdtscl(bclock); |
37 | do { | 39 | for (;;) { |
38 | rep_nop(); | ||
39 | rdtscl(now); | 40 | rdtscl(now); |
41 | if ((now - bclock) >= loops) | ||
42 | break; | ||
43 | |||
44 | /* Allow RT tasks to run */ | ||
45 | preempt_enable(); | ||
46 | rep_nop(); | ||
47 | preempt_disable(); | ||
48 | |||
49 | /* | ||
50 | * It is possible that we moved to another CPU, and | ||
51 | * since TSC's are per-cpu we need to calculate | ||
52 | * that. The delay must guarantee that we wait "at | ||
53 | * least" the amount of time. Being moved to another | ||
54 | * CPU could make the wait longer but we just need to | ||
55 | * make sure we waited long enough. Rebalance the | ||
56 | * counter for this CPU. | ||
57 | */ | ||
58 | if (unlikely(cpu != smp_processor_id())) { | ||
59 | loops -= (now - bclock); | ||
60 | cpu = smp_processor_id(); | ||
61 | rdtscl(bclock); | ||
62 | } | ||
40 | } | 63 | } |
41 | while ((now-bclock) < loops); | ||
42 | preempt_enable(); | 64 | preempt_enable(); |
43 | } | 65 | } |
44 | EXPORT_SYMBOL(__delay); | 66 | EXPORT_SYMBOL(__delay); |
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index 6e38d877ea77..c7b06feb139b 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <asm/uaccess.h> | 30 | #include <asm/uaccess.h> |
31 | #include <asm/desc.h> | 31 | #include <asm/desc.h> |
32 | #include <asm/user.h> | 32 | #include <asm/user.h> |
33 | #include <asm/i387.h> | ||
33 | 34 | ||
34 | #include "fpu_system.h" | 35 | #include "fpu_system.h" |
35 | #include "fpu_emu.h" | 36 | #include "fpu_emu.h" |
@@ -146,6 +147,13 @@ asmlinkage void math_emulate(long arg) | |||
146 | unsigned long code_limit = 0; /* Initialized to stop compiler warnings */ | 147 | unsigned long code_limit = 0; /* Initialized to stop compiler warnings */ |
147 | struct desc_struct code_descriptor; | 148 | struct desc_struct code_descriptor; |
148 | 149 | ||
150 | if (!used_math()) { | ||
151 | if (init_fpu(current)) { | ||
152 | do_group_exit(SIGKILL); | ||
153 | return; | ||
154 | } | ||
155 | } | ||
156 | |||
149 | #ifdef RE_ENTRANT_CHECKING | 157 | #ifdef RE_ENTRANT_CHECKING |
150 | if (emulating) { | 158 | if (emulating) { |
151 | printk("ERROR: wm-FPU-emu is not RE-ENTRANT!\n"); | 159 | printk("ERROR: wm-FPU-emu is not RE-ENTRANT!\n"); |
@@ -153,11 +161,6 @@ asmlinkage void math_emulate(long arg) | |||
153 | RE_ENTRANT_CHECK_ON; | 161 | RE_ENTRANT_CHECK_ON; |
154 | #endif /* RE_ENTRANT_CHECKING */ | 162 | #endif /* RE_ENTRANT_CHECKING */ |
155 | 163 | ||
156 | if (!used_math()) { | ||
157 | finit(); | ||
158 | set_used_math(); | ||
159 | } | ||
160 | |||
161 | SETUP_DATA_AREA(arg); | 164 | SETUP_DATA_AREA(arg); |
162 | 165 | ||
163 | FPU_ORIG_EIP = FPU_EIP; | 166 | FPU_ORIG_EIP = FPU_EIP; |
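
The first-use initialization now runs through init_fpu() before anything else, so an allocation failure can be handled by killing the task instead of emulating with no state behind it. A sketch of the lazy-init shape, with hypothetical names and sizes:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void *fpu_state;			/* per-task state, illustrative */

static int init_fpu_state(void)		/* stand-in for init_fpu() */
{
	fpu_state = malloc(512);	/* hypothetical save-area size */
	if (!fpu_state)
		return -1;		/* -ENOMEM in the kernel       */
	memset(fpu_state, 0, 512);	/* finit-like reset            */
	return 0;
}

int main(void)
{
	if (!fpu_state && init_fpu_state() < 0) {
		fputs("no memory for FPU state, killing task\n", stderr);
		exit(1);		/* do_group_exit(SIGKILL) above */
	}
	puts("FPU state ready; emulation may proceed");
	free(fpu_state);
	return 0;
}
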
diff --git a/arch/x86/math-emu/reg_constant.c b/arch/x86/math-emu/reg_constant.c index 04869e64b18e..00548354912f 100644 --- a/arch/x86/math-emu/reg_constant.c +++ b/arch/x86/math-emu/reg_constant.c | |||
@@ -16,8 +16,8 @@ | |||
16 | #include "reg_constant.h" | 16 | #include "reg_constant.h" |
17 | #include "control_w.h" | 17 | #include "control_w.h" |
18 | 18 | ||
19 | #define MAKE_REG(s,e,l,h) { l, h, \ | 19 | #define MAKE_REG(s, e, l, h) { l, h, \ |
20 | ((EXTENDED_Ebias+(e)) | ((SIGN_##s != 0)*0x8000)) } | 20 | ((EXTENDED_Ebias+(e)) | ((SIGN_##s != 0)*0x8000)) } |
21 | 21 | ||
22 | FPU_REG const CONST_1 = MAKE_REG(POS, 0, 0x00000000, 0x80000000); | 22 | FPU_REG const CONST_1 = MAKE_REG(POS, 0, 0x00000000, 0x80000000); |
23 | #if 0 | 23 | #if 0 |
@@ -40,7 +40,7 @@ FPU_REG const CONST_PI2extra = MAKE_REG(NEG, -66, | |||
40 | FPU_REG const CONST_Z = MAKE_REG(POS, EXP_UNDER, 0x0, 0x0); | 40 | FPU_REG const CONST_Z = MAKE_REG(POS, EXP_UNDER, 0x0, 0x0); |
41 | 41 | ||
42 | /* Only the sign and significand (and tag) are used in internal NaNs */ | 42 | /* Only the sign and significand (and tag) are used in internal NaNs */ |
43 | /* The 80486 never generates one of these | 43 | /* The 80486 never generates one of these |
44 | FPU_REG const CONST_SNAN = MAKE_REG(POS, EXP_OVER, 0x00000001, 0x80000000); | 44 | FPU_REG const CONST_SNAN = MAKE_REG(POS, EXP_OVER, 0x00000001, 0x80000000); |
45 | */ | 45 | */ |
46 | /* This is the real indefinite QNaN */ | 46 | /* This is the real indefinite QNaN */ |
@@ -49,7 +49,7 @@ FPU_REG const CONST_QNaN = MAKE_REG(NEG, EXP_OVER, 0x00000000, 0xC0000000); | |||
49 | /* Only the sign (and tag) is used in internal infinities */ | 49 | /* Only the sign (and tag) is used in internal infinities */ |
50 | FPU_REG const CONST_INF = MAKE_REG(POS, EXP_OVER, 0x00000000, 0x80000000); | 50 | FPU_REG const CONST_INF = MAKE_REG(POS, EXP_OVER, 0x00000000, 0x80000000); |
51 | 51 | ||
52 | static void fld_const(FPU_REG const *c, int adj, u_char tag) | 52 | static void fld_const(FPU_REG const * c, int adj, u_char tag) |
53 | { | 53 | { |
54 | FPU_REG *st_new_ptr; | 54 | FPU_REG *st_new_ptr; |
55 | 55 | ||
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c index 914ccf983687..8b4eac0ca07d 100644 --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/discontig_32.c | |||
@@ -156,7 +156,7 @@ static void __init propagate_e820_map_node(int nid) | |||
156 | */ | 156 | */ |
157 | static void __init allocate_pgdat(int nid) | 157 | static void __init allocate_pgdat(int nid) |
158 | { | 158 | { |
159 | if (nid && node_has_online_mem(nid)) | 159 | if (nid && node_has_online_mem(nid) && node_remap_start_vaddr[nid]) |
160 | NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid]; | 160 | NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid]; |
161 | else { | 161 | else { |
162 | NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(min_low_pfn)); | 162 | NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(min_low_pfn)); |
@@ -164,16 +164,13 @@ static void __init allocate_pgdat(int nid) | |||
164 | } | 164 | } |
165 | } | 165 | } |
166 | 166 | ||
167 | #ifdef CONFIG_DISCONTIGMEM | ||
168 | /* | 167 | /* |
169 | * In the discontig memory model, a portion of the kernel virtual area (KVA) | 168 | * In the DISCONTIGMEM and SPARSEMEM memory model, a portion of the kernel |
170 | * is reserved and portions of nodes are mapped using it. This is to allow | 169 | * virtual address space (KVA) is reserved and portions of nodes are mapped |
171 | * node-local memory to be allocated for structures that would normally require | 170 | * using it. This is to allow node-local memory to be allocated for |
172 | * ZONE_NORMAL. The memory is allocated with alloc_remap() and callers | 171 | * structures that would normally require ZONE_NORMAL. The memory is |
173 | * should be prepared to allocate from the bootmem allocator instead. This KVA | 172 | * allocated with alloc_remap() and callers should be prepared to allocate |
174 | * mechanism is incompatible with SPARSEMEM as it makes assumptions about the | 173 | * from the bootmem allocator instead. |
175 | * layout of memory that are broken if alloc_remap() succeeds for some of the | ||
176 | * map and fails for others | ||
177 | */ | 174 | */ |
178 | static unsigned long node_remap_start_pfn[MAX_NUMNODES]; | 175 | static unsigned long node_remap_start_pfn[MAX_NUMNODES]; |
179 | static void *node_remap_end_vaddr[MAX_NUMNODES]; | 176 | static void *node_remap_end_vaddr[MAX_NUMNODES]; |
@@ -290,25 +287,6 @@ static void init_remap_allocator(int nid) | |||
290 | (ulong) pfn_to_kaddr(highstart_pfn | 287 | (ulong) pfn_to_kaddr(highstart_pfn |
291 | + node_remap_offset[nid] + node_remap_size[nid])); | 288 | + node_remap_offset[nid] + node_remap_size[nid])); |
292 | } | 289 | } |
293 | #else | ||
294 | void *alloc_remap(int nid, unsigned long size) | ||
295 | { | ||
296 | return NULL; | ||
297 | } | ||
298 | |||
299 | static unsigned long calculate_numa_remap_pages(void) | ||
300 | { | ||
301 | return 0; | ||
302 | } | ||
303 | |||
304 | static void init_remap_allocator(int nid) | ||
305 | { | ||
306 | } | ||
307 | |||
308 | void __init remap_numa_kva(void) | ||
309 | { | ||
310 | } | ||
311 | #endif /* CONFIG_DISCONTIGMEM */ | ||
312 | 290 | ||
313 | extern void setup_bootmem_allocator(void); | 291 | extern void setup_bootmem_allocator(void); |
314 | unsigned long __init setup_memory(void) | 292 | unsigned long __init setup_memory(void) |
@@ -476,3 +454,21 @@ int memory_add_physaddr_to_nid(u64 addr) | |||
476 | 454 | ||
477 | EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); | 455 | EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); |
478 | #endif | 456 | #endif |
457 | |||
458 | #if defined(CONFIG_ACPI_NUMA) && !defined(CONFIG_HAVE_ARCH_PARSE_SRAT) | ||
459 | /* | ||
460 | * Dummy on 32-bit, for now: | ||
461 | */ | ||
462 | void __init acpi_numa_slit_init(struct acpi_table_slit *slit) | ||
463 | { | ||
464 | } | ||
465 | |||
466 | void __init | ||
467 | acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) | ||
468 | { | ||
469 | } | ||
470 | |||
471 | void __init acpi_numa_arch_fixup(void) | ||
472 | { | ||
473 | } | ||
474 | #endif | ||
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 2c24bea92c66..0bb0caed8971 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c | |||
@@ -42,7 +42,7 @@ static struct addr_marker address_markers[] = { | |||
42 | { 0, "User Space" }, | 42 | { 0, "User Space" }, |
43 | #ifdef CONFIG_X86_64 | 43 | #ifdef CONFIG_X86_64 |
44 | { 0x8000000000000000UL, "Kernel Space" }, | 44 | { 0x8000000000000000UL, "Kernel Space" }, |
45 | { 0xffff810000000000UL, "Low Kernel Mapping" }, | 45 | { PAGE_OFFSET, "Low Kernel Mapping" }, |
46 | { VMALLOC_START, "vmalloc() Area" }, | 46 | { VMALLOC_START, "vmalloc() Area" }, |
47 | { VMEMMAP_START, "Vmemmap" }, | 47 | { VMEMMAP_START, "Vmemmap" }, |
48 | { __START_KERNEL_map, "High Kernel Mapping" }, | 48 | { __START_KERNEL_map, "High Kernel Mapping" }, |
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index fd7e1798c75a..1e64795714c8 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -55,11 +55,7 @@ static inline int notify_page_fault(struct pt_regs *regs) | |||
55 | int ret = 0; | 55 | int ret = 0; |
56 | 56 | ||
57 | /* kprobe_running() needs smp_processor_id() */ | 57 | /* kprobe_running() needs smp_processor_id() */ |
58 | #ifdef CONFIG_X86_32 | ||
59 | if (!user_mode_vm(regs)) { | 58 | if (!user_mode_vm(regs)) { |
60 | #else | ||
61 | if (!user_mode(regs)) { | ||
62 | #endif | ||
63 | preempt_disable(); | 59 | preempt_disable(); |
64 | if (kprobe_running() && kprobe_fault_handler(regs, 14)) | 60 | if (kprobe_running() && kprobe_fault_handler(regs, 14)) |
65 | ret = 1; | 61 | ret = 1; |
@@ -497,6 +493,11 @@ static int vmalloc_fault(unsigned long address) | |||
497 | unsigned long pgd_paddr; | 493 | unsigned long pgd_paddr; |
498 | pmd_t *pmd_k; | 494 | pmd_t *pmd_k; |
499 | pte_t *pte_k; | 495 | pte_t *pte_k; |
496 | |||
497 | /* Make sure we are in vmalloc area */ | ||
498 | if (!(address >= VMALLOC_START && address < VMALLOC_END)) | ||
499 | return -1; | ||
500 | |||
500 | /* | 501 | /* |
501 | * Synchronize this task's top level page-table | 502 | * Synchronize this task's top level page-table |
502 | * with the 'reference' page table. | 503 | * with the 'reference' page table. |
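
vmalloc_fault() now rejects addresses outside the vmalloc window up front, so the fault handler can probe any faulting address and treat -1 uniformly as "not a vmalloc fault". A sketch of the guard, with an illustrative 32-bit layout:

#include <stdio.h>

#define VMALLOC_START 0xf7800000UL	/* illustrative values only */
#define VMALLOC_END   0xff7fe000UL

static int vmalloc_fault(unsigned long address)
{
	/* Make sure we are in the vmalloc area, as the hunk adds. */
	if (!(address >= VMALLOC_START && address < VMALLOC_END))
		return -1;
	/* ... synchronize top-level page-table entries here ... */
	return 0;
}

int main(void)
{
	printf("user addr: %d, vmalloc addr: %d\n",
	       vmalloc_fault(0x8048000UL), vmalloc_fault(0xf8000000UL));
	return 0;
}
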
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index de236e419cb5..d71be0eb0130 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -162,6 +162,7 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) | |||
162 | pgd_t *pgd; | 162 | pgd_t *pgd; |
163 | pmd_t *pmd; | 163 | pmd_t *pmd; |
164 | pte_t *pte; | 164 | pte_t *pte; |
165 | unsigned pages_2m = 0, pages_4k = 0; | ||
165 | 166 | ||
166 | pgd_idx = pgd_index(PAGE_OFFSET); | 167 | pgd_idx = pgd_index(PAGE_OFFSET); |
167 | pgd = pgd_base + pgd_idx; | 168 | pgd = pgd_base + pgd_idx; |
@@ -197,6 +198,7 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) | |||
197 | is_kernel_text(addr2)) | 198 | is_kernel_text(addr2)) |
198 | prot = PAGE_KERNEL_LARGE_EXEC; | 199 | prot = PAGE_KERNEL_LARGE_EXEC; |
199 | 200 | ||
201 | pages_2m++; | ||
200 | set_pmd(pmd, pfn_pmd(pfn, prot)); | 202 | set_pmd(pmd, pfn_pmd(pfn, prot)); |
201 | 203 | ||
202 | pfn += PTRS_PER_PTE; | 204 | pfn += PTRS_PER_PTE; |
@@ -213,11 +215,14 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) | |||
213 | if (is_kernel_text(addr)) | 215 | if (is_kernel_text(addr)) |
214 | prot = PAGE_KERNEL_EXEC; | 216 | prot = PAGE_KERNEL_EXEC; |
215 | 217 | ||
218 | pages_4k++; | ||
216 | set_pte(pte, pfn_pte(pfn, prot)); | 219 | set_pte(pte, pfn_pte(pfn, prot)); |
217 | } | 220 | } |
218 | max_pfn_mapped = pfn; | 221 | max_pfn_mapped = pfn; |
219 | } | 222 | } |
220 | } | 223 | } |
224 | update_page_count(PG_LEVEL_2M, pages_2m); | ||
225 | update_page_count(PG_LEVEL_4K, pages_4k); | ||
221 | } | 226 | } |
222 | 227 | ||
223 | static inline int page_kills_ppro(unsigned long pagenr) | 228 | static inline int page_kills_ppro(unsigned long pagenr) |
@@ -438,8 +443,6 @@ void zap_low_mappings(void) | |||
438 | { | 443 | { |
439 | int i; | 444 | int i; |
440 | 445 | ||
441 | save_pg_dir(); | ||
442 | |||
443 | /* | 446 | /* |
444 | * Zap initial low-memory mappings. | 447 | * Zap initial low-memory mappings. |
445 | * | 448 | * |
@@ -573,17 +576,6 @@ void __init mem_init(void) | |||
573 | #endif | 576 | #endif |
574 | bad_ppro = ppro_with_ram_bug(); | 577 | bad_ppro = ppro_with_ram_bug(); |
575 | 578 | ||
576 | #ifdef CONFIG_HIGHMEM | ||
577 | /* check that fixmap and pkmap do not overlap */ | ||
578 | if (PKMAP_BASE + LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) { | ||
579 | printk(KERN_ERR | ||
580 | "fixmap and kmap areas overlap - this will crash\n"); | ||
581 | printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n", | ||
582 | PKMAP_BASE, PKMAP_BASE + LAST_PKMAP*PAGE_SIZE, | ||
583 | FIXADDR_START); | ||
584 | BUG(); | ||
585 | } | ||
586 | #endif | ||
587 | /* this will put all low memory onto the freelists */ | 579 | /* this will put all low memory onto the freelists */ |
588 | totalram_pages += free_all_bootmem(); | 580 | totalram_pages += free_all_bootmem(); |
589 | 581 | ||
@@ -616,7 +608,6 @@ void __init mem_init(void) | |||
616 | (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) | 608 | (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) |
617 | ); | 609 | ); |
618 | 610 | ||
619 | #if 1 /* double-sanity-check paranoia */ | ||
620 | printk(KERN_INFO "virtual kernel memory layout:\n" | 611 | printk(KERN_INFO "virtual kernel memory layout:\n" |
621 | " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" | 612 | " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" |
622 | #ifdef CONFIG_HIGHMEM | 613 | #ifdef CONFIG_HIGHMEM |
@@ -657,22 +648,13 @@ void __init mem_init(void) | |||
657 | #endif | 648 | #endif |
658 | BUG_ON(VMALLOC_START > VMALLOC_END); | 649 | BUG_ON(VMALLOC_START > VMALLOC_END); |
659 | BUG_ON((unsigned long)high_memory > VMALLOC_START); | 650 | BUG_ON((unsigned long)high_memory > VMALLOC_START); |
660 | #endif /* double-sanity-check paranoia */ | ||
661 | 651 | ||
662 | if (boot_cpu_data.wp_works_ok < 0) | 652 | if (boot_cpu_data.wp_works_ok < 0) |
663 | test_wp_bit(); | 653 | test_wp_bit(); |
664 | 654 | ||
665 | cpa_init(); | 655 | cpa_init(); |
666 | 656 | save_pg_dir(); | |
667 | /* | ||
668 | * Subtle. SMP is doing it's boot stuff late (because it has to | ||
669 | * fork idle threads) - but it also needs low mappings for the | ||
670 | * protected-mode entry to work. We zap these entries only after | ||
671 | * the WP-bit has been tested. | ||
672 | */ | ||
673 | #ifndef CONFIG_SMP | ||
674 | zap_low_mappings(); | 657 | zap_low_mappings(); |
675 | #endif | ||
676 | } | 658 | } |
677 | 659 | ||
678 | #ifdef CONFIG_MEMORY_HOTPLUG | 660 | #ifdef CONFIG_MEMORY_HOTPLUG |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 32ba13b0f818..48623ae628fb 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/swap.h> | 18 | #include <linux/swap.h> |
19 | #include <linux/smp.h> | 19 | #include <linux/smp.h> |
20 | #include <linux/init.h> | 20 | #include <linux/init.h> |
21 | #include <linux/initrd.h> | ||
21 | #include <linux/pagemap.h> | 22 | #include <linux/pagemap.h> |
22 | #include <linux/bootmem.h> | 23 | #include <linux/bootmem.h> |
23 | #include <linux/proc_fs.h> | 24 | #include <linux/proc_fs.h> |
@@ -135,7 +136,7 @@ static __init void *spp_getpage(void) | |||
135 | return ptr; | 136 | return ptr; |
136 | } | 137 | } |
137 | 138 | ||
138 | static void | 139 | static __init void |
139 | set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot) | 140 | set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot) |
140 | { | 141 | { |
141 | pgd_t *pgd; | 142 | pgd_t *pgd; |
@@ -206,7 +207,7 @@ void __init cleanup_highmap(void) | |||
206 | pmd_t *last_pmd = pmd + PTRS_PER_PMD; | 207 | pmd_t *last_pmd = pmd + PTRS_PER_PMD; |
207 | 208 | ||
208 | for (; pmd < last_pmd; pmd++, vaddr += PMD_SIZE) { | 209 | for (; pmd < last_pmd; pmd++, vaddr += PMD_SIZE) { |
209 | if (!pmd_present(*pmd)) | 210 | if (pmd_none(*pmd)) |
210 | continue; | 211 | continue; |
211 | if (vaddr < (unsigned long) _text || vaddr > end) | 212 | if (vaddr < (unsigned long) _text || vaddr > end) |
212 | set_pmd(pmd, __pmd(0)); | 213 | set_pmd(pmd, __pmd(0)); |
@@ -214,7 +215,7 @@ void __init cleanup_highmap(void) | |||
214 | } | 215 | } |
215 | 216 | ||
216 | /* NOTE: this is meant to be run only at boot */ | 217 | /* NOTE: this is meant to be run only at boot */ |
217 | void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot) | 218 | void __init __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot) |
218 | { | 219 | { |
219 | unsigned long address = __fix_to_virt(idx); | 220 | unsigned long address = __fix_to_virt(idx); |
220 | 221 | ||
@@ -312,6 +313,8 @@ __meminit void early_iounmap(void *addr, unsigned long size) | |||
312 | static unsigned long __meminit | 313 | static unsigned long __meminit |
313 | phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end) | 314 | phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end) |
314 | { | 315 | { |
316 | unsigned long pages = 0; | ||
317 | |||
315 | int i = pmd_index(address); | 318 | int i = pmd_index(address); |
316 | 319 | ||
317 | for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) { | 320 | for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) { |
@@ -328,9 +331,11 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end) | |||
328 | if (pmd_val(*pmd)) | 331 | if (pmd_val(*pmd)) |
329 | continue; | 332 | continue; |
330 | 333 | ||
334 | pages++; | ||
331 | set_pte((pte_t *)pmd, | 335 | set_pte((pte_t *)pmd, |
332 | pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); | 336 | pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); |
333 | } | 337 | } |
338 | update_page_count(PG_LEVEL_2M, pages); | ||
334 | return address; | 339 | return address; |
335 | } | 340 | } |
336 | 341 | ||
@@ -350,6 +355,7 @@ phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end) | |||
350 | static unsigned long __meminit | 355 | static unsigned long __meminit |
351 | phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end) | 356 | phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end) |
352 | { | 357 | { |
358 | unsigned long pages = 0; | ||
353 | unsigned long last_map_addr = end; | 359 | unsigned long last_map_addr = end; |
354 | int i = pud_index(addr); | 360 | int i = pud_index(addr); |
355 | 361 | ||
@@ -374,6 +380,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end) | |||
374 | } | 380 | } |
375 | 381 | ||
376 | if (direct_gbpages) { | 382 | if (direct_gbpages) { |
383 | pages++; | ||
377 | set_pte((pte_t *)pud, | 384 | set_pte((pte_t *)pud, |
378 | pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); | 385 | pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); |
379 | last_map_addr = (addr & PUD_MASK) + PUD_SIZE; | 386 | last_map_addr = (addr & PUD_MASK) + PUD_SIZE; |
@@ -390,6 +397,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end) | |||
390 | unmap_low_page(pmd); | 397 | unmap_low_page(pmd); |
391 | } | 398 | } |
392 | __flush_tlb_all(); | 399 | __flush_tlb_all(); |
400 | update_page_count(PG_LEVEL_1G, pages); | ||
393 | 401 | ||
394 | return last_map_addr >> PAGE_SHIFT; | 402 | return last_map_addr >> PAGE_SHIFT; |
395 | } | 403 | } |
@@ -431,7 +439,7 @@ static void __init init_gbpages(void) | |||
431 | direct_gbpages = 0; | 439 | direct_gbpages = 0; |
432 | } | 440 | } |
433 | 441 | ||
434 | #ifdef CONFIG_MEMTEST_BOOTPARAM | 442 | #ifdef CONFIG_MEMTEST |
435 | 443 | ||
436 | static void __init memtest(unsigned long start_phys, unsigned long size, | 444 | static void __init memtest(unsigned long start_phys, unsigned long size, |
437 | unsigned pattern) | 445 | unsigned pattern) |
@@ -493,7 +501,8 @@ static void __init memtest(unsigned long start_phys, unsigned long size, | |||
493 | 501 | ||
494 | } | 502 | } |
495 | 503 | ||
496 | static int memtest_pattern __initdata = CONFIG_MEMTEST_BOOTPARAM_VALUE; | 504 | /* default is disabled */ |
505 | static int memtest_pattern __initdata; | ||
497 | 506 | ||
498 | static int __init parse_memtest(char *arg) | 507 | static int __init parse_memtest(char *arg) |
499 | { | 508 | { |
@@ -506,7 +515,7 @@ early_param("memtest", parse_memtest); | |||
506 | 515 | ||
507 | static void __init early_memtest(unsigned long start, unsigned long end) | 516 | static void __init early_memtest(unsigned long start, unsigned long end) |
508 | { | 517 | { |
509 | unsigned long t_start, t_size; | 518 | u64 t_start, t_size; |
510 | unsigned pattern; | 519 | unsigned pattern; |
511 | 520 | ||
512 | if (!memtest_pattern) | 521 | if (!memtest_pattern) |
@@ -525,8 +534,9 @@ static void __init early_memtest(unsigned long start, unsigned long end) | |||
525 | if (t_start + t_size > end) | 534 | if (t_start + t_size > end) |
526 | t_size = end - t_start; | 535 | t_size = end - t_start; |
527 | 536 | ||
528 | printk(KERN_CONT "\n %016lx - %016lx pattern %d", | 537 | printk(KERN_CONT "\n %016llx - %016llx pattern %d", |
529 | t_start, t_start + t_size, pattern); | 538 | (unsigned long long)t_start, |
539 | (unsigned long long)t_start + t_size, pattern); | ||
530 | 540 | ||
531 | memtest(t_start, t_size, pattern); | 541 | memtest(t_start, t_size, pattern); |
532 | 542 | ||
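
With the CONFIG_MEMTEST_BOOTPARAM default gone, memtest_pattern starts at zero and early testing only runs when requested on the kernel command line; memtest=N should run pattern passes 0..N-1 over the ranges printed above, e.g. (illustrative boot line):

	linux ... memtest=4
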
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 71bb3159031a..416ea415f5c2 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c | |||
@@ -142,7 +142,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, | |||
142 | /* | 142 | /* |
143 | * Don't remap the low PCI/ISA area, it's always mapped.. | 143 | * Don't remap the low PCI/ISA area, it's always mapped.. |
144 | */ | 144 | */ |
145 | if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS) | 145 | if (is_ISA_range(phys_addr, last_addr)) |
146 | return (__force void __iomem *)phys_to_virt(phys_addr); | 146 | return (__force void __iomem *)phys_to_virt(phys_addr); |
147 | 147 | ||
148 | /* | 148 | /* |
@@ -261,7 +261,7 @@ void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size) | |||
261 | { | 261 | { |
262 | /* | 262 | /* |
263 | * Ideally, this should be: | 263 | * Ideally, this should be: |
264 | * pat_wc_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS; | 264 | * pat_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS; |
265 | * | 265 | * |
266 | * Till we fix all X drivers to use ioremap_wc(), we will use | 266 | * Till we fix all X drivers to use ioremap_wc(), we will use |
267 | * UC MINUS. | 267 | * UC MINUS. |
@@ -285,7 +285,7 @@ EXPORT_SYMBOL(ioremap_nocache); | |||
285 | */ | 285 | */ |
286 | void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size) | 286 | void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size) |
287 | { | 287 | { |
288 | if (pat_wc_enabled) | 288 | if (pat_enabled) |
289 | return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC, | 289 | return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC, |
290 | __builtin_return_address(0)); | 290 | __builtin_return_address(0)); |
291 | else | 291 | else |
@@ -318,8 +318,8 @@ void iounmap(volatile void __iomem *addr) | |||
318 | * vm_area and by simply returning an address into the kernel mapping | 318 | * vm_area and by simply returning an address into the kernel mapping |
319 | * of ISA space. So handle that here. | 319 | * of ISA space. So handle that here. |
320 | */ | 320 | */ |
321 | if (addr >= phys_to_virt(ISA_START_ADDRESS) && | 321 | if ((void __force *)addr >= phys_to_virt(ISA_START_ADDRESS) && |
322 | addr < phys_to_virt(ISA_END_ADDRESS)) | 322 | (void __force *)addr < phys_to_virt(ISA_END_ADDRESS)) |
323 | return; | 323 | return; |
324 | 324 | ||
325 | addr = (volatile void __iomem *) | 325 | addr = (volatile void __iomem *) |
@@ -332,7 +332,7 @@ void iounmap(volatile void __iomem *addr) | |||
332 | cpa takes care of the direct mappings. */ | 332 | cpa takes care of the direct mappings. */ |
333 | read_lock(&vmlist_lock); | 333 | read_lock(&vmlist_lock); |
334 | for (p = vmlist; p; p = p->next) { | 334 | for (p = vmlist; p; p = p->next) { |
335 | if (p->addr == addr) | 335 | if (p->addr == (void __force *)addr) |
336 | break; | 336 | break; |
337 | } | 337 | } |
338 | read_unlock(&vmlist_lock); | 338 | read_unlock(&vmlist_lock); |
@@ -346,7 +346,7 @@ void iounmap(volatile void __iomem *addr) | |||
346 | free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p)); | 346 | free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p)); |
347 | 347 | ||
348 | /* Finally remove it */ | 348 | /* Finally remove it */ |
349 | o = remove_vm_area((void *)addr); | 349 | o = remove_vm_area((void __force *)addr); |
350 | BUG_ON(p != o || o == NULL); | 350 | BUG_ON(p != o || o == NULL); |
351 | kfree(p); | 351 | kfree(p); |
352 | } | 352 | } |
@@ -365,7 +365,7 @@ void *xlate_dev_mem_ptr(unsigned long phys) | |||
365 | if (page_is_ram(start >> PAGE_SHIFT)) | 365 | if (page_is_ram(start >> PAGE_SHIFT)) |
366 | return __va(phys); | 366 | return __va(phys); |
367 | 367 | ||
368 | addr = (void *)ioremap(start, PAGE_SIZE); | 368 | addr = (void __force *)ioremap(start, PAGE_SIZE); |
369 | if (addr) | 369 | if (addr) |
370 | addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK)); | 370 | addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK)); |
371 | 371 | ||
@@ -593,10 +593,11 @@ void __init early_iounmap(void *addr, unsigned long size) | |||
593 | unsigned long offset; | 593 | unsigned long offset; |
594 | unsigned int nrpages; | 594 | unsigned int nrpages; |
595 | enum fixed_addresses idx; | 595 | enum fixed_addresses idx; |
596 | unsigned int nesting; | 596 | int nesting; |
597 | 597 | ||
598 | nesting = --early_ioremap_nested; | 598 | nesting = --early_ioremap_nested; |
599 | WARN_ON(nesting < 0); | 599 | if (WARN_ON(nesting < 0)) |
600 | return; | ||
600 | 601 | ||
601 | if (early_ioremap_debug) { | 602 | if (early_ioremap_debug) { |
602 | printk(KERN_INFO "early_iounmap(%p, %08lx) [%d]\n", addr, | 603 | printk(KERN_INFO "early_iounmap(%p, %08lx) [%d]\n", addr, |
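
Both ioremap and (further down) the PAT tracker now share a single is_ISA_range() predicate for the always-mapped low PCI/ISA window. A sketch under the assumption that the helper simply brackets [ISA_START_ADDRESS, ISA_END_ADDRESS):

#include <stdio.h>

#define ISA_START_ADDRESS 0xa0000UL	/* 640K, start of the hole */
#define ISA_END_ADDRESS   0x100000UL	/* 1M */

/* Assumed shape of is_ISA_range(): both endpoints in the window. */
static int is_ISA_range(unsigned long s, unsigned long e)
{
	return s >= ISA_START_ADDRESS && e < ISA_END_ADDRESS;
}

int main(void)
{
	printf("VGA text buffer: %d\n", is_ISA_range(0xb8000UL, 0xb8fffUL));
	printf("first MB of RAM: %d\n", is_ISA_range(0x0UL, 0xfffUL));
	return 0;
}
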
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c index 1f476e477844..0ea66b532c35 100644 --- a/arch/x86/mm/k8topology_64.c +++ b/arch/x86/mm/k8topology_64.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <asm/numa.h> | 22 | #include <asm/numa.h> |
23 | #include <asm/mpspec.h> | 23 | #include <asm/mpspec.h> |
24 | #include <asm/apic.h> | 24 | #include <asm/apic.h> |
25 | #include <asm/k8.h> | ||
25 | 26 | ||
26 | static __init int find_northbridge(void) | 27 | static __init int find_northbridge(void) |
27 | { | 28 | { |
@@ -73,17 +74,12 @@ static __init void early_get_boot_cpu_id(void) | |||
73 | 74 | ||
74 | int __init k8_scan_nodes(unsigned long start, unsigned long end) | 75 | int __init k8_scan_nodes(unsigned long start, unsigned long end) |
75 | { | 76 | { |
77 | unsigned numnodes, cores, bits, apicid_base; | ||
76 | unsigned long prevbase; | 78 | unsigned long prevbase; |
77 | struct bootnode nodes[8]; | 79 | struct bootnode nodes[8]; |
78 | int nodeid, i, nb; | ||
79 | unsigned char nodeids[8]; | 80 | unsigned char nodeids[8]; |
80 | int found = 0; | 81 | int i, j, nb, found = 0; |
81 | u32 reg; | 82 | u32 nodeid, reg; |
82 | unsigned numnodes; | ||
83 | unsigned cores; | ||
84 | unsigned bits; | ||
85 | int j; | ||
86 | unsigned apicid_base; | ||
87 | 83 | ||
88 | if (!early_pci_allowed()) | 84 | if (!early_pci_allowed()) |
89 | return -1; | 85 | return -1; |
@@ -105,7 +101,6 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) | |||
105 | prevbase = 0; | 101 | prevbase = 0; |
106 | for (i = 0; i < 8; i++) { | 102 | for (i = 0; i < 8; i++) { |
107 | unsigned long base, limit; | 103 | unsigned long base, limit; |
108 | u32 nodeid; | ||
109 | 104 | ||
110 | base = read_pci_config(0, nb, 1, 0x40 + i*8); | 105 | base = read_pci_config(0, nb, 1, 0x40 + i*8); |
111 | limit = read_pci_config(0, nb, 1, 0x44 + i*8); | 106 | limit = read_pci_config(0, nb, 1, 0x44 + i*8); |
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 60bcb5b6a37e..afd40054d157 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -34,6 +34,41 @@ struct cpa_data { | |||
34 | unsigned force_split : 1; | 34 | unsigned force_split : 1; |
35 | }; | 35 | }; |
36 | 36 | ||
37 | #ifdef CONFIG_PROC_FS | ||
38 | static unsigned long direct_pages_count[PG_LEVEL_NUM]; | ||
39 | |||
40 | void update_page_count(int level, unsigned long pages) | ||
41 | { | ||
42 | unsigned long flags; | ||
43 | |||
44 | /* Protect against CPA */ | ||
45 | spin_lock_irqsave(&pgd_lock, flags); | ||
46 | direct_pages_count[level] += pages; | ||
47 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
48 | } | ||
49 | |||
50 | static void split_page_count(int level) | ||
51 | { | ||
52 | direct_pages_count[level]--; | ||
53 | direct_pages_count[level - 1] += PTRS_PER_PTE; | ||
54 | } | ||
55 | |||
56 | int arch_report_meminfo(char *page) | ||
57 | { | ||
58 | int n = sprintf(page, "DirectMap4k: %8lu\n" | ||
59 | "DirectMap2M: %8lu\n", | ||
60 | direct_pages_count[PG_LEVEL_4K], | ||
61 | direct_pages_count[PG_LEVEL_2M]); | ||
62 | #ifdef CONFIG_X86_64 | ||
63 | n += sprintf(page + n, "DirectMap1G: %8lu\n", | ||
64 | direct_pages_count[PG_LEVEL_1G]); | ||
65 | #endif | ||
66 | return n; | ||
67 | } | ||
68 | #else | ||
69 | static inline void split_page_count(int level) { } | ||
70 | #endif | ||
71 | |||
37 | #ifdef CONFIG_X86_64 | 72 | #ifdef CONFIG_X86_64 |
38 | 73 | ||
39 | static inline unsigned long highmap_start_pfn(void) | 74 | static inline unsigned long highmap_start_pfn(void) |
@@ -500,6 +535,10 @@ static int split_large_page(pte_t *kpte, unsigned long address) | |||
500 | for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc) | 535 | for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc) |
501 | set_pte(&pbase[i], pfn_pte(pfn, ref_prot)); | 536 | set_pte(&pbase[i], pfn_pte(pfn, ref_prot)); |
502 | 537 | ||
538 | if (address >= (unsigned long)__va(0) && | ||
539 | address < (unsigned long)__va(max_pfn_mapped << PAGE_SHIFT)) | ||
540 | split_page_count(level); | ||
541 | |||
503 | /* | 542 | /* |
504 | * Install the new, split up pagetable. Important details here: | 543 | * Install the new, split up pagetable. Important details here: |
505 | * | 544 | * |
@@ -805,7 +844,7 @@ int _set_memory_wc(unsigned long addr, int numpages) | |||
805 | 844 | ||
806 | int set_memory_wc(unsigned long addr, int numpages) | 845 | int set_memory_wc(unsigned long addr, int numpages) |
807 | { | 846 | { |
808 | if (!pat_wc_enabled) | 847 | if (!pat_enabled) |
809 | return set_memory_uc(addr, numpages); | 848 | return set_memory_uc(addr, numpages); |
810 | 849 | ||
811 | if (reserve_memtype(addr, addr + numpages * PAGE_SIZE, | 850 | if (reserve_memtype(addr, addr + numpages * PAGE_SIZE, |
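
update_page_count() and split_page_count() above keep per-level totals of the kernel direct mapping, exported through arch_report_meminfo(): splitting one 2M page trades it for PTRS_PER_PTE 4k pages, so the total mapped size stays constant. A sketch of that invariant, plus the /proc/meminfo lines the format strings produce (values illustrative):

#include <stdio.h>

#define PTRS_PER_PTE 512
enum { LV_4K, LV_2M, LV_1G };

static unsigned long direct_pages_count[3];

static void split_page_count(int level)	/* as in the hunk above */
{
	direct_pages_count[level]--;
	direct_pages_count[level - 1] += PTRS_PER_PTE;
}

int main(void)
{
	direct_pages_count[LV_2M] = 512;	/* 1 GB mapped with 2M pages */
	split_page_count(LV_2M);		/* one large page goes 4k    */

	/* arch_report_meminfo() would then emit:
	 *   DirectMap4k:      512
	 *   DirectMap2M:      511                                     */
	printf("DirectMap4k: %8lu\nDirectMap2M: %8lu\n",
	       direct_pages_count[LV_4K], direct_pages_count[LV_2M]);
	return 0;
}
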
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 60adbe22efa0..a885a1019b8a 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c | |||
@@ -26,15 +26,15 @@ | |||
26 | #include <asm/io.h> | 26 | #include <asm/io.h> |
27 | 27 | ||
28 | #ifdef CONFIG_X86_PAT | 28 | #ifdef CONFIG_X86_PAT |
29 | int __read_mostly pat_wc_enabled = 1; | 29 | int __read_mostly pat_enabled = 1; |
30 | 30 | ||
31 | void __init pat_disable(char *reason) | 31 | void __cpuinit pat_disable(char *reason) |
32 | { | 32 | { |
33 | pat_wc_enabled = 0; | 33 | pat_enabled = 0; |
34 | printk(KERN_INFO "%s\n", reason); | 34 | printk(KERN_INFO "%s\n", reason); |
35 | } | 35 | } |
36 | 36 | ||
37 | static int nopat(char *str) | 37 | static int __init nopat(char *str) |
38 | { | 38 | { |
39 | pat_disable("PAT support disabled."); | 39 | pat_disable("PAT support disabled."); |
40 | return 0; | 40 | return 0; |
@@ -42,6 +42,19 @@ static int nopat(char *str) | |||
42 | early_param("nopat", nopat); | 42 | early_param("nopat", nopat); |
43 | #endif | 43 | #endif |
44 | 44 | ||
45 | |||
46 | static int debug_enable; | ||
47 | static int __init pat_debug_setup(char *str) | ||
48 | { | ||
49 | debug_enable = 1; | ||
50 | return 0; | ||
51 | } | ||
52 | __setup("debugpat", pat_debug_setup); | ||
53 | |||
54 | #define dprintk(fmt, arg...) \ | ||
55 | do { if (debug_enable) printk(KERN_INFO fmt, ##arg); } while (0) | ||
56 | |||
57 | |||
45 | static u64 __read_mostly boot_pat_state; | 58 | static u64 __read_mostly boot_pat_state; |
46 | 59 | ||
47 | enum { | 60 | enum { |
@@ -53,24 +66,25 @@ enum { | |||
53 | PAT_UC_MINUS = 7, /* UC, but can be overriden by MTRR */ | 66 | PAT_UC_MINUS = 7, /* UC, but can be overriden by MTRR */ |
54 | }; | 67 | }; |
55 | 68 | ||
56 | #define PAT(x,y) ((u64)PAT_ ## y << ((x)*8)) | 69 | #define PAT(x, y) ((u64)PAT_ ## y << ((x)*8)) |
57 | 70 | ||
58 | void pat_init(void) | 71 | void pat_init(void) |
59 | { | 72 | { |
60 | u64 pat; | 73 | u64 pat; |
61 | 74 | ||
62 | if (!pat_wc_enabled) | 75 | if (!pat_enabled) |
63 | return; | 76 | return; |
64 | 77 | ||
65 | /* Paranoia check. */ | 78 | /* Paranoia check. */ |
66 | if (!cpu_has_pat) { | 79 | if (!cpu_has_pat && boot_pat_state) { |
67 | printk(KERN_ERR "PAT enabled, but CPU feature cleared\n"); | ||
68 | /* | 80 | /* |
69 | * Panic if this happens on the secondary CPU, and we | 81 | * If this happens we are on a secondary CPU, but |
70 | * switched to PAT on the boot CPU. We have no way to | 82 | * switched to PAT on the boot CPU. We have no way to |
71 | * undo PAT. | 83 | * undo PAT. |
72 | */ | 84 | */ |
73 | BUG_ON(boot_pat_state); | 85 | printk(KERN_ERR "PAT enabled, " |
86 | "but not supported by secondary CPU\n"); | ||
87 | BUG(); | ||
74 | } | 88 | } |
75 | 89 | ||
76 | /* Set PWT to Write-Combining. All other bits stay the same */ | 90 | /* Set PWT to Write-Combining. All other bits stay the same */ |
@@ -86,8 +100,8 @@ void pat_init(void) | |||
86 | * 011 UC _PAGE_CACHE_UC | 100 | * 011 UC _PAGE_CACHE_UC |
87 | * PAT bit unused | 101 | * PAT bit unused |
88 | */ | 102 | */ |
89 | pat = PAT(0,WB) | PAT(1,WC) | PAT(2,UC_MINUS) | PAT(3,UC) | | 103 | pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) | |
90 | PAT(4,WB) | PAT(5,WC) | PAT(6,UC_MINUS) | PAT(7,UC); | 104 | PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC); |
91 | 105 | ||
92 | /* Boot CPU check */ | 106 | /* Boot CPU check */ |
93 | if (!boot_pat_state) | 107 | if (!boot_pat_state) |
@@ -103,11 +117,11 @@ void pat_init(void) | |||
103 | static char *cattr_name(unsigned long flags) | 117 | static char *cattr_name(unsigned long flags) |
104 | { | 118 | { |
105 | switch (flags & _PAGE_CACHE_MASK) { | 119 | switch (flags & _PAGE_CACHE_MASK) { |
106 | case _PAGE_CACHE_UC: return "uncached"; | 120 | case _PAGE_CACHE_UC: return "uncached"; |
107 | case _PAGE_CACHE_UC_MINUS: return "uncached-minus"; | 121 | case _PAGE_CACHE_UC_MINUS: return "uncached-minus"; |
108 | case _PAGE_CACHE_WB: return "write-back"; | 122 | case _PAGE_CACHE_WB: return "write-back"; |
109 | case _PAGE_CACHE_WC: return "write-combining"; | 123 | case _PAGE_CACHE_WC: return "write-combining"; |
110 | default: return "broken"; | 124 | default: return "broken"; |
111 | } | 125 | } |
112 | } | 126 | } |
113 | 127 | ||
@@ -145,46 +159,50 @@ static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */ | |||
145 | * The intersection is based on "Effective Memory Type" tables in IA-32 | 159 | * The intersection is based on "Effective Memory Type" tables in IA-32 |
146 | * SDM vol 3a | 160 | * SDM vol 3a |
147 | */ | 161 | */ |
148 | static int pat_x_mtrr_type(u64 start, u64 end, unsigned long prot, | 162 | static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type) |
149 | unsigned long *ret_prot) | ||
150 | { | 163 | { |
151 | unsigned long pat_type; | 164 | /* |
152 | u8 mtrr_type; | 165 | * Look for MTRR hint to get the effective type in case where PAT |
153 | 166 | * request is for WB. | |
154 | mtrr_type = mtrr_type_lookup(start, end); | 167 | */ |
155 | if (mtrr_type == 0xFF) { /* MTRR not enabled */ | 168 | if (req_type == _PAGE_CACHE_WB) { |
156 | *ret_prot = prot; | 169 | u8 mtrr_type; |
157 | return 0; | 170 | |
158 | } | 171 | mtrr_type = mtrr_type_lookup(start, end); |
159 | if (mtrr_type == 0xFE) { /* MTRR match error */ | 172 | if (mtrr_type == MTRR_TYPE_UNCACHABLE) |
160 | *ret_prot = _PAGE_CACHE_UC; | 173 | return _PAGE_CACHE_UC; |
161 | return -1; | 174 | if (mtrr_type == MTRR_TYPE_WRCOMB) |
162 | } | 175 | return _PAGE_CACHE_WC; |
163 | if (mtrr_type != MTRR_TYPE_UNCACHABLE && | ||
164 | mtrr_type != MTRR_TYPE_WRBACK && | ||
165 | mtrr_type != MTRR_TYPE_WRCOMB) { /* MTRR type unhandled */ | ||
166 | *ret_prot = _PAGE_CACHE_UC; | ||
167 | return -1; | ||
168 | } | 176 | } |
169 | 177 | ||
170 | pat_type = prot & _PAGE_CACHE_MASK; | 178 | return req_type; |
171 | prot &= (~_PAGE_CACHE_MASK); | 179 | } |
172 | 180 | ||
173 | /* Currently doing intersection by hand. Optimize it later. */ | 181 | static int chk_conflict(struct memtype *new, struct memtype *entry, |
174 | if (pat_type == _PAGE_CACHE_WC) { | 182 | unsigned long *type) |
175 | *ret_prot = prot | _PAGE_CACHE_WC; | 183 | { |
176 | } else if (pat_type == _PAGE_CACHE_UC_MINUS) { | 184 | if (new->type != entry->type) { |
177 | *ret_prot = prot | _PAGE_CACHE_UC_MINUS; | 185 | if (type) { |
178 | } else if (pat_type == _PAGE_CACHE_UC || | 186 | new->type = entry->type; |
179 | mtrr_type == MTRR_TYPE_UNCACHABLE) { | 187 | *type = entry->type; |
180 | *ret_prot = prot | _PAGE_CACHE_UC; | 188 | } else |
181 | } else if (mtrr_type == MTRR_TYPE_WRCOMB) { | 189 | goto conflict; |
182 | *ret_prot = prot | _PAGE_CACHE_WC; | ||
183 | } else { | ||
184 | *ret_prot = prot | _PAGE_CACHE_WB; | ||
185 | } | 190 | } |
186 | 191 | ||
192 | /* check overlaps with more than one entry in the list */ | ||
193 | list_for_each_entry_continue(entry, &memtype_list, nd) { | ||
194 | if (new->end <= entry->start) | ||
195 | break; | ||
196 | else if (new->type != entry->type) | ||
197 | goto conflict; | ||
198 | } | ||
187 | return 0; | 199 | return 0; |
200 | |||
201 | conflict: | ||
202 | printk(KERN_INFO "%s:%d conflicting memory types " | ||
203 | "%Lx-%Lx %s<->%s\n", current->comm, current->pid, new->start, | ||
204 | new->end, cattr_name(new->type), cattr_name(entry->type)); | ||
205 | return -EBUSY; | ||
188 | } | 206 | } |
189 | 207 | ||
190 | /* | 208 | /* |
@@ -197,251 +215,134 @@ static int pat_x_mtrr_type(u64 start, u64 end, unsigned long prot, | |||
197 | * req_type will have a special case value '-1', when requester want to inherit | 215 | * req_type will have a special case value '-1', when requester want to inherit |
198 | * the memory type from mtrr (if WB), existing PAT, defaulting to UC_MINUS. | 216 | * the memory type from mtrr (if WB), existing PAT, defaulting to UC_MINUS. |
199 | * | 217 | * |
200 | * If ret_type is NULL, function will return an error if it cannot reserve the | 218 | * If new_type is NULL, function will return an error if it cannot reserve the |
201 | * region with req_type. If ret_type is non-null, function will return | 219 | * region with req_type. If new_type is non-NULL, function will return |
202 | * available type in ret_type in case of no error. In case of any error | 220 | * available type in new_type in case of no error. In case of any error |
203 | * it will return a negative return value. | 221 | * it will return a negative return value. |
204 | */ | 222 | */ |
205 | int reserve_memtype(u64 start, u64 end, unsigned long req_type, | 223 | int reserve_memtype(u64 start, u64 end, unsigned long req_type, |
206 | unsigned long *ret_type) | 224 | unsigned long *new_type) |
207 | { | 225 | { |
208 | struct memtype *new_entry = NULL; | 226 | struct memtype *new, *entry; |
209 | struct memtype *parse; | ||
210 | unsigned long actual_type; | 227 | unsigned long actual_type; |
228 | struct list_head *where; | ||
211 | int err = 0; | 229 | int err = 0; |
212 | 230 | ||
213 | /* Only track when pat_wc_enabled */ | 231 | BUG_ON(start >= end); /* end is exclusive */ |
214 | if (!pat_wc_enabled) { | 232 | |
233 | if (!pat_enabled) { | ||
215 | /* This is identical to page table setting without PAT */ | 234 | /* This is identical to page table setting without PAT */ |
216 | if (ret_type) { | 235 | if (new_type) { |
217 | if (req_type == -1) { | 236 | if (req_type == -1) |
218 | *ret_type = _PAGE_CACHE_WB; | 237 | *new_type = _PAGE_CACHE_WB; |
219 | } else { | 238 | else |
220 | *ret_type = req_type; | 239 | *new_type = req_type & _PAGE_CACHE_MASK; |
221 | } | ||
222 | } | 240 | } |
223 | return 0; | 241 | return 0; |
224 | } | 242 | } |
225 | 243 | ||
226 | /* Low ISA region is always mapped WB in page table. No need to track */ | 244 | /* Low ISA region is always mapped WB in page table. No need to track */ |
227 | if (start >= ISA_START_ADDRESS && (end - 1) <= ISA_END_ADDRESS) { | 245 | if (is_ISA_range(start, end - 1)) { |
228 | if (ret_type) | 246 | if (new_type) |
229 | *ret_type = _PAGE_CACHE_WB; | 247 | *new_type = _PAGE_CACHE_WB; |
230 | |||
231 | return 0; | 248 | return 0; |
232 | } | 249 | } |
233 | 250 | ||
234 | if (req_type == -1) { | 251 | if (req_type == -1) { |
235 | /* | 252 | /* |
236 | * Special case where caller wants to inherit from mtrr or | 253 | * Call mtrr_lookup to get the type hint. This is an |
237 | * existing pat mapping, defaulting to UC_MINUS in case of | 254 | * optimization for /dev/mem mmap'ers into WB memory (BIOS |
238 | * no match. | 255 | * tools and ACPI tools). Use WB request for WB memory and use |
256 | * UC_MINUS otherwise. | ||
239 | */ | 257 | */ |
240 | u8 mtrr_type = mtrr_type_lookup(start, end); | 258 | u8 mtrr_type = mtrr_type_lookup(start, end); |
241 | if (mtrr_type == 0xFE) { /* MTRR match error */ | ||
242 | err = -1; | ||
243 | } | ||
244 | 259 | ||
245 | if (mtrr_type == MTRR_TYPE_WRBACK) { | 260 | if (mtrr_type == MTRR_TYPE_WRBACK) |
246 | req_type = _PAGE_CACHE_WB; | ||
247 | actual_type = _PAGE_CACHE_WB; | 261 | actual_type = _PAGE_CACHE_WB; |
248 | } else { | 262 | else |
249 | req_type = _PAGE_CACHE_UC_MINUS; | ||
250 | actual_type = _PAGE_CACHE_UC_MINUS; | 263 | actual_type = _PAGE_CACHE_UC_MINUS; |
251 | } | 264 | } else |
252 | } else { | 265 | actual_type = pat_x_mtrr_type(start, end, |
253 | req_type &= _PAGE_CACHE_MASK; | 266 | req_type & _PAGE_CACHE_MASK); |
254 | err = pat_x_mtrr_type(start, end, req_type, &actual_type); | ||
255 | } | ||
256 | 267 | ||
257 | if (err) { | 268 | new = kmalloc(sizeof(struct memtype), GFP_KERNEL); |
258 | if (ret_type) | 269 | if (!new) |
259 | *ret_type = actual_type; | ||
260 | |||
261 | return -EINVAL; | ||
262 | } | ||
263 | |||
264 | new_entry = kmalloc(sizeof(struct memtype), GFP_KERNEL); | ||
265 | if (!new_entry) | ||
266 | return -ENOMEM; | 270 | return -ENOMEM; |
267 | 271 | ||
268 | new_entry->start = start; | 272 | new->start = start; |
269 | new_entry->end = end; | 273 | new->end = end; |
270 | new_entry->type = actual_type; | 274 | new->type = actual_type; |
271 | 275 | ||
272 | if (ret_type) | 276 | if (new_type) |
273 | *ret_type = actual_type; | 277 | *new_type = actual_type; |
274 | 278 | ||
275 | spin_lock(&memtype_lock); | 279 | spin_lock(&memtype_lock); |
276 | 280 | ||
277 | /* Search for existing mapping that overlaps the current range */ | 281 | /* Search for existing mapping that overlaps the current range */ |
278 | list_for_each_entry(parse, &memtype_list, nd) { | 282 | where = NULL; |
279 | struct memtype *saved_ptr; | 283 | list_for_each_entry(entry, &memtype_list, nd) { |
280 | 284 | if (end <= entry->start) { | |
281 | if (parse->start >= end) { | 285 | where = entry->nd.prev; |
282 | pr_debug("New Entry\n"); | ||
283 | list_add(&new_entry->nd, parse->nd.prev); | ||
284 | new_entry = NULL; | ||
285 | break; | 286 | break; |
286 | } | 287 | } else if (start <= entry->start) { /* end > entry->start */ |
287 | 288 | err = chk_conflict(new, entry, new_type); | |
288 | if (start <= parse->start && end >= parse->start) { | 289 | if (!err) { |
289 | if (actual_type != parse->type && ret_type) { | 290 | dprintk("Overlap at 0x%Lx-0x%Lx\n", |
290 | actual_type = parse->type; | 291 | entry->start, entry->end); |
291 | *ret_type = actual_type; | 292 | where = entry->nd.prev; |
292 | new_entry->type = actual_type; | ||
293 | } | ||
294 | |||
295 | if (actual_type != parse->type) { | ||
296 | printk( | ||
297 | KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n", | ||
298 | current->comm, current->pid, | ||
299 | start, end, | ||
300 | cattr_name(actual_type), | ||
301 | cattr_name(parse->type)); | ||
302 | err = -EBUSY; | ||
303 | break; | ||
304 | } | ||
305 | |||
306 | saved_ptr = parse; | ||
307 | /* | ||
308 | * Check to see whether the request overlaps more | ||
309 | * than one entry in the list | ||
310 | */ | ||
311 | list_for_each_entry_continue(parse, &memtype_list, nd) { | ||
312 | if (end <= parse->start) { | ||
313 | break; | ||
314 | } | ||
315 | |||
316 | if (actual_type != parse->type) { | ||
317 | printk( | ||
318 | KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n", | ||
319 | current->comm, current->pid, | ||
320 | start, end, | ||
321 | cattr_name(actual_type), | ||
322 | cattr_name(parse->type)); | ||
323 | err = -EBUSY; | ||
324 | break; | ||
325 | } | ||
326 | } | 293 | } |
327 | |||
328 | if (err) { | ||
329 | break; | ||
330 | } | ||
331 | |||
332 | pr_debug("Overlap at 0x%Lx-0x%Lx\n", | ||
333 | saved_ptr->start, saved_ptr->end); | ||
334 | /* No conflict. Go ahead and add this new entry */ | ||
335 | list_add(&new_entry->nd, saved_ptr->nd.prev); | ||
336 | new_entry = NULL; | ||
337 | break; | 294 | break; |
338 | } | 295 | } else if (start < entry->end) { /* start > entry->start */ |
339 | 296 | err = chk_conflict(new, entry, new_type); | |
340 | if (start < parse->end) { | 297 | if (!err) { |
341 | if (actual_type != parse->type && ret_type) { | 298 | dprintk("Overlap at 0x%Lx-0x%Lx\n", |
342 | actual_type = parse->type; | 299 | entry->start, entry->end); |
343 | *ret_type = actual_type; | 300 | where = &entry->nd; |
344 | new_entry->type = actual_type; | ||
345 | } | ||
346 | |||
347 | if (actual_type != parse->type) { | ||
348 | printk( | ||
349 | KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n", | ||
350 | current->comm, current->pid, | ||
351 | start, end, | ||
352 | cattr_name(actual_type), | ||
353 | cattr_name(parse->type)); | ||
354 | err = -EBUSY; | ||
355 | break; | ||
356 | } | 301 | } |
357 | |||
358 | saved_ptr = parse; | ||
359 | /* | ||
360 | * Check to see whether the request overlaps more | ||
361 | * than one entry in the list | ||
362 | */ | ||
363 | list_for_each_entry_continue(parse, &memtype_list, nd) { | ||
364 | if (end <= parse->start) { | ||
365 | break; | ||
366 | } | ||
367 | |||
368 | if (actual_type != parse->type) { | ||
369 | printk( | ||
370 | KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n", | ||
371 | current->comm, current->pid, | ||
372 | start, end, | ||
373 | cattr_name(actual_type), | ||
374 | cattr_name(parse->type)); | ||
375 | err = -EBUSY; | ||
376 | break; | ||
377 | } | ||
378 | } | ||
379 | |||
380 | if (err) { | ||
381 | break; | ||
382 | } | ||
383 | |||
384 | pr_debug(KERN_INFO "Overlap at 0x%Lx-0x%Lx\n", | ||
385 | saved_ptr->start, saved_ptr->end); | ||
386 | /* No conflict. Go ahead and add this new entry */ | ||
387 | list_add(&new_entry->nd, &saved_ptr->nd); | ||
388 | new_entry = NULL; | ||
389 | break; | 302 | break; |
390 | } | 303 | } |
391 | } | 304 | } |
392 | 305 | ||
393 | if (err) { | 306 | if (err) { |
394 | printk(KERN_INFO | 307 | printk(KERN_INFO "reserve_memtype failed 0x%Lx-0x%Lx, " |
395 | "reserve_memtype failed 0x%Lx-0x%Lx, track %s, req %s\n", | 308 | "track %s, req %s\n", |
396 | start, end, cattr_name(new_entry->type), | 309 | start, end, cattr_name(new->type), cattr_name(req_type)); |
397 | cattr_name(req_type)); | 310 | kfree(new); |
398 | kfree(new_entry); | ||
399 | spin_unlock(&memtype_lock); | 311 | spin_unlock(&memtype_lock); |
400 | return err; | 312 | return err; |
401 | } | 313 | } |
402 | 314 | ||
403 | if (new_entry) { | 315 | if (where) |
404 | /* No conflict. Not yet added to the list. Add to the tail */ | 316 | list_add(&new->nd, where); |
405 | list_add_tail(&new_entry->nd, &memtype_list); | 317 | else |
406 | pr_debug("New Entry\n"); | 318 | list_add_tail(&new->nd, &memtype_list); |
407 | } | ||
408 | |||
409 | if (ret_type) { | ||
410 | pr_debug( | ||
411 | "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", | ||
412 | start, end, cattr_name(actual_type), | ||
413 | cattr_name(req_type), cattr_name(*ret_type)); | ||
414 | } else { | ||
415 | pr_debug( | ||
416 | "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s\n", | ||
417 | start, end, cattr_name(actual_type), | ||
418 | cattr_name(req_type)); | ||
419 | } | ||
420 | 319 | ||
421 | spin_unlock(&memtype_lock); | 320 | spin_unlock(&memtype_lock); |
321 | |||
322 | dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", | ||
323 | start, end, cattr_name(new->type), cattr_name(req_type), | ||
324 | new_type ? cattr_name(*new_type) : "-"); | ||
325 | |||
422 | return err; | 326 | return err; |
423 | } | 327 | } |
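The rewritten reserve_memtype() keeps memtype_list sorted by start address and records the insertion point in `where`: an entry beginning at or after the new range's end means "insert in front of it", while an overlap is tolerated once chk_conflict() approves it and the new node goes beside the overlapped entry. The three-way walk, as a hedged standalone sketch:

#include <stdio.h>

enum place { INSERT_BEFORE, OVERLAP_AT_HEAD, OVERLAP_IN_BODY, KEEP_WALKING };

/* mirrors the list walk: [start, end) against one entry [e_start, e_end) */
static enum place classify(unsigned long long start, unsigned long long end,
			   unsigned long long e_start, unsigned long long e_end)
{
	if (end <= e_start)
		return INSERT_BEFORE;	/* new range ends before entry begins */
	if (start <= e_start)
		return OVERLAP_AT_HEAD;	/* end > e_start: covers entry's start */
	if (start < e_end)
		return OVERLAP_IN_BODY;	/* begins strictly inside the entry */
	return KEEP_WALKING;		/* entirely past this entry */
}

int main(void)
{
	printf("%d %d %d %d\n",
	       classify(0, 1, 2, 4),	/* 0: insert before */
	       classify(1, 3, 2, 4),	/* 1: overlap at head */
	       classify(3, 5, 2, 4),	/* 2: overlap in body */
	       classify(5, 6, 2, 4));	/* 3: keep walking */
	return 0;
}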
424 | 328 | ||
425 | int free_memtype(u64 start, u64 end) | 329 | int free_memtype(u64 start, u64 end) |
426 | { | 330 | { |
427 | struct memtype *ml; | 331 | struct memtype *entry; |
428 | int err = -EINVAL; | 332 | int err = -EINVAL; |
429 | 333 | ||
430 | /* Only track when pat_wc_enabled */ | 334 | if (!pat_enabled) |
431 | if (!pat_wc_enabled) { | ||
432 | return 0; | 335 | return 0; |
433 | } | ||
434 | 336 | ||
435 | /* Low ISA region is always mapped WB. No need to track */ | 337 | /* Low ISA region is always mapped WB. No need to track */ |
436 | if (start >= ISA_START_ADDRESS && end <= ISA_END_ADDRESS) { | 338 | if (is_ISA_range(start, end - 1)) |
437 | return 0; | 339 | return 0; |
438 | } | ||
439 | 340 | ||
440 | spin_lock(&memtype_lock); | 341 | spin_lock(&memtype_lock); |
441 | list_for_each_entry(ml, &memtype_list, nd) { | 342 | list_for_each_entry(entry, &memtype_list, nd) { |
442 | if (ml->start == start && ml->end == end) { | 343 | if (entry->start == start && entry->end == end) { |
443 | list_del(&ml->nd); | 344 | list_del(&entry->nd); |
444 | kfree(ml); | 345 | kfree(entry); |
445 | err = 0; | 346 | err = 0; |
446 | break; | 347 | break; |
447 | } | 348 | } |
@@ -453,7 +354,7 @@ int free_memtype(u64 start, u64 end) | |||
453 | current->comm, current->pid, start, end); | 354 | current->comm, current->pid, start, end); |
454 | } | 355 | } |
455 | 356 | ||
456 | pr_debug("free_memtype request 0x%Lx-0x%Lx\n", start, end); | 357 | dprintk("free_memtype request 0x%Lx-0x%Lx\n", start, end); |
457 | return err; | 358 | return err; |
458 | } | 359 | } |
459 | 360 | ||
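Callers are expected to bracket a mapping with this reserve/free pair. A hedged sketch of a driver-style caller, kernel context assumed and identifiers such as base/size hypothetical:

	unsigned long got_type;
	int err;

	err = reserve_memtype(base, base + size, _PAGE_CACHE_WC, &got_type);
	if (err)
		return err;
	if (got_type != _PAGE_CACHE_WC) {
		/* range already tracked with another type; back out */
		free_memtype(base, base + size);
		return -EINVAL;
	}
	/* ... set up the mapping using got_type ... */
	free_memtype(base, base + size);	/* on teardown */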
@@ -522,12 +423,12 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, | |||
522 | * caching for the high addresses through the KEN pin, but | 423 | * caching for the high addresses through the KEN pin, but |
523 | * we maintain the tradition of paranoia in this code. | 424 | * we maintain the tradition of paranoia in this code. |
524 | */ | 425 | */ |
525 | if (!pat_wc_enabled && | 426 | if (!pat_enabled && |
526 | ! ( test_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability) || | 427 | !(boot_cpu_has(X86_FEATURE_MTRR) || |
527 | test_bit(X86_FEATURE_K6_MTRR, boot_cpu_data.x86_capability) || | 428 | boot_cpu_has(X86_FEATURE_K6_MTRR) || |
528 | test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) || | 429 | boot_cpu_has(X86_FEATURE_CYRIX_ARR) || |
529 | test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability)) && | 430 | boot_cpu_has(X86_FEATURE_CENTAUR_MCR)) && |
530 | (pfn << PAGE_SHIFT) >= __pa(high_memory)) { | 431 | (pfn << PAGE_SHIFT) >= __pa(high_memory)) { |
531 | flags = _PAGE_CACHE_UC; | 432 | flags = _PAGE_CACHE_UC; |
532 | } | 433 | } |
533 | #endif | 434 | #endif |
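The boot_cpu_has() conversion is cosmetic: the macro reduces to the same capability-bit test on boot_cpu_data.x86_capability that the old code spelled out by hand. Roughly, from that era's headers (approximate, quoted from memory):

	/* boot_cpu_has() is shorthand for testing the boot CPU's feature bits */
	#define boot_cpu_has(bit)	cpu_has(&boot_cpu_data, bit)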
@@ -549,13 +450,13 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, | |||
549 | return 0; | 450 | return 0; |
550 | 451 | ||
551 | if (pfn <= max_pfn_mapped && | 452 | if (pfn <= max_pfn_mapped && |
552 | ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) { | 453 | ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) { |
553 | free_memtype(offset, offset + size); | 454 | free_memtype(offset, offset + size); |
554 | printk(KERN_INFO | 455 | printk(KERN_INFO |
555 | "%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n", | 456 | "%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n", |
556 | current->comm, current->pid, | 457 | current->comm, current->pid, |
557 | cattr_name(flags), | 458 | cattr_name(flags), |
558 | offset, offset + size); | 459 | offset, (unsigned long long)(offset + size)); |
559 | return 0; | 460 | return 0; |
560 | } | 461 | } |
561 | 462 | ||
@@ -576,7 +477,7 @@ void map_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot) | |||
576 | "%s:%d /dev/mem expected mapping type %s for %Lx-%Lx, got %s\n", | 477 | "%s:%d /dev/mem expected mapping type %s for %Lx-%Lx, got %s\n", |
577 | current->comm, current->pid, | 478 | current->comm, current->pid, |
578 | cattr_name(want_flags), | 479 | cattr_name(want_flags), |
579 | addr, addr + size, | 480 | addr, (unsigned long long)(addr + size), |
580 | cattr_name(flags)); | 481 | cattr_name(flags)); |
581 | } | 482 | } |
582 | } | 483 | } |
@@ -587,4 +488,3 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot) | |||
587 | 488 | ||
588 | free_memtype(addr, addr + size); | 489 | free_memtype(addr, addr + size); |
589 | } | 490 | } |
590 | |||
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 50159764f694..ee1d6d39edd4 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c | |||
@@ -255,7 +255,7 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma, | |||
255 | 255 | ||
256 | if (pte_young(*ptep)) | 256 | if (pte_young(*ptep)) |
257 | ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, | 257 | ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, |
258 | &ptep->pte); | 258 | (unsigned long *) &ptep->pte); |
259 | 259 | ||
260 | if (ret) | 260 | if (ret) |
261 | pte_update(vma->vm_mm, addr, ptep); | 261 | pte_update(vma->vm_mm, addr, ptep); |
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 3890234e5b26..391d51035871 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c | |||
@@ -97,36 +97,9 @@ static __init inline int srat_disabled(void) | |||
97 | return numa_off || acpi_numa < 0; | 97 | return numa_off || acpi_numa < 0; |
98 | } | 98 | } |
99 | 99 | ||
100 | /* | ||
101 | * A lot of BIOS fill in 10 (= no distance) everywhere. This messes | ||
102 | * up the NUMA heuristics which wants the local node to have a smaller | ||
103 | * distance than the others. | ||
104 | * Do some quick checks here and only use the SLIT if it passes. | ||
105 | */ | ||
106 | static __init int slit_valid(struct acpi_table_slit *slit) | ||
107 | { | ||
108 | int i, j; | ||
109 | int d = slit->locality_count; | ||
110 | for (i = 0; i < d; i++) { | ||
111 | for (j = 0; j < d; j++) { | ||
112 | u8 val = slit->entry[d*i + j]; | ||
113 | if (i == j) { | ||
114 | if (val != LOCAL_DISTANCE) | ||
115 | return 0; | ||
116 | } else if (val <= LOCAL_DISTANCE) | ||
117 | return 0; | ||
118 | } | ||
119 | } | ||
120 | return 1; | ||
121 | } | ||
122 | |||
123 | /* Callback for SLIT parsing */ | 100 | /* Callback for SLIT parsing */ |
124 | void __init acpi_numa_slit_init(struct acpi_table_slit *slit) | 101 | void __init acpi_numa_slit_init(struct acpi_table_slit *slit) |
125 | { | 102 | { |
126 | if (!slit_valid(slit)) { | ||
127 | printk(KERN_INFO "ACPI: SLIT table looks invalid. Not used.\n"); | ||
128 | return; | ||
129 | } | ||
130 | acpi_slit = slit; | 103 | acpi_slit = slit; |
131 | } | 104 | } |
132 | 105 | ||
@@ -522,6 +495,7 @@ int __node_distance(int a, int b) | |||
522 | 495 | ||
523 | EXPORT_SYMBOL(__node_distance); | 496 | EXPORT_SYMBOL(__node_distance); |
524 | 497 | ||
498 | #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY) | ||
525 | int memory_add_physaddr_to_nid(u64 start) | 499 | int memory_add_physaddr_to_nid(u64 start) |
526 | { | 500 | { |
527 | int i, ret = 0; | 501 | int i, ret = 0; |
@@ -533,4 +507,4 @@ int memory_add_physaddr_to_nid(u64 start) | |||
533 | return ret; | 507 | return ret; |
534 | } | 508 | } |
535 | EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); | 509 | EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); |
536 | 510 | #endif | |
diff --git a/arch/x86/pci/Makefile_32 b/arch/x86/pci/Makefile_32 index 89ec35d00efd..f647e7e56da4 100644 --- a/arch/x86/pci/Makefile_32 +++ b/arch/x86/pci/Makefile_32 | |||
@@ -22,3 +22,4 @@ pci-$(CONFIG_X86_NUMAQ) := numa.o irq.o | |||
22 | pci-$(CONFIG_NUMA) += mp_bus_to_node.o | 22 | pci-$(CONFIG_NUMA) += mp_bus_to_node.o |
23 | 23 | ||
24 | obj-y += $(pci-y) common.o early.o | 24 | obj-y += $(pci-y) common.o early.o |
25 | obj-y += amd_bus.o | ||
diff --git a/arch/x86/pci/Makefile_64 b/arch/x86/pci/Makefile_64 index 8fbd19832cf6..fd47068c95de 100644 --- a/arch/x86/pci/Makefile_64 +++ b/arch/x86/pci/Makefile_64 | |||
@@ -13,5 +13,5 @@ obj-y += legacy.o irq.o common.o early.o | |||
13 | # mmconfig has a 64bit special | 13 | # mmconfig has a 64bit special |
14 | obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_64.o direct.o mmconfig-shared.o | 14 | obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_64.o direct.o mmconfig-shared.o |
15 | 15 | ||
16 | obj-y += k8-bus_64.o | 16 | obj-y += amd_bus.o |
17 | 17 | ||
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index d95de2f199cd..464279da49c4 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c | |||
@@ -218,7 +218,6 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do | |||
218 | return bus; | 218 | return bus; |
219 | } | 219 | } |
220 | 220 | ||
221 | extern int pci_routeirq; | ||
222 | static int __init pci_acpi_init(void) | 221 | static int __init pci_acpi_init(void) |
223 | { | 222 | { |
224 | struct pci_dev *dev = NULL; | 223 | struct pci_dev *dev = NULL; |
diff --git a/arch/x86/pci/k8-bus_64.c b/arch/x86/pci/amd_bus.c index 5c2799c20e47..15f505d3a78e 100644 --- a/arch/x86/pci/k8-bus_64.c +++ b/arch/x86/pci/amd_bus.c | |||
@@ -1,5 +1,9 @@ | |||
1 | #include <linux/init.h> | 1 | #include <linux/init.h> |
2 | #include <linux/pci.h> | 2 | #include <linux/pci.h> |
3 | #include "pci.h" | ||
4 | |||
5 | #ifdef CONFIG_X86_64 | ||
6 | |||
3 | #include <asm/pci-direct.h> | 7 | #include <asm/pci-direct.h> |
4 | #include <asm/mpspec.h> | 8 | #include <asm/mpspec.h> |
5 | #include <linux/cpumask.h> | 9 | #include <linux/cpumask.h> |
@@ -526,3 +530,31 @@ static int __init early_fill_mp_bus_info(void) | |||
526 | } | 530 | } |
527 | 531 | ||
528 | postcore_initcall(early_fill_mp_bus_info); | 532 | postcore_initcall(early_fill_mp_bus_info); |
533 | |||
534 | #endif | ||
535 | |||
536 | /* common 32/64 bit code */ | ||
537 | |||
538 | #define ENABLE_CF8_EXT_CFG (1ULL << 46) | ||
539 | |||
540 | static void enable_pci_io_ecs_per_cpu(void *unused) | ||
541 | { | ||
542 | u64 reg; | ||
543 | rdmsrl(MSR_AMD64_NB_CFG, reg); | ||
544 | if (!(reg & ENABLE_CF8_EXT_CFG)) { | ||
545 | reg |= ENABLE_CF8_EXT_CFG; | ||
546 | wrmsrl(MSR_AMD64_NB_CFG, reg); | ||
547 | } | ||
548 | } | ||
549 | |||
550 | static int __init enable_pci_io_ecs(void) | ||
551 | { | ||
552 | /* assume all cpus from fam10h have IO ECS */ | ||
553 | if (boot_cpu_data.x86 < 0x10) | ||
554 | return 0; | ||
555 | on_each_cpu(enable_pci_io_ecs_per_cpu, NULL, 1, 1); | ||
556 | pci_probe |= PCI_HAS_IO_ECS; | ||
557 | return 0; | ||
558 | } | ||
559 | |||
560 | postcore_initcall(enable_pci_io_ecs); | ||
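This common 32/64-bit half is what pci_direct_init() keys off later (see the direct.c hunk below): setting NB_CFG bit 46 on every CPU turns on IO ECS, i.e. extended configuration space through ports CF8/CFC, and the trailing `1, 1` asks on_each_cpu() to retry and to wait for all CPUs (the four-argument form of that era, if memory serves). A quick sanity check of the constant:

#include <stdio.h>

int main(void)
{
	unsigned long long bit = 1ULL << 46;	/* ENABLE_CF8_EXT_CFG */
	printf("0x%016llx\n", bit);		/* 0x0000400000000000 */
	return 0;
}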
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 8545c8a9d107..940185ecaeda 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c | |||
@@ -302,18 +302,18 @@ static struct dmi_system_id __devinitdata pciprobe_dmi_table[] = { | |||
302 | }, | 302 | }, |
303 | { | 303 | { |
304 | .callback = set_bf_sort, | 304 | .callback = set_bf_sort, |
305 | .ident = "HP ProLiant DL385 G2", | 305 | .ident = "HP ProLiant DL360", |
306 | .matches = { | 306 | .matches = { |
307 | DMI_MATCH(DMI_SYS_VENDOR, "HP"), | 307 | DMI_MATCH(DMI_SYS_VENDOR, "HP"), |
308 | DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL385 G2"), | 308 | DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL360"), |
309 | }, | 309 | }, |
310 | }, | 310 | }, |
311 | { | 311 | { |
312 | .callback = set_bf_sort, | 312 | .callback = set_bf_sort, |
313 | .ident = "HP ProLiant DL585 G2", | 313 | .ident = "HP ProLiant DL380", |
314 | .matches = { | 314 | .matches = { |
315 | DMI_MATCH(DMI_SYS_VENDOR, "HP"), | 315 | DMI_MATCH(DMI_SYS_VENDOR, "HP"), |
316 | DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL585 G2"), | 316 | DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL380"), |
317 | }, | 317 | }, |
318 | }, | 318 | }, |
319 | #ifdef __i386__ | 319 | #ifdef __i386__ |
@@ -328,18 +328,18 @@ static struct dmi_system_id __devinitdata pciprobe_dmi_table[] = { | |||
328 | #endif | 328 | #endif |
329 | { | 329 | { |
330 | .callback = set_bf_sort, | 330 | .callback = set_bf_sort, |
331 | .ident = "HP ProLiant DL385 G2", | 331 | .ident = "HP ProLiant DL360", |
332 | .matches = { | 332 | .matches = { |
333 | DMI_MATCH(DMI_SYS_VENDOR, "HP"), | 333 | DMI_MATCH(DMI_SYS_VENDOR, "HP"), |
334 | DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL385 G2"), | 334 | DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL360"), |
335 | }, | 335 | }, |
336 | }, | 336 | }, |
337 | { | 337 | { |
338 | .callback = set_bf_sort, | 338 | .callback = set_bf_sort, |
339 | .ident = "HP ProLiant DL585 G2", | 339 | .ident = "HP ProLiant DL380", |
340 | .matches = { | 340 | .matches = { |
341 | DMI_MATCH(DMI_SYS_VENDOR, "HP"), | 341 | DMI_MATCH(DMI_SYS_VENDOR, "HP"), |
342 | DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL585 G2"), | 342 | DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL380"), |
343 | }, | 343 | }, |
344 | }, | 344 | }, |
345 | {} | 345 | {} |
diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c index 21d1e0e0d535..9915293500fb 100644 --- a/arch/x86/pci/direct.c +++ b/arch/x86/pci/direct.c | |||
@@ -8,18 +8,21 @@ | |||
8 | #include "pci.h" | 8 | #include "pci.h" |
9 | 9 | ||
10 | /* | 10 | /* |
11 | * Functions for accessing PCI configuration space with type 1 accesses | 11 | * Functions for accessing PCI base (first 256 bytes) and extended |
12 | * (4096 bytes per PCI function) configuration space with type 1 | ||
13 | * accesses. | ||
12 | */ | 14 | */ |
13 | 15 | ||
14 | #define PCI_CONF1_ADDRESS(bus, devfn, reg) \ | 16 | #define PCI_CONF1_ADDRESS(bus, devfn, reg) \ |
15 | (0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3)) | 17 | (0x80000000 | ((reg & 0xF00) << 16) | (bus << 16) \ |
18 | | (devfn << 8) | (reg & 0xFC)) | ||
16 | 19 | ||
17 | static int pci_conf1_read(unsigned int seg, unsigned int bus, | 20 | static int pci_conf1_read(unsigned int seg, unsigned int bus, |
18 | unsigned int devfn, int reg, int len, u32 *value) | 21 | unsigned int devfn, int reg, int len, u32 *value) |
19 | { | 22 | { |
20 | unsigned long flags; | 23 | unsigned long flags; |
21 | 24 | ||
22 | if ((bus > 255) || (devfn > 255) || (reg > 255)) { | 25 | if ((bus > 255) || (devfn > 255) || (reg > 4095)) { |
23 | *value = -1; | 26 | *value = -1; |
24 | return -EINVAL; | 27 | return -EINVAL; |
25 | } | 28 | } |
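The widened PCI_CONF1_ADDRESS packs extended-register bits 11:8 into CF8 address bits 27:24, which is why the bounds checks grow from reg > 255 to reg > 4095. A standalone check of the encoding (the bus/devfn/reg values are illustrative only):

#include <stdio.h>

#define PCI_CONF1_ADDRESS(bus, devfn, reg) \
	(0x80000000 | (((reg) & 0xF00) << 16) | ((bus) << 16) \
	| ((devfn) << 8) | ((reg) & 0xFC))

int main(void)
{
	/* bus 0, device 0x18 function 3, extended register 0xCAC */
	unsigned int addr = PCI_CONF1_ADDRESS(0, (0x18 << 3) | 3, 0xCAC);
	printf("0x%08x\n", addr);	/* 0x8c00c3ac: bits 27:24 carry 0xC */
	return 0;
}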
@@ -50,7 +53,7 @@ static int pci_conf1_write(unsigned int seg, unsigned int bus, | |||
50 | { | 53 | { |
51 | unsigned long flags; | 54 | unsigned long flags; |
52 | 55 | ||
53 | if ((bus > 255) || (devfn > 255) || (reg > 255)) | 56 | if ((bus > 255) || (devfn > 255) || (reg > 4095)) |
54 | return -EINVAL; | 57 | return -EINVAL; |
55 | 58 | ||
56 | spin_lock_irqsave(&pci_config_lock, flags); | 59 | spin_lock_irqsave(&pci_config_lock, flags); |
@@ -260,10 +263,18 @@ void __init pci_direct_init(int type) | |||
260 | return; | 263 | return; |
261 | printk(KERN_INFO "PCI: Using configuration type %d for base access\n", | 264 | printk(KERN_INFO "PCI: Using configuration type %d for base access\n", |
262 | type); | 265 | type); |
263 | if (type == 1) | 266 | if (type == 1) { |
264 | raw_pci_ops = &pci_direct_conf1; | 267 | raw_pci_ops = &pci_direct_conf1; |
265 | else | 268 | if (raw_pci_ext_ops) |
266 | raw_pci_ops = &pci_direct_conf2; | 269 | return; |
270 | if (!(pci_probe & PCI_HAS_IO_ECS)) | ||
271 | return; | ||
272 | printk(KERN_INFO "PCI: Using configuration type 1 " | ||
273 | "for extended access\n"); | ||
274 | raw_pci_ext_ops = &pci_direct_conf1; | ||
275 | return; | ||
276 | } | ||
277 | raw_pci_ops = &pci_direct_conf2; | ||
267 | } | 278 | } |
268 | 279 | ||
269 | int __init pci_direct_probe(void) | 280 | int __init pci_direct_probe(void) |
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 8af0f0bae2af..6ccd7a108cd4 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c | |||
@@ -299,17 +299,15 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, | |||
299 | return -EINVAL; | 299 | return -EINVAL; |
300 | 300 | ||
301 | prot = pgprot_val(vma->vm_page_prot); | 301 | prot = pgprot_val(vma->vm_page_prot); |
302 | if (pat_wc_enabled && write_combine) | 302 | if (pat_enabled && write_combine) |
303 | prot |= _PAGE_CACHE_WC; | 303 | prot |= _PAGE_CACHE_WC; |
304 | else if (pat_wc_enabled) | 304 | else if (pat_enabled || boot_cpu_data.x86 > 3) |
305 | /* | 305 | /* |
306 | * ioremap() and ioremap_nocache() default to UC MINUS for now. | 306 | * ioremap() and ioremap_nocache() default to UC MINUS for now. |
307 | * To avoid attribute conflicts, request UC MINUS here | 307 | * To avoid attribute conflicts, request UC MINUS here |
308 | * as well. | 308 | * as well. |
309 | */ | 309 | */ |
310 | prot |= _PAGE_CACHE_UC_MINUS; | 310 | prot |= _PAGE_CACHE_UC_MINUS; |
311 | else if (boot_cpu_data.x86 > 3) | ||
312 | prot |= _PAGE_CACHE_UC; | ||
313 | 311 | ||
314 | vma->vm_page_prot = __pgprot(prot); | 312 | vma->vm_page_prot = __pgprot(prot); |
315 | 313 | ||
diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c index e70b9c57b88e..b821f4462d99 100644 --- a/arch/x86/pci/init.c +++ b/arch/x86/pci/init.c | |||
@@ -15,7 +15,8 @@ static __init int pci_access_init(void) | |||
15 | pci_mmcfg_early_init(); | 15 | pci_mmcfg_early_init(); |
16 | 16 | ||
17 | #ifdef CONFIG_PCI_OLPC | 17 | #ifdef CONFIG_PCI_OLPC |
18 | pci_olpc_init(); | 18 | if (!pci_olpc_init()) |
19 | return 0; /* skip additional checks if it's an XO */ | ||
19 | #endif | 20 | #endif |
20 | #ifdef CONFIG_PCI_BIOS | 21 | #ifdef CONFIG_PCI_BIOS |
21 | pci_pcbios_init(); | 22 | pci_pcbios_init(); |
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 0908fca901bf..f0859de23e20 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c | |||
@@ -11,8 +11,8 @@ | |||
11 | #include <linux/slab.h> | 11 | #include <linux/slab.h> |
12 | #include <linux/interrupt.h> | 12 | #include <linux/interrupt.h> |
13 | #include <linux/dmi.h> | 13 | #include <linux/dmi.h> |
14 | #include <asm/io.h> | 14 | #include <linux/io.h> |
15 | #include <asm/smp.h> | 15 | #include <linux/smp.h> |
16 | #include <asm/io_apic.h> | 16 | #include <asm/io_apic.h> |
17 | #include <linux/irq.h> | 17 | #include <linux/irq.h> |
18 | #include <linux/acpi.h> | 18 | #include <linux/acpi.h> |
@@ -61,7 +61,7 @@ void (*pcibios_disable_irq)(struct pci_dev *dev) = NULL; | |||
61 | * and perform checksum verification. | 61 | * and perform checksum verification. |
62 | */ | 62 | */ |
63 | 63 | ||
64 | static inline struct irq_routing_table * pirq_check_routing_table(u8 *addr) | 64 | static inline struct irq_routing_table *pirq_check_routing_table(u8 *addr) |
65 | { | 65 | { |
66 | struct irq_routing_table *rt; | 66 | struct irq_routing_table *rt; |
67 | int i; | 67 | int i; |
@@ -74,7 +74,7 @@ static inline struct irq_routing_table * pirq_check_routing_table(u8 *addr) | |||
74 | rt->size < sizeof(struct irq_routing_table)) | 74 | rt->size < sizeof(struct irq_routing_table)) |
75 | return NULL; | 75 | return NULL; |
76 | sum = 0; | 76 | sum = 0; |
77 | for (i=0; i < rt->size; i++) | 77 | for (i = 0; i < rt->size; i++) |
78 | sum += addr[i]; | 78 | sum += addr[i]; |
79 | if (!sum) { | 79 | if (!sum) { |
80 | DBG(KERN_DEBUG "PCI: Interrupt Routing Table found at 0x%p\n", rt); | 80 | DBG(KERN_DEBUG "PCI: Interrupt Routing Table found at 0x%p\n", rt); |
@@ -100,7 +100,7 @@ static struct irq_routing_table * __init pirq_find_routing_table(void) | |||
100 | return rt; | 100 | return rt; |
101 | printk(KERN_WARNING "PCI: PIRQ table NOT found at pirqaddr\n"); | 101 | printk(KERN_WARNING "PCI: PIRQ table NOT found at pirqaddr\n"); |
102 | } | 102 | } |
103 | for(addr = (u8 *) __va(0xf0000); addr < (u8 *) __va(0x100000); addr += 16) { | 103 | for (addr = (u8 *) __va(0xf0000); addr < (u8 *) __va(0x100000); addr += 16) { |
104 | rt = pirq_check_routing_table(addr); | 104 | rt = pirq_check_routing_table(addr); |
105 | if (rt) | 105 | if (rt) |
106 | return rt; | 106 | return rt; |
@@ -122,20 +122,20 @@ static void __init pirq_peer_trick(void) | |||
122 | struct irq_info *e; | 122 | struct irq_info *e; |
123 | 123 | ||
124 | memset(busmap, 0, sizeof(busmap)); | 124 | memset(busmap, 0, sizeof(busmap)); |
125 | for(i=0; i < (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); i++) { | 125 | for (i = 0; i < (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); i++) { |
126 | e = &rt->slots[i]; | 126 | e = &rt->slots[i]; |
127 | #ifdef DEBUG | 127 | #ifdef DEBUG |
128 | { | 128 | { |
129 | int j; | 129 | int j; |
130 | DBG(KERN_DEBUG "%02x:%02x slot=%02x", e->bus, e->devfn/8, e->slot); | 130 | DBG(KERN_DEBUG "%02x:%02x slot=%02x", e->bus, e->devfn/8, e->slot); |
131 | for(j=0; j<4; j++) | 131 | for (j = 0; j < 4; j++) |
132 | DBG(" %d:%02x/%04x", j, e->irq[j].link, e->irq[j].bitmap); | 132 | DBG(" %d:%02x/%04x", j, e->irq[j].link, e->irq[j].bitmap); |
133 | DBG("\n"); | 133 | DBG("\n"); |
134 | } | 134 | } |
135 | #endif | 135 | #endif |
136 | busmap[e->bus] = 1; | 136 | busmap[e->bus] = 1; |
137 | } | 137 | } |
138 | for(i = 1; i < 256; i++) { | 138 | for (i = 1; i < 256; i++) { |
139 | int node; | 139 | int node; |
140 | if (!busmap[i] || pci_find_bus(0, i)) | 140 | if (!busmap[i] || pci_find_bus(0, i)) |
141 | continue; | 141 | continue; |
@@ -285,7 +285,7 @@ static int pirq_ite_get(struct pci_dev *router, struct pci_dev *dev, int pirq) | |||
285 | static const unsigned char pirqmap[4] = { 1, 0, 2, 3 }; | 285 | static const unsigned char pirqmap[4] = { 1, 0, 2, 3 }; |
286 | 286 | ||
287 | WARN_ON_ONCE(pirq > 4); | 287 | WARN_ON_ONCE(pirq > 4); |
288 | return read_config_nybble(router,0x43, pirqmap[pirq-1]); | 288 | return read_config_nybble(router, 0x43, pirqmap[pirq-1]); |
289 | } | 289 | } |
290 | 290 | ||
291 | static int pirq_ite_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) | 291 | static int pirq_ite_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) |
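The router get/set methods above all funnel through a nybble accessor; a hedged reconstruction of its likely shape, inferred from how it is called here (the file's actual helper may differ in detail):

	/* read the nr-th 4-bit field starting at config offset; sketch only */
	static unsigned char read_config_nybble(struct pci_dev *router,
						unsigned offset, unsigned nr)
	{
		u8 x;
		unsigned reg = offset + (nr >> 1);

		pci_read_config_byte(router, reg, &x);
		return (nr & 1) ? (x >> 4) : (x & 0xf);
	}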
@@ -314,7 +314,7 @@ static int pirq_opti_set(struct pci_dev *router, struct pci_dev *dev, int pirq, | |||
314 | 314 | ||
315 | /* | 315 | /* |
316 | * Cyrix: nibble offset 0x5C | 316 | * Cyrix: nibble offset 0x5C |
317 | * 0x5C bits 7:4 is INTB bits 3:0 is INTA | 317 | * 0x5C bits 7:4 is INTB bits 3:0 is INTA |
318 | * 0x5D bits 7:4 is INTD bits 3:0 is INTC | 318 | * 0x5D bits 7:4 is INTD bits 3:0 is INTC |
319 | */ | 319 | */ |
320 | static int pirq_cyrix_get(struct pci_dev *router, struct pci_dev *dev, int pirq) | 320 | static int pirq_cyrix_get(struct pci_dev *router, struct pci_dev *dev, int pirq) |
@@ -350,7 +350,7 @@ static int pirq_cyrix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, | |||
350 | * Apparently there are systems implementing PCI routing table using | 350 | * Apparently there are systems implementing PCI routing table using |
351 | * link values 0x01-0x04 and others using 0x41-0x44 for PCI INTA..D. | 351 | * link values 0x01-0x04 and others using 0x41-0x44 for PCI INTA..D. |
352 | * We try our best to handle both link mappings. | 352 | * We try our best to handle both link mappings. |
353 | * | 353 | * |
354 | * Currently (2003-05-21) it appears most SiS chipsets follow the | 354 | * Currently (2003-05-21) it appears most SiS chipsets follow the |
355 | * definition of routing registers from the SiS-5595 southbridge. | 355 | * definition of routing registers from the SiS-5595 southbridge. |
356 | * According to the SiS 5595 datasheets the revision id's of the | 356 | * According to the SiS 5595 datasheets the revision id's of the |
@@ -370,7 +370,7 @@ static int pirq_cyrix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, | |||
370 | * | 370 | * |
371 | * 0x62: USBIRQ: | 371 | * 0x62: USBIRQ: |
372 | * bit 6 OHCI function disabled (0), enabled (1) | 372 | * bit 6 OHCI function disabled (0), enabled (1) |
373 | * | 373 | * |
374 | * 0x6a: ACPI/SCI IRQ: bits 4-6 reserved | 374 | * 0x6a: ACPI/SCI IRQ: bits 4-6 reserved |
375 | * | 375 | * |
376 | * 0x7e: Data Acq. Module IRQ - bits 4-6 reserved | 376 | * 0x7e: Data Acq. Module IRQ - bits 4-6 reserved |
@@ -487,9 +487,7 @@ static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq | |||
487 | u8 irq; | 487 | u8 irq; |
488 | irq = 0; | 488 | irq = 0; |
489 | if (pirq <= 4) | 489 | if (pirq <= 4) |
490 | { | ||
491 | irq = read_config_nybble(router, 0x56, pirq - 1); | 490 | irq = read_config_nybble(router, 0x56, pirq - 1); |
492 | } | ||
493 | printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d get irq : %2d\n", | 491 | printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d get irq : %2d\n", |
494 | dev->vendor, dev->device, pirq, irq); | 492 | dev->vendor, dev->device, pirq, irq); |
495 | return irq; | 493 | return irq; |
@@ -497,12 +495,10 @@ static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq | |||
497 | 495 | ||
498 | static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) | 496 | static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) |
499 | { | 497 | { |
500 | printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n", | 498 | printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n", |
501 | dev->vendor, dev->device, pirq, irq); | 499 | dev->vendor, dev->device, pirq, irq); |
502 | if (pirq <= 4) | 500 | if (pirq <= 4) |
503 | { | ||
504 | write_config_nybble(router, 0x56, pirq - 1, irq); | 501 | write_config_nybble(router, 0x56, pirq - 1, irq); |
505 | } | ||
506 | return 1; | 502 | return 1; |
507 | } | 503 | } |
508 | 504 | ||
@@ -549,50 +545,49 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route | |||
549 | if (pci_dev_present(pirq_440gx)) | 545 | if (pci_dev_present(pirq_440gx)) |
550 | return 0; | 546 | return 0; |
551 | 547 | ||
552 | switch(device) | 548 | switch (device) { |
553 | { | 549 | case PCI_DEVICE_ID_INTEL_82371FB_0: |
554 | case PCI_DEVICE_ID_INTEL_82371FB_0: | 550 | case PCI_DEVICE_ID_INTEL_82371SB_0: |
555 | case PCI_DEVICE_ID_INTEL_82371SB_0: | 551 | case PCI_DEVICE_ID_INTEL_82371AB_0: |
556 | case PCI_DEVICE_ID_INTEL_82371AB_0: | 552 | case PCI_DEVICE_ID_INTEL_82371MX: |
557 | case PCI_DEVICE_ID_INTEL_82371MX: | 553 | case PCI_DEVICE_ID_INTEL_82443MX_0: |
558 | case PCI_DEVICE_ID_INTEL_82443MX_0: | 554 | case PCI_DEVICE_ID_INTEL_82801AA_0: |
559 | case PCI_DEVICE_ID_INTEL_82801AA_0: | 555 | case PCI_DEVICE_ID_INTEL_82801AB_0: |
560 | case PCI_DEVICE_ID_INTEL_82801AB_0: | 556 | case PCI_DEVICE_ID_INTEL_82801BA_0: |
561 | case PCI_DEVICE_ID_INTEL_82801BA_0: | 557 | case PCI_DEVICE_ID_INTEL_82801BA_10: |
562 | case PCI_DEVICE_ID_INTEL_82801BA_10: | 558 | case PCI_DEVICE_ID_INTEL_82801CA_0: |
563 | case PCI_DEVICE_ID_INTEL_82801CA_0: | 559 | case PCI_DEVICE_ID_INTEL_82801CA_12: |
564 | case PCI_DEVICE_ID_INTEL_82801CA_12: | 560 | case PCI_DEVICE_ID_INTEL_82801DB_0: |
565 | case PCI_DEVICE_ID_INTEL_82801DB_0: | 561 | case PCI_DEVICE_ID_INTEL_82801E_0: |
566 | case PCI_DEVICE_ID_INTEL_82801E_0: | 562 | case PCI_DEVICE_ID_INTEL_82801EB_0: |
567 | case PCI_DEVICE_ID_INTEL_82801EB_0: | 563 | case PCI_DEVICE_ID_INTEL_ESB_1: |
568 | case PCI_DEVICE_ID_INTEL_ESB_1: | 564 | case PCI_DEVICE_ID_INTEL_ICH6_0: |
569 | case PCI_DEVICE_ID_INTEL_ICH6_0: | 565 | case PCI_DEVICE_ID_INTEL_ICH6_1: |
570 | case PCI_DEVICE_ID_INTEL_ICH6_1: | 566 | case PCI_DEVICE_ID_INTEL_ICH7_0: |
571 | case PCI_DEVICE_ID_INTEL_ICH7_0: | 567 | case PCI_DEVICE_ID_INTEL_ICH7_1: |
572 | case PCI_DEVICE_ID_INTEL_ICH7_1: | 568 | case PCI_DEVICE_ID_INTEL_ICH7_30: |
573 | case PCI_DEVICE_ID_INTEL_ICH7_30: | 569 | case PCI_DEVICE_ID_INTEL_ICH7_31: |
574 | case PCI_DEVICE_ID_INTEL_ICH7_31: | 570 | case PCI_DEVICE_ID_INTEL_ESB2_0: |
575 | case PCI_DEVICE_ID_INTEL_ESB2_0: | 571 | case PCI_DEVICE_ID_INTEL_ICH8_0: |
576 | case PCI_DEVICE_ID_INTEL_ICH8_0: | 572 | case PCI_DEVICE_ID_INTEL_ICH8_1: |
577 | case PCI_DEVICE_ID_INTEL_ICH8_1: | 573 | case PCI_DEVICE_ID_INTEL_ICH8_2: |
578 | case PCI_DEVICE_ID_INTEL_ICH8_2: | 574 | case PCI_DEVICE_ID_INTEL_ICH8_3: |
579 | case PCI_DEVICE_ID_INTEL_ICH8_3: | 575 | case PCI_DEVICE_ID_INTEL_ICH8_4: |
580 | case PCI_DEVICE_ID_INTEL_ICH8_4: | 576 | case PCI_DEVICE_ID_INTEL_ICH9_0: |
581 | case PCI_DEVICE_ID_INTEL_ICH9_0: | 577 | case PCI_DEVICE_ID_INTEL_ICH9_1: |
582 | case PCI_DEVICE_ID_INTEL_ICH9_1: | 578 | case PCI_DEVICE_ID_INTEL_ICH9_2: |
583 | case PCI_DEVICE_ID_INTEL_ICH9_2: | 579 | case PCI_DEVICE_ID_INTEL_ICH9_3: |
584 | case PCI_DEVICE_ID_INTEL_ICH9_3: | 580 | case PCI_DEVICE_ID_INTEL_ICH9_4: |
585 | case PCI_DEVICE_ID_INTEL_ICH9_4: | 581 | case PCI_DEVICE_ID_INTEL_ICH9_5: |
586 | case PCI_DEVICE_ID_INTEL_ICH9_5: | 582 | case PCI_DEVICE_ID_INTEL_TOLAPAI_0: |
587 | case PCI_DEVICE_ID_INTEL_TOLAPAI_0: | 583 | case PCI_DEVICE_ID_INTEL_ICH10_0: |
588 | case PCI_DEVICE_ID_INTEL_ICH10_0: | 584 | case PCI_DEVICE_ID_INTEL_ICH10_1: |
589 | case PCI_DEVICE_ID_INTEL_ICH10_1: | 585 | case PCI_DEVICE_ID_INTEL_ICH10_2: |
590 | case PCI_DEVICE_ID_INTEL_ICH10_2: | 586 | case PCI_DEVICE_ID_INTEL_ICH10_3: |
591 | case PCI_DEVICE_ID_INTEL_ICH10_3: | 587 | r->name = "PIIX/ICH"; |
592 | r->name = "PIIX/ICH"; | 588 | r->get = pirq_piix_get; |
593 | r->get = pirq_piix_get; | 589 | r->set = pirq_piix_set; |
594 | r->set = pirq_piix_set; | 590 | return 1; |
595 | return 1; | ||
596 | } | 591 | } |
597 | return 0; | 592 | return 0; |
598 | } | 593 | } |
@@ -606,7 +601,7 @@ static __init int via_router_probe(struct irq_router *r, | |||
606 | * workarounds for some buggy BIOSes | 601 | * workarounds for some buggy BIOSes |
607 | */ | 602 | */ |
608 | if (device == PCI_DEVICE_ID_VIA_82C586_0) { | 603 | if (device == PCI_DEVICE_ID_VIA_82C586_0) { |
609 | switch(router->device) { | 604 | switch (router->device) { |
610 | case PCI_DEVICE_ID_VIA_82C686: | 605 | case PCI_DEVICE_ID_VIA_82C686: |
611 | /* | 606 | /* |
612 | * Asus k7m bios wrongly reports 82C686A | 607 | * Asus k7m bios wrongly reports 82C686A |
@@ -621,10 +616,17 @@ static __init int via_router_probe(struct irq_router *r, | |||
621 | */ | 616 | */ |
622 | device = PCI_DEVICE_ID_VIA_8235; | 617 | device = PCI_DEVICE_ID_VIA_8235; |
623 | break; | 618 | break; |
619 | case PCI_DEVICE_ID_VIA_8237: | ||
620 | /* ||
621 | * Asus a7v600 bios wrongly reports 8237 | ||
622 | * as 586-compatible | ||
623 | */ | ||
624 | device = PCI_DEVICE_ID_VIA_8237; | ||
625 | break; | ||
624 | } | 626 | } |
625 | } | 627 | } |
626 | 628 | ||
627 | switch(device) { | 629 | switch (device) { |
628 | case PCI_DEVICE_ID_VIA_82C586_0: | 630 | case PCI_DEVICE_ID_VIA_82C586_0: |
629 | r->name = "VIA"; | 631 | r->name = "VIA"; |
630 | r->get = pirq_via586_get; | 632 | r->get = pirq_via586_get; |
@@ -647,13 +649,12 @@ static __init int via_router_probe(struct irq_router *r, | |||
647 | 649 | ||
648 | static __init int vlsi_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) | 650 | static __init int vlsi_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) |
649 | { | 651 | { |
650 | switch(device) | 652 | switch (device) { |
651 | { | 653 | case PCI_DEVICE_ID_VLSI_82C534: |
652 | case PCI_DEVICE_ID_VLSI_82C534: | 654 | r->name = "VLSI 82C534"; |
653 | r->name = "VLSI 82C534"; | 655 | r->get = pirq_vlsi_get; |
654 | r->get = pirq_vlsi_get; | 656 | r->set = pirq_vlsi_set; |
655 | r->set = pirq_vlsi_set; | 657 | return 1; |
656 | return 1; | ||
657 | } | 658 | } |
658 | return 0; | 659 | return 0; |
659 | } | 660 | } |
@@ -661,14 +662,13 @@ static __init int vlsi_router_probe(struct irq_router *r, struct pci_dev *router | |||
661 | 662 | ||
662 | static __init int serverworks_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) | 663 | static __init int serverworks_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) |
663 | { | 664 | { |
664 | switch(device) | 665 | switch (device) { |
665 | { | 666 | case PCI_DEVICE_ID_SERVERWORKS_OSB4: |
666 | case PCI_DEVICE_ID_SERVERWORKS_OSB4: | 667 | case PCI_DEVICE_ID_SERVERWORKS_CSB5: |
667 | case PCI_DEVICE_ID_SERVERWORKS_CSB5: | 668 | r->name = "ServerWorks"; |
668 | r->name = "ServerWorks"; | 669 | r->get = pirq_serverworks_get; |
669 | r->get = pirq_serverworks_get; | 670 | r->set = pirq_serverworks_set; |
670 | r->set = pirq_serverworks_set; | 671 | return 1; |
671 | return 1; | ||
672 | } | 672 | } |
673 | return 0; | 673 | return 0; |
674 | } | 674 | } |
@@ -677,7 +677,7 @@ static __init int sis_router_probe(struct irq_router *r, struct pci_dev *router, | |||
677 | { | 677 | { |
678 | if (device != PCI_DEVICE_ID_SI_503) | 678 | if (device != PCI_DEVICE_ID_SI_503) |
679 | return 0; | 679 | return 0; |
680 | 680 | ||
681 | r->name = "SIS"; | 681 | r->name = "SIS"; |
682 | r->get = pirq_sis_get; | 682 | r->get = pirq_sis_get; |
683 | r->set = pirq_sis_set; | 683 | r->set = pirq_sis_set; |
@@ -686,47 +686,43 @@ static __init int sis_router_probe(struct irq_router *r, struct pci_dev *router, | |||
686 | 686 | ||
687 | static __init int cyrix_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) | 687 | static __init int cyrix_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) |
688 | { | 688 | { |
689 | switch(device) | 689 | switch (device) { |
690 | { | 690 | case PCI_DEVICE_ID_CYRIX_5520: |
691 | case PCI_DEVICE_ID_CYRIX_5520: | 691 | r->name = "NatSemi"; |
692 | r->name = "NatSemi"; | 692 | r->get = pirq_cyrix_get; |
693 | r->get = pirq_cyrix_get; | 693 | r->set = pirq_cyrix_set; |
694 | r->set = pirq_cyrix_set; | 694 | return 1; |
695 | return 1; | ||
696 | } | 695 | } |
697 | return 0; | 696 | return 0; |
698 | } | 697 | } |
699 | 698 | ||
700 | static __init int opti_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) | 699 | static __init int opti_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) |
701 | { | 700 | { |
702 | switch(device) | 701 | switch (device) { |
703 | { | 702 | case PCI_DEVICE_ID_OPTI_82C700: |
704 | case PCI_DEVICE_ID_OPTI_82C700: | 703 | r->name = "OPTI"; |
705 | r->name = "OPTI"; | 704 | r->get = pirq_opti_get; |
706 | r->get = pirq_opti_get; | 705 | r->set = pirq_opti_set; |
707 | r->set = pirq_opti_set; | 706 | return 1; |
708 | return 1; | ||
709 | } | 707 | } |
710 | return 0; | 708 | return 0; |
711 | } | 709 | } |
712 | 710 | ||
713 | static __init int ite_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) | 711 | static __init int ite_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) |
714 | { | 712 | { |
715 | switch(device) | 713 | switch (device) { |
716 | { | 714 | case PCI_DEVICE_ID_ITE_IT8330G_0: |
717 | case PCI_DEVICE_ID_ITE_IT8330G_0: | 715 | r->name = "ITE"; |
718 | r->name = "ITE"; | 716 | r->get = pirq_ite_get; |
719 | r->get = pirq_ite_get; | 717 | r->set = pirq_ite_set; |
720 | r->set = pirq_ite_set; | 718 | return 1; |
721 | return 1; | ||
722 | } | 719 | } |
723 | return 0; | 720 | return 0; |
724 | } | 721 | } |
725 | 722 | ||
726 | static __init int ali_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) | 723 | static __init int ali_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) |
727 | { | 724 | { |
728 | switch(device) | 725 | switch (device) { |
729 | { | ||
730 | case PCI_DEVICE_ID_AL_M1533: | 726 | case PCI_DEVICE_ID_AL_M1533: |
731 | case PCI_DEVICE_ID_AL_M1563: | 727 | case PCI_DEVICE_ID_AL_M1563: |
732 | printk(KERN_DEBUG "PCI: Using ALI IRQ Router\n"); | 728 | printk(KERN_DEBUG "PCI: Using ALI IRQ Router\n"); |
@@ -740,25 +736,24 @@ static __init int ali_router_probe(struct irq_router *r, struct pci_dev *router, | |||
740 | 736 | ||
741 | static __init int amd_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) | 737 | static __init int amd_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) |
742 | { | 738 | { |
743 | switch(device) | 739 | switch (device) { |
744 | { | 740 | case PCI_DEVICE_ID_AMD_VIPER_740B: |
745 | case PCI_DEVICE_ID_AMD_VIPER_740B: | 741 | r->name = "AMD756"; |
746 | r->name = "AMD756"; | 742 | break; |
747 | break; | 743 | case PCI_DEVICE_ID_AMD_VIPER_7413: |
748 | case PCI_DEVICE_ID_AMD_VIPER_7413: | 744 | r->name = "AMD766"; |
749 | r->name = "AMD766"; | 745 | break; |
750 | break; | 746 | case PCI_DEVICE_ID_AMD_VIPER_7443: |
751 | case PCI_DEVICE_ID_AMD_VIPER_7443: | 747 | r->name = "AMD768"; |
752 | r->name = "AMD768"; | 748 | break; |
753 | break; | 749 | default: |
754 | default: | 750 | return 0; |
755 | return 0; | ||
756 | } | 751 | } |
757 | r->get = pirq_amd756_get; | 752 | r->get = pirq_amd756_get; |
758 | r->set = pirq_amd756_set; | 753 | r->set = pirq_amd756_set; |
759 | return 1; | 754 | return 1; |
760 | } | 755 | } |
761 | 756 | ||
762 | static __init int pico_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) | 757 | static __init int pico_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) |
763 | { | 758 | { |
764 | switch (device) { | 759 | switch (device) { |
@@ -800,7 +795,7 @@ static struct pci_dev *pirq_router_dev; | |||
800 | * FIXME: should we have an option to say "generic for | 795 | * FIXME: should we have an option to say "generic for |
801 | * chipset" ? | 796 | * chipset" ? |
802 | */ | 797 | */ |
803 | 798 | ||
804 | static void __init pirq_find_router(struct irq_router *r) | 799 | static void __init pirq_find_router(struct irq_router *r) |
805 | { | 800 | { |
806 | struct irq_routing_table *rt = pirq_table; | 801 | struct irq_routing_table *rt = pirq_table; |
@@ -819,7 +814,7 @@ static void __init pirq_find_router(struct irq_router *r) | |||
819 | r->name = "default"; | 814 | r->name = "default"; |
820 | r->get = NULL; | 815 | r->get = NULL; |
821 | r->set = NULL; | 816 | r->set = NULL; |
822 | 817 | ||
823 | DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for %04x:%04x\n", | 818 | DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for %04x:%04x\n", |
824 | rt->rtr_vendor, rt->rtr_device); | 819 | rt->rtr_vendor, rt->rtr_device); |
825 | 820 | ||
@@ -830,7 +825,7 @@ static void __init pirq_find_router(struct irq_router *r) | |||
830 | return; | 825 | return; |
831 | } | 826 | } |
832 | 827 | ||
833 | for( h = pirq_routers; h->vendor; h++) { | 828 | for (h = pirq_routers; h->vendor; h++) { |
834 | /* First look for a router match */ | 829 | /* First look for a router match */ |
835 | if (rt->rtr_vendor == h->vendor && h->probe(r, pirq_router_dev, rt->rtr_device)) | 830 | if (rt->rtr_vendor == h->vendor && h->probe(r, pirq_router_dev, rt->rtr_device)) |
836 | break; | 831 | break; |
@@ -882,7 +877,7 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) | |||
882 | 877 | ||
883 | if (!pirq_table) | 878 | if (!pirq_table) |
884 | return 0; | 879 | return 0; |
885 | 880 | ||
886 | DBG(KERN_DEBUG "IRQ for %s[%c]", pci_name(dev), 'A' + pin); | 881 | DBG(KERN_DEBUG "IRQ for %s[%c]", pci_name(dev), 'A' + pin); |
887 | info = pirq_get_info(dev); | 882 | info = pirq_get_info(dev); |
888 | if (!info) { | 883 | if (!info) { |
@@ -921,8 +916,10 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) | |||
921 | */ | 916 | */ |
922 | newirq = dev->irq; | 917 | newirq = dev->irq; |
923 | if (newirq && !((1 << newirq) & mask)) { | 918 | if (newirq && !((1 << newirq) & mask)) { |
924 | if ( pci_probe & PCI_USE_PIRQ_MASK) newirq = 0; | 919 | if (pci_probe & PCI_USE_PIRQ_MASK) |
925 | else printk("\n" KERN_WARNING | 920 | newirq = 0; |
921 | else | ||
922 | printk("\n" KERN_WARNING | ||
926 | "PCI: IRQ %i for device %s doesn't match PIRQ mask " | 923 | "PCI: IRQ %i for device %s doesn't match PIRQ mask " |
927 | "- try pci=usepirqmask\n" KERN_DEBUG, newirq, | 924 | "- try pci=usepirqmask\n" KERN_DEBUG, newirq, |
928 | pci_name(dev)); | 925 | pci_name(dev)); |
@@ -942,8 +939,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) | |||
942 | irq = pirq & 0xf; | 939 | irq = pirq & 0xf; |
943 | DBG(" -> hardcoded IRQ %d\n", irq); | 940 | DBG(" -> hardcoded IRQ %d\n", irq); |
944 | msg = "Hardcoded"; | 941 | msg = "Hardcoded"; |
945 | } else if ( r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \ | 942 | } else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \ |
946 | ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask)) ) { | 943 | ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask))) { |
947 | DBG(" -> got IRQ %d\n", irq); | 944 | DBG(" -> got IRQ %d\n", irq); |
948 | msg = "Found"; | 945 | msg = "Found"; |
949 | eisa_set_level_irq(irq); | 946 | eisa_set_level_irq(irq); |
@@ -978,15 +975,15 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) | |||
978 | continue; | 975 | continue; |
979 | if (info->irq[pin].link == pirq) { | 976 | if (info->irq[pin].link == pirq) { |
980 | /* We refuse to override the dev->irq information. Give a warning! */ | 977 | /* We refuse to override the dev->irq information. Give a warning! */ |
981 | if ( dev2->irq && dev2->irq != irq && \ | 978 | if (dev2->irq && dev2->irq != irq && \ |
982 | (!(pci_probe & PCI_USE_PIRQ_MASK) || \ | 979 | (!(pci_probe & PCI_USE_PIRQ_MASK) || \ |
983 | ((1 << dev2->irq) & mask)) ) { | 980 | ((1 << dev2->irq) & mask))) { |
984 | #ifndef CONFIG_PCI_MSI | 981 | #ifndef CONFIG_PCI_MSI |
985 | printk(KERN_INFO "IRQ routing conflict for %s, have irq %d, want irq %d\n", | 982 | printk(KERN_INFO "IRQ routing conflict for %s, have irq %d, want irq %d\n", |
986 | pci_name(dev2), dev2->irq, irq); | 983 | pci_name(dev2), dev2->irq, irq); |
987 | #endif | 984 | #endif |
988 | continue; | 985 | continue; |
989 | } | 986 | } |
990 | dev2->irq = irq; | 987 | dev2->irq = irq; |
991 | pirq_penalty[irq]++; | 988 | pirq_penalty[irq]++; |
992 | if (dev != dev2) | 989 | if (dev != dev2) |
@@ -1024,8 +1021,7 @@ static void __init pcibios_fixup_irqs(void) | |||
1024 | /* | 1021 | /* |
1025 | * Recalculate IRQ numbers if we use the I/O APIC. | 1022 | * Recalculate IRQ numbers if we use the I/O APIC. |
1026 | */ | 1023 | */ |
1027 | if (io_apic_assign_pci_irqs) | 1024 | if (io_apic_assign_pci_irqs) { |
1028 | { | ||
1029 | int irq; | 1025 | int irq; |
1030 | 1026 | ||
1031 | if (pin) { | 1027 | if (pin) { |
@@ -1038,10 +1034,10 @@ static void __init pcibios_fixup_irqs(void) | |||
1038 | * busses itself so we should get into this branch reliably. | 1034 | * busses itself so we should get into this branch reliably. |
1039 | */ | 1035 | */ |
1040 | if (irq < 0 && dev->bus->parent) { /* go back to the bridge */ | 1036 | if (irq < 0 && dev->bus->parent) { /* go back to the bridge */ |
1041 | struct pci_dev * bridge = dev->bus->self; | 1037 | struct pci_dev *bridge = dev->bus->self; |
1042 | 1038 | ||
1043 | pin = (pin + PCI_SLOT(dev->devfn)) % 4; | 1039 | pin = (pin + PCI_SLOT(dev->devfn)) % 4; |
1044 | irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, | 1040 | irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, |
1045 | PCI_SLOT(bridge->devfn), pin); | 1041 | PCI_SLOT(bridge->devfn), pin); |
1046 | if (irq >= 0) | 1042 | if (irq >= 0) |
1047 | printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n", | 1043 | printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n", |
@@ -1131,7 +1127,7 @@ static int __init pcibios_irq_init(void) | |||
1131 | pirq_find_router(&pirq_router); | 1127 | pirq_find_router(&pirq_router); |
1132 | if (pirq_table->exclusive_irqs) { | 1128 | if (pirq_table->exclusive_irqs) { |
1133 | int i; | 1129 | int i; |
1134 | for (i=0; i<16; i++) | 1130 | for (i = 0; i < 16; i++) |
1135 | if (!(pirq_table->exclusive_irqs & (1 << i))) | 1131 | if (!(pirq_table->exclusive_irqs & (1 << i))) |
1136 | pirq_penalty[i] += 100; | 1132 | pirq_penalty[i] += 100; |
1137 | } | 1133 | } |
@@ -1196,10 +1192,10 @@ static int pirq_enable_irq(struct pci_dev *dev) | |||
1196 | */ | 1192 | */ |
1197 | temp_dev = dev; | 1193 | temp_dev = dev; |
1198 | while (irq < 0 && dev->bus->parent) { /* go back to the bridge */ | 1194 | while (irq < 0 && dev->bus->parent) { /* go back to the bridge */ |
1199 | struct pci_dev * bridge = dev->bus->self; | 1195 | struct pci_dev *bridge = dev->bus->self; |
1200 | 1196 | ||
1201 | pin = (pin + PCI_SLOT(dev->devfn)) % 4; | 1197 | pin = (pin + PCI_SLOT(dev->devfn)) % 4; |
1202 | irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, | 1198 | irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, |
1203 | PCI_SLOT(bridge->devfn), pin); | 1199 | PCI_SLOT(bridge->devfn), pin); |
1204 | if (irq >= 0) | 1200 | if (irq >= 0) |
1205 | printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n", | 1201 | printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n", |
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 0cfebecf2a8f..23faaa890ffc 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c | |||
@@ -374,7 +374,7 @@ reject: | |||
374 | 374 | ||
375 | static int __initdata known_bridge; | 375 | static int __initdata known_bridge; |
376 | 376 | ||
377 | void __init __pci_mmcfg_init(int early) | 377 | static void __init __pci_mmcfg_init(int early) |
378 | { | 378 | { |
379 | /* MMCONFIG disabled */ | 379 | /* MMCONFIG disabled */ |
380 | if ((pci_probe & PCI_PROBE_MMCONF) == 0) | 380 | if ((pci_probe & PCI_PROBE_MMCONF) == 0) |
diff --git a/arch/x86/pci/olpc.c b/arch/x86/pci/olpc.c index 5e7636558c02..e11e9e803d5f 100644 --- a/arch/x86/pci/olpc.c +++ b/arch/x86/pci/olpc.c | |||
@@ -302,12 +302,13 @@ static struct pci_raw_ops pci_olpc_conf = { | |||
302 | .write = pci_olpc_write, | 302 | .write = pci_olpc_write, |
303 | }; | 303 | }; |
304 | 304 | ||
305 | void __init pci_olpc_init(void) | 305 | int __init pci_olpc_init(void) |
306 | { | 306 | { |
307 | if (!machine_is_olpc() || olpc_has_vsa()) | 307 | if (!machine_is_olpc() || olpc_has_vsa()) |
308 | return; | 308 | return -ENODEV; |
309 | 309 | ||
310 | printk(KERN_INFO "PCI: Using configuration type OLPC\n"); | 310 | printk(KERN_INFO "PCI: Using configuration type OLPC\n"); |
311 | raw_pci_ops = &pci_olpc_conf; | 311 | raw_pci_ops = &pci_olpc_conf; |
312 | is_lx = is_geode_lx(); | 312 | is_lx = is_geode_lx(); |
313 | return 0; | ||
313 | } | 314 | } |
diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h index f3972b12c60a..ba263e626a68 100644 --- a/arch/x86/pci/pci.h +++ b/arch/x86/pci/pci.h | |||
@@ -27,6 +27,7 @@ | |||
27 | #define PCI_CAN_SKIP_ISA_ALIGN 0x8000 | 27 | #define PCI_CAN_SKIP_ISA_ALIGN 0x8000 |
28 | #define PCI_USE__CRS 0x10000 | 28 | #define PCI_USE__CRS 0x10000 |
29 | #define PCI_CHECK_ENABLE_AMD_MMCONF 0x20000 | 29 | #define PCI_CHECK_ENABLE_AMD_MMCONF 0x20000 |
30 | #define PCI_HAS_IO_ECS 0x40000 | ||
30 | 31 | ||
31 | extern unsigned int pci_probe; | 32 | extern unsigned int pci_probe; |
32 | extern unsigned long pirq_table_addr; | 33 | extern unsigned long pirq_table_addr; |
@@ -101,7 +102,7 @@ extern struct pci_raw_ops pci_direct_conf1; | |||
101 | extern int pci_direct_probe(void); | 102 | extern int pci_direct_probe(void); |
102 | extern void pci_direct_init(int type); | 103 | extern void pci_direct_init(int type); |
103 | extern void pci_pcbios_init(void); | 104 | extern void pci_pcbios_init(void); |
104 | extern void pci_olpc_init(void); | 105 | extern int pci_olpc_init(void); |
105 | 106 | ||
106 | /* pci-mmconfig.c */ | 107 | /* pci-mmconfig.c */ |
107 | 108 | ||
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 23476c2ebfc4..efa2ba7c6005 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c | |||
@@ -106,9 +106,9 @@ int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) | |||
106 | do_realtime((struct timespec *)tv); | 106 | do_realtime((struct timespec *)tv); |
107 | tv->tv_usec /= 1000; | 107 | tv->tv_usec /= 1000; |
108 | if (unlikely(tz != NULL)) { | 108 | if (unlikely(tz != NULL)) { |
109 | /* This relies on gcc inlining the memcpy. We'll notice | 109 | /* Avoid memcpy. Some old compilers fail to inline it */ |
110 | if it ever fails to do so. */ | 110 | tz->tz_minuteswest = gtod->sys_tz.tz_minuteswest; |
111 | memcpy(tz, >od->sys_tz, sizeof(struct timezone)); | 111 | tz->tz_dsttime = gtod->sys_tz.tz_dsttime; |
112 | } | 112 | } |
113 | return 0; | 113 | return 0; |
114 | } | 114 | } |
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 3fdd51497a83..19a6cfaf5db9 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c | |||
@@ -16,7 +16,7 @@ | |||
16 | #include "vextern.h" /* Just for VMAGIC. */ | 16 | #include "vextern.h" /* Just for VMAGIC. */ |
17 | #undef VEXTERN | 17 | #undef VEXTERN |
18 | 18 | ||
19 | int vdso_enabled = 1; | 19 | unsigned int __read_mostly vdso_enabled = 1; |
20 | 20 | ||
21 | extern char vdso_start[], vdso_end[]; | 21 | extern char vdso_start[], vdso_end[]; |
22 | extern unsigned short vdso_sync_cpuid; | 22 | extern unsigned short vdso_sync_cpuid; |
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 2e641be2737e..c2cc99580871 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig | |||
@@ -5,9 +5,20 @@ | |||
5 | config XEN | 5 | config XEN |
6 | bool "Xen guest support" | 6 | bool "Xen guest support" |
7 | select PARAVIRT | 7 | select PARAVIRT |
8 | select PARAVIRT_CLOCK | ||
8 | depends on X86_32 | 9 | depends on X86_32 |
9 | depends on X86_CMPXCHG && X86_TSC && !(X86_VISWS || X86_VOYAGER) | 10 | depends on X86_CMPXCHG && X86_TSC && X86_PAE && !(X86_VISWS || X86_VOYAGER) |
10 | help | 11 | help |
11 | This is the Linux Xen port. Enabling this will allow the | 12 | This is the Linux Xen port. Enabling this will allow the |
12 | kernel to boot in a paravirtualized environment under the | 13 | kernel to boot in a paravirtualized environment under the |
13 | Xen hypervisor. | 14 | Xen hypervisor. |
15 | |||
16 | config XEN_MAX_DOMAIN_MEMORY | ||
17 | int "Maximum allowed size of a domain in gigabytes" | ||
18 | default 8 | ||
19 | depends on XEN | ||
20 | help | ||
21 | The pseudo-physical to machine address array is sized | ||
22 | according to the maximum possible memory size of a Xen | ||
23 | domain. This array uses 1 page per gigabyte, so there's no | ||
24 | need to be too stingy here. | ||
\ No newline at end of file
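The sizing in this help text can be sanity-checked. Assuming x86-32 with 4 KiB pages and 4-byte p2m entries (the constants the rest of this series uses), one p2m page covers 1024 pfns, so a gigabyte of guest memory costs 256 leaf pages of p2m plus a kilobyte in each statically sized top-level array. The self-contained arithmetic:

#include <stdio.h>

#define PAGE_SIZE  4096UL
#define ENTRY_SIZE 4UL                     /* sizeof(unsigned long), x86-32 */
#define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / ENTRY_SIZE)   /* 1024 */

int main(void)
{
	unsigned long pfns_per_gb   = (1UL << 30) / PAGE_SIZE;             /* 262144 */
	unsigned long leaves_per_gb = pfns_per_gb / P2M_ENTRIES_PER_PAGE;  /* 256 */

	printf("p2m leaf pages per GiB: %lu (%lu KiB)\n",
	       leaves_per_gb, leaves_per_gb * PAGE_SIZE / 1024);
	printf("top-level entries per GiB: %lu (%lu bytes each in p2m_top "
	       "and p2m_top_mfn)\n", leaves_per_gb, leaves_per_gb * ENTRY_SIZE);
	return 0;
}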
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 3d8df981d5fd..2ba2d1649131 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile | |||
@@ -1,4 +1,4 @@ | |||
1 | obj-y := enlighten.o setup.o multicalls.o mmu.o \ | 1 | obj-y := enlighten.o setup.o multicalls.o mmu.o \ |
2 | time.o manage.o xen-asm.o grant-table.o | 2 | time.o xen-asm.o grant-table.o suspend.o |
3 | 3 | ||
4 | obj-$(CONFIG_SMP) += smp.o | 4 | obj-$(CONFIG_SMP) += smp.o |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index c8a56e457d61..bd74229081c3 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -75,13 +75,13 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */ | |||
75 | struct start_info *xen_start_info; | 75 | struct start_info *xen_start_info; |
76 | EXPORT_SYMBOL_GPL(xen_start_info); | 76 | EXPORT_SYMBOL_GPL(xen_start_info); |
77 | 77 | ||
78 | static /* __initdata */ struct shared_info dummy_shared_info; | 78 | struct shared_info xen_dummy_shared_info; |
79 | 79 | ||
80 | /* | 80 | /* |
81 | * Point at some empty memory to start with. We map the real shared_info | 81 | * Point at some empty memory to start with. We map the real shared_info |
82 | * page as soon as fixmap is up and running. | 82 | * page as soon as fixmap is up and running. |
83 | */ | 83 | */ |
84 | struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info; | 84 | struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info; |
85 | 85 | ||
86 | /* | 86 | /* |
87 | * Flag to determine whether vcpu info placement is available on all | 87 | * Flag to determine whether vcpu info placement is available on all |
@@ -98,13 +98,13 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info; | |||
98 | */ | 98 | */ |
99 | static int have_vcpu_info_placement = 1; | 99 | static int have_vcpu_info_placement = 1; |
100 | 100 | ||
101 | static void __init xen_vcpu_setup(int cpu) | 101 | static void xen_vcpu_setup(int cpu) |
102 | { | 102 | { |
103 | struct vcpu_register_vcpu_info info; | 103 | struct vcpu_register_vcpu_info info; |
104 | int err; | 104 | int err; |
105 | struct vcpu_info *vcpup; | 105 | struct vcpu_info *vcpup; |
106 | 106 | ||
107 | BUG_ON(HYPERVISOR_shared_info == &dummy_shared_info); | 107 | BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); |
108 | per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; | 108 | per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; |
109 | 109 | ||
110 | if (!have_vcpu_info_placement) | 110 | if (!have_vcpu_info_placement) |
@@ -136,11 +136,41 @@ static void __init xen_vcpu_setup(int cpu) | |||
136 | } | 136 | } |
137 | } | 137 | } |
138 | 138 | ||
139 | /* | ||
140 | * On restore, set the vcpu placement up again. | ||
141 | * If it fails, then we're in a bad state, since | ||
142 | * we can't back out from using it... | ||
143 | */ | ||
144 | void xen_vcpu_restore(void) | ||
145 | { | ||
146 | if (have_vcpu_info_placement) { | ||
147 | int cpu; | ||
148 | |||
149 | for_each_online_cpu(cpu) { | ||
150 | bool other_cpu = (cpu != smp_processor_id()); | ||
151 | |||
152 | if (other_cpu && | ||
153 | HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL)) | ||
154 | BUG(); | ||
155 | |||
156 | xen_vcpu_setup(cpu); | ||
157 | |||
158 | if (other_cpu && | ||
159 | HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)) | ||
160 | BUG(); | ||
161 | } | ||
162 | |||
163 | BUG_ON(!have_vcpu_info_placement); | ||
164 | } | ||
165 | } | ||
166 | |||
139 | static void __init xen_banner(void) | 167 | static void __init xen_banner(void) |
140 | { | 168 | { |
141 | printk(KERN_INFO "Booting paravirtualized kernel on %s\n", | 169 | printk(KERN_INFO "Booting paravirtualized kernel on %s\n", |
142 | pv_info.name); | 170 | pv_info.name); |
143 | printk(KERN_INFO "Hypervisor signature: %s\n", xen_start_info->magic); | 171 | printk(KERN_INFO "Hypervisor signature: %s%s\n", |
172 | xen_start_info->magic, | ||
173 | xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); | ||
144 | } | 174 | } |
145 | 175 | ||
146 | static void xen_cpuid(unsigned int *ax, unsigned int *bx, | 176 | static void xen_cpuid(unsigned int *ax, unsigned int *bx, |
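xen_vcpu_restore() above re-registers vcpu_info placement after a migration, and it must do so while each target vCPU is quiescent, hence the down/setup/up bracket around every CPU except the one running the restore. A toy model of just that control flow (vcpu_down/vcpu_up/vcpu_setup stand in for the HYPERVISOR_vcpu_op() calls):

#include <stdbool.h>
#include <stdio.h>

#define NCPUS 4

static int  vcpu_down(int cpu)  { printf("down %d\n", cpu);  return 0; }
static int  vcpu_up(int cpu)    { printf("up %d\n", cpu);    return 0; }
static void vcpu_setup(int cpu) { printf("setup %d\n", cpu); }

int main(void)
{
	int self = 0;    /* the CPU driving the restore */

	for (int cpu = 0; cpu < NCPUS; cpu++) {
		bool other = (cpu != self);

		if (other && vcpu_down(cpu))
			return 1;          /* the real code BUG()s here */
		vcpu_setup(cpu);           /* re-register vcpu_info */
		if (other && vcpu_up(cpu))
			return 1;
	}
	return 0;
}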
@@ -235,13 +265,13 @@ static void xen_irq_enable(void) | |||
235 | { | 265 | { |
236 | struct vcpu_info *vcpu; | 266 | struct vcpu_info *vcpu; |
237 | 267 | ||
238 | /* There's a one instruction preempt window here. We need to | 268 | /* We don't need to worry about being preempted here, since |
239 | make sure we're don't switch CPUs between getting the vcpu | 269 | either a) interrupts are disabled, so no preemption, or b) |
240 | pointer and updating the mask. */ | 270 | the caller is confused and is trying to re-enable interrupts |
241 | preempt_disable(); | 271 | on an indeterminate processor. */ |
272 | |||
242 | vcpu = x86_read_percpu(xen_vcpu); | 273 | vcpu = x86_read_percpu(xen_vcpu); |
243 | vcpu->evtchn_upcall_mask = 0; | 274 | vcpu->evtchn_upcall_mask = 0; |
244 | preempt_enable_no_resched(); | ||
245 | 275 | ||
246 | /* Doesn't matter if we get preempted here, because any | 276 | /* Doesn't matter if we get preempted here, because any |
247 | pending event will get dealt with anyway. */ | 277 | pending event will get dealt with anyway. */ |
@@ -254,7 +284,7 @@ static void xen_irq_enable(void) | |||
254 | static void xen_safe_halt(void) | 284 | static void xen_safe_halt(void) |
255 | { | 285 | { |
256 | /* Blocking includes an implicit local_irq_enable(). */ | 286 | /* Blocking includes an implicit local_irq_enable(). */ |
257 | if (HYPERVISOR_sched_op(SCHEDOP_block, 0) != 0) | 287 | if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0) |
258 | BUG(); | 288 | BUG(); |
259 | } | 289 | } |
260 | 290 | ||
@@ -607,6 +637,30 @@ static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm, | |||
607 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 637 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
608 | } | 638 | } |
609 | 639 | ||
640 | static void xen_clts(void) | ||
641 | { | ||
642 | struct multicall_space mcs; | ||
643 | |||
644 | mcs = xen_mc_entry(0); | ||
645 | |||
646 | MULTI_fpu_taskswitch(mcs.mc, 0); | ||
647 | |||
648 | xen_mc_issue(PARAVIRT_LAZY_CPU); | ||
649 | } | ||
650 | |||
651 | static void xen_write_cr0(unsigned long cr0) | ||
652 | { | ||
653 | struct multicall_space mcs; | ||
654 | |||
655 | /* Only pay attention to cr0.TS; everything else is | ||
656 | ignored. */ | ||
657 | mcs = xen_mc_entry(0); | ||
658 | |||
659 | MULTI_fpu_taskswitch(mcs.mc, (cr0 & X86_CR0_TS) != 0); | ||
660 | |||
661 | xen_mc_issue(PARAVIRT_LAZY_CPU); | ||
662 | } | ||
663 | |||
610 | static void xen_write_cr2(unsigned long cr2) | 664 | static void xen_write_cr2(unsigned long cr2) |
611 | { | 665 | { |
612 | x86_read_percpu(xen_vcpu)->arch.cr2 = cr2; | 666 | x86_read_percpu(xen_vcpu)->arch.cr2 = cr2; |
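The new xen_clts()/xen_write_cr0() above exist because TS is the only cr0 bit a Xen guest may touch: it gates lazy FPU context switching, and flipping it through the multicall queue avoids a synchronous hypercall on every task switch. The masking in miniature:

#include <stdio.h>

#define X86_CR0_TS (1UL << 3)   /* task-switched: next FPU use will trap */

int main(void)
{
	unsigned long cr0 = 0x8005003bUL;       /* arbitrary example value */
	int ts = (cr0 & X86_CR0_TS) != 0;       /* every other bit ignored */

	printf("MULTI_fpu_taskswitch(%d)\n", ts); /* what xen_write_cr0 queues */
	return 0;
}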
@@ -624,8 +678,10 @@ static unsigned long xen_read_cr2_direct(void) | |||
624 | 678 | ||
625 | static void xen_write_cr4(unsigned long cr4) | 679 | static void xen_write_cr4(unsigned long cr4) |
626 | { | 680 | { |
627 | /* Just ignore cr4 changes; Xen doesn't allow us to do | 681 | cr4 &= ~X86_CR4_PGE; |
628 | anything anyway. */ | 682 | cr4 &= ~X86_CR4_PSE; |
683 | |||
684 | native_write_cr4(cr4); | ||
629 | } | 685 | } |
630 | 686 | ||
631 | static unsigned long xen_read_cr3(void) | 687 | static unsigned long xen_read_cr3(void) |
@@ -785,38 +841,35 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) | |||
785 | static __init void xen_pagetable_setup_start(pgd_t *base) | 841 | static __init void xen_pagetable_setup_start(pgd_t *base) |
786 | { | 842 | { |
787 | pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base; | 843 | pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base; |
844 | int i; | ||
788 | 845 | ||
789 | /* special set_pte for pagetable initialization */ | 846 | /* special set_pte for pagetable initialization */ |
790 | pv_mmu_ops.set_pte = xen_set_pte_init; | 847 | pv_mmu_ops.set_pte = xen_set_pte_init; |
791 | 848 | ||
792 | init_mm.pgd = base; | 849 | init_mm.pgd = base; |
793 | /* | 850 | /* |
794 | * copy top-level of Xen-supplied pagetable into place. For | 851 | * copy top-level of Xen-supplied pagetable into place. This |
795 | * !PAE we can use this as-is, but for PAE it is a stand-in | 852 | * is a stand-in while we copy the pmd pages. |
796 | * while we copy the pmd pages. | ||
797 | */ | 853 | */ |
798 | memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t)); | 854 | memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t)); |
799 | 855 | ||
800 | if (PTRS_PER_PMD > 1) { | 856 | /* |
801 | int i; | 857 | * For PAE, need to allocate new pmds, rather than |
802 | /* | 858 | * share Xen's, since Xen doesn't like pmd's being |
803 | * For PAE, need to allocate new pmds, rather than | 859 | * shared between address spaces. |
804 | * share Xen's, since Xen doesn't like pmd's being | 860 | */ |
805 | * shared between address spaces. | 861 | for (i = 0; i < PTRS_PER_PGD; i++) { |
806 | */ | 862 | if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) { |
807 | for (i = 0; i < PTRS_PER_PGD; i++) { | 863 | pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); |
808 | if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) { | ||
809 | pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); | ||
810 | 864 | ||
811 | memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]), | 865 | memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]), |
812 | PAGE_SIZE); | 866 | PAGE_SIZE); |
813 | 867 | ||
814 | make_lowmem_page_readonly(pmd); | 868 | make_lowmem_page_readonly(pmd); |
815 | 869 | ||
816 | set_pgd(&base[i], __pgd(1 + __pa(pmd))); | 870 | set_pgd(&base[i], __pgd(1 + __pa(pmd))); |
817 | } else | 871 | } else |
818 | pgd_clear(&base[i]); | 872 | pgd_clear(&base[i]); |
819 | } | ||
820 | } | 873 | } |
821 | 874 | ||
822 | /* make sure zero_page is mapped RO so we can use it in pagetables */ | 875 | /* make sure zero_page is mapped RO so we can use it in pagetables */ |
@@ -834,7 +887,7 @@ static __init void xen_pagetable_setup_start(pgd_t *base) | |||
834 | PFN_DOWN(__pa(xen_start_info->pt_base))); | 887 | PFN_DOWN(__pa(xen_start_info->pt_base))); |
835 | } | 888 | } |
836 | 889 | ||
837 | static __init void setup_shared_info(void) | 890 | void xen_setup_shared_info(void) |
838 | { | 891 | { |
839 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { | 892 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { |
840 | unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP); | 893 | unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP); |
@@ -857,6 +910,8 @@ static __init void setup_shared_info(void) | |||
857 | /* In UP this is as good a place as any to set up shared info */ | 910 | /* In UP this is as good a place as any to set up shared info */ |
858 | xen_setup_vcpu_info_placement(); | 911 | xen_setup_vcpu_info_placement(); |
859 | #endif | 912 | #endif |
913 | |||
914 | xen_setup_mfn_list_list(); | ||
860 | } | 915 | } |
861 | 916 | ||
862 | static __init void xen_pagetable_setup_done(pgd_t *base) | 917 | static __init void xen_pagetable_setup_done(pgd_t *base) |
@@ -869,25 +924,23 @@ static __init void xen_pagetable_setup_done(pgd_t *base) | |||
869 | pv_mmu_ops.release_pmd = xen_release_pmd; | 924 | pv_mmu_ops.release_pmd = xen_release_pmd; |
870 | pv_mmu_ops.set_pte = xen_set_pte; | 925 | pv_mmu_ops.set_pte = xen_set_pte; |
871 | 926 | ||
872 | setup_shared_info(); | 927 | xen_setup_shared_info(); |
873 | 928 | ||
874 | /* Actually pin the pagetable down, but we can't set PG_pinned | 929 | /* Actually pin the pagetable down, but we can't set PG_pinned |
875 | yet because the page structures don't exist yet. */ | 930 | yet because the page structures don't exist yet. */ |
876 | { | 931 | pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base))); |
877 | unsigned level; | 932 | } |
878 | 933 | ||
879 | #ifdef CONFIG_X86_PAE | 934 | static __init void xen_post_allocator_init(void) |
880 | level = MMUEXT_PIN_L3_TABLE; | 935 | { |
881 | #else | 936 | pv_mmu_ops.set_pmd = xen_set_pmd; |
882 | level = MMUEXT_PIN_L2_TABLE; | 937 | pv_mmu_ops.set_pud = xen_set_pud; |
883 | #endif | ||
884 | 938 | ||
885 | pin_pagetable_pfn(level, PFN_DOWN(__pa(base))); | 939 | xen_mark_init_mm_pinned(); |
886 | } | ||
887 | } | 940 | } |
888 | 941 | ||
889 | /* This is called once we have the cpu_possible_map */ | 942 | /* This is called once we have the cpu_possible_map */ |
890 | void __init xen_setup_vcpu_info_placement(void) | 943 | void xen_setup_vcpu_info_placement(void) |
891 | { | 944 | { |
892 | int cpu; | 945 | int cpu; |
893 | 946 | ||
@@ -973,7 +1026,7 @@ static const struct pv_init_ops xen_init_ops __initdata = { | |||
973 | .banner = xen_banner, | 1026 | .banner = xen_banner, |
974 | .memory_setup = xen_memory_setup, | 1027 | .memory_setup = xen_memory_setup, |
975 | .arch_setup = xen_arch_setup, | 1028 | .arch_setup = xen_arch_setup, |
976 | .post_allocator_init = xen_mark_init_mm_pinned, | 1029 | .post_allocator_init = xen_post_allocator_init, |
977 | }; | 1030 | }; |
978 | 1031 | ||
979 | static const struct pv_time_ops xen_time_ops __initdata = { | 1032 | static const struct pv_time_ops xen_time_ops __initdata = { |
@@ -991,10 +1044,10 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { | |||
991 | .set_debugreg = xen_set_debugreg, | 1044 | .set_debugreg = xen_set_debugreg, |
992 | .get_debugreg = xen_get_debugreg, | 1045 | .get_debugreg = xen_get_debugreg, |
993 | 1046 | ||
994 | .clts = native_clts, | 1047 | .clts = xen_clts, |
995 | 1048 | ||
996 | .read_cr0 = native_read_cr0, | 1049 | .read_cr0 = native_read_cr0, |
997 | .write_cr0 = native_write_cr0, | 1050 | .write_cr0 = xen_write_cr0, |
998 | 1051 | ||
999 | .read_cr4 = native_read_cr4, | 1052 | .read_cr4 = native_read_cr4, |
1000 | .read_cr4_safe = native_read_cr4_safe, | 1053 | .read_cr4_safe = native_read_cr4_safe, |
@@ -1085,24 +1138,26 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { | |||
1085 | 1138 | ||
1086 | .set_pte = NULL, /* see xen_pagetable_setup_* */ | 1139 | .set_pte = NULL, /* see xen_pagetable_setup_* */ |
1087 | .set_pte_at = xen_set_pte_at, | 1140 | .set_pte_at = xen_set_pte_at, |
1088 | .set_pmd = xen_set_pmd, | 1141 | .set_pmd = xen_set_pmd_hyper, |
1142 | |||
1143 | .ptep_modify_prot_start = __ptep_modify_prot_start, | ||
1144 | .ptep_modify_prot_commit = __ptep_modify_prot_commit, | ||
1089 | 1145 | ||
1090 | .pte_val = xen_pte_val, | 1146 | .pte_val = xen_pte_val, |
1147 | .pte_flags = native_pte_val, | ||
1091 | .pgd_val = xen_pgd_val, | 1148 | .pgd_val = xen_pgd_val, |
1092 | 1149 | ||
1093 | .make_pte = xen_make_pte, | 1150 | .make_pte = xen_make_pte, |
1094 | .make_pgd = xen_make_pgd, | 1151 | .make_pgd = xen_make_pgd, |
1095 | 1152 | ||
1096 | #ifdef CONFIG_X86_PAE | ||
1097 | .set_pte_atomic = xen_set_pte_atomic, | 1153 | .set_pte_atomic = xen_set_pte_atomic, |
1098 | .set_pte_present = xen_set_pte_at, | 1154 | .set_pte_present = xen_set_pte_at, |
1099 | .set_pud = xen_set_pud, | 1155 | .set_pud = xen_set_pud_hyper, |
1100 | .pte_clear = xen_pte_clear, | 1156 | .pte_clear = xen_pte_clear, |
1101 | .pmd_clear = xen_pmd_clear, | 1157 | .pmd_clear = xen_pmd_clear, |
1102 | 1158 | ||
1103 | .make_pmd = xen_make_pmd, | 1159 | .make_pmd = xen_make_pmd, |
1104 | .pmd_val = xen_pmd_val, | 1160 | .pmd_val = xen_pmd_val, |
1105 | #endif /* PAE */ | ||
1106 | 1161 | ||
1107 | .activate_mm = xen_activate_mm, | 1162 | .activate_mm = xen_activate_mm, |
1108 | .dup_mmap = xen_dup_mmap, | 1163 | .dup_mmap = xen_dup_mmap, |
@@ -1129,11 +1184,13 @@ static const struct smp_ops xen_smp_ops __initdata = { | |||
1129 | 1184 | ||
1130 | static void xen_reboot(int reason) | 1185 | static void xen_reboot(int reason) |
1131 | { | 1186 | { |
1187 | struct sched_shutdown r = { .reason = reason }; | ||
1188 | |||
1132 | #ifdef CONFIG_SMP | 1189 | #ifdef CONFIG_SMP |
1133 | smp_send_stop(); | 1190 | smp_send_stop(); |
1134 | #endif | 1191 | #endif |
1135 | 1192 | ||
1136 | if (HYPERVISOR_sched_op(SCHEDOP_shutdown, reason)) | 1193 | if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r)) |
1137 | BUG(); | 1194 | BUG(); |
1138 | } | 1195 | } |
1139 | 1196 | ||
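The reboot fix above tracks the hypercall ABI: SCHEDOP_shutdown takes a pointer to a struct sched_shutdown, not a bare reason code. The new call shape, modeled in userspace (sched_op() is a stand-in for HYPERVISOR_sched_op):

#include <stdio.h>

struct sched_shutdown { unsigned int reason; };

static int sched_op(int op, void *arg)     /* stand-in for the hypercall */
{
	struct sched_shutdown *s = arg;
	printf("SCHEDOP %d, reason %u\n", op, s->reason);
	return 0;
}

int main(void)
{
	struct sched_shutdown r = { .reason = 1 };  /* e.g. SHUTDOWN_reboot */
	return sched_op(2 /* SCHEDOP_shutdown */, &r);
}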
@@ -1188,6 +1245,8 @@ asmlinkage void __init xen_start_kernel(void) | |||
1188 | 1245 | ||
1189 | BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0); | 1246 | BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0); |
1190 | 1247 | ||
1248 | xen_setup_features(); | ||
1249 | |||
1191 | /* Install Xen paravirt ops */ | 1250 | /* Install Xen paravirt ops */ |
1192 | pv_info = xen_info; | 1251 | pv_info = xen_info; |
1193 | pv_init_ops = xen_init_ops; | 1252 | pv_init_ops = xen_init_ops; |
@@ -1197,17 +1256,20 @@ asmlinkage void __init xen_start_kernel(void) | |||
1197 | pv_apic_ops = xen_apic_ops; | 1256 | pv_apic_ops = xen_apic_ops; |
1198 | pv_mmu_ops = xen_mmu_ops; | 1257 | pv_mmu_ops = xen_mmu_ops; |
1199 | 1258 | ||
1259 | if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { | ||
1260 | pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; | ||
1261 | pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; | ||
1262 | } | ||
1263 | |||
1200 | machine_ops = xen_machine_ops; | 1264 | machine_ops = xen_machine_ops; |
1201 | 1265 | ||
1202 | #ifdef CONFIG_SMP | 1266 | #ifdef CONFIG_SMP |
1203 | smp_ops = xen_smp_ops; | 1267 | smp_ops = xen_smp_ops; |
1204 | #endif | 1268 | #endif |
1205 | 1269 | ||
1206 | xen_setup_features(); | ||
1207 | |||
1208 | /* Get mfn list */ | 1270 | /* Get mfn list */ |
1209 | if (!xen_feature(XENFEAT_auto_translated_physmap)) | 1271 | if (!xen_feature(XENFEAT_auto_translated_physmap)) |
1210 | phys_to_machine_mapping = (unsigned long *)xen_start_info->mfn_list; | 1272 | xen_build_dynamic_phys_to_machine(); |
1211 | 1273 | ||
1212 | pgd = (pgd_t *)xen_start_info->pt_base; | 1274 | pgd = (pgd_t *)xen_start_info->pt_base; |
1213 | 1275 | ||
@@ -1228,6 +1290,11 @@ asmlinkage void __init xen_start_kernel(void) | |||
1228 | if (xen_feature(XENFEAT_supervisor_mode_kernel)) | 1290 | if (xen_feature(XENFEAT_supervisor_mode_kernel)) |
1229 | pv_info.kernel_rpl = 0; | 1291 | pv_info.kernel_rpl = 0; |
1230 | 1292 | ||
1293 | /* Prevent unwanted bits from being set in PTEs. */ | ||
1294 | __supported_pte_mask &= ~_PAGE_GLOBAL; | ||
1295 | if (!is_initial_xendomain()) | ||
1296 | __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); | ||
1297 | |||
1231 | /* set the limit of our address space */ | 1298 | /* set the limit of our address space */ |
1232 | xen_reserve_top(); | 1299 | xen_reserve_top(); |
1233 | 1300 | ||
@@ -1242,8 +1309,11 @@ asmlinkage void __init xen_start_kernel(void) | |||
1242 | ? __pa(xen_start_info->mod_start) : 0; | 1309 | ? __pa(xen_start_info->mod_start) : 0; |
1243 | boot_params.hdr.ramdisk_size = xen_start_info->mod_len; | 1310 | boot_params.hdr.ramdisk_size = xen_start_info->mod_len; |
1244 | 1311 | ||
1245 | if (!is_initial_xendomain()) | 1312 | if (!is_initial_xendomain()) { |
1313 | add_preferred_console("xenboot", 0, NULL); | ||
1314 | add_preferred_console("tty", 0, NULL); | ||
1246 | add_preferred_console("hvc", 0, NULL); | 1315 | add_preferred_console("hvc", 0, NULL); |
1316 | } | ||
1247 | 1317 | ||
1248 | /* Start the world */ | 1318 | /* Start the world */ |
1249 | start_kernel(); | 1319 | start_kernel(); |
diff --git a/arch/x86/xen/manage.c b/arch/x86/xen/manage.c deleted file mode 100644 index aa7af9e6abc0..000000000000 --- a/arch/x86/xen/manage.c +++ /dev/null | |||
@@ -1,143 +0,0 @@ | |||
1 | /* | ||
2 | * Handle external requests for shutdown, reboot and sysrq | ||
3 | */ | ||
4 | #include <linux/kernel.h> | ||
5 | #include <linux/err.h> | ||
6 | #include <linux/reboot.h> | ||
7 | #include <linux/sysrq.h> | ||
8 | |||
9 | #include <xen/xenbus.h> | ||
10 | |||
11 | #define SHUTDOWN_INVALID -1 | ||
12 | #define SHUTDOWN_POWEROFF 0 | ||
13 | #define SHUTDOWN_SUSPEND 2 | ||
14 | /* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only | ||
15 | * report a crash, not be instructed to crash! | ||
16 | * HALT is the same as POWEROFF, as far as we're concerned. The tools use | ||
17 | * the distinction when we return the reason code to them. | ||
18 | */ | ||
19 | #define SHUTDOWN_HALT 4 | ||
20 | |||
21 | /* Ignore multiple shutdown requests. */ | ||
22 | static int shutting_down = SHUTDOWN_INVALID; | ||
23 | |||
24 | static void shutdown_handler(struct xenbus_watch *watch, | ||
25 | const char **vec, unsigned int len) | ||
26 | { | ||
27 | char *str; | ||
28 | struct xenbus_transaction xbt; | ||
29 | int err; | ||
30 | |||
31 | if (shutting_down != SHUTDOWN_INVALID) | ||
32 | return; | ||
33 | |||
34 | again: | ||
35 | err = xenbus_transaction_start(&xbt); | ||
36 | if (err) | ||
37 | return; | ||
38 | |||
39 | str = (char *)xenbus_read(xbt, "control", "shutdown", NULL); | ||
40 | /* Ignore read errors and empty reads. */ | ||
41 | if (XENBUS_IS_ERR_READ(str)) { | ||
42 | xenbus_transaction_end(xbt, 1); | ||
43 | return; | ||
44 | } | ||
45 | |||
46 | xenbus_write(xbt, "control", "shutdown", ""); | ||
47 | |||
48 | err = xenbus_transaction_end(xbt, 0); | ||
49 | if (err == -EAGAIN) { | ||
50 | kfree(str); | ||
51 | goto again; | ||
52 | } | ||
53 | |||
54 | if (strcmp(str, "poweroff") == 0 || | ||
55 | strcmp(str, "halt") == 0) | ||
56 | orderly_poweroff(false); | ||
57 | else if (strcmp(str, "reboot") == 0) | ||
58 | ctrl_alt_del(); | ||
59 | else { | ||
60 | printk(KERN_INFO "Ignoring shutdown request: %s\n", str); | ||
61 | shutting_down = SHUTDOWN_INVALID; | ||
62 | } | ||
63 | |||
64 | kfree(str); | ||
65 | } | ||
66 | |||
67 | static void sysrq_handler(struct xenbus_watch *watch, const char **vec, | ||
68 | unsigned int len) | ||
69 | { | ||
70 | char sysrq_key = '\0'; | ||
71 | struct xenbus_transaction xbt; | ||
72 | int err; | ||
73 | |||
74 | again: | ||
75 | err = xenbus_transaction_start(&xbt); | ||
76 | if (err) | ||
77 | return; | ||
78 | if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) { | ||
79 | printk(KERN_ERR "Unable to read sysrq code in " | ||
80 | "control/sysrq\n"); | ||
81 | xenbus_transaction_end(xbt, 1); | ||
82 | return; | ||
83 | } | ||
84 | |||
85 | if (sysrq_key != '\0') | ||
86 | xenbus_printf(xbt, "control", "sysrq", "%c", '\0'); | ||
87 | |||
88 | err = xenbus_transaction_end(xbt, 0); | ||
89 | if (err == -EAGAIN) | ||
90 | goto again; | ||
91 | |||
92 | if (sysrq_key != '\0') | ||
93 | handle_sysrq(sysrq_key, NULL); | ||
94 | } | ||
95 | |||
96 | static struct xenbus_watch shutdown_watch = { | ||
97 | .node = "control/shutdown", | ||
98 | .callback = shutdown_handler | ||
99 | }; | ||
100 | |||
101 | static struct xenbus_watch sysrq_watch = { | ||
102 | .node = "control/sysrq", | ||
103 | .callback = sysrq_handler | ||
104 | }; | ||
105 | |||
106 | static int setup_shutdown_watcher(void) | ||
107 | { | ||
108 | int err; | ||
109 | |||
110 | err = register_xenbus_watch(&shutdown_watch); | ||
111 | if (err) { | ||
112 | printk(KERN_ERR "Failed to set shutdown watcher\n"); | ||
113 | return err; | ||
114 | } | ||
115 | |||
116 | err = register_xenbus_watch(&sysrq_watch); | ||
117 | if (err) { | ||
118 | printk(KERN_ERR "Failed to set sysrq watcher\n"); | ||
119 | return err; | ||
120 | } | ||
121 | |||
122 | return 0; | ||
123 | } | ||
124 | |||
125 | static int shutdown_event(struct notifier_block *notifier, | ||
126 | unsigned long event, | ||
127 | void *data) | ||
128 | { | ||
129 | setup_shutdown_watcher(); | ||
130 | return NOTIFY_DONE; | ||
131 | } | ||
132 | |||
133 | static int __init setup_shutdown_event(void) | ||
134 | { | ||
135 | static struct notifier_block xenstore_notifier = { | ||
136 | .notifier_call = shutdown_event | ||
137 | }; | ||
138 | register_xenstore_notifier(&xenstore_notifier); | ||
139 | |||
140 | return 0; | ||
141 | } | ||
142 | |||
143 | subsys_initcall(setup_shutdown_event); | ||
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 126766d43aea..42b3b9ed641d 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -56,11 +56,136 @@ | |||
56 | #include "multicalls.h" | 56 | #include "multicalls.h" |
57 | #include "mmu.h" | 57 | #include "mmu.h" |
58 | 58 | ||
59 | #define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) | ||
60 | #define TOP_ENTRIES (MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE) | ||
61 | |||
62 | /* Placeholder for holes in the address space */ | ||
63 | static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE] | ||
64 | __attribute__((section(".data.page_aligned"))) = | ||
65 | { [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL }; | ||
66 | |||
67 | /* Array of pointers to pages containing p2m entries */ | ||
68 | static unsigned long *p2m_top[TOP_ENTRIES] | ||
69 | __attribute__((section(".data.page_aligned"))) = | ||
70 | { [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] }; | ||
71 | |||
72 | /* Arrays of p2m arrays expressed in mfns used for save/restore */ | ||
73 | static unsigned long p2m_top_mfn[TOP_ENTRIES] | ||
74 | __attribute__((section(".bss.page_aligned"))); | ||
75 | |||
76 | static unsigned long p2m_top_mfn_list[ | ||
77 | PAGE_ALIGN(TOP_ENTRIES / P2M_ENTRIES_PER_PAGE)] | ||
78 | __attribute__((section(".bss.page_aligned"))); | ||
79 | |||
80 | static inline unsigned p2m_top_index(unsigned long pfn) | ||
81 | { | ||
82 | BUG_ON(pfn >= MAX_DOMAIN_PAGES); | ||
83 | return pfn / P2M_ENTRIES_PER_PAGE; | ||
84 | } | ||
85 | |||
86 | static inline unsigned p2m_index(unsigned long pfn) | ||
87 | { | ||
88 | return pfn % P2M_ENTRIES_PER_PAGE; | ||
89 | } | ||
90 | |||
91 | /* Build the parallel p2m_top_mfn structures */ | ||
92 | void xen_setup_mfn_list_list(void) | ||
93 | { | ||
94 | unsigned pfn, idx; | ||
95 | |||
96 | for(pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) { | ||
97 | unsigned topidx = p2m_top_index(pfn); | ||
98 | |||
99 | p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]); | ||
100 | } | ||
101 | |||
102 | for(idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) { | ||
103 | unsigned topidx = idx * P2M_ENTRIES_PER_PAGE; | ||
104 | p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]); | ||
105 | } | ||
106 | |||
107 | BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); | ||
108 | |||
109 | HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = | ||
110 | virt_to_mfn(p2m_top_mfn_list); | ||
111 | HYPERVISOR_shared_info->arch.max_pfn = xen_start_info->nr_pages; | ||
112 | } | ||
113 | |||
114 | /* Set up p2m_top to point to the domain-builder provided p2m pages */ | ||
115 | void __init xen_build_dynamic_phys_to_machine(void) | ||
116 | { | ||
117 | unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list; | ||
118 | unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); | ||
119 | unsigned pfn; | ||
120 | |||
121 | for(pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) { | ||
122 | unsigned topidx = p2m_top_index(pfn); | ||
123 | |||
124 | p2m_top[topidx] = &mfn_list[pfn]; | ||
125 | } | ||
126 | } | ||
127 | |||
128 | unsigned long get_phys_to_machine(unsigned long pfn) | ||
129 | { | ||
130 | unsigned topidx, idx; | ||
131 | |||
132 | if (unlikely(pfn >= MAX_DOMAIN_PAGES)) | ||
133 | return INVALID_P2M_ENTRY; | ||
134 | |||
135 | topidx = p2m_top_index(pfn); | ||
136 | idx = p2m_index(pfn); | ||
137 | return p2m_top[topidx][idx]; | ||
138 | } | ||
139 | EXPORT_SYMBOL_GPL(get_phys_to_machine); | ||
140 | |||
141 | static void alloc_p2m(unsigned long **pp, unsigned long *mfnp) | ||
142 | { | ||
143 | unsigned long *p; | ||
144 | unsigned i; | ||
145 | |||
146 | p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL); | ||
147 | BUG_ON(p == NULL); | ||
148 | |||
149 | for(i = 0; i < P2M_ENTRIES_PER_PAGE; i++) | ||
150 | p[i] = INVALID_P2M_ENTRY; | ||
151 | |||
152 | if (cmpxchg(pp, p2m_missing, p) != p2m_missing) | ||
153 | free_page((unsigned long)p); | ||
154 | else | ||
155 | *mfnp = virt_to_mfn(p); | ||
156 | } | ||
157 | |||
158 | void set_phys_to_machine(unsigned long pfn, unsigned long mfn) | ||
159 | { | ||
160 | unsigned topidx, idx; | ||
161 | |||
162 | if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { | ||
163 | BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); | ||
164 | return; | ||
165 | } | ||
166 | |||
167 | if (unlikely(pfn >= MAX_DOMAIN_PAGES)) { | ||
168 | BUG_ON(mfn != INVALID_P2M_ENTRY); | ||
169 | return; | ||
170 | } | ||
171 | |||
172 | topidx = p2m_top_index(pfn); | ||
173 | if (p2m_top[topidx] == p2m_missing) { | ||
174 | /* no need to allocate a page to store an invalid entry */ | ||
175 | if (mfn == INVALID_P2M_ENTRY) | ||
176 | return; | ||
177 | alloc_p2m(&p2m_top[topidx], &p2m_top_mfn[topidx]); | ||
178 | } | ||
179 | |||
180 | idx = p2m_index(pfn); | ||
181 | p2m_top[topidx][idx] = mfn; | ||
182 | } | ||
183 | |||
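The p2m introduced above is a two-level radix tree: the high bits of a pfn pick a slot in p2m_top, the low bits an entry within one page. Holes all share the read-only p2m_missing page, and set_phys_to_machine() only allocates a real leaf on the first valid store into a missing range. The index split, as a self-contained model:

#include <stdio.h>

#define PAGE_SIZE            4096UL
#define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / 4)   /* 4-byte entries on x86-32 */

int main(void)
{
	unsigned long pfn = 0x12345;

	printf("pfn %#lx -> p2m_top[%lu][%lu]\n", pfn,
	       pfn / P2M_ENTRIES_PER_PAGE,     /* p2m_top_index() */
	       pfn % P2M_ENTRIES_PER_PAGE);    /* p2m_index() */
	return 0;
}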
59 | xmaddr_t arbitrary_virt_to_machine(unsigned long address) | 184 | xmaddr_t arbitrary_virt_to_machine(unsigned long address) |
60 | { | 185 | { |
61 | unsigned int level; | 186 | unsigned int level; |
62 | pte_t *pte = lookup_address(address, &level); | 187 | pte_t *pte = lookup_address(address, &level); |
63 | unsigned offset = address & PAGE_MASK; | 188 | unsigned offset = address & ~PAGE_MASK; |
64 | 189 | ||
65 | BUG_ON(pte == NULL); | 190 | BUG_ON(pte == NULL); |
66 | 191 | ||
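The one-character fix above is easy to miss: PAGE_MASK selects the page-frame bits, so address & PAGE_MASK is the page base, while the byte offset within the page is address & ~PAGE_MASK. A quick demonstration:

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long addr = 0xc0123abcUL;

	printf("base %#lx, offset %#lx\n",
	       addr & PAGE_MASK,    /* 0xc0123000: the old, wrong "offset" */
	       addr & ~PAGE_MASK);  /* 0xabc: what the fixed code computes */
	return 0;
}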
@@ -98,24 +223,60 @@ void make_lowmem_page_readwrite(void *vaddr) | |||
98 | } | 223 | } |
99 | 224 | ||
100 | 225 | ||
101 | void xen_set_pmd(pmd_t *ptr, pmd_t val) | 226 | static bool page_pinned(void *ptr) |
227 | { | ||
228 | struct page *page = virt_to_page(ptr); | ||
229 | |||
230 | return PagePinned(page); | ||
231 | } | ||
232 | |||
233 | static void extend_mmu_update(const struct mmu_update *update) | ||
102 | { | 234 | { |
103 | struct multicall_space mcs; | 235 | struct multicall_space mcs; |
104 | struct mmu_update *u; | 236 | struct mmu_update *u; |
105 | 237 | ||
106 | preempt_disable(); | 238 | mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u)); |
239 | |||
240 | if (mcs.mc != NULL) | ||
241 | mcs.mc->args[1]++; | ||
242 | else { | ||
243 | mcs = __xen_mc_entry(sizeof(*u)); | ||
244 | MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF); | ||
245 | } | ||
107 | 246 | ||
108 | mcs = xen_mc_entry(sizeof(*u)); | ||
109 | u = mcs.args; | 247 | u = mcs.args; |
110 | u->ptr = virt_to_machine(ptr).maddr; | 248 | *u = *update; |
111 | u->val = pmd_val_ma(val); | 249 | } |
112 | MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF); | 250 | |
251 | void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val) | ||
252 | { | ||
253 | struct mmu_update u; | ||
254 | |||
255 | preempt_disable(); | ||
256 | |||
257 | xen_mc_batch(); | ||
258 | |||
259 | u.ptr = virt_to_machine(ptr).maddr; | ||
260 | u.val = pmd_val_ma(val); | ||
261 | extend_mmu_update(&u); | ||
113 | 262 | ||
114 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 263 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
115 | 264 | ||
116 | preempt_enable(); | 265 | preempt_enable(); |
117 | } | 266 | } |
118 | 267 | ||
268 | void xen_set_pmd(pmd_t *ptr, pmd_t val) | ||
269 | { | ||
270 | /* If page is not pinned, we can just update the entry | ||
271 | directly */ | ||
272 | if (!page_pinned(ptr)) { | ||
273 | *ptr = val; | ||
274 | return; | ||
275 | } | ||
276 | |||
277 | xen_set_pmd_hyper(ptr, val); | ||
278 | } | ||
279 | |||
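extend_mmu_update() above coalesces page-table writes: if the most recently queued multicall is already an mmu_update, it appends one more (ptr, val) record to that call's argument list instead of burning a fresh entry, so a run of updates becomes a single hypercall. The bookkeeping, reduced to a userspace model:

#include <stdio.h>

struct update { unsigned long ptr, val; };

static struct update args[64];   /* argument buffer, like mc_buffer.args */
static unsigned nargs;
static unsigned last_cmd_count;  /* records carried by the newest command */
static int have_open_cmd;        /* is the newest entry an mmu_update? */

static void queue_update(struct update u)
{
	if (have_open_cmd) {
		last_cmd_count++;    /* the xen_mc_extend_args() fast path */
	} else {
		have_open_cmd = 1;   /* fall back to a fresh multicall entry */
		last_cmd_count = 1;
	}
	args[nargs++] = u;
}

int main(void)
{
	queue_update((struct update){ 0x1000, 0x11 });
	queue_update((struct update){ 0x1008, 0x22 });
	queue_update((struct update){ 0x1010, 0x33 });
	printf("1 hypercall carrying %u update records\n", last_cmd_count);
	return 0;
}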
119 | /* | 280 | /* |
120 | * Associate a virtual page frame with a given physical page frame | 281 | * Associate a virtual page frame with a given physical page frame |
121 | * and protection flags for that frame. | 282 | * and protection flags for that frame. |
@@ -179,68 +340,105 @@ out: | |||
179 | preempt_enable(); | 340 | preempt_enable(); |
180 | } | 341 | } |
181 | 342 | ||
182 | pteval_t xen_pte_val(pte_t pte) | 343 | pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
183 | { | 344 | { |
184 | pteval_t ret = pte.pte; | 345 | /* Just return the pte as-is. We preserve the bits on commit */ |
346 | return *ptep; | ||
347 | } | ||
348 | |||
349 | void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, | ||
350 | pte_t *ptep, pte_t pte) | ||
351 | { | ||
352 | struct mmu_update u; | ||
353 | |||
354 | xen_mc_batch(); | ||
185 | 355 | ||
186 | if (ret & _PAGE_PRESENT) | 356 | u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD; |
187 | ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT; | 357 | u.val = pte_val_ma(pte); |
358 | extend_mmu_update(&u); | ||
188 | 359 | ||
189 | return ret; | 360 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
190 | } | 361 | } |
191 | 362 | ||
192 | pgdval_t xen_pgd_val(pgd_t pgd) | 363 | /* Assume pteval_t is equivalent to all the other *val_t types. */ |
364 | static pteval_t pte_mfn_to_pfn(pteval_t val) | ||
193 | { | 365 | { |
194 | pgdval_t ret = pgd.pgd; | 366 | if (val & _PAGE_PRESENT) { |
195 | if (ret & _PAGE_PRESENT) | 367 | unsigned long mfn = (val & PTE_MASK) >> PAGE_SHIFT; |
196 | ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT; | 368 | pteval_t flags = val & ~PTE_MASK; |
197 | return ret; | 369 | val = ((pteval_t)mfn_to_pfn(mfn) << PAGE_SHIFT) | flags; |
370 | } | ||
371 | |||
372 | return val; | ||
198 | } | 373 | } |
199 | 374 | ||
200 | pte_t xen_make_pte(pteval_t pte) | 375 | static pteval_t pte_pfn_to_mfn(pteval_t val) |
201 | { | 376 | { |
202 | if (pte & _PAGE_PRESENT) { | 377 | if (val & _PAGE_PRESENT) { |
203 | pte = phys_to_machine(XPADDR(pte)).maddr; | 378 | unsigned long pfn = (val & PTE_MASK) >> PAGE_SHIFT; |
204 | pte &= ~(_PAGE_PCD | _PAGE_PWT); | 379 | pteval_t flags = val & ~PTE_MASK; |
380 | val = ((pteval_t)pfn_to_mfn(pfn) << PAGE_SHIFT) | flags; | ||
205 | } | 381 | } |
206 | 382 | ||
207 | return (pte_t){ .pte = pte }; | 383 | return val; |
208 | } | 384 | } |
209 | 385 | ||
210 | pgd_t xen_make_pgd(pgdval_t pgd) | 386 | pteval_t xen_pte_val(pte_t pte) |
211 | { | 387 | { |
212 | if (pgd & _PAGE_PRESENT) | 388 | return pte_mfn_to_pfn(pte.pte); |
213 | pgd = phys_to_machine(XPADDR(pgd)).maddr; | 389 | } |
214 | 390 | ||
215 | return (pgd_t){ pgd }; | 391 | pgdval_t xen_pgd_val(pgd_t pgd) |
392 | { | ||
393 | return pte_mfn_to_pfn(pgd.pgd); | ||
394 | } | ||
395 | |||
396 | pte_t xen_make_pte(pteval_t pte) | ||
397 | { | ||
398 | pte = pte_pfn_to_mfn(pte); | ||
399 | return native_make_pte(pte); | ||
400 | } | ||
401 | |||
402 | pgd_t xen_make_pgd(pgdval_t pgd) | ||
403 | { | ||
404 | pgd = pte_pfn_to_mfn(pgd); | ||
405 | return native_make_pgd(pgd); | ||
216 | } | 406 | } |
217 | 407 | ||
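All the conversion helpers now funnel through pte_mfn_to_pfn()/pte_pfn_to_mfn(), which split a value into frame number and flag bits, translate only the frame, and reassemble. A self-contained model (the xor "translation" and the simplified PTE_MASK are illustrative; the real PAE mask also excludes the NX bit):

#include <stdio.h>

typedef unsigned long pteval_t;

#define PAGE_SHIFT    12
#define PTE_MASK      (~0UL << PAGE_SHIFT)   /* simplified frame mask */
#define _PAGE_PRESENT 0x1UL

static unsigned long pfn_to_mfn(unsigned long pfn) { return pfn ^ 0x100; }

static pteval_t pte_pfn_to_mfn(pteval_t val)
{
	if (val & _PAGE_PRESENT) {
		unsigned long pfn = (val & PTE_MASK) >> PAGE_SHIFT;
		pteval_t flags = val & ~PTE_MASK;

		val = (pfn_to_mfn(pfn) << PAGE_SHIFT) | flags;
	}
	return val;
}

int main(void)
{
	printf("%#lx -> %#lx\n", 0x1063UL, pte_pfn_to_mfn(0x1063UL));
	/* pfn 0x1 becomes mfn 0x101; the 0x063 flag bits ride along */
	return 0;
}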
218 | pmdval_t xen_pmd_val(pmd_t pmd) | 408 | pmdval_t xen_pmd_val(pmd_t pmd) |
219 | { | 409 | { |
220 | pmdval_t ret = native_pmd_val(pmd); | 410 | return pte_mfn_to_pfn(pmd.pmd); |
221 | if (ret & _PAGE_PRESENT) | ||
222 | ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT; | ||
223 | return ret; | ||
224 | } | 411 | } |
225 | #ifdef CONFIG_X86_PAE | 412 | |
226 | void xen_set_pud(pud_t *ptr, pud_t val) | 413 | void xen_set_pud_hyper(pud_t *ptr, pud_t val) |
227 | { | 414 | { |
228 | struct multicall_space mcs; | 415 | struct mmu_update u; |
229 | struct mmu_update *u; | ||
230 | 416 | ||
231 | preempt_disable(); | 417 | preempt_disable(); |
232 | 418 | ||
233 | mcs = xen_mc_entry(sizeof(*u)); | 419 | xen_mc_batch(); |
234 | u = mcs.args; | 420 | |
235 | u->ptr = virt_to_machine(ptr).maddr; | 421 | u.ptr = virt_to_machine(ptr).maddr; |
236 | u->val = pud_val_ma(val); | 422 | u.val = pud_val_ma(val); |
237 | MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF); | 423 | extend_mmu_update(&u); |
238 | 424 | ||
239 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 425 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
240 | 426 | ||
241 | preempt_enable(); | 427 | preempt_enable(); |
242 | } | 428 | } |
243 | 429 | ||
430 | void xen_set_pud(pud_t *ptr, pud_t val) | ||
431 | { | ||
432 | /* If page is not pinned, we can just update the entry | ||
433 | directly */ | ||
434 | if (!page_pinned(ptr)) { | ||
435 | *ptr = val; | ||
436 | return; | ||
437 | } | ||
438 | |||
439 | xen_set_pud_hyper(ptr, val); | ||
440 | } | ||
441 | |||
244 | void xen_set_pte(pte_t *ptep, pte_t pte) | 442 | void xen_set_pte(pte_t *ptep, pte_t pte) |
245 | { | 443 | { |
246 | ptep->pte_high = pte.pte_high; | 444 | ptep->pte_high = pte.pte_high; |
@@ -262,22 +460,14 @@ void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | |||
262 | 460 | ||
263 | void xen_pmd_clear(pmd_t *pmdp) | 461 | void xen_pmd_clear(pmd_t *pmdp) |
264 | { | 462 | { |
265 | xen_set_pmd(pmdp, __pmd(0)); | 463 | set_pmd(pmdp, __pmd(0)); |
266 | } | 464 | } |
267 | 465 | ||
268 | pmd_t xen_make_pmd(pmdval_t pmd) | 466 | pmd_t xen_make_pmd(pmdval_t pmd) |
269 | { | 467 | { |
270 | if (pmd & _PAGE_PRESENT) | 468 | pmd = pte_pfn_to_mfn(pmd); |
271 | pmd = phys_to_machine(XPADDR(pmd)).maddr; | ||
272 | |||
273 | return native_make_pmd(pmd); | 469 | return native_make_pmd(pmd); |
274 | } | 470 | } |
275 | #else /* !PAE */ | ||
276 | void xen_set_pte(pte_t *ptep, pte_t pte) | ||
277 | { | ||
278 | *ptep = pte; | ||
279 | } | ||
280 | #endif /* CONFIG_X86_PAE */ | ||
281 | 471 | ||
282 | /* | 472 | /* |
283 | (Yet another) pagetable walker. This one is intended for pinning a | 473 | (Yet another) pagetable walker. This one is intended for pinning a |
@@ -430,8 +620,6 @@ static int pin_page(struct page *page, enum pt_level level) | |||
430 | read-only, and can be pinned. */ | 620 | read-only, and can be pinned. */ |
431 | void xen_pgd_pin(pgd_t *pgd) | 621 | void xen_pgd_pin(pgd_t *pgd) |
432 | { | 622 | { |
433 | unsigned level; | ||
434 | |||
435 | xen_mc_batch(); | 623 | xen_mc_batch(); |
436 | 624 | ||
437 | if (pgd_walk(pgd, pin_page, TASK_SIZE)) { | 625 | if (pgd_walk(pgd, pin_page, TASK_SIZE)) { |
@@ -441,15 +629,31 @@ void xen_pgd_pin(pgd_t *pgd) | |||
441 | xen_mc_batch(); | 629 | xen_mc_batch(); |
442 | } | 630 | } |
443 | 631 | ||
444 | #ifdef CONFIG_X86_PAE | 632 | xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd))); |
445 | level = MMUEXT_PIN_L3_TABLE; | 633 | xen_mc_issue(0); |
446 | #else | 634 | } |
447 | level = MMUEXT_PIN_L2_TABLE; | 635 | |
448 | #endif | 636 | /* |
637 | * On save, we need to pin all pagetables to make sure they get their | ||
638 | * mfns turned into pfns. Search the list for any unpinned pgds and pin | ||
639 | * them (unpinned pgds are not currently in use, probably because the | ||
640 | * process is under construction or destruction). | ||
641 | */ | ||
642 | void xen_mm_pin_all(void) | ||
643 | { | ||
644 | unsigned long flags; | ||
645 | struct page *page; | ||
449 | 646 | ||
450 | xen_do_pin(level, PFN_DOWN(__pa(pgd))); | 647 | spin_lock_irqsave(&pgd_lock, flags); |
451 | 648 | ||
452 | xen_mc_issue(0); | 649 | list_for_each_entry(page, &pgd_list, lru) { |
650 | if (!PagePinned(page)) { | ||
651 | xen_pgd_pin((pgd_t *)page_address(page)); | ||
652 | SetPageSavePinned(page); | ||
653 | } | ||
654 | } | ||
655 | |||
656 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
453 | } | 657 | } |
454 | 658 | ||
455 | /* The init_mm pagetable is really pinned as soon as it's created, but | 659 | /* The init_mm pagetable is really pinned as soon as it's created, but |
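xen_mm_pin_all() (and its partner xen_mm_unpin_all() in the next hunk) exist for save/restore: every pagetable must be pinned at suspend time so the tools can canonicalise its mfns, and the SavePinned flag remembers which ones to release again on resume. The state machine in miniature (plain bools stand in for the page flag bits):

#include <stdbool.h>
#include <stdio.h>

struct pgd_state { bool pinned, save_pinned; };

static void pin_all(struct pgd_state *v, int n)
{
	for (int i = 0; i < n; i++)
		if (!v[i].pinned) {
			v[i].pinned = true;        /* xen_pgd_pin() */
			v[i].save_pinned = true;   /* SetPageSavePinned() */
		}
}

static void unpin_all(struct pgd_state *v, int n)
{
	for (int i = 0; i < n; i++)
		if (v[i].save_pinned) {
			v[i].pinned = false;       /* xen_pgd_unpin() */
			v[i].save_pinned = false;  /* ClearPageSavePinned() */
		}
}

int main(void)
{
	struct pgd_state pgds[2] = { { true, false }, { false, false } };

	pin_all(pgds, 2);     /* before save: everything pinned */
	unpin_all(pgds, 2);   /* after resume: pre-save state restored */
	printf("pgd0 pinned=%d, pgd1 pinned=%d\n",
	       pgds[0].pinned, pgds[1].pinned);    /* prints 1, 0 */
	return 0;
}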
@@ -509,6 +713,29 @@ static void xen_pgd_unpin(pgd_t *pgd) | |||
509 | xen_mc_issue(0); | 713 | xen_mc_issue(0); |
510 | } | 714 | } |
511 | 715 | ||
716 | /* | ||
717 | * On resume, undo any pinning done at save, so that the rest of the | ||
718 | * kernel doesn't see any unexpected pinned pagetables. | ||
719 | */ | ||
720 | void xen_mm_unpin_all(void) | ||
721 | { | ||
722 | unsigned long flags; | ||
723 | struct page *page; | ||
724 | |||
725 | spin_lock_irqsave(&pgd_lock, flags); | ||
726 | |||
727 | list_for_each_entry(page, &pgd_list, lru) { | ||
728 | if (PageSavePinned(page)) { | ||
729 | BUG_ON(!PagePinned(page)); | ||
730 | printk("unpinning pinned %p\n", page_address(page)); | ||
731 | xen_pgd_unpin((pgd_t *)page_address(page)); | ||
732 | ClearPageSavePinned(page); | ||
733 | } | ||
734 | } | ||
735 | |||
736 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
737 | } | ||
738 | |||
512 | void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) | 739 | void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) |
513 | { | 740 | { |
514 | spin_lock(&next->page_table_lock); | 741 | spin_lock(&next->page_table_lock); |
@@ -602,7 +829,7 @@ void xen_exit_mmap(struct mm_struct *mm) | |||
602 | spin_lock(&mm->page_table_lock); | 829 | spin_lock(&mm->page_table_lock); |
603 | 830 | ||
604 | /* pgd may not be pinned in the error exit path of execve */ | 831 | /* pgd may not be pinned in the error exit path of execve */ |
605 | if (PagePinned(virt_to_page(mm->pgd))) | 832 | if (page_pinned(mm->pgd)) |
606 | xen_pgd_unpin(mm->pgd); | 833 | xen_pgd_unpin(mm->pgd); |
607 | 834 | ||
608 | spin_unlock(&mm->page_table_lock); | 835 | spin_unlock(&mm->page_table_lock); |
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index b5e189b1519d..297bf9f5b8bc 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h | |||
@@ -25,10 +25,6 @@ enum pt_level { | |||
25 | 25 | ||
26 | void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); | 26 | void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); |
27 | 27 | ||
28 | void xen_set_pte(pte_t *ptep, pte_t pteval); | ||
29 | void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, | ||
30 | pte_t *ptep, pte_t pteval); | ||
31 | void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval); | ||
32 | 28 | ||
33 | void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next); | 29 | void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next); |
34 | void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm); | 30 | void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm); |
@@ -37,31 +33,27 @@ void xen_exit_mmap(struct mm_struct *mm); | |||
37 | void xen_pgd_pin(pgd_t *pgd); | 33 | void xen_pgd_pin(pgd_t *pgd); |
38 | //void xen_pgd_unpin(pgd_t *pgd); | 34 | //void xen_pgd_unpin(pgd_t *pgd); |
39 | 35 | ||
40 | #ifdef CONFIG_X86_PAE | 36 | pteval_t xen_pte_val(pte_t); |
41 | unsigned long long xen_pte_val(pte_t); | 37 | pmdval_t xen_pmd_val(pmd_t); |
42 | unsigned long long xen_pmd_val(pmd_t); | 38 | pgdval_t xen_pgd_val(pgd_t); |
43 | unsigned long long xen_pgd_val(pgd_t); | ||
44 | 39 | ||
45 | pte_t xen_make_pte(unsigned long long); | 40 | pte_t xen_make_pte(pteval_t); |
46 | pmd_t xen_make_pmd(unsigned long long); | 41 | pmd_t xen_make_pmd(pmdval_t); |
47 | pgd_t xen_make_pgd(unsigned long long); | 42 | pgd_t xen_make_pgd(pgdval_t); |
48 | 43 | ||
44 | void xen_set_pte(pte_t *ptep, pte_t pteval); | ||
49 | void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, | 45 | void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, |
50 | pte_t *ptep, pte_t pteval); | 46 | pte_t *ptep, pte_t pteval); |
51 | void xen_set_pte_atomic(pte_t *ptep, pte_t pte); | 47 | void xen_set_pte_atomic(pte_t *ptep, pte_t pte); |
48 | void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval); | ||
52 | void xen_set_pud(pud_t *ptr, pud_t val); | 49 | void xen_set_pud(pud_t *ptr, pud_t val); |
50 | void xen_set_pmd_hyper(pmd_t *pmdp, pmd_t pmdval); | ||
51 | void xen_set_pud_hyper(pud_t *ptr, pud_t val); | ||
53 | void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); | 52 | void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); |
54 | void xen_pmd_clear(pmd_t *pmdp); | 53 | void xen_pmd_clear(pmd_t *pmdp); |
55 | 54 | ||
56 | 55 | pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep); | |
57 | #else | 56 | void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, |
58 | unsigned long xen_pte_val(pte_t); | 57 | pte_t *ptep, pte_t pte); |
59 | unsigned long xen_pmd_val(pmd_t); | ||
60 | unsigned long xen_pgd_val(pgd_t); | ||
61 | |||
62 | pte_t xen_make_pte(unsigned long); | ||
63 | pmd_t xen_make_pmd(unsigned long); | ||
64 | pgd_t xen_make_pgd(unsigned long); | ||
65 | #endif | ||
66 | 58 | ||
67 | #endif /* _XEN_MMU_H */ | 59 | #endif /* _XEN_MMU_H */ |
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index 5791eb2e3750..3c63c4da7ed1 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c | |||
@@ -29,14 +29,14 @@ | |||
29 | #define MC_DEBUG 1 | 29 | #define MC_DEBUG 1 |
30 | 30 | ||
31 | #define MC_BATCH 32 | 31 | #define MC_BATCH 32 |
32 | #define MC_ARGS (MC_BATCH * 16 / sizeof(u64)) | 32 | #define MC_ARGS (MC_BATCH * 16) |
33 | 33 | ||
34 | struct mc_buffer { | 34 | struct mc_buffer { |
35 | struct multicall_entry entries[MC_BATCH]; | 35 | struct multicall_entry entries[MC_BATCH]; |
36 | #if MC_DEBUG | 36 | #if MC_DEBUG |
37 | struct multicall_entry debug[MC_BATCH]; | 37 | struct multicall_entry debug[MC_BATCH]; |
38 | #endif | 38 | #endif |
39 | u64 args[MC_ARGS]; | 39 | unsigned char args[MC_ARGS]; |
40 | struct callback { | 40 | struct callback { |
41 | void (*fn)(void *); | 41 | void (*fn)(void *); |
42 | void *data; | 42 | void *data; |
@@ -107,20 +107,48 @@ struct multicall_space __xen_mc_entry(size_t args) | |||
107 | { | 107 | { |
108 | struct mc_buffer *b = &__get_cpu_var(mc_buffer); | 108 | struct mc_buffer *b = &__get_cpu_var(mc_buffer); |
109 | struct multicall_space ret; | 109 | struct multicall_space ret; |
110 | unsigned argspace = (args + sizeof(u64) - 1) / sizeof(u64); | 110 | unsigned argidx = roundup(b->argidx, sizeof(u64)); |
111 | 111 | ||
112 | BUG_ON(preemptible()); | 112 | BUG_ON(preemptible()); |
113 | BUG_ON(argspace > MC_ARGS); | 113 | BUG_ON(b->argidx > MC_ARGS); |
114 | 114 | ||
115 | if (b->mcidx == MC_BATCH || | 115 | if (b->mcidx == MC_BATCH || |
116 | (b->argidx + argspace) > MC_ARGS) | 116 | (argidx + args) > MC_ARGS) { |
117 | xen_mc_flush(); | 117 | xen_mc_flush(); |
118 | argidx = roundup(b->argidx, sizeof(u64)); | ||
119 | } | ||
118 | 120 | ||
119 | ret.mc = &b->entries[b->mcidx]; | 121 | ret.mc = &b->entries[b->mcidx]; |
120 | b->mcidx++; | 122 | b->mcidx++; |
123 | ret.args = &b->args[argidx]; | ||
124 | b->argidx = argidx + args; | ||
125 | |||
126 | BUG_ON(b->argidx > MC_ARGS); | ||
127 | return ret; | ||
128 | } | ||
129 | |||
130 | struct multicall_space xen_mc_extend_args(unsigned long op, size_t size) | ||
131 | { | ||
132 | struct mc_buffer *b = &__get_cpu_var(mc_buffer); | ||
133 | struct multicall_space ret = { NULL, NULL }; | ||
134 | |||
135 | BUG_ON(preemptible()); | ||
136 | BUG_ON(b->argidx > MC_ARGS); | ||
137 | |||
138 | if (b->mcidx == 0) | ||
139 | return ret; | ||
140 | |||
141 | if (b->entries[b->mcidx - 1].op != op) | ||
142 | return ret; | ||
143 | |||
144 | if ((b->argidx + size) > MC_ARGS) | ||
145 | return ret; | ||
146 | |||
147 | ret.mc = &b->entries[b->mcidx - 1]; | ||
121 | ret.args = &b->args[b->argidx]; | 148 | ret.args = &b->args[b->argidx]; |
122 | b->argidx += argspace; | 149 | b->argidx += size; |
123 | 150 | ||
151 | BUG_ON(b->argidx > MC_ARGS); | ||
124 | return ret; | 152 | return ret; |
125 | } | 153 | } |
126 | 154 | ||
diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h index 8bae996d99a3..858938241616 100644 --- a/arch/x86/xen/multicalls.h +++ b/arch/x86/xen/multicalls.h | |||
@@ -45,4 +45,16 @@ static inline void xen_mc_issue(unsigned mode) | |||
45 | /* Set up a callback to be called when the current batch is flushed */ | 45 | /* Set up a callback to be called when the current batch is flushed */ |
46 | void xen_mc_callback(void (*fn)(void *), void *data); | 46 | void xen_mc_callback(void (*fn)(void *), void *data); |
47 | 47 | ||
48 | /* | ||
49 | * Try to extend the arguments of the previous multicall command. The | ||
50 | * previous command's op must match. If it does, then it attempts to | ||
51 | * extend the argument space allocated to the multicall entry by | ||
52 | * arg_size bytes. | ||
53 | * | ||
54 | * The returned multicall_space will return with mc pointing to the | ||
55 | * command on success, or NULL on failure, and args pointing to the | ||
56 | * newly allocated space. | ||
57 | */ | ||
58 | struct multicall_space xen_mc_extend_args(unsigned long op, size_t arg_size); | ||
59 | |||
48 | #endif /* _XEN_MULTICALLS_H */ | 60 | #endif /* _XEN_MULTICALLS_H */ |
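One subtlety the new comment leaves implicit: because the argument buffer is now byte-addressed, __xen_mc_entry() aligns each new command's argument block to sizeof(u64), while xen_mc_extend_args() appends packed records to the previous command. The roundup arithmetic by itself:

#include <stdint.h>
#include <stdio.h>

#define ROUNDUP(x, a) (((x) + (a) - 1) / (a) * (a))  /* like kernel roundup() */

int main(void)
{
	size_t argidx = 12;   /* bytes already used in the buffer */

	printf("next entry's args start at %zu\n",
	       (size_t)ROUNDUP(argidx, sizeof(uint64_t)));   /* prints 16 */
	return 0;
}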
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 82517e4a752a..488447878a9d 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <asm/xen/hypervisor.h> | 16 | #include <asm/xen/hypervisor.h> |
17 | #include <asm/xen/hypercall.h> | 17 | #include <asm/xen/hypercall.h> |
18 | 18 | ||
19 | #include <xen/page.h> | ||
19 | #include <xen/interface/callback.h> | 20 | #include <xen/interface/callback.h> |
20 | #include <xen/interface/physdev.h> | 21 | #include <xen/interface/physdev.h> |
21 | #include <xen/features.h> | 22 | #include <xen/features.h> |
@@ -27,8 +28,6 @@ | |||
27 | extern const char xen_hypervisor_callback[]; | 28 | extern const char xen_hypervisor_callback[]; |
28 | extern const char xen_failsafe_callback[]; | 29 | extern const char xen_failsafe_callback[]; |
29 | 30 | ||
30 | unsigned long *phys_to_machine_mapping; | ||
31 | EXPORT_SYMBOL(phys_to_machine_mapping); | ||
32 | 31 | ||
33 | /** | 32 | /** |
34 | * machine_specific_memory_setup - Hook for machine specific memory setup. | 33 | * machine_specific_memory_setup - Hook for machine specific memory setup. |
@@ -38,6 +37,8 @@ char * __init xen_memory_setup(void) | |||
38 | { | 37 | { |
39 | unsigned long max_pfn = xen_start_info->nr_pages; | 38 | unsigned long max_pfn = xen_start_info->nr_pages; |
40 | 39 | ||
40 | max_pfn = min(MAX_DOMAIN_PAGES, max_pfn); | ||
41 | |||
41 | e820.nr_map = 0; | 42 | e820.nr_map = 0; |
42 | add_memory_region(0, LOWMEMSIZE(), E820_RAM); | 43 | add_memory_region(0, LOWMEMSIZE(), E820_RAM); |
43 | add_memory_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM); | 44 | add_memory_region(HIGH_MEMORY, PFN_PHYS(max_pfn)-HIGH_MEMORY, E820_RAM); |
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 94e69000f982..d2e3c20127d7 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -35,7 +35,7 @@ | |||
35 | #include "xen-ops.h" | 35 | #include "xen-ops.h" |
36 | #include "mmu.h" | 36 | #include "mmu.h" |
37 | 37 | ||
38 | static cpumask_t xen_cpu_initialized_map; | 38 | cpumask_t xen_cpu_initialized_map; |
39 | static DEFINE_PER_CPU(int, resched_irq) = -1; | 39 | static DEFINE_PER_CPU(int, resched_irq) = -1; |
40 | static DEFINE_PER_CPU(int, callfunc_irq) = -1; | 40 | static DEFINE_PER_CPU(int, callfunc_irq) = -1; |
41 | static DEFINE_PER_CPU(int, debug_irq) = -1; | 41 | static DEFINE_PER_CPU(int, debug_irq) = -1; |
@@ -65,6 +65,12 @@ static struct call_data_struct *call_data; | |||
65 | */ | 65 | */ |
66 | static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) | 66 | static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) |
67 | { | 67 | { |
68 | #ifdef CONFIG_X86_32 | ||
69 | __get_cpu_var(irq_stat).irq_resched_count++; | ||
70 | #else | ||
71 | add_pda(irq_resched_count, 1); | ||
72 | #endif | ||
73 | |||
68 | return IRQ_HANDLED; | 74 | return IRQ_HANDLED; |
69 | } | 75 | } |
70 | 76 | ||
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c new file mode 100644 index 000000000000..251669a932d4 --- /dev/null +++ b/arch/x86/xen/suspend.c | |||
@@ -0,0 +1,45 @@ | |||
1 | #include <linux/types.h> | ||
2 | |||
3 | #include <xen/interface/xen.h> | ||
4 | #include <xen/grant_table.h> | ||
5 | #include <xen/events.h> | ||
6 | |||
7 | #include <asm/xen/hypercall.h> | ||
8 | #include <asm/xen/page.h> | ||
9 | |||
10 | #include "xen-ops.h" | ||
11 | #include "mmu.h" | ||
12 | |||
13 | void xen_pre_suspend(void) | ||
14 | { | ||
15 | xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn); | ||
16 | xen_start_info->console.domU.mfn = | ||
17 | mfn_to_pfn(xen_start_info->console.domU.mfn); | ||
18 | |||
19 | BUG_ON(!irqs_disabled()); | ||
20 | |||
21 | HYPERVISOR_shared_info = &xen_dummy_shared_info; | ||
22 | if (HYPERVISOR_update_va_mapping(fix_to_virt(FIX_PARAVIRT_BOOTMAP), | ||
23 | __pte_ma(0), 0)) | ||
24 | BUG(); | ||
25 | } | ||
26 | |||
27 | void xen_post_suspend(int suspend_cancelled) | ||
28 | { | ||
29 | xen_setup_shared_info(); | ||
30 | |||
31 | if (suspend_cancelled) { | ||
32 | xen_start_info->store_mfn = | ||
33 | pfn_to_mfn(xen_start_info->store_mfn); | ||
34 | xen_start_info->console.domU.mfn = | ||
35 | pfn_to_mfn(xen_start_info->console.domU.mfn); | ||
36 | } else { | ||
37 | #ifdef CONFIG_SMP | ||
38 | xen_cpu_initialized_map = cpu_online_map; | ||
39 | #endif | ||
40 | xen_vcpu_restore(); | ||
41 | xen_timer_resume(); | ||
42 | } | ||
43 | |||
44 | } | ||
45 | |||
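The mfn/pfn round-trip in this new file is the crux of suspend: machine frame numbers are meaningless after migration, so xen_pre_suspend() rewrites the store and console frames as pfns, and they are mapped back only if the suspend is cancelled. A toy model (the xor stands in for a real translation table):

#include <stdio.h>

static unsigned long mfn_to_pfn(unsigned long mfn) { return mfn ^ 0x40; }
static unsigned long pfn_to_mfn(unsigned long pfn) { return pfn ^ 0x40; }

int main(void)
{
	unsigned long store_mfn = 0x123;
	int suspend_cancelled = 1;

	store_mfn = mfn_to_pfn(store_mfn);          /* xen_pre_suspend() */
	if (suspend_cancelled)
		store_mfn = pfn_to_mfn(store_mfn);  /* back to the same mfn */

	printf("store_mfn = %#lx\n", store_mfn);    /* 0x123 again */
	return 0;
}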
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index c39e1a5aa241..64f0038b9558 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c | |||
@@ -12,7 +12,9 @@ | |||
12 | #include <linux/clocksource.h> | 12 | #include <linux/clocksource.h> |
13 | #include <linux/clockchips.h> | 13 | #include <linux/clockchips.h> |
14 | #include <linux/kernel_stat.h> | 14 | #include <linux/kernel_stat.h> |
15 | #include <linux/math64.h> | ||
15 | 16 | ||
17 | #include <asm/pvclock.h> | ||
16 | #include <asm/xen/hypervisor.h> | 18 | #include <asm/xen/hypervisor.h> |
17 | #include <asm/xen/hypercall.h> | 19 | #include <asm/xen/hypercall.h> |
18 | 20 | ||
@@ -30,17 +32,6 @@ | |||
30 | 32 | ||
31 | static cycle_t xen_clocksource_read(void); | 33 | static cycle_t xen_clocksource_read(void); |
32 | 34 | ||
33 | /* These are periodically updated in shared_info, and then copied here. */ | ||
34 | struct shadow_time_info { | ||
35 | u64 tsc_timestamp; /* TSC at last update of time vals. */ | ||
36 | u64 system_timestamp; /* Time, in nanosecs, since boot. */ | ||
37 | u32 tsc_to_nsec_mul; | ||
38 | int tsc_shift; | ||
39 | u32 version; | ||
40 | }; | ||
41 | |||
42 | static DEFINE_PER_CPU(struct shadow_time_info, shadow_time); | ||
43 | |||
44 | /* runstate info updated by Xen */ | 35 | /* runstate info updated by Xen */ |
45 | static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate); | 36 | static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate); |
46 | 37 | ||
@@ -150,11 +141,7 @@ static void do_stolen_accounting(void) | |||
150 | if (stolen < 0) | 141 | if (stolen < 0) |
151 | stolen = 0; | 142 | stolen = 0; |
152 | 143 | ||
153 | ticks = 0; | 144 | ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen); |
154 | while (stolen >= NS_PER_TICK) { | ||
155 | ticks++; | ||
156 | stolen -= NS_PER_TICK; | ||
157 | } | ||
158 | __get_cpu_var(residual_stolen) = stolen; | 145 | __get_cpu_var(residual_stolen) = stolen; |
159 | account_steal_time(NULL, ticks); | 146 | account_steal_time(NULL, ticks); |
160 | 147 | ||
@@ -166,11 +153,7 @@ static void do_stolen_accounting(void) | |||
166 | if (blocked < 0) | 153 | if (blocked < 0) |
167 | blocked = 0; | 154 | blocked = 0; |
168 | 155 | ||
169 | ticks = 0; | 156 | ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked); |
170 | while (blocked >= NS_PER_TICK) { | ||
171 | ticks++; | ||
172 | blocked -= NS_PER_TICK; | ||
173 | } | ||
174 | __get_cpu_var(residual_blocked) = blocked; | 157 | __get_cpu_var(residual_blocked) = blocked; |
175 | account_steal_time(idle_task(smp_processor_id()), ticks); | 158 | account_steal_time(idle_task(smp_processor_id()), ticks); |
176 | } | 159 | } |
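iter_div_u64_rem() replaces the open-coded subtraction loops; it returns the quotient and stores the remainder back through the pointer, and is intended for exactly this case where the quotient is known to be small. An equivalent loop under that assumption (the my_ prefix marks it as a local model, not the kernel helper):

#include <stdint.h>
#include <stdio.h>

static uint64_t my_iter_div_u64_rem(uint64_t dividend, uint32_t divisor,
				    uint64_t *remainder)
{
	uint64_t quot = 0;

	while (dividend >= divisor) {   /* cheap when quot stays small */
		quot++;
		dividend -= divisor;
	}
	*remainder = dividend;
	return quot;
}

int main(void)
{
	uint64_t rem;
	uint64_t ticks = my_iter_div_u64_rem(2500000, 1000000, &rem);

	printf("%llu ticks, %llu ns residual\n",
	       (unsigned long long)ticks, (unsigned long long)rem);
	return 0;
}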
@@ -218,7 +201,7 @@ unsigned long long xen_sched_clock(void) | |||
218 | unsigned long xen_cpu_khz(void) | 201 | unsigned long xen_cpu_khz(void) |
219 | { | 202 | { |
220 | u64 xen_khz = 1000000ULL << 32; | 203 | u64 xen_khz = 1000000ULL << 32; |
221 | const struct vcpu_time_info *info = | 204 | const struct pvclock_vcpu_time_info *info = |
222 | &HYPERVISOR_shared_info->vcpu_info[0].time; | 205 | &HYPERVISOR_shared_info->vcpu_info[0].time; |
223 | 206 | ||
224 | do_div(xen_khz, info->tsc_to_system_mul); | 207 | do_div(xen_khz, info->tsc_to_system_mul); |
@@ -230,121 +213,26 @@ unsigned long xen_cpu_khz(void) | |||
230 | return xen_khz; | 213 | return xen_khz; |
231 | } | 214 | } |
232 | 215 | ||
233 | /* | ||
234 | * Reads a consistent set of time-base values from Xen, into a shadow data | ||
235 | * area. | ||
236 | */ | ||
237 | static unsigned get_time_values_from_xen(void) | ||
238 | { | ||
239 | struct vcpu_time_info *src; | ||
240 | struct shadow_time_info *dst; | ||
241 | |||
242 | /* src is shared memory with the hypervisor, so we need to | ||
243 | make sure we get a consistent snapshot, even in the face of | ||
244 | being preempted. */ | ||
245 | src = &__get_cpu_var(xen_vcpu)->time; | ||
246 | dst = &__get_cpu_var(shadow_time); | ||
247 | |||
248 | do { | ||
249 | dst->version = src->version; | ||
250 | rmb(); /* fetch version before data */ | ||
251 | dst->tsc_timestamp = src->tsc_timestamp; | ||
252 | dst->system_timestamp = src->system_time; | ||
253 | dst->tsc_to_nsec_mul = src->tsc_to_system_mul; | ||
254 | dst->tsc_shift = src->tsc_shift; | ||
255 | rmb(); /* test version after fetching data */ | ||
256 | } while ((src->version & 1) | (dst->version ^ src->version)); | ||
257 | |||
258 | return dst->version; | ||
259 | } | ||
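The deleted get_time_values_from_xen() is a seqlock-style reader: Xen sets the version odd while it updates the shared record and even when it is done, so the loop retries on an in-flight update (version & 1) or a torn read (version changed between fetches). A freestanding model of the pattern:

    #include <stdint.h>

    struct shared_rec {
            volatile uint32_t version;
            uint64_t payload;
    };

    static uint64_t model_snapshot(const volatile struct shared_rec *src)
    {
            uint32_t ver;
            uint64_t val;

            do {
                    ver = src->version;
                    __atomic_thread_fence(__ATOMIC_ACQUIRE); /* rmb(): version before data */
                    val = src->payload;
                    __atomic_thread_fence(__ATOMIC_ACQUIRE); /* rmb(): data before recheck */
            } while ((src->version & 1) | (ver ^ src->version));

            return val;
    }

The same protocol now lives behind pvclock_clocksource_read(), which this patch switches to.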
260 | |||
261 | /* | ||
262 | * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, | ||
263 | * yielding a 64-bit result. | ||

264 | */ | ||
265 | static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift) | ||
266 | { | ||
267 | u64 product; | ||
268 | #ifdef __i386__ | ||
269 | u32 tmp1, tmp2; | ||
270 | #endif | ||
271 | |||
272 | if (shift < 0) | ||
273 | delta >>= -shift; | ||
274 | else | ||
275 | delta <<= shift; | ||
276 | |||
277 | #ifdef __i386__ | ||
278 | __asm__ ( | ||
279 | "mul %5 ; " | ||
280 | "mov %4,%%eax ; " | ||
281 | "mov %%edx,%4 ; " | ||
282 | "mul %5 ; " | ||
283 | "xor %5,%5 ; " | ||
284 | "add %4,%%eax ; " | ||
285 | "adc %5,%%edx ; " | ||
286 | : "=A" (product), "=r" (tmp1), "=r" (tmp2) | ||
287 | : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) ); | ||
288 | #elif __x86_64__ | ||
289 | __asm__ ( | ||
290 | "mul %%rdx ; shrd $32,%%rdx,%%rax" | ||
291 | : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) ); | ||
292 | #else | ||
293 | #error implement me! | ||
294 | #endif | ||
295 | |||
296 | return product; | ||
297 | } | ||
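The removed scale_delta() computes (delta << shift) * mul_frac >> 32, with the multiply carried out in extended precision; the i386 asm exists only because 32-bit x86 lacks a cheap wide intermediate. A portable model, assuming a compiler with the unsigned __int128 extension:

    #include <stdint.h>

    static uint64_t model_scale_delta(uint64_t delta, uint32_t mul_frac, int shift)
    {
            if (shift < 0)
                    delta >>= -shift;
            else
                    delta <<= shift;

            /* 64x32-bit multiply, keeping bits 32..95 of the product. */
            return (uint64_t)(((unsigned __int128)delta * mul_frac) >> 32);
    }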
298 | |||
299 | static u64 get_nsec_offset(struct shadow_time_info *shadow) | ||
300 | { | ||
301 | u64 now, delta; | ||
302 | now = native_read_tsc(); | ||
303 | delta = now - shadow->tsc_timestamp; | ||
304 | return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift); | ||
305 | } | ||
306 | |||
307 | static cycle_t xen_clocksource_read(void) | 216 | static cycle_t xen_clocksource_read(void) |
308 | { | 217 | { |
309 | struct shadow_time_info *shadow = &get_cpu_var(shadow_time); | 218 | struct pvclock_vcpu_time_info *src; |
310 | cycle_t ret; | 219 | cycle_t ret; |
311 | unsigned version; | ||
312 | |||
313 | do { | ||
314 | version = get_time_values_from_xen(); | ||
315 | barrier(); | ||
316 | ret = shadow->system_timestamp + get_nsec_offset(shadow); | ||
317 | barrier(); | ||
318 | } while (version != __get_cpu_var(xen_vcpu)->time.version); | ||
319 | |||
320 | put_cpu_var(shadow_time); | ||
321 | 220 | ||
221 | src = &get_cpu_var(xen_vcpu)->time; | ||
222 | ret = pvclock_clocksource_read(src); | ||
223 | put_cpu_var(xen_vcpu); | ||
322 | return ret; | 224 | return ret; |
323 | } | 225 | } |
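With the shadow copy gone, the per-cpu pvclock record is read in place. Judging from the open-coded reader this patch deletes, pvclock_clocksource_read() is expected to return boot-relative system time plus the scaled TSC delta since the hypervisor's last update, under the same version-retry loop modelled above. A sketch of the arithmetic (the prototype reuses the earlier model):

    #include <stdint.h>

    uint64_t model_scale_delta(uint64_t delta, uint32_t mul_frac, int shift);

    static uint64_t model_pvclock_read(uint64_t system_time_ns,
                                       uint64_t tsc_timestamp,
                                       uint64_t tsc_now,
                                       uint32_t tsc_to_system_mul,
                                       int tsc_shift)
    {
            return system_time_ns +
                   model_scale_delta(tsc_now - tsc_timestamp,
                                     tsc_to_system_mul, tsc_shift);
    }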
324 | 226 | ||
325 | static void xen_read_wallclock(struct timespec *ts) | 227 | static void xen_read_wallclock(struct timespec *ts) |
326 | { | 228 | { |
327 | const struct shared_info *s = HYPERVISOR_shared_info; | 229 | struct shared_info *s = HYPERVISOR_shared_info; |
328 | u32 version; | 230 | struct pvclock_wall_clock *wall_clock = &(s->wc); |
329 | u64 delta; | 231 | struct pvclock_vcpu_time_info *vcpu_time; |
330 | struct timespec now; | ||
331 | |||
332 | /* get wallclock at system boot */ | ||
333 | do { | ||
334 | version = s->wc_version; | ||
335 | rmb(); /* fetch version before time */ | ||
336 | now.tv_sec = s->wc_sec; | ||
337 | now.tv_nsec = s->wc_nsec; | ||
338 | rmb(); /* fetch time before checking version */ | ||
339 | } while ((s->wc_version & 1) | (version ^ s->wc_version)); | ||
340 | |||
341 | delta = xen_clocksource_read(); /* time since system boot */ | ||
342 | delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec; | ||
343 | |||
344 | now.tv_nsec = do_div(delta, NSEC_PER_SEC); | ||
345 | now.tv_sec = delta; | ||
346 | 232 | ||
347 | set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); | 233 | vcpu_time = &get_cpu_var(xen_vcpu)->time; |
234 | pvclock_read_wallclock(wall_clock, vcpu_time, ts); | ||
235 | put_cpu_var(xen_vcpu); | ||
348 | } | 236 | } |
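The deleted wallclock reader adds the wall time at boot (from shared_info) to the boot-relative clocksource reading and renormalizes the result; pvclock_read_wallclock() presumably does the same over the pvclock structures. A freestanding model:

    #include <stdint.h>

    #define NSEC_PER_SEC 1000000000ULL

    static void model_read_wallclock(uint64_t boot_sec, uint32_t boot_nsec,
                                     uint64_t since_boot_ns,
                                     uint64_t *sec, uint32_t *nsec)
    {
            uint64_t total = boot_sec * NSEC_PER_SEC + boot_nsec + since_boot_ns;

            *sec  = total / NSEC_PER_SEC;
            *nsec = (uint32_t)(total % NSEC_PER_SEC);
    }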
349 | 237 | ||
350 | unsigned long xen_get_wallclock(void) | 238 | unsigned long xen_get_wallclock(void) |
@@ -352,7 +240,6 @@ unsigned long xen_get_wallclock(void) | |||
352 | struct timespec ts; | 240 | struct timespec ts; |
353 | 241 | ||
354 | xen_read_wallclock(&ts); | 242 | xen_read_wallclock(&ts); |
355 | |||
356 | return ts.tv_sec; | 243 | return ts.tv_sec; |
357 | } | 244 | } |
358 | 245 | ||
@@ -572,12 +459,23 @@ void xen_setup_cpu_clockevents(void) | |||
572 | clockevents_register_device(&__get_cpu_var(xen_clock_events)); | 459 | clockevents_register_device(&__get_cpu_var(xen_clock_events)); |
573 | } | 460 | } |
574 | 461 | ||
462 | void xen_timer_resume(void) | ||
463 | { | ||
464 | int cpu; | ||
465 | |||
466 | if (xen_clockevent != &xen_vcpuop_clockevent) | ||
467 | return; | ||
468 | |||
469 | for_each_online_cpu(cpu) { | ||
470 | if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL)) | ||
471 | BUG(); | ||
472 | } | ||
473 | } | ||
474 | |||
575 | __init void xen_time_init(void) | 475 | __init void xen_time_init(void) |
576 | { | 476 | { |
577 | int cpu = smp_processor_id(); | 477 | int cpu = smp_processor_id(); |
578 | 478 | ||
579 | get_time_values_from_xen(); | ||
580 | |||
581 | clocksource_register(&xen_clocksource); | 479 | clocksource_register(&xen_clocksource); |
582 | 480 | ||
583 | if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) { | 481 | if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) { |
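Both xen_time_init() and the new xen_timer_resume() issue VCPUOP_stop_periodic_timer: at boot it doubles as a feature probe for one-shot timers, and it evidently must be repeated after save/restore because the hypervisor side reverts to the periodic tick. A freestanding model of the handshake (names illustrative):

    enum timer_mode { MODE_PERIODIC, MODE_ONESHOT };

    /* stop_periodic() stands in for
     * HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL);
     * a return of 0 means the hypervisor honoured the request. */
    static enum timer_mode model_negotiate(int (*stop_periodic)(int cpu), int cpu)
    {
            return stop_periodic(cpu) == 0 ? MODE_ONESHOT : MODE_PERIODIC;
    }

Note that on resume the result is not re-negotiated: xen_timer_resume() BUGs if the hypercall fails, since the clockevent choice was committed at boot.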
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 288d587ce73c..7c0cf6320a0a 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S | |||
@@ -7,6 +7,7 @@ | |||
7 | #include <linux/init.h> | 7 | #include <linux/init.h> |
8 | #include <asm/boot.h> | 8 | #include <asm/boot.h> |
9 | #include <xen/interface/elfnote.h> | 9 | #include <xen/interface/elfnote.h> |
10 | #include <asm/xen/interface.h> | ||
10 | 11 | ||
11 | __INIT | 12 | __INIT |
12 | ENTRY(startup_xen) | 13 | ENTRY(startup_xen) |
@@ -17,7 +18,7 @@ ENTRY(startup_xen) | |||
17 | 18 | ||
18 | __FINIT | 19 | __FINIT |
19 | 20 | ||
20 | .pushsection .bss.page_aligned | 21 | .pushsection .text |
21 | .align PAGE_SIZE_asm | 22 | .align PAGE_SIZE_asm |
22 | ENTRY(hypercall_page) | 23 | ENTRY(hypercall_page) |
23 | .skip 0x1000 | 24 | .skip 0x1000 |
@@ -30,11 +31,11 @@ ENTRY(hypercall_page) | |||
30 | ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long startup_xen) | 31 | ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long startup_xen) |
31 | ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long hypercall_page) | 32 | ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long hypercall_page) |
32 | ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") | 33 | ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") |
33 | #ifdef CONFIG_X86_PAE | ||
34 | ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") | 34 | ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") |
35 | #else | ||
36 | ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "no") | ||
37 | #endif | ||
38 | ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") | 35 | ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") |
36 | ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, | ||
37 | .quad _PAGE_PRESENT; .quad _PAGE_PRESENT) | ||
38 | ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1) | ||
39 | ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long __HYPERVISOR_VIRT_START) | ||
39 | 40 | ||
40 | #endif /*CONFIG_XEN */ | 41 | #endif /*CONFIG_XEN */ |
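Two things change in xen-head.S: the PAE ELF note becomes unconditionally "yes" (consistent with Xen guests now requiring PAE), and hypercall_page moves from .bss.page_aligned into .text. The move matters because the page is executable code, not data: the hypervisor fills it with one stub per hypercall at boot, and under the Xen ABI the stub for hypercall N sits at a fixed 32-byte slot. A hedged sketch of that layout (the helper name is illustrative):

    extern char hypercall_page[4096];   /* populated by the hypervisor at boot */

    static inline void *model_hypercall_entry(unsigned int nr)
    {
            /* Each hypercall stub occupies a 32-byte slot in the page. */
            return hypercall_page + nr * 32;
    }

Keeping the page in .text also guarantees it is mapped executable, which a zero-filled BSS page would presumably not be under NX.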
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index f1063ae08037..9a055592a307 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -9,18 +9,26 @@ | |||
9 | extern const char xen_hypervisor_callback[]; | 9 | extern const char xen_hypervisor_callback[]; |
10 | extern const char xen_failsafe_callback[]; | 10 | extern const char xen_failsafe_callback[]; |
11 | 11 | ||
12 | struct trap_info; | ||
12 | void xen_copy_trap_info(struct trap_info *traps); | 13 | void xen_copy_trap_info(struct trap_info *traps); |
13 | 14 | ||
14 | DECLARE_PER_CPU(unsigned long, xen_cr3); | 15 | DECLARE_PER_CPU(unsigned long, xen_cr3); |
15 | DECLARE_PER_CPU(unsigned long, xen_current_cr3); | 16 | DECLARE_PER_CPU(unsigned long, xen_current_cr3); |
16 | 17 | ||
17 | extern struct start_info *xen_start_info; | 18 | extern struct start_info *xen_start_info; |
19 | extern struct shared_info xen_dummy_shared_info; | ||
18 | extern struct shared_info *HYPERVISOR_shared_info; | 20 | extern struct shared_info *HYPERVISOR_shared_info; |
19 | 21 | ||
22 | void xen_setup_mfn_list_list(void); | ||
23 | void xen_setup_shared_info(void); | ||
24 | |||
20 | char * __init xen_memory_setup(void); | 25 | char * __init xen_memory_setup(void); |
21 | void __init xen_arch_setup(void); | 26 | void __init xen_arch_setup(void); |
22 | void __init xen_init_IRQ(void); | 27 | void __init xen_init_IRQ(void); |
23 | void xen_enable_sysenter(void); | 28 | void xen_enable_sysenter(void); |
29 | void xen_vcpu_restore(void); | ||
30 | |||
31 | void __init xen_build_dynamic_phys_to_machine(void); | ||
24 | 32 | ||
25 | void xen_setup_timer(int cpu); | 33 | void xen_setup_timer(int cpu); |
26 | void xen_setup_cpu_clockevents(void); | 34 | void xen_setup_cpu_clockevents(void); |
@@ -29,6 +37,7 @@ void __init xen_time_init(void); | |||
29 | unsigned long xen_get_wallclock(void); | 37 | unsigned long xen_get_wallclock(void); |
30 | int xen_set_wallclock(unsigned long time); | 38 | int xen_set_wallclock(unsigned long time); |
31 | unsigned long long xen_sched_clock(void); | 39 | unsigned long long xen_sched_clock(void); |
40 | void xen_timer_resume(void); | ||
32 | 41 | ||
33 | irqreturn_t xen_debug_interrupt(int irq, void *dev_id); | 42 | irqreturn_t xen_debug_interrupt(int irq, void *dev_id); |
34 | 43 | ||
@@ -54,6 +63,8 @@ int xen_smp_call_function_single(int cpu, void (*func) (void *info), void *info, | |||
54 | int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *), | 63 | int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *), |
55 | void *info, int wait); | 64 | void *info, int wait); |
56 | 65 | ||
66 | extern cpumask_t xen_cpu_initialized_map; | ||
67 | |||
57 | 68 | ||
58 | /* Declare an asm function, along with symbols needed to make it | 69 | /* Declare an asm function, along with symbols needed to make it |
59 | inlineable */ | 70 | inlineable */ |