Diffstat (limited to 'arch/x86')
190 files changed, 3811 insertions, 4128 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c71a8f8bdba8..1b7c74350a04 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -24,7 +24,7 @@ config X86
 	select HAVE_UNSTABLE_SCHED_CLOCK
 	select HAVE_IDE
 	select HAVE_OPROFILE
-	select HAVE_PERF_COUNTERS if (!M386 && !M486)
+	select HAVE_PERF_EVENTS if (!M386 && !M486)
 	select HAVE_IOREMAP_PROT
 	select HAVE_KPROBES
 	select ARCH_WANT_OPTIONAL_GPIOLIB
@@ -87,10 +87,6 @@ config STACKTRACE_SUPPORT
 config HAVE_LATENCYTOP_SUPPORT
 	def_bool y
 
-config FAST_CMPXCHG_LOCAL
-	bool
-	default y
-
 config MMU
 	def_bool y
 
@@ -151,7 +147,10 @@ config ARCH_HAS_CACHE_LINE_SIZE
 config HAVE_SETUP_PER_CPU_AREA
 	def_bool y
 
-config HAVE_DYNAMIC_PER_CPU_AREA
+config NEED_PER_CPU_EMBED_FIRST_CHUNK
+	def_bool y
+
+config NEED_PER_CPU_PAGE_FIRST_CHUNK
 	def_bool y
 
 config HAVE_CPUMASK_OF_CPU_MAP
@@ -180,6 +179,10 @@ config ARCH_SUPPORTS_OPTIMIZED_INLINING
 config ARCH_SUPPORTS_DEBUG_PAGEALLOC
 	def_bool y
 
+config HAVE_INTEL_TXT
+	def_bool y
+	depends on EXPERIMENTAL && DMAR && ACPI
+
 # Use the generic interrupt handling code in kernel/irq/:
 config GENERIC_HARDIRQS
 	bool
@@ -319,6 +322,7 @@ config X86_EXTENDED_PLATFORM
 	  SGI 320/540 (Visual Workstation)
 	  Summit/EXA (IBM x440)
 	  Unisys ES7000 IA32 series
+	  Moorestown MID devices
 
 	  If you have one of these systems, or if you want to build a
 	  generic distribution kernel, say Y here - otherwise say N.
@@ -378,6 +382,18 @@ config X86_ELAN
 
 	  If unsure, choose "PC-compatible" instead.
 
+config X86_MRST
+	bool "Moorestown MID platform"
+	depends on X86_32
+	depends on X86_EXTENDED_PLATFORM
+	---help---
+	  Moorestown is Intel's Low Power Intel Architecture (LPIA) based Moblin
+	  Internet Device(MID) platform. Moorestown consists of two chips:
+	  Lincroft (CPU core, graphics, and memory controller) and Langwell IOH.
+	  Unlike standard x86 PCs, Moorestown does not have many legacy devices
+	  nor standard legacy replacement devices/features. e.g. Moorestown does
+	  not contain i8259, i8254, HPET, legacy BIOS, most of the io ports.
+
 config X86_RDC321X
 	bool "RDC R-321x SoC"
 	depends on X86_32
@@ -413,6 +429,17 @@ config X86_NUMAQ
 	  of Flat Logical.  You will need a new lynxer.elf file to flash your
 	  firmware with - send email to <Martin.Bligh@us.ibm.com>.
 
+config X86_SUPPORTS_MEMORY_FAILURE
+	bool
+	# MCE code calls memory_failure():
+	depends on X86_MCE
+	# On 32-bit this adds too big of NODES_SHIFT and we run out of page flags:
+	depends on !X86_NUMAQ
+	# On 32-bit SPARSEMEM adds too big of SECTIONS_WIDTH:
+	depends on X86_64 || !SPARSEMEM
+	select ARCH_SUPPORTS_MEMORY_FAILURE
+	default y
+
 config X86_VISWS
 	bool "SGI 320/540 (Visual Workstation)"
 	depends on X86_32 && PCI && X86_MPPARSE && PCI_GODIRECT
@@ -465,7 +492,7 @@ if PARAVIRT_GUEST
 source "arch/x86/xen/Kconfig"
 
 config VMI
-	bool "VMI Guest support"
+	bool "VMI Guest support (DEPRECATED)"
 	select PARAVIRT
 	depends on X86_32
 	---help---
@@ -474,6 +501,15 @@ config VMI
 	  at the moment), by linking the kernel to a GPL-ed ROM module
 	  provided by the hypervisor.
 
+	  As of September 2009, VMware has started a phased retirement
+	  of this feature from VMware's products. Please see
+	  feature-removal-schedule.txt for details.  If you are
+	  planning to enable this option, please note that you cannot
+	  live migrate a VMI enabled VM to a future VMware product,
+	  which doesn't support VMI. So if you expect your kernel to
+	  seamlessly migrate to newer VMware products, keep this
+	  disabled.
+
 config KVM_CLOCK
 	bool "KVM paravirtualized clock"
 	select PARAVIRT
@@ -777,41 +813,17 @@ config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
 	  increased on these systems.
 
 config X86_MCE
-	bool "Machine Check Exception"
+	bool "Machine Check / overheating reporting"
 	---help---
-	  Machine Check Exception support allows the processor to notify the
-	  kernel if it detects a problem (e.g. overheating, component failure).
+	  Machine Check support allows the processor to notify the
+	  kernel if it detects a problem (e.g. overheating, data corruption).
 	  The action the kernel takes depends on the severity of the problem,
-	  ranging from a warning message on the console, to halting the machine.
-	  Your processor must be a Pentium or newer to support this - check the
-	  flags in /proc/cpuinfo for mce.  Note that some older Pentium systems
-	  have a design flaw which leads to false MCE events - hence MCE is
-	  disabled on all P5 processors, unless explicitly enabled with "mce"
-	  as a boot argument.  Similarly, if MCE is built in and creates a
-	  problem on some new non-standard machine, you can boot with "nomce"
-	  to disable it.  MCE support simply ignores non-MCE processors like
-	  the 386 and 486, so nearly everyone can say Y here.
-
-config X86_OLD_MCE
-	depends on X86_32 && X86_MCE
-	bool "Use legacy machine check code (will go away)"
-	default n
-	select X86_ANCIENT_MCE
-	---help---
-	  Use the old i386 machine check code. This is merely intended for
-	  testing in a transition period. Try this if you run into any machine
-	  check related software problems, but report the problem to
-	  linux-kernel.  When in doubt say no.
-
-config X86_NEW_MCE
-	depends on X86_MCE
-	bool
-	default y if (!X86_OLD_MCE && X86_32) || X86_64
+	  ranging from warning messages to halting the machine.
 
 config X86_MCE_INTEL
 	def_bool y
 	prompt "Intel MCE features"
-	depends on X86_NEW_MCE && X86_LOCAL_APIC
+	depends on X86_MCE && X86_LOCAL_APIC
 	---help---
 	   Additional support for intel specific MCE features such as
 	   the thermal monitor.
@@ -819,14 +831,14 @@ config X86_MCE_INTEL
 config X86_MCE_AMD
 	def_bool y
 	prompt "AMD MCE features"
-	depends on X86_NEW_MCE && X86_LOCAL_APIC
+	depends on X86_MCE && X86_LOCAL_APIC
 	---help---
 	   Additional support for AMD specific MCE features such as
 	   the DRAM Error Threshold.
 
 config X86_ANCIENT_MCE
 	def_bool n
-	depends on X86_32
+	depends on X86_32 && X86_MCE
 	prompt "Support for old Pentium 5 / WinChip machine checks"
 	---help---
 	  Include support for machine check handling on old Pentium 5 or WinChip
@@ -839,36 +851,16 @@ config X86_MCE_THRESHOLD
 	default y
 
 config X86_MCE_INJECT
-	depends on X86_NEW_MCE
+	depends on X86_MCE
 	tristate "Machine check injector support"
 	---help---
 	  Provide support for injecting machine checks for testing purposes.
 	  If you don't know what a machine check is and you don't do kernel
 	  QA it is safe to say n.
 
-config X86_MCE_NONFATAL
-	tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4"
-	depends on X86_OLD_MCE
-	---help---
-	  Enabling this feature starts a timer that triggers every 5 seconds which
-	  will look at the machine check registers to see if anything happened.
-	  Non-fatal problems automatically get corrected (but still logged).
-	  Disable this if you don't want to see these messages.
-	  Seeing the messages this option prints out may be indicative of dying
-	  or out-of-spec (ie, overclocked) hardware.
-	  This option only does something on certain CPUs.
-	  (AMD Athlon/Duron and Intel Pentium 4)
-
-config X86_MCE_P4THERMAL
-	bool "check for P4 thermal throttling interrupt."
-	depends on X86_OLD_MCE && X86_MCE && (X86_UP_APIC || SMP)
-	---help---
-	  Enabling this feature will cause a message to be printed when the P4
-	  enters thermal throttling.
-
 config X86_THERMAL_VECTOR
 	def_bool y
-	depends on X86_MCE_P4THERMAL || X86_MCE_INTEL
+	depends on X86_MCE_INTEL
 
 config VM86
 	bool "Enable VM86 support" if EMBEDDED
@@ -1229,6 +1221,10 @@ config ARCH_DISCONTIGMEM_DEFAULT
 	def_bool y
 	depends on NUMA && X86_32
 
+config ARCH_PROC_KCORE_TEXT
+	def_bool y
+	depends on X86_64 && PROC_KCORE
+
 config ARCH_SPARSEMEM_DEFAULT
 	def_bool y
 	depends on X86_64
@@ -1414,6 +1410,10 @@ config X86_PAT
 
 	  If unsure, say Y.
 
+config ARCH_USES_PG_UNCACHED
+	def_bool y
+	depends on X86_PAT
+
 config EFI
 	bool "EFI runtime service support"
 	depends on ACPI
@@ -1683,6 +1683,8 @@ source "kernel/power/Kconfig"
 
 source "drivers/acpi/Kconfig"
 
+source "drivers/sfi/Kconfig"
+
 config X86_APM_BOOT
 	bool
 	default y
@@ -1878,7 +1880,7 @@ config PCI_DIRECT
 
 config PCI_MMCONFIG
 	def_bool y
-	depends on X86_32 && PCI && ACPI && (PCI_GOMMCONFIG || PCI_GOANY)
+	depends on X86_32 && PCI && (ACPI || SFI) && (PCI_GOMMCONFIG || PCI_GOANY)
 
 config PCI_OLPC
 	def_bool y
@@ -1916,7 +1918,7 @@ config DMAR_DEFAULT_ON
 config DMAR_BROKEN_GFX_WA
 	def_bool n
 	prompt "Workaround broken graphics drivers (going away soon)"
-	depends on DMAR
+	depends on DMAR && BROKEN
 	---help---
 	  Current Graphics drivers tend to use physical address
 	  for DMA and avoid using DMA APIs. Setting this config
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 527519b8a9f9..f2824fb8c79c 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -400,7 +400,7 @@ config X86_TSC
 
 config X86_CMPXCHG64
 	def_bool y
-	depends on X86_PAE || X86_64
+	depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM
 
 # this should be set for all -march=.. options where the compiler
 # generates cmov.
@@ -412,6 +412,7 @@ config X86_MINIMUM_CPU_FAMILY
 	int
 	default "64" if X86_64
 	default "6" if X86_32 && X86_P6_NOP
+	default "5" if X86_32 && X86_CMPXCHG64
 	default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK)
 	default "3"
 
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 7983c420eaf2..a012ee8ef803 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -179,8 +179,8 @@ archclean:
 define archhelp
   echo  '* bzImage      - Compressed kernel image (arch/x86/boot/bzImage)'
   echo  '  install      - Install kernel using'
-  echo  '                  (your) ~/bin/installkernel or'
-  echo  '                  (distribution) /sbin/installkernel or'
+  echo  '                  (your) ~/bin/$(INSTALLKERNEL) or'
+  echo  '                  (distribution) /sbin/$(INSTALLKERNEL) or'
   echo  '                  install to $$(INSTALL_PATH) and run lilo'
   echo  '  fdimage      - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)'
   echo  '  fdimage144   - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)'
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 75e4f001e706..f543b70ffae2 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -23,13 +23,14 @@
  */
 	.text
 
+#include <linux/init.h>
 #include <linux/linkage.h>
 #include <asm/segment.h>
 #include <asm/page_types.h>
 #include <asm/boot.h>
 #include <asm/asm-offsets.h>
 
-.section ".text.head","ax",@progbits
+	__HEAD
 ENTRY(startup_32)
 	cld
 	/*
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index f62c284db9eb..077e1b69198e 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -24,6 +24,7 @@
 	.code32
 	.text
 
+#include <linux/init.h>
 #include <linux/linkage.h>
 #include <asm/segment.h>
 #include <asm/pgtable_types.h>
@@ -33,7 +34,7 @@
 #include <asm/processor-flags.h>
 #include <asm/asm-offsets.h>
 
-.section ".text.head"
+	__HEAD
 	.code32
 ENTRY(startup_32)
 	cld
diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S
index cc353e1b3ffd..f4193bb48782 100644
--- a/arch/x86/boot/compressed/vmlinux.lds.S
+++ b/arch/x86/boot/compressed/vmlinux.lds.S
@@ -1,3 +1,5 @@
+#include <asm-generic/vmlinux.lds.h>
+
 OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)
 
 #undef i386
@@ -18,9 +20,9 @@ SECTIONS
 	 * address 0.
 	 */
 	. = 0;
-	.text.head : {
+	.head.text : {
 		_head = . ;
-		*(.text.head)
+		HEAD_TEXT
 		_ehead = . ;
 	}
 	.rodata.compressed : {
diff --git a/arch/x86/boot/install.sh b/arch/x86/boot/install.sh
index 8d60ee15dfd9..d13ec1c38640 100644
--- a/arch/x86/boot/install.sh
+++ b/arch/x86/boot/install.sh
@@ -33,8 +33,8 @@ verify "$3"
 
 # User may have a custom install script
 
-if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi
-if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi
+if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
 
 # Default install - same as make zlilo
 
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index ba331bfd1112..1733f9f65e82 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -21,8 +21,8 @@
 #define __AUDIT_ARCH_LE	   0x40000000
 
 #ifndef CONFIG_AUDITSYSCALL
-#define sysexit_audit int_ret_from_sys_call
-#define sysretl_audit int_ret_from_sys_call
+#define sysexit_audit ia32_ret_from_sys_call
+#define sysretl_audit ia32_ret_from_sys_call
 #endif
 
 #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
@@ -39,12 +39,12 @@
 	.endm
 
 	/* clobbers %eax */
-	.macro  CLEAR_RREGS _r9=rax
+	.macro  CLEAR_RREGS offset=0, _r9=rax
 	xorl	%eax,%eax
-	movq	%rax,R11(%rsp)
-	movq	%rax,R10(%rsp)
-	movq	%\_r9,R9(%rsp)
-	movq	%rax,R8(%rsp)
+	movq	%rax,\offset+R11(%rsp)
+	movq	%rax,\offset+R10(%rsp)
+	movq	%\_r9,\offset+R9(%rsp)
+	movq	%rax,\offset+R8(%rsp)
 	.endm
 
 	/*
@@ -172,6 +172,10 @@ sysexit_from_sys_call:
 	movl	RIP-R11(%rsp),%edx		/* User %eip */
 	CFI_REGISTER rip,rdx
 	RESTORE_ARGS 1,24,1,1,1,1
+	xorq	%r8,%r8
+	xorq	%r9,%r9
+	xorq	%r10,%r10
+	xorq	%r11,%r11
 	popfq
 	CFI_ADJUST_CFA_OFFSET -8
 	/*CFI_RESTORE rflags*/
@@ -202,7 +206,7 @@ sysexit_from_sys_call:
 
 	.macro auditsys_exit exit,ebpsave=RBP
 	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
-	jnz int_ret_from_sys_call
+	jnz ia32_ret_from_sys_call
 	TRACE_IRQS_ON
 	sti
 	movl %eax,%esi		/* second arg, syscall return value */
@@ -218,8 +222,9 @@ sysexit_from_sys_call:
 	cli
 	TRACE_IRQS_OFF
 	testl %edi,TI_flags(%r10)
-	jnz int_with_check
-	jmp \exit
+	jz \exit
+	CLEAR_RREGS -ARGOFFSET
+	jmp int_with_check
 	.endm
 
 sysenter_auditsys:
@@ -329,6 +334,9 @@ sysretl_from_sys_call:
 	CFI_REGISTER rip,rcx
 	movl EFLAGS-ARGOFFSET(%rsp),%r11d
 	/*CFI_REGISTER rflags,r11*/
+	xorq	%r10,%r10
+	xorq	%r9,%r9
+	xorq	%r8,%r8
 	TRACE_IRQS_ON
 	movl RSP-ARGOFFSET(%rsp),%esp
 	CFI_RESTORE rsp
@@ -353,7 +361,7 @@ cstar_tracesys:
 #endif
 	xchgl %r9d,%ebp
 	SAVE_REST
-	CLEAR_RREGS r9
+	CLEAR_RREGS 0, r9
 	movq $-ENOSYS,RAX(%rsp)	/* ptrace can change this for a bad syscall */
 	movq %rsp,%rdi        /* &pt_regs -> arg1 */
 	call syscall_trace_enter
@@ -425,6 +433,8 @@ ia32_do_call:
 	call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
 ia32_sysret:
 	movq %rax,RAX-ARGOFFSET(%rsp)
+ia32_ret_from_sys_call:
+	CLEAR_RREGS -ARGOFFSET
 	jmp int_ret_from_sys_call
 
 ia32_tracesys:
@@ -442,8 +452,8 @@ END(ia32_syscall)
 
 ia32_badsys:
 	movq $0,ORIG_RAX-ARGOFFSET(%rsp)
-	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
-	jmp int_ret_from_sys_call
+	movq $-ENOSYS,%rax
+	jmp ia32_sysret
 
 quiet_ni_syscall:
 	movq $-ENOSYS,%rax
@@ -831,5 +841,5 @@ ia32_sys_call_table:
 	.quad compat_sys_preadv
 	.quad compat_sys_pwritev
 	.quad compat_sys_rt_tgsigqueueinfo	/* 335 */
-	.quad sys_perf_counter_open
+	.quad sys_perf_event_open
 ia32_syscall_end:
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 20d1465a2ab0..4518dc500903 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -144,7 +144,6 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate)
 
 #else /* !CONFIG_ACPI */
 
-#define acpi_disabled 1
 #define acpi_lapic 0
 #define acpi_ioapic 0
 static inline void acpi_noirq_set(void) { }
diff --git a/arch/x86/include/asm/agp.h b/arch/x86/include/asm/agp.h
index 9825cd64c9b6..eec2a70d4376 100644
--- a/arch/x86/include/asm/agp.h
+++ b/arch/x86/include/asm/agp.h
@@ -22,10 +22,6 @@
  */
 #define flush_agp_cache() wbinvd()
 
-/* Convert a physical address to an address suitable for the GART. */
-#define phys_to_gart(x) (x)
-#define gart_to_phys(x) (x)
-
 /* GATT allocation. Returns/accepts GATT kernel virtual address. */
 #define alloc_gatt_pages(order)		\
 	((char *)__get_free_pages(GFP_KERNEL, (order)))
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 586b7adb8e53..474d80d3e6cc 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -66,13 +66,23 @@ static inline void default_inquire_remote_apic(int apicid)
 }
 
 /*
+ * With 82489DX we can't rely on apic feature bit
+ * retrieved via cpuid but still have to deal with
+ * such an apic chip so we assume that SMP configuration
+ * is found from MP table (64bit case uses ACPI mostly
+ * which set smp presence flag as well so we are safe
+ * to use this helper too).
+ */
+static inline bool apic_from_smp_config(void)
+{
+	return smp_found_config && !disable_apic;
+}
+
+/*
  * Basic functions accessing APICs.
  */
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
-#else
-#define setup_boot_clock setup_boot_APIC_clock
-#define setup_secondary_clock setup_secondary_APIC_clock
 #endif
 
 #ifdef CONFIG_X86_64
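The new apic_from_smp_config() helper centralizes the "was an SMP configuration actually discovered" test described in the comment above. A sketch of the intended call-site pattern; the function below is illustrative and not part of this diff:

	/* illustrative call site, not from this commit */
	static void example_lapic_teardown(void)
	{
		if (!apic_from_smp_config())
			return;		/* no MP-table/ACPI-provided APIC config */

		/* safe to touch the local APIC here */
		disable_local_APIC();
	}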
@@ -252,6 +262,8 @@ static inline void lapic_shutdown(void) { }
 static inline void init_apic_mappings(void) { }
 static inline void disable_local_APIC(void) { }
 static inline void apic_disable(void) { }
+# define setup_boot_APIC_clock x86_init_noop
+# define setup_secondary_APIC_clock x86_init_noop
 #endif /* !CONFIG_X86_LOCAL_APIC */
 
 #ifdef CONFIG_X86_64
@@ -300,7 +312,7 @@ struct apic {
 	int (*cpu_present_to_apicid)(int mps_cpu);
 	physid_mask_t (*apicid_to_cpu_present)(int phys_apicid);
 	void (*setup_portio_remap)(void);
-	int (*check_phys_apicid_present)(int boot_cpu_physical_apicid);
+	int (*check_phys_apicid_present)(int phys_apicid);
 	void (*enable_apic_mode)(void);
 	int (*phys_pkg_id)(int cpuid_apic, int index_msb);
 
@@ -434,7 +446,7 @@ extern struct apic apic_x2apic_uv_x;
 DECLARE_PER_CPU(int, x2apic_extra_bits);
 
 extern int default_cpu_present_to_apicid(int mps_cpu);
-extern int default_check_phys_apicid_present(int boot_cpu_physical_apicid);
+extern int default_check_phys_apicid_present(int phys_apicid);
 #endif
 
 static inline void default_wait_for_init_deassert(atomic_t *deassert)
@@ -550,9 +562,9 @@ static inline int __default_cpu_present_to_apicid(int mps_cpu)
 }
 
 static inline int
-__default_check_phys_apicid_present(int boot_cpu_physical_apicid)
+__default_check_phys_apicid_present(int phys_apicid)
 {
-	return physid_isset(boot_cpu_physical_apicid, phys_cpu_present_map);
+	return physid_isset(phys_apicid, phys_cpu_present_map);
 }
 
 #ifdef CONFIG_X86_32
@@ -562,13 +574,13 @@ static inline int default_cpu_present_to_apicid(int mps_cpu)
 }
 
 static inline int
-default_check_phys_apicid_present(int boot_cpu_physical_apicid)
+default_check_phys_apicid_present(int phys_apicid)
 {
-	return __default_check_phys_apicid_present(boot_cpu_physical_apicid);
+	return __default_check_phys_apicid_present(phys_apicid);
 }
 #else
 extern int default_cpu_present_to_apicid(int mps_cpu);
-extern int default_check_phys_apicid_present(int boot_cpu_physical_apicid);
+extern int default_check_phys_apicid_present(int phys_apicid);
 #endif
 
 static inline physid_mask_t default_apicid_to_cpu_present(int phys_apicid)
diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h
index 1724e8de317c..6be33d83c716 100644
--- a/arch/x86/include/asm/bootparam.h
+++ b/arch/x86/include/asm/bootparam.h
@@ -85,7 +85,8 @@ struct efi_info {
 struct boot_params {
 	struct screen_info screen_info;			/* 0x000 */
 	struct apm_bios_info apm_bios_info;		/* 0x040 */
-	__u8  _pad2[12];				/* 0x054 */
+	__u8  _pad2[4];					/* 0x054 */
+	__u64  tboot_addr;				/* 0x058 */
 	struct ist_info ist_info;			/* 0x060 */
 	__u8  _pad3[16];				/* 0x070 */
 	__u8  hd0_info[16];	/* obsolete! */		/* 0x080 */
@@ -109,4 +110,14 @@ struct boot_params {
 	__u8  _pad9[276];				/* 0xeec */
 } __attribute__((packed));
 
+enum {
+	X86_SUBARCH_PC = 0,
+	X86_SUBARCH_LGUEST,
+	X86_SUBARCH_XEN,
+	X86_SUBARCH_MRST,
+	X86_NR_SUBARCHS,
+};
+
+
+
 #endif /* _ASM_X86_BOOTPARAM_H */
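The X86_SUBARCH_* enum names the values a boot loader can pass in the hardware_subarch field of the setup header, so a platform like Moorestown can be identified before any hardware probing. A minimal sketch of early setup branching on it; the x86_mrst_early_setup() hook is a hypothetical name, not something added by this diff:

	#include <asm/bootparam.h>
	#include <asm/setup.h>		/* for boot_params */

	static void __init early_subarch_setup(void)
	{
		/* hardware_subarch is filled in by the boot loader */
		switch (boot_params.hdr.hardware_subarch) {
		case X86_SUBARCH_MRST:
			x86_mrst_early_setup();	/* hypothetical platform hook */
			break;
		case X86_SUBARCH_PC:
		default:
			break;			/* standard PC boot path */
		}
	}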
diff --git a/arch/x86/include/asm/cache.h b/arch/x86/include/asm/cache.h
index 5d367caa0e36..549860d3be8f 100644
--- a/arch/x86/include/asm/cache.h
+++ b/arch/x86/include/asm/cache.h
@@ -1,6 +1,8 @@
 #ifndef _ASM_X86_CACHE_H
 #define _ASM_X86_CACHE_H
 
+#include <linux/linkage.h>
+
 /* L1 cache line size */
 #define L1_CACHE_SHIFT	(CONFIG_X86_L1_CACHE_SHIFT)
 #define L1_CACHE_BYTES	(1 << L1_CACHE_SHIFT)
@@ -13,7 +15,7 @@
 #ifdef CONFIG_SMP
 #define __cacheline_aligned_in_smp					\
 	__attribute__((__aligned__(1 << (INTERNODE_CACHE_SHIFT))))	\
-	__attribute__((__section__(".data.page_aligned")))
+	__page_aligned_data
 #endif
 #endif
 
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index e55dfc1ad453..b54f6afe7ec4 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -43,8 +43,58 @@ static inline void copy_from_user_page(struct vm_area_struct *vma,
 	memcpy(dst, src, len);
 }
 
-#define PG_non_WB				PG_arch_1
-PAGEFLAG(NonWB, non_WB)
+#define PG_WC				PG_arch_1
+PAGEFLAG(WC, WC)
+
+#ifdef CONFIG_X86_PAT
+/*
+ * X86 PAT uses page flags WC and Uncached together to keep track of
+ * memory type of pages that have backing page struct. X86 PAT supports 3
+ * different memory types, _PAGE_CACHE_WB, _PAGE_CACHE_WC and
+ * _PAGE_CACHE_UC_MINUS and fourth state where page's memory type has not
+ * been changed from its default (value of -1 used to denote this).
+ * Note we do not support _PAGE_CACHE_UC here.
+ *
+ * Caller must hold memtype_lock for atomicity.
+ */
+static inline unsigned long get_page_memtype(struct page *pg)
+{
+	if (!PageUncached(pg) && !PageWC(pg))
+		return -1;
+	else if (!PageUncached(pg) && PageWC(pg))
+		return _PAGE_CACHE_WC;
+	else if (PageUncached(pg) && !PageWC(pg))
+		return _PAGE_CACHE_UC_MINUS;
+	else
+		return _PAGE_CACHE_WB;
+}
+
+static inline void set_page_memtype(struct page *pg, unsigned long memtype)
+{
+	switch (memtype) {
+	case _PAGE_CACHE_WC:
+		ClearPageUncached(pg);
+		SetPageWC(pg);
+		break;
+	case _PAGE_CACHE_UC_MINUS:
+		SetPageUncached(pg);
+		ClearPageWC(pg);
+		break;
+	case _PAGE_CACHE_WB:
+		SetPageUncached(pg);
+		SetPageWC(pg);
+		break;
+	default:
+	case -1:
+		ClearPageUncached(pg);
+		ClearPageWC(pg);
+		break;
+	}
+}
+#else
+static inline unsigned long get_page_memtype(struct page *pg) { return -1; }
+static inline void set_page_memtype(struct page *pg, unsigned long memtype) { }
+#endif
 
 /*
  * The set_memory_* API can be used to change various attributes of a virtual
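Read together, the two page flags encode four tracked states: neither flag set means "never set" (-1, the default), WC alone means _PAGE_CACHE_WC, Uncached alone means _PAGE_CACHE_UC_MINUS, and both set mean _PAGE_CACHE_WB. A sketch of a caller round-tripping the tracked type; memtype_lock is the serializing lock named in the comment above (in this era a lock private to arch/x86/mm/pat.c, so the function here is purely illustrative):

	/* illustrative caller, not from this commit */
	static unsigned long example_track_wc(struct page *pg)
	{
		unsigned long memtype;

		spin_lock(&memtype_lock);	/* keeps the two flag updates atomic */
		memtype = get_page_memtype(pg);
		if (memtype == -1)		/* still in the default state */
			set_page_memtype(pg, _PAGE_CACHE_WC);
		spin_unlock(&memtype_lock);

		return memtype;
	}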
diff --git a/arch/x86/include/asm/checksum_32.h b/arch/x86/include/asm/checksum_32.h
index 7c5ef8b14d92..46fc474fd819 100644
--- a/arch/x86/include/asm/checksum_32.h
+++ b/arch/x86/include/asm/checksum_32.h
@@ -161,7 +161,8 @@ static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
 	    "adcl $0, %0	;\n"
 	    : "=&r" (sum)
 	    : "r" (saddr), "r" (daddr),
-	      "r" (htonl(len)), "r" (htonl(proto)), "0" (sum));
+	      "r" (htonl(len)), "r" (htonl(proto)), "0" (sum)
+	    : "memory");
 
 	return csum_fold(sum);
 }
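The added "memory" clobber matters because the asm body dereferences *saddr and *daddr while the constraints only pass the pointers; without the clobber the compiler may legally keep a pending store to those buffers in registers or sink it past the asm, checksumming stale data. A contrived illustration (illustrative, not kernel code):

	static __sum16 example(struct in6_addr *saddr,
			       const struct in6_addr *daddr,
			       __u32 len, unsigned short proto)
	{
		/* a store the checksum asm must observe: */
		saddr->s6_addr32[0] = htonl(0xfe800000);

		/* csum_ipv6_magic() reads *saddr inside its asm body; the
		 * "memory" clobber is what forces the store above to be
		 * issued before the asm executes. */
		return csum_ipv6_magic(saddr, daddr, len, proto, 0);
	}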
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index 82ceb788a981..ee1931be6593 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -312,19 +312,23 @@ static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
 
 extern unsigned long long cmpxchg_486_u64(volatile void *, u64, u64);
 
 #define cmpxchg64(ptr, o, n)						\
 ({									\
 	__typeof__(*(ptr)) __ret;					\
-	if (likely(boot_cpu_data.x86 > 4))				\
-		__ret = (__typeof__(*(ptr)))__cmpxchg64((ptr),		\
-				(unsigned long long)(o),		\
-				(unsigned long long)(n));		\
-	else								\
-		__ret = (__typeof__(*(ptr)))cmpxchg_486_u64((ptr),	\
-				(unsigned long long)(o),		\
-				(unsigned long long)(n));		\
-	__ret;								\
-})
+	__typeof__(*(ptr)) __old = (o);					\
+	__typeof__(*(ptr)) __new = (n);					\
+	alternative_io("call cmpxchg8b_emu",				\
+		       "lock; cmpxchg8b (%%esi)" ,			\
+		       X86_FEATURE_CX8,					\
+		       "=A" (__ret),					\
+		       "S" ((ptr)), "0" (__old),			\
+		       "b" ((unsigned int)__new),			\
+		       "c" ((unsigned int)(__new>>32))			\
+		       : "memory");					\
+	__ret; })
+
+
+
 #define cmpxchg64_local(ptr, o, n)					\
 ({									\
 	__typeof__(*(ptr)) __ret;					\
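With alternative_io(), CPUs advertising X86_FEATURE_CX8 get the inline "lock; cmpxchg8b" patched in at boot, while older CPUs keep the out-of-line call to cmpxchg8b_emu; the runtime boot_cpu_data.x86 branch of the old macro disappears entirely. Either way the operand contract is identical. As a model only (plain C, ignoring the atomicity the real instruction provides), the operation behaves like:

	/* edx:eax ("=A") carries the expected/returned value, ecx:ebx the
	 * new value, and esi the target pointer -- matching the constraints
	 * in the macro above. cmpxchg8b performs all of this atomically. */
	static unsigned long long
	cmpxchg64_model(unsigned long long *ptr,
			unsigned long long old, unsigned long long new)
	{
		unsigned long long cur = *ptr;

		if (cur == old)
			*ptr = new;
		return cur;	/* caller compares result against 'old' */
	}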
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 847fee6493a2..9cfc88b97742 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -96,6 +96,7 @@
 #define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */
 #define X86_FEATURE_EXTD_APICID	(3*32+26) /* has extended APICID (8 bits) */
 #define X86_FEATURE_AMD_DCM	(3*32+27) /* multi-node processor */
+#define X86_FEATURE_APERFMPERF	(3*32+28) /* APERFMPERF */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* "pni" SSE-3 */
diff --git a/arch/x86/include/asm/do_timer.h b/arch/x86/include/asm/do_timer.h
deleted file mode 100644
index 23ecda0b28a0..000000000000
--- a/arch/x86/include/asm/do_timer.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* defines for inline arch setup functions */
-#include <linux/clockchips.h>
-
-#include <asm/i8259.h>
-#include <asm/i8253.h>
-
-/**
- * do_timer_interrupt_hook - hook into timer tick
- *
- * Call the pit clock event handler. see asm/i8253.h
- **/
-
-static inline void do_timer_interrupt_hook(void)
-{
-	global_clock_event->event_handler(global_clock_event);
-}
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 7ecba4d85089..40b4e614fe71 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -126,8 +126,6 @@ extern void e820_reserve_resources(void);
 extern void e820_reserve_resources_late(void);
 extern void setup_memory_map(void);
 extern char *default_machine_specific_memory_setup(void);
-extern char *machine_specific_memory_setup(void);
-extern char *memory_setup(void);
 #endif /* __KERNEL__ */
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 83c1bc8d2e8a..456a304b8172 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -299,6 +299,8 @@ do {									\
 
 #ifdef CONFIG_X86_32
 
+#define STACK_RND_MASK (0x7ff)
+
 #define VDSO_HIGH_BASE		(__fix_to_virt(FIX_VDSO))
 
 #define ARCH_DLINFO		ARCH_DLINFO_IA32(vdso_enabled)
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index ff8cbfa07851..f5693c81a1db 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -49,7 +49,7 @@ BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
 BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
 BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
 
-#ifdef CONFIG_PERF_COUNTERS
+#ifdef CONFIG_PERF_EVENTS
 BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR)
 #endif
 
@@ -61,7 +61,7 @@ BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
 BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR)
 #endif
 
-#ifdef CONFIG_X86_NEW_MCE
+#ifdef CONFIG_X86_MCE
 BUILD_INTERRUPT(mce_self_interrupt,MCE_SELF_VECTOR)
 #endif
 
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 7b2d71df39a6..14f9890eb495 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -132,6 +132,9 @@ enum fixed_addresses {
 #ifdef CONFIG_X86_32
 	FIX_WP_TEST,
 #endif
+#ifdef CONFIG_INTEL_TXT
+	FIX_TBOOT_BASE,
+#endif
 	__end_of_fixed_addresses
 };
 
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index 369f5c5d09a1..b78c0941e422 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -20,7 +20,7 @@
 #ifndef ASM_X86__HYPERVISOR_H
 #define ASM_X86__HYPERVISOR_H
 
-extern unsigned long get_hypervisor_tsc_freq(void);
 extern void init_hypervisor(struct cpuinfo_x86 *c);
+extern void init_hypervisor_platform(void);
 
 #endif
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 85232d32fcb8..7c7c16cde1f8 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -143,6 +143,8 @@ extern int noioapicreroute;
 /* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */
 extern int timer_through_8259;
 
+extern void io_apic_disable_legacy(void);
+
 /*
  * If we use the IO-APIC for IRQ routing, disable automatic
  * assignment of PCI IRQ's.
@@ -176,6 +178,7 @@ extern int setup_ioapic_entry(int apic, int irq,
 			      int polarity, int vector, int pin);
 extern void ioapic_write_entry(int apic, int pin,
 			       struct IO_APIC_route_entry e);
+extern void setup_ioapic_ids_from_mpc(void);
 
 struct mp_ioapic_gsi{
 	int gsi_base;
@@ -187,12 +190,14 @@ int mp_find_ioapic_pin(int ioapic, int gsi);
 void __init mp_register_ioapic(int id, u32 address, u32 gsi_base);
 
 #else  /* !CONFIG_X86_IO_APIC */
+
 #define io_apic_assign_pci_irqs 0
+#define setup_ioapic_ids_from_mpc x86_init_noop
 static const int timer_through_8259 = 0;
 static inline void ioapic_init_mappings(void)	{ }
 static inline void ioapic_insert_resources(void) { }
-
 static inline void probe_nr_irqs_gsi(void)	{ }
+
 #endif
 
 #endif /* _ASM_X86_IO_APIC_H */
diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h
index 0e9fe1d9d971..f35eb45d6576 100644
--- a/arch/x86/include/asm/iomap.h
+++ b/arch/x86/include/asm/iomap.h
@@ -26,13 +26,16 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 
-int
-is_io_mapping_possible(resource_size_t base, unsigned long size);
-
 void *
 iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
 
 void
 iounmap_atomic(void *kvaddr, enum km_type type);
 
+int
+iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot);
+
+void
+iomap_free(resource_size_t base, unsigned long size);
+
 #endif /* _ASM_X86_IOMAP_H */
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index f38481bcd455..ddda6cbed6f4 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -37,7 +37,6 @@ extern void fixup_irqs(void);
 #endif
 
 extern void (*generic_interrupt_extension)(void);
-extern void init_IRQ(void);
 extern void native_init_IRQ(void);
 extern bool handle_irq(unsigned irq, struct pt_regs *regs);
 
@@ -47,4 +46,6 @@ extern unsigned int do_IRQ(struct pt_regs *regs);
 extern DECLARE_BITMAP(used_vectors, NR_VECTORS);
 extern int vector_used_by_percpu_irq(unsigned int vector);
 
+extern void init_ISA_irqs(void);
+
 #endif /* _ASM_X86_IRQ_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3be000435fad..d83892226f73 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -796,6 +796,7 @@ asmlinkage void kvm_handle_fault_on_reboot(void);
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
 int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 5cdd8d100ec9..f1363b72364f 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -9,7 +9,7 @@
  */
 
 #define MCG_BANKCNT_MASK	0xff         /* Number of Banks */
-#define MCG_CTL_P		(1ULL<<8)    /* MCG_CAP register available */
+#define MCG_CTL_P		(1ULL<<8)    /* MCG_CTL register available */
 #define MCG_EXT_P		(1ULL<<9)    /* Extended registers available */
 #define MCG_CMCI_P		(1ULL<<10)   /* CMCI supported */
 #define MCG_EXT_CNT_MASK	0xff0000     /* Number of Extended registers */
@@ -38,6 +38,14 @@
 #define MCM_ADDR_MEM	 3	/* memory address */
 #define MCM_ADDR_GENERIC 7	/* generic */
 
+#define MCJ_CTX_MASK		3
+#define MCJ_CTX(flags)		((flags) & MCJ_CTX_MASK)
+#define MCJ_CTX_RANDOM		0    /* inject context: random */
+#define MCJ_CTX_PROCESS		1    /* inject context: process */
+#define MCJ_CTX_IRQ		2    /* inject context: IRQ */
+#define MCJ_NMI_BROADCAST	4    /* do NMI broadcasting */
+#define MCJ_EXCEPTION		8    /* raise as exception */
+
 /* Fields are zero when not available */
 struct mce {
 	__u64 status;
@@ -48,8 +56,8 @@ struct mce {
 	__u64 tsc;	/* cpu time stamp counter */
 	__u64 time;	/* wall time_t when error was detected */
 	__u8  cpuvendor;	/* cpu vendor as encoded in system.h */
-	__u8  pad1;
-	__u16 pad2;
+	__u8  inject_flags;	/* software inject flags */
+	__u16 pad;
 	__u32 cpuid;	/* CPUID 1 EAX */
 	__u8  cs;		/* code segment */
 	__u8  bank;	/* machine check bank */
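The MCJ_* inject flags occupy the byte freed by renaming pad1 to inject_flags in struct mce. A sketch of how injector-side code might decode them, using only the definitions above (the function and its actions are illustrative, not from this commit):

	static void describe_injected_mce(struct mce *m)
	{
		switch (MCJ_CTX(m->inject_flags)) {	/* low two bits: context */
		case MCJ_CTX_PROCESS:
			/* raise the error from process context */
			break;
		case MCJ_CTX_IRQ:
			/* raise the error from interrupt context */
			break;
		case MCJ_CTX_RANDOM:
		default:
			/* no context preference */
			break;
		}

		if (m->inject_flags & MCJ_NMI_BROADCAST)
			;	/* mirror the event on all CPUs via NMI */
		if (m->inject_flags & MCJ_EXCEPTION)
			;	/* deliver as a machine check exception */
	}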
@@ -115,13 +123,6 @@ void mcheck_init(struct cpuinfo_x86 *c);
 static inline void mcheck_init(struct cpuinfo_x86 *c) {}
 #endif
 
-#ifdef CONFIG_X86_OLD_MCE
-extern int nr_mce_banks;
-void amd_mcheck_init(struct cpuinfo_x86 *c);
-void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
-void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
-#endif
-
 #ifdef CONFIG_X86_ANCIENT_MCE
 void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
 void winchip_mcheck_init(struct cpuinfo_x86 *c);
@@ -132,15 +133,18 @@ static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
 static inline void enable_p5_mce(void) {}
 #endif
 
+extern void (*x86_mce_decode_callback)(struct mce *m);
+
 void mce_setup(struct mce *m);
 void mce_log(struct mce *m);
 DECLARE_PER_CPU(struct sys_device, mce_dev);
 
 /*
- * To support more than 128 would need to escape the predefined
- * Linux defined extended banks first.
+ * Maximum banks number.
+ * This is the limit of the current register layout on
+ * Intel CPUs.
  */
-#define MAX_NR_BANKS (MCE_EXTENDED_BANK - 1)
+#define MAX_NR_BANKS 32
 
 #ifdef CONFIG_X86_MCE_INTEL
 extern int mce_cmci_disabled;
@@ -208,11 +212,7 @@ extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
 
 void intel_init_thermal(struct cpuinfo_x86 *c);
 
-#ifdef CONFIG_X86_NEW_MCE
 void mce_log_therm_throt_event(__u64 status);
-#else
-static inline void mce_log_therm_throt_event(__u64 status) {}
-#endif
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_X86_MCE_H */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index f923203dc39a..4a2d4e0c18d9 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h | |||
@@ -37,12 +37,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, | |||
37 | 37 | ||
38 | if (likely(prev != next)) { | 38 | if (likely(prev != next)) { |
39 | /* stop flush ipis for the previous mm */ | 39 | /* stop flush ipis for the previous mm */ |
40 | cpu_clear(cpu, prev->cpu_vm_mask); | 40 | cpumask_clear_cpu(cpu, mm_cpumask(prev)); |
41 | #ifdef CONFIG_SMP | 41 | #ifdef CONFIG_SMP |
42 | percpu_write(cpu_tlbstate.state, TLBSTATE_OK); | 42 | percpu_write(cpu_tlbstate.state, TLBSTATE_OK); |
43 | percpu_write(cpu_tlbstate.active_mm, next); | 43 | percpu_write(cpu_tlbstate.active_mm, next); |
44 | #endif | 44 | #endif |
45 | cpu_set(cpu, next->cpu_vm_mask); | 45 | cpumask_set_cpu(cpu, mm_cpumask(next)); |
46 | 46 | ||
47 | /* Re-load page tables */ | 47 | /* Re-load page tables */ |
48 | load_cr3(next->pgd); | 48 | load_cr3(next->pgd); |
@@ -58,7 +58,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, | |||
58 | percpu_write(cpu_tlbstate.state, TLBSTATE_OK); | 58 | percpu_write(cpu_tlbstate.state, TLBSTATE_OK); |
59 | BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next); | 59 | BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next); |
60 | 60 | ||
61 | if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { | 61 | if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next))) { |
62 | /* We were in lazy tlb mode and leave_mm disabled | 62 | /* We were in lazy tlb mode and leave_mm disabled |
63 | * tlb flush IPI delivery. We must reload CR3 | 63 | * tlb flush IPI delivery. We must reload CR3 |
64 | * to make sure to use no freed page tables. | 64 | * to make sure to use no freed page tables. |
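The switch_mm() hunk above replaces open-coded cpu_vm_mask bit operations with the cpumask API behind the mm_cpumask() accessor, so the mask's representation can change later (e.g. to an off-stack cpumask) without touching callers. A sketch of the idiom; mark_mm_on_cpu is a hypothetical helper for illustration only:

	#include <linux/mm_types.h>
	#include <linux/cpumask.h>

	/* hypothetical helper showing the new accessor-based idiom */
	static inline void mark_mm_on_cpu(struct mm_struct *mm, int cpu)
	{
		/* old: cpu_set(cpu, mm->cpu_vm_mask);  -- pokes the field directly */
		cpumask_set_cpu(cpu, mm_cpumask(mm));	/* new: via the accessor */
	}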
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index e2a1bb6d71ea..79c94500c0bb 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h | |||
@@ -4,6 +4,7 @@ | |||
4 | #include <linux/init.h> | 4 | #include <linux/init.h> |
5 | 5 | ||
6 | #include <asm/mpspec_def.h> | 6 | #include <asm/mpspec_def.h> |
7 | #include <asm/x86_init.h> | ||
7 | 8 | ||
8 | extern int apic_version[MAX_APICS]; | 9 | extern int apic_version[MAX_APICS]; |
9 | extern int pic_mode; | 10 | extern int pic_mode; |
@@ -41,9 +42,6 @@ extern int quad_local_to_mp_bus_id [NR_CPUS/4][4]; | |||
41 | 42 | ||
42 | #endif /* CONFIG_X86_64 */ | 43 | #endif /* CONFIG_X86_64 */ |
43 | 44 | ||
44 | extern void early_find_smp_config(void); | ||
45 | extern void early_get_smp_config(void); | ||
46 | |||
47 | #if defined(CONFIG_MCA) || defined(CONFIG_EISA) | 45 | #if defined(CONFIG_MCA) || defined(CONFIG_EISA) |
48 | extern int mp_bus_id_to_type[MAX_MP_BUSSES]; | 46 | extern int mp_bus_id_to_type[MAX_MP_BUSSES]; |
49 | #endif | 47 | #endif |
@@ -52,20 +50,55 @@ extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); | |||
52 | 50 | ||
53 | extern unsigned int boot_cpu_physical_apicid; | 51 | extern unsigned int boot_cpu_physical_apicid; |
54 | extern unsigned int max_physical_apicid; | 52 | extern unsigned int max_physical_apicid; |
55 | extern int smp_found_config; | ||
56 | extern int mpc_default_type; | 53 | extern int mpc_default_type; |
57 | extern unsigned long mp_lapic_addr; | 54 | extern unsigned long mp_lapic_addr; |
58 | 55 | ||
59 | extern void get_smp_config(void); | 56 | #ifdef CONFIG_X86_LOCAL_APIC |
57 | extern int smp_found_config; | ||
58 | #else | ||
59 | # define smp_found_config 0 | ||
60 | #endif | ||
61 | |||
62 | static inline void get_smp_config(void) | ||
63 | { | ||
64 | x86_init.mpparse.get_smp_config(0); | ||
65 | } | ||
66 | |||
67 | static inline void early_get_smp_config(void) | ||
68 | { | ||
69 | x86_init.mpparse.get_smp_config(1); | ||
70 | } | ||
71 | |||
72 | static inline void find_smp_config(void) | ||
73 | { | ||
74 | x86_init.mpparse.find_smp_config(1); | ||
75 | } | ||
76 | |||
77 | static inline void early_find_smp_config(void) | ||
78 | { | ||
79 | x86_init.mpparse.find_smp_config(0); | ||
80 | } | ||
60 | 81 | ||
61 | #ifdef CONFIG_X86_MPPARSE | 82 | #ifdef CONFIG_X86_MPPARSE |
62 | extern void find_smp_config(void); | ||
63 | extern void early_reserve_e820_mpc_new(void); | 83 | extern void early_reserve_e820_mpc_new(void); |
64 | extern int enable_update_mptable; | 84 | extern int enable_update_mptable; |
85 | extern int default_mpc_apic_id(struct mpc_cpu *m); | ||
86 | extern void default_smp_read_mpc_oem(struct mpc_table *mpc); | ||
87 | # ifdef CONFIG_X86_IO_APIC | ||
88 | extern void default_mpc_oem_bus_info(struct mpc_bus *m, char *str); | ||
89 | # else | ||
90 | # define default_mpc_oem_bus_info NULL | ||
91 | # endif | ||
92 | extern void default_find_smp_config(unsigned int reserve); | ||
93 | extern void default_get_smp_config(unsigned int early); | ||
65 | #else | 94 | #else |
66 | static inline void find_smp_config(void) { } | ||
67 | static inline void early_reserve_e820_mpc_new(void) { } | 95 | static inline void early_reserve_e820_mpc_new(void) { } |
68 | #define enable_update_mptable 0 | 96 | #define enable_update_mptable 0 |
97 | #define default_mpc_apic_id NULL | ||
98 | #define default_smp_read_mpc_oem NULL | ||
99 | #define default_mpc_oem_bus_info NULL | ||
100 | #define default_find_smp_config x86_init_uint_noop | ||
101 | #define default_get_smp_config x86_init_uint_noop | ||
69 | #endif | 102 | #endif |
70 | 103 | ||
71 | void __cpuinit generic_processor_info(int apicid, int version); | 104 | void __cpuinit generic_processor_info(int apicid, int version); |
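get_smp_config() and friends are now thin inlines dispatching through x86_init.mpparse, which is what lets a subarchitecture override MP-table handling at runtime instead of through x86_quirks. A hedged sketch of the override pattern this enables; the numaq_* handlers are placeholders for any platform's implementations:

	#include <asm/x86_init.h>

	static void __init numaq_find_smp_config(unsigned int reserve)
	{
		/* platform specific table discovery */
	}

	static void __init numaq_get_smp_config(unsigned int early)
	{
		/* platform specific table parsing */
	}

	static void __init numaq_register(void)
	{
		x86_init.mpparse.find_smp_config = numaq_find_smp_config;
		x86_init.mpparse.get_smp_config  = numaq_get_smp_config;
	}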
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index bd5549034a95..4ffe09b2ad75 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
@@ -81,8 +81,15 @@ | |||
81 | #define MSR_IA32_MC0_ADDR 0x00000402 | 81 | #define MSR_IA32_MC0_ADDR 0x00000402 |
82 | #define MSR_IA32_MC0_MISC 0x00000403 | 82 | #define MSR_IA32_MC0_MISC 0x00000403 |
83 | 83 | ||
84 | #define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x)) | ||
85 | #define MSR_IA32_MCx_STATUS(x) (MSR_IA32_MC0_STATUS + 4*(x)) | ||
86 | #define MSR_IA32_MCx_ADDR(x) (MSR_IA32_MC0_ADDR + 4*(x)) | ||
87 | #define MSR_IA32_MCx_MISC(x) (MSR_IA32_MC0_MISC + 4*(x)) | ||
88 | |||
84 | /* These are consecutive and not in the normal 4er MCE bank block */ | 89 | /* These are consecutive and not in the normal 4er MCE bank block */ |
85 | #define MSR_IA32_MC0_CTL2 0x00000280 | 90 | #define MSR_IA32_MC0_CTL2 0x00000280 |
91 | #define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x)) | ||
92 | |||
86 | #define CMCI_EN (1ULL << 30) | 93 | #define CMCI_EN (1ULL << 30) |
87 | #define CMCI_THRESHOLD_MASK 0xffffULL | 94 | #define CMCI_THRESHOLD_MASK 0xffffULL |
88 | 95 | ||
@@ -215,6 +222,10 @@ | |||
215 | 222 | ||
216 | #define THERM_STATUS_PROCHOT (1 << 0) | 223 | #define THERM_STATUS_PROCHOT (1 << 0) |
217 | 224 | ||
225 | #define MSR_THERM2_CTL 0x0000019d | ||
226 | |||
227 | #define MSR_THERM2_CTL_TM_SELECT (1ULL << 16) | ||
228 | |||
218 | #define MSR_IA32_MISC_ENABLE 0x000001a0 | 229 | #define MSR_IA32_MISC_ENABLE 0x000001a0 |
219 | 230 | ||
220 | /* MISC_ENABLE bits: architectural */ | 231 | /* MISC_ENABLE bits: architectural */ |
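The MSR_IA32_MCx_*() macros replace open-coded "MSR_IA32_MC0_STATUS + 4*bank" arithmetic at the call sites. A minimal sketch of a bank scan using them; the banks argument and the surrounding function are assumed context:

	#include <asm/msr.h>
	#include <asm/mce.h>

	static void scan_mce_banks(int banks)
	{
		u64 status;
		int i;

		for (i = 0; i < banks; i++) {
			rdmsrl(MSR_IA32_MCx_STATUS(i), status);
			if (status & MCI_STATUS_VAL)
				; /* bank i holds a valid machine-check record */
		}
	}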
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index a51ada8467de..4365ffdb461f 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h | |||
@@ -121,6 +121,9 @@ extern int mtrr_del_page(int reg, unsigned long base, unsigned long size); | |||
121 | extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi); | 121 | extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi); |
122 | extern void mtrr_ap_init(void); | 122 | extern void mtrr_ap_init(void); |
123 | extern void mtrr_bp_init(void); | 123 | extern void mtrr_bp_init(void); |
124 | extern void set_mtrr_aps_delayed_init(void); | ||
125 | extern void mtrr_aps_init(void); | ||
126 | extern void mtrr_bp_restore(void); | ||
124 | extern int mtrr_trim_uncached_memory(unsigned long end_pfn); | 127 | extern int mtrr_trim_uncached_memory(unsigned long end_pfn); |
125 | extern int amd_special_default_mtrr(void); | 128 | extern int amd_special_default_mtrr(void); |
126 | # else | 129 | # else |
@@ -161,6 +164,9 @@ static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) | |||
161 | 164 | ||
162 | #define mtrr_ap_init() do {} while (0) | 165 | #define mtrr_ap_init() do {} while (0) |
163 | #define mtrr_bp_init() do {} while (0) | 166 | #define mtrr_bp_init() do {} while (0) |
167 | #define set_mtrr_aps_delayed_init() do {} while (0) | ||
168 | #define mtrr_aps_init() do {} while (0) | ||
169 | #define mtrr_bp_restore() do {} while (0) | ||
164 | # endif | 170 | # endif |
165 | 171 | ||
166 | #ifdef CONFIG_COMPAT | 172 | #ifdef CONFIG_COMPAT |
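The three new mtrr entry points support batching AP MTRR programming instead of doing it from each CPU's online callback. A sketch of the call order this implies on the boot and resume paths, assuming call sites in smpboot and suspend code (which are not part of this hunk):

	set_mtrr_aps_delayed_init();	/* APs skip MTRR setup while coming up */
	/* ... smp_init() brings the secondary CPUs online ... */
	mtrr_aps_init();		/* program all APs' MTRRs in one rendezvous */

	/* on resume, the BP's saved MTRR state is reapplied: */
	mtrr_bp_restore();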
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index e63cf7d441e1..139d4c1a33a7 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h | |||
@@ -40,8 +40,7 @@ extern unsigned int nmi_watchdog; | |||
40 | #define NMI_INVALID 3 | 40 | #define NMI_INVALID 3 |
41 | 41 | ||
42 | struct ctl_table; | 42 | struct ctl_table; |
43 | struct file; | 43 | extern int proc_nmi_enabled(struct ctl_table *, int , |
44 | extern int proc_nmi_enabled(struct ctl_table *, int , struct file *, | ||
45 | void __user *, size_t *, loff_t *); | 44 | void __user *, size_t *, loff_t *); |
46 | extern int unknown_nmi_panic; | 45 | extern int unknown_nmi_panic; |
47 | 46 | ||
diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h index ad2668ee1aa7..6d8723a766cc 100644 --- a/arch/x86/include/asm/nops.h +++ b/arch/x86/include/asm/nops.h | |||
@@ -65,6 +65,8 @@ | |||
65 | 6: osp nopl 0x00(%eax,%eax,1) | 65 | 6: osp nopl 0x00(%eax,%eax,1) |
66 | 7: nopl 0x00000000(%eax) | 66 | 7: nopl 0x00000000(%eax) |
67 | 8: nopl 0x00000000(%eax,%eax,1) | 67 | 8: nopl 0x00000000(%eax,%eax,1) |
68 | Note: All the above are assumed to be a single instruction. | ||
69 | There is kernel code that depends on this. | ||
68 | */ | 70 | */ |
69 | #define P6_NOP1 GENERIC_NOP1 | 71 | #define P6_NOP1 GENERIC_NOP1 |
70 | #define P6_NOP2 ".byte 0x66,0x90\n" | 72 | #define P6_NOP2 ".byte 0x66,0x90\n" |
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 40d6586af25b..efb38994859c 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h | |||
@@ -24,22 +24,6 @@ static inline void load_sp0(struct tss_struct *tss, | |||
24 | PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread); | 24 | PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread); |
25 | } | 25 | } |
26 | 26 | ||
27 | #define ARCH_SETUP pv_init_ops.arch_setup(); | ||
28 | static inline unsigned long get_wallclock(void) | ||
29 | { | ||
30 | return PVOP_CALL0(unsigned long, pv_time_ops.get_wallclock); | ||
31 | } | ||
32 | |||
33 | static inline int set_wallclock(unsigned long nowtime) | ||
34 | { | ||
35 | return PVOP_CALL1(int, pv_time_ops.set_wallclock, nowtime); | ||
36 | } | ||
37 | |||
38 | static inline void (*choose_time_init(void))(void) | ||
39 | { | ||
40 | return pv_time_ops.time_init; | ||
41 | } | ||
42 | |||
43 | /* The paravirtualized CPUID instruction. */ | 27 | /* The paravirtualized CPUID instruction. */ |
44 | static inline void __cpuid(unsigned int *eax, unsigned int *ebx, | 28 | static inline void __cpuid(unsigned int *eax, unsigned int *ebx, |
45 | unsigned int *ecx, unsigned int *edx) | 29 | unsigned int *ecx, unsigned int *edx) |
@@ -245,7 +229,6 @@ static inline unsigned long long paravirt_sched_clock(void) | |||
245 | { | 229 | { |
246 | return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock); | 230 | return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock); |
247 | } | 231 | } |
248 | #define calibrate_tsc() (pv_time_ops.get_tsc_khz()) | ||
249 | 232 | ||
250 | static inline unsigned long long paravirt_read_pmc(int counter) | 233 | static inline unsigned long long paravirt_read_pmc(int counter) |
251 | { | 234 | { |
@@ -363,34 +346,6 @@ static inline void slow_down_io(void) | |||
363 | #endif | 346 | #endif |
364 | } | 347 | } |
365 | 348 | ||
366 | #ifdef CONFIG_X86_LOCAL_APIC | ||
367 | static inline void setup_boot_clock(void) | ||
368 | { | ||
369 | PVOP_VCALL0(pv_apic_ops.setup_boot_clock); | ||
370 | } | ||
371 | |||
372 | static inline void setup_secondary_clock(void) | ||
373 | { | ||
374 | PVOP_VCALL0(pv_apic_ops.setup_secondary_clock); | ||
375 | } | ||
376 | #endif | ||
377 | |||
378 | static inline void paravirt_post_allocator_init(void) | ||
379 | { | ||
380 | if (pv_init_ops.post_allocator_init) | ||
381 | (*pv_init_ops.post_allocator_init)(); | ||
382 | } | ||
383 | |||
384 | static inline void paravirt_pagetable_setup_start(pgd_t *base) | ||
385 | { | ||
386 | (*pv_mmu_ops.pagetable_setup_start)(base); | ||
387 | } | ||
388 | |||
389 | static inline void paravirt_pagetable_setup_done(pgd_t *base) | ||
390 | { | ||
391 | (*pv_mmu_ops.pagetable_setup_done)(base); | ||
392 | } | ||
393 | |||
394 | #ifdef CONFIG_SMP | 349 | #ifdef CONFIG_SMP |
395 | static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip, | 350 | static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip, |
396 | unsigned long start_esp) | 351 | unsigned long start_esp) |
@@ -885,42 +840,22 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock) | |||
885 | 840 | ||
886 | static inline unsigned long __raw_local_save_flags(void) | 841 | static inline unsigned long __raw_local_save_flags(void) |
887 | { | 842 | { |
888 | unsigned long f; | 843 | return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl); |
889 | |||
890 | asm volatile(paravirt_alt(PARAVIRT_CALL) | ||
891 | : "=a"(f) | ||
892 | : paravirt_type(pv_irq_ops.save_fl), | ||
893 | paravirt_clobber(CLBR_EAX) | ||
894 | : "memory", "cc"); | ||
895 | return f; | ||
896 | } | 844 | } |
897 | 845 | ||
898 | static inline void raw_local_irq_restore(unsigned long f) | 846 | static inline void raw_local_irq_restore(unsigned long f) |
899 | { | 847 | { |
900 | asm volatile(paravirt_alt(PARAVIRT_CALL) | 848 | PVOP_VCALLEE1(pv_irq_ops.restore_fl, f); |
901 | : "=a"(f) | ||
902 | : PV_FLAGS_ARG(f), | ||
903 | paravirt_type(pv_irq_ops.restore_fl), | ||
904 | paravirt_clobber(CLBR_EAX) | ||
905 | : "memory", "cc"); | ||
906 | } | 849 | } |
907 | 850 | ||
908 | static inline void raw_local_irq_disable(void) | 851 | static inline void raw_local_irq_disable(void) |
909 | { | 852 | { |
910 | asm volatile(paravirt_alt(PARAVIRT_CALL) | 853 | PVOP_VCALLEE0(pv_irq_ops.irq_disable); |
911 | : | ||
912 | : paravirt_type(pv_irq_ops.irq_disable), | ||
913 | paravirt_clobber(CLBR_EAX) | ||
914 | : "memory", "eax", "cc"); | ||
915 | } | 854 | } |
916 | 855 | ||
917 | static inline void raw_local_irq_enable(void) | 856 | static inline void raw_local_irq_enable(void) |
918 | { | 857 | { |
919 | asm volatile(paravirt_alt(PARAVIRT_CALL) | 858 | PVOP_VCALLEE0(pv_irq_ops.irq_enable); |
920 | : | ||
921 | : paravirt_type(pv_irq_ops.irq_enable), | ||
922 | paravirt_clobber(CLBR_EAX) | ||
923 | : "memory", "eax", "cc"); | ||
924 | } | 859 | } |
925 | 860 | ||
926 | static inline unsigned long __raw_local_irq_save(void) | 861 | static inline unsigned long __raw_local_irq_save(void) |
@@ -948,6 +883,8 @@ static inline unsigned long __raw_local_irq_save(void) | |||
948 | #undef PVOP_VCALL4 | 883 | #undef PVOP_VCALL4 |
949 | #undef PVOP_CALL4 | 884 | #undef PVOP_CALL4 |
950 | 885 | ||
886 | extern void default_banner(void); | ||
887 | |||
951 | #else /* __ASSEMBLY__ */ | 888 | #else /* __ASSEMBLY__ */ |
952 | 889 | ||
953 | #define _PVSITE(ptype, clobbers, ops, word, algn) \ | 890 | #define _PVSITE(ptype, clobbers, ops, word, algn) \ |
@@ -1088,5 +1025,7 @@ static inline unsigned long __raw_local_irq_save(void) | |||
1088 | #endif /* CONFIG_X86_32 */ | 1025 | #endif /* CONFIG_X86_32 */ |
1089 | 1026 | ||
1090 | #endif /* __ASSEMBLY__ */ | 1027 | #endif /* __ASSEMBLY__ */ |
1091 | #endif /* CONFIG_PARAVIRT */ | 1028 | #else /* CONFIG_PARAVIRT */ |
1029 | # define default_banner x86_init_noop | ||
1030 | #endif /* !CONFIG_PARAVIRT */ | ||
1092 | #endif /* _ASM_X86_PARAVIRT_H */ | 1031 | #endif /* _ASM_X86_PARAVIRT_H */ |
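The irq-flag wrappers drop their hand-rolled asm in favor of PVOP_CALLEE*(), which assumes the target is a callee-saved thunk: everything except the return register is preserved, so call sites need no clobber list. A hedged sketch of how a backend satisfies that contract using the existing PV_CALLEE_SAVE helpers (defined elsewhere, not in this hunk); my_save_fl is a placeholder:

	static unsigned long my_save_fl(void)
	{
		return native_save_fl();
	}
	/* emits an asm thunk that saves/restores caller-clobbered regs */
	PV_CALLEE_SAVE_REGS_THUNK(my_save_fl);

	/* registration: */
	pv_irq_ops.save_fl = PV_CALLEE_SAVE(my_save_fl);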
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 25402d0006e7..9357473c8da0 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h | |||
@@ -78,14 +78,6 @@ struct pv_init_ops { | |||
78 | */ | 78 | */ |
79 | unsigned (*patch)(u8 type, u16 clobber, void *insnbuf, | 79 | unsigned (*patch)(u8 type, u16 clobber, void *insnbuf, |
80 | unsigned long addr, unsigned len); | 80 | unsigned long addr, unsigned len); |
81 | |||
82 | /* Basic arch-specific setup */ | ||
83 | void (*arch_setup)(void); | ||
84 | char *(*memory_setup)(void); | ||
85 | void (*post_allocator_init)(void); | ||
86 | |||
87 | /* Print a banner to identify the environment */ | ||
88 | void (*banner)(void); | ||
89 | }; | 81 | }; |
90 | 82 | ||
91 | 83 | ||
@@ -96,12 +88,6 @@ struct pv_lazy_ops { | |||
96 | }; | 88 | }; |
97 | 89 | ||
98 | struct pv_time_ops { | 90 | struct pv_time_ops { |
99 | void (*time_init)(void); | ||
100 | |||
101 | /* Set and set time of day */ | ||
102 | unsigned long (*get_wallclock)(void); | ||
103 | int (*set_wallclock)(unsigned long); | ||
104 | |||
105 | unsigned long long (*sched_clock)(void); | 91 | unsigned long long (*sched_clock)(void); |
106 | unsigned long (*get_tsc_khz)(void); | 92 | unsigned long (*get_tsc_khz)(void); |
107 | }; | 93 | }; |
@@ -203,8 +189,6 @@ struct pv_cpu_ops { | |||
203 | }; | 189 | }; |
204 | 190 | ||
205 | struct pv_irq_ops { | 191 | struct pv_irq_ops { |
206 | void (*init_IRQ)(void); | ||
207 | |||
208 | /* | 192 | /* |
209 | * Get/set interrupt state. save_fl and restore_fl are only | 193 | * Get/set interrupt state. save_fl and restore_fl are only |
210 | * expected to use X86_EFLAGS_IF; all other bits | 194 | * expected to use X86_EFLAGS_IF; all other bits |
@@ -229,9 +213,6 @@ struct pv_irq_ops { | |||
229 | 213 | ||
230 | struct pv_apic_ops { | 214 | struct pv_apic_ops { |
231 | #ifdef CONFIG_X86_LOCAL_APIC | 215 | #ifdef CONFIG_X86_LOCAL_APIC |
232 | void (*setup_boot_clock)(void); | ||
233 | void (*setup_secondary_clock)(void); | ||
234 | |||
235 | void (*startup_ipi_hook)(int phys_apicid, | 216 | void (*startup_ipi_hook)(int phys_apicid, |
236 | unsigned long start_eip, | 217 | unsigned long start_eip, |
237 | unsigned long start_esp); | 218 | unsigned long start_esp); |
@@ -239,15 +220,6 @@ struct pv_apic_ops { | |||
239 | }; | 220 | }; |
240 | 221 | ||
241 | struct pv_mmu_ops { | 222 | struct pv_mmu_ops { |
242 | /* | ||
243 | * Called before/after init_mm pagetable setup. setup_start | ||
244 | * may reset %cr3, and may pre-install parts of the pagetable; | ||
245 | * pagetable setup is expected to preserve any existing | ||
246 | * mapping. | ||
247 | */ | ||
248 | void (*pagetable_setup_start)(pgd_t *pgd_base); | ||
249 | void (*pagetable_setup_done)(pgd_t *pgd_base); | ||
250 | |||
251 | unsigned long (*read_cr2)(void); | 223 | unsigned long (*read_cr2)(void); |
252 | void (*write_cr2)(unsigned long); | 224 | void (*write_cr2)(unsigned long); |
253 | 225 | ||
@@ -522,10 +494,11 @@ int paravirt_disable_iospace(void); | |||
522 | #define EXTRA_CLOBBERS | 494 | #define EXTRA_CLOBBERS |
523 | #define VEXTRA_CLOBBERS | 495 | #define VEXTRA_CLOBBERS |
524 | #else /* CONFIG_X86_64 */ | 496 | #else /* CONFIG_X86_64 */ |
497 | /* [re]ax isn't an arg, but the return val */ | ||
525 | #define PVOP_VCALL_ARGS \ | 498 | #define PVOP_VCALL_ARGS \ |
526 | unsigned long __edi = __edi, __esi = __esi, \ | 499 | unsigned long __edi = __edi, __esi = __esi, \ |
527 | __edx = __edx, __ecx = __ecx | 500 | __edx = __edx, __ecx = __ecx, __eax = __eax |
528 | #define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax | 501 | #define PVOP_CALL_ARGS PVOP_VCALL_ARGS |
529 | 502 | ||
530 | #define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x)) | 503 | #define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x)) |
531 | #define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x)) | 504 | #define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x)) |
@@ -537,6 +510,7 @@ int paravirt_disable_iospace(void); | |||
537 | "=c" (__ecx) | 510 | "=c" (__ecx) |
538 | #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax) | 511 | #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax) |
539 | 512 | ||
513 | /* void functions are still allowed [re]ax for scratch */ | ||
540 | #define PVOP_VCALLEE_CLOBBERS "=a" (__eax) | 514 | #define PVOP_VCALLEE_CLOBBERS "=a" (__eax) |
541 | #define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS | 515 | #define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS |
542 | 516 | ||
@@ -611,8 +585,8 @@ int paravirt_disable_iospace(void); | |||
611 | VEXTRA_CLOBBERS, \ | 585 | VEXTRA_CLOBBERS, \ |
612 | pre, post, ##__VA_ARGS__) | 586 | pre, post, ##__VA_ARGS__) |
613 | 587 | ||
614 | #define __PVOP_VCALLEESAVE(rettype, op, pre, post, ...) \ | 588 | #define __PVOP_VCALLEESAVE(op, pre, post, ...) \ |
615 | ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ | 589 | ____PVOP_VCALL(op.func, CLBR_RET_REG, \ |
616 | PVOP_VCALLEE_CLOBBERS, , \ | 590 | PVOP_VCALLEE_CLOBBERS, , \ |
617 | pre, post, ##__VA_ARGS__) | 591 | pre, post, ##__VA_ARGS__) |
618 | 592 | ||
diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h index 7af14e512f97..e2c1668dde7a 100644 --- a/arch/x86/include/asm/pat.h +++ b/arch/x86/include/asm/pat.h | |||
@@ -19,4 +19,9 @@ extern int free_memtype(u64 start, u64 end); | |||
19 | extern int kernel_map_sync_memtype(u64 base, unsigned long size, | 19 | extern int kernel_map_sync_memtype(u64 base, unsigned long size, |
20 | unsigned long flag); | 20 | unsigned long flag); |
21 | 21 | ||
22 | int io_reserve_memtype(resource_size_t start, resource_size_t end, | ||
23 | unsigned long *type); | ||
24 | |||
25 | void io_free_memtype(resource_size_t start, resource_size_t end); | ||
26 | |||
22 | #endif /* _ASM_X86_PAT_H */ | 27 | #endif /* _ASM_X86_PAT_H */ |
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 1ff685ca221c..ada8c201d513 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h | |||
@@ -48,7 +48,6 @@ extern unsigned int pcibios_assign_all_busses(void); | |||
48 | #else | 48 | #else |
49 | #define pcibios_assign_all_busses() 0 | 49 | #define pcibios_assign_all_busses() 0 |
50 | #endif | 50 | #endif |
51 | #define pcibios_scan_all_fns(a, b) 0 | ||
52 | 51 | ||
53 | extern unsigned long pci_mem_start; | 52 | extern unsigned long pci_mem_start; |
54 | #define PCIBIOS_MIN_IO 0x1000 | 53 | #define PCIBIOS_MIN_IO 0x1000 |
@@ -144,7 +143,11 @@ static inline int __pcibus_to_node(const struct pci_bus *bus) | |||
144 | static inline const struct cpumask * | 143 | static inline const struct cpumask * |
145 | cpumask_of_pcibus(const struct pci_bus *bus) | 144 | cpumask_of_pcibus(const struct pci_bus *bus) |
146 | { | 145 | { |
147 | return cpumask_of_node(__pcibus_to_node(bus)); | 146 | int node; |
147 | |||
148 | node = __pcibus_to_node(bus); | ||
149 | return (node == -1) ? cpu_online_mask : | ||
150 | cpumask_of_node(node); | ||
148 | } | 151 | } |
149 | #endif | 152 | #endif |
150 | 153 | ||
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 04eacefcfd26..b65a36defeb7 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h | |||
@@ -168,15 +168,6 @@ do { \ | |||
168 | /* We can use this directly for local CPU (faster). */ | 168 | /* We can use this directly for local CPU (faster). */ |
169 | DECLARE_PER_CPU(unsigned long, this_cpu_off); | 169 | DECLARE_PER_CPU(unsigned long, this_cpu_off); |
170 | 170 | ||
171 | #ifdef CONFIG_NEED_MULTIPLE_NODES | ||
172 | void *pcpu_lpage_remapped(void *kaddr); | ||
173 | #else | ||
174 | static inline void *pcpu_lpage_remapped(void *kaddr) | ||
175 | { | ||
176 | return NULL; | ||
177 | } | ||
178 | #endif | ||
179 | |||
180 | #endif /* !__ASSEMBLY__ */ | 171 | #endif /* !__ASSEMBLY__ */ |
181 | 172 | ||
182 | #ifdef CONFIG_SMP | 173 | #ifdef CONFIG_SMP |
diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_event.h index e7b7c938ae27..8d9f8548a870 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_event.h | |||
@@ -1,8 +1,8 @@ | |||
1 | #ifndef _ASM_X86_PERF_COUNTER_H | 1 | #ifndef _ASM_X86_PERF_EVENT_H |
2 | #define _ASM_X86_PERF_COUNTER_H | 2 | #define _ASM_X86_PERF_EVENT_H |
3 | 3 | ||
4 | /* | 4 | /* |
5 | * Performance counter hw details: | 5 | * Performance event hw details: |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #define X86_PMC_MAX_GENERIC 8 | 8 | #define X86_PMC_MAX_GENERIC 8 |
@@ -28,9 +28,20 @@ | |||
28 | */ | 28 | */ |
29 | #define ARCH_PERFMON_EVENT_MASK 0xffff | 29 | #define ARCH_PERFMON_EVENT_MASK 0xffff |
30 | 30 | ||
31 | /* | ||
32 | * filter mask to validate fixed counter events. | ||
33 | * the following filters disqualify for fixed counters: | ||
34 | * - inv | ||
35 | * - edge | ||
36 | * - cnt-mask | ||
37 | * The other filters are supported by fixed counters. | ||
38 | * The any-thread option is supported starting with v3. | ||
39 | */ | ||
40 | #define ARCH_PERFMON_EVENT_FILTER_MASK 0xff840000 | ||
41 | |||
31 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c | 42 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c |
32 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) | 43 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) |
33 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 | 44 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 |
34 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ | 45 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ |
35 | (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) | 46 | (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) |
36 | 47 | ||
@@ -43,7 +54,7 @@ | |||
43 | union cpuid10_eax { | 54 | union cpuid10_eax { |
44 | struct { | 55 | struct { |
45 | unsigned int version_id:8; | 56 | unsigned int version_id:8; |
46 | unsigned int num_counters:8; | 57 | unsigned int num_events:8; |
47 | unsigned int bit_width:8; | 58 | unsigned int bit_width:8; |
48 | unsigned int mask_length:8; | 59 | unsigned int mask_length:8; |
49 | } split; | 60 | } split; |
@@ -52,7 +63,7 @@ union cpuid10_eax { | |||
52 | 63 | ||
53 | union cpuid10_edx { | 64 | union cpuid10_edx { |
54 | struct { | 65 | struct { |
55 | unsigned int num_counters_fixed:4; | 66 | unsigned int num_events_fixed:4; |
56 | unsigned int reserved:28; | 67 | unsigned int reserved:28; |
57 | } split; | 68 | } split; |
58 | unsigned int full; | 69 | unsigned int full; |
@@ -60,7 +71,7 @@ union cpuid10_edx { | |||
60 | 71 | ||
61 | 72 | ||
62 | /* | 73 | /* |
63 | * Fixed-purpose performance counters: | 74 | * Fixed-purpose performance events: |
64 | */ | 75 | */ |
65 | 76 | ||
66 | /* | 77 | /* |
@@ -87,22 +98,22 @@ union cpuid10_edx { | |||
87 | /* | 98 | /* |
88 | * We model BTS tracing as another fixed-mode PMC. | 99 | * We model BTS tracing as another fixed-mode PMC. |
89 | * | 100 | * |
90 | * We choose a value in the middle of the fixed counter range, since lower | 101 | * We choose a value in the middle of the fixed event range, since lower |
91 | * values are used by actual fixed counters and higher values are used | 102 | * values are used by actual fixed events and higher values are used |
92 | * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr. | 103 | * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr. |
93 | */ | 104 | */ |
94 | #define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16) | 105 | #define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16) |
95 | 106 | ||
96 | 107 | ||
97 | #ifdef CONFIG_PERF_COUNTERS | 108 | #ifdef CONFIG_PERF_EVENTS |
98 | extern void init_hw_perf_counters(void); | 109 | extern void init_hw_perf_events(void); |
99 | extern void perf_counters_lapic_init(void); | 110 | extern void perf_events_lapic_init(void); |
100 | 111 | ||
101 | #define PERF_COUNTER_INDEX_OFFSET 0 | 112 | #define PERF_EVENT_INDEX_OFFSET 0 |
102 | 113 | ||
103 | #else | 114 | #else |
104 | static inline void init_hw_perf_counters(void) { } | 115 | static inline void init_hw_perf_events(void) { } |
105 | static inline void perf_counters_lapic_init(void) { } | 116 | static inline void perf_events_lapic_init(void) { } |
106 | #endif | 117 | #endif |
107 | 118 | ||
108 | #endif /* _ASM_X86_PERF_COUNTER_H */ | 119 | #endif /* _ASM_X86_PERF_EVENT_H */ |
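Beyond the rename, the header keeps the CPUID leaf 0xA probing unions, now with num_events fields. A short sketch of reading them, assuming kernel context where cpuid() is available:

	#include <asm/processor.h>
	#include <asm/perf_event.h>

	union cpuid10_eax eax;
	union cpuid10_edx edx;
	unsigned int ebx, ecx;

	cpuid(10, &eax.full, &ebx, &ecx, &edx.full);
	printk(KERN_INFO "PMU v%u: %u generic, %u fixed events\n",
	       eax.split.version_id, eax.split.num_events,
	       edx.split.num_events_fixed);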
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 4c5b51fdc788..af6fd360ab35 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h | |||
@@ -56,16 +56,6 @@ extern struct list_head pgd_list; | |||
56 | #define pte_update(mm, addr, ptep) do { } while (0) | 56 | #define pte_update(mm, addr, ptep) do { } while (0) |
57 | #define pte_update_defer(mm, addr, ptep) do { } while (0) | 57 | #define pte_update_defer(mm, addr, ptep) do { } while (0) |
58 | 58 | ||
59 | static inline void __init paravirt_pagetable_setup_start(pgd_t *base) | ||
60 | { | ||
61 | native_pagetable_setup_start(base); | ||
62 | } | ||
63 | |||
64 | static inline void __init paravirt_pagetable_setup_done(pgd_t *base) | ||
65 | { | ||
66 | native_pagetable_setup_done(base); | ||
67 | } | ||
68 | |||
69 | #define pgd_val(x) native_pgd_val(x) | 59 | #define pgd_val(x) native_pgd_val(x) |
70 | #define __pgd(x) native_make_pgd(x) | 60 | #define __pgd(x) native_make_pgd(x) |
71 | 61 | ||
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 54cb697f4900..d1f4a760be23 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h | |||
@@ -277,6 +277,7 @@ static inline pteval_t pte_flags(pte_t pte) | |||
277 | typedef struct page *pgtable_t; | 277 | typedef struct page *pgtable_t; |
278 | 278 | ||
279 | extern pteval_t __supported_pte_mask; | 279 | extern pteval_t __supported_pte_mask; |
280 | extern void set_nx(void); | ||
280 | extern int nx_enabled; | 281 | extern int nx_enabled; |
281 | 282 | ||
282 | #define pgprot_writecombine pgprot_writecombine | 283 | #define pgprot_writecombine pgprot_writecombine |
@@ -299,8 +300,8 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pte); | |||
299 | extern void native_pagetable_setup_start(pgd_t *base); | 300 | extern void native_pagetable_setup_start(pgd_t *base); |
300 | extern void native_pagetable_setup_done(pgd_t *base); | 301 | extern void native_pagetable_setup_done(pgd_t *base); |
301 | #else | 302 | #else |
302 | static inline void native_pagetable_setup_start(pgd_t *base) {} | 303 | #define native_pagetable_setup_start x86_init_pgd_noop |
303 | static inline void native_pagetable_setup_done(pgd_t *base) {} | 304 | #define native_pagetable_setup_done x86_init_pgd_noop |
304 | #endif | 305 | #endif |
305 | 306 | ||
306 | struct seq_file; | 307 | struct seq_file; |
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 1153037ae9ff..61aafb71c7ef 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -27,6 +27,7 @@ struct mm_struct; | |||
27 | #include <linux/cpumask.h> | 27 | #include <linux/cpumask.h> |
28 | #include <linux/cache.h> | 28 | #include <linux/cache.h> |
29 | #include <linux/threads.h> | 29 | #include <linux/threads.h> |
30 | #include <linux/math64.h> | ||
30 | #include <linux/init.h> | 31 | #include <linux/init.h> |
31 | 32 | ||
32 | #define HBP_NUM 4 | 33 | #define HBP_NUM 4 |
@@ -1020,4 +1021,35 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, | |||
1020 | extern int get_tsc_mode(unsigned long adr); | 1021 | extern int get_tsc_mode(unsigned long adr); |
1021 | extern int set_tsc_mode(unsigned int val); | 1022 | extern int set_tsc_mode(unsigned int val); |
1022 | 1023 | ||
1024 | extern int amd_get_nb_id(int cpu); | ||
1025 | |||
1026 | struct aperfmperf { | ||
1027 | u64 aperf, mperf; | ||
1028 | }; | ||
1029 | |||
1030 | static inline void get_aperfmperf(struct aperfmperf *am) | ||
1031 | { | ||
1032 | WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_APERFMPERF)); | ||
1033 | |||
1034 | rdmsrl(MSR_IA32_APERF, am->aperf); | ||
1035 | rdmsrl(MSR_IA32_MPERF, am->mperf); | ||
1036 | } | ||
1037 | |||
1038 | #define APERFMPERF_SHIFT 10 | ||
1039 | |||
1040 | static inline | ||
1041 | unsigned long calc_aperfmperf_ratio(struct aperfmperf *old, | ||
1042 | struct aperfmperf *new) | ||
1043 | { | ||
1044 | u64 aperf = new->aperf - old->aperf; | ||
1045 | u64 mperf = new->mperf - old->mperf; | ||
1046 | unsigned long ratio = aperf; | ||
1047 | |||
1048 | mperf >>= APERFMPERF_SHIFT; | ||
1049 | if (mperf) | ||
1050 | ratio = div64_u64(aperf, mperf); | ||
1051 | |||
1052 | return ratio; | ||
1053 | } | ||
1054 | |||
1023 | #endif /* _ASM_X86_PROCESSOR_H */ | 1055 | #endif /* _ASM_X86_PROCESSOR_H */ |
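calc_aperfmperf_ratio() returns a 10-bit fixed-point value, (delta_aperf / delta_mperf) << APERFMPERF_SHIFT, so 1024 means the CPU ran at its rated frequency over the interval. A worked sketch of the intended usage; the sampling window and the percentage conversion are illustrative:

	struct aperfmperf start, stop;
	unsigned long ratio, percent;

	get_aperfmperf(&start);		/* per-CPU MSRs: stay on one CPU */
	/* ... measurement window ... */
	get_aperfmperf(&stop);

	ratio   = calc_aperfmperf_ratio(&start, &stop);
	percent = (ratio * 100) >> APERFMPERF_SHIFT;	/* 100 == rated frequency */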
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 4093d1ed6db2..18e496c98ff0 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h | |||
@@ -5,43 +5,6 @@ | |||
5 | 5 | ||
6 | #define COMMAND_LINE_SIZE 2048 | 6 | #define COMMAND_LINE_SIZE 2048 |
7 | 7 | ||
8 | #ifndef __ASSEMBLY__ | ||
9 | |||
10 | /* | ||
11 | * Any setup quirks to be performed? | ||
12 | */ | ||
13 | struct mpc_cpu; | ||
14 | struct mpc_bus; | ||
15 | struct mpc_oemtable; | ||
16 | |||
17 | struct x86_quirks { | ||
18 | int (*arch_pre_time_init)(void); | ||
19 | int (*arch_time_init)(void); | ||
20 | int (*arch_pre_intr_init)(void); | ||
21 | int (*arch_intr_init)(void); | ||
22 | int (*arch_trap_init)(void); | ||
23 | char * (*arch_memory_setup)(void); | ||
24 | int (*mach_get_smp_config)(unsigned int early); | ||
25 | int (*mach_find_smp_config)(unsigned int reserve); | ||
26 | |||
27 | int *mpc_record; | ||
28 | int (*mpc_apic_id)(struct mpc_cpu *m); | ||
29 | void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name); | ||
30 | void (*mpc_oem_pci_bus)(struct mpc_bus *m); | ||
31 | void (*smp_read_mpc_oem)(struct mpc_oemtable *oemtable, | ||
32 | unsigned short oemsize); | ||
33 | int (*setup_ioapic_ids)(void); | ||
34 | }; | ||
35 | |||
36 | extern void x86_quirk_intr_init(void); | ||
37 | |||
38 | extern void x86_quirk_trap_init(void); | ||
39 | |||
40 | extern void x86_quirk_pre_time_init(void); | ||
41 | extern void x86_quirk_time_init(void); | ||
42 | |||
43 | #endif /* __ASSEMBLY__ */ | ||
44 | |||
45 | #ifdef __i386__ | 8 | #ifdef __i386__ |
46 | 9 | ||
47 | #include <linux/pfn.h> | 10 | #include <linux/pfn.h> |
@@ -61,6 +24,7 @@ extern void x86_quirk_time_init(void); | |||
61 | 24 | ||
62 | #ifndef __ASSEMBLY__ | 25 | #ifndef __ASSEMBLY__ |
63 | #include <asm/bootparam.h> | 26 | #include <asm/bootparam.h> |
27 | #include <asm/x86_init.h> | ||
64 | 28 | ||
65 | /* Interrupt control for vSMPowered x86_64 systems */ | 29 | /* Interrupt control for vSMPowered x86_64 systems */ |
66 | #ifdef CONFIG_X86_64 | 30 | #ifdef CONFIG_X86_64 |
@@ -79,11 +43,16 @@ static inline void visws_early_detect(void) { } | |||
79 | static inline int is_visws_box(void) { return 0; } | 43 | static inline int is_visws_box(void) { return 0; } |
80 | #endif | 44 | #endif |
81 | 45 | ||
82 | extern struct x86_quirks *x86_quirks; | ||
83 | extern unsigned long saved_video_mode; | 46 | extern unsigned long saved_video_mode; |
84 | 47 | ||
85 | #ifndef CONFIG_PARAVIRT | 48 | extern void reserve_standard_io_resources(void); |
86 | #define paravirt_post_allocator_init() do {} while (0) | 49 | extern void i386_reserve_resources(void); |
50 | extern void setup_default_timer_irq(void); | ||
51 | |||
52 | #ifdef CONFIG_X86_MRST | ||
53 | extern void x86_mrst_early_setup(void); | ||
54 | #else | ||
55 | static inline void x86_mrst_early_setup(void) { } | ||
87 | #endif | 56 | #endif |
88 | 57 | ||
89 | #ifndef _SETUP | 58 | #ifndef _SETUP |
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 6a84ed166aec..1e796782cd7b 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h | |||
@@ -121,7 +121,6 @@ static inline void arch_send_call_function_single_ipi(int cpu) | |||
121 | smp_ops.send_call_func_single_ipi(cpu); | 121 | smp_ops.send_call_func_single_ipi(cpu); |
122 | } | 122 | } |
123 | 123 | ||
124 | #define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask | ||
125 | static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask) | 124 | static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask) |
126 | { | 125 | { |
127 | smp_ops.send_call_func_ipi(mask); | 126 | smp_ops.send_call_func_ipi(mask); |
diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h index c86f452256de..ae907e617181 100644 --- a/arch/x86/include/asm/string_32.h +++ b/arch/x86/include/asm/string_32.h | |||
@@ -65,7 +65,6 @@ static __always_inline void *__constant_memcpy(void *to, const void *from, | |||
65 | case 4: | 65 | case 4: |
66 | *(int *)to = *(int *)from; | 66 | *(int *)to = *(int *)from; |
67 | return to; | 67 | return to; |
68 | |||
69 | case 3: | 68 | case 3: |
70 | *(short *)to = *(short *)from; | 69 | *(short *)to = *(short *)from; |
71 | *((char *)to + 2) = *((char *)from + 2); | 70 | *((char *)to + 2) = *((char *)from + 2); |
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index d82f39bb7905..8d33bc5462d1 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * Access to user system call parameters and results | 2 | * Access to user system call parameters and results |
3 | * | 3 | * |
4 | * Copyright (C) 2008 Red Hat, Inc. All rights reserved. | 4 | * Copyright (C) 2008-2009 Red Hat, Inc. All rights reserved. |
5 | * | 5 | * |
6 | * This copyrighted material is made available to anyone wishing to use, | 6 | * This copyrighted material is made available to anyone wishing to use, |
7 | * modify, copy, or redistribute it subject to the terms and conditions | 7 | * modify, copy, or redistribute it subject to the terms and conditions |
@@ -16,13 +16,13 @@ | |||
16 | #include <linux/sched.h> | 16 | #include <linux/sched.h> |
17 | #include <linux/err.h> | 17 | #include <linux/err.h> |
18 | 18 | ||
19 | static inline long syscall_get_nr(struct task_struct *task, | 19 | /* |
20 | struct pt_regs *regs) | 20 | * Only the low 32 bits of orig_ax are meaningful, so we return int. |
21 | * This importantly ignores the high bits on 64-bit, so comparisons | ||
22 | * sign-extend the low 32 bits. | ||
23 | */ | ||
24 | static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs) | ||
21 | { | 25 | { |
22 | /* | ||
23 | * We always sign-extend a -1 value being set here, | ||
24 | * so this is always either -1L or a syscall number. | ||
25 | */ | ||
26 | return regs->orig_ax; | 26 | return regs->orig_ax; |
27 | } | 27 | } |
28 | 28 | ||
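The int return type matters on x86_64 because orig_ax is an unsigned long: a native -1 is 0xffffffffffffffff there, while a 32-bit compat task only sets the low word. Truncating to int makes both cases compare equal to -1, as this sketch of a typical caller illustrates:

	int nr = syscall_get_nr(task, regs);

	if (nr == -1) {
		/* not in a syscall, or the syscall was skipped by the tracer */
		return;
	}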
diff --git a/arch/x86/include/asm/time.h b/arch/x86/include/asm/time.h index 50c733aac421..7bdec4e9b739 100644 --- a/arch/x86/include/asm/time.h +++ b/arch/x86/include/asm/time.h | |||
@@ -4,60 +4,7 @@ | |||
4 | extern void hpet_time_init(void); | 4 | extern void hpet_time_init(void); |
5 | 5 | ||
6 | #include <asm/mc146818rtc.h> | 6 | #include <asm/mc146818rtc.h> |
7 | #ifdef CONFIG_X86_32 | ||
8 | #include <linux/efi.h> | ||
9 | |||
10 | static inline unsigned long native_get_wallclock(void) | ||
11 | { | ||
12 | unsigned long retval; | ||
13 | |||
14 | if (efi_enabled) | ||
15 | retval = efi_get_time(); | ||
16 | else | ||
17 | retval = mach_get_cmos_time(); | ||
18 | |||
19 | return retval; | ||
20 | } | ||
21 | |||
22 | static inline int native_set_wallclock(unsigned long nowtime) | ||
23 | { | ||
24 | int retval; | ||
25 | |||
26 | if (efi_enabled) | ||
27 | retval = efi_set_rtc_mmss(nowtime); | ||
28 | else | ||
29 | retval = mach_set_rtc_mmss(nowtime); | ||
30 | |||
31 | return retval; | ||
32 | } | ||
33 | |||
34 | #else | ||
35 | extern void native_time_init_hook(void); | ||
36 | |||
37 | static inline unsigned long native_get_wallclock(void) | ||
38 | { | ||
39 | return mach_get_cmos_time(); | ||
40 | } | ||
41 | |||
42 | static inline int native_set_wallclock(unsigned long nowtime) | ||
43 | { | ||
44 | return mach_set_rtc_mmss(nowtime); | ||
45 | } | ||
46 | |||
47 | #endif | ||
48 | 7 | ||
49 | extern void time_init(void); | 8 | extern void time_init(void); |
50 | 9 | ||
51 | #ifdef CONFIG_PARAVIRT | ||
52 | #include <asm/paravirt.h> | ||
53 | #else /* !CONFIG_PARAVIRT */ | ||
54 | |||
55 | #define get_wallclock() native_get_wallclock() | ||
56 | #define set_wallclock(x) native_set_wallclock(x) | ||
57 | #define choose_time_init() hpet_time_init | ||
58 | |||
59 | #endif /* CONFIG_PARAVIRT */ | ||
60 | |||
61 | extern unsigned long __init calibrate_cpu(void); | ||
62 | |||
63 | #endif /* _ASM_X86_TIME_H */ | 10 | #endif /* _ASM_X86_TIME_H */ |
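The wallclock inlines removed here did not simply disappear; callers now reach the hardware through the x86_platform ops introduced in x86_init.h below. A short sketch of the replacement call path:

	#include <asm/x86_init.h>

	unsigned long now = x86_platform.get_wallclock();
	x86_platform.set_wallclock(now);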
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index 20ca9c4d4686..5469630b27f5 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h | |||
@@ -8,20 +8,16 @@ | |||
8 | #define TICK_SIZE (tick_nsec / 1000) | 8 | #define TICK_SIZE (tick_nsec / 1000) |
9 | 9 | ||
10 | unsigned long long native_sched_clock(void); | 10 | unsigned long long native_sched_clock(void); |
11 | unsigned long native_calibrate_tsc(void); | 11 | extern int recalibrate_cpu_khz(void); |
12 | 12 | ||
13 | #ifdef CONFIG_X86_32 | 13 | #if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC) |
14 | extern int timer_ack; | 14 | extern int timer_ack; |
15 | extern irqreturn_t timer_interrupt(int irq, void *dev_id); | 15 | #else |
16 | #endif /* CONFIG_X86_32 */ | 16 | # define timer_ack (0) |
17 | extern int recalibrate_cpu_khz(void); | 17 | #endif |
18 | 18 | ||
19 | extern int no_timer_check; | 19 | extern int no_timer_check; |
20 | 20 | ||
21 | #ifndef CONFIG_PARAVIRT | ||
22 | #define calibrate_tsc() native_calibrate_tsc() | ||
23 | #endif | ||
24 | |||
25 | /* Accelerators for sched_clock() | 21 | /* Accelerators for sched_clock() |
26 | * convert from cycles(64bits) => nanoseconds (64bits) | 22 | * convert from cycles(64bits) => nanoseconds (64bits) |
27 | * basic equation: | 23 | * basic equation: |
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 26d06e052a18..25a92842dd99 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h | |||
@@ -116,15 +116,11 @@ extern unsigned long node_remap_size[]; | |||
116 | 116 | ||
117 | # define SD_CACHE_NICE_TRIES 1 | 117 | # define SD_CACHE_NICE_TRIES 1 |
118 | # define SD_IDLE_IDX 1 | 118 | # define SD_IDLE_IDX 1 |
119 | # define SD_NEWIDLE_IDX 2 | ||
120 | # define SD_FORKEXEC_IDX 0 | ||
121 | 119 | ||
122 | #else | 120 | #else |
123 | 121 | ||
124 | # define SD_CACHE_NICE_TRIES 2 | 122 | # define SD_CACHE_NICE_TRIES 2 |
125 | # define SD_IDLE_IDX 2 | 123 | # define SD_IDLE_IDX 2 |
126 | # define SD_NEWIDLE_IDX 2 | ||
127 | # define SD_FORKEXEC_IDX 1 | ||
128 | 124 | ||
129 | #endif | 125 | #endif |
130 | 126 | ||
@@ -137,22 +133,20 @@ extern unsigned long node_remap_size[]; | |||
137 | .cache_nice_tries = SD_CACHE_NICE_TRIES, \ | 133 | .cache_nice_tries = SD_CACHE_NICE_TRIES, \ |
138 | .busy_idx = 3, \ | 134 | .busy_idx = 3, \ |
139 | .idle_idx = SD_IDLE_IDX, \ | 135 | .idle_idx = SD_IDLE_IDX, \ |
140 | .newidle_idx = SD_NEWIDLE_IDX, \ | 136 | .newidle_idx = 0, \ |
141 | .wake_idx = 1, \ | 137 | .wake_idx = 0, \ |
142 | .forkexec_idx = SD_FORKEXEC_IDX, \ | 138 | .forkexec_idx = 0, \ |
143 | \ | 139 | \ |
144 | .flags = 1*SD_LOAD_BALANCE \ | 140 | .flags = 1*SD_LOAD_BALANCE \ |
145 | | 1*SD_BALANCE_NEWIDLE \ | 141 | | 1*SD_BALANCE_NEWIDLE \ |
146 | | 1*SD_BALANCE_EXEC \ | 142 | | 1*SD_BALANCE_EXEC \ |
147 | | 1*SD_BALANCE_FORK \ | 143 | | 1*SD_BALANCE_FORK \ |
148 | | 0*SD_WAKE_IDLE \ | 144 | | 0*SD_BALANCE_WAKE \ |
149 | | 1*SD_WAKE_AFFINE \ | 145 | | 1*SD_WAKE_AFFINE \ |
150 | | 1*SD_WAKE_BALANCE \ | ||
151 | | 0*SD_SHARE_CPUPOWER \ | 146 | | 0*SD_SHARE_CPUPOWER \ |
152 | | 0*SD_POWERSAVINGS_BALANCE \ | 147 | | 0*SD_POWERSAVINGS_BALANCE \ |
153 | | 0*SD_SHARE_PKG_RESOURCES \ | 148 | | 0*SD_SHARE_PKG_RESOURCES \ |
154 | | 1*SD_SERIALIZE \ | 149 | | 1*SD_SERIALIZE \ |
155 | | 1*SD_WAKE_IDLE_FAR \ | ||
156 | | 0*SD_PREFER_SIBLING \ | 150 | | 0*SD_PREFER_SIBLING \ |
157 | , \ | 151 | , \ |
158 | .last_balance = jiffies, \ | 152 | .last_balance = jiffies, \ |
@@ -171,21 +165,11 @@ static inline int numa_node_id(void) | |||
171 | return 0; | 165 | return 0; |
172 | } | 166 | } |
173 | 167 | ||
174 | static inline int cpu_to_node(int cpu) | ||
175 | { | ||
176 | return 0; | ||
177 | } | ||
178 | |||
179 | static inline int early_cpu_to_node(int cpu) | 168 | static inline int early_cpu_to_node(int cpu) |
180 | { | 169 | { |
181 | return 0; | 170 | return 0; |
182 | } | 171 | } |
183 | 172 | ||
184 | static inline const struct cpumask *cpumask_of_node(int node) | ||
185 | { | ||
186 | return cpu_online_mask; | ||
187 | } | ||
188 | |||
189 | static inline void setup_node_to_cpumask_map(void) { } | 173 | static inline void setup_node_to_cpumask_map(void) { } |
190 | 174 | ||
191 | #endif | 175 | #endif |
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 38ae163cc91b..c0427295e8f5 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h | |||
@@ -48,7 +48,8 @@ static __always_inline cycles_t vget_cycles(void) | |||
48 | extern void tsc_init(void); | 48 | extern void tsc_init(void); |
49 | extern void mark_tsc_unstable(char *reason); | 49 | extern void mark_tsc_unstable(char *reason); |
50 | extern int unsynchronized_tsc(void); | 50 | extern int unsynchronized_tsc(void); |
51 | int check_tsc_unstable(void); | 51 | extern int check_tsc_unstable(void); |
52 | extern unsigned long native_calibrate_tsc(void); | ||
52 | 53 | ||
53 | /* | 54 | /* |
54 | * Boot-time check whether the TSCs are synchronized across | 55 | * Boot-time check whether the TSCs are synchronized across |
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 5e06259e90e5..632fb44b4cb5 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h | |||
@@ -33,7 +33,7 @@ unsigned long __must_check __copy_from_user_ll_nocache_nozero | |||
33 | * Copy data from kernel space to user space. Caller must check | 33 | * Copy data from kernel space to user space. Caller must check |
34 | * the specified block with access_ok() before calling this function. | 34 | * the specified block with access_ok() before calling this function. |
35 | * The caller should also make sure he pins the user space address | 35 | * The caller should also make sure he pins the user space address |
36 | * so that the we don't result in page fault and sleep. | 36 | * so that we don't result in page fault and sleep. |
37 | * | 37 | * |
38 | * Here we special-case 1, 2 and 4-byte copy_*_user invocations. On a fault | 38 | * Here we special-case 1, 2 and 4-byte copy_*_user invocations. On a fault |
39 | * we return the initial request size (1, 2 or 4), as copy_*_user should do. | 39 | * we return the initial request size (1, 2 or 4), as copy_*_user should do. |
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index 8deaada61bc8..6fb3c209a7e3 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h | |||
@@ -341,7 +341,7 @@ | |||
341 | #define __NR_preadv 333 | 341 | #define __NR_preadv 333 |
342 | #define __NR_pwritev 334 | 342 | #define __NR_pwritev 334 |
343 | #define __NR_rt_tgsigqueueinfo 335 | 343 | #define __NR_rt_tgsigqueueinfo 335 |
344 | #define __NR_perf_counter_open 336 | 344 | #define __NR_perf_event_open 336 |
345 | 345 | ||
346 | #ifdef __KERNEL__ | 346 | #ifdef __KERNEL__ |
347 | 347 | ||
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index b9f3c60de5f7..8d3ad0adbc68 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h | |||
@@ -659,8 +659,8 @@ __SYSCALL(__NR_preadv, sys_preadv) | |||
659 | __SYSCALL(__NR_pwritev, sys_pwritev) | 659 | __SYSCALL(__NR_pwritev, sys_pwritev) |
660 | #define __NR_rt_tgsigqueueinfo 297 | 660 | #define __NR_rt_tgsigqueueinfo 297 |
661 | __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) | 661 | __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) |
662 | #define __NR_perf_counter_open 298 | 662 | #define __NR_perf_event_open 298 |
663 | __SYSCALL(__NR_perf_counter_open, sys_perf_counter_open) | 663 | __SYSCALL(__NR_perf_event_open, sys_perf_event_open) |
664 | 664 | ||
665 | #ifndef __NO_STUBS | 665 | #ifndef __NO_STUBS |
666 | #define __ARCH_WANT_OLD_READDIR | 666 | #define __ARCH_WANT_OLD_READDIR |
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 77a68505419a..04eb6c958b9d 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/numa.h> | 15 | #include <linux/numa.h> |
16 | #include <linux/percpu.h> | 16 | #include <linux/percpu.h> |
17 | #include <linux/timer.h> | 17 | #include <linux/timer.h> |
18 | #include <linux/io.h> | ||
18 | #include <asm/types.h> | 19 | #include <asm/types.h> |
19 | #include <asm/percpu.h> | 20 | #include <asm/percpu.h> |
20 | #include <asm/uv/uv_mmrs.h> | 21 | #include <asm/uv/uv_mmrs.h> |
@@ -258,13 +259,13 @@ static inline unsigned long *uv_global_mmr32_address(int pnode, | |||
258 | static inline void uv_write_global_mmr32(int pnode, unsigned long offset, | 259 | static inline void uv_write_global_mmr32(int pnode, unsigned long offset, |
259 | unsigned long val) | 260 | unsigned long val) |
260 | { | 261 | { |
261 | *uv_global_mmr32_address(pnode, offset) = val; | 262 | writeq(val, uv_global_mmr32_address(pnode, offset)); |
262 | } | 263 | } |
263 | 264 | ||
264 | static inline unsigned long uv_read_global_mmr32(int pnode, | 265 | static inline unsigned long uv_read_global_mmr32(int pnode, |
265 | unsigned long offset) | 266 | unsigned long offset) |
266 | { | 267 | { |
267 | return *uv_global_mmr32_address(pnode, offset); | 268 | return readq(uv_global_mmr32_address(pnode, offset)); |
268 | } | 269 | } |
269 | 270 | ||
270 | /* | 271 | /* |
@@ -281,13 +282,13 @@ static inline unsigned long *uv_global_mmr64_address(int pnode, | |||
281 | static inline void uv_write_global_mmr64(int pnode, unsigned long offset, | 282 | static inline void uv_write_global_mmr64(int pnode, unsigned long offset, |
282 | unsigned long val) | 283 | unsigned long val) |
283 | { | 284 | { |
284 | *uv_global_mmr64_address(pnode, offset) = val; | 285 | writeq(val, uv_global_mmr64_address(pnode, offset)); |
285 | } | 286 | } |
286 | 287 | ||
287 | static inline unsigned long uv_read_global_mmr64(int pnode, | 288 | static inline unsigned long uv_read_global_mmr64(int pnode, |
288 | unsigned long offset) | 289 | unsigned long offset) |
289 | { | 290 | { |
290 | return *uv_global_mmr64_address(pnode, offset); | 291 | return readq(uv_global_mmr64_address(pnode, offset)); |
291 | } | 292 | } |
292 | 293 | ||
293 | /* | 294 | /* |
@@ -301,22 +302,22 @@ static inline unsigned long *uv_local_mmr_address(unsigned long offset) | |||
301 | 302 | ||
302 | static inline unsigned long uv_read_local_mmr(unsigned long offset) | 303 | static inline unsigned long uv_read_local_mmr(unsigned long offset) |
303 | { | 304 | { |
304 | return *uv_local_mmr_address(offset); | 305 | return readq(uv_local_mmr_address(offset)); |
305 | } | 306 | } |
306 | 307 | ||
307 | static inline void uv_write_local_mmr(unsigned long offset, unsigned long val) | 308 | static inline void uv_write_local_mmr(unsigned long offset, unsigned long val) |
308 | { | 309 | { |
309 | *uv_local_mmr_address(offset) = val; | 310 | writeq(val, uv_local_mmr_address(offset)); |
310 | } | 311 | } |
311 | 312 | ||
312 | static inline unsigned char uv_read_local_mmr8(unsigned long offset) | 313 | static inline unsigned char uv_read_local_mmr8(unsigned long offset) |
313 | { | 314 | { |
314 | return *((unsigned char *)uv_local_mmr_address(offset)); | 315 | return readb(uv_local_mmr_address(offset)); |
315 | } | 316 | } |
316 | 317 | ||
317 | static inline void uv_write_local_mmr8(unsigned long offset, unsigned char val) | 318 | static inline void uv_write_local_mmr8(unsigned long offset, unsigned char val) |
318 | { | 319 | { |
319 | *((unsigned char *)uv_local_mmr_address(offset)) = val; | 320 | writeb(val, uv_local_mmr_address(offset)); |
320 | } | 321 | } |
321 | 322 | ||
322 | /* | 323 | /* |
@@ -422,7 +423,7 @@ static inline void uv_hub_send_ipi(int pnode, int apicid, int vector) | |||
422 | unsigned long val; | 423 | unsigned long val; |
423 | 424 | ||
424 | val = (1UL << UVH_IPI_INT_SEND_SHFT) | | 425 | val = (1UL << UVH_IPI_INT_SEND_SHFT) | |
425 | ((apicid & 0x3f) << UVH_IPI_INT_APIC_ID_SHFT) | | 426 | ((apicid) << UVH_IPI_INT_APIC_ID_SHFT) | |
426 | (vector << UVH_IPI_INT_VECTOR_SHFT); | 427 | (vector << UVH_IPI_INT_VECTOR_SHFT); |
427 | uv_write_global_mmr64(pnode, UVH_IPI_INT, val); | 428 | uv_write_global_mmr64(pnode, UVH_IPI_INT, val); |
428 | } | 429 | } |
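The UV MMR accessors switch from raw pointer dereferences to readq()/writeq()/readb()/writeb(), the sanctioned MMIO primitives: volatile, correctly sized, and not subject to compiler reordering or store tearing. The pattern in sketch form (the sparse __iomem annotation is elided, as in the header itself):

	unsigned long *mmr = uv_local_mmr_address(offset);

	writeq(val, mmr);	/* single, ordered 64-bit MMIO store */
	val = readq(mmr);	/* single, ordered 64-bit MMIO load  */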
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index dc27a69e5d2a..3d61e204826f 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h | |||
@@ -21,6 +21,7 @@ struct vsyscall_gtod_data { | |||
21 | u32 shift; | 21 | u32 shift; |
22 | } clock; | 22 | } clock; |
23 | struct timespec wall_to_monotonic; | 23 | struct timespec wall_to_monotonic; |
24 | struct timespec wall_time_coarse; | ||
24 | }; | 25 | }; |
25 | extern struct vsyscall_gtod_data __vsyscall_gtod_data | 26 | extern struct vsyscall_gtod_data __vsyscall_gtod_data |
26 | __section_vsyscall_gtod_data; | 27 | __section_vsyscall_gtod_data; |
diff --git a/arch/x86/include/asm/vmware.h b/arch/x86/include/asm/vmware.h index c11b7e100d83..e49ed6d2fd4e 100644 --- a/arch/x86/include/asm/vmware.h +++ b/arch/x86/include/asm/vmware.h | |||
@@ -20,7 +20,7 @@ | |||
20 | #ifndef ASM_X86__VMWARE_H | 20 | #ifndef ASM_X86__VMWARE_H |
21 | #define ASM_X86__VMWARE_H | 21 | #define ASM_X86__VMWARE_H |
22 | 22 | ||
23 | extern unsigned long vmware_get_tsc_khz(void); | 23 | extern void vmware_platform_setup(void); |
24 | extern int vmware_platform(void); | 24 | extern int vmware_platform(void); |
25 | extern void vmware_set_feature_bits(struct cpuinfo_x86 *c); | 25 | extern void vmware_set_feature_bits(struct cpuinfo_x86 *c); |
26 | 26 | ||
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h new file mode 100644 index 000000000000..2c756fd4ab0e --- /dev/null +++ b/arch/x86/include/asm/x86_init.h | |||
@@ -0,0 +1,133 @@ | |||
1 | #ifndef _ASM_X86_PLATFORM_H | ||
2 | #define _ASM_X86_PLATFORM_H | ||
3 | |||
4 | #include <asm/pgtable_types.h> | ||
5 | #include <asm/bootparam.h> | ||
6 | |||
7 | struct mpc_bus; | ||
8 | struct mpc_cpu; | ||
9 | struct mpc_table; | ||
10 | |||
11 | /** | ||
12 | * struct x86_init_mpparse - platform specific mpparse ops | ||
13 | * @mpc_record: platform specific mpc record accounting | ||
14 | * @setup_ioapic_ids: platform specific ioapic id override | ||
15 | * @mpc_apic_id: platform specific mpc apic id assignment | ||
16 | * @smp_read_mpc_oem: platform specific oem mpc table setup | ||
17 | * @mpc_oem_pci_bus: platform specific pci bus setup (default NULL) | ||
18 | * @mpc_oem_bus_info: platform specific mpc bus info | ||
19 | * @find_smp_config: find the smp configuration | ||
20 | * @get_smp_config: get the smp configuration | ||
21 | */ | ||
22 | struct x86_init_mpparse { | ||
23 | void (*mpc_record)(unsigned int mode); | ||
24 | void (*setup_ioapic_ids)(void); | ||
25 | int (*mpc_apic_id)(struct mpc_cpu *m); | ||
26 | void (*smp_read_mpc_oem)(struct mpc_table *mpc); | ||
27 | void (*mpc_oem_pci_bus)(struct mpc_bus *m); | ||
28 | void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name); | ||
29 | void (*find_smp_config)(unsigned int reserve); | ||
30 | void (*get_smp_config)(unsigned int early); | ||
31 | }; | ||
32 | |||
33 | /** | ||
34 | * struct x86_init_resources - platform specific resource related ops | ||
35 | * @probe_roms: probe BIOS roms | ||
36 | * @reserve_resources: reserve the standard resources for the | ||
37 | * platform | ||
38 | * @memory_setup: platform specific memory setup | ||
39 | * | ||
40 | */ | ||
41 | struct x86_init_resources { | ||
42 | void (*probe_roms)(void); | ||
43 | void (*reserve_resources)(void); | ||
44 | char *(*memory_setup)(void); | ||
45 | }; | ||
46 | |||
47 | /** | ||
48 | * struct x86_init_irqs - platform specific interrupt setup | ||
49 | * @pre_vector_init: init code to run before interrupt vectors | ||
50 | * are set up. | ||
51 | * @intr_init: interrupt init code | ||
52 | * @trap_init: platform specific trap setup | ||
53 | */ | ||
54 | struct x86_init_irqs { | ||
55 | void (*pre_vector_init)(void); | ||
56 | void (*intr_init)(void); | ||
57 | void (*trap_init)(void); | ||
58 | }; | ||
59 | |||
60 | /** | ||
61 | * struct x86_init_oem - oem platform specific customizing functions | ||
62 | * @arch_setup: platform specific architecture setup | ||
63 | * @banner: print a platform specific banner | ||
64 | */ | ||
65 | struct x86_init_oem { | ||
66 | void (*arch_setup)(void); | ||
67 | void (*banner)(void); | ||
68 | }; | ||
69 | |||
70 | /** | ||
71 | * struct x86_init_paging - platform specific paging functions | ||
72 | * @pagetable_setup_start: platform specific pre paging_init() call | ||
73 | * @pagetable_setup_done: platform specific post paging_init() call | ||
74 | */ | ||
75 | struct x86_init_paging { | ||
76 | void (*pagetable_setup_start)(pgd_t *base); | ||
77 | void (*pagetable_setup_done)(pgd_t *base); | ||
78 | }; | ||
79 | |||
80 | /** | ||
81 | * struct x86_init_timers - platform specific timer setup | ||
82 | * @setup_percpu_clockev: set up the per cpu clock event device for the | ||
83 | * boot cpu | ||
84 | * @tsc_pre_init: platform function called before TSC init | ||
85 | * @timer_init: initialize the platform timer (default PIT/HPET) | ||
86 | */ | ||
87 | struct x86_init_timers { | ||
88 | void (*setup_percpu_clockev)(void); | ||
89 | void (*tsc_pre_init)(void); | ||
90 | void (*timer_init)(void); | ||
91 | }; | ||
92 | |||
93 | /** | ||
94 | * struct x86_init_ops - functions for platform specific setup | ||
95 | * | ||
96 | */ | ||
97 | struct x86_init_ops { | ||
98 | struct x86_init_resources resources; | ||
99 | struct x86_init_mpparse mpparse; | ||
100 | struct x86_init_irqs irqs; | ||
101 | struct x86_init_oem oem; | ||
102 | struct x86_init_paging paging; | ||
103 | struct x86_init_timers timers; | ||
104 | }; | ||
105 | |||
106 | /** | ||
107 | * struct x86_cpuinit_ops - platform specific cpu hotplug setups | ||
108 | * @setup_percpu_clockev: set up the per cpu clock event device | ||
109 | */ | ||
110 | struct x86_cpuinit_ops { | ||
111 | void (*setup_percpu_clockev)(void); | ||
112 | }; | ||
113 | |||
114 | /** | ||
115 | * struct x86_platform_ops - platform specific runtime functions | ||
116 | * @calibrate_tsc: calibrate TSC | ||
117 | * @get_wallclock: get time from HW clock like RTC etc. | ||
118 | * @set_wallclock: set time back to HW clock | ||
119 | */ | ||
120 | struct x86_platform_ops { | ||
121 | unsigned long (*calibrate_tsc)(void); | ||
122 | unsigned long (*get_wallclock)(void); | ||
123 | int (*set_wallclock)(unsigned long nowtime); | ||
124 | }; | ||
125 | |||
126 | extern struct x86_init_ops x86_init; | ||
127 | extern struct x86_cpuinit_ops x86_cpuinit; | ||
128 | extern struct x86_platform_ops x86_platform; | ||
129 | |||
130 | extern void x86_init_noop(void); | ||
131 | extern void x86_init_uint_noop(unsigned int unused); | ||
132 | |||
133 | #endif | ||
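The x86_init tables declared above replace the old x86_quirks indirection (torn out of numaq_32.c and io_apic.c later in this diff): a platform overrides only the hooks it cares about during early boot and inherits defaults for the rest. A minimal sketch of the intended usage, assuming a hypothetical platform_setup() run from early arch code; platform_timer_init() is an illustrative name, while x86_init_noop() is the stub declared above:

	#include <asm/x86_init.h>

	/* Sketch only: override the default PIT/HPET timer setup. */
	static void __init platform_timer_init(void)
	{
		/* program the platform's own clockevent hardware here */
	}

	static void __init platform_setup(void)
	{
		x86_init.timers.timer_init = platform_timer_init;
		/* boards where I/O APIC IDs are meaningless can skip the rewrite */
		x86_init.mpparse.setup_ioapic_ids = x86_init_noop;
	}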
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index bf04201b6575..4f2e66e29ecc 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -31,8 +31,8 @@ GCOV_PROFILE_paravirt.o := n | |||
31 | 31 | ||
32 | obj-y := process_$(BITS).o signal.o entry_$(BITS).o | 32 | obj-y := process_$(BITS).o signal.o entry_$(BITS).o |
33 | obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o | 33 | obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o |
34 | obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o | 34 | obj-y += time.o ioport.o ldt.o dumpstack.o |
35 | obj-y += setup.o i8259.o irqinit.o | 35 | obj-y += setup.o x86_init.o i8259.o irqinit.o |
36 | obj-$(CONFIG_X86_VISWS) += visws_quirks.o | 36 | obj-$(CONFIG_X86_VISWS) += visws_quirks.o |
37 | obj-$(CONFIG_X86_32) += probe_roms_32.o | 37 | obj-$(CONFIG_X86_32) += probe_roms_32.o |
38 | obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o | 38 | obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o |
@@ -52,9 +52,11 @@ obj-$(CONFIG_X86_DS_SELFTEST) += ds_selftest.o | |||
52 | obj-$(CONFIG_X86_32) += tls.o | 52 | obj-$(CONFIG_X86_32) += tls.o |
53 | obj-$(CONFIG_IA32_EMULATION) += tls.o | 53 | obj-$(CONFIG_IA32_EMULATION) += tls.o |
54 | obj-y += step.o | 54 | obj-y += step.o |
55 | obj-$(CONFIG_INTEL_TXT) += tboot.o | ||
55 | obj-$(CONFIG_STACKTRACE) += stacktrace.o | 56 | obj-$(CONFIG_STACKTRACE) += stacktrace.o |
56 | obj-y += cpu/ | 57 | obj-y += cpu/ |
57 | obj-y += acpi/ | 58 | obj-y += acpi/ |
59 | obj-$(CONFIG_SFI) += sfi.o | ||
58 | obj-y += reboot.o | 60 | obj-y += reboot.o |
59 | obj-$(CONFIG_MCA) += mca_32.o | 61 | obj-$(CONFIG_MCA) += mca_32.o |
60 | obj-$(CONFIG_X86_MSR) += msr.o | 62 | obj-$(CONFIG_X86_MSR) += msr.o |
@@ -104,6 +106,7 @@ obj-$(CONFIG_SCx200) += scx200.o | |||
104 | scx200-y += scx200_32.o | 106 | scx200-y += scx200_32.o |
105 | 107 | ||
106 | obj-$(CONFIG_OLPC) += olpc.o | 108 | obj-$(CONFIG_OLPC) += olpc.o |
109 | obj-$(CONFIG_X86_MRST) += mrst.o | ||
107 | 110 | ||
108 | microcode-y := microcode_core.o | 111 | microcode-y := microcode_core.o |
109 | microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o | 112 | microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o |
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 8c44c232efcb..59cdfa4686b2 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c | |||
@@ -48,7 +48,7 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags, | |||
48 | * P4, Core and beyond CPUs | 48 | * P4, Core and beyond CPUs |
49 | */ | 49 | */ |
50 | if (c->x86_vendor == X86_VENDOR_INTEL && | 50 | if (c->x86_vendor == X86_VENDOR_INTEL && |
51 | (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 14))) | 51 | (c->x86 > 0xf || (c->x86 == 6 && c->x86_model >= 14))) |
52 | flags->bm_control = 0; | 52 | flags->bm_control = 0; |
53 | } | 53 | } |
54 | EXPORT_SYMBOL(acpi_processor_power_init_bm_check); | 54 | EXPORT_SYMBOL(acpi_processor_power_init_bm_check); |
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.lds.S b/arch/x86/kernel/acpi/realmode/wakeup.lds.S index 7da00b799cda..0e50e1e5c573 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.lds.S +++ b/arch/x86/kernel/acpi/realmode/wakeup.lds.S | |||
@@ -56,6 +56,6 @@ SECTIONS | |||
56 | /DISCARD/ : { | 56 | /DISCARD/ : { |
57 | *(.note*) | 57 | *(.note*) |
58 | } | 58 | } |
59 | |||
60 | . = ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!"); | ||
61 | } | 59 | } |
60 | |||
61 | ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!"); | ||
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 159740decc41..894aa97f0717 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -14,7 +14,7 @@ | |||
14 | * Mikael Pettersson : PM converted to driver model. | 14 | * Mikael Pettersson : PM converted to driver model. |
15 | */ | 15 | */ |
16 | 16 | ||
17 | #include <linux/perf_counter.h> | 17 | #include <linux/perf_event.h> |
18 | #include <linux/kernel_stat.h> | 18 | #include <linux/kernel_stat.h> |
19 | #include <linux/mc146818rtc.h> | 19 | #include <linux/mc146818rtc.h> |
20 | #include <linux/acpi_pmtmr.h> | 20 | #include <linux/acpi_pmtmr.h> |
@@ -35,7 +35,8 @@ | |||
35 | #include <linux/smp.h> | 35 | #include <linux/smp.h> |
36 | #include <linux/mm.h> | 36 | #include <linux/mm.h> |
37 | 37 | ||
38 | #include <asm/perf_counter.h> | 38 | #include <asm/perf_event.h> |
39 | #include <asm/x86_init.h> | ||
39 | #include <asm/pgalloc.h> | 40 | #include <asm/pgalloc.h> |
40 | #include <asm/atomic.h> | 41 | #include <asm/atomic.h> |
41 | #include <asm/mpspec.h> | 42 | #include <asm/mpspec.h> |
@@ -61,7 +62,7 @@ unsigned int boot_cpu_physical_apicid = -1U; | |||
61 | /* | 62 | /* |
62 | * The highest APIC ID seen during enumeration. | 63 | * The highest APIC ID seen during enumeration. |
63 | * | 64 | * |
64 | * This determines the messaging protocol we can use: if all APIC IDs | 65 | * On AMD, this determines the messaging protocol we can use: if all APIC IDs |
65 | * are in the 0 ... 7 range, then we can use logical addressing which | 66 | * are in the 0 ... 7 range, then we can use logical addressing which |
66 | * has some performance advantages (better broadcasting). | 67 | * has some performance advantages (better broadcasting). |
67 | * | 68 | * |
@@ -978,7 +979,7 @@ void lapic_shutdown(void) | |||
978 | { | 979 | { |
979 | unsigned long flags; | 980 | unsigned long flags; |
980 | 981 | ||
981 | if (!cpu_has_apic) | 982 | if (!cpu_has_apic && !apic_from_smp_config()) |
982 | return; | 983 | return; |
983 | 984 | ||
984 | local_irq_save(flags); | 985 | local_irq_save(flags); |
@@ -1188,7 +1189,7 @@ void __cpuinit setup_local_APIC(void) | |||
1188 | apic_write(APIC_ESR, 0); | 1189 | apic_write(APIC_ESR, 0); |
1189 | } | 1190 | } |
1190 | #endif | 1191 | #endif |
1191 | perf_counters_lapic_init(); | 1192 | perf_events_lapic_init(); |
1192 | 1193 | ||
1193 | preempt_disable(); | 1194 | preempt_disable(); |
1194 | 1195 | ||
@@ -1196,8 +1197,7 @@ void __cpuinit setup_local_APIC(void) | |||
1196 | * Double-check whether this APIC is really registered. | 1197 | * Double-check whether this APIC is really registered. |
1197 | * This is meaningless in clustered apic mode, so we skip it. | 1198 | * This is meaningless in clustered apic mode, so we skip it. |
1198 | */ | 1199 | */ |
1199 | if (!apic->apic_id_registered()) | 1200 | BUG_ON(!apic->apic_id_registered()); |
1200 | BUG(); | ||
1201 | 1201 | ||
1202 | /* | 1202 | /* |
1203 | * Intel recommends to set DFR, LDR and TPR before enabling | 1203 | * Intel recommends to set DFR, LDR and TPR before enabling |
@@ -1709,7 +1709,7 @@ int __init APIC_init_uniprocessor(void) | |||
1709 | localise_nmi_watchdog(); | 1709 | localise_nmi_watchdog(); |
1710 | #endif | 1710 | #endif |
1711 | 1711 | ||
1712 | setup_boot_clock(); | 1712 | x86_init.timers.setup_percpu_clockev(); |
1713 | #ifdef CONFIG_X86_64 | 1713 | #ifdef CONFIG_X86_64 |
1714 | check_nmi_watchdog(); | 1714 | check_nmi_watchdog(); |
1715 | #endif | 1715 | #endif |
@@ -1916,24 +1916,14 @@ void __cpuinit generic_processor_info(int apicid, int version) | |||
1916 | max_physical_apicid = apicid; | 1916 | max_physical_apicid = apicid; |
1917 | 1917 | ||
1918 | #ifdef CONFIG_X86_32 | 1918 | #ifdef CONFIG_X86_32 |
1919 | /* | 1919 | switch (boot_cpu_data.x86_vendor) { |
1920 | * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y | 1920 | case X86_VENDOR_INTEL: |
1921 | * but we need to work other dependencies like SMP_SUSPEND etc | 1921 | if (num_processors > 8) |
1922 | * before this can be done without some confusion. | 1922 | def_to_bigsmp = 1; |
1923 | * if (CPU_HOTPLUG_ENABLED || num_processors > 8) | 1923 | break; |
1924 | * - Ashok Raj <ashok.raj@intel.com> | 1924 | case X86_VENDOR_AMD: |
1925 | */ | 1925 | if (max_physical_apicid >= 8) |
1926 | if (max_physical_apicid >= 8) { | ||
1927 | switch (boot_cpu_data.x86_vendor) { | ||
1928 | case X86_VENDOR_INTEL: | ||
1929 | if (!APIC_XAPIC(version)) { | ||
1930 | def_to_bigsmp = 0; | ||
1931 | break; | ||
1932 | } | ||
1933 | /* If P4 and above fall through */ | ||
1934 | case X86_VENDOR_AMD: | ||
1935 | def_to_bigsmp = 1; | 1926 | def_to_bigsmp = 1; |
1936 | } | ||
1937 | } | 1927 | } |
1938 | #endif | 1928 | #endif |
1939 | 1929 | ||
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 676cdac385c0..77a06413b6b2 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c | |||
@@ -112,7 +112,7 @@ static physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map) | |||
112 | return physids_promote(0xFFL); | 112 | return physids_promote(0xFFL); |
113 | } | 113 | } |
114 | 114 | ||
115 | static int bigsmp_check_phys_apicid_present(int boot_cpu_physical_apicid) | 115 | static int bigsmp_check_phys_apicid_present(int phys_apicid) |
116 | { | 116 | { |
117 | return 1; | 117 | return 1; |
118 | } | 118 | } |
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 3c8f9e75d038..dc69f28489f5 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
@@ -96,6 +96,11 @@ struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; | |||
96 | /* # of MP IRQ source entries */ | 96 | /* # of MP IRQ source entries */ |
97 | int mp_irq_entries; | 97 | int mp_irq_entries; |
98 | 98 | ||
99 | /* Number of legacy interrupts */ | ||
100 | static int nr_legacy_irqs __read_mostly = NR_IRQS_LEGACY; | ||
101 | /* GSI interrupts */ | ||
102 | static int nr_irqs_gsi = NR_IRQS_LEGACY; | ||
103 | |||
99 | #if defined (CONFIG_MCA) || defined (CONFIG_EISA) | 104 | #if defined (CONFIG_MCA) || defined (CONFIG_EISA) |
100 | int mp_bus_id_to_type[MAX_MP_BUSSES]; | 105 | int mp_bus_id_to_type[MAX_MP_BUSSES]; |
101 | #endif | 106 | #endif |
@@ -173,6 +178,12 @@ static struct irq_cfg irq_cfgx[NR_IRQS] = { | |||
173 | [15] = { .vector = IRQ15_VECTOR, }, | 178 | [15] = { .vector = IRQ15_VECTOR, }, |
174 | }; | 179 | }; |
175 | 180 | ||
181 | void __init io_apic_disable_legacy(void) | ||
182 | { | ||
183 | nr_legacy_irqs = 0; | ||
184 | nr_irqs_gsi = 0; | ||
185 | } | ||
186 | |||
176 | int __init arch_early_irq_init(void) | 187 | int __init arch_early_irq_init(void) |
177 | { | 188 | { |
178 | struct irq_cfg *cfg; | 189 | struct irq_cfg *cfg; |
@@ -190,7 +201,7 @@ int __init arch_early_irq_init(void) | |||
190 | desc->chip_data = &cfg[i]; | 201 | desc->chip_data = &cfg[i]; |
191 | zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node); | 202 | zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node); |
192 | zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node); | 203 | zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node); |
193 | if (i < NR_IRQS_LEGACY) | 204 | if (i < nr_legacy_irqs) |
194 | cpumask_setall(cfg[i].domain); | 205 | cpumask_setall(cfg[i].domain); |
195 | } | 206 | } |
196 | 207 | ||
@@ -216,17 +227,14 @@ static struct irq_cfg *get_one_free_irq_cfg(int node) | |||
216 | 227 | ||
217 | cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); | 228 | cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); |
218 | if (cfg) { | 229 | if (cfg) { |
219 | if (!alloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) { | 230 | if (!zalloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) { |
220 | kfree(cfg); | 231 | kfree(cfg); |
221 | cfg = NULL; | 232 | cfg = NULL; |
222 | } else if (!alloc_cpumask_var_node(&cfg->old_domain, | 233 | } else if (!zalloc_cpumask_var_node(&cfg->old_domain, |
223 | GFP_ATOMIC, node)) { | 234 | GFP_ATOMIC, node)) { |
224 | free_cpumask_var(cfg->domain); | 235 | free_cpumask_var(cfg->domain); |
225 | kfree(cfg); | 236 | kfree(cfg); |
226 | cfg = NULL; | 237 | cfg = NULL; |
227 | } else { | ||
228 | cpumask_clear(cfg->domain); | ||
229 | cpumask_clear(cfg->old_domain); | ||
230 | } | 238 | } |
231 | } | 239 | } |
232 | 240 | ||
@@ -867,7 +875,7 @@ static int __init find_isa_irq_apic(int irq, int type) | |||
867 | */ | 875 | */ |
868 | static int EISA_ELCR(unsigned int irq) | 876 | static int EISA_ELCR(unsigned int irq) |
869 | { | 877 | { |
870 | if (irq < NR_IRQS_LEGACY) { | 878 | if (irq < nr_legacy_irqs) { |
871 | unsigned int port = 0x4d0 + (irq >> 3); | 879 | unsigned int port = 0x4d0 + (irq >> 3); |
872 | return (inb(port) >> (irq & 7)) & 1; | 880 | return (inb(port) >> (irq & 7)) & 1; |
873 | } | 881 | } |
@@ -1464,7 +1472,7 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq | |||
1464 | } | 1472 | } |
1465 | 1473 | ||
1466 | ioapic_register_intr(irq, desc, trigger); | 1474 | ioapic_register_intr(irq, desc, trigger); |
1467 | if (irq < NR_IRQS_LEGACY) | 1475 | if (irq < nr_legacy_irqs) |
1468 | disable_8259A_irq(irq); | 1476 | disable_8259A_irq(irq); |
1469 | 1477 | ||
1470 | ioapic_write_entry(apic_id, pin, entry); | 1478 | ioapic_write_entry(apic_id, pin, entry); |
@@ -1831,7 +1839,7 @@ __apicdebuginit(void) print_PIC(void) | |||
1831 | unsigned int v; | 1839 | unsigned int v; |
1832 | unsigned long flags; | 1840 | unsigned long flags; |
1833 | 1841 | ||
1834 | if (apic_verbosity == APIC_QUIET) | 1842 | if (apic_verbosity == APIC_QUIET || !nr_legacy_irqs) |
1835 | return; | 1843 | return; |
1836 | 1844 | ||
1837 | printk(KERN_DEBUG "\nprinting PIC contents\n"); | 1845 | printk(KERN_DEBUG "\nprinting PIC contents\n"); |
@@ -1863,7 +1871,7 @@ __apicdebuginit(int) print_all_ICs(void) | |||
1863 | print_PIC(); | 1871 | print_PIC(); |
1864 | 1872 | ||
1865 | /* don't print out if apic is not there */ | 1873 | /* don't print out if apic is not there */ |
1866 | if (!cpu_has_apic || disable_apic) | 1874 | if (!cpu_has_apic && !apic_from_smp_config()) |
1867 | return 0; | 1875 | return 0; |
1868 | 1876 | ||
1869 | print_all_local_APICs(); | 1877 | print_all_local_APICs(); |
@@ -1894,6 +1902,10 @@ void __init enable_IO_APIC(void) | |||
1894 | spin_unlock_irqrestore(&ioapic_lock, flags); | 1902 | spin_unlock_irqrestore(&ioapic_lock, flags); |
1895 | nr_ioapic_registers[apic] = reg_01.bits.entries+1; | 1903 | nr_ioapic_registers[apic] = reg_01.bits.entries+1; |
1896 | } | 1904 | } |
1905 | |||
1906 | if (!nr_legacy_irqs) | ||
1907 | return; | ||
1908 | |||
1897 | for(apic = 0; apic < nr_ioapics; apic++) { | 1909 | for(apic = 0; apic < nr_ioapics; apic++) { |
1898 | int pin; | 1910 | int pin; |
1899 | /* See if any of the pins is in ExtINT mode */ | 1911 | /* See if any of the pins is in ExtINT mode */ |
@@ -1948,6 +1960,9 @@ void disable_IO_APIC(void) | |||
1948 | */ | 1960 | */ |
1949 | clear_IO_APIC(); | 1961 | clear_IO_APIC(); |
1950 | 1962 | ||
1963 | if (!nr_legacy_irqs) | ||
1964 | return; | ||
1965 | |||
1951 | /* | 1966 | /* |
1952 | * If the i8259 is routed through an IOAPIC | 1967 | * If the i8259 is routed through an IOAPIC |
1953 | * Put that IOAPIC in virtual wire mode | 1968 | * Put that IOAPIC in virtual wire mode |
@@ -1981,7 +1996,7 @@ void disable_IO_APIC(void) | |||
1981 | /* | 1996 | /* |
1982 | * Use virtual wire A mode when interrupt remapping is enabled. | 1997 | * Use virtual wire A mode when interrupt remapping is enabled. |
1983 | */ | 1998 | */ |
1984 | if (cpu_has_apic) | 1999 | if (cpu_has_apic || apic_from_smp_config()) |
1985 | disconnect_bsp_APIC(!intr_remapping_enabled && | 2000 | disconnect_bsp_APIC(!intr_remapping_enabled && |
1986 | ioapic_i8259.pin != -1); | 2001 | ioapic_i8259.pin != -1); |
1987 | } | 2002 | } |
@@ -1994,7 +2009,7 @@ void disable_IO_APIC(void) | |||
1994 | * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999 | 2009 | * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999 |
1995 | */ | 2010 | */ |
1996 | 2011 | ||
1997 | static void __init setup_ioapic_ids_from_mpc(void) | 2012 | void __init setup_ioapic_ids_from_mpc(void) |
1998 | { | 2013 | { |
1999 | union IO_APIC_reg_00 reg_00; | 2014 | union IO_APIC_reg_00 reg_00; |
2000 | physid_mask_t phys_id_present_map; | 2015 | physid_mask_t phys_id_present_map; |
@@ -2003,9 +2018,8 @@ static void __init setup_ioapic_ids_from_mpc(void) | |||
2003 | unsigned char old_id; | 2018 | unsigned char old_id; |
2004 | unsigned long flags; | 2019 | unsigned long flags; |
2005 | 2020 | ||
2006 | if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids()) | 2021 | if (acpi_ioapic) |
2007 | return; | 2022 | return; |
2008 | |||
2009 | /* | 2023 | /* |
2010 | * Don't check I/O APIC IDs for xAPIC systems. They have | 2024 | * Don't check I/O APIC IDs for xAPIC systems. They have |
2011 | * no meaning without the serial APIC bus. | 2025 | * no meaning without the serial APIC bus. |
@@ -2179,7 +2193,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq) | |||
2179 | struct irq_cfg *cfg; | 2193 | struct irq_cfg *cfg; |
2180 | 2194 | ||
2181 | spin_lock_irqsave(&ioapic_lock, flags); | 2195 | spin_lock_irqsave(&ioapic_lock, flags); |
2182 | if (irq < NR_IRQS_LEGACY) { | 2196 | if (irq < nr_legacy_irqs) { |
2183 | disable_8259A_irq(irq); | 2197 | disable_8259A_irq(irq); |
2184 | if (i8259A_irq_pending(irq)) | 2198 | if (i8259A_irq_pending(irq)) |
2185 | was_pending = 1; | 2199 | was_pending = 1; |
@@ -2657,7 +2671,7 @@ static inline void init_IO_APIC_traps(void) | |||
2657 | * so default to an old-fashioned 8259 | 2671 | * so default to an old-fashioned 8259 |
2658 | * interrupt if we can.. | 2672 | * interrupt if we can.. |
2659 | */ | 2673 | */ |
2660 | if (irq < NR_IRQS_LEGACY) | 2674 | if (irq < nr_legacy_irqs) |
2661 | make_8259A_irq(irq); | 2675 | make_8259A_irq(irq); |
2662 | else | 2676 | else |
2663 | /* Strange. Oh, well.. */ | 2677 | /* Strange. Oh, well.. */ |
@@ -2993,7 +3007,7 @@ out: | |||
2993 | * the I/O APIC in all cases now. No actual device should request | 3007 | * the I/O APIC in all cases now. No actual device should request |
2994 | * it anyway. --macro | 3008 | * it anyway. --macro |
2995 | */ | 3009 | */ |
2996 | #define PIC_IRQS (1 << PIC_CASCADE_IR) | 3010 | #define PIC_IRQS (1UL << PIC_CASCADE_IR) |
2997 | 3011 | ||
2998 | void __init setup_IO_APIC(void) | 3012 | void __init setup_IO_APIC(void) |
2999 | { | 3013 | { |
@@ -3001,21 +3015,19 @@ void __init setup_IO_APIC(void) | |||
3001 | /* | 3015 | /* |
3002 | * calling enable_IO_APIC() is moved to setup_local_APIC for BP | 3016 | * calling enable_IO_APIC() is moved to setup_local_APIC for BP |
3003 | */ | 3017 | */ |
3004 | 3018 | io_apic_irqs = nr_legacy_irqs ? ~PIC_IRQS : ~0UL; | |
3005 | io_apic_irqs = ~PIC_IRQS; | ||
3006 | 3019 | ||
3007 | apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); | 3020 | apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); |
3008 | /* | 3021 | /* |
3009 | * Set up IO-APIC IRQ routing. | 3022 | * Set up IO-APIC IRQ routing. |
3010 | */ | 3023 | */ |
3011 | #ifdef CONFIG_X86_32 | 3024 | x86_init.mpparse.setup_ioapic_ids(); |
3012 | if (!acpi_ioapic) | 3025 | |
3013 | setup_ioapic_ids_from_mpc(); | ||
3014 | #endif | ||
3015 | sync_Arb_IDs(); | 3026 | sync_Arb_IDs(); |
3016 | setup_IO_APIC_irqs(); | 3027 | setup_IO_APIC_irqs(); |
3017 | init_IO_APIC_traps(); | 3028 | init_IO_APIC_traps(); |
3018 | check_timer(); | 3029 | if (nr_legacy_irqs) |
3030 | check_timer(); | ||
3019 | } | 3031 | } |
3020 | 3032 | ||
3021 | /* | 3033 | /* |
@@ -3116,7 +3128,6 @@ static int __init ioapic_init_sysfs(void) | |||
3116 | 3128 | ||
3117 | device_initcall(ioapic_init_sysfs); | 3129 | device_initcall(ioapic_init_sysfs); |
3118 | 3130 | ||
3119 | static int nr_irqs_gsi = NR_IRQS_LEGACY; | ||
3120 | /* | 3131 | /* |
3121 | * Dynamic irq allocate and deallocation | 3132 | * Dynamic irq allocate and deallocation |
3122 | */ | 3133 | */ |
@@ -3856,7 +3867,7 @@ static int __io_apic_set_pci_routing(struct device *dev, int irq, | |||
3856 | /* | 3867 | /* |
3857 | * IRQs < 16 are already in the irq_2_pin[] map | 3868 | * IRQs < 16 are already in the irq_2_pin[] map |
3858 | */ | 3869 | */ |
3859 | if (irq >= NR_IRQS_LEGACY) { | 3870 | if (irq >= nr_legacy_irqs) { |
3860 | cfg = desc->chip_data; | 3871 | cfg = desc->chip_data; |
3861 | if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) { | 3872 | if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) { |
3862 | printk(KERN_INFO "can not add pin %d for irq %d\n", | 3873 | printk(KERN_INFO "can not add pin %d for irq %d\n", |
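With the legacy-IRQ paths keyed off the nr_legacy_irqs variable instead of the NR_IRQS_LEGACY constant, a PIC-less platform needs a single early call and the 8259 special cases above (ELCR probing, virtual-wire teardown, check_timer()) disable themselves. A sketch, assuming the call is made from a platform setup hook before setup_IO_APIC(); mid_platform_init() is an illustrative name:

	/* Hypothetical legacy-free (Moorestown-style) platform init. */
	static void __init mid_platform_init(void)
	{
		io_apic_disable_legacy();	/* nr_legacy_irqs = nr_irqs_gsi = 0 */
	}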
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index db7220220d09..7ff61d6a188a 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c | |||
@@ -66,7 +66,7 @@ static inline unsigned int get_nmi_count(int cpu) | |||
66 | 66 | ||
67 | static inline int mce_in_progress(void) | 67 | static inline int mce_in_progress(void) |
68 | { | 68 | { |
69 | #if defined(CONFIG_X86_NEW_MCE) | 69 | #if defined(CONFIG_X86_MCE) |
70 | return atomic_read(&mce_entry) > 0; | 70 | return atomic_read(&mce_entry) > 0; |
71 | #endif | 71 | #endif |
72 | return 0; | 72 | return 0; |
@@ -508,14 +508,14 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) | |||
508 | /* | 508 | /* |
509 | * proc handler for /proc/sys/kernel/nmi | 509 | * proc handler for /proc/sys/kernel/nmi |
510 | */ | 510 | */ |
511 | int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, | 511 | int proc_nmi_enabled(struct ctl_table *table, int write, |
512 | void __user *buffer, size_t *length, loff_t *ppos) | 512 | void __user *buffer, size_t *length, loff_t *ppos) |
513 | { | 513 | { |
514 | int old_state; | 514 | int old_state; |
515 | 515 | ||
516 | nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0; | 516 | nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0; |
517 | old_state = nmi_watchdog_enabled; | 517 | old_state = nmi_watchdog_enabled; |
518 | proc_dointvec(table, write, file, buffer, length, ppos); | 518 | proc_dointvec(table, write, buffer, length, ppos); |
519 | if (!!old_state == !!nmi_watchdog_enabled) | 519 | if (!!old_state == !!nmi_watchdog_enabled) |
520 | return 0; | 520 | return 0; |
521 | 521 | ||
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index ca96e68f0d23..efa00e2b8505 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c | |||
@@ -66,7 +66,6 @@ struct mpc_trans { | |||
66 | unsigned short trans_reserved; | 66 | unsigned short trans_reserved; |
67 | }; | 67 | }; |
68 | 68 | ||
69 | /* x86_quirks member */ | ||
70 | static int mpc_record; | 69 | static int mpc_record; |
71 | 70 | ||
72 | static struct mpc_trans *translation_table[MAX_MPC_ENTRY]; | 71 | static struct mpc_trans *translation_table[MAX_MPC_ENTRY]; |
@@ -130,10 +129,9 @@ void __cpuinit numaq_tsc_disable(void) | |||
130 | } | 129 | } |
131 | } | 130 | } |
132 | 131 | ||
133 | static int __init numaq_pre_time_init(void) | 132 | static void __init numaq_tsc_init(void) |
134 | { | 133 | { |
135 | numaq_tsc_disable(); | 134 | numaq_tsc_disable(); |
136 | return 0; | ||
137 | } | 135 | } |
138 | 136 | ||
139 | static inline int generate_logical_apicid(int quad, int phys_apicid) | 137 | static inline int generate_logical_apicid(int quad, int phys_apicid) |
@@ -177,6 +175,19 @@ static void mpc_oem_pci_bus(struct mpc_bus *m) | |||
177 | quad_local_to_mp_bus_id[quad][local] = m->busid; | 175 | quad_local_to_mp_bus_id[quad][local] = m->busid; |
178 | } | 176 | } |
179 | 177 | ||
178 | /* | ||
179 | * Called from mpparse code. | ||
180 | * mode = 0: prescan | ||
181 | * mode = 1: one mpc entry scanned | ||
182 | */ | ||
183 | static void numaq_mpc_record(unsigned int mode) | ||
184 | { | ||
185 | if (!mode) | ||
186 | mpc_record = 0; | ||
187 | else | ||
188 | mpc_record++; | ||
189 | } | ||
190 | |||
180 | static void __init MP_translation_info(struct mpc_trans *m) | 191 | static void __init MP_translation_info(struct mpc_trans *m) |
181 | { | 192 | { |
182 | printk(KERN_INFO | 193 | printk(KERN_INFO |
@@ -206,9 +217,9 @@ static int __init mpf_checksum(unsigned char *mp, int len) | |||
206 | /* | 217 | /* |
207 | * Read/parse the MPC oem tables | 218 | * Read/parse the MPC oem tables |
208 | */ | 219 | */ |
209 | static void __init | 220 | static void __init smp_read_mpc_oem(struct mpc_table *mpc) |
210 | smp_read_mpc_oem(struct mpc_oemtable *oemtable, unsigned short oemsize) | ||
211 | { | 221 | { |
222 | struct mpc_oemtable *oemtable = (void *)(long)mpc->oemptr; | ||
212 | int count = sizeof(*oemtable); /* the header size */ | 223 | int count = sizeof(*oemtable); /* the header size */ |
213 | unsigned char *oemptr = ((unsigned char *)oemtable) + count; | 224 | unsigned char *oemptr = ((unsigned char *)oemtable) + count; |
214 | 225 | ||
@@ -250,29 +261,6 @@ static void __init | |||
250 | } | 261 | } |
251 | } | 262 | } |
252 | 263 | ||
253 | static int __init numaq_setup_ioapic_ids(void) | ||
254 | { | ||
255 | /* so can skip it */ | ||
256 | return 1; | ||
257 | } | ||
258 | |||
259 | static struct x86_quirks numaq_x86_quirks __initdata = { | ||
260 | .arch_pre_time_init = numaq_pre_time_init, | ||
261 | .arch_time_init = NULL, | ||
262 | .arch_pre_intr_init = NULL, | ||
263 | .arch_memory_setup = NULL, | ||
264 | .arch_intr_init = NULL, | ||
265 | .arch_trap_init = NULL, | ||
266 | .mach_get_smp_config = NULL, | ||
267 | .mach_find_smp_config = NULL, | ||
268 | .mpc_record = &mpc_record, | ||
269 | .mpc_apic_id = mpc_apic_id, | ||
270 | .mpc_oem_bus_info = mpc_oem_bus_info, | ||
271 | .mpc_oem_pci_bus = mpc_oem_pci_bus, | ||
272 | .smp_read_mpc_oem = smp_read_mpc_oem, | ||
273 | .setup_ioapic_ids = numaq_setup_ioapic_ids, | ||
274 | }; | ||
275 | |||
276 | static __init void early_check_numaq(void) | 264 | static __init void early_check_numaq(void) |
277 | { | 265 | { |
278 | /* | 266 | /* |
@@ -286,8 +274,15 @@ static __init void early_check_numaq(void) | |||
286 | if (smp_found_config) | 274 | if (smp_found_config) |
287 | early_get_smp_config(); | 275 | early_get_smp_config(); |
288 | 276 | ||
289 | if (found_numaq) | 277 | if (found_numaq) { |
290 | x86_quirks = &numaq_x86_quirks; | 278 | x86_init.mpparse.mpc_record = numaq_mpc_record; |
279 | x86_init.mpparse.setup_ioapic_ids = x86_init_noop; | ||
280 | x86_init.mpparse.mpc_apic_id = mpc_apic_id; | ||
281 | x86_init.mpparse.smp_read_mpc_oem = smp_read_mpc_oem; | ||
282 | x86_init.mpparse.mpc_oem_pci_bus = mpc_oem_pci_bus; | ||
283 | x86_init.mpparse.mpc_oem_bus_info = mpc_oem_bus_info; | ||
284 | x86_init.timers.tsc_pre_init = numaq_tsc_init; | ||
285 | } | ||
291 | } | 286 | } |
292 | 287 | ||
293 | int __init get_memcfg_numaq(void) | 288 | int __init get_memcfg_numaq(void) |
@@ -418,7 +413,7 @@ static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid) | |||
418 | /* Where the IO area was mapped on multiquad, always 0 otherwise */ | 413 | /* Where the IO area was mapped on multiquad, always 0 otherwise */ |
419 | void *xquad_portio; | 414 | void *xquad_portio; |
420 | 415 | ||
421 | static inline int numaq_check_phys_apicid_present(int boot_cpu_physical_apicid) | 416 | static inline int numaq_check_phys_apicid_present(int phys_apicid) |
422 | { | 417 | { |
423 | return 1; | 418 | return 1; |
424 | } | 419 | } |
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index 65edc180fc82..c4cbd3080c1c 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c | |||
@@ -64,16 +64,23 @@ void __init default_setup_apic_routing(void) | |||
64 | apic = &apic_x2apic_phys; | 64 | apic = &apic_x2apic_phys; |
65 | else | 65 | else |
66 | apic = &apic_x2apic_cluster; | 66 | apic = &apic_x2apic_cluster; |
67 | printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); | ||
68 | } | 67 | } |
69 | #endif | 68 | #endif |
70 | 69 | ||
71 | if (apic == &apic_flat) { | 70 | if (apic == &apic_flat) { |
72 | if (max_physical_apicid >= 8) | 71 | switch (boot_cpu_data.x86_vendor) { |
73 | apic = &apic_physflat; | 72 | case X86_VENDOR_INTEL: |
74 | printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); | 73 | if (num_processors > 8) |
74 | apic = &apic_physflat; | ||
75 | break; | ||
76 | case X86_VENDOR_AMD: | ||
77 | if (max_physical_apicid >= 8) | ||
78 | apic = &apic_physflat; | ||
79 | } | ||
75 | } | 80 | } |
76 | 81 | ||
82 | printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); | ||
83 | |||
77 | if (is_vsmp_box()) { | 84 | if (is_vsmp_box()) { |
78 | /* need to update phys_pkg_id */ | 85 | /* need to update phys_pkg_id */ |
79 | apic->phys_pkg_id = apicid_phys_pkg_id; | 86 | apic->phys_pkg_id = apicid_phys_pkg_id; |
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index eafdfbd1ea95..645ecc4ff0be 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c | |||
@@ -272,7 +272,7 @@ static physid_mask_t summit_apicid_to_cpu_present(int apicid) | |||
272 | return physid_mask_of_physid(0); | 272 | return physid_mask_of_physid(0); |
273 | } | 273 | } |
274 | 274 | ||
275 | static int summit_check_phys_apicid_present(int boot_cpu_physical_apicid) | 275 | static int summit_check_phys_apicid_present(int physical_apicid) |
276 | { | 276 | { |
277 | return 1; | 277 | return 1; |
278 | } | 278 | } |
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 601159374e87..f5f5886a6b53 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c | |||
@@ -389,6 +389,16 @@ static __init void map_gru_high(int max_pnode) | |||
389 | map_high("GRU", gru.s.base, shift, max_pnode, map_wb); | 389 | map_high("GRU", gru.s.base, shift, max_pnode, map_wb); |
390 | } | 390 | } |
391 | 391 | ||
392 | static __init void map_mmr_high(int max_pnode) | ||
393 | { | ||
394 | union uvh_rh_gam_mmr_overlay_config_mmr_u mmr; | ||
395 | int shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT; | ||
396 | |||
397 | mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR); | ||
398 | if (mmr.s.enable) | ||
399 | map_high("MMR", mmr.s.base, shift, max_pnode, map_uc); | ||
400 | } | ||
401 | |||
392 | static __init void map_mmioh_high(int max_pnode) | 402 | static __init void map_mmioh_high(int max_pnode) |
393 | { | 403 | { |
394 | union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh; | 404 | union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh; |
@@ -643,6 +653,7 @@ void __init uv_system_init(void) | |||
643 | } | 653 | } |
644 | 654 | ||
645 | map_gru_high(max_pnode); | 655 | map_gru_high(max_pnode); |
656 | map_mmr_high(max_pnode); | ||
646 | map_mmioh_high(max_pnode); | 657 | map_mmioh_high(max_pnode); |
647 | 658 | ||
648 | uv_cpu_init(); | 659 | uv_cpu_init(); |
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index c1f253dac155..68537e957a9b 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
@@ -13,7 +13,7 @@ CFLAGS_common.o := $(nostackp) | |||
13 | 13 | ||
14 | obj-y := intel_cacheinfo.o addon_cpuid_features.o | 14 | obj-y := intel_cacheinfo.o addon_cpuid_features.o |
15 | obj-y += proc.o capflags.o powerflags.o common.o | 15 | obj-y += proc.o capflags.o powerflags.o common.o |
16 | obj-y += vmware.o hypervisor.o | 16 | obj-y += vmware.o hypervisor.o sched.o |
17 | 17 | ||
18 | obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o | 18 | obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o |
19 | obj-$(CONFIG_X86_64) += bugs_64.o | 19 | obj-$(CONFIG_X86_64) += bugs_64.o |
@@ -27,7 +27,7 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o | |||
27 | obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o | 27 | obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o |
28 | obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o | 28 | obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o |
29 | 29 | ||
30 | obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o | 30 | obj-$(CONFIG_PERF_EVENTS) += perf_event.o |
31 | 31 | ||
32 | obj-$(CONFIG_X86_MCE) += mcheck/ | 32 | obj-$(CONFIG_X86_MCE) += mcheck/ |
33 | obj-$(CONFIG_MTRR) += mtrr/ | 33 | obj-$(CONFIG_MTRR) += mtrr/ |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 22a47c82f3c0..c910a716a71c 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -184,7 +184,7 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c) | |||
184 | * approved Athlon | 184 | * approved Athlon |
185 | */ | 185 | */ |
186 | WARN_ONCE(1, "WARNING: This combination of AMD" | 186 | WARN_ONCE(1, "WARNING: This combination of AMD" |
187 | "processors is not suitable for SMP.\n"); | 187 | " processors is not suitable for SMP.\n"); |
188 | if (!test_taint(TAINT_UNSAFE_SMP)) | 188 | if (!test_taint(TAINT_UNSAFE_SMP)) |
189 | add_taint(TAINT_UNSAFE_SMP); | 189 | add_taint(TAINT_UNSAFE_SMP); |
190 | 190 | ||
@@ -333,6 +333,16 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) | |||
333 | #endif | 333 | #endif |
334 | } | 334 | } |
335 | 335 | ||
336 | int amd_get_nb_id(int cpu) | ||
337 | { | ||
338 | int id = 0; | ||
339 | #ifdef CONFIG_SMP | ||
340 | id = per_cpu(cpu_llc_id, cpu); | ||
341 | #endif | ||
342 | return id; | ||
343 | } | ||
344 | EXPORT_SYMBOL_GPL(amd_get_nb_id); | ||
345 | |||
336 | static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) | 346 | static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) |
337 | { | 347 | { |
338 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) | 348 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 2055fc2b2e6b..cc25c2b4a567 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -13,7 +13,7 @@ | |||
13 | #include <linux/io.h> | 13 | #include <linux/io.h> |
14 | 14 | ||
15 | #include <asm/stackprotector.h> | 15 | #include <asm/stackprotector.h> |
16 | #include <asm/perf_counter.h> | 16 | #include <asm/perf_event.h> |
17 | #include <asm/mmu_context.h> | 17 | #include <asm/mmu_context.h> |
18 | #include <asm/hypervisor.h> | 18 | #include <asm/hypervisor.h> |
19 | #include <asm/processor.h> | 19 | #include <asm/processor.h> |
@@ -34,7 +34,6 @@ | |||
34 | #include <asm/mce.h> | 34 | #include <asm/mce.h> |
35 | #include <asm/msr.h> | 35 | #include <asm/msr.h> |
36 | #include <asm/pat.h> | 36 | #include <asm/pat.h> |
37 | #include <linux/smp.h> | ||
38 | 37 | ||
39 | #ifdef CONFIG_X86_LOCAL_APIC | 38 | #ifdef CONFIG_X86_LOCAL_APIC |
40 | #include <asm/uv/uv.h> | 39 | #include <asm/uv/uv.h> |
@@ -870,7 +869,7 @@ void __init identify_boot_cpu(void) | |||
870 | #else | 869 | #else |
871 | vgetcpu_set_mode(); | 870 | vgetcpu_set_mode(); |
872 | #endif | 871 | #endif |
873 | init_hw_perf_counters(); | 872 | init_hw_perf_events(); |
874 | } | 873 | } |
875 | 874 | ||
876 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) | 875 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) |
diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c index 6b2a52dd0403..dca325c03999 100644 --- a/arch/x86/kernel/cpu/cpu_debug.c +++ b/arch/x86/kernel/cpu/cpu_debug.c | |||
@@ -30,8 +30,8 @@ | |||
30 | #include <asm/apic.h> | 30 | #include <asm/apic.h> |
31 | #include <asm/desc.h> | 31 | #include <asm/desc.h> |
32 | 32 | ||
33 | static DEFINE_PER_CPU(struct cpu_cpuX_base, cpu_arr[CPU_REG_ALL_BIT]); | 33 | static DEFINE_PER_CPU(struct cpu_cpuX_base [CPU_REG_ALL_BIT], cpu_arr); |
34 | static DEFINE_PER_CPU(struct cpu_private *, priv_arr[MAX_CPU_FILES]); | 34 | static DEFINE_PER_CPU(struct cpu_private * [MAX_CPU_FILES], priv_arr); |
35 | static DEFINE_PER_CPU(int, cpu_priv_count); | 35 | static DEFINE_PER_CPU(int, cpu_priv_count); |
36 | 36 | ||
37 | static DEFINE_MUTEX(cpu_debug_lock); | 37 | static DEFINE_MUTEX(cpu_debug_lock); |
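The cpu_arr/priv_arr conversion above follows the reworked per-cpu declaration convention: the array dimension moves into the type argument so the declared name remains a plain identifier for the per-cpu accessors. Schematically, with struct foo as a stand-in type not taken from this patch:

	/* old form, being phased out */
	static DEFINE_PER_CPU(struct foo, bar[16]);

	/* new form: the dimension belongs to the type */
	static DEFINE_PER_CPU(struct foo [16], bar);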
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index ae9b503220ca..7d5c3b0ea8da 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | |||
@@ -33,7 +33,7 @@ | |||
33 | #include <linux/cpufreq.h> | 33 | #include <linux/cpufreq.h> |
34 | #include <linux/compiler.h> | 34 | #include <linux/compiler.h> |
35 | #include <linux/dmi.h> | 35 | #include <linux/dmi.h> |
36 | #include <trace/power.h> | 36 | #include <trace/events/power.h> |
37 | 37 | ||
38 | #include <linux/acpi.h> | 38 | #include <linux/acpi.h> |
39 | #include <linux/io.h> | 39 | #include <linux/io.h> |
@@ -60,7 +60,6 @@ enum { | |||
60 | }; | 60 | }; |
61 | 61 | ||
62 | #define INTEL_MSR_RANGE (0xffff) | 62 | #define INTEL_MSR_RANGE (0xffff) |
63 | #define CPUID_6_ECX_APERFMPERF_CAPABILITY (0x1) | ||
64 | 63 | ||
65 | struct acpi_cpufreq_data { | 64 | struct acpi_cpufreq_data { |
66 | struct acpi_processor_performance *acpi_data; | 65 | struct acpi_processor_performance *acpi_data; |
@@ -71,13 +70,7 @@ struct acpi_cpufreq_data { | |||
71 | 70 | ||
72 | static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data); | 71 | static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data); |
73 | 72 | ||
74 | struct acpi_msr_data { | 73 | static DEFINE_PER_CPU(struct aperfmperf, old_perf); |
75 | u64 saved_aperf, saved_mperf; | ||
76 | }; | ||
77 | |||
78 | static DEFINE_PER_CPU(struct acpi_msr_data, msr_data); | ||
79 | |||
80 | DEFINE_TRACE(power_mark); | ||
81 | 74 | ||
82 | /* acpi_perf_data is a pointer to percpu data. */ | 75 | /* acpi_perf_data is a pointer to percpu data. */ |
83 | static struct acpi_processor_performance *acpi_perf_data; | 76 | static struct acpi_processor_performance *acpi_perf_data; |
@@ -244,23 +237,12 @@ static u32 get_cur_val(const struct cpumask *mask) | |||
244 | return cmd.val; | 237 | return cmd.val; |
245 | } | 238 | } |
246 | 239 | ||
247 | struct perf_pair { | ||
248 | union { | ||
249 | struct { | ||
250 | u32 lo; | ||
251 | u32 hi; | ||
252 | } split; | ||
253 | u64 whole; | ||
254 | } aperf, mperf; | ||
255 | }; | ||
256 | |||
257 | /* Called via smp_call_function_single(), on the target CPU */ | 240 | /* Called via smp_call_function_single(), on the target CPU */ |
258 | static void read_measured_perf_ctrs(void *_cur) | 241 | static void read_measured_perf_ctrs(void *_cur) |
259 | { | 242 | { |
260 | struct perf_pair *cur = _cur; | 243 | struct aperfmperf *am = _cur; |
261 | 244 | ||
262 | rdmsr(MSR_IA32_APERF, cur->aperf.split.lo, cur->aperf.split.hi); | 245 | get_aperfmperf(am); |
263 | rdmsr(MSR_IA32_MPERF, cur->mperf.split.lo, cur->mperf.split.hi); | ||
264 | } | 246 | } |
265 | 247 | ||
266 | /* | 248 | /* |
@@ -279,63 +261,17 @@ static void read_measured_perf_ctrs(void *_cur) | |||
279 | static unsigned int get_measured_perf(struct cpufreq_policy *policy, | 261 | static unsigned int get_measured_perf(struct cpufreq_policy *policy, |
280 | unsigned int cpu) | 262 | unsigned int cpu) |
281 | { | 263 | { |
282 | struct perf_pair readin, cur; | 264 | struct aperfmperf perf; |
283 | unsigned int perf_percent; | 265 | unsigned long ratio; |
284 | unsigned int retval; | 266 | unsigned int retval; |
285 | 267 | ||
286 | if (smp_call_function_single(cpu, read_measured_perf_ctrs, &readin, 1)) | 268 | if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1)) |
287 | return 0; | 269 | return 0; |
288 | 270 | ||
289 | cur.aperf.whole = readin.aperf.whole - | 271 | ratio = calc_aperfmperf_ratio(&per_cpu(old_perf, cpu), &perf); |
290 | per_cpu(msr_data, cpu).saved_aperf; | 272 | per_cpu(old_perf, cpu) = perf; |
291 | cur.mperf.whole = readin.mperf.whole - | ||
292 | per_cpu(msr_data, cpu).saved_mperf; | ||
293 | per_cpu(msr_data, cpu).saved_aperf = readin.aperf.whole; | ||
294 | per_cpu(msr_data, cpu).saved_mperf = readin.mperf.whole; | ||
295 | |||
296 | #ifdef __i386__ | ||
297 | /* | ||
298 | * We dont want to do 64 bit divide with 32 bit kernel | ||
299 | * Get an approximate value. Return failure in case we cannot get | ||
300 | * an approximate value. | ||
301 | */ | ||
302 | if (unlikely(cur.aperf.split.hi || cur.mperf.split.hi)) { | ||
303 | int shift_count; | ||
304 | u32 h; | ||
305 | |||
306 | h = max_t(u32, cur.aperf.split.hi, cur.mperf.split.hi); | ||
307 | shift_count = fls(h); | ||
308 | |||
309 | cur.aperf.whole >>= shift_count; | ||
310 | cur.mperf.whole >>= shift_count; | ||
311 | } | ||
312 | |||
313 | if (((unsigned long)(-1) / 100) < cur.aperf.split.lo) { | ||
314 | int shift_count = 7; | ||
315 | cur.aperf.split.lo >>= shift_count; | ||
316 | cur.mperf.split.lo >>= shift_count; | ||
317 | } | ||
318 | |||
319 | if (cur.aperf.split.lo && cur.mperf.split.lo) | ||
320 | perf_percent = (cur.aperf.split.lo * 100) / cur.mperf.split.lo; | ||
321 | else | ||
322 | perf_percent = 0; | ||
323 | |||
324 | #else | ||
325 | if (unlikely(((unsigned long)(-1) / 100) < cur.aperf.whole)) { | ||
326 | int shift_count = 7; | ||
327 | cur.aperf.whole >>= shift_count; | ||
328 | cur.mperf.whole >>= shift_count; | ||
329 | } | ||
330 | |||
331 | if (cur.aperf.whole && cur.mperf.whole) | ||
332 | perf_percent = (cur.aperf.whole * 100) / cur.mperf.whole; | ||
333 | else | ||
334 | perf_percent = 0; | ||
335 | |||
336 | #endif | ||
337 | 273 | ||
338 | retval = (policy->cpuinfo.max_freq * perf_percent) / 100; | 274 | retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT; |
339 | 275 | ||
340 | return retval; | 276 | return retval; |
341 | } | 277 | } |
@@ -394,7 +330,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, | |||
394 | unsigned int next_perf_state = 0; /* Index into perf table */ | 330 | unsigned int next_perf_state = 0; /* Index into perf table */ |
395 | unsigned int i; | 331 | unsigned int i; |
396 | int result = 0; | 332 | int result = 0; |
397 | struct power_trace it; | ||
398 | 333 | ||
399 | dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); | 334 | dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); |
400 | 335 | ||
@@ -426,7 +361,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, | |||
426 | } | 361 | } |
427 | } | 362 | } |
428 | 363 | ||
429 | trace_power_mark(&it, POWER_PSTATE, next_perf_state); | 364 | trace_power_frequency(POWER_PSTATE, data->freq_table[next_state].frequency); |
430 | 365 | ||
431 | switch (data->cpu_feature) { | 366 | switch (data->cpu_feature) { |
432 | case SYSTEM_INTEL_MSR_CAPABLE: | 367 | case SYSTEM_INTEL_MSR_CAPABLE: |
@@ -588,6 +523,21 @@ static const struct dmi_system_id sw_any_bug_dmi_table[] = { | |||
588 | }, | 523 | }, |
589 | { } | 524 | { } |
590 | }; | 525 | }; |
526 | |||
527 | static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c) | ||
528 | { | ||
529 | /* http://www.intel.com/Assets/PDF/specupdate/314554.pdf | ||
530 | * AL30: A Machine Check Exception (MCE) Occurring during an | ||
531 | * Enhanced Intel SpeedStep Technology Ratio Change May Cause | ||
532 | * Both Processor Cores to Lock Up when HT is enabled*/ | ||
533 | if (c->x86_vendor == X86_VENDOR_INTEL) { | ||
534 | if ((c->x86 == 15) && | ||
535 | (c->x86_model == 6) && | ||
536 | (c->x86_mask == 8) && smt_capable()) | ||
537 | return -ENODEV; | ||
538 | } | ||
539 | return 0; | ||
540 | } | ||
591 | #endif | 541 | #endif |
592 | 542 | ||
593 | static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) | 543 | static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) |
@@ -602,6 +552,12 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) | |||
602 | 552 | ||
603 | dprintk("acpi_cpufreq_cpu_init\n"); | 553 | dprintk("acpi_cpufreq_cpu_init\n"); |
604 | 554 | ||
555 | #ifdef CONFIG_SMP | ||
556 | result = acpi_cpufreq_blacklist(c); | ||
557 | if (result) | ||
558 | return result; | ||
559 | #endif | ||
560 | |||
605 | data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL); | 561 | data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL); |
606 | if (!data) | 562 | if (!data) |
607 | return -ENOMEM; | 563 | return -ENOMEM; |
@@ -731,12 +687,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) | |||
731 | acpi_processor_notify_smm(THIS_MODULE); | 687 | acpi_processor_notify_smm(THIS_MODULE); |
732 | 688 | ||
733 | /* Check for APERF/MPERF support in hardware */ | 689 | /* Check for APERF/MPERF support in hardware */ |
734 | if (c->x86_vendor == X86_VENDOR_INTEL && c->cpuid_level >= 6) { | 690 | if (cpu_has(c, X86_FEATURE_APERFMPERF)) |
735 | unsigned int ecx; | 691 | acpi_cpufreq_driver.getavg = get_measured_perf; |
736 | ecx = cpuid_ecx(6); | ||
737 | if (ecx & CPUID_6_ECX_APERFMPERF_CAPABILITY) | ||
738 | acpi_cpufreq_driver.getavg = get_measured_perf; | ||
739 | } | ||
740 | 692 | ||
741 | dprintk("CPU%u - ACPI performance management activated.\n", cpu); | 693 | dprintk("CPU%u - ACPI performance management activated.\n", cpu); |
742 | for (i = 0; i < perf->state_count; i++) | 694 | for (i = 0; i < perf->state_count; i++) |
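The rewritten get_measured_perf() above drops the open-coded 32-bit/64-bit division paths and leans on the shared APERF/MPERF helpers: the ratio of the two MSR deltas is a fixed-point fraction of full speed, and the returned frequency is max_freq scaled by it. A sketch of the arithmetic calc_aperfmperf_ratio() is expected to perform (the real helper lives elsewhere and must also guard the shift against overflow; APERFMPERF_SHIFT is its fixed-point shift):

	#include <linux/math64.h>

	/* Sketch only: fixed-point average-speed ratio from MSR deltas. */
	static unsigned long aperfmperf_ratio_sketch(u64 d_aperf, u64 d_mperf)
	{
		if (!d_mperf)
			return 0;	/* no reference ticks to compare against */
		return div64_u64(d_aperf << APERFMPERF_SHIFT, d_mperf);
	}

A CPU that averaged half of its maximum speed since the last sample thus yields ratio ≈ (1 << APERFMPERF_SHIFT) / 2, and get_measured_perf() reports roughly max_freq / 2.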
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 2a50ef891000..6394aa5c7985 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c | |||
@@ -605,9 +605,10 @@ static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, | |||
605 | return 0; | 605 | return 0; |
606 | } | 606 | } |
607 | 607 | ||
608 | static void invalidate_entry(struct powernow_k8_data *data, unsigned int entry) | 608 | static void invalidate_entry(struct cpufreq_frequency_table *powernow_table, |
609 | unsigned int entry) | ||
609 | { | 610 | { |
610 | data->powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID; | 611 | powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID; |
611 | } | 612 | } |
612 | 613 | ||
613 | static void print_basics(struct powernow_k8_data *data) | 614 | static void print_basics(struct powernow_k8_data *data) |
@@ -854,6 +855,10 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) | |||
854 | goto err_out; | 855 | goto err_out; |
855 | } | 856 | } |
856 | 857 | ||
858 | /* fill in data */ | ||
859 | data->numps = data->acpi_data.state_count; | ||
860 | powernow_k8_acpi_pst_values(data, 0); | ||
861 | |||
857 | if (cpu_family == CPU_HW_PSTATE) | 862 | if (cpu_family == CPU_HW_PSTATE) |
858 | ret_val = fill_powernow_table_pstate(data, powernow_table); | 863 | ret_val = fill_powernow_table_pstate(data, powernow_table); |
859 | else | 864 | else |
@@ -866,11 +871,8 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) | |||
866 | powernow_table[data->acpi_data.state_count].index = 0; | 871 | powernow_table[data->acpi_data.state_count].index = 0; |
867 | data->powernow_table = powernow_table; | 872 | data->powernow_table = powernow_table; |
868 | 873 | ||
869 | /* fill in data */ | ||
870 | data->numps = data->acpi_data.state_count; | ||
871 | if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu) | 874 | if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu) |
872 | print_basics(data); | 875 | print_basics(data); |
873 | powernow_k8_acpi_pst_values(data, 0); | ||
874 | 876 | ||
875 | /* notify BIOS that we exist */ | 877 | /* notify BIOS that we exist */ |
876 | acpi_processor_notify_smm(THIS_MODULE); | 878 | acpi_processor_notify_smm(THIS_MODULE); |
@@ -914,13 +916,13 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, | |||
914 | "bad value %d.\n", i, index); | 916 | "bad value %d.\n", i, index); |
915 | printk(KERN_ERR PFX "Please report to BIOS " | 917 | printk(KERN_ERR PFX "Please report to BIOS " |
916 | "manufacturer\n"); | 918 | "manufacturer\n"); |
917 | invalidate_entry(data, i); | 919 | invalidate_entry(powernow_table, i); |
918 | continue; | 920 | continue; |
919 | } | 921 | } |
920 | rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi); | 922 | rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi); |
921 | if (!(hi & HW_PSTATE_VALID_MASK)) { | 923 | if (!(hi & HW_PSTATE_VALID_MASK)) { |
922 | dprintk("invalid pstate %d, ignoring\n", index); | 924 | dprintk("invalid pstate %d, ignoring\n", index); |
923 | invalidate_entry(data, i); | 925 | invalidate_entry(powernow_table, i); |
924 | continue; | 926 | continue; |
925 | } | 927 | } |
926 | 928 | ||
@@ -941,7 +943,6 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, | |||
941 | struct cpufreq_frequency_table *powernow_table) | 943 | struct cpufreq_frequency_table *powernow_table) |
942 | { | 944 | { |
943 | int i; | 945 | int i; |
944 | int cntlofreq = 0; | ||
945 | 946 | ||
946 | for (i = 0; i < data->acpi_data.state_count; i++) { | 947 | for (i = 0; i < data->acpi_data.state_count; i++) { |
947 | u32 fid; | 948 | u32 fid; |
@@ -970,7 +971,7 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, | |||
970 | /* verify frequency is OK */ | 971 | /* verify frequency is OK */ |
971 | if ((freq > (MAX_FREQ * 1000)) || (freq < (MIN_FREQ * 1000))) { | 972 | if ((freq > (MAX_FREQ * 1000)) || (freq < (MIN_FREQ * 1000))) { |
972 | dprintk("invalid freq %u kHz, ignoring\n", freq); | 973 | dprintk("invalid freq %u kHz, ignoring\n", freq); |
973 | invalidate_entry(data, i); | 974 | invalidate_entry(powernow_table, i); |
974 | continue; | 975 | continue; |
975 | } | 976 | } |
976 | 977 | ||
@@ -978,38 +979,17 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, | |||
978 | * BIOSs are using "off" to indicate invalid */ | 979 | * BIOSs are using "off" to indicate invalid */ |
979 | if (vid == VID_OFF) { | 980 | if (vid == VID_OFF) { |
980 | dprintk("invalid vid %u, ignoring\n", vid); | 981 | dprintk("invalid vid %u, ignoring\n", vid); |
981 | invalidate_entry(data, i); | 982 | invalidate_entry(powernow_table, i); |
982 | continue; | 983 | continue; |
983 | } | 984 | } |
984 | 985 | ||
985 | /* verify only 1 entry from the lo frequency table */ | ||
986 | if (fid < HI_FID_TABLE_BOTTOM) { | ||
987 | if (cntlofreq) { | ||
988 | /* if both entries are the same, | ||
989 | * ignore this one ... */ | ||
990 | if ((freq != powernow_table[cntlofreq].frequency) || | ||
991 | (index != powernow_table[cntlofreq].index)) { | ||
992 | printk(KERN_ERR PFX | ||
993 | "Too many lo freq table " | ||
994 | "entries\n"); | ||
995 | return 1; | ||
996 | } | ||
997 | |||
998 | dprintk("double low frequency table entry, " | ||
999 | "ignoring it.\n"); | ||
1000 | invalidate_entry(data, i); | ||
1001 | continue; | ||
1002 | } else | ||
1003 | cntlofreq = i; | ||
1004 | } | ||
1005 | |||
1006 | if (freq != (data->acpi_data.states[i].core_frequency * 1000)) { | 986 | if (freq != (data->acpi_data.states[i].core_frequency * 1000)) { |
1007 | printk(KERN_INFO PFX "invalid freq entries " | 987 | printk(KERN_INFO PFX "invalid freq entries " |
1008 | "%u kHz vs. %u kHz\n", freq, | 988 | "%u kHz vs. %u kHz\n", freq, |
1009 | (unsigned int) | 989 | (unsigned int) |
1010 | (data->acpi_data.states[i].core_frequency | 990 | (data->acpi_data.states[i].core_frequency |
1011 | * 1000)); | 991 | * 1000)); |
1012 | invalidate_entry(data, i); | 992 | invalidate_entry(powernow_table, i); |
1013 | continue; | 993 | continue; |
1014 | } | 994 | } |
1015 | } | 995 | } |
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index 93ba8eeb100a..08be922de33a 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c | |||
@@ -34,13 +34,6 @@ detect_hypervisor_vendor(struct cpuinfo_x86 *c) | |||
34 | c->x86_hyper_vendor = X86_HYPER_VENDOR_NONE; | 34 | c->x86_hyper_vendor = X86_HYPER_VENDOR_NONE; |
35 | } | 35 | } |
36 | 36 | ||
37 | unsigned long get_hypervisor_tsc_freq(void) | ||
38 | { | ||
39 | if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE) | ||
40 | return vmware_get_tsc_khz(); | ||
41 | return 0; | ||
42 | } | ||
43 | |||
44 | static inline void __cpuinit | 37 | static inline void __cpuinit |
45 | hypervisor_set_feature_bits(struct cpuinfo_x86 *c) | 38 | hypervisor_set_feature_bits(struct cpuinfo_x86 *c) |
46 | { | 39 | { |
@@ -55,3 +48,10 @@ void __cpuinit init_hypervisor(struct cpuinfo_x86 *c) | |||
55 | detect_hypervisor_vendor(c); | 48 | detect_hypervisor_vendor(c); |
56 | hypervisor_set_feature_bits(c); | 49 | hypervisor_set_feature_bits(c); |
57 | } | 50 | } |
51 | |||
52 | void __init init_hypervisor_platform(void) | ||
53 | { | ||
54 | init_hypervisor(&boot_cpu_data); | ||
55 | if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE) | ||
56 | vmware_platform_setup(); | ||
57 | } | ||
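init_hypervisor_platform() folds the old get_hypervisor_tsc_freq() special case into per-hypervisor setup: instead of the TSC code asking the hypervisor layer for a frequency, the detected hypervisor installs its own hooks into x86_platform. A sketch of what such a setup routine is assumed to do; hv_calibrate_tsc() is an illustrative name standing in for the VMware-provided calibration routine:

	/* Sketch only: a hypervisor's platform setup hooking TSC calibration. */
	static unsigned long hv_calibrate_tsc(void)
	{
		return 0;	/* would return the hypervisor-reported TSC kHz */
	}

	static void __init hv_platform_setup(void)
	{
		x86_platform.calibrate_tsc = hv_calibrate_tsc;
	}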
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 80a722a071b5..40e1835b35e8 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -350,6 +350,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
350 | set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); | 350 | set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); |
351 | } | 351 | } |
352 | 352 | ||
353 | if (c->cpuid_level > 6) { | ||
354 | unsigned ecx = cpuid_ecx(6); | ||
355 | if (ecx & 0x01) | ||
356 | set_cpu_cap(c, X86_FEATURE_APERFMPERF); | ||
357 | } | ||
358 | |||
353 | if (cpu_has_xmm2) | 359 | if (cpu_has_xmm2) |
354 | set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); | 360 | set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); |
355 | if (cpu_has_ds) { | 361 | if (cpu_has_ds) { |
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile index 188a1ca5ad2b..4ac6d48fe11b 100644 --- a/arch/x86/kernel/cpu/mcheck/Makefile +++ b/arch/x86/kernel/cpu/mcheck/Makefile | |||
@@ -1,11 +1,8 @@ | |||
1 | obj-y = mce.o | 1 | obj-y = mce.o mce-severity.o |
2 | 2 | ||
3 | obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o | ||
4 | obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o | ||
5 | obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o | 3 | obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o |
6 | obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o | 4 | obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o |
7 | obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o | 5 | obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o |
8 | obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o | ||
9 | obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o | 6 | obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o |
10 | obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o | 7 | obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o |
11 | 8 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c deleted file mode 100644 index b945d5dbc609..000000000000 --- a/arch/x86/kernel/cpu/mcheck/k7.c +++ /dev/null | |||
@@ -1,116 +0,0 @@ | |||
1 | /* | ||
2 | * Athlon specific Machine Check Exception Reporting | ||
3 | * (C) Copyright 2002 Dave Jones <davej@redhat.com> | ||
4 | */ | ||
5 | #include <linux/interrupt.h> | ||
6 | #include <linux/kernel.h> | ||
7 | #include <linux/types.h> | ||
8 | #include <linux/init.h> | ||
9 | #include <linux/smp.h> | ||
10 | |||
11 | #include <asm/processor.h> | ||
12 | #include <asm/system.h> | ||
13 | #include <asm/mce.h> | ||
14 | #include <asm/msr.h> | ||
15 | |||
16 | /* Machine Check Handler For AMD Athlon/Duron: */ | ||
17 | static void k7_machine_check(struct pt_regs *regs, long error_code) | ||
18 | { | ||
19 | u32 alow, ahigh, high, low; | ||
20 | u32 mcgstl, mcgsth; | ||
21 | int recover = 1; | ||
22 | int i; | ||
23 | |||
24 | rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | ||
25 | if (mcgstl & (1<<0)) /* Recoverable ? */ | ||
26 | recover = 0; | ||
27 | |||
28 | printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", | ||
29 | smp_processor_id(), mcgsth, mcgstl); | ||
30 | |||
31 | for (i = 1; i < nr_mce_banks; i++) { | ||
32 | rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high); | ||
33 | if (high & (1<<31)) { | ||
34 | char misc[20]; | ||
35 | char addr[24]; | ||
36 | |||
37 | misc[0] = '\0'; | ||
38 | addr[0] = '\0'; | ||
39 | |||
40 | if (high & (1<<29)) | ||
41 | recover |= 1; | ||
42 | if (high & (1<<25)) | ||
43 | recover |= 2; | ||
44 | high &= ~(1<<31); | ||
45 | |||
46 | if (high & (1<<27)) { | ||
47 | rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh); | ||
48 | snprintf(misc, 20, "[%08x%08x]", ahigh, alow); | ||
49 | } | ||
50 | if (high & (1<<26)) { | ||
51 | rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh); | ||
52 | snprintf(addr, 24, " at %08x%08x", ahigh, alow); | ||
53 | } | ||
54 | |||
55 | printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n", | ||
56 | smp_processor_id(), i, high, low, misc, addr); | ||
57 | |||
58 | /* Clear it: */ | ||
59 | wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL); | ||
60 | /* Serialize: */ | ||
61 | wmb(); | ||
62 | add_taint(TAINT_MACHINE_CHECK); | ||
63 | } | ||
64 | } | ||
65 | |||
66 | if (recover & 2) | ||
67 | panic("CPU context corrupt"); | ||
68 | if (recover & 1) | ||
69 | panic("Unable to continue"); | ||
70 | |||
71 | printk(KERN_EMERG "Attempting to continue.\n"); | ||
72 | |||
73 | mcgstl &= ~(1<<2); | ||
74 | wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | ||
75 | } | ||
76 | |||
77 | |||
78 | /* AMD K7 machine check is Intel like: */ | ||
79 | void amd_mcheck_init(struct cpuinfo_x86 *c) | ||
80 | { | ||
81 | u32 l, h; | ||
82 | int i; | ||
83 | |||
84 | if (!cpu_has(c, X86_FEATURE_MCE)) | ||
85 | return; | ||
86 | |||
87 | machine_check_vector = k7_machine_check; | ||
88 | /* Make sure the vector pointer is visible before we enable MCEs: */ | ||
89 | wmb(); | ||
90 | |||
91 | printk(KERN_INFO "Intel machine check architecture supported.\n"); | ||
92 | |||
93 | rdmsr(MSR_IA32_MCG_CAP, l, h); | ||
94 | if (l & (1<<8)) /* Control register present ? */ | ||
95 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); | ||
96 | nr_mce_banks = l & 0xff; | ||
97 | |||
98 | /* | ||
99 | * Clear status for MC index 0 separately, we don't touch CTL, | ||
100 | * as some K7 Athlons cause spurious MCEs when its enabled: | ||
101 | */ | ||
102 | if (boot_cpu_data.x86 == 6) { | ||
103 | wrmsr(MSR_IA32_MC0_STATUS, 0x0, 0x0); | ||
104 | i = 1; | ||
105 | } else | ||
106 | i = 0; | ||
107 | |||
108 | for (; i < nr_mce_banks; i++) { | ||
109 | wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); | ||
110 | wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); | ||
111 | } | ||
112 | |||
113 | set_in_cr4(X86_CR4_MCE); | ||
114 | printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", | ||
115 | smp_processor_id()); | ||
116 | } | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index a3a235a53f09..472763d92098 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c | |||
@@ -18,7 +18,12 @@ | |||
18 | #include <linux/string.h> | 18 | #include <linux/string.h> |
19 | #include <linux/fs.h> | 19 | #include <linux/fs.h> |
20 | #include <linux/smp.h> | 20 | #include <linux/smp.h> |
21 | #include <linux/notifier.h> | ||
22 | #include <linux/kdebug.h> | ||
23 | #include <linux/cpu.h> | ||
24 | #include <linux/sched.h> | ||
21 | #include <asm/mce.h> | 25 | #include <asm/mce.h> |
26 | #include <asm/apic.h> | ||
22 | 27 | ||
23 | /* Update fake mce registers on current CPU. */ | 28 | /* Update fake mce registers on current CPU. */ |
24 | static void inject_mce(struct mce *m) | 29 | static void inject_mce(struct mce *m) |
@@ -39,44 +44,142 @@ static void inject_mce(struct mce *m) | |||
39 | i->finished = 1; | 44 | i->finished = 1; |
40 | } | 45 | } |
41 | 46 | ||
42 | struct delayed_mce { | 47 | static void raise_poll(struct mce *m) |
43 | struct timer_list timer; | 48 | { |
44 | struct mce m; | 49 | unsigned long flags; |
45 | }; | 50 | mce_banks_t b; |
46 | 51 | ||
47 | /* Inject mce on current CPU */ | 52 | memset(&b, 0xff, sizeof(mce_banks_t)); |
48 | static void raise_mce(unsigned long data) | 53 | local_irq_save(flags); |
54 | machine_check_poll(0, &b); | ||
55 | local_irq_restore(flags); | ||
56 | m->finished = 0; | ||
57 | } | ||
58 | |||
59 | static void raise_exception(struct mce *m, struct pt_regs *pregs) | ||
49 | { | 60 | { |
50 | struct delayed_mce *dm = (struct delayed_mce *)data; | 61 | struct pt_regs regs; |
51 | struct mce *m = &dm->m; | 62 | unsigned long flags; |
52 | int cpu = m->extcpu; | ||
53 | 63 | ||
54 | inject_mce(m); | 64 | if (!pregs) { |
55 | if (m->status & MCI_STATUS_UC) { | ||
56 | struct pt_regs regs; | ||
57 | memset(®s, 0, sizeof(struct pt_regs)); | 65 | memset(®s, 0, sizeof(struct pt_regs)); |
58 | regs.ip = m->ip; | 66 | regs.ip = m->ip; |
59 | regs.cs = m->cs; | 67 | regs.cs = m->cs; |
68 | pregs = ®s; | ||
69 | } | ||
70 | /* in the mcheck exception handler, irqs will be disabled */ | ||
71 | local_irq_save(flags); | ||
72 | do_machine_check(pregs, 0); | ||
73 | local_irq_restore(flags); | ||
74 | m->finished = 0; | ||
75 | } | ||
76 | |||
77 | static cpumask_t mce_inject_cpumask; | ||
78 | |||
79 | static int mce_raise_notify(struct notifier_block *self, | ||
80 | unsigned long val, void *data) | ||
81 | { | ||
82 | struct die_args *args = (struct die_args *)data; | ||
83 | int cpu = smp_processor_id(); | ||
84 | struct mce *m = &__get_cpu_var(injectm); | ||
85 | if (val != DIE_NMI_IPI || !cpu_isset(cpu, mce_inject_cpumask)) | ||
86 | return NOTIFY_DONE; | ||
87 | cpu_clear(cpu, mce_inject_cpumask); | ||
88 | if (m->inject_flags & MCJ_EXCEPTION) | ||
89 | raise_exception(m, args->regs); | ||
90 | else if (m->status) | ||
91 | raise_poll(m); | ||
92 | return NOTIFY_STOP; | ||
93 | } | ||
94 | |||
95 | static struct notifier_block mce_raise_nb = { | ||
96 | .notifier_call = mce_raise_notify, | ||
97 | .priority = 1000, | ||
98 | }; | ||
99 | |||
100 | /* Inject mce on current CPU */ | ||
101 | static int raise_local(void) | ||
102 | { | ||
103 | struct mce *m = &__get_cpu_var(injectm); | ||
104 | int context = MCJ_CTX(m->inject_flags); | ||
105 | int ret = 0; | ||
106 | int cpu = m->extcpu; | ||
107 | |||
108 | if (m->inject_flags & MCJ_EXCEPTION) { | ||
60 | printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu); | 109 | printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu); |
61 | do_machine_check(®s, 0); | 110 | switch (context) { |
111 | case MCJ_CTX_IRQ: | ||
112 | /* | ||
113 | * Could do more to fake interrupts like | ||
114 | * calling irq_enter, but the necessary | ||
115 | * machinery isn't exported currently. | ||
116 | */ | ||
117 | /*FALL THROUGH*/ | ||
118 | case MCJ_CTX_PROCESS: | ||
119 | raise_exception(m, NULL); | ||
120 | break; | ||
121 | default: | ||
122 | printk(KERN_INFO "Invalid MCE context\n"); | ||
123 | ret = -EINVAL; | ||
124 | } | ||
62 | printk(KERN_INFO "MCE exception done on CPU %d\n", cpu); | 125 | printk(KERN_INFO "MCE exception done on CPU %d\n", cpu); |
63 | } else { | 126 | } else if (m->status) { |
64 | mce_banks_t b; | ||
65 | memset(&b, 0xff, sizeof(mce_banks_t)); | ||
66 | printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu); | 127 | printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu); |
67 | machine_check_poll(0, &b); | 128 | raise_poll(m); |
68 | mce_notify_irq(); | 129 | mce_notify_irq(); |
69 | printk(KERN_INFO "Finished machine check poll on CPU %d\n", | 130 | printk(KERN_INFO "Machine check poll done on CPU %d\n", cpu); |
70 | cpu); | 131 | } else |
71 | } | 132 | m->finished = 0; |
72 | kfree(dm); | 133 | |
134 | return ret; | ||
135 | } | ||
136 | |||
137 | static void raise_mce(struct mce *m) | ||
138 | { | ||
139 | int context = MCJ_CTX(m->inject_flags); | ||
140 | |||
141 | inject_mce(m); | ||
142 | |||
143 | if (context == MCJ_CTX_RANDOM) | ||
144 | return; | ||
145 | |||
146 | #ifdef CONFIG_X86_LOCAL_APIC | ||
147 | if (m->inject_flags & MCJ_NMI_BROADCAST) { | ||
148 | unsigned long start; | ||
149 | int cpu; | ||
150 | get_online_cpus(); | ||
151 | mce_inject_cpumask = cpu_online_map; | ||
152 | cpu_clear(get_cpu(), mce_inject_cpumask); | ||
153 | for_each_online_cpu(cpu) { | ||
154 | struct mce *mcpu = &per_cpu(injectm, cpu); | ||
155 | if (!mcpu->finished || | ||
156 | MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM) | ||
157 | cpu_clear(cpu, mce_inject_cpumask); | ||
158 | } | ||
159 | if (!cpus_empty(mce_inject_cpumask)) | ||
160 | apic->send_IPI_mask(&mce_inject_cpumask, NMI_VECTOR); | ||
161 | start = jiffies; | ||
162 | while (!cpus_empty(mce_inject_cpumask)) { | ||
163 | if (!time_before(jiffies, start + 2*HZ)) { | ||
164 | printk(KERN_ERR | ||
165 | "Timeout waiting for mce inject NMI %lx\n", | ||
166 | *cpus_addr(mce_inject_cpumask)); | ||
167 | break; | ||
168 | } | ||
169 | cpu_relax(); | ||
170 | } | ||
171 | raise_local(); | ||
172 | put_cpu(); | ||
173 | put_online_cpus(); | ||
174 | } else | ||
175 | #endif | ||
176 | raise_local(); | ||
73 | } | 177 | } |
74 | 178 | ||
75 | /* Error injection interface */ | 179 | /* Error injection interface */ |
76 | static ssize_t mce_write(struct file *filp, const char __user *ubuf, | 180 | static ssize_t mce_write(struct file *filp, const char __user *ubuf, |
77 | size_t usize, loff_t *off) | 181 | size_t usize, loff_t *off) |
78 | { | 182 | { |
79 | struct delayed_mce *dm; | ||
80 | struct mce m; | 183 | struct mce m; |
81 | 184 | ||
82 | if (!capable(CAP_SYS_ADMIN)) | 185 | if (!capable(CAP_SYS_ADMIN)) |
@@ -96,19 +199,12 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf, | |||
96 | if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu)) | 199 | if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu)) |
97 | return -EINVAL; | 200 | return -EINVAL; |
98 | 201 | ||
99 | dm = kmalloc(sizeof(struct delayed_mce), GFP_KERNEL); | ||
100 | if (!dm) | ||
101 | return -ENOMEM; | ||
102 | |||
103 | /* | 202 | /* |
104 | * Need to give user space some time to set everything up, | 203 | * Need to give user space some time to set everything up, |
105 | * so do it a jiffie or two later everywhere. | 204 | * so do it a jiffie or two later everywhere. |
106 | * Should we use a hrtimer here for better synchronization? | ||
107 | */ | 205 | */ |
108 | memcpy(&dm->m, &m, sizeof(struct mce)); | 206 | schedule_timeout(2); |
109 | setup_timer(&dm->timer, raise_mce, (unsigned long)dm); | 207 | raise_mce(&m); |
110 | dm->timer.expires = jiffies + 2; | ||
111 | add_timer_on(&dm->timer, m.extcpu); | ||
112 | return usize; | 208 | return usize; |
113 | } | 209 | } |
114 | 210 | ||
@@ -116,6 +212,7 @@ static int inject_init(void) | |||
116 | { | 212 | { |
117 | printk(KERN_INFO "Machine check injector initialized\n"); | 213 | printk(KERN_INFO "Machine check injector initialized\n"); |
118 | mce_chrdev_ops.write = mce_write; | 214 | mce_chrdev_ops.write = mce_write; |
215 | register_die_notifier(&mce_raise_nb); | ||
119 | return 0; | 216 | return 0; |
120 | } | 217 | } |
121 | 218 | ||
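The raise_mce()/raise_local() split above keys everything off the MCJ_* bits carried in mce.inject_flags. A hedged sketch of how an injection record might be composed before being handed to the mce_write() hook (all MCJ_* and MCI_STATUS_* names are the ones used by the code above; the chosen values are arbitrary):

	struct mce m;

	memset(&m, 0, sizeof(m));
	m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_UC;
	m.bank = 1;			/* arbitrary bank for the example */
	m.extcpu = 0;			/* CPU the fake event targets */
	/* raise an exception on the target, NMI-broadcast to the rest: */
	m.inject_flags = MCJ_EXCEPTION | MCJ_NMI_BROADCAST | MCJ_CTX_PROCESS;
	/* writing this record to the injector ends up in raise_mce() */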
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index 54dcb8ff12e5..32996f9fab67 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h | |||
@@ -1,3 +1,4 @@ | |||
1 | #include <linux/sysdev.h> | ||
1 | #include <asm/mce.h> | 2 | #include <asm/mce.h> |
2 | 3 | ||
3 | enum severity_level { | 4 | enum severity_level { |
@@ -10,6 +11,20 @@ enum severity_level { | |||
10 | MCE_PANIC_SEVERITY, | 11 | MCE_PANIC_SEVERITY, |
11 | }; | 12 | }; |
12 | 13 | ||
14 | #define ATTR_LEN 16 | ||
15 | |||
16 | /* One object for each MCE bank, shared by all CPUs */ | ||
17 | struct mce_bank { | ||
18 | u64 ctl; /* subevents to enable */ | ||
19 | unsigned char init; /* initialise bank? */ | ||
20 | struct sysdev_attribute attr; /* sysdev attribute */ | ||
21 | char attrname[ATTR_LEN]; /* attribute name */ | ||
22 | }; | ||
23 | |||
13 | int mce_severity(struct mce *a, int tolerant, char **msg); | 24 | int mce_severity(struct mce *a, int tolerant, char **msg); |
25 | struct dentry *mce_get_debugfs_dir(void); | ||
14 | 26 | ||
15 | extern int mce_ser; | 27 | extern int mce_ser; |
28 | |||
29 | extern struct mce_bank *mce_banks; | ||
30 | |||
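Embedding the sysdev attribute and its name buffer directly in struct mce_bank is what lets mce.c drop the separately allocated bank_attrs array and its kasprintf()'d names: a sysfs handler can get from the attribute pointer back to the owning bank with container_of(), as the attr_to_bank() helper further down does:

	/* given a pointer to the 'attr' member, recover the enclosing bank */
	struct mce_bank *b = container_of(attr, struct mce_bank, attr);
	b->ctl = new;		/* e.g. what the sysfs store handler writes */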
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index ff0807f97056..8a85dd1b1aa1 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c | |||
@@ -139,6 +139,7 @@ int mce_severity(struct mce *a, int tolerant, char **msg) | |||
139 | } | 139 | } |
140 | } | 140 | } |
141 | 141 | ||
142 | #ifdef CONFIG_DEBUG_FS | ||
142 | static void *s_start(struct seq_file *f, loff_t *pos) | 143 | static void *s_start(struct seq_file *f, loff_t *pos) |
143 | { | 144 | { |
144 | if (*pos >= ARRAY_SIZE(severities)) | 145 | if (*pos >= ARRAY_SIZE(severities)) |
@@ -197,7 +198,7 @@ static int __init severities_debugfs_init(void) | |||
197 | { | 198 | { |
198 | struct dentry *dmce = NULL, *fseverities_coverage = NULL; | 199 | struct dentry *dmce = NULL, *fseverities_coverage = NULL; |
199 | 200 | ||
200 | dmce = debugfs_create_dir("mce", NULL); | 201 | dmce = mce_get_debugfs_dir(); |
201 | if (dmce == NULL) | 202 | if (dmce == NULL) |
202 | goto err_out; | 203 | goto err_out; |
203 | fseverities_coverage = debugfs_create_file("severities-coverage", | 204 | fseverities_coverage = debugfs_create_file("severities-coverage", |
@@ -209,10 +210,7 @@ static int __init severities_debugfs_init(void) | |||
209 | return 0; | 210 | return 0; |
210 | 211 | ||
211 | err_out: | 212 | err_out: |
212 | if (fseverities_coverage) | ||
213 | debugfs_remove(fseverities_coverage); | ||
214 | if (dmce) | ||
215 | debugfs_remove(dmce); | ||
216 | return -ENOMEM; | 213 | return -ENOMEM; |
217 | } | 214 | } |
218 | late_initcall(severities_debugfs_init); | 215 | late_initcall(severities_debugfs_init); |
216 | #endif | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 9bfe9d2ea615..b1598a9436d0 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <linux/smp.h> | 34 | #include <linux/smp.h> |
35 | #include <linux/fs.h> | 35 | #include <linux/fs.h> |
36 | #include <linux/mm.h> | 36 | #include <linux/mm.h> |
37 | #include <linux/debugfs.h> | ||
37 | 38 | ||
38 | #include <asm/processor.h> | 39 | #include <asm/processor.h> |
39 | #include <asm/hw_irq.h> | 40 | #include <asm/hw_irq.h> |
@@ -45,21 +46,8 @@ | |||
45 | 46 | ||
46 | #include "mce-internal.h" | 47 | #include "mce-internal.h" |
47 | 48 | ||
48 | /* Handle unconfigured int18 (should never happen) */ | ||
49 | static void unexpected_machine_check(struct pt_regs *regs, long error_code) | ||
50 | { | ||
51 | printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", | ||
52 | smp_processor_id()); | ||
53 | } | ||
54 | |||
55 | /* Call the installed machine check handler for this CPU setup. */ | ||
56 | void (*machine_check_vector)(struct pt_regs *, long error_code) = | ||
57 | unexpected_machine_check; | ||
58 | |||
59 | int mce_disabled __read_mostly; | 49 | int mce_disabled __read_mostly; |
60 | 50 | ||
61 | #ifdef CONFIG_X86_NEW_MCE | ||
62 | |||
63 | #define MISC_MCELOG_MINOR 227 | 51 | #define MISC_MCELOG_MINOR 227 |
64 | 52 | ||
65 | #define SPINUNIT 100 /* 100ns */ | 53 | #define SPINUNIT 100 /* 100ns */ |
@@ -77,7 +65,6 @@ DEFINE_PER_CPU(unsigned, mce_exception_count); | |||
77 | */ | 65 | */ |
78 | static int tolerant __read_mostly = 1; | 66 | static int tolerant __read_mostly = 1; |
79 | static int banks __read_mostly; | 67 | static int banks __read_mostly; |
80 | static u64 *bank __read_mostly; | ||
81 | static int rip_msr __read_mostly; | 68 | static int rip_msr __read_mostly; |
82 | static int mce_bootlog __read_mostly = -1; | 69 | static int mce_bootlog __read_mostly = -1; |
83 | static int monarch_timeout __read_mostly = -1; | 70 | static int monarch_timeout __read_mostly = -1; |
@@ -87,28 +74,35 @@ int mce_cmci_disabled __read_mostly; | |||
87 | int mce_ignore_ce __read_mostly; | 74 | int mce_ignore_ce __read_mostly; |
88 | int mce_ser __read_mostly; | 75 | int mce_ser __read_mostly; |
89 | 76 | ||
77 | struct mce_bank *mce_banks __read_mostly; | ||
78 | |||
90 | /* User mode helper program triggered by machine check event */ | 79 | /* User mode helper program triggered by machine check event */ |
91 | static unsigned long mce_need_notify; | 80 | static unsigned long mce_need_notify; |
92 | static char mce_helper[128]; | 81 | static char mce_helper[128]; |
93 | static char *mce_helper_argv[2] = { mce_helper, NULL }; | 82 | static char *mce_helper_argv[2] = { mce_helper, NULL }; |
94 | 83 | ||
95 | static unsigned long dont_init_banks; | ||
96 | |||
97 | static DECLARE_WAIT_QUEUE_HEAD(mce_wait); | 84 | static DECLARE_WAIT_QUEUE_HEAD(mce_wait); |
98 | static DEFINE_PER_CPU(struct mce, mces_seen); | 85 | static DEFINE_PER_CPU(struct mce, mces_seen); |
99 | static int cpu_missing; | 86 | static int cpu_missing; |
100 | 87 | ||
88 | static void default_decode_mce(struct mce *m) | ||
89 | { | ||
90 | pr_emerg("No human readable MCE decoding support on this CPU type.\n"); | ||
91 | pr_emerg("Run the message through 'mcelog --ascii' to decode.\n"); | ||
92 | } | ||
93 | |||
94 | /* | ||
95 | * CPU/chipset specific EDAC code can register a callback here to print | ||
96 | * MCE errors in a human-readable form: | ||
97 | */ | ||
98 | void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce; | ||
99 | EXPORT_SYMBOL(x86_mce_decode_callback); | ||
101 | 100 | ||
102 | /* MCA banks polled by the period polling timer for corrected events */ | 101 | /* MCA banks polled by the period polling timer for corrected events */ |
103 | DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { | 102 | DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { |
104 | [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL | 103 | [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL |
105 | }; | 104 | }; |
106 | 105 | ||
107 | static inline int skip_bank_init(int i) | ||
108 | { | ||
109 | return i < BITS_PER_LONG && test_bit(i, &dont_init_banks); | ||
110 | } | ||
111 | |||
112 | static DEFINE_PER_CPU(struct work_struct, mce_work); | 106 | static DEFINE_PER_CPU(struct work_struct, mce_work); |
113 | 107 | ||
114 | /* Do initial initialization of a struct mce */ | 108 | /* Do initial initialization of a struct mce */ |
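Since x86_mce_decode_callback is a plain exported function pointer, CPU- or chipset-specific EDAC code can replace the default by simple assignment. A hedged sketch of such a consumer (the hook name is the one introduced above; the module and its message are hypothetical):

static void example_decode_mce(struct mce *m)
{
	/* a real decoder would map bank/status bits to a DIMM or HT link */
	pr_emerg("bank %d, status 0x%016llx\n",
		 m->bank, (unsigned long long)m->status);
}

static int __init example_edac_init(void)
{
	x86_mce_decode_callback = example_decode_mce;
	return 0;
}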
@@ -183,59 +177,60 @@ void mce_log(struct mce *mce) | |||
183 | set_bit(0, &mce_need_notify); | 177 | set_bit(0, &mce_need_notify); |
184 | } | 178 | } |
185 | 179 | ||
186 | void __weak decode_mce(struct mce *m) | ||
187 | { | ||
188 | return; | ||
189 | } | ||
190 | |||
191 | static void print_mce(struct mce *m) | 180 | static void print_mce(struct mce *m) |
192 | { | 181 | { |
193 | printk(KERN_EMERG | 182 | pr_emerg("CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", |
194 | "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", | ||
195 | m->extcpu, m->mcgstatus, m->bank, m->status); | 183 | m->extcpu, m->mcgstatus, m->bank, m->status); |
184 | |||
196 | if (m->ip) { | 185 | if (m->ip) { |
197 | printk(KERN_EMERG "RIP%s %02x:<%016Lx> ", | 186 | pr_emerg("RIP%s %02x:<%016Lx> ", |
198 | !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", | 187 | !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", |
199 | m->cs, m->ip); | 188 | m->cs, m->ip); |
189 | |||
200 | if (m->cs == __KERNEL_CS) | 190 | if (m->cs == __KERNEL_CS) |
201 | print_symbol("{%s}", m->ip); | 191 | print_symbol("{%s}", m->ip); |
202 | printk(KERN_CONT "\n"); | 192 | pr_cont("\n"); |
203 | } | 193 | } |
204 | printk(KERN_EMERG "TSC %llx ", m->tsc); | 194 | |
195 | pr_emerg("TSC %llx ", m->tsc); | ||
205 | if (m->addr) | 196 | if (m->addr) |
206 | printk(KERN_CONT "ADDR %llx ", m->addr); | 197 | pr_cont("ADDR %llx ", m->addr); |
207 | if (m->misc) | 198 | if (m->misc) |
208 | printk(KERN_CONT "MISC %llx ", m->misc); | 199 | pr_cont("MISC %llx ", m->misc); |
209 | printk(KERN_CONT "\n"); | ||
210 | printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", | ||
211 | m->cpuvendor, m->cpuid, m->time, m->socketid, | ||
212 | m->apicid); | ||
213 | 200 | ||
214 | decode_mce(m); | 201 | pr_cont("\n"); |
202 | pr_emerg("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", | ||
203 | m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid); | ||
204 | |||
205 | /* | ||
206 | * Print out human-readable details about the MCE error | ||
207 | * (if the CPU has an implementation for that): | ||
208 | */ | ||
209 | x86_mce_decode_callback(m); | ||
215 | } | 210 | } |
216 | 211 | ||
217 | static void print_mce_head(void) | 212 | static void print_mce_head(void) |
218 | { | 213 | { |
219 | printk(KERN_EMERG "\nHARDWARE ERROR\n"); | 214 | pr_emerg("\nHARDWARE ERROR\n"); |
220 | } | 215 | } |
221 | 216 | ||
222 | static void print_mce_tail(void) | 217 | static void print_mce_tail(void) |
223 | { | 218 | { |
224 | printk(KERN_EMERG "This is not a software problem!\n" | 219 | pr_emerg("This is not a software problem!\n"); |
225 | #if (!defined(CONFIG_EDAC) || !defined(CONFIG_CPU_SUP_AMD)) | ||
226 | "Run through mcelog --ascii to decode and contact your hardware vendor\n" | ||
227 | #endif | ||
228 | ); | ||
229 | } | 220 | } |
230 | 221 | ||
231 | #define PANIC_TIMEOUT 5 /* 5 seconds */ | 222 | #define PANIC_TIMEOUT 5 /* 5 seconds */ |
232 | 223 | ||
233 | static atomic_t mce_paniced; | 224 | static atomic_t mce_paniced; |
234 | 225 | ||
226 | static int fake_panic; | ||
227 | static atomic_t mce_fake_paniced; | ||
228 | |||
235 | /* Panic in progress. Enable interrupts and wait for final IPI */ | 229 | /* Panic in progress. Enable interrupts and wait for final IPI */ |
236 | static void wait_for_panic(void) | 230 | static void wait_for_panic(void) |
237 | { | 231 | { |
238 | long timeout = PANIC_TIMEOUT*USEC_PER_SEC; | 232 | long timeout = PANIC_TIMEOUT*USEC_PER_SEC; |
233 | |||
239 | preempt_disable(); | 234 | preempt_disable(); |
240 | local_irq_enable(); | 235 | local_irq_enable(); |
241 | while (timeout-- > 0) | 236 | while (timeout-- > 0) |
@@ -249,15 +244,21 @@ static void mce_panic(char *msg, struct mce *final, char *exp) | |||
249 | { | 244 | { |
250 | int i; | 245 | int i; |
251 | 246 | ||
252 | /* | 247 | if (!fake_panic) { |
253 | * Make sure only one CPU runs in machine check panic | 248 | /* |
254 | */ | 249 | * Make sure only one CPU runs in machine check panic |
255 | if (atomic_add_return(1, &mce_paniced) > 1) | 250 | */ |
256 | wait_for_panic(); | 251 | if (atomic_inc_return(&mce_paniced) > 1) |
257 | barrier(); | 252 | wait_for_panic(); |
253 | barrier(); | ||
258 | 254 | ||
259 | bust_spinlocks(1); | 255 | bust_spinlocks(1); |
260 | console_verbose(); | 256 | console_verbose(); |
257 | } else { | ||
258 | /* Don't log too much for fake panic */ | ||
259 | if (atomic_inc_return(&mce_fake_paniced) > 1) | ||
260 | return; | ||
261 | } | ||
261 | print_mce_head(); | 262 | print_mce_head(); |
262 | /* First print corrected ones that are still unlogged */ | 263 | /* First print corrected ones that are still unlogged */ |
263 | for (i = 0; i < MCE_LOG_LEN; i++) { | 264 | for (i = 0; i < MCE_LOG_LEN; i++) { |
@@ -284,9 +285,12 @@ static void mce_panic(char *msg, struct mce *final, char *exp) | |||
284 | print_mce_tail(); | 285 | print_mce_tail(); |
285 | if (exp) | 286 | if (exp) |
286 | printk(KERN_EMERG "Machine check: %s\n", exp); | 287 | printk(KERN_EMERG "Machine check: %s\n", exp); |
287 | if (panic_timeout == 0) | 288 | if (!fake_panic) { |
288 | panic_timeout = mce_panic_timeout; | 289 | if (panic_timeout == 0) |
289 | panic(msg); | 290 | panic_timeout = mce_panic_timeout; |
291 | panic(msg); | ||
292 | } else | ||
293 | printk(KERN_EMERG "Fake kernel panic: %s\n", msg); | ||
290 | } | 294 | } |
291 | 295 | ||
292 | /* Support code for software error injection */ | 296 | /* Support code for software error injection */ |
@@ -294,13 +298,14 @@ static void mce_panic(char *msg, struct mce *final, char *exp) | |||
294 | static int msr_to_offset(u32 msr) | 298 | static int msr_to_offset(u32 msr) |
295 | { | 299 | { |
296 | unsigned bank = __get_cpu_var(injectm.bank); | 300 | unsigned bank = __get_cpu_var(injectm.bank); |
301 | |||
297 | if (msr == rip_msr) | 302 | if (msr == rip_msr) |
298 | return offsetof(struct mce, ip); | 303 | return offsetof(struct mce, ip); |
299 | if (msr == MSR_IA32_MC0_STATUS + bank*4) | 304 | if (msr == MSR_IA32_MCx_STATUS(bank)) |
300 | return offsetof(struct mce, status); | 305 | return offsetof(struct mce, status); |
301 | if (msr == MSR_IA32_MC0_ADDR + bank*4) | 306 | if (msr == MSR_IA32_MCx_ADDR(bank)) |
302 | return offsetof(struct mce, addr); | 307 | return offsetof(struct mce, addr); |
303 | if (msr == MSR_IA32_MC0_MISC + bank*4) | 308 | if (msr == MSR_IA32_MCx_MISC(bank)) |
304 | return offsetof(struct mce, misc); | 309 | return offsetof(struct mce, misc); |
305 | if (msr == MSR_IA32_MCG_STATUS) | 310 | if (msr == MSR_IA32_MCG_STATUS) |
306 | return offsetof(struct mce, mcgstatus); | 311 | return offsetof(struct mce, mcgstatus); |
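From msr_to_offset() onwards, the open-coded 'MSR_IA32_MC0_xxx + 4*bank' arithmetic is replaced by MSR_IA32_MCx_*() helpers. They are expected to expand to the architectural four-MSR stride per bank, roughly as this series defines them in msr-index.h:

#define MSR_IA32_MCx_CTL(x)	(MSR_IA32_MC0_CTL + 4*(x))
#define MSR_IA32_MCx_STATUS(x)	(MSR_IA32_MC0_STATUS + 4*(x))
#define MSR_IA32_MCx_ADDR(x)	(MSR_IA32_MC0_ADDR + 4*(x))
#define MSR_IA32_MCx_MISC(x)	(MSR_IA32_MC0_MISC + 4*(x))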
@@ -311,13 +316,25 @@ static int msr_to_offset(u32 msr) | |||
311 | static u64 mce_rdmsrl(u32 msr) | 316 | static u64 mce_rdmsrl(u32 msr) |
312 | { | 317 | { |
313 | u64 v; | 318 | u64 v; |
319 | |||
314 | if (__get_cpu_var(injectm).finished) { | 320 | if (__get_cpu_var(injectm).finished) { |
315 | int offset = msr_to_offset(msr); | 321 | int offset = msr_to_offset(msr); |
322 | |||
316 | if (offset < 0) | 323 | if (offset < 0) |
317 | return 0; | 324 | return 0; |
318 | return *(u64 *)((char *)&__get_cpu_var(injectm) + offset); | 325 | return *(u64 *)((char *)&__get_cpu_var(injectm) + offset); |
319 | } | 326 | } |
320 | rdmsrl(msr, v); | 327 | |
328 | if (rdmsrl_safe(msr, &v)) { | ||
329 | WARN_ONCE(1, "mce: Unable to read msr %d!\n", msr); | ||
330 | /* | ||
331 | * Return zero in case the access faulted. This should | ||
332 | * not happen normally but can happen if the CPU does | ||
333 | * something weird, or if the code is buggy. | ||
334 | */ | ||
335 | v = 0; | ||
336 | } | ||
337 | |||
321 | return v; | 338 | return v; |
322 | } | 339 | } |
323 | 340 | ||
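Switching mce_rdmsrl() to rdmsrl_safe() turns a bad bank MSR access into a one-time warning that reads back as zero, rather than an unhandled fault. The probing pattern generalizes to any possibly-absent MSR (sketch, not from the patch):

	u64 cap;

	/* rdmsrl_safe() returns non-zero when the RDMSR itself faults */
	if (rdmsrl_safe(MSR_IA32_MCG_CAP, &cap))
		cap = 0;	/* treat an unreadable MSR as "no capabilities" */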
@@ -325,6 +342,7 @@ static void mce_wrmsrl(u32 msr, u64 v) | |||
325 | { | 342 | { |
326 | if (__get_cpu_var(injectm).finished) { | 343 | if (__get_cpu_var(injectm).finished) { |
327 | int offset = msr_to_offset(msr); | 344 | int offset = msr_to_offset(msr); |
345 | |||
328 | if (offset >= 0) | 346 | if (offset >= 0) |
329 | *(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v; | 347 | *(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v; |
330 | return; | 348 | return; |
@@ -421,7 +439,7 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) | |||
421 | m->ip = mce_rdmsrl(rip_msr); | 439 | m->ip = mce_rdmsrl(rip_msr); |
422 | } | 440 | } |
423 | 441 | ||
424 | #ifdef CONFIG_X86_LOCAL_APIC | 442 | #ifdef CONFIG_X86_LOCAL_APIC
425 | /* | 443 | /* |
426 | * Called after interrupts have been reenabled again | 444 | * Called after interrupts have been reenabled again |
427 | * when a MCE happened during an interrupts off region | 445 | * when a MCE happened during an interrupts off region |
@@ -505,7 +523,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
505 | 523 | ||
506 | m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); | 524 | m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); |
507 | for (i = 0; i < banks; i++) { | 525 | for (i = 0; i < banks; i++) { |
508 | if (!bank[i] || !test_bit(i, *b)) | 526 | if (!mce_banks[i].ctl || !test_bit(i, *b)) |
509 | continue; | 527 | continue; |
510 | 528 | ||
511 | m.misc = 0; | 529 | m.misc = 0; |
@@ -514,7 +532,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
514 | m.tsc = 0; | 532 | m.tsc = 0; |
515 | 533 | ||
516 | barrier(); | 534 | barrier(); |
517 | m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); | 535 | m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); |
518 | if (!(m.status & MCI_STATUS_VAL)) | 536 | if (!(m.status & MCI_STATUS_VAL)) |
519 | continue; | 537 | continue; |
520 | 538 | ||
@@ -529,9 +547,9 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
529 | continue; | 547 | continue; |
530 | 548 | ||
531 | if (m.status & MCI_STATUS_MISCV) | 549 | if (m.status & MCI_STATUS_MISCV) |
532 | m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4); | 550 | m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i)); |
533 | if (m.status & MCI_STATUS_ADDRV) | 551 | if (m.status & MCI_STATUS_ADDRV) |
534 | m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4); | 552 | m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i)); |
535 | 553 | ||
536 | if (!(flags & MCP_TIMESTAMP)) | 554 | if (!(flags & MCP_TIMESTAMP)) |
537 | m.tsc = 0; | 555 | m.tsc = 0; |
@@ -547,7 +565,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
547 | /* | 565 | /* |
548 | * Clear state for this bank. | 566 | * Clear state for this bank. |
549 | */ | 567 | */ |
550 | mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | 568 | mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0); |
551 | } | 569 | } |
552 | 570 | ||
553 | /* | 571 | /* |
@@ -568,7 +586,7 @@ static int mce_no_way_out(struct mce *m, char **msg) | |||
568 | int i; | 586 | int i; |
569 | 587 | ||
570 | for (i = 0; i < banks; i++) { | 588 | for (i = 0; i < banks; i++) { |
571 | m->status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); | 589 | m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); |
572 | if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY) | 590 | if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY) |
573 | return 1; | 591 | return 1; |
574 | } | 592 | } |
@@ -628,7 +646,7 @@ out: | |||
628 | * This way we prevent any potential data corruption in a unrecoverable case | 646 | * This way we prevent any potential data corruption in a unrecoverable case |
629 | * and also makes sure always all CPU's errors are examined. | 647 | * and also makes sure always all CPU's errors are examined. |
630 | * | 648 | * |
631 | * Also this detects the case of an machine check event coming from outer | 649 | * Also this detects the case of a machine check event coming from outer |
632 | * space (not detected by any CPUs) In this case some external agent wants | 650 | * space (not detected by any CPUs) In this case some external agent wants |
633 | * us to shut down, so panic too. | 651 | * us to shut down, so panic too. |
634 | * | 652 | * |
@@ -681,7 +699,7 @@ static void mce_reign(void) | |||
681 | * No machine check event found. Must be some external | 699 | * No machine check event found. Must be some external |
682 | * source or one CPU is hung. Panic. | 700 | * source or one CPU is hung. Panic. |
683 | */ | 701 | */ |
684 | if (!m && tolerant < 3) | 702 | if (global_worst <= MCE_KEEP_SEVERITY && tolerant < 3) |
685 | mce_panic("Machine check from unknown source", NULL, NULL); | 703 | mce_panic("Machine check from unknown source", NULL, NULL); |
686 | 704 | ||
687 | /* | 705 | /* |
@@ -715,7 +733,7 @@ static int mce_start(int *no_way_out) | |||
715 | * global_nwo should be updated before mce_callin | 733 | * global_nwo should be updated before mce_callin |
716 | */ | 734 | */ |
717 | smp_wmb(); | 735 | smp_wmb(); |
718 | order = atomic_add_return(1, &mce_callin); | 736 | order = atomic_inc_return(&mce_callin); |
719 | 737 | ||
720 | /* | 738 | /* |
721 | * Wait for everyone. | 739 | * Wait for everyone. |
@@ -852,7 +870,7 @@ static void mce_clear_state(unsigned long *toclear) | |||
852 | 870 | ||
853 | for (i = 0; i < banks; i++) { | 871 | for (i = 0; i < banks; i++) { |
854 | if (test_bit(i, toclear)) | 872 | if (test_bit(i, toclear)) |
855 | mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | 873 | mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0); |
856 | } | 874 | } |
857 | } | 875 | } |
858 | 876 | ||
@@ -905,11 +923,11 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
905 | mce_setup(&m); | 923 | mce_setup(&m); |
906 | 924 | ||
907 | m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); | 925 | m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); |
908 | no_way_out = mce_no_way_out(&m, &msg); | ||
909 | |||
910 | final = &__get_cpu_var(mces_seen); | 926 | final = &__get_cpu_var(mces_seen); |
911 | *final = m; | 927 | *final = m; |
912 | 928 | ||
929 | no_way_out = mce_no_way_out(&m, &msg); | ||
930 | |||
913 | barrier(); | 931 | barrier(); |
914 | 932 | ||
915 | /* | 933 | /* |
@@ -926,14 +944,14 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
926 | order = mce_start(&no_way_out); | 944 | order = mce_start(&no_way_out); |
927 | for (i = 0; i < banks; i++) { | 945 | for (i = 0; i < banks; i++) { |
928 | __clear_bit(i, toclear); | 946 | __clear_bit(i, toclear); |
929 | if (!bank[i]) | 947 | if (!mce_banks[i].ctl) |
930 | continue; | 948 | continue; |
931 | 949 | ||
932 | m.misc = 0; | 950 | m.misc = 0; |
933 | m.addr = 0; | 951 | m.addr = 0; |
934 | m.bank = i; | 952 | m.bank = i; |
935 | 953 | ||
936 | m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); | 954 | m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); |
937 | if ((m.status & MCI_STATUS_VAL) == 0) | 955 | if ((m.status & MCI_STATUS_VAL) == 0) |
938 | continue; | 956 | continue; |
939 | 957 | ||
@@ -974,9 +992,9 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
974 | kill_it = 1; | 992 | kill_it = 1; |
975 | 993 | ||
976 | if (m.status & MCI_STATUS_MISCV) | 994 | if (m.status & MCI_STATUS_MISCV) |
977 | m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4); | 995 | m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i)); |
978 | if (m.status & MCI_STATUS_ADDRV) | 996 | if (m.status & MCI_STATUS_ADDRV) |
979 | m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4); | 997 | m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i)); |
980 | 998 | ||
981 | /* | 999 | /* |
982 | * Action optional error. Queue address for later processing. | 1000 | * Action optional error. Queue address for later processing. |
@@ -1101,7 +1119,7 @@ void mce_log_therm_throt_event(__u64 status) | |||
1101 | */ | 1119 | */ |
1102 | static int check_interval = 5 * 60; /* 5 minutes */ | 1120 | static int check_interval = 5 * 60; /* 5 minutes */ |
1103 | 1121 | ||
1104 | static DEFINE_PER_CPU(int, next_interval); /* in jiffies */ | 1122 | static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */ |
1105 | static DEFINE_PER_CPU(struct timer_list, mce_timer); | 1123 | static DEFINE_PER_CPU(struct timer_list, mce_timer); |
1106 | 1124 | ||
1107 | static void mcheck_timer(unsigned long data) | 1125 | static void mcheck_timer(unsigned long data) |
@@ -1120,7 +1138,7 @@ static void mcheck_timer(unsigned long data) | |||
1120 | * Alert userspace if needed. If we logged an MCE, reduce the | 1138 | * Alert userspace if needed. If we logged an MCE, reduce the |
1121 | * polling interval, otherwise increase the polling interval. | 1139 | * polling interval, otherwise increase the polling interval. |
1122 | */ | 1140 | */ |
1123 | n = &__get_cpu_var(next_interval); | 1141 | n = &__get_cpu_var(mce_next_interval); |
1124 | if (mce_notify_irq()) | 1142 | if (mce_notify_irq()) |
1125 | *n = max(*n/2, HZ/100); | 1143 | *n = max(*n/2, HZ/100); |
1126 | else | 1144 | else |
@@ -1169,10 +1187,26 @@ int mce_notify_irq(void) | |||
1169 | } | 1187 | } |
1170 | EXPORT_SYMBOL_GPL(mce_notify_irq); | 1188 | EXPORT_SYMBOL_GPL(mce_notify_irq); |
1171 | 1189 | ||
1190 | static int mce_banks_init(void) | ||
1191 | { | ||
1192 | int i; | ||
1193 | |||
1194 | mce_banks = kzalloc(banks * sizeof(struct mce_bank), GFP_KERNEL); | ||
1195 | if (!mce_banks) | ||
1196 | return -ENOMEM; | ||
1197 | for (i = 0; i < banks; i++) { | ||
1198 | struct mce_bank *b = &mce_banks[i]; | ||
1199 | |||
1200 | b->ctl = -1ULL; | ||
1201 | b->init = 1; | ||
1202 | } | ||
1203 | return 0; | ||
1204 | } | ||
1205 | |||
1172 | /* | 1206 | /* |
1173 | * Initialize Machine Checks for a CPU. | 1207 | * Initialize Machine Checks for a CPU. |
1174 | */ | 1208 | */ |
1175 | static int mce_cap_init(void) | 1209 | static int __cpuinit mce_cap_init(void) |
1176 | { | 1210 | { |
1177 | unsigned b; | 1211 | unsigned b; |
1178 | u64 cap; | 1212 | u64 cap; |
@@ -1192,11 +1226,11 @@ static int mce_cap_init(void) | |||
1192 | /* Don't support asymmetric configurations today */ | 1226 | /* Don't support asymmetric configurations today */ |
1193 | WARN_ON(banks != 0 && b != banks); | 1227 | WARN_ON(banks != 0 && b != banks); |
1194 | banks = b; | 1228 | banks = b; |
1195 | if (!bank) { | 1229 | if (!mce_banks) { |
1196 | bank = kmalloc(banks * sizeof(u64), GFP_KERNEL); | 1230 | int err = mce_banks_init(); |
1197 | if (!bank) | 1231 | |
1198 | return -ENOMEM; | 1232 | if (err) |
1199 | memset(bank, 0xff, banks * sizeof(u64)); | 1233 | return err; |
1200 | } | 1234 | } |
1201 | 1235 | ||
1202 | /* Use accurate RIP reporting if available. */ | 1236 | /* Use accurate RIP reporting if available. */ |
@@ -1228,15 +1262,17 @@ static void mce_init(void) | |||
1228 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); | 1262 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); |
1229 | 1263 | ||
1230 | for (i = 0; i < banks; i++) { | 1264 | for (i = 0; i < banks; i++) { |
1231 | if (skip_bank_init(i)) | 1265 | struct mce_bank *b = &mce_banks[i]; |
1266 | |||
1267 | if (!b->init) | ||
1232 | continue; | 1268 | continue; |
1233 | wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); | 1269 | wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl); |
1234 | wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | 1270 | wrmsrl(MSR_IA32_MCx_STATUS(i), 0); |
1235 | } | 1271 | } |
1236 | } | 1272 | } |
1237 | 1273 | ||
1238 | /* Add per CPU specific workarounds here */ | 1274 | /* Add per CPU specific workarounds here */ |
1239 | static int mce_cpu_quirks(struct cpuinfo_x86 *c) | 1275 | static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) |
1240 | { | 1276 | { |
1241 | if (c->x86_vendor == X86_VENDOR_UNKNOWN) { | 1277 | if (c->x86_vendor == X86_VENDOR_UNKNOWN) { |
1242 | pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); | 1278 | pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); |
@@ -1251,7 +1287,7 @@ static int mce_cpu_quirks(struct cpuinfo_x86 *c) | |||
1251 | * trips off incorrectly with the IOMMU & 3ware | 1287 | * trips off incorrectly with the IOMMU & 3ware |
1252 | * & Cerberus: | 1288 | * & Cerberus: |
1253 | */ | 1289 | */ |
1254 | clear_bit(10, (unsigned long *)&bank[4]); | 1290 | clear_bit(10, (unsigned long *)&mce_banks[4].ctl); |
1255 | } | 1291 | } |
1256 | if (c->x86 <= 17 && mce_bootlog < 0) { | 1292 | if (c->x86 <= 17 && mce_bootlog < 0) { |
1257 | /* | 1293 | /* |
@@ -1265,7 +1301,7 @@ static int mce_cpu_quirks(struct cpuinfo_x86 *c) | |||
1265 | * by default. | 1301 | * by default. |
1266 | */ | 1302 | */ |
1267 | if (c->x86 == 6 && banks > 0) | 1303 | if (c->x86 == 6 && banks > 0) |
1268 | bank[0] = 0; | 1304 | mce_banks[0].ctl = 0; |
1269 | } | 1305 | } |
1270 | 1306 | ||
1271 | if (c->x86_vendor == X86_VENDOR_INTEL) { | 1307 | if (c->x86_vendor == X86_VENDOR_INTEL) { |
@@ -1278,8 +1314,8 @@ static int mce_cpu_quirks(struct cpuinfo_x86 *c) | |||
1278 | * valid event later, merely don't write CTL0. | 1314 | * valid event later, merely don't write CTL0. |
1279 | */ | 1315 | */ |
1280 | 1316 | ||
1281 | if (c->x86 == 6 && c->x86_model < 0x1A) | 1317 | if (c->x86 == 6 && c->x86_model < 0x1A && banks > 0) |
1282 | __set_bit(0, &dont_init_banks); | 1318 | mce_banks[0].init = 0; |
1283 | 1319 | ||
1284 | /* | 1320 | /* |
1285 | * All newer Intel systems support MCE broadcasting. Enable | 1321 | * All newer Intel systems support MCE broadcasting. Enable |
@@ -1335,7 +1371,7 @@ static void mce_cpu_features(struct cpuinfo_x86 *c) | |||
1335 | static void mce_init_timer(void) | 1371 | static void mce_init_timer(void) |
1336 | { | 1372 | { |
1337 | struct timer_list *t = &__get_cpu_var(mce_timer); | 1373 | struct timer_list *t = &__get_cpu_var(mce_timer); |
1338 | int *n = &__get_cpu_var(next_interval); | 1374 | int *n = &__get_cpu_var(mce_next_interval); |
1339 | 1375 | ||
1340 | if (mce_ignore_ce) | 1376 | if (mce_ignore_ce) |
1341 | return; | 1377 | return; |
@@ -1348,6 +1384,17 @@ static void mce_init_timer(void) | |||
1348 | add_timer_on(t, smp_processor_id()); | 1384 | add_timer_on(t, smp_processor_id()); |
1349 | } | 1385 | } |
1350 | 1386 | ||
1387 | /* Handle unconfigured int18 (should never happen) */ | ||
1388 | static void unexpected_machine_check(struct pt_regs *regs, long error_code) | ||
1389 | { | ||
1390 | printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", | ||
1391 | smp_processor_id()); | ||
1392 | } | ||
1393 | |||
1394 | /* Call the installed machine check handler for this CPU setup. */ | ||
1395 | void (*machine_check_vector)(struct pt_regs *, long error_code) = | ||
1396 | unexpected_machine_check; | ||
1397 | |||
1351 | /* | 1398 | /* |
1352 | * Called for each booted CPU to set up machine checks. | 1399 | * Called for each booted CPU to set up machine checks. |
1353 | * Must be called with preempt off: | 1400 | * Must be called with preempt off: |
@@ -1561,8 +1608,10 @@ static struct miscdevice mce_log_device = { | |||
1561 | */ | 1608 | */ |
1562 | static int __init mcheck_enable(char *str) | 1609 | static int __init mcheck_enable(char *str) |
1563 | { | 1610 | { |
1564 | if (*str == 0) | 1611 | if (*str == 0) { |
1565 | enable_p5_mce(); | 1612 | enable_p5_mce(); |
1613 | return 1; | ||
1614 | } | ||
1566 | if (*str == '=') | 1615 | if (*str == '=') |
1567 | str++; | 1616 | str++; |
1568 | if (!strcmp(str, "off")) | 1617 | if (!strcmp(str, "off")) |
@@ -1603,8 +1652,10 @@ static int mce_disable(void) | |||
1603 | int i; | 1652 | int i; |
1604 | 1653 | ||
1605 | for (i = 0; i < banks; i++) { | 1654 | for (i = 0; i < banks; i++) { |
1606 | if (!skip_bank_init(i)) | 1655 | struct mce_bank *b = &mce_banks[i]; |
1607 | wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); | 1656 | |
1657 | if (b->init) | ||
1658 | wrmsrl(MSR_IA32_MCx_CTL(i), 0); | ||
1608 | } | 1659 | } |
1609 | return 0; | 1660 | return 0; |
1610 | } | 1661 | } |
@@ -1679,14 +1730,15 @@ DEFINE_PER_CPU(struct sys_device, mce_dev); | |||
1679 | __cpuinitdata | 1730 | __cpuinitdata |
1680 | void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); | 1731 | void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); |
1681 | 1732 | ||
1682 | static struct sysdev_attribute *bank_attrs; | 1733 | static inline struct mce_bank *attr_to_bank(struct sysdev_attribute *attr) |
1734 | { | ||
1735 | return container_of(attr, struct mce_bank, attr); | ||
1736 | } | ||
1683 | 1737 | ||
1684 | static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, | 1738 | static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, |
1685 | char *buf) | 1739 | char *buf) |
1686 | { | 1740 | { |
1687 | u64 b = bank[attr - bank_attrs]; | 1741 | return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl); |
1688 | |||
1689 | return sprintf(buf, "%llx\n", b); | ||
1690 | } | 1742 | } |
1691 | 1743 | ||
1692 | static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, | 1744 | static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, |
@@ -1697,7 +1749,7 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, | |||
1697 | if (strict_strtoull(buf, 0, &new) < 0) | 1749 | if (strict_strtoull(buf, 0, &new) < 0) |
1698 | return -EINVAL; | 1750 | return -EINVAL; |
1699 | 1751 | ||
1700 | bank[attr - bank_attrs] = new; | 1752 | attr_to_bank(attr)->ctl = new; |
1701 | mce_restart(); | 1753 | mce_restart(); |
1702 | 1754 | ||
1703 | return size; | 1755 | return size; |
@@ -1839,7 +1891,7 @@ static __cpuinit int mce_create_device(unsigned int cpu) | |||
1839 | } | 1891 | } |
1840 | for (j = 0; j < banks; j++) { | 1892 | for (j = 0; j < banks; j++) { |
1841 | err = sysdev_create_file(&per_cpu(mce_dev, cpu), | 1893 | err = sysdev_create_file(&per_cpu(mce_dev, cpu), |
1842 | &bank_attrs[j]); | 1894 | &mce_banks[j].attr); |
1843 | if (err) | 1895 | if (err) |
1844 | goto error2; | 1896 | goto error2; |
1845 | } | 1897 | } |
@@ -1848,10 +1900,10 @@ static __cpuinit int mce_create_device(unsigned int cpu) | |||
1848 | return 0; | 1900 | return 0; |
1849 | error2: | 1901 | error2: |
1850 | while (--j >= 0) | 1902 | while (--j >= 0) |
1851 | sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[j]); | 1903 | sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[j].attr); |
1852 | error: | 1904 | error: |
1853 | while (--i >= 0) | 1905 | while (--i >= 0) |
1854 | sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); | 1906 | sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
1855 | 1907 | ||
1856 | sysdev_unregister(&per_cpu(mce_dev, cpu)); | 1908 | sysdev_unregister(&per_cpu(mce_dev, cpu)); |
1857 | 1909 | ||
@@ -1869,7 +1921,7 @@ static __cpuinit void mce_remove_device(unsigned int cpu) | |||
1869 | sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); | 1921 | sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); |
1870 | 1922 | ||
1871 | for (i = 0; i < banks; i++) | 1923 | for (i = 0; i < banks; i++) |
1872 | sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]); | 1924 | sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr); |
1873 | 1925 | ||
1874 | sysdev_unregister(&per_cpu(mce_dev, cpu)); | 1926 | sysdev_unregister(&per_cpu(mce_dev, cpu)); |
1875 | cpumask_clear_cpu(cpu, mce_dev_initialized); | 1927 | cpumask_clear_cpu(cpu, mce_dev_initialized); |
@@ -1886,8 +1938,10 @@ static void mce_disable_cpu(void *h) | |||
1886 | if (!(action & CPU_TASKS_FROZEN)) | 1938 | if (!(action & CPU_TASKS_FROZEN)) |
1887 | cmci_clear(); | 1939 | cmci_clear(); |
1888 | for (i = 0; i < banks; i++) { | 1940 | for (i = 0; i < banks; i++) { |
1889 | if (!skip_bank_init(i)) | 1941 | struct mce_bank *b = &mce_banks[i]; |
1890 | wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); | 1942 | |
1943 | if (b->init) | ||
1944 | wrmsrl(MSR_IA32_MCx_CTL(i), 0); | ||
1891 | } | 1945 | } |
1892 | } | 1946 | } |
1893 | 1947 | ||
@@ -1902,8 +1956,10 @@ static void mce_reenable_cpu(void *h) | |||
1902 | if (!(action & CPU_TASKS_FROZEN)) | 1956 | if (!(action & CPU_TASKS_FROZEN)) |
1903 | cmci_reenable(); | 1957 | cmci_reenable(); |
1904 | for (i = 0; i < banks; i++) { | 1958 | for (i = 0; i < banks; i++) { |
1905 | if (!skip_bank_init(i)) | 1959 | struct mce_bank *b = &mce_banks[i]; |
1906 | wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]); | 1960 | |
1961 | if (b->init) | ||
1962 | wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl); | ||
1907 | } | 1963 | } |
1908 | } | 1964 | } |
1909 | 1965 | ||
@@ -1935,7 +1991,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
1935 | case CPU_DOWN_FAILED: | 1991 | case CPU_DOWN_FAILED: |
1936 | case CPU_DOWN_FAILED_FROZEN: | 1992 | case CPU_DOWN_FAILED_FROZEN: |
1937 | t->expires = round_jiffies(jiffies + | 1993 | t->expires = round_jiffies(jiffies + |
1938 | __get_cpu_var(next_interval)); | 1994 | __get_cpu_var(mce_next_interval)); |
1939 | add_timer_on(t, cpu); | 1995 | add_timer_on(t, cpu); |
1940 | smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); | 1996 | smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); |
1941 | break; | 1997 | break; |
@@ -1951,35 +2007,21 @@ static struct notifier_block mce_cpu_notifier __cpuinitdata = { | |||
1951 | .notifier_call = mce_cpu_callback, | 2007 | .notifier_call = mce_cpu_callback, |
1952 | }; | 2008 | }; |
1953 | 2009 | ||
1954 | static __init int mce_init_banks(void) | 2010 | static __init void mce_init_banks(void) |
1955 | { | 2011 | { |
1956 | int i; | 2012 | int i; |
1957 | 2013 | ||
1958 | bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks, | ||
1959 | GFP_KERNEL); | ||
1960 | if (!bank_attrs) | ||
1961 | return -ENOMEM; | ||
1962 | |||
1963 | for (i = 0; i < banks; i++) { | 2014 | for (i = 0; i < banks; i++) { |
1964 | struct sysdev_attribute *a = &bank_attrs[i]; | 2015 | struct mce_bank *b = &mce_banks[i]; |
2016 | struct sysdev_attribute *a = &b->attr; | ||
1965 | 2017 | ||
1966 | a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i); | 2018 | a->attr.name = b->attrname; |
1967 | if (!a->attr.name) | 2019 | snprintf(b->attrname, ATTR_LEN, "bank%d", i); |
1968 | goto nomem; | ||
1969 | 2020 | ||
1970 | a->attr.mode = 0644; | 2021 | a->attr.mode = 0644; |
1971 | a->show = show_bank; | 2022 | a->show = show_bank; |
1972 | a->store = set_bank; | 2023 | a->store = set_bank; |
1973 | } | 2024 | } |
1974 | return 0; | ||
1975 | |||
1976 | nomem: | ||
1977 | while (--i >= 0) | ||
1978 | kfree(bank_attrs[i].attr.name); | ||
1979 | kfree(bank_attrs); | ||
1980 | bank_attrs = NULL; | ||
1981 | |||
1982 | return -ENOMEM; | ||
1983 | } | 2025 | } |
1984 | 2026 | ||
1985 | static __init int mce_init_device(void) | 2027 | static __init int mce_init_device(void) |
@@ -1992,9 +2034,7 @@ static __init int mce_init_device(void) | |||
1992 | 2034 | ||
1993 | zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL); | 2035 | zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL); |
1994 | 2036 | ||
1995 | err = mce_init_banks(); | 2037 | mce_init_banks(); |
1996 | if (err) | ||
1997 | return err; | ||
1998 | 2038 | ||
1999 | err = sysdev_class_register(&mce_sysclass); | 2039 | err = sysdev_class_register(&mce_sysclass); |
2000 | if (err) | 2040 | if (err) |
@@ -2014,57 +2054,65 @@ static __init int mce_init_device(void) | |||
2014 | 2054 | ||
2015 | device_initcall(mce_init_device); | 2055 | device_initcall(mce_init_device); |
2016 | 2056 | ||
2017 | #else /* CONFIG_X86_OLD_MCE: */ | 2057 | /* |
2018 | 2058 | * Old style boot options parsing. Only for compatibility. | |
2019 | int nr_mce_banks; | 2059 | */ |
2020 | EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ | 2060 | static int __init mcheck_disable(char *str) |
2061 | { | ||
2062 | mce_disabled = 1; | ||
2063 | return 1; | ||
2064 | } | ||
2065 | __setup("nomce", mcheck_disable); | ||
2021 | 2066 | ||
2022 | /* This has to be run for each processor */ | 2067 | #ifdef CONFIG_DEBUG_FS |
2023 | void mcheck_init(struct cpuinfo_x86 *c) | 2068 | struct dentry *mce_get_debugfs_dir(void) |
2024 | { | 2069 | { |
2025 | if (mce_disabled) | 2070 | static struct dentry *dmce; |
2026 | return; | ||
2027 | 2071 | ||
2028 | switch (c->x86_vendor) { | 2072 | if (!dmce) |
2029 | case X86_VENDOR_AMD: | 2073 | dmce = debugfs_create_dir("mce", NULL); |
2030 | amd_mcheck_init(c); | ||
2031 | break; | ||
2032 | 2074 | ||
2033 | case X86_VENDOR_INTEL: | 2075 | return dmce; |
2034 | if (c->x86 == 5) | 2076 | } |
2035 | intel_p5_mcheck_init(c); | ||
2036 | if (c->x86 == 6) | ||
2037 | intel_p6_mcheck_init(c); | ||
2038 | if (c->x86 == 15) | ||
2039 | intel_p4_mcheck_init(c); | ||
2040 | break; | ||
2041 | 2077 | ||
2042 | case X86_VENDOR_CENTAUR: | 2078 | static void mce_reset(void) |
2043 | if (c->x86 == 5) | 2079 | { |
2044 | winchip_mcheck_init(c); | 2080 | cpu_missing = 0; |
2045 | break; | 2081 | atomic_set(&mce_fake_paniced, 0); |
2082 | atomic_set(&mce_executing, 0); | ||
2083 | atomic_set(&mce_callin, 0); | ||
2084 | atomic_set(&global_nwo, 0); | ||
2085 | } | ||
2046 | 2086 | ||
2047 | default: | 2087 | static int fake_panic_get(void *data, u64 *val) |
2048 | break; | 2088 | { |
2049 | } | 2089 | *val = fake_panic; |
2050 | printk(KERN_INFO "mce: CPU supports %d MCE banks\n", nr_mce_banks); | 2090 | return 0; |
2051 | } | 2091 | } |
2052 | 2092 | ||
2053 | static int __init mcheck_enable(char *str) | 2093 | static int fake_panic_set(void *data, u64 val) |
2054 | { | 2094 | { |
2055 | mce_p5_enabled = 1; | 2095 | mce_reset(); |
2056 | return 1; | 2096 | fake_panic = val; |
2097 | return 0; | ||
2057 | } | 2098 | } |
2058 | __setup("mce", mcheck_enable); | ||
2059 | 2099 | ||
2060 | #endif /* CONFIG_X86_OLD_MCE */ | 2100 | DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get, |
2101 | fake_panic_set, "%llu\n"); | ||
2061 | 2102 | ||
2062 | /* | 2103 | static int __init mce_debugfs_init(void) |
2063 | * Old style boot options parsing. Only for compatibility. | ||
2064 | */ | ||
2065 | static int __init mcheck_disable(char *str) | ||
2066 | { | 2104 | { |
2067 | mce_disabled = 1; | 2105 | struct dentry *dmce, *ffake_panic; |
2068 | return 1; | 2106 | |
2107 | dmce = mce_get_debugfs_dir(); | ||
2108 | if (!dmce) | ||
2109 | return -ENOMEM; | ||
2110 | ffake_panic = debugfs_create_file("fake_panic", 0444, dmce, NULL, | ||
2111 | &fake_panic_fops); | ||
2112 | if (!ffake_panic) | ||
2113 | return -ENOMEM; | ||
2114 | |||
2115 | return 0; | ||
2069 | } | 2116 | } |
2070 | __setup("nomce", mcheck_disable); | 2117 | late_initcall(mce_debugfs_init); |
2118 | #endif | ||
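DEFINE_SIMPLE_ATTRIBUTE() (from <linux/fs.h>) wraps the fake_panic_get/fake_panic_set pair into file_operations, so the knob can be flipped from user space with an ordinary write, e.g. 'echo 1 > /sys/kernel/debug/mce/fake_panic' with debugfs mounted at the usual place. The pattern in isolation, with hypothetical names:

static u64 example_knob;

static int example_get(void *data, u64 *val)
{
	*val = example_knob;
	return 0;
}

static int example_set(void *data, u64 val)
{
	example_knob = val;
	return 0;
}
/* generates example_fops: reads print "%llu\n", writes parse a u64 */
DEFINE_SIMPLE_ATTRIBUTE(example_fops, example_get, example_set, "%llu\n");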
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 1fecba404fd8..83a3d1f4efca 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c | |||
@@ -69,7 +69,7 @@ struct threshold_bank { | |||
69 | struct threshold_block *blocks; | 69 | struct threshold_block *blocks; |
70 | cpumask_var_t cpus; | 70 | cpumask_var_t cpus; |
71 | }; | 71 | }; |
72 | static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); | 72 | static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks); |
73 | 73 | ||
74 | #ifdef CONFIG_SMP | 74 | #ifdef CONFIG_SMP |
75 | static unsigned char shared_bank[NR_BANKS] = { | 75 | static unsigned char shared_bank[NR_BANKS] = { |
@@ -489,8 +489,9 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
489 | int i, err = 0; | 489 | int i, err = 0; |
490 | struct threshold_bank *b = NULL; | 490 | struct threshold_bank *b = NULL; |
491 | char name[32]; | 491 | char name[32]; |
492 | #ifdef CONFIG_SMP | ||
492 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 493 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
493 | 494 | #endif | |
494 | 495 | ||
495 | sprintf(name, "threshold_bank%i", bank); | 496 | sprintf(name, "threshold_bank%i", bank); |
496 | 497 | ||
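The threshold_banks change moves the array dimension into DEFINE_PER_CPU()'s type argument, which is the form the reworked per-cpu allocator accepts; element access is unchanged. Roughly:

/* one NR_BANKS-sized array of bank pointers per possible CPU */
DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks);

/* access from the threshold code (sketch): */
struct threshold_bank *b = per_cpu(threshold_banks, cpu)[bank];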
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index e1acec0f7a32..7c785634af2b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/init.h> | 8 | #include <linux/init.h> |
9 | #include <linux/interrupt.h> | 9 | #include <linux/interrupt.h> |
10 | #include <linux/percpu.h> | 10 | #include <linux/percpu.h> |
11 | #include <linux/sched.h> | ||
11 | #include <asm/apic.h> | 12 | #include <asm/apic.h> |
12 | #include <asm/processor.h> | 13 | #include <asm/processor.h> |
13 | #include <asm/msr.h> | 14 | #include <asm/msr.h> |
@@ -90,7 +91,7 @@ static void cmci_discover(int banks, int boot) | |||
90 | if (test_bit(i, owned)) | 91 | if (test_bit(i, owned)) |
91 | continue; | 92 | continue; |
92 | 93 | ||
93 | rdmsrl(MSR_IA32_MC0_CTL2 + i, val); | 94 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
94 | 95 | ||
95 | /* Already owned by someone else? */ | 96 | /* Already owned by someone else? */ |
96 | if (val & CMCI_EN) { | 97 | if (val & CMCI_EN) { |
@@ -101,8 +102,8 @@ static void cmci_discover(int banks, int boot) | |||
101 | } | 102 | } |
102 | 103 | ||
103 | val |= CMCI_EN | CMCI_THRESHOLD; | 104 | val |= CMCI_EN | CMCI_THRESHOLD; |
104 | wrmsrl(MSR_IA32_MC0_CTL2 + i, val); | 105 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); |
105 | rdmsrl(MSR_IA32_MC0_CTL2 + i, val); | 106 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
106 | 107 | ||
107 | /* Did the enable bit stick? -- the bank supports CMCI */ | 108 | /* Did the enable bit stick? -- the bank supports CMCI */ |
108 | if (val & CMCI_EN) { | 109 | if (val & CMCI_EN) { |
@@ -152,9 +153,9 @@ void cmci_clear(void) | |||
152 | if (!test_bit(i, __get_cpu_var(mce_banks_owned))) | 153 | if (!test_bit(i, __get_cpu_var(mce_banks_owned))) |
153 | continue; | 154 | continue; |
154 | /* Disable CMCI */ | 155 | /* Disable CMCI */ |
155 | rdmsrl(MSR_IA32_MC0_CTL2 + i, val); | 156 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
156 | val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK); | 157 | val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK); |
157 | wrmsrl(MSR_IA32_MC0_CTL2 + i, val); | 158 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); |
158 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); | 159 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); |
159 | } | 160 | } |
160 | spin_unlock_irqrestore(&cmci_discover_lock, flags); | 161 | spin_unlock_irqrestore(&cmci_discover_lock, flags); |
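Note that MSR_IA32_MCx_CTL2() cannot share the four-MSR stride of the other bank helpers: the per-bank CTL2 registers are architecturally contiguous, one MSR per bank, so the expected definition is simply (again per this series' msr-index.h):

#define MSR_IA32_MCx_CTL2(x)	(MSR_IA32_MC0_CTL2 + (x))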
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c deleted file mode 100644 index f5f2d6f71fb6..000000000000 --- a/arch/x86/kernel/cpu/mcheck/non-fatal.c +++ /dev/null | |||
@@ -1,94 +0,0 @@ | |||
1 | /* | ||
2 | * Non Fatal Machine Check Exception Reporting | ||
3 | * | ||
4 | * (C) Copyright 2002 Dave Jones. <davej@redhat.com> | ||
5 | * | ||
6 | * This file contains routines to check for non-fatal MCEs every 15s | ||
7 | * | ||
8 | */ | ||
9 | #include <linux/interrupt.h> | ||
10 | #include <linux/workqueue.h> | ||
11 | #include <linux/jiffies.h> | ||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/types.h> | ||
15 | #include <linux/init.h> | ||
16 | #include <linux/smp.h> | ||
17 | |||
18 | #include <asm/processor.h> | ||
19 | #include <asm/system.h> | ||
20 | #include <asm/mce.h> | ||
21 | #include <asm/msr.h> | ||
22 | |||
23 | static int firstbank; | ||
24 | |||
25 | #define MCE_RATE (15*HZ) /* timer rate is 15s */ | ||
26 | |||
27 | static void mce_checkregs(void *info) | ||
28 | { | ||
29 | u32 low, high; | ||
30 | int i; | ||
31 | |||
32 | for (i = firstbank; i < nr_mce_banks; i++) { | ||
33 | rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high); | ||
34 | |||
35 | if (!(high & (1<<31))) | ||
36 | continue; | ||
37 | |||
38 | printk(KERN_INFO "MCE: The hardware reports a non fatal, " | ||
39 | "correctable incident occurred on CPU %d.\n", | ||
40 | smp_processor_id()); | ||
41 | |||
42 | printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low); | ||
43 | |||
44 | /* | ||
45 | * Scrub the error so we don't pick it up in MCE_RATE | ||
46 | * seconds time: | ||
47 | */ | ||
48 | wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL); | ||
49 | |||
50 | /* Serialize: */ | ||
51 | wmb(); | ||
52 | add_taint(TAINT_MACHINE_CHECK); | ||
53 | } | ||
54 | } | ||
55 | |||
56 | static void mce_work_fn(struct work_struct *work); | ||
57 | static DECLARE_DELAYED_WORK(mce_work, mce_work_fn); | ||
58 | |||
59 | static void mce_work_fn(struct work_struct *work) | ||
60 | { | ||
61 | on_each_cpu(mce_checkregs, NULL, 1); | ||
62 | schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE)); | ||
63 | } | ||
64 | |||
65 | static int __init init_nonfatal_mce_checker(void) | ||
66 | { | ||
67 | struct cpuinfo_x86 *c = &boot_cpu_data; | ||
68 | |||
69 | /* Check for MCE support */ | ||
70 | if (!cpu_has(c, X86_FEATURE_MCE)) | ||
71 | return -ENODEV; | ||
72 | |||
73 | /* Check for PPro style MCA */ | ||
74 | if (!cpu_has(c, X86_FEATURE_MCA)) | ||
75 | return -ENODEV; | ||
76 | |||
77 | /* Some Athlons misbehave when we frob bank 0 */ | ||
78 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && | ||
79 | boot_cpu_data.x86 == 6) | ||
80 | firstbank = 1; | ||
81 | else | ||
82 | firstbank = 0; | ||
83 | |||
84 | /* | ||
85 | * Check for non-fatal errors every MCE_RATE s | ||
86 | */ | ||
87 | schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE)); | ||
88 | printk(KERN_INFO "Machine check exception polling timer started.\n"); | ||
89 | |||
90 | return 0; | ||
91 | } | ||
92 | module_init(init_nonfatal_mce_checker); | ||
93 | |||
94 | MODULE_LICENSE("GPL"); | ||
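The deleted poller above is built on the self-rearming delayed-work idiom: a work item that runs a function on every CPU, then reschedules itself. Reduced to its core (a sketch with illustrative names, not kernel-tree code):

#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include <linux/smp.h>

#define POLL_RATE	(15 * HZ)	/* same cadence as the removed file */

static void poll_fn(struct work_struct *work);
static DECLARE_DELAYED_WORK(poll_work, poll_fn);

static void check_one_cpu(void *info)
{
	/* per-CPU machine-check bank scan would go here */
}

static void poll_fn(struct work_struct *work)
{
	/* run the check everywhere, then re-arm ourselves */
	on_each_cpu(check_one_cpu, NULL, 1);
	schedule_delayed_work(&poll_work, round_jiffies_relative(POLL_RATE));
}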
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c deleted file mode 100644 index 4482aea9aa2e..000000000000 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ /dev/null | |||
@@ -1,163 +0,0 @@ | |||
1 | /* | ||
2 | * P4 specific Machine Check Exception Reporting | ||
3 | */ | ||
4 | #include <linux/kernel.h> | ||
5 | #include <linux/types.h> | ||
6 | #include <linux/init.h> | ||
7 | #include <linux/smp.h> | ||
8 | |||
9 | #include <asm/processor.h> | ||
10 | #include <asm/mce.h> | ||
11 | #include <asm/msr.h> | ||
12 | |||
13 | /* as supported by the P4/Xeon family */ | ||
14 | struct intel_mce_extended_msrs { | ||
15 | u32 eax; | ||
16 | u32 ebx; | ||
17 | u32 ecx; | ||
18 | u32 edx; | ||
19 | u32 esi; | ||
20 | u32 edi; | ||
21 | u32 ebp; | ||
22 | u32 esp; | ||
23 | u32 eflags; | ||
24 | u32 eip; | ||
25 | /* u32 *reserved[]; */ | ||
26 | }; | ||
27 | |||
28 | static int mce_num_extended_msrs; | ||
29 | |||
30 | /* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ | ||
31 | static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) | ||
32 | { | ||
33 | u32 h; | ||
34 | |||
35 | rdmsr(MSR_IA32_MCG_EAX, r->eax, h); | ||
36 | rdmsr(MSR_IA32_MCG_EBX, r->ebx, h); | ||
37 | rdmsr(MSR_IA32_MCG_ECX, r->ecx, h); | ||
38 | rdmsr(MSR_IA32_MCG_EDX, r->edx, h); | ||
39 | rdmsr(MSR_IA32_MCG_ESI, r->esi, h); | ||
40 | rdmsr(MSR_IA32_MCG_EDI, r->edi, h); | ||
41 | rdmsr(MSR_IA32_MCG_EBP, r->ebp, h); | ||
42 | rdmsr(MSR_IA32_MCG_ESP, r->esp, h); | ||
43 | rdmsr(MSR_IA32_MCG_EFLAGS, r->eflags, h); | ||
44 | rdmsr(MSR_IA32_MCG_EIP, r->eip, h); | ||
45 | } | ||
46 | |||
47 | static void intel_machine_check(struct pt_regs *regs, long error_code) | ||
48 | { | ||
49 | u32 alow, ahigh, high, low; | ||
50 | u32 mcgstl, mcgsth; | ||
51 | int recover = 1; | ||
52 | int i; | ||
53 | |||
54 | rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | ||
55 | if (mcgstl & (1<<0)) /* Recoverable ? */ | ||
56 | recover = 0; | ||
57 | |||
58 | printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", | ||
59 | smp_processor_id(), mcgsth, mcgstl); | ||
60 | |||
61 | if (mce_num_extended_msrs > 0) { | ||
62 | struct intel_mce_extended_msrs dbg; | ||
63 | |||
64 | intel_get_extended_msrs(&dbg); | ||
65 | |||
66 | printk(KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n" | ||
67 | "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n" | ||
68 | "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n", | ||
69 | smp_processor_id(), dbg.eip, dbg.eflags, | ||
70 | dbg.eax, dbg.ebx, dbg.ecx, dbg.edx, | ||
71 | dbg.esi, dbg.edi, dbg.ebp, dbg.esp); | ||
72 | } | ||
73 | |||
74 | for (i = 0; i < nr_mce_banks; i++) { | ||
75 | rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high); | ||
76 | if (high & (1<<31)) { | ||
77 | char misc[20]; | ||
78 | char addr[24]; | ||
79 | |||
80 | misc[0] = addr[0] = '\0'; | ||
81 | if (high & (1<<29)) | ||
82 | recover |= 1; | ||
83 | if (high & (1<<25)) | ||
84 | recover |= 2; | ||
85 | high &= ~(1<<31); | ||
86 | if (high & (1<<27)) { | ||
87 | rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh); | ||
88 | snprintf(misc, 20, "[%08x%08x]", ahigh, alow); | ||
89 | } | ||
90 | if (high & (1<<26)) { | ||
91 | rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh); | ||
92 | snprintf(addr, 24, " at %08x%08x", ahigh, alow); | ||
93 | } | ||
94 | printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n", | ||
95 | smp_processor_id(), i, high, low, misc, addr); | ||
96 | } | ||
97 | } | ||
98 | |||
99 | if (recover & 2) | ||
100 | panic("CPU context corrupt"); | ||
101 | if (recover & 1) | ||
102 | panic("Unable to continue"); | ||
103 | |||
104 | printk(KERN_EMERG "Attempting to continue.\n"); | ||
105 | |||
106 | /* | ||
107 | * Do not clear the MSR_IA32_MCi_STATUS if the error is not | ||
108 | * recoverable/continuable. This will allow BIOS to look at the MSRs | ||
109 | * for errors if the OS could not log the error. | ||
110 | */ | ||
111 | for (i = 0; i < nr_mce_banks; i++) { | ||
112 | u32 msr; | ||
113 | msr = MSR_IA32_MC0_STATUS+i*4; | ||
114 | rdmsr(msr, low, high); | ||
115 | if (high&(1<<31)) { | ||
116 | /* Clear it */ | ||
117 | wrmsr(msr, 0UL, 0UL); | ||
118 | /* Serialize */ | ||
119 | wmb(); | ||
120 | add_taint(TAINT_MACHINE_CHECK); | ||
121 | } | ||
122 | } | ||
123 | mcgstl &= ~(1<<2); | ||
124 | wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | ||
125 | } | ||
126 | |||
127 | void intel_p4_mcheck_init(struct cpuinfo_x86 *c) | ||
128 | { | ||
129 | u32 l, h; | ||
130 | int i; | ||
131 | |||
132 | machine_check_vector = intel_machine_check; | ||
133 | wmb(); | ||
134 | |||
135 | printk(KERN_INFO "Intel machine check architecture supported.\n"); | ||
136 | rdmsr(MSR_IA32_MCG_CAP, l, h); | ||
137 | if (l & (1<<8)) /* Control register present ? */ | ||
138 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); | ||
139 | nr_mce_banks = l & 0xff; | ||
140 | |||
141 | for (i = 0; i < nr_mce_banks; i++) { | ||
142 | wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); | ||
143 | wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); | ||
144 | } | ||
145 | |||
146 | set_in_cr4(X86_CR4_MCE); | ||
147 | printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", | ||
148 | smp_processor_id()); | ||
149 | |||
150 | /* Check for P4/Xeon extended MCE MSRs */ | ||
151 | rdmsr(MSR_IA32_MCG_CAP, l, h); | ||
152 | if (l & (1<<9)) {/* MCG_EXT_P */ | ||
153 | mce_num_extended_msrs = (l >> 16) & 0xff; | ||
154 | printk(KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)" | ||
155 | " available\n", | ||
156 | smp_processor_id(), mce_num_extended_msrs); | ||
157 | |||
158 | #ifdef CONFIG_X86_MCE_P4THERMAL | ||
159 | /* Check for P4/Xeon Thermal monitor */ | ||
160 | intel_init_thermal(c); | ||
161 | #endif | ||
162 | } | ||
163 | } | ||
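The deleted P4 code reads MSRs with the split 32-bit rdmsr(msr, lo, hi) form throughout, while the surviving code (mce_intel.c above) prefers the 64-bit rdmsrl(). The relationship between the two, as a short annotated sketch:

u32 lo, hi;
u64 val;

rdmsr(MSR_IA32_MCG_STATUS, lo, hi);	/* two 32-bit halves */
rdmsrl(MSR_IA32_MCG_STATUS, val);	/* one 64-bit value */
/* val == ((u64)hi << 32) | lo, barring a change between the reads */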
diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c deleted file mode 100644 index 01e4f8178183..000000000000 --- a/arch/x86/kernel/cpu/mcheck/p6.c +++ /dev/null | |||
@@ -1,127 +0,0 @@ | |||
1 | /* | ||
2 | * P6 specific Machine Check Exception Reporting | ||
3 | * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk> | ||
4 | */ | ||
5 | #include <linux/interrupt.h> | ||
6 | #include <linux/kernel.h> | ||
7 | #include <linux/types.h> | ||
8 | #include <linux/init.h> | ||
9 | #include <linux/smp.h> | ||
10 | |||
11 | #include <asm/processor.h> | ||
12 | #include <asm/system.h> | ||
13 | #include <asm/mce.h> | ||
14 | #include <asm/msr.h> | ||
15 | |||
16 | /* Machine Check Handler For PII/PIII */ | ||
17 | static void intel_machine_check(struct pt_regs *regs, long error_code) | ||
18 | { | ||
19 | u32 alow, ahigh, high, low; | ||
20 | u32 mcgstl, mcgsth; | ||
21 | int recover = 1; | ||
22 | int i; | ||
23 | |||
24 | rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | ||
25 | if (mcgstl & (1<<0)) /* Recoverable ? */ | ||
26 | recover = 0; | ||
27 | |||
28 | printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", | ||
29 | smp_processor_id(), mcgsth, mcgstl); | ||
30 | |||
31 | for (i = 0; i < nr_mce_banks; i++) { | ||
32 | rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high); | ||
33 | if (high & (1<<31)) { | ||
34 | char misc[20]; | ||
35 | char addr[24]; | ||
36 | |||
37 | misc[0] = '\0'; | ||
38 | addr[0] = '\0'; | ||
39 | |||
40 | if (high & (1<<29)) | ||
41 | recover |= 1; | ||
42 | if (high & (1<<25)) | ||
43 | recover |= 2; | ||
44 | high &= ~(1<<31); | ||
45 | |||
46 | if (high & (1<<27)) { | ||
47 | rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh); | ||
48 | snprintf(misc, 20, "[%08x%08x]", ahigh, alow); | ||
49 | } | ||
50 | if (high & (1<<26)) { | ||
51 | rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh); | ||
52 | snprintf(addr, 24, " at %08x%08x", ahigh, alow); | ||
53 | } | ||
54 | |||
55 | printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n", | ||
56 | smp_processor_id(), i, high, low, misc, addr); | ||
57 | } | ||
58 | } | ||
59 | |||
60 | if (recover & 2) | ||
61 | panic("CPU context corrupt"); | ||
62 | if (recover & 1) | ||
63 | panic("Unable to continue"); | ||
64 | |||
65 | printk(KERN_EMERG "Attempting to continue.\n"); | ||
66 | /* | ||
67 | * Do not clear the MSR_IA32_MCi_STATUS if the error is not | ||
68 | * recoverable/continuable. This will allow BIOS to look at the MSRs | ||
69 | * for errors if the OS could not log the error: | ||
70 | */ | ||
71 | for (i = 0; i < nr_mce_banks; i++) { | ||
72 | unsigned int msr; | ||
73 | |||
74 | msr = MSR_IA32_MC0_STATUS+i*4; | ||
75 | rdmsr(msr, low, high); | ||
76 | if (high & (1<<31)) { | ||
77 | /* Clear it: */ | ||
78 | wrmsr(msr, 0UL, 0UL); | ||
79 | /* Serialize: */ | ||
80 | wmb(); | ||
81 | add_taint(TAINT_MACHINE_CHECK); | ||
82 | } | ||
83 | } | ||
84 | mcgstl &= ~(1<<2); | ||
85 | wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | ||
86 | } | ||
87 | |||
88 | /* Set up machine check reporting for processors with Intel style MCE: */ | ||
89 | void intel_p6_mcheck_init(struct cpuinfo_x86 *c) | ||
90 | { | ||
91 | u32 l, h; | ||
92 | int i; | ||
93 | |||
94 | /* Check for MCE support */ | ||
95 | if (!cpu_has(c, X86_FEATURE_MCE)) | ||
96 | return; | ||
97 | |||
98 | /* Check for PPro style MCA */ | ||
99 | if (!cpu_has(c, X86_FEATURE_MCA)) | ||
100 | return; | ||
101 | |||
102 | /* Ok machine check is available */ | ||
103 | machine_check_vector = intel_machine_check; | ||
104 | /* Make sure the vector pointer is visible before we enable MCEs: */ | ||
105 | wmb(); | ||
106 | |||
107 | printk(KERN_INFO "Intel machine check architecture supported.\n"); | ||
108 | rdmsr(MSR_IA32_MCG_CAP, l, h); | ||
109 | if (l & (1<<8)) /* Control register present ? */ | ||
110 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); | ||
111 | nr_mce_banks = l & 0xff; | ||
112 | |||
113 | /* | ||
114 | * Following the example in IA-32 SDM Vol 3: | ||
115 | * - MC0_CTL should not be written | ||
116 | * - Status registers on all banks should be cleared on reset | ||
117 | */ | ||
118 | for (i = 1; i < nr_mce_banks; i++) | ||
119 | wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); | ||
120 | |||
121 | for (i = 0; i < nr_mce_banks; i++) | ||
122 | wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); | ||
123 | |||
124 | set_in_cr4(X86_CR4_MCE); | ||
125 | printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", | ||
126 | smp_processor_id()); | ||
127 | } | ||
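Both deleted handlers (p4.c and p6.c) test raw bits of the upper 32-bit half of MCi_STATUS. For reference, the architectural names behind those magic numbers, per the Intel SDM (annotation only; the 64-bit mce.c replacement uses full-width accessors instead):

/* 'high' in the deleted code is the upper 32 bits of MCi_STATUS */
#define MCI_STATUS_VAL_HI	(1U << 31)	/* bit 63: valid error logged */
#define MCI_STATUS_UC_HI	(1U << 29)	/* bit 61: uncorrected -> recover |= 1 */
#define MCI_STATUS_MISCV_HI	(1U << 27)	/* bit 59: MCi_MISC holds extra info */
#define MCI_STATUS_ADDRV_HI	(1U << 26)	/* bit 58: MCi_ADDR holds an address */
#define MCI_STATUS_PCC_HI	(1U << 25)	/* bit 57: context corrupt -> recover |= 2 */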
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 5957a93e5173..b3a1dba75330 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -34,20 +34,31 @@ | |||
34 | /* How long to wait between reporting thermal events */ | 34 | /* How long to wait between reporting thermal events */ |
35 | #define CHECK_INTERVAL (300 * HZ) | 35 | #define CHECK_INTERVAL (300 * HZ) |
36 | 36 | ||
37 | static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; | 37 | /* |
38 | static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); | 38 | * Current thermal throttling state: |
39 | static DEFINE_PER_CPU(bool, thermal_throttle_active); | 39 | */ |
40 | struct thermal_state { | ||
41 | bool is_throttled; | ||
42 | |||
43 | u64 next_check; | ||
44 | unsigned long throttle_count; | ||
45 | unsigned long last_throttle_count; | ||
46 | }; | ||
47 | |||
48 | static DEFINE_PER_CPU(struct thermal_state, thermal_state); | ||
40 | 49 | ||
41 | static atomic_t therm_throt_en = ATOMIC_INIT(0); | 50 | static atomic_t therm_throt_en = ATOMIC_INIT(0); |
42 | 51 | ||
43 | #ifdef CONFIG_SYSFS | 52 | #ifdef CONFIG_SYSFS |
44 | #define define_therm_throt_sysdev_one_ro(_name) \ | 53 | #define define_therm_throt_sysdev_one_ro(_name) \ |
45 | static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) | 54 | static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) |
46 | 55 | ||
47 | #define define_therm_throt_sysdev_show_func(name) \ | 56 | #define define_therm_throt_sysdev_show_func(name) \ |
48 | static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \ | 57 | \ |
49 | struct sysdev_attribute *attr, \ | 58 | static ssize_t therm_throt_sysdev_show_##name( \ |
50 | char *buf) \ | 59 | struct sys_device *dev, \ |
60 | struct sysdev_attribute *attr, \ | ||
61 | char *buf) \ | ||
51 | { \ | 62 | { \ |
52 | unsigned int cpu = dev->id; \ | 63 | unsigned int cpu = dev->id; \ |
53 | ssize_t ret; \ | 64 | ssize_t ret; \ |
@@ -55,7 +66,7 @@ static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \ | |||
55 | preempt_disable(); /* CPU hotplug */ \ | 66 | preempt_disable(); /* CPU hotplug */ \ |
56 | if (cpu_online(cpu)) \ | 67 | if (cpu_online(cpu)) \ |
57 | ret = sprintf(buf, "%lu\n", \ | 68 | ret = sprintf(buf, "%lu\n", \ |
58 | per_cpu(thermal_throttle_##name, cpu)); \ | 69 | per_cpu(thermal_state, cpu).name); \ |
59 | else \ | 70 | else \ |
60 | ret = 0; \ | 71 | ret = 0; \ |
61 | preempt_enable(); \ | 72 | preempt_enable(); \ |
@@ -63,11 +74,11 @@ static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \ | |||
63 | return ret; \ | 74 | return ret; \ |
64 | } | 75 | } |
65 | 76 | ||
66 | define_therm_throt_sysdev_show_func(count); | 77 | define_therm_throt_sysdev_show_func(throttle_count); |
67 | define_therm_throt_sysdev_one_ro(count); | 78 | define_therm_throt_sysdev_one_ro(throttle_count); |
68 | 79 | ||
69 | static struct attribute *thermal_throttle_attrs[] = { | 80 | static struct attribute *thermal_throttle_attrs[] = { |
70 | &attr_count.attr, | 81 | &attr_throttle_count.attr, |
71 | NULL | 82 | NULL |
72 | }; | 83 | }; |
73 | 84 | ||
@@ -93,33 +104,39 @@ static struct attribute_group thermal_throttle_attr_group = { | |||
93 | * 1 : Event should be logged further, and a message has been | 104 | * 1 : Event should be logged further, and a message has been |
94 | * printed to the syslog. | 105 | * printed to the syslog. |
95 | */ | 106 | */ |
96 | static int therm_throt_process(int curr) | 107 | static int therm_throt_process(bool is_throttled) |
97 | { | 108 | { |
98 | unsigned int cpu = smp_processor_id(); | 109 | struct thermal_state *state; |
99 | __u64 tmp_jiffs = get_jiffies_64(); | 110 | unsigned int this_cpu; |
100 | bool was_throttled = __get_cpu_var(thermal_throttle_active); | 111 | bool was_throttled; |
101 | bool is_throttled = __get_cpu_var(thermal_throttle_active) = curr; | 112 | u64 now; |
113 | |||
114 | this_cpu = smp_processor_id(); | ||
115 | now = get_jiffies_64(); | ||
116 | state = &per_cpu(thermal_state, this_cpu); | ||
117 | |||
118 | was_throttled = state->is_throttled; | ||
119 | state->is_throttled = is_throttled; | ||
102 | 120 | ||
103 | if (is_throttled) | 121 | if (is_throttled) |
104 | __get_cpu_var(thermal_throttle_count)++; | 122 | state->throttle_count++; |
105 | 123 | ||
106 | if (!(was_throttled ^ is_throttled) && | 124 | if (time_before64(now, state->next_check) && |
107 | time_before64(tmp_jiffs, __get_cpu_var(next_check))) | 125 | state->throttle_count != state->last_throttle_count) |
108 | return 0; | 126 | return 0; |
109 | 127 | ||
110 | __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL; | 128 | state->next_check = now + CHECK_INTERVAL; |
129 | state->last_throttle_count = state->throttle_count; | ||
111 | 130 | ||
112 | /* if we just entered the thermal event */ | 131 | /* if we just entered the thermal event */ |
113 | if (is_throttled) { | 132 | if (is_throttled) { |
114 | printk(KERN_CRIT "CPU%d: Temperature above threshold, " | 133 | printk(KERN_CRIT "CPU%d: Temperature above threshold, cpu clock throttled (total events = %lu)\n", this_cpu, state->throttle_count); |
115 | "cpu clock throttled (total events = %lu)\n", | ||
116 | cpu, __get_cpu_var(thermal_throttle_count)); | ||
117 | 134 | ||
118 | add_taint(TAINT_MACHINE_CHECK); | 135 | add_taint(TAINT_MACHINE_CHECK); |
119 | return 1; | 136 | return 1; |
120 | } | 137 | } |
121 | if (was_throttled) { | 138 | if (was_throttled) { |
122 | printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu); | 139 | printk(KERN_INFO "CPU%d: Temperature/speed normal\n", this_cpu); |
123 | return 1; | 140 | return 1; |
124 | } | 141 | } |
125 | 142 | ||
@@ -213,7 +230,7 @@ static void intel_thermal_interrupt(void) | |||
213 | __u64 msr_val; | 230 | __u64 msr_val; |
214 | 231 | ||
215 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); | 232 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); |
216 | if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT)) | 233 | if (therm_throt_process((msr_val & THERM_STATUS_PROCHOT) != 0)) |
217 | mce_log_therm_throt_event(msr_val); | 234 | mce_log_therm_throt_event(msr_val); |
218 | } | 235 | } |
219 | 236 | ||
@@ -260,9 +277,6 @@ void intel_init_thermal(struct cpuinfo_x86 *c) | |||
260 | return; | 277 | return; |
261 | } | 278 | } |
262 | 279 | ||
263 | if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2)) | ||
264 | tm2 = 1; | ||
265 | |||
266 | /* Check whether a vector already exists */ | 280 | /* Check whether a vector already exists */ |
267 | if (h & APIC_VECTOR_MASK) { | 281 | if (h & APIC_VECTOR_MASK) { |
268 | printk(KERN_DEBUG | 282 | printk(KERN_DEBUG |
@@ -271,6 +285,16 @@ void intel_init_thermal(struct cpuinfo_x86 *c) | |||
271 | return; | 285 | return; |
272 | } | 286 | } |
273 | 287 | ||
288 | /* early Pentium M models use different method for enabling TM2 */ | ||
289 | if (cpu_has(c, X86_FEATURE_TM2)) { | ||
290 | if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) { | ||
291 | rdmsr(MSR_THERM2_CTL, l, h); | ||
292 | if (l & MSR_THERM2_CTL_TM_SELECT) | ||
293 | tm2 = 1; | ||
294 | } else if (l & MSR_IA32_MISC_ENABLE_TM2) | ||
295 | tm2 = 1; | ||
296 | } | ||
297 | |||
274 | /* We'll mask the thermal vector in the lapic till we're ready: */ | 298 | /* We'll mask the thermal vector in the lapic till we're ready: */ |
275 | h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; | 299 | h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; |
276 | apic_write(APIC_LVTTHMR, h); | 300 | apic_write(APIC_LVTTHMR, h); |
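The therm_throt.c change folds three scattered per-CPU variables into one per-CPU struct. The access idiom it relies on, in isolation (illustrative names; __get_cpu_var() is the 2.6.31-era accessor, later replaced by this_cpu_ptr() in mainline):

#include <linux/percpu.h>

struct demo_state {
	u64		next_check;
	unsigned long	count;
};

static DEFINE_PER_CPU(struct demo_state, demo_state);

static void bump_this_cpu(void)
{
	/* this CPU's copy, from a preemption-safe context */
	struct demo_state *state = &__get_cpu_var(demo_state);

	state->count++;
}

static unsigned long read_cpu(int cpu)
{
	/* another CPU's copy, as the sysfs show path above does */
	return per_cpu(demo_state, cpu).count;
}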
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index 08b6ea4c62b4..3c1b12d461d1 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c | |||
@@ -96,17 +96,24 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) | |||
96 | unsigned long long base, size; | 96 | unsigned long long base, size; |
97 | char *ptr; | 97 | char *ptr; |
98 | char line[LINE_SIZE]; | 98 | char line[LINE_SIZE]; |
99 | int length; | ||
99 | size_t linelen; | 100 | size_t linelen; |
100 | 101 | ||
101 | if (!capable(CAP_SYS_ADMIN)) | 102 | if (!capable(CAP_SYS_ADMIN)) |
102 | return -EPERM; | 103 | return -EPERM; |
103 | if (!len) | ||
104 | return -EINVAL; | ||
105 | 104 | ||
106 | memset(line, 0, LINE_SIZE); | 105 | memset(line, 0, LINE_SIZE); |
107 | if (len > LINE_SIZE) | 106 | |
108 | len = LINE_SIZE; | 107 | length = len; |
109 | if (copy_from_user(line, buf, len - 1)) | 108 | length--; |
109 | |||
110 | if (length > LINE_SIZE - 1) | ||
111 | length = LINE_SIZE - 1; | ||
112 | |||
113 | if (length < 0) | ||
114 | return -EINVAL; | ||
115 | |||
116 | if (copy_from_user(line, buf, length)) | ||
110 | return -EFAULT; | 117 | return -EFAULT; |
111 | 118 | ||
112 | linelen = strlen(line); | 119 | linelen = strlen(line); |
@@ -126,8 +133,8 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) | |||
126 | return -EINVAL; | 133 | return -EINVAL; |
127 | 134 | ||
128 | base = simple_strtoull(line + 5, &ptr, 0); | 135 | base = simple_strtoull(line + 5, &ptr, 0); |
129 | for (; isspace(*ptr); ++ptr) | 136 | while (isspace(*ptr)) |
130 | ; | 137 | ptr++; |
131 | 138 | ||
132 | if (strncmp(ptr, "size=", 5)) | 139 | if (strncmp(ptr, "size=", 5)) |
133 | return -EINVAL; | 140 | return -EINVAL; |
@@ -135,14 +142,14 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) | |||
135 | size = simple_strtoull(ptr + 5, &ptr, 0); | 142 | size = simple_strtoull(ptr + 5, &ptr, 0); |
136 | if ((base & 0xfff) || (size & 0xfff)) | 143 | if ((base & 0xfff) || (size & 0xfff)) |
137 | return -EINVAL; | 144 | return -EINVAL; |
138 | for (; isspace(*ptr); ++ptr) | 145 | while (isspace(*ptr)) |
139 | ; | 146 | ptr++; |
140 | 147 | ||
141 | if (strncmp(ptr, "type=", 5)) | 148 | if (strncmp(ptr, "type=", 5)) |
142 | return -EINVAL; | 149 | return -EINVAL; |
143 | ptr += 5; | 150 | ptr += 5; |
144 | for (; isspace(*ptr); ++ptr) | 151 | while (isspace(*ptr)) |
145 | ; | 152 | ptr++; |
146 | 153 | ||
147 | for (i = 0; i < MTRR_NUM_TYPES; ++i) { | 154 | for (i = 0; i < MTRR_NUM_TYPES; ++i) { |
148 | if (strcmp(ptr, mtrr_strings[i])) | 155 | if (strcmp(ptr, mtrr_strings[i])) |
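The mtrr_write() hunk replaces the old clamp with explicit length bookkeeping: the final byte (the expected trailing newline) is dropped, the copy is capped at LINE_SIZE - 1 so the memset()-supplied NUL terminator survives, and a zero-length write now falls out as -EINVAL via the length < 0 check rather than a separate guard. The new path restated in isolation with comments (demo_clamp is an illustrative wrapper; LINE_SIZE comes from the file itself):

static int demo_clamp(char *line, const char __user *buf, size_t len)
{
	int length = len;

	memset(line, 0, LINE_SIZE);

	length--;			/* drop the trailing newline byte */
	if (length > LINE_SIZE - 1)	/* cap: preserve the NUL terminator */
		length = LINE_SIZE - 1;
	if (length < 0)			/* len == 0: nothing to parse */
		return -EINVAL;
	if (copy_from_user(line, buf, length))
		return -EFAULT;

	return length;
}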
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 7af0f88a4163..84e83de54575 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c | |||
@@ -58,6 +58,7 @@ unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; | |||
58 | static DEFINE_MUTEX(mtrr_mutex); | 58 | static DEFINE_MUTEX(mtrr_mutex); |
59 | 59 | ||
60 | u64 size_or_mask, size_and_mask; | 60 | u64 size_or_mask, size_and_mask; |
61 | static bool mtrr_aps_delayed_init; | ||
61 | 62 | ||
62 | static struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM]; | 63 | static struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM]; |
63 | 64 | ||
@@ -163,7 +164,10 @@ static void ipi_handler(void *info) | |||
163 | if (data->smp_reg != ~0U) { | 164 | if (data->smp_reg != ~0U) { |
164 | mtrr_if->set(data->smp_reg, data->smp_base, | 165 | mtrr_if->set(data->smp_reg, data->smp_base, |
165 | data->smp_size, data->smp_type); | 166 | data->smp_size, data->smp_type); |
166 | } else { | 167 | } else if (mtrr_aps_delayed_init) { |
168 | /* | ||
169 | * Initialize the MTRRs inaddition to the synchronisation. | ||
170 | */ | ||
167 | mtrr_if->set_all(); | 171 | mtrr_if->set_all(); |
168 | } | 172 | } |
169 | 173 | ||
@@ -265,6 +269,8 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ | |||
265 | */ | 269 | */ |
266 | if (reg != ~0U) | 270 | if (reg != ~0U) |
267 | mtrr_if->set(reg, base, size, type); | 271 | mtrr_if->set(reg, base, size, type); |
272 | else if (!mtrr_aps_delayed_init) | ||
273 | mtrr_if->set_all(); | ||
268 | 274 | ||
269 | /* Wait for the others */ | 275 | /* Wait for the others */ |
270 | while (atomic_read(&data.count)) | 276 | while (atomic_read(&data.count)) |
@@ -721,9 +727,7 @@ void __init mtrr_bp_init(void) | |||
721 | 727 | ||
722 | void mtrr_ap_init(void) | 728 | void mtrr_ap_init(void) |
723 | { | 729 | { |
724 | unsigned long flags; | 730 | if (!use_intel() || mtrr_aps_delayed_init) |
725 | |||
726 | if (!mtrr_if || !use_intel()) | ||
727 | return; | 731 | return; |
728 | /* | 732 | /* |
729 | * Ideally we should hold mtrr_mutex here to avoid mtrr entries | 733 | * Ideally we should hold mtrr_mutex here to avoid mtrr entries |
@@ -738,11 +742,7 @@ void mtrr_ap_init(void) | |||
738 | * 2. cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug | 742 | * 2. cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug |
739 | * lock to prevent mtrr entry changes | 743 | * lock to prevent mtrr entry changes |
740 | */ | 744 | */ |
741 | local_irq_save(flags); | 745 | set_mtrr(~0U, 0, 0, 0); |
742 | |||
743 | mtrr_if->set_all(); | ||
744 | |||
745 | local_irq_restore(flags); | ||
746 | } | 746 | } |
747 | 747 | ||
748 | /** | 748 | /** |
@@ -753,6 +753,34 @@ void mtrr_save_state(void) | |||
753 | smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1); | 753 | smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1); |
754 | } | 754 | } |
755 | 755 | ||
756 | void set_mtrr_aps_delayed_init(void) | ||
757 | { | ||
758 | if (!use_intel()) | ||
759 | return; | ||
760 | |||
761 | mtrr_aps_delayed_init = true; | ||
762 | } | ||
763 | |||
764 | /* | ||
765 | * MTRR initialization for all APs | ||
766 | */ | ||
767 | void mtrr_aps_init(void) | ||
768 | { | ||
769 | if (!use_intel()) | ||
770 | return; | ||
771 | |||
772 | set_mtrr(~0U, 0, 0, 0); | ||
773 | mtrr_aps_delayed_init = false; | ||
774 | } | ||
775 | |||
776 | void mtrr_bp_restore(void) | ||
777 | { | ||
778 | if (!use_intel()) | ||
779 | return; | ||
780 | |||
781 | mtrr_if->set_all(); | ||
782 | } | ||
783 | |||
756 | static int __init mtrr_init_finialize(void) | 784 | static int __init mtrr_init_finialize(void) |
757 | { | 785 | { |
758 | if (!mtrr_if) | 786 | if (!mtrr_if) |
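Taken together, the mtrr/main.c additions implement deferred AP initialization. A sketch of the intended call order, as implied by the helpers above (the actual call sites live in the smpboot and suspend/resume paths, outside this diff):

/*
 * set_mtrr_aps_delayed_init();	boot CPU: defer per-AP MTRR setup;
 *				mtrr_ap_init() now returns early
 * ...bring up all APs...
 * mtrr_aps_init();		one set_mtrr(~0U, 0, 0, 0) rendezvous
 *				programs every AP in lockstep
 *
 * mtrr_bp_restore();		resume path: reprogram the boot CPU
 */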
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_event.c index f9cd0849bd42..2e20bca3cca1 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Performance counter x86 architecture code | 2 | * Performance events x86 architecture code |
3 | * | 3 | * |
4 | * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> | 4 | * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> |
5 | * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar | 5 | * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar |
@@ -11,7 +11,7 @@ | |||
11 | * For licencing details see kernel-base/COPYING | 11 | * For licencing details see kernel-base/COPYING |
12 | */ | 12 | */ |
13 | 13 | ||
14 | #include <linux/perf_counter.h> | 14 | #include <linux/perf_event.h> |
15 | #include <linux/capability.h> | 15 | #include <linux/capability.h> |
16 | #include <linux/notifier.h> | 16 | #include <linux/notifier.h> |
17 | #include <linux/hardirq.h> | 17 | #include <linux/hardirq.h> |
@@ -27,19 +27,19 @@ | |||
27 | #include <asm/stacktrace.h> | 27 | #include <asm/stacktrace.h> |
28 | #include <asm/nmi.h> | 28 | #include <asm/nmi.h> |
29 | 29 | ||
30 | static u64 perf_counter_mask __read_mostly; | 30 | static u64 perf_event_mask __read_mostly; |
31 | 31 | ||
32 | /* The maximal number of PEBS counters: */ | 32 | /* The maximal number of PEBS events: */ |
33 | #define MAX_PEBS_COUNTERS 4 | 33 | #define MAX_PEBS_EVENTS 4 |
34 | 34 | ||
35 | /* The size of a BTS record in bytes: */ | 35 | /* The size of a BTS record in bytes: */ |
36 | #define BTS_RECORD_SIZE 24 | 36 | #define BTS_RECORD_SIZE 24 |
37 | 37 | ||
38 | /* The size of a per-cpu BTS buffer in bytes: */ | 38 | /* The size of a per-cpu BTS buffer in bytes: */ |
39 | #define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 1024) | 39 | #define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 2048) |
40 | 40 | ||
41 | /* The BTS overflow threshold in bytes from the end of the buffer: */ | 41 | /* The BTS overflow threshold in bytes from the end of the buffer: */ |
42 | #define BTS_OVFL_TH (BTS_RECORD_SIZE * 64) | 42 | #define BTS_OVFL_TH (BTS_RECORD_SIZE * 128) |
43 | 43 | ||
44 | 44 | ||
45 | /* | 45 | /* |
@@ -65,11 +65,11 @@ struct debug_store { | |||
65 | u64 pebs_index; | 65 | u64 pebs_index; |
66 | u64 pebs_absolute_maximum; | 66 | u64 pebs_absolute_maximum; |
67 | u64 pebs_interrupt_threshold; | 67 | u64 pebs_interrupt_threshold; |
68 | u64 pebs_counter_reset[MAX_PEBS_COUNTERS]; | 68 | u64 pebs_event_reset[MAX_PEBS_EVENTS]; |
69 | }; | 69 | }; |
70 | 70 | ||
71 | struct cpu_hw_counters { | 71 | struct cpu_hw_events { |
72 | struct perf_counter *counters[X86_PMC_IDX_MAX]; | 72 | struct perf_event *events[X86_PMC_IDX_MAX]; |
73 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 73 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
74 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 74 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
75 | unsigned long interrupts; | 75 | unsigned long interrupts; |
@@ -77,6 +77,18 @@ struct cpu_hw_counters { | |||
77 | struct debug_store *ds; | 77 | struct debug_store *ds; |
78 | }; | 78 | }; |
79 | 79 | ||
80 | struct event_constraint { | ||
81 | unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
82 | int code; | ||
83 | }; | ||
84 | |||
85 | #define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) } | ||
86 | #define EVENT_CONSTRAINT_END { .code = 0, .idxmsk[0] = 0 } | ||
87 | |||
88 | #define for_each_event_constraint(e, c) \ | ||
89 | for ((e) = (c); (e)->idxmsk[0]; (e)++) | ||
90 | |||
91 | |||
80 | /* | 92 | /* |
81 | * struct x86_pmu - generic x86 pmu | 93 | * struct x86_pmu - generic x86 pmu |
82 | */ | 94 | */ |
@@ -86,30 +98,34 @@ struct x86_pmu { | |||
86 | int (*handle_irq)(struct pt_regs *); | 98 | int (*handle_irq)(struct pt_regs *); |
87 | void (*disable_all)(void); | 99 | void (*disable_all)(void); |
88 | void (*enable_all)(void); | 100 | void (*enable_all)(void); |
89 | void (*enable)(struct hw_perf_counter *, int); | 101 | void (*enable)(struct hw_perf_event *, int); |
90 | void (*disable)(struct hw_perf_counter *, int); | 102 | void (*disable)(struct hw_perf_event *, int); |
91 | unsigned eventsel; | 103 | unsigned eventsel; |
92 | unsigned perfctr; | 104 | unsigned perfctr; |
93 | u64 (*event_map)(int); | 105 | u64 (*event_map)(int); |
94 | u64 (*raw_event)(u64); | 106 | u64 (*raw_event)(u64); |
95 | int max_events; | 107 | int max_events; |
96 | int num_counters; | 108 | int num_events; |
97 | int num_counters_fixed; | 109 | int num_events_fixed; |
98 | int counter_bits; | 110 | int event_bits; |
99 | u64 counter_mask; | 111 | u64 event_mask; |
100 | int apic; | 112 | int apic; |
101 | u64 max_period; | 113 | u64 max_period; |
102 | u64 intel_ctrl; | 114 | u64 intel_ctrl; |
103 | void (*enable_bts)(u64 config); | 115 | void (*enable_bts)(u64 config); |
104 | void (*disable_bts)(void); | 116 | void (*disable_bts)(void); |
117 | int (*get_event_idx)(struct cpu_hw_events *cpuc, | ||
118 | struct hw_perf_event *hwc); | ||
105 | }; | 119 | }; |
106 | 120 | ||
107 | static struct x86_pmu x86_pmu __read_mostly; | 121 | static struct x86_pmu x86_pmu __read_mostly; |
108 | 122 | ||
109 | static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = { | 123 | static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { |
110 | .enabled = 1, | 124 | .enabled = 1, |
111 | }; | 125 | }; |
112 | 126 | ||
127 | static const struct event_constraint *event_constraints; | ||
128 | |||
113 | /* | 129 | /* |
114 | * Not sure about some of these | 130 | * Not sure about some of these |
115 | */ | 131 | */ |
@@ -124,37 +140,47 @@ static const u64 p6_perfmon_event_map[] = | |||
124 | [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, | 140 | [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, |
125 | }; | 141 | }; |
126 | 142 | ||
127 | static u64 p6_pmu_event_map(int event) | 143 | static u64 p6_pmu_event_map(int hw_event) |
128 | { | 144 | { |
129 | return p6_perfmon_event_map[event]; | 145 | return p6_perfmon_event_map[hw_event]; |
130 | } | 146 | } |
131 | 147 | ||
132 | /* | 148 | /* |
133 | * Counter setting that is specified not to count anything. | 149 | * Event setting that is specified not to count anything. |
134 | * We use this to effectively disable a counter. | 150 | * We use this to effectively disable a counter. |
135 | * | 151 | * |
136 | * L2_RQSTS with 0 MESI unit mask. | 152 | * L2_RQSTS with 0 MESI unit mask. |
137 | */ | 153 | */ |
138 | #define P6_NOP_COUNTER 0x0000002EULL | 154 | #define P6_NOP_EVENT 0x0000002EULL |
139 | 155 | ||
140 | static u64 p6_pmu_raw_event(u64 event) | 156 | static u64 p6_pmu_raw_event(u64 hw_event) |
141 | { | 157 | { |
142 | #define P6_EVNTSEL_EVENT_MASK 0x000000FFULL | 158 | #define P6_EVNTSEL_EVENT_MASK 0x000000FFULL |
143 | #define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL | 159 | #define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL |
144 | #define P6_EVNTSEL_EDGE_MASK 0x00040000ULL | 160 | #define P6_EVNTSEL_EDGE_MASK 0x00040000ULL |
145 | #define P6_EVNTSEL_INV_MASK 0x00800000ULL | 161 | #define P6_EVNTSEL_INV_MASK 0x00800000ULL |
146 | #define P6_EVNTSEL_COUNTER_MASK 0xFF000000ULL | 162 | #define P6_EVNTSEL_REG_MASK 0xFF000000ULL |
147 | 163 | ||
148 | #define P6_EVNTSEL_MASK \ | 164 | #define P6_EVNTSEL_MASK \ |
149 | (P6_EVNTSEL_EVENT_MASK | \ | 165 | (P6_EVNTSEL_EVENT_MASK | \ |
150 | P6_EVNTSEL_UNIT_MASK | \ | 166 | P6_EVNTSEL_UNIT_MASK | \ |
151 | P6_EVNTSEL_EDGE_MASK | \ | 167 | P6_EVNTSEL_EDGE_MASK | \ |
152 | P6_EVNTSEL_INV_MASK | \ | 168 | P6_EVNTSEL_INV_MASK | \ |
153 | P6_EVNTSEL_COUNTER_MASK) | 169 | P6_EVNTSEL_REG_MASK) |
154 | 170 | ||
155 | return event & P6_EVNTSEL_MASK; | 171 | return hw_event & P6_EVNTSEL_MASK; |
156 | } | 172 | } |
157 | 173 | ||
174 | static const struct event_constraint intel_p6_event_constraints[] = | ||
175 | { | ||
176 | EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ | ||
177 | EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ | ||
178 | EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */ | ||
179 | EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | ||
180 | EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ | ||
181 | EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ | ||
182 | EVENT_CONSTRAINT_END | ||
183 | }; | ||
158 | 184 | ||
159 | /* | 185 | /* |
160 | * Intel PerfMon v3. Used on Core2 and later. | 186 | * Intel PerfMon v3. Used on Core2 and later. |
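The new event_constraint tables pair an event code with a bitmask of counter indices the event may be scheduled on; EVENT_CONSTRAINT(0x12, 0x2) above, for example, pins MUL to counter 1 only. How a scheduler might consume such a table (illustrative sketch; the real consumer is the get_event_idx callback wired up elsewhere in this file):

static int demo_constrained_idx(const struct event_constraint *table, u64 code)
{
	const struct event_constraint *c;

	for_each_event_constraint(c, table) {
		if (c->code == code)
			return ffs(c->idxmsk[0]) - 1; /* lowest permitted counter */
	}
	return -1;	/* unconstrained: any free counter will do */
}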
@@ -170,16 +196,45 @@ static const u64 intel_perfmon_event_map[] = | |||
170 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, | 196 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, |
171 | }; | 197 | }; |
172 | 198 | ||
173 | static u64 intel_pmu_event_map(int event) | 199 | static const struct event_constraint intel_core_event_constraints[] = |
200 | { | ||
201 | EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ | ||
202 | EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ | ||
203 | EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | ||
204 | EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ | ||
205 | EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ | ||
206 | EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ | ||
207 | EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ | ||
208 | EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ | ||
209 | EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ | ||
210 | EVENT_CONSTRAINT_END | ||
211 | }; | ||
212 | |||
213 | static const struct event_constraint intel_nehalem_event_constraints[] = | ||
214 | { | ||
215 | EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ | ||
216 | EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ | ||
217 | EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ | ||
218 | EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */ | ||
219 | EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */ | ||
220 | EVENT_CONSTRAINT(0x4c, 0x3), /* LOAD_HIT_PRE */ | ||
221 | EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ | ||
222 | EVENT_CONSTRAINT(0x52, 0x3), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */ | ||
223 | EVENT_CONSTRAINT(0x53, 0x3), /* L1D_CACHE_LOCK_FB_HIT */ | ||
224 | EVENT_CONSTRAINT(0xc5, 0x3), /* CACHE_LOCK_CYCLES */ | ||
225 | EVENT_CONSTRAINT_END | ||
226 | }; | ||
227 | |||
228 | static u64 intel_pmu_event_map(int hw_event) | ||
174 | { | 229 | { |
175 | return intel_perfmon_event_map[event]; | 230 | return intel_perfmon_event_map[hw_event]; |
176 | } | 231 | } |
177 | 232 | ||
178 | /* | 233 | /* |
179 | * Generalized hw caching related event table, filled | 234 | * Generalized hw caching related hw_event table, filled |
180 | * in on a per model basis. A value of 0 means | 235 | * in on a per model basis. A value of 0 means |
181 | * 'not supported', -1 means 'event makes no sense on | 236 | * 'not supported', -1 means 'hw_event makes no sense on |
182 | * this CPU', any other value means the raw event | 237 | * this CPU', any other value means the raw hw_event |
183 | * ID. | 238 | * ID. |
184 | */ | 239 | */ |
185 | 240 | ||
@@ -463,22 +518,22 @@ static const u64 atom_hw_cache_event_ids | |||
463 | }, | 518 | }, |
464 | }; | 519 | }; |
465 | 520 | ||
466 | static u64 intel_pmu_raw_event(u64 event) | 521 | static u64 intel_pmu_raw_event(u64 hw_event) |
467 | { | 522 | { |
468 | #define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL | 523 | #define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL |
469 | #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL | 524 | #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL |
470 | #define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL | 525 | #define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL |
471 | #define CORE_EVNTSEL_INV_MASK 0x00800000ULL | 526 | #define CORE_EVNTSEL_INV_MASK 0x00800000ULL |
472 | #define CORE_EVNTSEL_COUNTER_MASK 0xFF000000ULL | 527 | #define CORE_EVNTSEL_REG_MASK 0xFF000000ULL |
473 | 528 | ||
474 | #define CORE_EVNTSEL_MASK \ | 529 | #define CORE_EVNTSEL_MASK \ |
475 | (CORE_EVNTSEL_EVENT_MASK | \ | 530 | (CORE_EVNTSEL_EVENT_MASK | \ |
476 | CORE_EVNTSEL_UNIT_MASK | \ | 531 | CORE_EVNTSEL_UNIT_MASK | \ |
477 | CORE_EVNTSEL_EDGE_MASK | \ | 532 | CORE_EVNTSEL_EDGE_MASK | \ |
478 | CORE_EVNTSEL_INV_MASK | \ | 533 | CORE_EVNTSEL_INV_MASK | \ |
479 | CORE_EVNTSEL_COUNTER_MASK) | 534 | CORE_EVNTSEL_REG_MASK) |
480 | 535 | ||
481 | return event & CORE_EVNTSEL_MASK; | 536 | return hw_event & CORE_EVNTSEL_MASK; |
482 | } | 537 | } |
483 | 538 | ||
484 | static const u64 amd_hw_cache_event_ids | 539 | static const u64 amd_hw_cache_event_ids |
@@ -585,39 +640,39 @@ static const u64 amd_perfmon_event_map[] = | |||
585 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | 640 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, |
586 | }; | 641 | }; |
587 | 642 | ||
588 | static u64 amd_pmu_event_map(int event) | 643 | static u64 amd_pmu_event_map(int hw_event) |
589 | { | 644 | { |
590 | return amd_perfmon_event_map[event]; | 645 | return amd_perfmon_event_map[hw_event]; |
591 | } | 646 | } |
592 | 647 | ||
593 | static u64 amd_pmu_raw_event(u64 event) | 648 | static u64 amd_pmu_raw_event(u64 hw_event) |
594 | { | 649 | { |
595 | #define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL | 650 | #define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL |
596 | #define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL | 651 | #define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL |
597 | #define K7_EVNTSEL_EDGE_MASK 0x000040000ULL | 652 | #define K7_EVNTSEL_EDGE_MASK 0x000040000ULL |
598 | #define K7_EVNTSEL_INV_MASK 0x000800000ULL | 653 | #define K7_EVNTSEL_INV_MASK 0x000800000ULL |
599 | #define K7_EVNTSEL_COUNTER_MASK 0x0FF000000ULL | 654 | #define K7_EVNTSEL_REG_MASK 0x0FF000000ULL |
600 | 655 | ||
601 | #define K7_EVNTSEL_MASK \ | 656 | #define K7_EVNTSEL_MASK \ |
602 | (K7_EVNTSEL_EVENT_MASK | \ | 657 | (K7_EVNTSEL_EVENT_MASK | \ |
603 | K7_EVNTSEL_UNIT_MASK | \ | 658 | K7_EVNTSEL_UNIT_MASK | \ |
604 | K7_EVNTSEL_EDGE_MASK | \ | 659 | K7_EVNTSEL_EDGE_MASK | \ |
605 | K7_EVNTSEL_INV_MASK | \ | 660 | K7_EVNTSEL_INV_MASK | \ |
606 | K7_EVNTSEL_COUNTER_MASK) | 661 | K7_EVNTSEL_REG_MASK) |
607 | 662 | ||
608 | return event & K7_EVNTSEL_MASK; | 663 | return hw_event & K7_EVNTSEL_MASK; |
609 | } | 664 | } |
610 | 665 | ||
611 | /* | 666 | /* |
612 | * Propagate counter elapsed time into the generic counter. | 667 | * Propagate event elapsed time into the generic event. |
613 | * Can only be executed on the CPU where the counter is active. | 668 | * Can only be executed on the CPU where the event is active. |
614 | * Returns the delta events processed. | 669 | * Returns the delta events processed. |
615 | */ | 670 | */ |
616 | static u64 | 671 | static u64 |
617 | x86_perf_counter_update(struct perf_counter *counter, | 672 | x86_perf_event_update(struct perf_event *event, |
618 | struct hw_perf_counter *hwc, int idx) | 673 | struct hw_perf_event *hwc, int idx) |
619 | { | 674 | { |
620 | int shift = 64 - x86_pmu.counter_bits; | 675 | int shift = 64 - x86_pmu.event_bits; |
621 | u64 prev_raw_count, new_raw_count; | 676 | u64 prev_raw_count, new_raw_count; |
622 | s64 delta; | 677 | s64 delta; |
623 | 678 | ||
@@ -625,15 +680,15 @@ x86_perf_counter_update(struct perf_counter *counter, | |||
625 | return 0; | 680 | return 0; |
626 | 681 | ||
627 | /* | 682 | /* |
628 | * Careful: an NMI might modify the previous counter value. | 683 | * Careful: an NMI might modify the previous event value. |
629 | * | 684 | * |
630 | * Our tactic to handle this is to first atomically read and | 685 | * Our tactic to handle this is to first atomically read and |
631 | * exchange a new raw count - then add that new-prev delta | 686 | * exchange a new raw count - then add that new-prev delta |
632 | * count to the generic counter atomically: | 687 | * count to the generic event atomically: |
633 | */ | 688 | */ |
634 | again: | 689 | again: |
635 | prev_raw_count = atomic64_read(&hwc->prev_count); | 690 | prev_raw_count = atomic64_read(&hwc->prev_count); |
636 | rdmsrl(hwc->counter_base + idx, new_raw_count); | 691 | rdmsrl(hwc->event_base + idx, new_raw_count); |
637 | 692 | ||
638 | if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, | 693 | if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, |
639 | new_raw_count) != prev_raw_count) | 694 | new_raw_count) != prev_raw_count) |
@@ -642,7 +697,7 @@ again: | |||
642 | /* | 697 | /* |
643 | * Now we have the new raw value and have updated the prev | 698 | * Now we have the new raw value and have updated the prev |
644 | * timestamp already. We can now calculate the elapsed delta | 699 | * timestamp already. We can now calculate the elapsed delta |
645 | * (counter-)time and add that to the generic counter. | 700 | * (event-)time and add that to the generic event. |
646 | * | 701 | * |
647 | * Careful, not all hw sign-extends above the physical width | 702 | * Careful, not all hw sign-extends above the physical width |
648 | * of the count. | 703 | * of the count. |
@@ -650,13 +705,13 @@ again: | |||
650 | delta = (new_raw_count << shift) - (prev_raw_count << shift); | 705 | delta = (new_raw_count << shift) - (prev_raw_count << shift); |
651 | delta >>= shift; | 706 | delta >>= shift; |
652 | 707 | ||
653 | atomic64_add(delta, &counter->count); | 708 | atomic64_add(delta, &event->count); |
654 | atomic64_sub(delta, &hwc->period_left); | 709 | atomic64_sub(delta, &hwc->period_left); |
655 | 710 | ||
656 | return new_raw_count; | 711 | return new_raw_count; |
657 | } | 712 | } |
658 | 713 | ||
659 | static atomic_t active_counters; | 714 | static atomic_t active_events; |
660 | static DEFINE_MUTEX(pmc_reserve_mutex); | 715 | static DEFINE_MUTEX(pmc_reserve_mutex); |
661 | 716 | ||
662 | static bool reserve_pmc_hardware(void) | 717 | static bool reserve_pmc_hardware(void) |
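x86_perf_event_update() reads counters that are only event_bits wide, so the delta must be sign-extended through a double shift: shifting both raw values up by 64 - event_bits and the difference back down turns a counter wraparound into a small positive delta. A standalone illustration with 40-bit counters:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	int shift = 64 - 40;			/* 40-bit counter width */
	uint64_t prev = (1ULL << 40) - 2;	/* just before wraparound */
	uint64_t curr = 1;			/* wrapped past zero */
	int64_t delta;

	delta = (int64_t)(curr << shift) - (int64_t)(prev << shift);
	delta >>= shift;

	printf("delta = %lld\n", (long long)delta);	/* prints 3 */
	return 0;
}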
@@ -667,12 +722,12 @@ static bool reserve_pmc_hardware(void) | |||
667 | if (nmi_watchdog == NMI_LOCAL_APIC) | 722 | if (nmi_watchdog == NMI_LOCAL_APIC) |
668 | disable_lapic_nmi_watchdog(); | 723 | disable_lapic_nmi_watchdog(); |
669 | 724 | ||
670 | for (i = 0; i < x86_pmu.num_counters; i++) { | 725 | for (i = 0; i < x86_pmu.num_events; i++) { |
671 | if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) | 726 | if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) |
672 | goto perfctr_fail; | 727 | goto perfctr_fail; |
673 | } | 728 | } |
674 | 729 | ||
675 | for (i = 0; i < x86_pmu.num_counters; i++) { | 730 | for (i = 0; i < x86_pmu.num_events; i++) { |
676 | if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) | 731 | if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) |
677 | goto eventsel_fail; | 732 | goto eventsel_fail; |
678 | } | 733 | } |
@@ -685,7 +740,7 @@ eventsel_fail: | |||
685 | for (i--; i >= 0; i--) | 740 | for (i--; i >= 0; i--) |
686 | release_evntsel_nmi(x86_pmu.eventsel + i); | 741 | release_evntsel_nmi(x86_pmu.eventsel + i); |
687 | 742 | ||
688 | i = x86_pmu.num_counters; | 743 | i = x86_pmu.num_events; |
689 | 744 | ||
690 | perfctr_fail: | 745 | perfctr_fail: |
691 | for (i--; i >= 0; i--) | 746 | for (i--; i >= 0; i--) |
@@ -703,7 +758,7 @@ static void release_pmc_hardware(void) | |||
703 | #ifdef CONFIG_X86_LOCAL_APIC | 758 | #ifdef CONFIG_X86_LOCAL_APIC |
704 | int i; | 759 | int i; |
705 | 760 | ||
706 | for (i = 0; i < x86_pmu.num_counters; i++) { | 761 | for (i = 0; i < x86_pmu.num_events; i++) { |
707 | release_perfctr_nmi(x86_pmu.perfctr + i); | 762 | release_perfctr_nmi(x86_pmu.perfctr + i); |
708 | release_evntsel_nmi(x86_pmu.eventsel + i); | 763 | release_evntsel_nmi(x86_pmu.eventsel + i); |
709 | } | 764 | } |
@@ -720,7 +775,7 @@ static inline bool bts_available(void) | |||
720 | 775 | ||
721 | static inline void init_debug_store_on_cpu(int cpu) | 776 | static inline void init_debug_store_on_cpu(int cpu) |
722 | { | 777 | { |
723 | struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds; | 778 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; |
724 | 779 | ||
725 | if (!ds) | 780 | if (!ds) |
726 | return; | 781 | return; |
@@ -732,7 +787,7 @@ static inline void init_debug_store_on_cpu(int cpu) | |||
732 | 787 | ||
733 | static inline void fini_debug_store_on_cpu(int cpu) | 788 | static inline void fini_debug_store_on_cpu(int cpu) |
734 | { | 789 | { |
735 | if (!per_cpu(cpu_hw_counters, cpu).ds) | 790 | if (!per_cpu(cpu_hw_events, cpu).ds) |
736 | return; | 791 | return; |
737 | 792 | ||
738 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); | 793 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); |
@@ -751,12 +806,12 @@ static void release_bts_hardware(void) | |||
751 | fini_debug_store_on_cpu(cpu); | 806 | fini_debug_store_on_cpu(cpu); |
752 | 807 | ||
753 | for_each_possible_cpu(cpu) { | 808 | for_each_possible_cpu(cpu) { |
754 | struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds; | 809 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; |
755 | 810 | ||
756 | if (!ds) | 811 | if (!ds) |
757 | continue; | 812 | continue; |
758 | 813 | ||
759 | per_cpu(cpu_hw_counters, cpu).ds = NULL; | 814 | per_cpu(cpu_hw_events, cpu).ds = NULL; |
760 | 815 | ||
761 | kfree((void *)(unsigned long)ds->bts_buffer_base); | 816 | kfree((void *)(unsigned long)ds->bts_buffer_base); |
762 | kfree(ds); | 817 | kfree(ds); |
@@ -796,7 +851,7 @@ static int reserve_bts_hardware(void) | |||
796 | ds->bts_interrupt_threshold = | 851 | ds->bts_interrupt_threshold = |
797 | ds->bts_absolute_maximum - BTS_OVFL_TH; | 852 | ds->bts_absolute_maximum - BTS_OVFL_TH; |
798 | 853 | ||
799 | per_cpu(cpu_hw_counters, cpu).ds = ds; | 854 | per_cpu(cpu_hw_events, cpu).ds = ds; |
800 | err = 0; | 855 | err = 0; |
801 | } | 856 | } |
802 | 857 | ||
@@ -812,9 +867,9 @@ static int reserve_bts_hardware(void) | |||
812 | return err; | 867 | return err; |
813 | } | 868 | } |
814 | 869 | ||
815 | static void hw_perf_counter_destroy(struct perf_counter *counter) | 870 | static void hw_perf_event_destroy(struct perf_event *event) |
816 | { | 871 | { |
817 | if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) { | 872 | if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { |
818 | release_pmc_hardware(); | 873 | release_pmc_hardware(); |
819 | release_bts_hardware(); | 874 | release_bts_hardware(); |
820 | mutex_unlock(&pmc_reserve_mutex); | 875 | mutex_unlock(&pmc_reserve_mutex); |
@@ -827,7 +882,7 @@ static inline int x86_pmu_initialized(void) | |||
827 | } | 882 | } |
828 | 883 | ||
829 | static inline int | 884 | static inline int |
830 | set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr) | 885 | set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) |
831 | { | 886 | { |
832 | unsigned int cache_type, cache_op, cache_result; | 887 | unsigned int cache_type, cache_op, cache_result; |
833 | u64 config, val; | 888 | u64 config, val; |
@@ -880,7 +935,7 @@ static void intel_pmu_enable_bts(u64 config) | |||
880 | 935 | ||
881 | static void intel_pmu_disable_bts(void) | 936 | static void intel_pmu_disable_bts(void) |
882 | { | 937 | { |
883 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | 938 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
884 | unsigned long debugctlmsr; | 939 | unsigned long debugctlmsr; |
885 | 940 | ||
886 | if (!cpuc->ds) | 941 | if (!cpuc->ds) |
@@ -898,10 +953,10 @@ static void intel_pmu_disable_bts(void) | |||
898 | /* | 953 | /* |
899 | * Setup the hardware configuration for a given attr_type | 954 | * Setup the hardware configuration for a given attr_type |
900 | */ | 955 | */ |
901 | static int __hw_perf_counter_init(struct perf_counter *counter) | 956 | static int __hw_perf_event_init(struct perf_event *event) |
902 | { | 957 | { |
903 | struct perf_counter_attr *attr = &counter->attr; | 958 | struct perf_event_attr *attr = &event->attr; |
904 | struct hw_perf_counter *hwc = &counter->hw; | 959 | struct hw_perf_event *hwc = &event->hw; |
905 | u64 config; | 960 | u64 config; |
906 | int err; | 961 | int err; |
907 | 962 | ||
@@ -909,27 +964,31 @@ static int __hw_perf_counter_init(struct perf_counter *counter) | |||
909 | return -ENODEV; | 964 | return -ENODEV; |
910 | 965 | ||
911 | err = 0; | 966 | err = 0; |
912 | if (!atomic_inc_not_zero(&active_counters)) { | 967 | if (!atomic_inc_not_zero(&active_events)) { |
913 | mutex_lock(&pmc_reserve_mutex); | 968 | mutex_lock(&pmc_reserve_mutex); |
914 | if (atomic_read(&active_counters) == 0) { | 969 | if (atomic_read(&active_events) == 0) { |
915 | if (!reserve_pmc_hardware()) | 970 | if (!reserve_pmc_hardware()) |
916 | err = -EBUSY; | 971 | err = -EBUSY; |
917 | else | 972 | else |
918 | err = reserve_bts_hardware(); | 973 | err = reserve_bts_hardware(); |
919 | } | 974 | } |
920 | if (!err) | 975 | if (!err) |
921 | atomic_inc(&active_counters); | 976 | atomic_inc(&active_events); |
922 | mutex_unlock(&pmc_reserve_mutex); | 977 | mutex_unlock(&pmc_reserve_mutex); |
923 | } | 978 | } |
924 | if (err) | 979 | if (err) |
925 | return err; | 980 | return err; |
926 | 981 | ||
982 | event->destroy = hw_perf_event_destroy; | ||
983 | |||
927 | /* | 984 | /* |
928 | * Generate PMC IRQs: | 985 | * Generate PMC IRQs: |
929 | * (keep 'enabled' bit clear for now) | 986 | * (keep 'enabled' bit clear for now) |
930 | */ | 987 | */ |
931 | hwc->config = ARCH_PERFMON_EVENTSEL_INT; | 988 | hwc->config = ARCH_PERFMON_EVENTSEL_INT; |
932 | 989 | ||
990 | hwc->idx = -1; | ||
991 | |||
933 | /* | 992 | /* |
934 | * Count user and OS events unless requested not to. | 993 | * Count user and OS events unless requested not to. |
935 | */ | 994 | */ |
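The active_events accounting above is the reserve-once refcount idiom: atomic_inc_not_zero() is the lock-free fast path, and the 0 -> 1 transition is serialized under a mutex so the hardware is reserved exactly once. Its shape, with illustrative names (demo_reserve_hw is a hypothetical helper standing in for reserve_pmc_hardware() and friends):

#include <linux/mutex.h>

static atomic_t users;
static DEFINE_MUTEX(user_mutex);

static int demo_reserve_hw(void);	/* hypothetical; claims the hardware */

static int demo_get(void)
{
	int err = 0;

	if (atomic_inc_not_zero(&users))	/* fast path: already reserved */
		return 0;

	mutex_lock(&user_mutex);
	if (atomic_read(&users) == 0)		/* first user does the reserve */
		err = demo_reserve_hw();
	if (!err)
		atomic_inc(&users);
	mutex_unlock(&user_mutex);

	return err;
}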
@@ -946,17 +1005,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter) | |||
946 | /* | 1005 | /* |
947 | * If we have a PMU initialized but no APIC | 1006 | * If we have a PMU initialized but no APIC |
948 | * interrupts, we cannot sample hardware | 1007 | * interrupts, we cannot sample hardware |
949 | * counters (user-space has to fall back and | 1008 | * events (user-space has to fall back and |
950 | * sample via a hrtimer based software counter): | 1009 | * sample via a hrtimer based software event): |
951 | */ | 1010 | */ |
952 | if (!x86_pmu.apic) | 1011 | if (!x86_pmu.apic) |
953 | return -EOPNOTSUPP; | 1012 | return -EOPNOTSUPP; |
954 | } | 1013 | } |
955 | 1014 | ||
956 | counter->destroy = hw_perf_counter_destroy; | ||
957 | |||
958 | /* | 1015 | /* |
959 | * Raw event type provide the config in the event structure | 1016 | * Raw hw_event type provide the config in the hw_event structure |
960 | */ | 1017 | */ |
961 | if (attr->type == PERF_TYPE_RAW) { | 1018 | if (attr->type == PERF_TYPE_RAW) { |
962 | hwc->config |= x86_pmu.raw_event(attr->config); | 1019 | hwc->config |= x86_pmu.raw_event(attr->config); |
@@ -1001,7 +1058,7 @@ static int __hw_perf_counter_init(struct perf_counter *counter) | |||
1001 | 1058 | ||
1002 | static void p6_pmu_disable_all(void) | 1059 | static void p6_pmu_disable_all(void) |
1003 | { | 1060 | { |
1004 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | 1061 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1005 | u64 val; | 1062 | u64 val; |
1006 | 1063 | ||
1007 | if (!cpuc->enabled) | 1064 | if (!cpuc->enabled) |
@@ -1018,7 +1075,7 @@ static void p6_pmu_disable_all(void) | |||
1018 | 1075 | ||
1019 | static void intel_pmu_disable_all(void) | 1076 | static void intel_pmu_disable_all(void) |
1020 | { | 1077 | { |
1021 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | 1078 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1022 | 1079 | ||
1023 | if (!cpuc->enabled) | 1080 | if (!cpuc->enabled) |
1024 | return; | 1081 | return; |
@@ -1034,7 +1091,7 @@ static void intel_pmu_disable_all(void) | |||
1034 | 1091 | ||
1035 | static void amd_pmu_disable_all(void) | 1092 | static void amd_pmu_disable_all(void) |
1036 | { | 1093 | { |
1037 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | 1094 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1038 | int idx; | 1095 | int idx; |
1039 | 1096 | ||
1040 | if (!cpuc->enabled) | 1097 | if (!cpuc->enabled) |
@@ -1043,12 +1100,12 @@ static void amd_pmu_disable_all(void) | |||
1043 | cpuc->enabled = 0; | 1100 | cpuc->enabled = 0; |
1044 | /* | 1101 | /* |
1045 | * ensure we write the disable before we start disabling the | 1102 | * ensure we write the disable before we start disabling the |
1046 | * counters proper, so that amd_pmu_enable_counter() does the | 1103 | * events proper, so that amd_pmu_enable_event() does the |
1047 | * right thing. | 1104 | * right thing. |
1048 | */ | 1105 | */ |
1049 | barrier(); | 1106 | barrier(); |
1050 | 1107 | ||
1051 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | 1108 | for (idx = 0; idx < x86_pmu.num_events; idx++) { |
1052 | u64 val; | 1109 | u64 val; |
1053 | 1110 | ||
1054 | if (!test_bit(idx, cpuc->active_mask)) | 1111 | if (!test_bit(idx, cpuc->active_mask)) |
@@ -1070,7 +1127,7 @@ void hw_perf_disable(void) | |||
1070 | 1127 | ||
1071 | static void p6_pmu_enable_all(void) | 1128 | static void p6_pmu_enable_all(void) |
1072 | { | 1129 | { |
1073 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | 1130 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1074 | unsigned long val; | 1131 | unsigned long val; |
1075 | 1132 | ||
1076 | if (cpuc->enabled) | 1133 | if (cpuc->enabled) |
@@ -1087,7 +1144,7 @@ static void p6_pmu_enable_all(void) | |||
1087 | 1144 | ||
1088 | static void intel_pmu_enable_all(void) | 1145 | static void intel_pmu_enable_all(void) |
1089 | { | 1146 | { |
1090 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | 1147 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1091 | 1148 | ||
1092 | if (cpuc->enabled) | 1149 | if (cpuc->enabled) |
1093 | return; | 1150 | return; |
@@ -1098,19 +1155,19 @@ static void intel_pmu_enable_all(void) | |||
1098 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); | 1155 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); |
1099 | 1156 | ||
1100 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { | 1157 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { |
1101 | struct perf_counter *counter = | 1158 | struct perf_event *event = |
1102 | cpuc->counters[X86_PMC_IDX_FIXED_BTS]; | 1159 | cpuc->events[X86_PMC_IDX_FIXED_BTS]; |
1103 | 1160 | ||
1104 | if (WARN_ON_ONCE(!counter)) | 1161 | if (WARN_ON_ONCE(!event)) |
1105 | return; | 1162 | return; |
1106 | 1163 | ||
1107 | intel_pmu_enable_bts(counter->hw.config); | 1164 | intel_pmu_enable_bts(event->hw.config); |
1108 | } | 1165 | } |
1109 | } | 1166 | } |
1110 | 1167 | ||
1111 | static void amd_pmu_enable_all(void) | 1168 | static void amd_pmu_enable_all(void) |
1112 | { | 1169 | { |
1113 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | 1170 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1114 | int idx; | 1171 | int idx; |
1115 | 1172 | ||
1116 | if (cpuc->enabled) | 1173 | if (cpuc->enabled) |
@@ -1119,14 +1176,14 @@ static void amd_pmu_enable_all(void) | |||
1119 | cpuc->enabled = 1; | 1176 | cpuc->enabled = 1; |
1120 | barrier(); | 1177 | barrier(); |
1121 | 1178 | ||
1122 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | 1179 | for (idx = 0; idx < x86_pmu.num_events; idx++) { |
1123 | struct perf_counter *counter = cpuc->counters[idx]; | 1180 | struct perf_event *event = cpuc->events[idx]; |
1124 | u64 val; | 1181 | u64 val; |
1125 | 1182 | ||
1126 | if (!test_bit(idx, cpuc->active_mask)) | 1183 | if (!test_bit(idx, cpuc->active_mask)) |
1127 | continue; | 1184 | continue; |
1128 | 1185 | ||
1129 | val = counter->hw.config; | 1186 | val = event->hw.config; |
1130 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | 1187 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; |
1131 | wrmsrl(MSR_K7_EVNTSEL0 + idx, val); | 1188 | wrmsrl(MSR_K7_EVNTSEL0 + idx, val); |
1132 | } | 1189 | } |
@@ -1153,19 +1210,19 @@ static inline void intel_pmu_ack_status(u64 ack) | |||
1153 | wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); | 1210 | wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); |
1154 | } | 1211 | } |
1155 | 1212 | ||
1156 | static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) | 1213 | static inline void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) |
1157 | { | 1214 | { |
1158 | (void)checking_wrmsrl(hwc->config_base + idx, | 1215 | (void)checking_wrmsrl(hwc->config_base + idx, |
1159 | hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); | 1216 | hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); |
1160 | } | 1217 | } |
1161 | 1218 | ||
1162 | static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) | 1219 | static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx) |
1163 | { | 1220 | { |
1164 | (void)checking_wrmsrl(hwc->config_base + idx, hwc->config); | 1221 | (void)checking_wrmsrl(hwc->config_base + idx, hwc->config); |
1165 | } | 1222 | } |
1166 | 1223 | ||
1167 | static inline void | 1224 | static inline void |
1168 | intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx) | 1225 | intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx) |
1169 | { | 1226 | { |
1170 | int idx = __idx - X86_PMC_IDX_FIXED; | 1227 | int idx = __idx - X86_PMC_IDX_FIXED; |
1171 | u64 ctrl_val, mask; | 1228 | u64 ctrl_val, mask; |
@@ -1178,10 +1235,10 @@ intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx) | |||
1178 | } | 1235 | } |
1179 | 1236 | ||
1180 | static inline void | 1237 | static inline void |
1181 | p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) | 1238 | p6_pmu_disable_event(struct hw_perf_event *hwc, int idx) |
1182 | { | 1239 | { |
1183 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | 1240 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1184 | u64 val = P6_NOP_COUNTER; | 1241 | u64 val = P6_NOP_EVENT; |
1185 | 1242 | ||
1186 | if (cpuc->enabled) | 1243 | if (cpuc->enabled) |
1187 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | 1244 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; |
@@ -1190,7 +1247,7 @@ p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) | |||
1190 | } | 1247 | } |
1191 | 1248 | ||
1192 | static inline void | 1249 | static inline void |
1193 | intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) | 1250 | intel_pmu_disable_event(struct hw_perf_event *hwc, int idx) |
1194 | { | 1251 | { |
1195 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { | 1252 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { |
1196 | intel_pmu_disable_bts(); | 1253 | intel_pmu_disable_bts(); |
@@ -1202,24 +1259,24 @@ intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) | |||
1202 | return; | 1259 | return; |
1203 | } | 1260 | } |
1204 | 1261 | ||
1205 | x86_pmu_disable_counter(hwc, idx); | 1262 | x86_pmu_disable_event(hwc, idx); |
1206 | } | 1263 | } |
1207 | 1264 | ||
1208 | static inline void | 1265 | static inline void |
1209 | amd_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) | 1266 | amd_pmu_disable_event(struct hw_perf_event *hwc, int idx) |
1210 | { | 1267 | { |
1211 | x86_pmu_disable_counter(hwc, idx); | 1268 | x86_pmu_disable_event(hwc, idx); |
1212 | } | 1269 | } |
1213 | 1270 | ||
1214 | static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]); | 1271 | static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); |
1215 | 1272 | ||
1216 | /* | 1273 | /* |
1217 | * Set the next IRQ period, based on the hwc->period_left value. | 1274 | * Set the next IRQ period, based on the hwc->period_left value. |
1218 | * To be called with the counter disabled in hw: | 1275 | * To be called with the event disabled in hw: |
1219 | */ | 1276 | */ |
1220 | static int | 1277 | static int |
1221 | x86_perf_counter_set_period(struct perf_counter *counter, | 1278 | x86_perf_event_set_period(struct perf_event *event, |
1222 | struct hw_perf_counter *hwc, int idx) | 1279 | struct hw_perf_event *hwc, int idx) |
1223 | { | 1280 | { |
1224 | s64 left = atomic64_read(&hwc->period_left); | 1281 | s64 left = atomic64_read(&hwc->period_left); |
1225 | s64 period = hwc->sample_period; | 1282 | s64 period = hwc->sample_period; |
@@ -1245,7 +1302,7 @@ x86_perf_counter_set_period(struct perf_counter *counter, | |||
1245 | ret = 1; | 1302 | ret = 1; |
1246 | } | 1303 | } |
1247 | /* | 1304 | /* |
1248 | * Quirk: certain CPUs don't like it if just 1 event is left: | 1305 | * Quirk: certain CPUs don't like it if just 1 hw_event is left: |
1249 | */ | 1306 | */ |
1250 | if (unlikely(left < 2)) | 1307 | if (unlikely(left < 2)) |
1251 | left = 2; | 1308 | left = 2; |
@@ -1253,24 +1310,24 @@ x86_perf_counter_set_period(struct perf_counter *counter, | |||
1253 | if (left > x86_pmu.max_period) | 1310 | if (left > x86_pmu.max_period) |
1254 | left = x86_pmu.max_period; | 1311 | left = x86_pmu.max_period; |
1255 | 1312 | ||
1256 | per_cpu(prev_left[idx], smp_processor_id()) = left; | 1313 | per_cpu(pmc_prev_left[idx], smp_processor_id()) = left; |
1257 | 1314 | ||
1258 | /* | 1315 | /* |
1259 | * The hw counter starts counting from this counter offset, | 1316 | * The hw event starts counting from this event offset, |
1260 | * mark it to be able to extract future deltas: | 1317 | * mark it to be able to extract future deltas: |
1261 | */ | 1318 | */ |
1262 | atomic64_set(&hwc->prev_count, (u64)-left); | 1319 | atomic64_set(&hwc->prev_count, (u64)-left); |
1263 | 1320 | ||
1264 | err = checking_wrmsrl(hwc->counter_base + idx, | 1321 | err = checking_wrmsrl(hwc->event_base + idx, |
1265 | (u64)(-left) & x86_pmu.counter_mask); | 1322 | (u64)(-left) & x86_pmu.event_mask); |
1266 | 1323 | ||
1267 | perf_counter_update_userpage(counter); | 1324 | perf_event_update_userpage(event); |
1268 | 1325 | ||
1269 | return ret; | 1326 | return ret; |
1270 | } | 1327 | } |
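The heart of x86_perf_event_set_period() is the two's-complement write: the counter counts upward, so programming it to -left makes the hardware overflow after exactly 'left' more events. A minimal userspace model of that step, assuming the 32-bit effective counter width noted elsewhere in this patch (the period value is illustrative):

	#include <stdint.h>
	#include <stdio.h>

	/* Model of the wrmsr step in x86_perf_event_set_period():
	 * write the two's complement of the remaining period so the
	 * counter overflows after 'left' increments. */
	static uint64_t program_counter(int64_t left, uint64_t event_mask)
	{
		return (uint64_t)(-left) & event_mask;
	}

	int main(void)
	{
		uint64_t mask = (1ULL << 32) - 1;   /* x86_pmu.event_mask */
		uint64_t val = program_counter(1000, mask);

		printf("programmed:   %#llx\n", (unsigned long long)val);
		/* after 1000 increments the value wraps to zero: */
		printf("after period: %#llx\n",
		       (unsigned long long)((val + 1000) & mask));
		return 0;
	}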
1271 | 1328 | ||
1272 | static inline void | 1329 | static inline void |
1273 | intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx) | 1330 | intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx) |
1274 | { | 1331 | { |
1275 | int idx = __idx - X86_PMC_IDX_FIXED; | 1332 | int idx = __idx - X86_PMC_IDX_FIXED; |
1276 | u64 ctrl_val, bits, mask; | 1333 | u64 ctrl_val, bits, mask; |
@@ -1295,9 +1352,9 @@ intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx) | |||
1295 | err = checking_wrmsrl(hwc->config_base, ctrl_val); | 1352 | err = checking_wrmsrl(hwc->config_base, ctrl_val); |
1296 | } | 1353 | } |
1297 | 1354 | ||
1298 | static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) | 1355 | static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx) |
1299 | { | 1356 | { |
1300 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | 1357 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1301 | u64 val; | 1358 | u64 val; |
1302 | 1359 | ||
1303 | val = hwc->config; | 1360 | val = hwc->config; |
@@ -1308,10 +1365,10 @@ static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) | |||
1308 | } | 1365 | } |
1309 | 1366 | ||
1310 | 1367 | ||
1311 | static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) | 1368 | static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx) |
1312 | { | 1369 | { |
1313 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { | 1370 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { |
1314 | if (!__get_cpu_var(cpu_hw_counters).enabled) | 1371 | if (!__get_cpu_var(cpu_hw_events).enabled) |
1315 | return; | 1372 | return; |
1316 | 1373 | ||
1317 | intel_pmu_enable_bts(hwc->config); | 1374 | intel_pmu_enable_bts(hwc->config); |
@@ -1323,134 +1380,189 @@ static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) | |||
1323 | return; | 1380 | return; |
1324 | } | 1381 | } |
1325 | 1382 | ||
1326 | x86_pmu_enable_counter(hwc, idx); | 1383 | x86_pmu_enable_event(hwc, idx); |
1327 | } | 1384 | } |
1328 | 1385 | ||
1329 | static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) | 1386 | static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx) |
1330 | { | 1387 | { |
1331 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | 1388 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1332 | 1389 | ||
1333 | if (cpuc->enabled) | 1390 | if (cpuc->enabled) |
1334 | x86_pmu_enable_counter(hwc, idx); | 1391 | x86_pmu_enable_event(hwc, idx); |
1335 | } | 1392 | } |
1336 | 1393 | ||
1337 | static int | 1394 | static int fixed_mode_idx(struct hw_perf_event *hwc) |
1338 | fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc) | ||
1339 | { | 1395 | { |
1340 | unsigned int event; | 1396 | unsigned int hw_event; |
1341 | 1397 | ||
1342 | event = hwc->config & ARCH_PERFMON_EVENT_MASK; | 1398 | hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK; |
1343 | 1399 | ||
1344 | if (unlikely((event == | 1400 | if (unlikely((hw_event == |
1345 | x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && | 1401 | x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && |
1346 | (hwc->sample_period == 1))) | 1402 | (hwc->sample_period == 1))) |
1347 | return X86_PMC_IDX_FIXED_BTS; | 1403 | return X86_PMC_IDX_FIXED_BTS; |
1348 | 1404 | ||
1349 | if (!x86_pmu.num_counters_fixed) | 1405 | if (!x86_pmu.num_events_fixed) |
1350 | return -1; | 1406 | return -1; |
1351 | 1407 | ||
1352 | if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) | 1408 | /* |
1409 | * fixed counters do not take all possible filters | ||
1410 | */ | ||
1411 | if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK) | ||
1412 | return -1; | ||
1413 | |||
1414 | if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) | ||
1353 | return X86_PMC_IDX_FIXED_INSTRUCTIONS; | 1415 | return X86_PMC_IDX_FIXED_INSTRUCTIONS; |
1354 | if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) | 1416 | if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) |
1355 | return X86_PMC_IDX_FIXED_CPU_CYCLES; | 1417 | return X86_PMC_IDX_FIXED_CPU_CYCLES; |
1356 | if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES))) | 1418 | if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES))) |
1357 | return X86_PMC_IDX_FIXED_BUS_CYCLES; | 1419 | return X86_PMC_IDX_FIXED_BUS_CYCLES; |
1358 | 1420 | ||
1359 | return -1; | 1421 | return -1; |
1360 | } | 1422 | } |
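fixed_mode_idx() boils down to matching the event-select code against the handful of events the fixed-purpose counters can count, after rejecting configs that use filter bits. A sketch of that matching, with hypothetical encodings and index values rather than the real architectural constants:

	#include <stdint.h>
	#include <stdio.h>

	/* Model of fixed_mode_idx() with assumed codes: a config that
	 * matches a known fixed event and uses no filter bits maps to
	 * a fixed PMC index; anything else uses a generic counter. */
	#define EVT_INSTRUCTIONS 0xc0          /* assumed, for the sketch */
	#define EVT_CPU_CYCLES   0x3c
	#define FILTER_MASK      0xffffff00ULL /* inv/cmask/edge etc. */

	enum { IDX_FIXED_INSTR = 33, IDX_FIXED_CYCLES = 34 };

	static int fixed_idx(uint64_t config)
	{
		if (config & FILTER_MASK)  /* fixed counters take no filters */
			return -1;
		switch (config & 0xff) {
		case EVT_INSTRUCTIONS: return IDX_FIXED_INSTR;
		case EVT_CPU_CYCLES:   return IDX_FIXED_CYCLES;
		default:               return -1; /* generic PMC instead */
		}
	}

	int main(void)
	{
		printf("%d %d\n", fixed_idx(0xc0), fixed_idx(0x1c0)); /* 33 -1 */
		return 0;
	}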
1361 | 1423 | ||
1362 | /* | 1424 | /* |
1363 | * Find a PMC slot for the freshly enabled / scheduled in counter: | 1425 | * generic counter allocator: get next free counter |
1364 | */ | 1426 | */ |
1365 | static int x86_pmu_enable(struct perf_counter *counter) | 1427 | static int |
1428 | gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) | ||
1366 | { | 1429 | { |
1367 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
1368 | struct hw_perf_counter *hwc = &counter->hw; | ||
1369 | int idx; | 1430 | int idx; |
1370 | 1431 | ||
1371 | idx = fixed_mode_idx(counter, hwc); | 1432 | idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events); |
1433 | return idx == x86_pmu.num_events ? -1 : idx; | ||
1434 | } | ||
1435 | |||
1436 | /* | ||
1437 | * intel-specific counter allocator: check event constraints | ||
1438 | */ | ||
1439 | static int | ||
1440 | intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) | ||
1441 | { | ||
1442 | const struct event_constraint *event_constraint; | ||
1443 | int i, code; | ||
1444 | |||
1445 | if (!event_constraints) | ||
1446 | goto skip; | ||
1447 | |||
1448 | code = hwc->config & CORE_EVNTSEL_EVENT_MASK; | ||
1449 | |||
1450 | for_each_event_constraint(event_constraint, event_constraints) { | ||
1451 | if (code == event_constraint->code) { | ||
1452 | for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) { | ||
1453 | if (!test_and_set_bit(i, cpuc->used_mask)) | ||
1454 | return i; | ||
1455 | } | ||
1456 | return -1; | ||
1457 | } | ||
1458 | } | ||
1459 | skip: | ||
1460 | return gen_get_event_idx(cpuc, hwc); | ||
1461 | } | ||
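The new get_event_idx() split separates constraint-aware allocation (Intel) from plain first-free allocation (the generic path, also used by AMD). A compact userspace model of both paths, using a made-up one-entry constraint table; the real code walks per-model tables such as intel_core_event_constraints:

	#include <stdint.h>
	#include <stdio.h>

	#define NUM_EVENTS 4

	struct constraint {
		unsigned int code;    /* event select code */
		uint64_t     idxmsk;  /* counters this event may use */
	};

	/* hypothetical constraint: event 0x12 only on PMC0/PMC1 */
	static const struct constraint constraints[] = {
		{ 0x12, 0x3 },
		{ 0, 0 },             /* sentinel */
	};

	static int get_event_idx(uint64_t *used, unsigned int code)
	{
		const struct constraint *c;
		int i;

		for (c = constraints; c->idxmsk; c++) {
			if (c->code != code)
				continue;
			/* constrained: only counters in idxmsk qualify */
			for (i = 0; i < NUM_EVENTS; i++)
				if ((c->idxmsk >> i & 1) && !(*used >> i & 1)) {
					*used |= 1ULL << i;
					return i;
				}
			return -1;    /* constrained and all taken */
		}
		/* unconstrained: first free counter wins */
		for (i = 0; i < NUM_EVENTS; i++)
			if (!(*used >> i & 1)) {
				*used |= 1ULL << i;
				return i;
			}
		return -1;
	}

	int main(void)
	{
		uint64_t used = 0;
		int a = get_event_idx(&used, 0x12);
		int b = get_event_idx(&used, 0x12);
		int c = get_event_idx(&used, 0x12);

		printf("%d %d %d\n", a, b, c);   /* 0 1 -1 */
		return 0;
	}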
1462 | |||
1463 | static int | ||
1464 | x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) | ||
1465 | { | ||
1466 | int idx; | ||
1467 | |||
1468 | idx = fixed_mode_idx(hwc); | ||
1372 | if (idx == X86_PMC_IDX_FIXED_BTS) { | 1469 | if (idx == X86_PMC_IDX_FIXED_BTS) { |
1373 | /* BTS is already occupied. */ | 1470 | /* BTS is already occupied. */ |
1374 | if (test_and_set_bit(idx, cpuc->used_mask)) | 1471 | if (test_and_set_bit(idx, cpuc->used_mask)) |
1375 | return -EAGAIN; | 1472 | return -EAGAIN; |
1376 | 1473 | ||
1377 | hwc->config_base = 0; | 1474 | hwc->config_base = 0; |
1378 | hwc->counter_base = 0; | 1475 | hwc->event_base = 0; |
1379 | hwc->idx = idx; | 1476 | hwc->idx = idx; |
1380 | } else if (idx >= 0) { | 1477 | } else if (idx >= 0) { |
1381 | /* | 1478 | /* |
1382 | * Try to get the fixed counter, if that is already taken | 1479 | * Try to get the fixed event, if that is already taken |
1383 | * then try to get a generic counter: | 1480 | * then try to get a generic event: |
1384 | */ | 1481 | */ |
1385 | if (test_and_set_bit(idx, cpuc->used_mask)) | 1482 | if (test_and_set_bit(idx, cpuc->used_mask)) |
1386 | goto try_generic; | 1483 | goto try_generic; |
1387 | 1484 | ||
1388 | hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; | 1485 | hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; |
1389 | /* | 1486 | /* |
1390 | * We set it so that counter_base + idx in wrmsr/rdmsr maps to | 1487 | * We set it so that event_base + idx in wrmsr/rdmsr maps to |
1391 | * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2: | 1488 | * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2: |
1392 | */ | 1489 | */ |
1393 | hwc->counter_base = | 1490 | hwc->event_base = |
1394 | MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED; | 1491 | MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED; |
1395 | hwc->idx = idx; | 1492 | hwc->idx = idx; |
1396 | } else { | 1493 | } else { |
1397 | idx = hwc->idx; | 1494 | idx = hwc->idx; |
1398 | /* Try to get the previous generic counter again */ | 1495 | /* Try to get the previous generic event again */ |
1399 | if (test_and_set_bit(idx, cpuc->used_mask)) { | 1496 | if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) { |
1400 | try_generic: | 1497 | try_generic: |
1401 | idx = find_first_zero_bit(cpuc->used_mask, | 1498 | idx = x86_pmu.get_event_idx(cpuc, hwc); |
1402 | x86_pmu.num_counters); | 1499 | if (idx == -1) |
1403 | if (idx == x86_pmu.num_counters) | ||
1404 | return -EAGAIN; | 1500 | return -EAGAIN; |
1405 | 1501 | ||
1406 | set_bit(idx, cpuc->used_mask); | 1502 | set_bit(idx, cpuc->used_mask); |
1407 | hwc->idx = idx; | 1503 | hwc->idx = idx; |
1408 | } | 1504 | } |
1409 | hwc->config_base = x86_pmu.eventsel; | 1505 | hwc->config_base = x86_pmu.eventsel; |
1410 | hwc->counter_base = x86_pmu.perfctr; | 1506 | hwc->event_base = x86_pmu.perfctr; |
1411 | } | 1507 | } |
1412 | 1508 | ||
1413 | perf_counters_lapic_init(); | 1509 | return idx; |
1510 | } | ||
1511 | |||
1512 | /* | ||
1513 | * Find a PMC slot for the freshly enabled / scheduled in event: | ||
1514 | */ | ||
1515 | static int x86_pmu_enable(struct perf_event *event) | ||
1516 | { | ||
1517 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1518 | struct hw_perf_event *hwc = &event->hw; | ||
1519 | int idx; | ||
1520 | |||
1521 | idx = x86_schedule_event(cpuc, hwc); | ||
1522 | if (idx < 0) | ||
1523 | return idx; | ||
1524 | |||
1525 | perf_events_lapic_init(); | ||
1414 | 1526 | ||
1415 | x86_pmu.disable(hwc, idx); | 1527 | x86_pmu.disable(hwc, idx); |
1416 | 1528 | ||
1417 | cpuc->counters[idx] = counter; | 1529 | cpuc->events[idx] = event; |
1418 | set_bit(idx, cpuc->active_mask); | 1530 | set_bit(idx, cpuc->active_mask); |
1419 | 1531 | ||
1420 | x86_perf_counter_set_period(counter, hwc, idx); | 1532 | x86_perf_event_set_period(event, hwc, idx); |
1421 | x86_pmu.enable(hwc, idx); | 1533 | x86_pmu.enable(hwc, idx); |
1422 | 1534 | ||
1423 | perf_counter_update_userpage(counter); | 1535 | perf_event_update_userpage(event); |
1424 | 1536 | ||
1425 | return 0; | 1537 | return 0; |
1426 | } | 1538 | } |
1427 | 1539 | ||
1428 | static void x86_pmu_unthrottle(struct perf_counter *counter) | 1540 | static void x86_pmu_unthrottle(struct perf_event *event) |
1429 | { | 1541 | { |
1430 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | 1542 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1431 | struct hw_perf_counter *hwc = &counter->hw; | 1543 | struct hw_perf_event *hwc = &event->hw; |
1432 | 1544 | ||
1433 | if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX || | 1545 | if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX || |
1434 | cpuc->counters[hwc->idx] != counter)) | 1546 | cpuc->events[hwc->idx] != event)) |
1435 | return; | 1547 | return; |
1436 | 1548 | ||
1437 | x86_pmu.enable(hwc, hwc->idx); | 1549 | x86_pmu.enable(hwc, hwc->idx); |
1438 | } | 1550 | } |
1439 | 1551 | ||
1440 | void perf_counter_print_debug(void) | 1552 | void perf_event_print_debug(void) |
1441 | { | 1553 | { |
1442 | u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; | 1554 | u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; |
1443 | struct cpu_hw_counters *cpuc; | 1555 | struct cpu_hw_events *cpuc; |
1444 | unsigned long flags; | 1556 | unsigned long flags; |
1445 | int cpu, idx; | 1557 | int cpu, idx; |
1446 | 1558 | ||
1447 | if (!x86_pmu.num_counters) | 1559 | if (!x86_pmu.num_events) |
1448 | return; | 1560 | return; |
1449 | 1561 | ||
1450 | local_irq_save(flags); | 1562 | local_irq_save(flags); |
1451 | 1563 | ||
1452 | cpu = smp_processor_id(); | 1564 | cpu = smp_processor_id(); |
1453 | cpuc = &per_cpu(cpu_hw_counters, cpu); | 1565 | cpuc = &per_cpu(cpu_hw_events, cpu); |
1454 | 1566 | ||
1455 | if (x86_pmu.version >= 2) { | 1567 | if (x86_pmu.version >= 2) { |
1456 | rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); | 1568 | rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); |
@@ -1466,11 +1578,11 @@ void perf_counter_print_debug(void) | |||
1466 | } | 1578 | } |
1467 | pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask); | 1579 | pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask); |
1468 | 1580 | ||
1469 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | 1581 | for (idx = 0; idx < x86_pmu.num_events; idx++) { |
1470 | rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); | 1582 | rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); |
1471 | rdmsrl(x86_pmu.perfctr + idx, pmc_count); | 1583 | rdmsrl(x86_pmu.perfctr + idx, pmc_count); |
1472 | 1584 | ||
1473 | prev_left = per_cpu(prev_left[idx], cpu); | 1585 | prev_left = per_cpu(pmc_prev_left[idx], cpu); |
1474 | 1586 | ||
1475 | pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n", | 1587 | pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n", |
1476 | cpu, idx, pmc_ctrl); | 1588 | cpu, idx, pmc_ctrl); |
@@ -1479,7 +1591,7 @@ void perf_counter_print_debug(void) | |||
1479 | pr_info("CPU#%d: gen-PMC%d left: %016llx\n", | 1591 | pr_info("CPU#%d: gen-PMC%d left: %016llx\n", |
1480 | cpu, idx, prev_left); | 1592 | cpu, idx, prev_left); |
1481 | } | 1593 | } |
1482 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { | 1594 | for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { |
1483 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); | 1595 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); |
1484 | 1596 | ||
1485 | pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", | 1597 | pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", |
@@ -1488,8 +1600,7 @@ void perf_counter_print_debug(void) | |||
1488 | local_irq_restore(flags); | 1600 | local_irq_restore(flags); |
1489 | } | 1601 | } |
1490 | 1602 | ||
1491 | static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc, | 1603 | static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc) |
1492 | struct perf_sample_data *data) | ||
1493 | { | 1604 | { |
1494 | struct debug_store *ds = cpuc->ds; | 1605 | struct debug_store *ds = cpuc->ds; |
1495 | struct bts_record { | 1606 | struct bts_record { |
@@ -1497,11 +1608,14 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc, | |||
1497 | u64 to; | 1608 | u64 to; |
1498 | u64 flags; | 1609 | u64 flags; |
1499 | }; | 1610 | }; |
1500 | struct perf_counter *counter = cpuc->counters[X86_PMC_IDX_FIXED_BTS]; | 1611 | struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; |
1501 | unsigned long orig_ip = data->regs->ip; | ||
1502 | struct bts_record *at, *top; | 1612 | struct bts_record *at, *top; |
1613 | struct perf_output_handle handle; | ||
1614 | struct perf_event_header header; | ||
1615 | struct perf_sample_data data; | ||
1616 | struct pt_regs regs; | ||
1503 | 1617 | ||
1504 | if (!counter) | 1618 | if (!event) |
1505 | return; | 1619 | return; |
1506 | 1620 | ||
1507 | if (!ds) | 1621 | if (!ds) |
@@ -1510,26 +1624,45 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc, | |||
1510 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; | 1624 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; |
1511 | top = (struct bts_record *)(unsigned long)ds->bts_index; | 1625 | top = (struct bts_record *)(unsigned long)ds->bts_index; |
1512 | 1626 | ||
1627 | if (top <= at) | ||
1628 | return; | ||
1629 | |||
1513 | ds->bts_index = ds->bts_buffer_base; | 1630 | ds->bts_index = ds->bts_buffer_base; |
1514 | 1631 | ||
1632 | |||
1633 | data.period = event->hw.last_period; | ||
1634 | data.addr = 0; | ||
1635 | regs.ip = 0; | ||
1636 | |||
1637 | /* | ||
1638 | * Prepare a generic sample, i.e. fill in the invariant fields. | ||
1639 | * We will overwrite the from and to address before we output | ||
1640 | * the sample. | ||
1641 | */ | ||
1642 | perf_prepare_sample(&header, &data, event, ®s); | ||
1643 | |||
1644 | if (perf_output_begin(&handle, event, | ||
1645 | header.size * (top - at), 1, 1)) | ||
1646 | return; | ||
1647 | |||
1515 | for (; at < top; at++) { | 1648 | for (; at < top; at++) { |
1516 | data->regs->ip = at->from; | 1649 | data.ip = at->from; |
1517 | data->addr = at->to; | 1650 | data.addr = at->to; |
1518 | 1651 | ||
1519 | perf_counter_output(counter, 1, data); | 1652 | perf_output_sample(&handle, &header, &data, event); |
1520 | } | 1653 | } |
1521 | 1654 | ||
1522 | data->regs->ip = orig_ip; | 1655 | perf_output_end(&handle); |
1523 | data->addr = 0; | ||
1524 | 1656 | ||
1525 | /* There's new data available. */ | 1657 | /* There's new data available. */ |
1526 | counter->pending_kill = POLL_IN; | 1658 | event->hw.interrupts++; |
1659 | event->pending_kill = POLL_IN; | ||
1527 | } | 1660 | } |
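The reworked drain now sizes a single output reservation for every pending BTS record up front (header.size * (top - at)) instead of pushing each record through the generic overflow path. A userspace model of that shape, with emit() standing in for perf_output_sample() and a made-up sample size:

	#include <stddef.h>
	#include <stdio.h>

	struct bts_record { unsigned long long from, to, flags; };

	/* Model of intel_pmu_drain_bts_buffer(): reserve space for all
	 * pending records in one go, then walk them oldest to newest. */
	static void drain(struct bts_record *base, struct bts_record *index,
			  size_t sample_size,
			  void (*emit)(const struct bts_record *))
	{
		struct bts_record *at = base, *top = index;

		if (top <= at)                   /* nothing buffered */
			return;

		printf("reserving %zu bytes\n",
		       sample_size * (size_t)(top - at));

		for (; at < top; at++)
			emit(at);                /* from/to become ip/addr */
	}

	static void show(const struct bts_record *r)
	{
		printf("branch %#llx -> %#llx\n", r->from, r->to);
	}

	int main(void)
	{
		struct bts_record buf[2] = { { 0x1000, 0x2000, 0 },
					     { 0x2000, 0x3000, 0 } };

		drain(buf, buf + 2, 64, show);   /* 64: made-up sample size */
		return 0;
	}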
1528 | 1661 | ||
1529 | static void x86_pmu_disable(struct perf_counter *counter) | 1662 | static void x86_pmu_disable(struct perf_event *event) |
1530 | { | 1663 | { |
1531 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | 1664 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1532 | struct hw_perf_counter *hwc = &counter->hw; | 1665 | struct hw_perf_event *hwc = &event->hw; |
1533 | int idx = hwc->idx; | 1666 | int idx = hwc->idx; |
1534 | 1667 | ||
1535 | /* | 1668 | /* |
@@ -1541,67 +1674,63 @@ static void x86_pmu_disable(struct perf_counter *counter) | |||
1541 | 1674 | ||
1542 | /* | 1675 | /* |
1543 | * Make sure the cleared pointer becomes visible before we | 1676 | * Make sure the cleared pointer becomes visible before we |
1544 | * (potentially) free the counter: | 1677 | * (potentially) free the event: |
1545 | */ | 1678 | */ |
1546 | barrier(); | 1679 | barrier(); |
1547 | 1680 | ||
1548 | /* | 1681 | /* |
1549 | * Drain the remaining delta count out of a counter | 1682 | * Drain the remaining delta count out of an event |
1550 | * that we are disabling: | 1683 | * that we are disabling: |
1551 | */ | 1684 | */ |
1552 | x86_perf_counter_update(counter, hwc, idx); | 1685 | x86_perf_event_update(event, hwc, idx); |
1553 | 1686 | ||
1554 | /* Drain the remaining BTS records. */ | 1687 | /* Drain the remaining BTS records. */ |
1555 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { | 1688 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) |
1556 | struct perf_sample_data data; | 1689 | intel_pmu_drain_bts_buffer(cpuc); |
1557 | struct pt_regs regs; | ||
1558 | 1690 | ||
1559 | data.regs = ®s; | 1691 | cpuc->events[idx] = NULL; |
1560 | intel_pmu_drain_bts_buffer(cpuc, &data); | ||
1561 | } | ||
1562 | cpuc->counters[idx] = NULL; | ||
1563 | clear_bit(idx, cpuc->used_mask); | 1692 | clear_bit(idx, cpuc->used_mask); |
1564 | 1693 | ||
1565 | perf_counter_update_userpage(counter); | 1694 | perf_event_update_userpage(event); |
1566 | } | 1695 | } |
1567 | 1696 | ||
1568 | /* | 1697 | /* |
1569 | * Save and restart an expired counter. Called by NMI contexts, | 1698 | * Save and restart an expired event. Called by NMI contexts, |
1570 | * so it has to be careful about preempting normal counter ops: | 1699 | * so it has to be careful about preempting normal event ops: |
1571 | */ | 1700 | */ |
1572 | static int intel_pmu_save_and_restart(struct perf_counter *counter) | 1701 | static int intel_pmu_save_and_restart(struct perf_event *event) |
1573 | { | 1702 | { |
1574 | struct hw_perf_counter *hwc = &counter->hw; | 1703 | struct hw_perf_event *hwc = &event->hw; |
1575 | int idx = hwc->idx; | 1704 | int idx = hwc->idx; |
1576 | int ret; | 1705 | int ret; |
1577 | 1706 | ||
1578 | x86_perf_counter_update(counter, hwc, idx); | 1707 | x86_perf_event_update(event, hwc, idx); |
1579 | ret = x86_perf_counter_set_period(counter, hwc, idx); | 1708 | ret = x86_perf_event_set_period(event, hwc, idx); |
1580 | 1709 | ||
1581 | if (counter->state == PERF_COUNTER_STATE_ACTIVE) | 1710 | if (event->state == PERF_EVENT_STATE_ACTIVE) |
1582 | intel_pmu_enable_counter(hwc, idx); | 1711 | intel_pmu_enable_event(hwc, idx); |
1583 | 1712 | ||
1584 | return ret; | 1713 | return ret; |
1585 | } | 1714 | } |
1586 | 1715 | ||
1587 | static void intel_pmu_reset(void) | 1716 | static void intel_pmu_reset(void) |
1588 | { | 1717 | { |
1589 | struct debug_store *ds = __get_cpu_var(cpu_hw_counters).ds; | 1718 | struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds; |
1590 | unsigned long flags; | 1719 | unsigned long flags; |
1591 | int idx; | 1720 | int idx; |
1592 | 1721 | ||
1593 | if (!x86_pmu.num_counters) | 1722 | if (!x86_pmu.num_events) |
1594 | return; | 1723 | return; |
1595 | 1724 | ||
1596 | local_irq_save(flags); | 1725 | local_irq_save(flags); |
1597 | 1726 | ||
1598 | printk("clearing PMU state on CPU#%d\n", smp_processor_id()); | 1727 | printk("clearing PMU state on CPU#%d\n", smp_processor_id()); |
1599 | 1728 | ||
1600 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | 1729 | for (idx = 0; idx < x86_pmu.num_events; idx++) { |
1601 | checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); | 1730 | checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); |
1602 | checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); | 1731 | checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); |
1603 | } | 1732 | } |
1604 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { | 1733 | for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { |
1605 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); | 1734 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); |
1606 | } | 1735 | } |
1607 | if (ds) | 1736 | if (ds) |
@@ -1613,39 +1742,38 @@ static void intel_pmu_reset(void) | |||
1613 | static int p6_pmu_handle_irq(struct pt_regs *regs) | 1742 | static int p6_pmu_handle_irq(struct pt_regs *regs) |
1614 | { | 1743 | { |
1615 | struct perf_sample_data data; | 1744 | struct perf_sample_data data; |
1616 | struct cpu_hw_counters *cpuc; | 1745 | struct cpu_hw_events *cpuc; |
1617 | struct perf_counter *counter; | 1746 | struct perf_event *event; |
1618 | struct hw_perf_counter *hwc; | 1747 | struct hw_perf_event *hwc; |
1619 | int idx, handled = 0; | 1748 | int idx, handled = 0; |
1620 | u64 val; | 1749 | u64 val; |
1621 | 1750 | ||
1622 | data.regs = regs; | ||
1623 | data.addr = 0; | 1751 | data.addr = 0; |
1624 | 1752 | ||
1625 | cpuc = &__get_cpu_var(cpu_hw_counters); | 1753 | cpuc = &__get_cpu_var(cpu_hw_events); |
1626 | 1754 | ||
1627 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | 1755 | for (idx = 0; idx < x86_pmu.num_events; idx++) { |
1628 | if (!test_bit(idx, cpuc->active_mask)) | 1756 | if (!test_bit(idx, cpuc->active_mask)) |
1629 | continue; | 1757 | continue; |
1630 | 1758 | ||
1631 | counter = cpuc->counters[idx]; | 1759 | event = cpuc->events[idx]; |
1632 | hwc = &counter->hw; | 1760 | hwc = &event->hw; |
1633 | 1761 | ||
1634 | val = x86_perf_counter_update(counter, hwc, idx); | 1762 | val = x86_perf_event_update(event, hwc, idx); |
1635 | if (val & (1ULL << (x86_pmu.counter_bits - 1))) | 1763 | if (val & (1ULL << (x86_pmu.event_bits - 1))) |
1636 | continue; | 1764 | continue; |
1637 | 1765 | ||
1638 | /* | 1766 | /* |
1639 | * counter overflow | 1767 | * event overflow |
1640 | */ | 1768 | */ |
1641 | handled = 1; | 1769 | handled = 1; |
1642 | data.period = counter->hw.last_period; | 1770 | data.period = event->hw.last_period; |
1643 | 1771 | ||
1644 | if (!x86_perf_counter_set_period(counter, hwc, idx)) | 1772 | if (!x86_perf_event_set_period(event, hwc, idx)) |
1645 | continue; | 1773 | continue; |
1646 | 1774 | ||
1647 | if (perf_counter_overflow(counter, 1, &data)) | 1775 | if (perf_event_overflow(event, 1, &data, regs)) |
1648 | p6_pmu_disable_counter(hwc, idx); | 1776 | p6_pmu_disable_event(hwc, idx); |
1649 | } | 1777 | } |
1650 | 1778 | ||
1651 | if (handled) | 1779 | if (handled) |
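The overflow test in these handlers relies on the -left programming scheme: while the counter is still counting toward the wrap, its top implemented bit stays set, so a clear top bit marks an overflow. A small model, using the 48-bit AMD counter width:

	#include <stdint.h>
	#include <stdio.h>

	/* Counters are programmed to -left; the top implemented bit
	 * stays set until the counter wraps past 2^event_bits. */
	static int overflowed(uint64_t val, int event_bits)
	{
		return !(val & (1ULL << (event_bits - 1)));
	}

	int main(void)
	{
		uint64_t before_wrap = ((1ULL << 48) - 1) - 5; /* counting */
		uint64_t after_wrap  = 3;                      /* wrapped */

		printf("%d %d\n", overflowed(before_wrap, 48),
		       overflowed(after_wrap, 48));            /* 0 1 */
		return 0;
	}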
@@ -1661,17 +1789,16 @@ static int p6_pmu_handle_irq(struct pt_regs *regs) | |||
1661 | static int intel_pmu_handle_irq(struct pt_regs *regs) | 1789 | static int intel_pmu_handle_irq(struct pt_regs *regs) |
1662 | { | 1790 | { |
1663 | struct perf_sample_data data; | 1791 | struct perf_sample_data data; |
1664 | struct cpu_hw_counters *cpuc; | 1792 | struct cpu_hw_events *cpuc; |
1665 | int bit, loops; | 1793 | int bit, loops; |
1666 | u64 ack, status; | 1794 | u64 ack, status; |
1667 | 1795 | ||
1668 | data.regs = regs; | ||
1669 | data.addr = 0; | 1796 | data.addr = 0; |
1670 | 1797 | ||
1671 | cpuc = &__get_cpu_var(cpu_hw_counters); | 1798 | cpuc = &__get_cpu_var(cpu_hw_events); |
1672 | 1799 | ||
1673 | perf_disable(); | 1800 | perf_disable(); |
1674 | intel_pmu_drain_bts_buffer(cpuc, &data); | 1801 | intel_pmu_drain_bts_buffer(cpuc); |
1675 | status = intel_pmu_get_status(); | 1802 | status = intel_pmu_get_status(); |
1676 | if (!status) { | 1803 | if (!status) { |
1677 | perf_enable(); | 1804 | perf_enable(); |
@@ -1681,8 +1808,8 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) | |||
1681 | loops = 0; | 1808 | loops = 0; |
1682 | again: | 1809 | again: |
1683 | if (++loops > 100) { | 1810 | if (++loops > 100) { |
1684 | WARN_ONCE(1, "perfcounters: irq loop stuck!\n"); | 1811 | WARN_ONCE(1, "perfevents: irq loop stuck!\n"); |
1685 | perf_counter_print_debug(); | 1812 | perf_event_print_debug(); |
1686 | intel_pmu_reset(); | 1813 | intel_pmu_reset(); |
1687 | perf_enable(); | 1814 | perf_enable(); |
1688 | return 1; | 1815 | return 1; |
@@ -1691,19 +1818,19 @@ again: | |||
1691 | inc_irq_stat(apic_perf_irqs); | 1818 | inc_irq_stat(apic_perf_irqs); |
1692 | ack = status; | 1819 | ack = status; |
1693 | for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { | 1820 | for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { |
1694 | struct perf_counter *counter = cpuc->counters[bit]; | 1821 | struct perf_event *event = cpuc->events[bit]; |
1695 | 1822 | ||
1696 | clear_bit(bit, (unsigned long *) &status); | 1823 | clear_bit(bit, (unsigned long *) &status); |
1697 | if (!test_bit(bit, cpuc->active_mask)) | 1824 | if (!test_bit(bit, cpuc->active_mask)) |
1698 | continue; | 1825 | continue; |
1699 | 1826 | ||
1700 | if (!intel_pmu_save_and_restart(counter)) | 1827 | if (!intel_pmu_save_and_restart(event)) |
1701 | continue; | 1828 | continue; |
1702 | 1829 | ||
1703 | data.period = counter->hw.last_period; | 1830 | data.period = event->hw.last_period; |
1704 | 1831 | ||
1705 | if (perf_counter_overflow(counter, 1, &data)) | 1832 | if (perf_event_overflow(event, 1, &data, regs)) |
1706 | intel_pmu_disable_counter(&counter->hw, bit); | 1833 | intel_pmu_disable_event(&event->hw, bit); |
1707 | } | 1834 | } |
1708 | 1835 | ||
1709 | intel_pmu_ack_status(ack); | 1836 | intel_pmu_ack_status(ack); |
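intel_pmu_handle_irq() keeps re-reading GLOBAL_STATUS and looping until it stays clear, with a hard cap so a stuck status register cannot wedge the NMI path. The control flow, reduced to a sketch with stub helpers in place of the MSR accesses:

	#include <stdint.h>

	/* Stubs standing in for rdmsrl(GLOBAL_STATUS) and the per-bit
	 * overflow handling; the point here is the control flow. */
	static uint64_t read_status(void) { return 0; }
	static void handle_and_ack(uint64_t status) { (void)status; }

	static int handle_irq_model(void)
	{
		uint64_t status;
		int loops = 0;

		while ((status = read_status()) != 0) {
			/* same hard cap as intel_pmu_handle_irq() */
			if (++loops > 100)
				return 1;  /* kernel: dump state, reset PMU */
			handle_and_ack(status);
		}
		return loops != 0;
	}

	int main(void)
	{
		return handle_irq_model();
	}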
@@ -1723,39 +1850,38 @@ again: | |||
1723 | static int amd_pmu_handle_irq(struct pt_regs *regs) | 1850 | static int amd_pmu_handle_irq(struct pt_regs *regs) |
1724 | { | 1851 | { |
1725 | struct perf_sample_data data; | 1852 | struct perf_sample_data data; |
1726 | struct cpu_hw_counters *cpuc; | 1853 | struct cpu_hw_events *cpuc; |
1727 | struct perf_counter *counter; | 1854 | struct perf_event *event; |
1728 | struct hw_perf_counter *hwc; | 1855 | struct hw_perf_event *hwc; |
1729 | int idx, handled = 0; | 1856 | int idx, handled = 0; |
1730 | u64 val; | 1857 | u64 val; |
1731 | 1858 | ||
1732 | data.regs = regs; | ||
1733 | data.addr = 0; | 1859 | data.addr = 0; |
1734 | 1860 | ||
1735 | cpuc = &__get_cpu_var(cpu_hw_counters); | 1861 | cpuc = &__get_cpu_var(cpu_hw_events); |
1736 | 1862 | ||
1737 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | 1863 | for (idx = 0; idx < x86_pmu.num_events; idx++) { |
1738 | if (!test_bit(idx, cpuc->active_mask)) | 1864 | if (!test_bit(idx, cpuc->active_mask)) |
1739 | continue; | 1865 | continue; |
1740 | 1866 | ||
1741 | counter = cpuc->counters[idx]; | 1867 | event = cpuc->events[idx]; |
1742 | hwc = &counter->hw; | 1868 | hwc = &event->hw; |
1743 | 1869 | ||
1744 | val = x86_perf_counter_update(counter, hwc, idx); | 1870 | val = x86_perf_event_update(event, hwc, idx); |
1745 | if (val & (1ULL << (x86_pmu.counter_bits - 1))) | 1871 | if (val & (1ULL << (x86_pmu.event_bits - 1))) |
1746 | continue; | 1872 | continue; |
1747 | 1873 | ||
1748 | /* | 1874 | /* |
1749 | * counter overflow | 1875 | * event overflow |
1750 | */ | 1876 | */ |
1751 | handled = 1; | 1877 | handled = 1; |
1752 | data.period = counter->hw.last_period; | 1878 | data.period = event->hw.last_period; |
1753 | 1879 | ||
1754 | if (!x86_perf_counter_set_period(counter, hwc, idx)) | 1880 | if (!x86_perf_event_set_period(event, hwc, idx)) |
1755 | continue; | 1881 | continue; |
1756 | 1882 | ||
1757 | if (perf_counter_overflow(counter, 1, &data)) | 1883 | if (perf_event_overflow(event, 1, &data, regs)) |
1758 | amd_pmu_disable_counter(hwc, idx); | 1884 | amd_pmu_disable_event(hwc, idx); |
1759 | } | 1885 | } |
1760 | 1886 | ||
1761 | if (handled) | 1887 | if (handled) |
@@ -1769,18 +1895,21 @@ void smp_perf_pending_interrupt(struct pt_regs *regs) | |||
1769 | irq_enter(); | 1895 | irq_enter(); |
1770 | ack_APIC_irq(); | 1896 | ack_APIC_irq(); |
1771 | inc_irq_stat(apic_pending_irqs); | 1897 | inc_irq_stat(apic_pending_irqs); |
1772 | perf_counter_do_pending(); | 1898 | perf_event_do_pending(); |
1773 | irq_exit(); | 1899 | irq_exit(); |
1774 | } | 1900 | } |
1775 | 1901 | ||
1776 | void set_perf_counter_pending(void) | 1902 | void set_perf_event_pending(void) |
1777 | { | 1903 | { |
1778 | #ifdef CONFIG_X86_LOCAL_APIC | 1904 | #ifdef CONFIG_X86_LOCAL_APIC |
1905 | if (!x86_pmu.apic || !x86_pmu_initialized()) | ||
1906 | return; | ||
1907 | |||
1779 | apic->send_IPI_self(LOCAL_PENDING_VECTOR); | 1908 | apic->send_IPI_self(LOCAL_PENDING_VECTOR); |
1780 | #endif | 1909 | #endif |
1781 | } | 1910 | } |
1782 | 1911 | ||
1783 | void perf_counters_lapic_init(void) | 1912 | void perf_events_lapic_init(void) |
1784 | { | 1913 | { |
1785 | #ifdef CONFIG_X86_LOCAL_APIC | 1914 | #ifdef CONFIG_X86_LOCAL_APIC |
1786 | if (!x86_pmu.apic || !x86_pmu_initialized()) | 1915 | if (!x86_pmu.apic || !x86_pmu_initialized()) |
@@ -1794,13 +1923,13 @@ void perf_counters_lapic_init(void) | |||
1794 | } | 1923 | } |
1795 | 1924 | ||
1796 | static int __kprobes | 1925 | static int __kprobes |
1797 | perf_counter_nmi_handler(struct notifier_block *self, | 1926 | perf_event_nmi_handler(struct notifier_block *self, |
1798 | unsigned long cmd, void *__args) | 1927 | unsigned long cmd, void *__args) |
1799 | { | 1928 | { |
1800 | struct die_args *args = __args; | 1929 | struct die_args *args = __args; |
1801 | struct pt_regs *regs; | 1930 | struct pt_regs *regs; |
1802 | 1931 | ||
1803 | if (!atomic_read(&active_counters)) | 1932 | if (!atomic_read(&active_events)) |
1804 | return NOTIFY_DONE; | 1933 | return NOTIFY_DONE; |
1805 | 1934 | ||
1806 | switch (cmd) { | 1935 | switch (cmd) { |
@@ -1819,7 +1948,7 @@ perf_counter_nmi_handler(struct notifier_block *self, | |||
1819 | #endif | 1948 | #endif |
1820 | /* | 1949 | /* |
1821 | * Can't rely on the handled return value to say it was our NMI, two | 1950 | * Can't rely on the handled return value to say it was our NMI, two |
1822 | * counters could trigger 'simultaneously' raising two back-to-back NMIs. | 1951 | * events could trigger 'simultaneously' raising two back-to-back NMIs. |
1823 | * | 1952 | * |
1824 | * If the first NMI handles both, the latter will be empty and daze | 1953 | * If the first NMI handles both, the latter will be empty and daze |
1825 | * the CPU. | 1954 | * the CPU. |
@@ -1829,8 +1958,8 @@ perf_counter_nmi_handler(struct notifier_block *self, | |||
1829 | return NOTIFY_STOP; | 1958 | return NOTIFY_STOP; |
1830 | } | 1959 | } |
1831 | 1960 | ||
1832 | static __read_mostly struct notifier_block perf_counter_nmi_notifier = { | 1961 | static __read_mostly struct notifier_block perf_event_nmi_notifier = { |
1833 | .notifier_call = perf_counter_nmi_handler, | 1962 | .notifier_call = perf_event_nmi_handler, |
1834 | .next = NULL, | 1963 | .next = NULL, |
1835 | .priority = 1 | 1964 | .priority = 1 |
1836 | }; | 1965 | }; |
@@ -1840,8 +1969,8 @@ static struct x86_pmu p6_pmu = { | |||
1840 | .handle_irq = p6_pmu_handle_irq, | 1969 | .handle_irq = p6_pmu_handle_irq, |
1841 | .disable_all = p6_pmu_disable_all, | 1970 | .disable_all = p6_pmu_disable_all, |
1842 | .enable_all = p6_pmu_enable_all, | 1971 | .enable_all = p6_pmu_enable_all, |
1843 | .enable = p6_pmu_enable_counter, | 1972 | .enable = p6_pmu_enable_event, |
1844 | .disable = p6_pmu_disable_counter, | 1973 | .disable = p6_pmu_disable_event, |
1845 | .eventsel = MSR_P6_EVNTSEL0, | 1974 | .eventsel = MSR_P6_EVNTSEL0, |
1846 | .perfctr = MSR_P6_PERFCTR0, | 1975 | .perfctr = MSR_P6_PERFCTR0, |
1847 | .event_map = p6_pmu_event_map, | 1976 | .event_map = p6_pmu_event_map, |
@@ -1850,16 +1979,17 @@ static struct x86_pmu p6_pmu = { | |||
1850 | .apic = 1, | 1979 | .apic = 1, |
1851 | .max_period = (1ULL << 31) - 1, | 1980 | .max_period = (1ULL << 31) - 1, |
1852 | .version = 0, | 1981 | .version = 0, |
1853 | .num_counters = 2, | 1982 | .num_events = 2, |
1854 | /* | 1983 | /* |
1855 | * Counters have 40 bits implemented. However they are designed such | 1984 | * Events have 40 bits implemented. However they are designed such |
1856 | * that bits [32-39] are sign extensions of bit 31. As such the | 1985 | * that bits [32-39] are sign extensions of bit 31. As such the |
1857 | * effective width of a counter for P6-like PMU is 32 bits only. | 1986 | * effective width of an event for P6-like PMU is 32 bits only. |
1858 | * | 1987 | * |
1859 | * See IA-32 Intel Architecture Software developer manual Vol 3B | 1988 | * See IA-32 Intel Architecture Software developer manual Vol 3B |
1860 | */ | 1989 | */ |
1861 | .counter_bits = 32, | 1990 | .event_bits = 32, |
1862 | .counter_mask = (1ULL << 32) - 1, | 1991 | .event_mask = (1ULL << 32) - 1, |
1992 | .get_event_idx = intel_get_event_idx, | ||
1863 | }; | 1993 | }; |
1864 | 1994 | ||
1865 | static struct x86_pmu intel_pmu = { | 1995 | static struct x86_pmu intel_pmu = { |
@@ -1867,8 +1997,8 @@ static struct x86_pmu intel_pmu = { | |||
1867 | .handle_irq = intel_pmu_handle_irq, | 1997 | .handle_irq = intel_pmu_handle_irq, |
1868 | .disable_all = intel_pmu_disable_all, | 1998 | .disable_all = intel_pmu_disable_all, |
1869 | .enable_all = intel_pmu_enable_all, | 1999 | .enable_all = intel_pmu_enable_all, |
1870 | .enable = intel_pmu_enable_counter, | 2000 | .enable = intel_pmu_enable_event, |
1871 | .disable = intel_pmu_disable_counter, | 2001 | .disable = intel_pmu_disable_event, |
1872 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, | 2002 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, |
1873 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | 2003 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, |
1874 | .event_map = intel_pmu_event_map, | 2004 | .event_map = intel_pmu_event_map, |
@@ -1878,11 +2008,12 @@ static struct x86_pmu intel_pmu = { | |||
1878 | /* | 2008 | /* |
1879 | * Intel PMCs cannot be accessed sanely above 32 bit width, | 2009 | * Intel PMCs cannot be accessed sanely above 32 bit width, |
1880 | * so we install an artificial 1<<31 period regardless of | 2010 | * so we install an artificial 1<<31 period regardless of |
1881 | * the generic counter period: | 2011 | * the generic event period: |
1882 | */ | 2012 | */ |
1883 | .max_period = (1ULL << 31) - 1, | 2013 | .max_period = (1ULL << 31) - 1, |
1884 | .enable_bts = intel_pmu_enable_bts, | 2014 | .enable_bts = intel_pmu_enable_bts, |
1885 | .disable_bts = intel_pmu_disable_bts, | 2015 | .disable_bts = intel_pmu_disable_bts, |
2016 | .get_event_idx = intel_get_event_idx, | ||
1886 | }; | 2017 | }; |
1887 | 2018 | ||
1888 | static struct x86_pmu amd_pmu = { | 2019 | static struct x86_pmu amd_pmu = { |
@@ -1890,19 +2021,20 @@ static struct x86_pmu amd_pmu = { | |||
1890 | .handle_irq = amd_pmu_handle_irq, | 2021 | .handle_irq = amd_pmu_handle_irq, |
1891 | .disable_all = amd_pmu_disable_all, | 2022 | .disable_all = amd_pmu_disable_all, |
1892 | .enable_all = amd_pmu_enable_all, | 2023 | .enable_all = amd_pmu_enable_all, |
1893 | .enable = amd_pmu_enable_counter, | 2024 | .enable = amd_pmu_enable_event, |
1894 | .disable = amd_pmu_disable_counter, | 2025 | .disable = amd_pmu_disable_event, |
1895 | .eventsel = MSR_K7_EVNTSEL0, | 2026 | .eventsel = MSR_K7_EVNTSEL0, |
1896 | .perfctr = MSR_K7_PERFCTR0, | 2027 | .perfctr = MSR_K7_PERFCTR0, |
1897 | .event_map = amd_pmu_event_map, | 2028 | .event_map = amd_pmu_event_map, |
1898 | .raw_event = amd_pmu_raw_event, | 2029 | .raw_event = amd_pmu_raw_event, |
1899 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), | 2030 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), |
1900 | .num_counters = 4, | 2031 | .num_events = 4, |
1901 | .counter_bits = 48, | 2032 | .event_bits = 48, |
1902 | .counter_mask = (1ULL << 48) - 1, | 2033 | .event_mask = (1ULL << 48) - 1, |
1903 | .apic = 1, | 2034 | .apic = 1, |
1904 | /* use highest bit to detect overflow */ | 2035 | /* use highest bit to detect overflow */ |
1905 | .max_period = (1ULL << 47) - 1, | 2036 | .max_period = (1ULL << 47) - 1, |
2037 | .get_event_idx = gen_get_event_idx, | ||
1906 | }; | 2038 | }; |
1907 | 2039 | ||
1908 | static int p6_pmu_init(void) | 2040 | static int p6_pmu_init(void) |
@@ -1915,10 +2047,12 @@ static int p6_pmu_init(void) | |||
1915 | case 7: | 2047 | case 7: |
1916 | case 8: | 2048 | case 8: |
1917 | case 11: /* Pentium III */ | 2049 | case 11: /* Pentium III */ |
2050 | event_constraints = intel_p6_event_constraints; | ||
1918 | break; | 2051 | break; |
1919 | case 9: | 2052 | case 9: |
1920 | case 13: | 2053 | case 13: |
1921 | /* Pentium M */ | 2054 | /* Pentium M */ |
2055 | event_constraints = intel_p6_event_constraints; | ||
1922 | break; | 2056 | break; |
1923 | default: | 2057 | default: |
1924 | pr_cont("unsupported p6 CPU model %d ", | 2058 | pr_cont("unsupported p6 CPU model %d ", |
@@ -1956,7 +2090,7 @@ static int intel_pmu_init(void) | |||
1956 | 2090 | ||
1957 | /* | 2091 | /* |
1958 | * Check whether the Architectural PerfMon supports | 2092 | * Check whether the Architectural PerfMon supports |
1959 | * Branch Misses Retired Event or not. | 2093 | * Branch Misses Retired hw_event or not. |
1960 | */ | 2094 | */ |
1961 | cpuid(10, &eax.full, &ebx, &unused, &edx.full); | 2095 | cpuid(10, &eax.full, &ebx, &unused, &edx.full); |
1962 | if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) | 2096 | if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) |
@@ -1968,15 +2102,15 @@ static int intel_pmu_init(void) | |||
1968 | 2102 | ||
1969 | x86_pmu = intel_pmu; | 2103 | x86_pmu = intel_pmu; |
1970 | x86_pmu.version = version; | 2104 | x86_pmu.version = version; |
1971 | x86_pmu.num_counters = eax.split.num_counters; | 2105 | x86_pmu.num_events = eax.split.num_events; |
1972 | x86_pmu.counter_bits = eax.split.bit_width; | 2106 | x86_pmu.event_bits = eax.split.bit_width; |
1973 | x86_pmu.counter_mask = (1ULL << eax.split.bit_width) - 1; | 2107 | x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1; |
1974 | 2108 | ||
1975 | /* | 2109 | /* |
1976 | * Quirk: v2 perfmon does not report fixed-purpose counters, so | 2110 | * Quirk: v2 perfmon does not report fixed-purpose events, so |
1977 | * assume at least 3 counters: | 2111 | * assume at least 3 events: |
1978 | */ | 2112 | */ |
1979 | x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); | 2113 | x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); |
1980 | 2114 | ||
1981 | /* | 2115 | /* |
1982 | * Install the hw-cache-events table: | 2116 | * Install the hw-cache-events table: |
@@ -1990,12 +2124,14 @@ static int intel_pmu_init(void) | |||
1990 | sizeof(hw_cache_event_ids)); | 2124 | sizeof(hw_cache_event_ids)); |
1991 | 2125 | ||
1992 | pr_cont("Core2 events, "); | 2126 | pr_cont("Core2 events, "); |
2127 | event_constraints = intel_core_event_constraints; | ||
1993 | break; | 2128 | break; |
1994 | default: | 2129 | default: |
1995 | case 26: | 2130 | case 26: |
1996 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, | 2131 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, |
1997 | sizeof(hw_cache_event_ids)); | 2132 | sizeof(hw_cache_event_ids)); |
1998 | 2133 | ||
2134 | event_constraints = intel_nehalem_event_constraints; | ||
1999 | pr_cont("Nehalem/Corei7 events, "); | 2135 | pr_cont("Nehalem/Corei7 events, "); |
2000 | break; | 2136 | break; |
2001 | case 28: | 2137 | case 28: |
@@ -2023,11 +2159,11 @@ static int amd_pmu_init(void) | |||
2023 | return 0; | 2159 | return 0; |
2024 | } | 2160 | } |
2025 | 2161 | ||
2026 | void __init init_hw_perf_counters(void) | 2162 | void __init init_hw_perf_events(void) |
2027 | { | 2163 | { |
2028 | int err; | 2164 | int err; |
2029 | 2165 | ||
2030 | pr_info("Performance Counters: "); | 2166 | pr_info("Performance Events: "); |
2031 | 2167 | ||
2032 | switch (boot_cpu_data.x86_vendor) { | 2168 | switch (boot_cpu_data.x86_vendor) { |
2033 | case X86_VENDOR_INTEL: | 2169 | case X86_VENDOR_INTEL: |
@@ -2040,45 +2176,45 @@ void __init init_hw_perf_counters(void) | |||
2040 | return; | 2176 | return; |
2041 | } | 2177 | } |
2042 | if (err != 0) { | 2178 | if (err != 0) { |
2043 | pr_cont("no PMU driver, software counters only.\n"); | 2179 | pr_cont("no PMU driver, software events only.\n"); |
2044 | return; | 2180 | return; |
2045 | } | 2181 | } |
2046 | 2182 | ||
2047 | pr_cont("%s PMU driver.\n", x86_pmu.name); | 2183 | pr_cont("%s PMU driver.\n", x86_pmu.name); |
2048 | 2184 | ||
2049 | if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { | 2185 | if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) { |
2050 | WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!", | 2186 | WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", |
2051 | x86_pmu.num_counters, X86_PMC_MAX_GENERIC); | 2187 | x86_pmu.num_events, X86_PMC_MAX_GENERIC); |
2052 | x86_pmu.num_counters = X86_PMC_MAX_GENERIC; | 2188 | x86_pmu.num_events = X86_PMC_MAX_GENERIC; |
2053 | } | 2189 | } |
2054 | perf_counter_mask = (1 << x86_pmu.num_counters) - 1; | 2190 | perf_event_mask = (1 << x86_pmu.num_events) - 1; |
2055 | perf_max_counters = x86_pmu.num_counters; | 2191 | perf_max_events = x86_pmu.num_events; |
2056 | 2192 | ||
2057 | if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { | 2193 | if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) { |
2058 | WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!", | 2194 | WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", |
2059 | x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED); | 2195 | x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED); |
2060 | x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; | 2196 | x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED; |
2061 | } | 2197 | } |
2062 | 2198 | ||
2063 | perf_counter_mask |= | 2199 | perf_event_mask |= |
2064 | ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; | 2200 | ((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED; |
2065 | x86_pmu.intel_ctrl = perf_counter_mask; | 2201 | x86_pmu.intel_ctrl = perf_event_mask; |
2066 | 2202 | ||
2067 | perf_counters_lapic_init(); | 2203 | perf_events_lapic_init(); |
2068 | register_die_notifier(&perf_counter_nmi_notifier); | 2204 | register_die_notifier(&perf_event_nmi_notifier); |
2069 | 2205 | ||
2070 | pr_info("... version: %d\n", x86_pmu.version); | 2206 | pr_info("... version: %d\n", x86_pmu.version); |
2071 | pr_info("... bit width: %d\n", x86_pmu.counter_bits); | 2207 | pr_info("... bit width: %d\n", x86_pmu.event_bits); |
2072 | pr_info("... generic counters: %d\n", x86_pmu.num_counters); | 2208 | pr_info("... generic registers: %d\n", x86_pmu.num_events); |
2073 | pr_info("... value mask: %016Lx\n", x86_pmu.counter_mask); | 2209 | pr_info("... value mask: %016Lx\n", x86_pmu.event_mask); |
2074 | pr_info("... max period: %016Lx\n", x86_pmu.max_period); | 2210 | pr_info("... max period: %016Lx\n", x86_pmu.max_period); |
2075 | pr_info("... fixed-purpose counters: %d\n", x86_pmu.num_counters_fixed); | 2211 | pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed); |
2076 | pr_info("... counter mask: %016Lx\n", perf_counter_mask); | 2212 | pr_info("... event mask: %016Lx\n", perf_event_mask); |
2077 | } | 2213 | } |
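The perf_event_mask assembled here mirrors the GLOBAL_CTRL/GLOBAL_STATUS bit layout: generic counters in the low bits, fixed counters starting at bit 32. A quick check of the arithmetic with Core2-like counts (4 generic, 3 fixed):

	#include <stdint.h>
	#include <stdio.h>

	#define X86_PMC_IDX_FIXED 32

	int main(void)
	{
		int num_events = 4, num_events_fixed = 3;
		uint64_t mask;

		/* generic counters occupy the low bits, fixed counters
		 * start at bit 32, matching the global control MSRs */
		mask  = (1ULL << num_events) - 1;
		mask |= ((1ULL << num_events_fixed) - 1) << X86_PMC_IDX_FIXED;

		printf("perf_event_mask = %#llx\n",
		       (unsigned long long)mask);   /* 0x70000000f */
		return 0;
	}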
2078 | 2214 | ||
2079 | static inline void x86_pmu_read(struct perf_counter *counter) | 2215 | static inline void x86_pmu_read(struct perf_event *event) |
2080 | { | 2216 | { |
2081 | x86_perf_counter_update(counter, &counter->hw, counter->hw.idx); | 2217 | x86_perf_event_update(event, &event->hw, event->hw.idx); |
2082 | } | 2218 | } |
2083 | 2219 | ||
2084 | static const struct pmu pmu = { | 2220 | static const struct pmu pmu = { |
@@ -2088,13 +2224,52 @@ static const struct pmu pmu = { | |||
2088 | .unthrottle = x86_pmu_unthrottle, | 2224 | .unthrottle = x86_pmu_unthrottle, |
2089 | }; | 2225 | }; |
2090 | 2226 | ||
2091 | const struct pmu *hw_perf_counter_init(struct perf_counter *counter) | 2227 | static int |
2228 | validate_event(struct cpu_hw_events *cpuc, struct perf_event *event) | ||
2229 | { | ||
2230 | struct hw_perf_event fake_event = event->hw; | ||
2231 | |||
2232 | if (event->pmu != &pmu) | ||
2233 | return 0; | ||
2234 | |||
2235 | return x86_schedule_event(cpuc, &fake_event); | ||
2236 | } | ||
2237 | |||
2238 | static int validate_group(struct perf_event *event) | ||
2239 | { | ||
2240 | struct perf_event *sibling, *leader = event->group_leader; | ||
2241 | struct cpu_hw_events fake_pmu; | ||
2242 | |||
2243 | memset(&fake_pmu, 0, sizeof(fake_pmu)); | ||
2244 | |||
2245 | if (!validate_event(&fake_pmu, leader)) | ||
2246 | return -ENOSPC; | ||
2247 | |||
2248 | list_for_each_entry(sibling, &leader->sibling_list, group_entry) { | ||
2249 | if (!validate_event(&fake_pmu, sibling)) | ||
2250 | return -ENOSPC; | ||
2251 | } | ||
2252 | |||
2253 | if (!validate_event(&fake_pmu, event)) | ||
2254 | return -ENOSPC; | ||
2255 | |||
2256 | return 0; | ||
2257 | } | ||
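validate_group() schedules the leader and every sibling into a zeroed scratch cpu_hw_events, so counter availability is checked without disturbing live PMU state; only if the whole group fits is the new event accepted. A stripped-down model of that dry run (first-free placement only, ignoring constraints):

	#include <stdint.h>
	#include <stdio.h>

	#define NUM_EVENTS 4

	/* Dry-run placement into a scratch used-counter bitmap; the
	 * zeroed bitmap models the memset(&fake_pmu, 0, ...) step. */
	static int place(uint64_t *used)
	{
		int i;

		for (i = 0; i < NUM_EVENTS; i++)
			if (!(*used >> i & 1)) {
				*used |= 1ULL << i;
				return i;
			}
		return -1;
	}

	static int group_fits(int group_size)
	{
		uint64_t fake_used = 0;
		int n;

		for (n = 0; n < group_size; n++)
			if (place(&fake_used) < 0)
				return 0;   /* -ENOSPC in the kernel code */
		return 1;
	}

	int main(void)
	{
		printf("%d %d\n", group_fits(4), group_fits(5));  /* 1 0 */
		return 0;
	}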
2258 | |||
2259 | const struct pmu *hw_perf_event_init(struct perf_event *event) | ||
2092 | { | 2260 | { |
2093 | int err; | 2261 | int err; |
2094 | 2262 | ||
2095 | err = __hw_perf_counter_init(counter); | 2263 | err = __hw_perf_event_init(event); |
2096 | if (err) | 2264 | if (!err) { |
2265 | if (event->group_leader != event) | ||
2266 | err = validate_group(event); | ||
2267 | } | ||
2268 | if (err) { | ||
2269 | if (event->destroy) | ||
2270 | event->destroy(event); | ||
2097 | return ERR_PTR(err); | 2271 | return ERR_PTR(err); |
2272 | } | ||
2098 | 2273 | ||
2099 | return &pmu; | 2274 | return &pmu; |
2100 | } | 2275 | } |
@@ -2110,8 +2285,8 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip) | |||
2110 | entry->ip[entry->nr++] = ip; | 2285 | entry->ip[entry->nr++] = ip; |
2111 | } | 2286 | } |
2112 | 2287 | ||
2113 | static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry); | 2288 | static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); |
2114 | static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry); | 2289 | static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); |
2115 | static DEFINE_PER_CPU(int, in_nmi_frame); | 2290 | static DEFINE_PER_CPU(int, in_nmi_frame); |
2116 | 2291 | ||
2117 | 2292 | ||
@@ -2264,9 +2439,9 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | |||
2264 | struct perf_callchain_entry *entry; | 2439 | struct perf_callchain_entry *entry; |
2265 | 2440 | ||
2266 | if (in_nmi()) | 2441 | if (in_nmi()) |
2267 | entry = &__get_cpu_var(nmi_entry); | 2442 | entry = &__get_cpu_var(pmc_nmi_entry); |
2268 | else | 2443 | else |
2269 | entry = &__get_cpu_var(irq_entry); | 2444 | entry = &__get_cpu_var(pmc_irq_entry); |
2270 | 2445 | ||
2271 | entry->nr = 0; | 2446 | entry->nr = 0; |
2272 | 2447 | ||
@@ -2275,7 +2450,7 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | |||
2275 | return entry; | 2450 | return entry; |
2276 | } | 2451 | } |
2277 | 2452 | ||
2278 | void hw_perf_counter_setup_online(int cpu) | 2453 | void hw_perf_event_setup_online(int cpu) |
2279 | { | 2454 | { |
2280 | init_debug_store_on_cpu(cpu); | 2455 | init_debug_store_on_cpu(cpu); |
2281 | } | 2456 | } |
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 392bea43b890..fab786f60ed6 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c | |||
@@ -20,7 +20,7 @@ | |||
20 | #include <linux/kprobes.h> | 20 | #include <linux/kprobes.h> |
21 | 21 | ||
22 | #include <asm/apic.h> | 22 | #include <asm/apic.h> |
23 | #include <asm/perf_counter.h> | 23 | #include <asm/perf_event.h> |
24 | 24 | ||
25 | struct nmi_watchdog_ctlblk { | 25 | struct nmi_watchdog_ctlblk { |
26 | unsigned int cccr_msr; | 26 | unsigned int cccr_msr; |
diff --git a/arch/x86/kernel/cpu/sched.c b/arch/x86/kernel/cpu/sched.c new file mode 100644 index 000000000000..a640ae5ad201 --- /dev/null +++ b/arch/x86/kernel/cpu/sched.c | |||
@@ -0,0 +1,55 @@ | |||
1 | #include <linux/sched.h> | ||
2 | #include <linux/math64.h> | ||
3 | #include <linux/percpu.h> | ||
4 | #include <linux/irqflags.h> | ||
5 | |||
6 | #include <asm/cpufeature.h> | ||
7 | #include <asm/processor.h> | ||
8 | |||
9 | #ifdef CONFIG_SMP | ||
10 | |||
11 | static DEFINE_PER_CPU(struct aperfmperf, old_perf_sched); | ||
12 | |||
13 | static unsigned long scale_aperfmperf(void) | ||
14 | { | ||
15 | struct aperfmperf val, *old = &__get_cpu_var(old_perf_sched); | ||
16 | unsigned long ratio, flags; | ||
17 | |||
18 | local_irq_save(flags); | ||
19 | get_aperfmperf(&val); | ||
20 | local_irq_restore(flags); | ||
21 | |||
22 | ratio = calc_aperfmperf_ratio(old, &val); | ||
23 | *old = val; | ||
24 | |||
25 | return ratio; | ||
26 | } | ||
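APERF advances at the actual clock while MPERF advances at the nominal TSC rate, so the ratio of their deltas over a window approximates current/nominal frequency. A sketch of the calculation behind calc_aperfmperf_ratio(), scaled to SCHED_LOAD_SCALE fixed point (the real code guards the multiply against overflow on large deltas):

	#include <stdint.h>
	#include <stdio.h>

	struct aperfmperf { uint64_t aperf, mperf; };

	#define SCHED_LOAD_SCALE 1024UL   /* fixed-point unit */

	static unsigned long ratio(const struct aperfmperf *old,
				   const struct aperfmperf *cur)
	{
		uint64_t da = cur->aperf - old->aperf;
		uint64_t dm = cur->mperf - old->mperf;

		if (!dm)
			return SCHED_LOAD_SCALE;  /* no window: nominal */
		return (unsigned long)(da * SCHED_LOAD_SCALE / dm);
	}

	int main(void)
	{
		struct aperfmperf old = { 1000, 1000 }, cur = { 3000, 2000 };

		/* ran at 2x nominal over the window -> 2048 */
		printf("%lu\n", ratio(&old, &cur));
		return 0;
	}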
27 | |||
28 | unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu) | ||
29 | { | ||
30 | /* | ||
31 | * do aperf/mperf on the cpu level because it includes things | ||
32 | * like turbo mode, which are relevant to full cores. | ||
33 | */ | ||
34 | if (boot_cpu_has(X86_FEATURE_APERFMPERF)) | ||
35 | return scale_aperfmperf(); | ||
36 | |||
37 | /* | ||
39 | * maybe hook into cpufreq here | ||
39 | */ | ||
40 | |||
41 | return default_scale_freq_power(sd, cpu); | ||
42 | } | ||
43 | |||
44 | unsigned long arch_scale_smt_power(struct sched_domain *sd, int cpu) | ||
45 | { | ||
46 | /* | ||
47 | * aperf/mperf already includes the smt gain | ||
48 | */ | ||
49 | if (boot_cpu_has(X86_FEATURE_APERFMPERF)) | ||
50 | return SCHED_LOAD_SCALE; | ||
51 | |||
52 | return default_scale_smt_power(sd, cpu); | ||
53 | } | ||
54 | |||
55 | #endif | ||
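
get_aperfmperf() and calc_aperfmperf_ratio() are defined elsewhere (sched.c appears to pull them in via <asm/processor.h>). Assuming they sample the IA32_APERF/IA32_MPERF MSRs and turn the deltas into a SCHED_LOAD_SCALE-relative ratio, the idea sketches out as below — demo_ names and the exact scaling are illustrative, not the series' real helpers:

    #include <linux/sched.h>
    #include <linux/math64.h>
    #include <asm/msr.h>

    struct demo_aperfmperf {
            u64 aperf, mperf;
    };

    static void demo_get_aperfmperf(struct demo_aperfmperf *am)
    {
            /* Both MSRs tick at model-specific rates; only the deltas
             * and their ratio carry meaning, hence the old/new
             * sampling done above. */
            rdmsrl(MSR_IA32_APERF, am->aperf);
            rdmsrl(MSR_IA32_MPERF, am->mperf);
    }

    static unsigned long demo_ratio(struct demo_aperfmperf *old,
                                    struct demo_aperfmperf *new)
    {
            u64 aperf = new->aperf - old->aperf;
            u64 mperf = new->mperf - old->mperf;

            aperf >>= 10;   /* keep the multiply below from overflowing */
            mperf >>= 10;
            if (!mperf)
                    return SCHED_LOAD_SCALE;
            /* > SCHED_LOAD_SCALE when turbo outran the reference clock */
            return div64_u64(aperf * SCHED_LOAD_SCALE, mperf);
    }
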
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index bc24f514ec93..1cbed97b59cf 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/dmi.h> | 24 | #include <linux/dmi.h> |
25 | #include <asm/div64.h> | 25 | #include <asm/div64.h> |
26 | #include <asm/vmware.h> | 26 | #include <asm/vmware.h> |
27 | #include <asm/x86_init.h> | ||
27 | 28 | ||
28 | #define CPUID_VMWARE_INFO_LEAF 0x40000000 | 29 | #define CPUID_VMWARE_INFO_LEAF 0x40000000 |
29 | #define VMWARE_HYPERVISOR_MAGIC 0x564D5868 | 30 | #define VMWARE_HYPERVISOR_MAGIC 0x564D5868 |
@@ -47,21 +48,35 @@ static inline int __vmware_platform(void) | |||
47 | return eax != (uint32_t)-1 && ebx == VMWARE_HYPERVISOR_MAGIC; | 48 | return eax != (uint32_t)-1 && ebx == VMWARE_HYPERVISOR_MAGIC; |
48 | } | 49 | } |
49 | 50 | ||
50 | static unsigned long __vmware_get_tsc_khz(void) | 51 | static unsigned long vmware_get_tsc_khz(void) |
51 | { | 52 | { |
52 | uint64_t tsc_hz; | 53 | uint64_t tsc_hz; |
53 | uint32_t eax, ebx, ecx, edx; | 54 | uint32_t eax, ebx, ecx, edx; |
54 | 55 | ||
55 | VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); | 56 | VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); |
56 | 57 | ||
57 | if (ebx == UINT_MAX) | ||
58 | return 0; | ||
59 | tsc_hz = eax | (((uint64_t)ebx) << 32); | 58 | tsc_hz = eax | (((uint64_t)ebx) << 32); |
60 | do_div(tsc_hz, 1000); | 59 | do_div(tsc_hz, 1000); |
61 | BUG_ON(tsc_hz >> 32); | 60 | BUG_ON(tsc_hz >> 32); |
61 | printk(KERN_INFO "TSC freq read from hypervisor : %lu.%03lu MHz\n", | ||
62 | (unsigned long) tsc_hz / 1000, | ||
63 | (unsigned long) tsc_hz % 1000); | ||
62 | return tsc_hz; | 64 | return tsc_hz; |
63 | } | 65 | } |
64 | 66 | ||
67 | void __init vmware_platform_setup(void) | ||
68 | { | ||
69 | uint32_t eax, ebx, ecx, edx; | ||
70 | |||
71 | VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); | ||
72 | |||
73 | if (ebx != UINT_MAX) | ||
74 | x86_platform.calibrate_tsc = vmware_get_tsc_khz; | ||
75 | else | ||
76 | printk(KERN_WARNING | ||
77 | "Failed to get TSC freq from the hypervisor\n"); | ||
78 | } | ||
79 | |||
65 | /* | 80 | /* |
66 | * While checking the dmi string information, just checking the product | 81 | * While checking the dmi string information, just checking the product |
67 | * serial key should be enough, as this will always have a VMware | 82 | * serial key should be enough, as this will always have a VMware |
@@ -87,12 +102,6 @@ int vmware_platform(void) | |||
87 | return 0; | 102 | return 0; |
88 | } | 103 | } |
89 | 104 | ||
90 | unsigned long vmware_get_tsc_khz(void) | ||
91 | { | ||
92 | BUG_ON(!vmware_platform()); | ||
93 | return __vmware_get_tsc_khz(); | ||
94 | } | ||
95 | |||
96 | /* | 105 | /* |
97 | * VMware hypervisor takes care of exporting a reliable TSC to the guest. | 106 | * VMware hypervisor takes care of exporting a reliable TSC to the guest. |
98 | * Still, due to timing difference when running on virtual cpus, the TSC can | 107 | * Still, due to timing difference when running on virtual cpus, the TSC can |
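
VMWARE_PORT() is VMware's "backdoor" hypercall: an IN from a magic I/O port with VMWARE_HYPERVISOR_MAGIC in EAX and a command number in ECX, which the hypervisor intercepts and answers through the registers. A hedged sketch of the mechanism — the port number follows the conventional backdoor interface, and the real macro lives in <asm/vmware.h>:

    #include <linux/types.h>

    #define DEMO_VMWARE_MAGIC  0x564D5868   /* "VMXh", as above */
    #define DEMO_VMWARE_PORT   0x5658       /* conventional backdoor port */

    static void demo_vmware_call(u32 cmd, u32 *eax, u32 *ebx,
                                 u32 *ecx, u32 *edx)
    {
            /* No device listens on the port; the hypervisor traps the
             * IN and fills the registers (ebx == magic on success,
             * matching the __vmware_platform() check above). */
            asm volatile ("inl (%%dx)"
                          : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
                          : "a" (DEMO_VMWARE_MAGIC), "b" (~0U), "c" (cmd),
                            "d" (DEMO_VMWARE_PORT)
                          : "memory");
    }
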
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index b07af8861244..6a52d4b36a30 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c | |||
@@ -182,7 +182,7 @@ static struct notifier_block __refdata cpuid_class_cpu_notifier = | |||
182 | .notifier_call = cpuid_class_cpu_callback, | 182 | .notifier_call = cpuid_class_cpu_callback, |
183 | }; | 183 | }; |
184 | 184 | ||
185 | static char *cpuid_nodename(struct device *dev) | 185 | static char *cpuid_devnode(struct device *dev, mode_t *mode) |
186 | { | 186 | { |
187 | return kasprintf(GFP_KERNEL, "cpu/%u/cpuid", MINOR(dev->devt)); | 187 | return kasprintf(GFP_KERNEL, "cpu/%u/cpuid", MINOR(dev->devt)); |
188 | } | 188 | } |
@@ -203,7 +203,7 @@ static int __init cpuid_init(void) | |||
203 | err = PTR_ERR(cpuid_class); | 203 | err = PTR_ERR(cpuid_class); |
204 | goto out_chrdev; | 204 | goto out_chrdev; |
205 | } | 205 | } |
206 | cpuid_class->nodename = cpuid_nodename; | 206 | cpuid_class->devnode = cpuid_devnode; |
207 | for_each_online_cpu(i) { | 207 | for_each_online_cpu(i) { |
208 | err = cpuid_device_create(i); | 208 | err = cpuid_device_create(i); |
209 | if (err != 0) | 209 | if (err != 0) |
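
The nodename → devnode rename tracks a driver-core API change: the callback now also receives a mode_t pointer, letting a driver suggest non-default permissions for its /dev node. A sketch of a callback using the new argument — the 0444 mode is purely illustrative, and the pointer may be NULL:

    #include <linux/device.h>
    #include <linux/kernel.h>

    static char *demo_devnode(struct device *dev, mode_t *mode)
    {
            if (mode)
                    *mode = 0444;   /* hypothetical: world-readable node */
            return kasprintf(GFP_KERNEL, "cpu/%u/cpuid", MINOR(dev->devt));
    }
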
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index bca5fba91c9e..f7dd2a7c3bf4 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c | |||
@@ -5,7 +5,6 @@ | |||
5 | #include <linux/kallsyms.h> | 5 | #include <linux/kallsyms.h> |
6 | #include <linux/kprobes.h> | 6 | #include <linux/kprobes.h> |
7 | #include <linux/uaccess.h> | 7 | #include <linux/uaccess.h> |
8 | #include <linux/utsname.h> | ||
9 | #include <linux/hardirq.h> | 8 | #include <linux/hardirq.h> |
10 | #include <linux/kdebug.h> | 9 | #include <linux/kdebug.h> |
11 | #include <linux/module.h> | 10 | #include <linux/module.h> |
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 54b0a3276766..a071e6be177e 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c | |||
@@ -5,7 +5,6 @@ | |||
5 | #include <linux/kallsyms.h> | 5 | #include <linux/kallsyms.h> |
6 | #include <linux/kprobes.h> | 6 | #include <linux/kprobes.h> |
7 | #include <linux/uaccess.h> | 7 | #include <linux/uaccess.h> |
8 | #include <linux/utsname.h> | ||
9 | #include <linux/hardirq.h> | 8 | #include <linux/hardirq.h> |
10 | #include <linux/kdebug.h> | 9 | #include <linux/kdebug.h> |
11 | #include <linux/module.h> | 10 | #include <linux/module.h> |
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 147005a1cc3c..d17d482a04f4 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -1331,7 +1331,7 @@ void __init e820_reserve_resources(void) | |||
1331 | struct resource *res; | 1331 | struct resource *res; |
1332 | u64 end; | 1332 | u64 end; |
1333 | 1333 | ||
1334 | res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map); | 1334 | res = alloc_bootmem(sizeof(struct resource) * e820.nr_map); |
1335 | e820_res = res; | 1335 | e820_res = res; |
1336 | for (i = 0; i < e820.nr_map; i++) { | 1336 | for (i = 0; i < e820.nr_map; i++) { |
1337 | end = e820.map[i].addr + e820.map[i].size - 1; | 1337 | end = e820.map[i].addr + e820.map[i].size - 1; |
@@ -1378,8 +1378,8 @@ static unsigned long ram_alignment(resource_size_t pos) | |||
1378 | if (mb < 16) | 1378 | if (mb < 16) |
1379 | return 1024*1024; | 1379 | return 1024*1024; |
1380 | 1380 | ||
1381 | /* To 32MB for anything above that */ | 1381 | /* To 64MB for anything above that */ |
1382 | return 32*1024*1024; | 1382 | return 64*1024*1024; |
1383 | } | 1383 | } |
1384 | 1384 | ||
1385 | #define MAX_RESOURCE_SIZE ((resource_size_t)-1) | 1385 | #define MAX_RESOURCE_SIZE ((resource_size_t)-1) |
@@ -1455,28 +1455,11 @@ char *__init default_machine_specific_memory_setup(void) | |||
1455 | return who; | 1455 | return who; |
1456 | } | 1456 | } |
1457 | 1457 | ||
1458 | char *__init __attribute__((weak)) machine_specific_memory_setup(void) | ||
1459 | { | ||
1460 | if (x86_quirks->arch_memory_setup) { | ||
1461 | char *who = x86_quirks->arch_memory_setup(); | ||
1462 | |||
1463 | if (who) | ||
1464 | return who; | ||
1465 | } | ||
1466 | return default_machine_specific_memory_setup(); | ||
1467 | } | ||
1468 | |||
1469 | /* Overridden in paravirt.c if CONFIG_PARAVIRT */ | ||
1470 | char * __init __attribute__((weak)) memory_setup(void) | ||
1471 | { | ||
1472 | return machine_specific_memory_setup(); | ||
1473 | } | ||
1474 | |||
1475 | void __init setup_memory_map(void) | 1458 | void __init setup_memory_map(void) |
1476 | { | 1459 | { |
1477 | char *who; | 1460 | char *who; |
1478 | 1461 | ||
1479 | who = memory_setup(); | 1462 | who = x86_init.resources.memory_setup(); |
1480 | memcpy(&e820_saved, &e820, sizeof(struct e820map)); | 1463 | memcpy(&e820_saved, &e820, sizeof(struct e820map)); |
1481 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); | 1464 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); |
1482 | e820_print_map(who); | 1465 | e820_print_map(who); |
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 335f049d110f..b9c830c12b4a 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c | |||
@@ -160,721 +160,6 @@ static struct console early_serial_console = { | |||
160 | .index = -1, | 160 | .index = -1, |
161 | }; | 161 | }; |
162 | 162 | ||
163 | #ifdef CONFIG_EARLY_PRINTK_DBGP | ||
164 | |||
165 | static struct ehci_caps __iomem *ehci_caps; | ||
166 | static struct ehci_regs __iomem *ehci_regs; | ||
167 | static struct ehci_dbg_port __iomem *ehci_debug; | ||
168 | static unsigned int dbgp_endpoint_out; | ||
169 | |||
170 | struct ehci_dev { | ||
171 | u32 bus; | ||
172 | u32 slot; | ||
173 | u32 func; | ||
174 | }; | ||
175 | |||
176 | static struct ehci_dev ehci_dev; | ||
177 | |||
178 | #define USB_DEBUG_DEVNUM 127 | ||
179 | |||
180 | #define DBGP_DATA_TOGGLE 0x8800 | ||
181 | |||
182 | static inline u32 dbgp_pid_update(u32 x, u32 tok) | ||
183 | { | ||
184 | return ((x ^ DBGP_DATA_TOGGLE) & 0xffff00) | (tok & 0xff); | ||
185 | } | ||
186 | |||
187 | static inline u32 dbgp_len_update(u32 x, u32 len) | ||
188 | { | ||
189 | return (x & ~0x0f) | (len & 0x0f); | ||
190 | } | ||
191 | |||
192 | /* | ||
193 | * USB Packet IDs (PIDs) | ||
194 | */ | ||
195 | |||
196 | /* token */ | ||
197 | #define USB_PID_OUT 0xe1 | ||
198 | #define USB_PID_IN 0x69 | ||
199 | #define USB_PID_SOF 0xa5 | ||
200 | #define USB_PID_SETUP 0x2d | ||
201 | /* handshake */ | ||
202 | #define USB_PID_ACK 0xd2 | ||
203 | #define USB_PID_NAK 0x5a | ||
204 | #define USB_PID_STALL 0x1e | ||
205 | #define USB_PID_NYET 0x96 | ||
206 | /* data */ | ||
207 | #define USB_PID_DATA0 0xc3 | ||
208 | #define USB_PID_DATA1 0x4b | ||
209 | #define USB_PID_DATA2 0x87 | ||
210 | #define USB_PID_MDATA 0x0f | ||
211 | /* Special */ | ||
212 | #define USB_PID_PREAMBLE 0x3c | ||
213 | #define USB_PID_ERR 0x3c | ||
214 | #define USB_PID_SPLIT 0x78 | ||
215 | #define USB_PID_PING 0xb4 | ||
216 | #define USB_PID_UNDEF_0 0xf0 | ||
217 | |||
218 | #define USB_PID_DATA_TOGGLE 0x88 | ||
219 | #define DBGP_CLAIM (DBGP_OWNER | DBGP_ENABLED | DBGP_INUSE) | ||
220 | |||
221 | #define PCI_CAP_ID_EHCI_DEBUG 0xa | ||
222 | |||
223 | #define HUB_ROOT_RESET_TIME 50 /* times are in msec */ | ||
224 | #define HUB_SHORT_RESET_TIME 10 | ||
225 | #define HUB_LONG_RESET_TIME 200 | ||
226 | #define HUB_RESET_TIMEOUT 500 | ||
227 | |||
228 | #define DBGP_MAX_PACKET 8 | ||
229 | |||
230 | static int dbgp_wait_until_complete(void) | ||
231 | { | ||
232 | u32 ctrl; | ||
233 | int loop = 0x100000; | ||
234 | |||
235 | do { | ||
236 | ctrl = readl(&ehci_debug->control); | ||
237 | /* Stop when the transaction is finished */ | ||
238 | if (ctrl & DBGP_DONE) | ||
239 | break; | ||
240 | } while (--loop > 0); | ||
241 | |||
242 | if (!loop) | ||
243 | return -1; | ||
244 | |||
245 | /* | ||
246 | * Now that we have observed the completed transaction, | ||
247 | * clear the done bit. | ||
248 | */ | ||
249 | writel(ctrl | DBGP_DONE, &ehci_debug->control); | ||
250 | return (ctrl & DBGP_ERROR) ? -DBGP_ERRCODE(ctrl) : DBGP_LEN(ctrl); | ||
251 | } | ||
252 | |||
253 | static void __init dbgp_mdelay(int ms) | ||
254 | { | ||
255 | int i; | ||
256 | |||
257 | while (ms--) { | ||
258 | for (i = 0; i < 1000; i++) | ||
259 | outb(0x1, 0x80); | ||
260 | } | ||
261 | } | ||
262 | |||
263 | static void dbgp_breath(void) | ||
264 | { | ||
265 | /* Sleep to give the debug port a chance to breathe */ | ||
266 | } | ||
267 | |||
268 | static int dbgp_wait_until_done(unsigned ctrl) | ||
269 | { | ||
270 | u32 pids, lpid; | ||
271 | int ret; | ||
272 | int loop = 3; | ||
273 | |||
274 | retry: | ||
275 | writel(ctrl | DBGP_GO, &ehci_debug->control); | ||
276 | ret = dbgp_wait_until_complete(); | ||
277 | pids = readl(&ehci_debug->pids); | ||
278 | lpid = DBGP_PID_GET(pids); | ||
279 | |||
280 | if (ret < 0) | ||
281 | return ret; | ||
282 | |||
283 | /* | ||
284 | * If the port is getting full or it has dropped data, | ||
285 | * start pacing ourselves; not necessary, but it's friendly. | ||
286 | */ | ||
287 | if ((lpid == USB_PID_NAK) || (lpid == USB_PID_NYET)) | ||
288 | dbgp_breath(); | ||
289 | |||
290 | /* If we get a NAK, reissue the transmission */ | ||
291 | if (lpid == USB_PID_NAK) { | ||
292 | if (--loop > 0) | ||
293 | goto retry; | ||
294 | } | ||
295 | |||
296 | return ret; | ||
297 | } | ||
298 | |||
299 | static void dbgp_set_data(const void *buf, int size) | ||
300 | { | ||
301 | const unsigned char *bytes = buf; | ||
302 | u32 lo, hi; | ||
303 | int i; | ||
304 | |||
305 | lo = hi = 0; | ||
306 | for (i = 0; i < 4 && i < size; i++) | ||
307 | lo |= bytes[i] << (8*i); | ||
308 | for (; i < 8 && i < size; i++) | ||
309 | hi |= bytes[i] << (8*(i - 4)); | ||
310 | writel(lo, &ehci_debug->data03); | ||
311 | writel(hi, &ehci_debug->data47); | ||
312 | } | ||
313 | |||
314 | static void __init dbgp_get_data(void *buf, int size) | ||
315 | { | ||
316 | unsigned char *bytes = buf; | ||
317 | u32 lo, hi; | ||
318 | int i; | ||
319 | |||
320 | lo = readl(&ehci_debug->data03); | ||
321 | hi = readl(&ehci_debug->data47); | ||
322 | for (i = 0; i < 4 && i < size; i++) | ||
323 | bytes[i] = (lo >> (8*i)) & 0xff; | ||
324 | for (; i < 8 && i < size; i++) | ||
325 | bytes[i] = (hi >> (8*(i - 4))) & 0xff; | ||
326 | } | ||
327 | |||
328 | static int dbgp_bulk_write(unsigned devnum, unsigned endpoint, | ||
329 | const char *bytes, int size) | ||
330 | { | ||
331 | u32 pids, addr, ctrl; | ||
332 | int ret; | ||
333 | |||
334 | if (size > DBGP_MAX_PACKET) | ||
335 | return -1; | ||
336 | |||
337 | addr = DBGP_EPADDR(devnum, endpoint); | ||
338 | |||
339 | pids = readl(&ehci_debug->pids); | ||
340 | pids = dbgp_pid_update(pids, USB_PID_OUT); | ||
341 | |||
342 | ctrl = readl(&ehci_debug->control); | ||
343 | ctrl = dbgp_len_update(ctrl, size); | ||
344 | ctrl |= DBGP_OUT; | ||
345 | ctrl |= DBGP_GO; | ||
346 | |||
347 | dbgp_set_data(bytes, size); | ||
348 | writel(addr, &ehci_debug->address); | ||
349 | writel(pids, &ehci_debug->pids); | ||
350 | |||
351 | ret = dbgp_wait_until_done(ctrl); | ||
352 | if (ret < 0) | ||
353 | return ret; | ||
354 | |||
355 | return ret; | ||
356 | } | ||
357 | |||
358 | static int __init dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data, | ||
359 | int size) | ||
360 | { | ||
361 | u32 pids, addr, ctrl; | ||
362 | int ret; | ||
363 | |||
364 | if (size > DBGP_MAX_PACKET) | ||
365 | return -1; | ||
366 | |||
367 | addr = DBGP_EPADDR(devnum, endpoint); | ||
368 | |||
369 | pids = readl(&ehci_debug->pids); | ||
370 | pids = dbgp_pid_update(pids, USB_PID_IN); | ||
371 | |||
372 | ctrl = readl(&ehci_debug->control); | ||
373 | ctrl = dbgp_len_update(ctrl, size); | ||
374 | ctrl &= ~DBGP_OUT; | ||
375 | ctrl |= DBGP_GO; | ||
376 | |||
377 | writel(addr, &ehci_debug->address); | ||
378 | writel(pids, &ehci_debug->pids); | ||
379 | ret = dbgp_wait_until_done(ctrl); | ||
380 | if (ret < 0) | ||
381 | return ret; | ||
382 | |||
383 | if (size > ret) | ||
384 | size = ret; | ||
385 | dbgp_get_data(data, size); | ||
386 | return ret; | ||
387 | } | ||
388 | |||
389 | static int __init dbgp_control_msg(unsigned devnum, int requesttype, | ||
390 | int request, int value, int index, void *data, int size) | ||
391 | { | ||
392 | u32 pids, addr, ctrl; | ||
393 | struct usb_ctrlrequest req; | ||
394 | int read; | ||
395 | int ret; | ||
396 | |||
397 | read = (requesttype & USB_DIR_IN) != 0; | ||
398 | if (size > (read ? DBGP_MAX_PACKET:0)) | ||
399 | return -1; | ||
400 | |||
401 | /* Compute the control message */ | ||
402 | req.bRequestType = requesttype; | ||
403 | req.bRequest = request; | ||
404 | req.wValue = cpu_to_le16(value); | ||
405 | req.wIndex = cpu_to_le16(index); | ||
406 | req.wLength = cpu_to_le16(size); | ||
407 | |||
408 | pids = DBGP_PID_SET(USB_PID_DATA0, USB_PID_SETUP); | ||
409 | addr = DBGP_EPADDR(devnum, 0); | ||
410 | |||
411 | ctrl = readl(&ehci_debug->control); | ||
412 | ctrl = dbgp_len_update(ctrl, sizeof(req)); | ||
413 | ctrl |= DBGP_OUT; | ||
414 | ctrl |= DBGP_GO; | ||
415 | |||
416 | /* Send the setup message */ | ||
417 | dbgp_set_data(&req, sizeof(req)); | ||
418 | writel(addr, &ehci_debug->address); | ||
419 | writel(pids, &ehci_debug->pids); | ||
420 | ret = dbgp_wait_until_done(ctrl); | ||
421 | if (ret < 0) | ||
422 | return ret; | ||
423 | |||
424 | /* Read the result */ | ||
425 | return dbgp_bulk_read(devnum, 0, data, size); | ||
426 | } | ||
427 | |||
428 | |||
429 | /* Find a PCI capability */ | ||
430 | static u32 __init find_cap(u32 num, u32 slot, u32 func, int cap) | ||
431 | { | ||
432 | u8 pos; | ||
433 | int bytes; | ||
434 | |||
435 | if (!(read_pci_config_16(num, slot, func, PCI_STATUS) & | ||
436 | PCI_STATUS_CAP_LIST)) | ||
437 | return 0; | ||
438 | |||
439 | pos = read_pci_config_byte(num, slot, func, PCI_CAPABILITY_LIST); | ||
440 | for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) { | ||
441 | u8 id; | ||
442 | |||
443 | pos &= ~3; | ||
444 | id = read_pci_config_byte(num, slot, func, pos+PCI_CAP_LIST_ID); | ||
445 | if (id == 0xff) | ||
446 | break; | ||
447 | if (id == cap) | ||
448 | return pos; | ||
449 | |||
450 | pos = read_pci_config_byte(num, slot, func, | ||
451 | pos+PCI_CAP_LIST_NEXT); | ||
452 | } | ||
453 | return 0; | ||
454 | } | ||
455 | |||
456 | static u32 __init __find_dbgp(u32 bus, u32 slot, u32 func) | ||
457 | { | ||
458 | u32 class; | ||
459 | |||
460 | class = read_pci_config(bus, slot, func, PCI_CLASS_REVISION); | ||
461 | if ((class >> 8) != PCI_CLASS_SERIAL_USB_EHCI) | ||
462 | return 0; | ||
463 | |||
464 | return find_cap(bus, slot, func, PCI_CAP_ID_EHCI_DEBUG); | ||
465 | } | ||
466 | |||
467 | static u32 __init find_dbgp(int ehci_num, u32 *rbus, u32 *rslot, u32 *rfunc) | ||
468 | { | ||
469 | u32 bus, slot, func; | ||
470 | |||
471 | for (bus = 0; bus < 256; bus++) { | ||
472 | for (slot = 0; slot < 32; slot++) { | ||
473 | for (func = 0; func < 8; func++) { | ||
474 | unsigned cap; | ||
475 | |||
476 | cap = __find_dbgp(bus, slot, func); | ||
477 | |||
478 | if (!cap) | ||
479 | continue; | ||
480 | if (ehci_num-- != 0) | ||
481 | continue; | ||
482 | *rbus = bus; | ||
483 | *rslot = slot; | ||
484 | *rfunc = func; | ||
485 | return cap; | ||
486 | } | ||
487 | } | ||
488 | } | ||
489 | return 0; | ||
490 | } | ||
491 | |||
492 | static int __init ehci_reset_port(int port) | ||
493 | { | ||
494 | u32 portsc; | ||
495 | u32 delay_time, delay; | ||
496 | int loop; | ||
497 | |||
498 | /* Reset the usb debug port */ | ||
499 | portsc = readl(&ehci_regs->port_status[port - 1]); | ||
500 | portsc &= ~PORT_PE; | ||
501 | portsc |= PORT_RESET; | ||
502 | writel(portsc, &ehci_regs->port_status[port - 1]); | ||
503 | |||
504 | delay = HUB_ROOT_RESET_TIME; | ||
505 | for (delay_time = 0; delay_time < HUB_RESET_TIMEOUT; | ||
506 | delay_time += delay) { | ||
507 | dbgp_mdelay(delay); | ||
508 | |||
509 | portsc = readl(&ehci_regs->port_status[port - 1]); | ||
510 | if (portsc & PORT_RESET) { | ||
511 | /* force reset to complete */ | ||
512 | loop = 2; | ||
513 | writel(portsc & ~(PORT_RWC_BITS | PORT_RESET), | ||
514 | &ehci_regs->port_status[port - 1]); | ||
515 | do { | ||
516 | portsc = readl(&ehci_regs->port_status[port-1]); | ||
517 | } while ((portsc & PORT_RESET) && (--loop > 0)); | ||
518 | } | ||
519 | |||
520 | /* Device went away? */ | ||
521 | if (!(portsc & PORT_CONNECT)) | ||
522 | return -ENOTCONN; | ||
523 | |||
524 | /* bomb out completely if something weird happened */ | ||
525 | if ((portsc & PORT_CSC)) | ||
526 | return -EINVAL; | ||
527 | |||
528 | /* If we've finished resetting, then break out of the loop */ | ||
529 | if (!(portsc & PORT_RESET) && (portsc & PORT_PE)) | ||
530 | return 0; | ||
531 | } | ||
532 | return -EBUSY; | ||
533 | } | ||
534 | |||
535 | static int __init ehci_wait_for_port(int port) | ||
536 | { | ||
537 | u32 status; | ||
538 | int ret, reps; | ||
539 | |||
540 | for (reps = 0; reps < 3; reps++) { | ||
541 | dbgp_mdelay(100); | ||
542 | status = readl(&ehci_regs->status); | ||
543 | if (status & STS_PCD) { | ||
544 | ret = ehci_reset_port(port); | ||
545 | if (ret == 0) | ||
546 | return 0; | ||
547 | } | ||
548 | } | ||
549 | return -ENOTCONN; | ||
550 | } | ||
551 | |||
552 | #ifdef DBGP_DEBUG | ||
553 | # define dbgp_printk early_printk | ||
554 | #else | ||
555 | static inline void dbgp_printk(const char *fmt, ...) { } | ||
556 | #endif | ||
557 | |||
558 | typedef void (*set_debug_port_t)(int port); | ||
559 | |||
560 | static void __init default_set_debug_port(int port) | ||
561 | { | ||
562 | } | ||
563 | |||
564 | static set_debug_port_t __initdata set_debug_port = default_set_debug_port; | ||
565 | |||
566 | static void __init nvidia_set_debug_port(int port) | ||
567 | { | ||
568 | u32 dword; | ||
569 | dword = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, | ||
570 | 0x74); | ||
571 | dword &= ~(0x0f<<12); | ||
572 | dword |= ((port & 0x0f)<<12); | ||
573 | write_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, 0x74, | ||
574 | dword); | ||
575 | dbgp_printk("set debug port to %d\n", port); | ||
576 | } | ||
577 | |||
578 | static void __init detect_set_debug_port(void) | ||
579 | { | ||
580 | u32 vendorid; | ||
581 | |||
582 | vendorid = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, | ||
583 | 0x00); | ||
584 | |||
585 | if ((vendorid & 0xffff) == 0x10de) { | ||
586 | dbgp_printk("using nvidia set_debug_port\n"); | ||
587 | set_debug_port = nvidia_set_debug_port; | ||
588 | } | ||
589 | } | ||
590 | |||
591 | static int __init ehci_setup(void) | ||
592 | { | ||
593 | struct usb_debug_descriptor dbgp_desc; | ||
594 | u32 cmd, ctrl, status, portsc, hcs_params; | ||
595 | u32 debug_port, new_debug_port = 0, n_ports; | ||
596 | u32 devnum; | ||
597 | int ret, i; | ||
598 | int loop; | ||
599 | int port_map_tried; | ||
600 | int playtimes = 3; | ||
601 | |||
602 | try_next_time: | ||
603 | port_map_tried = 0; | ||
604 | |||
605 | try_next_port: | ||
606 | |||
607 | hcs_params = readl(&ehci_caps->hcs_params); | ||
608 | debug_port = HCS_DEBUG_PORT(hcs_params); | ||
609 | n_ports = HCS_N_PORTS(hcs_params); | ||
610 | |||
611 | dbgp_printk("debug_port: %d\n", debug_port); | ||
612 | dbgp_printk("n_ports: %d\n", n_ports); | ||
613 | |||
614 | for (i = 1; i <= n_ports; i++) { | ||
615 | portsc = readl(&ehci_regs->port_status[i-1]); | ||
616 | dbgp_printk("portstatus%d: %08x\n", i, portsc); | ||
617 | } | ||
618 | |||
619 | if (port_map_tried && (new_debug_port != debug_port)) { | ||
620 | if (--playtimes) { | ||
621 | set_debug_port(new_debug_port); | ||
622 | goto try_next_time; | ||
623 | } | ||
624 | return -1; | ||
625 | } | ||
626 | |||
627 | loop = 10; | ||
628 | /* Reset the EHCI controller */ | ||
629 | cmd = readl(&ehci_regs->command); | ||
630 | cmd |= CMD_RESET; | ||
631 | writel(cmd, &ehci_regs->command); | ||
632 | do { | ||
633 | cmd = readl(&ehci_regs->command); | ||
634 | } while ((cmd & CMD_RESET) && (--loop > 0)); | ||
635 | |||
636 | if (!loop) { | ||
637 | dbgp_printk("can not reset ehci\n"); | ||
638 | return -1; | ||
639 | } | ||
640 | dbgp_printk("ehci reset done\n"); | ||
641 | |||
642 | /* Claim ownership, but do not enable yet */ | ||
643 | ctrl = readl(&ehci_debug->control); | ||
644 | ctrl |= DBGP_OWNER; | ||
645 | ctrl &= ~(DBGP_ENABLED | DBGP_INUSE); | ||
646 | writel(ctrl, &ehci_debug->control); | ||
647 | |||
648 | /* Start the ehci running */ | ||
649 | cmd = readl(&ehci_regs->command); | ||
650 | cmd &= ~(CMD_LRESET | CMD_IAAD | CMD_PSE | CMD_ASE | CMD_RESET); | ||
651 | cmd |= CMD_RUN; | ||
652 | writel(cmd, &ehci_regs->command); | ||
653 | |||
654 | /* Ensure everything is routed to the EHCI */ | ||
655 | writel(FLAG_CF, &ehci_regs->configured_flag); | ||
656 | |||
657 | /* Wait until the controller is no longer halted */ | ||
658 | loop = 10; | ||
659 | do { | ||
660 | status = readl(&ehci_regs->status); | ||
661 | } while ((status & STS_HALT) && (--loop > 0)); | ||
662 | |||
663 | if (!loop) { | ||
664 | dbgp_printk("ehci can be started\n"); | ||
665 | return -1; | ||
666 | } | ||
667 | dbgp_printk("ehci started\n"); | ||
668 | |||
669 | /* Wait for a device to show up in the debug port */ | ||
670 | ret = ehci_wait_for_port(debug_port); | ||
671 | if (ret < 0) { | ||
672 | dbgp_printk("No device found in debug port\n"); | ||
673 | goto next_debug_port; | ||
674 | } | ||
675 | dbgp_printk("ehci wait for port done\n"); | ||
676 | |||
677 | /* Enable the debug port */ | ||
678 | ctrl = readl(&ehci_debug->control); | ||
679 | ctrl |= DBGP_CLAIM; | ||
680 | writel(ctrl, &ehci_debug->control); | ||
681 | ctrl = readl(&ehci_debug->control); | ||
682 | if ((ctrl & DBGP_CLAIM) != DBGP_CLAIM) { | ||
683 | dbgp_printk("No device in debug port\n"); | ||
684 | writel(ctrl & ~DBGP_CLAIM, &ehci_debug->control); | ||
685 | goto err; | ||
686 | } | ||
687 | dbgp_printk("debug ported enabled\n"); | ||
688 | |||
689 | /* Completely transfer the debug device to the debug controller */ | ||
690 | portsc = readl(&ehci_regs->port_status[debug_port - 1]); | ||
691 | portsc &= ~PORT_PE; | ||
692 | writel(portsc, &ehci_regs->port_status[debug_port - 1]); | ||
693 | |||
694 | dbgp_mdelay(100); | ||
695 | |||
696 | /* Find the debug device and make it device number 127 */ | ||
697 | for (devnum = 0; devnum <= 127; devnum++) { | ||
698 | ret = dbgp_control_msg(devnum, | ||
699 | USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_DEVICE, | ||
700 | USB_REQ_GET_DESCRIPTOR, (USB_DT_DEBUG << 8), 0, | ||
701 | &dbgp_desc, sizeof(dbgp_desc)); | ||
702 | if (ret > 0) | ||
703 | break; | ||
704 | } | ||
705 | if (devnum > 127) { | ||
706 | dbgp_printk("Could not find attached debug device\n"); | ||
707 | goto err; | ||
708 | } | ||
709 | if (ret < 0) { | ||
710 | dbgp_printk("Attached device is not a debug device\n"); | ||
711 | goto err; | ||
712 | } | ||
713 | dbgp_endpoint_out = dbgp_desc.bDebugOutEndpoint; | ||
714 | |||
715 | /* Move the device to 127 if it isn't already there */ | ||
716 | if (devnum != USB_DEBUG_DEVNUM) { | ||
717 | ret = dbgp_control_msg(devnum, | ||
718 | USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE, | ||
719 | USB_REQ_SET_ADDRESS, USB_DEBUG_DEVNUM, 0, NULL, 0); | ||
720 | if (ret < 0) { | ||
721 | dbgp_printk("Could not move attached device to %d\n", | ||
722 | USB_DEBUG_DEVNUM); | ||
723 | goto err; | ||
724 | } | ||
725 | devnum = USB_DEBUG_DEVNUM; | ||
726 | dbgp_printk("debug device renamed to 127\n"); | ||
727 | } | ||
728 | |||
729 | /* Enable the debug interface */ | ||
730 | ret = dbgp_control_msg(USB_DEBUG_DEVNUM, | ||
731 | USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE, | ||
732 | USB_REQ_SET_FEATURE, USB_DEVICE_DEBUG_MODE, 0, NULL, 0); | ||
733 | if (ret < 0) { | ||
734 | dbgp_printk(" Could not enable the debug device\n"); | ||
735 | goto err; | ||
736 | } | ||
737 | dbgp_printk("debug interface enabled\n"); | ||
738 | |||
739 | /* Perform a small write to get the even/odd data state in sync | ||
740 | */ | ||
741 | ret = dbgp_bulk_write(USB_DEBUG_DEVNUM, dbgp_endpoint_out, " ", 1); | ||
742 | if (ret < 0) { | ||
743 | dbgp_printk("dbgp_bulk_write failed: %d\n", ret); | ||
744 | goto err; | ||
745 | } | ||
746 | dbgp_printk("small write doned\n"); | ||
747 | |||
748 | return 0; | ||
749 | err: | ||
750 | /* Things didn't work so remove my claim */ | ||
751 | ctrl = readl(&ehci_debug->control); | ||
752 | ctrl &= ~(DBGP_CLAIM | DBGP_OUT); | ||
753 | writel(ctrl, &ehci_debug->control); | ||
754 | return -1; | ||
755 | |||
756 | next_debug_port: | ||
757 | port_map_tried |= (1<<(debug_port - 1)); | ||
758 | new_debug_port = ((debug_port-1+1)%n_ports) + 1; | ||
759 | if (port_map_tried != ((1<<n_ports) - 1)) { | ||
760 | set_debug_port(new_debug_port); | ||
761 | goto try_next_port; | ||
762 | } | ||
763 | if (--playtimes) { | ||
764 | set_debug_port(new_debug_port); | ||
765 | goto try_next_time; | ||
766 | } | ||
767 | |||
768 | return -1; | ||
769 | } | ||
770 | |||
771 | static int __init early_dbgp_init(char *s) | ||
772 | { | ||
773 | u32 debug_port, bar, offset; | ||
774 | u32 bus, slot, func, cap; | ||
775 | void __iomem *ehci_bar; | ||
776 | u32 dbgp_num; | ||
777 | u32 bar_val; | ||
778 | char *e; | ||
779 | int ret; | ||
780 | u8 byte; | ||
781 | |||
782 | if (!early_pci_allowed()) | ||
783 | return -1; | ||
784 | |||
785 | dbgp_num = 0; | ||
786 | if (*s) | ||
787 | dbgp_num = simple_strtoul(s, &e, 10); | ||
788 | dbgp_printk("dbgp_num: %d\n", dbgp_num); | ||
789 | |||
790 | cap = find_dbgp(dbgp_num, &bus, &slot, &func); | ||
791 | if (!cap) | ||
792 | return -1; | ||
793 | |||
794 | dbgp_printk("Found EHCI debug port on %02x:%02x.%1x\n", bus, slot, | ||
795 | func); | ||
796 | |||
797 | debug_port = read_pci_config(bus, slot, func, cap); | ||
798 | bar = (debug_port >> 29) & 0x7; | ||
799 | bar = (bar * 4) + 0xc; | ||
800 | offset = (debug_port >> 16) & 0xfff; | ||
801 | dbgp_printk("bar: %02x offset: %03x\n", bar, offset); | ||
802 | if (bar != PCI_BASE_ADDRESS_0) { | ||
803 | dbgp_printk("only debug ports on bar 1 handled.\n"); | ||
804 | |||
805 | return -1; | ||
806 | } | ||
807 | |||
808 | bar_val = read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_0); | ||
809 | dbgp_printk("bar_val: %02x offset: %03x\n", bar_val, offset); | ||
810 | if (bar_val & ~PCI_BASE_ADDRESS_MEM_MASK) { | ||
811 | dbgp_printk("only simple 32bit mmio bars supported\n"); | ||
812 | |||
813 | return -1; | ||
814 | } | ||
815 | |||
816 | /* double check if the mem space is enabled */ | ||
817 | byte = read_pci_config_byte(bus, slot, func, 0x04); | ||
818 | if (!(byte & 0x2)) { | ||
819 | byte |= 0x02; | ||
820 | write_pci_config_byte(bus, slot, func, 0x04, byte); | ||
821 | dbgp_printk("mmio for ehci enabled\n"); | ||
822 | } | ||
823 | |||
824 | /* | ||
825 | * FIXME I don't have the bar size so just guess PAGE_SIZE is more | ||
826 | * than enough. 1K is the biggest I have seen. | ||
827 | */ | ||
828 | set_fixmap_nocache(FIX_DBGP_BASE, bar_val & PAGE_MASK); | ||
829 | ehci_bar = (void __iomem *)__fix_to_virt(FIX_DBGP_BASE); | ||
830 | ehci_bar += bar_val & ~PAGE_MASK; | ||
831 | dbgp_printk("ehci_bar: %p\n", ehci_bar); | ||
832 | |||
833 | ehci_caps = ehci_bar; | ||
834 | ehci_regs = ehci_bar + HC_LENGTH(readl(&ehci_caps->hc_capbase)); | ||
835 | ehci_debug = ehci_bar + offset; | ||
836 | ehci_dev.bus = bus; | ||
837 | ehci_dev.slot = slot; | ||
838 | ehci_dev.func = func; | ||
839 | |||
840 | detect_set_debug_port(); | ||
841 | |||
842 | ret = ehci_setup(); | ||
843 | if (ret < 0) { | ||
844 | dbgp_printk("ehci_setup failed\n"); | ||
845 | ehci_debug = NULL; | ||
846 | |||
847 | return -1; | ||
848 | } | ||
849 | |||
850 | return 0; | ||
851 | } | ||
852 | |||
853 | static void early_dbgp_write(struct console *con, const char *str, u32 n) | ||
854 | { | ||
855 | int chunk, ret; | ||
856 | |||
857 | if (!ehci_debug) | ||
858 | return; | ||
859 | while (n > 0) { | ||
860 | chunk = n; | ||
861 | if (chunk > DBGP_MAX_PACKET) | ||
862 | chunk = DBGP_MAX_PACKET; | ||
863 | ret = dbgp_bulk_write(USB_DEBUG_DEVNUM, | ||
864 | dbgp_endpoint_out, str, chunk); | ||
865 | str += chunk; | ||
866 | n -= chunk; | ||
867 | } | ||
868 | } | ||
869 | |||
870 | static struct console early_dbgp_console = { | ||
871 | .name = "earlydbg", | ||
872 | .write = early_dbgp_write, | ||
873 | .flags = CON_PRINTBUFFER, | ||
874 | .index = -1, | ||
875 | }; | ||
876 | #endif | ||
877 | |||
878 | /* Direct interface for emergencies */ | 163 | /* Direct interface for emergencies */ |
879 | static struct console *early_console = &early_vga_console; | 164 | static struct console *early_console = &early_vga_console; |
880 | static int __initdata early_console_initialized; | 165 | static int __initdata early_console_initialized; |
@@ -891,10 +176,24 @@ asmlinkage void early_printk(const char *fmt, ...) | |||
891 | va_end(ap); | 176 | va_end(ap); |
892 | } | 177 | } |
893 | 178 | ||
179 | static inline void early_console_register(struct console *con, int keep_early) | ||
180 | { | ||
181 | if (early_console->index != -1) { | ||
182 | printk(KERN_CRIT "ERROR: earlyprintk= %s already used\n", | ||
183 | con->name); | ||
184 | return; | ||
185 | } | ||
186 | early_console = con; | ||
187 | if (keep_early) | ||
188 | early_console->flags &= ~CON_BOOT; | ||
189 | else | ||
190 | early_console->flags |= CON_BOOT; | ||
191 | register_console(early_console); | ||
192 | } | ||
894 | 193 | ||
895 | static int __init setup_early_printk(char *buf) | 194 | static int __init setup_early_printk(char *buf) |
896 | { | 195 | { |
897 | int keep_early; | 196 | int keep; |
898 | 197 | ||
899 | if (!buf) | 198 | if (!buf) |
900 | return 0; | 199 | return 0; |
@@ -903,42 +202,37 @@ static int __init setup_early_printk(char *buf) | |||
903 | return 0; | 202 | return 0; |
904 | early_console_initialized = 1; | 203 | early_console_initialized = 1; |
905 | 204 | ||
906 | keep_early = (strstr(buf, "keep") != NULL); | 205 | keep = (strstr(buf, "keep") != NULL); |
907 | 206 | ||
908 | if (!strncmp(buf, "serial", 6)) { | 207 | while (*buf != '\0') { |
909 | early_serial_init(buf + 6); | 208 | if (!strncmp(buf, "serial", 6)) { |
910 | early_console = &early_serial_console; | 209 | buf += 6; |
911 | } else if (!strncmp(buf, "ttyS", 4)) { | 210 | early_serial_init(buf); |
912 | early_serial_init(buf); | 211 | early_console_register(&early_serial_console, keep); |
913 | early_console = &early_serial_console; | 212 | if (!strncmp(buf, ",ttyS", 5)) |
914 | } else if (!strncmp(buf, "vga", 3) | 213 | buf += 5; |
915 | && boot_params.screen_info.orig_video_isVGA == 1) { | 214 | } |
916 | max_xpos = boot_params.screen_info.orig_video_cols; | 215 | if (!strncmp(buf, "ttyS", 4)) { |
917 | max_ypos = boot_params.screen_info.orig_video_lines; | 216 | early_serial_init(buf + 4); |
918 | current_ypos = boot_params.screen_info.orig_y; | 217 | early_console_register(&early_serial_console, keep); |
919 | early_console = &early_vga_console; | 218 | } |
219 | if (!strncmp(buf, "vga", 3) && | ||
220 | boot_params.screen_info.orig_video_isVGA == 1) { | ||
221 | max_xpos = boot_params.screen_info.orig_video_cols; | ||
222 | max_ypos = boot_params.screen_info.orig_video_lines; | ||
223 | current_ypos = boot_params.screen_info.orig_y; | ||
224 | early_console_register(&early_vga_console, keep); | ||
225 | } | ||
920 | #ifdef CONFIG_EARLY_PRINTK_DBGP | 226 | #ifdef CONFIG_EARLY_PRINTK_DBGP |
921 | } else if (!strncmp(buf, "dbgp", 4)) { | 227 | if (!strncmp(buf, "dbgp", 4) && !early_dbgp_init(buf + 4)) |
922 | if (early_dbgp_init(buf+4) < 0) | 228 | early_console_register(&early_dbgp_console, keep); |
923 | return 0; | ||
924 | early_console = &early_dbgp_console; | ||
925 | /* | ||
926 | * usb subsys will reset ehci controller, so don't keep | ||
927 | * that early console | ||
928 | */ | ||
929 | keep_early = 0; | ||
930 | #endif | 229 | #endif |
931 | #ifdef CONFIG_HVC_XEN | 230 | #ifdef CONFIG_HVC_XEN |
932 | } else if (!strncmp(buf, "xen", 3)) { | 231 | if (!strncmp(buf, "xen", 3)) |
933 | early_console = &xenboot_console; | 232 | early_console_register(&xenboot_console, keep); |
934 | #endif | 233 | #endif |
234 | buf++; | ||
935 | } | 235 | } |
936 | |||
937 | if (keep_early) | ||
938 | early_console->flags &= ~CON_BOOT; | ||
939 | else | ||
940 | early_console->flags |= CON_BOOT; | ||
941 | register_console(early_console); | ||
942 | return 0; | 236 | return 0; |
943 | } | 237 | } |
944 | 238 | ||
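
Two things change shape in setup_early_printk(). The option string is now scanned token by token, so something like earlyprintk=serial,ttyS0,115200,keep parses robustly regardless of token order, and a stray second console in the same string is caught by the index check in early_console_register() instead of silently clobbering the first. And the CON_BOOT handling that used to sit once at the end of the function now lives in one place; the contract is roughly:

    #include <linux/console.h>

    static void demo_register_early(struct console *con, int keep)
    {
            if (keep)
                    con->flags &= ~CON_BOOT;  /* survives the real console */
            else
                    con->flags |= CON_BOOT;   /* auto-unregistered when a
                                                 real console registers */
            register_console(con);
    }
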
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index fe26ba3e3451..ad5bd988fb79 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c | |||
@@ -42,6 +42,7 @@ | |||
42 | #include <asm/time.h> | 42 | #include <asm/time.h> |
43 | #include <asm/cacheflush.h> | 43 | #include <asm/cacheflush.h> |
44 | #include <asm/tlbflush.h> | 44 | #include <asm/tlbflush.h> |
45 | #include <asm/x86_init.h> | ||
45 | 46 | ||
46 | #define EFI_DEBUG 1 | 47 | #define EFI_DEBUG 1 |
47 | #define PFX "EFI: " | 48 | #define PFX "EFI: " |
@@ -453,6 +454,9 @@ void __init efi_init(void) | |||
453 | if (add_efi_memmap) | 454 | if (add_efi_memmap) |
454 | do_add_efi_memmap(); | 455 | do_add_efi_memmap(); |
455 | 456 | ||
457 | x86_platform.get_wallclock = efi_get_time; | ||
458 | x86_platform.set_wallclock = efi_set_rtc_mmss; | ||
459 | |||
456 | /* Setup for EFI runtime service */ | 460 | /* Setup for EFI runtime service */ |
457 | reboot_type = BOOT_EFI; | 461 | reboot_type = BOOT_EFI; |
458 | 462 | ||
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index c097e7d607c6..7d52e9da5e0c 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -1185,17 +1185,14 @@ END(ftrace_graph_caller) | |||
1185 | 1185 | ||
1186 | .globl return_to_handler | 1186 | .globl return_to_handler |
1187 | return_to_handler: | 1187 | return_to_handler: |
1188 | pushl $0 | ||
1189 | pushl %eax | 1188 | pushl %eax |
1190 | pushl %ecx | ||
1191 | pushl %edx | 1189 | pushl %edx |
1192 | movl %ebp, %eax | 1190 | movl %ebp, %eax |
1193 | call ftrace_return_to_handler | 1191 | call ftrace_return_to_handler |
1194 | movl %eax, 0xc(%esp) | 1192 | movl %eax, %ecx |
1195 | popl %edx | 1193 | popl %edx |
1196 | popl %ecx | ||
1197 | popl %eax | 1194 | popl %eax |
1198 | ret | 1195 | jmp *%ecx |
1199 | #endif | 1196 | #endif |
1200 | 1197 | ||
1201 | .section .rodata,"a" | 1198 | .section .rodata,"a" |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index c251be745107..bd5bbddddf91 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -146,7 +146,7 @@ ENTRY(ftrace_graph_caller) | |||
146 | END(ftrace_graph_caller) | 146 | END(ftrace_graph_caller) |
147 | 147 | ||
148 | GLOBAL(return_to_handler) | 148 | GLOBAL(return_to_handler) |
149 | subq $80, %rsp | 149 | subq $24, %rsp |
150 | 150 | ||
151 | /* Save the return values */ | 151 | /* Save the return values */ |
152 | movq %rax, (%rsp) | 152 | movq %rax, (%rsp) |
@@ -155,11 +155,11 @@ GLOBAL(return_to_handler) | |||
155 | 155 | ||
156 | call ftrace_return_to_handler | 156 | call ftrace_return_to_handler |
157 | 157 | ||
158 | movq %rax, 72(%rsp) | 158 | movq %rax, %rdi |
159 | movq 8(%rsp), %rdx | 159 | movq 8(%rsp), %rdx |
160 | movq (%rsp), %rax | 160 | movq (%rsp), %rax |
161 | addq $72, %rsp | 161 | addq $24, %rsp |
162 | retq | 162 | jmp *%rdi |
163 | #endif | 163 | #endif |
164 | 164 | ||
165 | 165 | ||
@@ -536,20 +536,13 @@ sysret_signal: | |||
536 | bt $TIF_SYSCALL_AUDIT,%edx | 536 | bt $TIF_SYSCALL_AUDIT,%edx |
537 | jc sysret_audit | 537 | jc sysret_audit |
538 | #endif | 538 | #endif |
539 | /* edx: work flags (arg3) */ | 539 | /* |
540 | leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 | 540 | * We have a signal, or exit tracing or single-step. |
541 | xorl %esi,%esi # oldset -> arg2 | 541 | * These all wind up with the iret return path anyway, |
542 | SAVE_REST | 542 | * so just join that path right now. |
543 | FIXUP_TOP_OF_STACK %r11 | 543 | */ |
544 | call do_notify_resume | 544 | FIXUP_TOP_OF_STACK %r11, -ARGOFFSET |
545 | RESTORE_TOP_OF_STACK %r11 | 545 | jmp int_check_syscall_exit_work |
546 | RESTORE_REST | ||
547 | movl $_TIF_WORK_MASK,%edi | ||
548 | /* Use IRET because user could have changed frame. This | ||
549 | works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ | ||
550 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
551 | TRACE_IRQS_OFF | ||
552 | jmp int_with_check | ||
553 | 546 | ||
554 | badsys: | 547 | badsys: |
555 | movq $-ENOSYS,RAX-ARGOFFSET(%rsp) | 548 | movq $-ENOSYS,RAX-ARGOFFSET(%rsp) |
@@ -654,6 +647,7 @@ int_careful: | |||
654 | int_very_careful: | 647 | int_very_careful: |
655 | TRACE_IRQS_ON | 648 | TRACE_IRQS_ON |
656 | ENABLE_INTERRUPTS(CLBR_NONE) | 649 | ENABLE_INTERRUPTS(CLBR_NONE) |
650 | int_check_syscall_exit_work: | ||
657 | SAVE_REST | 651 | SAVE_REST |
658 | /* Check for syscall exit trace */ | 652 | /* Check for syscall exit trace */ |
659 | testl $_TIF_WORK_SYSCALL_EXIT,%edx | 653 | testl $_TIF_WORK_SYSCALL_EXIT,%edx |
@@ -1021,7 +1015,7 @@ apicinterrupt ERROR_APIC_VECTOR \ | |||
1021 | apicinterrupt SPURIOUS_APIC_VECTOR \ | 1015 | apicinterrupt SPURIOUS_APIC_VECTOR \ |
1022 | spurious_interrupt smp_spurious_interrupt | 1016 | spurious_interrupt smp_spurious_interrupt |
1023 | 1017 | ||
1024 | #ifdef CONFIG_PERF_COUNTERS | 1018 | #ifdef CONFIG_PERF_EVENTS |
1025 | apicinterrupt LOCAL_PENDING_VECTOR \ | 1019 | apicinterrupt LOCAL_PENDING_VECTOR \ |
1026 | perf_pending_interrupt smp_perf_pending_interrupt | 1020 | perf_pending_interrupt smp_perf_pending_interrupt |
1027 | #endif | 1021 | #endif |
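
Both return trampolines (32-bit above, 64-bit here) stop building a fake return slot on the stack — the old pushl $0 / movq %rax, 72(%rsp) dance — and instead jump straight to the address that ftrace_return_to_handler() hands back, shrinking the 64-bit frame from 80 to 24 bytes. Sketched as a contract, what the assembly now relies on is roughly the following; the prototype is inferred from how the trampoline passes the frame pointer:

    /*
     * Runs the function-graph exit callbacks for the current frame and
     * returns the original return address that the entry hook replaced.
     */
    extern unsigned long ftrace_return_to_handler(unsigned long frame_pointer);

    /*
     * Trampoline, in C-like pseudocode:
     *
     *      save the traced function's return values (rax/rdx);
     *      addr = ftrace_return_to_handler(frame_pointer);
     *      restore the return values;
     *      jmp *addr;      // rather than storing addr in a fabricated
     *                      // stack slot and executing ret
     */
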
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 9dbb527e1652..5a1b9758fd62 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
@@ -9,6 +9,8 @@ | |||
9 | * the dangers of modifying code on the run. | 9 | * the dangers of modifying code on the run. |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
13 | |||
12 | #include <linux/spinlock.h> | 14 | #include <linux/spinlock.h> |
13 | #include <linux/hardirq.h> | 15 | #include <linux/hardirq.h> |
14 | #include <linux/uaccess.h> | 16 | #include <linux/uaccess.h> |
@@ -336,15 +338,15 @@ int __init ftrace_dyn_arch_init(void *data) | |||
336 | 338 | ||
337 | switch (faulted) { | 339 | switch (faulted) { |
338 | case 0: | 340 | case 0: |
339 | pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n"); | 341 | pr_info("converting mcount calls to 0f 1f 44 00 00\n"); |
340 | memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE); | 342 | memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE); |
341 | break; | 343 | break; |
342 | case 1: | 344 | case 1: |
343 | pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n"); | 345 | pr_info("converting mcount calls to 66 66 66 66 90\n"); |
344 | memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE); | 346 | memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE); |
345 | break; | 347 | break; |
346 | case 2: | 348 | case 2: |
347 | pr_info("ftrace: converting mcount calls to jmp . + 5\n"); | 349 | pr_info("converting mcount calls to jmp . + 5\n"); |
348 | memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE); | 350 | memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE); |
349 | break; | 351 | break; |
350 | } | 352 | } |
@@ -468,82 +470,10 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, | |||
468 | 470 | ||
469 | #ifdef CONFIG_FTRACE_SYSCALLS | 471 | #ifdef CONFIG_FTRACE_SYSCALLS |
470 | 472 | ||
471 | extern unsigned long __start_syscalls_metadata[]; | ||
472 | extern unsigned long __stop_syscalls_metadata[]; | ||
473 | extern unsigned long *sys_call_table; | 473 | extern unsigned long *sys_call_table; |
474 | 474 | ||
475 | static struct syscall_metadata **syscalls_metadata; | 475 | unsigned long __init arch_syscall_addr(int nr) |
476 | |||
477 | static struct syscall_metadata *find_syscall_meta(unsigned long *syscall) | ||
478 | { | ||
479 | struct syscall_metadata *start; | ||
480 | struct syscall_metadata *stop; | ||
481 | char str[KSYM_SYMBOL_LEN]; | ||
482 | |||
483 | |||
484 | start = (struct syscall_metadata *)__start_syscalls_metadata; | ||
485 | stop = (struct syscall_metadata *)__stop_syscalls_metadata; | ||
486 | kallsyms_lookup((unsigned long) syscall, NULL, NULL, NULL, str); | ||
487 | |||
488 | for ( ; start < stop; start++) { | ||
489 | if (start->name && !strcmp(start->name, str)) | ||
490 | return start; | ||
491 | } | ||
492 | return NULL; | ||
493 | } | ||
494 | |||
495 | struct syscall_metadata *syscall_nr_to_meta(int nr) | ||
496 | { | ||
497 | if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) | ||
498 | return NULL; | ||
499 | |||
500 | return syscalls_metadata[nr]; | ||
501 | } | ||
502 | |||
503 | int syscall_name_to_nr(char *name) | ||
504 | { | 476 | { |
505 | int i; | 477 | return (unsigned long)(&sys_call_table)[nr]; |
506 | |||
507 | if (!syscalls_metadata) | ||
508 | return -1; | ||
509 | |||
510 | for (i = 0; i < NR_syscalls; i++) { | ||
511 | if (syscalls_metadata[i]) { | ||
512 | if (!strcmp(syscalls_metadata[i]->name, name)) | ||
513 | return i; | ||
514 | } | ||
515 | } | ||
516 | return -1; | ||
517 | } | ||
518 | |||
519 | void set_syscall_enter_id(int num, int id) | ||
520 | { | ||
521 | syscalls_metadata[num]->enter_id = id; | ||
522 | } | ||
523 | |||
524 | void set_syscall_exit_id(int num, int id) | ||
525 | { | ||
526 | syscalls_metadata[num]->exit_id = id; | ||
527 | } | ||
528 | |||
529 | static int __init arch_init_ftrace_syscalls(void) | ||
530 | { | ||
531 | int i; | ||
532 | struct syscall_metadata *meta; | ||
533 | unsigned long **psys_syscall_table = &sys_call_table; | ||
534 | |||
535 | syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * | ||
536 | NR_syscalls, GFP_KERNEL); | ||
537 | if (!syscalls_metadata) { | ||
538 | WARN_ON(1); | ||
539 | return -ENOMEM; | ||
540 | } | ||
541 | |||
542 | for (i = 0; i < NR_syscalls; i++) { | ||
543 | meta = find_syscall_meta(psys_syscall_table[i]); | ||
544 | syscalls_metadata[i] = meta; | ||
545 | } | ||
546 | return 0; | ||
547 | } | 478 | } |
548 | arch_initcall(arch_init_ftrace_syscalls); | ||
549 | #endif | 479 | #endif |
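
All of the metadata bookkeeping removed above presumably migrates to generic tracing code, leaving the architecture with one tiny question to answer: where does syscall nr live? A sketch of how a generic consumer could rebuild the nr-to-name mapping from that hook — the demo_ helper and buffer sizing mirror the deleted code, not the tracer's real API:

    #include <linux/kallsyms.h>

    extern unsigned long arch_syscall_addr(int nr);

    static const char *demo_syscall_name(int nr, char buf[KSYM_SYMBOL_LEN])
    {
            /* Resolve the table slot back to a symbol name; the result
             * can then be matched against compiled-in syscall metadata. */
            return kallsyms_lookup(arch_syscall_addr(nr),
                                   NULL, NULL, NULL, buf);
    }
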
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 3f8579f8d42c..4f8e2507e8f3 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c | |||
@@ -11,8 +11,21 @@ | |||
11 | #include <asm/setup.h> | 11 | #include <asm/setup.h> |
12 | #include <asm/sections.h> | 12 | #include <asm/sections.h> |
13 | #include <asm/e820.h> | 13 | #include <asm/e820.h> |
14 | #include <asm/bios_ebda.h> | 14 | #include <asm/page.h> |
15 | #include <asm/trampoline.h> | 15 | #include <asm/trampoline.h> |
16 | #include <asm/apic.h> | ||
17 | #include <asm/io_apic.h> | ||
18 | #include <asm/bios_ebda.h> | ||
19 | |||
20 | static void __init i386_default_early_setup(void) | ||
21 | { | ||
22 | /* Initialize 32-bit specific setup functions */ | ||
23 | x86_init.resources.probe_roms = probe_roms; | ||
24 | x86_init.resources.reserve_resources = i386_reserve_resources; | ||
25 | x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc; | ||
26 | |||
27 | reserve_ebda_region(); | ||
28 | } | ||
16 | 29 | ||
17 | void __init i386_start_kernel(void) | 30 | void __init i386_start_kernel(void) |
18 | { | 31 | { |
@@ -29,7 +42,16 @@ void __init i386_start_kernel(void) | |||
29 | reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); | 42 | reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); |
30 | } | 43 | } |
31 | #endif | 44 | #endif |
32 | reserve_ebda_region(); | 45 | |
46 | /* Call the subarch specific early setup function */ | ||
47 | switch (boot_params.hdr.hardware_subarch) { | ||
48 | case X86_SUBARCH_MRST: | ||
49 | x86_mrst_early_setup(); | ||
50 | break; | ||
51 | default: | ||
52 | i386_default_early_setup(); | ||
53 | break; | ||
54 | } | ||
33 | 55 | ||
34 | /* | 56 | /* |
35 | * At this point everything still needed from the boot loader | 57 | * At this point everything still needed from the boot loader |
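
boot_params.hdr.hardware_subarch comes from the boot loader via the kernel's boot protocol, so a Moorestown loader can steer early setup away from PC assumptions (EBDA, i8259 and friends) before anything touches them; this pairs with the new subarch_entries slot added in head_32.S below. X86_SUBARCH_MRST is assumed to be a new ID alongside the existing ones; going by the order of that table, the set is roughly:

    /* Sketch of the subarch IDs implied above; values are illustrative
     * and follow the order of subarch_entries in head_32.S. */
    enum {
            X86_SUBARCH_PC = 0,     /* normal x86/PC */
            X86_SUBARCH_LGUEST,     /* lguest hypervisor */
            X86_SUBARCH_XEN,        /* Xen hypervisor */
            X86_SUBARCH_MRST,       /* Moorestown MID */
    };
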
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 70eaa852c732..0b06cd778fd9 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
@@ -23,8 +23,8 @@ | |||
23 | #include <asm/sections.h> | 23 | #include <asm/sections.h> |
24 | #include <asm/kdebug.h> | 24 | #include <asm/kdebug.h> |
25 | #include <asm/e820.h> | 25 | #include <asm/e820.h> |
26 | #include <asm/bios_ebda.h> | ||
27 | #include <asm/trampoline.h> | 26 | #include <asm/trampoline.h> |
27 | #include <asm/bios_ebda.h> | ||
28 | 28 | ||
29 | static void __init zap_identity_mappings(void) | 29 | static void __init zap_identity_mappings(void) |
30 | { | 30 | { |
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 7ffec6b3b331..050c278481b1 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -79,7 +79,7 @@ RESERVE_BRK(pagetables, INIT_MAP_SIZE) | |||
79 | * any particular GDT layout, because we load our own as soon as we | 79 | * any particular GDT layout, because we load our own as soon as we |
80 | * can. | 80 | * can. |
81 | */ | 81 | */ |
82 | .section .text.head,"ax",@progbits | 82 | __HEAD |
83 | ENTRY(startup_32) | 83 | ENTRY(startup_32) |
84 | /* test KEEP_SEGMENTS flag to see if the bootloader is asking | 84 | /* test KEEP_SEGMENTS flag to see if the bootloader is asking |
85 | us to not reload segments */ | 85 | us to not reload segments */ |
@@ -157,6 +157,7 @@ subarch_entries: | |||
157 | .long default_entry /* normal x86/PC */ | 157 | .long default_entry /* normal x86/PC */ |
158 | .long lguest_entry /* lguest hypervisor */ | 158 | .long lguest_entry /* lguest hypervisor */ |
159 | .long xen_entry /* Xen hypervisor */ | 159 | .long xen_entry /* Xen hypervisor */ |
160 | .long default_entry /* Moorestown MID */ | ||
160 | num_subarch_entries = (. - subarch_entries) / 4 | 161 | num_subarch_entries = (. - subarch_entries) / 4 |
161 | .previous | 162 | .previous |
162 | #endif /* CONFIG_PARAVIRT */ | 163 | #endif /* CONFIG_PARAVIRT */ |
@@ -607,7 +608,7 @@ ENTRY(initial_code) | |||
607 | /* | 608 | /* |
608 | * BSS section | 609 | * BSS section |
609 | */ | 610 | */ |
610 | .section ".bss.page_aligned","wa" | 611 | __PAGE_ALIGNED_BSS |
611 | .align PAGE_SIZE_asm | 612 | .align PAGE_SIZE_asm |
612 | #ifdef CONFIG_X86_PAE | 613 | #ifdef CONFIG_X86_PAE |
613 | swapper_pg_pmd: | 614 | swapper_pg_pmd: |
@@ -625,7 +626,7 @@ ENTRY(empty_zero_page) | |||
625 | * This starts the data section. | 626 | * This starts the data section. |
626 | */ | 627 | */ |
627 | #ifdef CONFIG_X86_PAE | 628 | #ifdef CONFIG_X86_PAE |
628 | .section ".data.page_aligned","wa" | 629 | __PAGE_ALIGNED_DATA |
629 | /* Page-aligned for the benefit of paravirt? */ | 630 | /* Page-aligned for the benefit of paravirt? */ |
630 | .align PAGE_SIZE_asm | 631 | .align PAGE_SIZE_asm |
631 | ENTRY(swapper_pg_dir) | 632 | ENTRY(swapper_pg_dir) |
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index fa54f78e2a05..780cd928fcd5 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -40,7 +40,7 @@ L4_START_KERNEL = pgd_index(__START_KERNEL_map) | |||
40 | L3_START_KERNEL = pud_index(__START_KERNEL_map) | 40 | L3_START_KERNEL = pud_index(__START_KERNEL_map) |
41 | 41 | ||
42 | .text | 42 | .text |
43 | .section .text.head | 43 | __HEAD |
44 | .code64 | 44 | .code64 |
45 | .globl startup_64 | 45 | .globl startup_64 |
46 | startup_64: | 46 | startup_64: |
@@ -418,7 +418,7 @@ ENTRY(phys_base) | |||
418 | ENTRY(idt_table) | 418 | ENTRY(idt_table) |
419 | .skip IDT_ENTRIES * 16 | 419 | .skip IDT_ENTRIES * 16 |
420 | 420 | ||
421 | .section .bss.page_aligned, "aw", @nobits | 421 | __PAGE_ALIGNED_BSS |
422 | .align PAGE_SIZE | 422 | .align PAGE_SIZE |
423 | ENTRY(empty_zero_page) | 423 | ENTRY(empty_zero_page) |
424 | .skip PAGE_SIZE | 424 | .skip PAGE_SIZE |
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c index 43cec6bdda63..9c3bd4a2050e 100644 --- a/arch/x86/kernel/i386_ksyms_32.c +++ b/arch/x86/kernel/i386_ksyms_32.c | |||
@@ -10,6 +10,16 @@ | |||
10 | EXPORT_SYMBOL(mcount); | 10 | EXPORT_SYMBOL(mcount); |
11 | #endif | 11 | #endif |
12 | 12 | ||
13 | /* | ||
14 | * Note, this is a prototype to get at the symbol for | ||
15 | * the export, but don't use it from C code; it is used | ||
16 | * by assembly code and does not use the C calling convention! | ||
17 | */ | ||
18 | #ifndef CONFIG_X86_CMPXCHG64 | ||
19 | extern void cmpxchg8b_emu(void); | ||
20 | EXPORT_SYMBOL(cmpxchg8b_emu); | ||
21 | #endif | ||
22 | |||
13 | /* Networking helper routines. */ | 23 | /* Networking helper routines. */ |
14 | EXPORT_SYMBOL(csum_partial_copy_generic); | 24 | EXPORT_SYMBOL(csum_partial_copy_generic); |
15 | 25 | ||
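
cmpxchg8b_emu backs the cmpxchg64() family on CPUs that lack the CMPXCHG8B instruction — hence the !CONFIG_X86_CMPXCHG64 guard — and, as the comment warns, it is assembly with a fixed-register calling convention, so only its symbol is exported here. Its C-level semantics are roughly the classic interrupt-disable fallback, which suffices where no SMP systems with cmpxchg8b-less CPUs are supported:

    #include <linux/types.h>
    #include <linux/irqflags.h>

    /* Semantics sketch only: the real cmpxchg8b_emu is hand-written
     * assembly taking its operands in fixed registers. */
    static u64 demo_cmpxchg8b(volatile u64 *ptr, u64 old, u64 new)
    {
            unsigned long flags;
            u64 prev;

            local_irq_save(flags);
            prev = *ptr;
            if (prev == old)
                    *ptr = new;
            local_irq_restore(flags);
            return prev;
    }
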
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index 5cf36c053ac4..23c167925a5c 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c | |||
@@ -19,12 +19,6 @@ | |||
19 | DEFINE_SPINLOCK(i8253_lock); | 19 | DEFINE_SPINLOCK(i8253_lock); |
20 | EXPORT_SYMBOL(i8253_lock); | 20 | EXPORT_SYMBOL(i8253_lock); |
21 | 21 | ||
22 | #ifdef CONFIG_X86_32 | ||
23 | static void pit_disable_clocksource(void); | ||
24 | #else | ||
25 | static inline void pit_disable_clocksource(void) { } | ||
26 | #endif | ||
27 | |||
28 | /* | 22 | /* |
29 | * HPET replaces the PIT, when enabled. So we need to know, which of | 23 | * HPET replaces the PIT, when enabled. So we need to know, which of |
30 | * the two timers is used | 24 | * the two timers is used |
@@ -57,12 +51,10 @@ static void init_pit_timer(enum clock_event_mode mode, | |||
57 | outb_pit(0, PIT_CH0); | 51 | outb_pit(0, PIT_CH0); |
58 | outb_pit(0, PIT_CH0); | 52 | outb_pit(0, PIT_CH0); |
59 | } | 53 | } |
60 | pit_disable_clocksource(); | ||
61 | break; | 54 | break; |
62 | 55 | ||
63 | case CLOCK_EVT_MODE_ONESHOT: | 56 | case CLOCK_EVT_MODE_ONESHOT: |
64 | /* One shot setup */ | 57 | /* One shot setup */ |
65 | pit_disable_clocksource(); | ||
66 | outb_pit(0x38, PIT_MODE); | 58 | outb_pit(0x38, PIT_MODE); |
67 | break; | 59 | break; |
68 | 60 | ||
@@ -200,17 +192,6 @@ static struct clocksource pit_cs = { | |||
200 | .shift = 20, | 192 | .shift = 20, |
201 | }; | 193 | }; |
202 | 194 | ||
203 | static void pit_disable_clocksource(void) | ||
204 | { | ||
205 | /* | ||
206 | * Use mult to check whether it is registered or not | ||
207 | */ | ||
208 | if (pit_cs.mult) { | ||
209 | clocksource_unregister(&pit_cs); | ||
210 | pit_cs.mult = 0; | ||
211 | } | ||
212 | } | ||
213 | |||
214 | static int __init init_pit_clocksource(void) | 195 | static int __init init_pit_clocksource(void) |
215 | { | 196 | { |
216 | /* | 197 | /* |
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c index 270ff83efc11..3a54dcb9cd0e 100644 --- a/arch/x86/kernel/init_task.c +++ b/arch/x86/kernel/init_task.c | |||
@@ -20,9 +20,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); | |||
20 | * way process stacks are handled. This is done by having a special | 20 | * way process stacks are handled. This is done by having a special |
21 | * "init_task" linker map entry.. | 21 | * "init_task" linker map entry.. |
22 | */ | 22 | */ |
23 | union thread_union init_thread_union | 23 | union thread_union init_thread_union __init_task_data = |
24 | __attribute__((__section__(".data.init_task"))) = | 24 | { INIT_THREAD_INFO(init_task) }; |
25 | { INIT_THREAD_INFO(init_task) }; | ||
26 | 25 | ||
27 | /* | 26 | /* |
28 | * Initial task structure. | 27 | * Initial task structure. |
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index b0cdde6932f5..74656d1d4e30 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -104,7 +104,7 @@ static int show_other_interrupts(struct seq_file *p, int prec) | |||
104 | seq_printf(p, " Threshold APIC interrupts\n"); | 104 | seq_printf(p, " Threshold APIC interrupts\n"); |
105 | # endif | 105 | # endif |
106 | #endif | 106 | #endif |
107 | #ifdef CONFIG_X86_NEW_MCE | 107 | #ifdef CONFIG_X86_MCE |
108 | seq_printf(p, "%*s: ", prec, "MCE"); | 108 | seq_printf(p, "%*s: ", prec, "MCE"); |
109 | for_each_online_cpu(j) | 109 | for_each_online_cpu(j) |
110 | seq_printf(p, "%10u ", per_cpu(mce_exception_count, j)); | 110 | seq_printf(p, "%10u ", per_cpu(mce_exception_count, j)); |
@@ -200,7 +200,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu) | |||
200 | sum += irq_stats(cpu)->irq_threshold_count; | 200 | sum += irq_stats(cpu)->irq_threshold_count; |
201 | # endif | 201 | # endif |
202 | #endif | 202 | #endif |
203 | #ifdef CONFIG_X86_NEW_MCE | 203 | #ifdef CONFIG_X86_MCE |
204 | sum += per_cpu(mce_exception_count, cpu); | 204 | sum += per_cpu(mce_exception_count, cpu); |
205 | sum += per_cpu(mce_poll_count, cpu); | 205 | sum += per_cpu(mce_poll_count, cpu); |
206 | #endif | 206 | #endif |
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 92b7703d3d58..40f30773fb29 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
@@ -116,7 +116,7 @@ int vector_used_by_percpu_irq(unsigned int vector) | |||
116 | return 0; | 116 | return 0; |
117 | } | 117 | } |
118 | 118 | ||
119 | static void __init init_ISA_irqs(void) | 119 | void __init init_ISA_irqs(void) |
120 | { | 120 | { |
121 | int i; | 121 | int i; |
122 | 122 | ||
@@ -140,8 +140,10 @@ static void __init init_ISA_irqs(void) | |||
140 | } | 140 | } |
141 | } | 141 | } |
142 | 142 | ||
143 | /* Overridden in paravirt.c */ | 143 | void __init init_IRQ(void) |
144 | void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); | 144 | { |
145 | x86_init.irqs.intr_init(); | ||
146 | } | ||
145 | 147 | ||
146 | static void __init smp_intr_init(void) | 148 | static void __init smp_intr_init(void) |
147 | { | 149 | { |
@@ -190,7 +192,7 @@ static void __init apic_intr_init(void) | |||
190 | #ifdef CONFIG_X86_MCE_THRESHOLD | 192 | #ifdef CONFIG_X86_MCE_THRESHOLD |
191 | alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); | 193 | alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); |
192 | #endif | 194 | #endif |
193 | #if defined(CONFIG_X86_NEW_MCE) && defined(CONFIG_X86_LOCAL_APIC) | 195 | #if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_LOCAL_APIC) |
194 | alloc_intr_gate(MCE_SELF_VECTOR, mce_self_interrupt); | 196 | alloc_intr_gate(MCE_SELF_VECTOR, mce_self_interrupt); |
195 | #endif | 197 | #endif |
196 | 198 | ||
@@ -206,39 +208,19 @@ static void __init apic_intr_init(void) | |||
206 | alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); | 208 | alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); |
207 | 209 | ||
208 | /* Performance monitoring interrupts: */ | 210 | /* Performance monitoring interrupts: */ |
209 | # ifdef CONFIG_PERF_COUNTERS | 211 | # ifdef CONFIG_PERF_EVENTS |
210 | alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt); | 212 | alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt); |
211 | # endif | 213 | # endif |
212 | 214 | ||
213 | #endif | 215 | #endif |
214 | } | 216 | } |
215 | 217 | ||
216 | /** | ||
217 | * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors | ||
218 | * | ||
219 | * Description: | ||
220 | * Perform any necessary interrupt initialisation prior to setting up | ||
221 | * the "ordinary" interrupt call gates. For legacy reasons, the ISA | ||
222 | * interrupts should be initialised here if the machine emulates a PC | ||
223 | * in any way. | ||
224 | **/ | ||
225 | static void __init x86_quirk_pre_intr_init(void) | ||
226 | { | ||
227 | #ifdef CONFIG_X86_32 | ||
228 | if (x86_quirks->arch_pre_intr_init) { | ||
229 | if (x86_quirks->arch_pre_intr_init()) | ||
230 | return; | ||
231 | } | ||
232 | #endif | ||
233 | init_ISA_irqs(); | ||
234 | } | ||
235 | |||
236 | void __init native_init_IRQ(void) | 218 | void __init native_init_IRQ(void) |
237 | { | 219 | { |
238 | int i; | 220 | int i; |
239 | 221 | ||
240 | /* Execute any quirks before the call gates are initialised: */ | 222 | /* Execute any quirks before the call gates are initialised: */ |
241 | x86_quirk_pre_intr_init(); | 223 | x86_init.irqs.pre_vector_init(); |
242 | 224 | ||
243 | apic_intr_init(); | 225 | apic_intr_init(); |
244 | 226 | ||
@@ -258,12 +240,6 @@ void __init native_init_IRQ(void) | |||
258 | 240 | ||
259 | #ifdef CONFIG_X86_32 | 241 | #ifdef CONFIG_X86_32 |
260 | /* | 242 | /* |
261 | * Call quirks after call gates are initialised (usually add in | ||
262 | * the architecture specific gates): | ||
263 | */ | ||
264 | x86_quirk_intr_init(); | ||
265 | |||
266 | /* | ||
267 | * External FPU? Set up irq13 if so, for | 243 | * External FPU? Set up irq13 if so, for |
268 | * original braindamaged IBM FERR coupling. | 244 | * original braindamaged IBM FERR coupling. |
269 | */ | 245 | */ |
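With init_IRQ() now a plain function dispatching through x86_init.irqs.intr_init, paravirt guests override a table entry instead of supplying a weak-alias replacement for the whole function. A minimal sketch of the native defaults this implies, assuming the field names seen in this diff:

/* sketch of the assumed native defaults, not taken from this diff */
struct x86_init_ops x86_init __initdata = {
	.irqs = {
		.pre_vector_init	= init_ISA_irqs,
		.intr_init		= native_init_IRQ,
	},
};

By convention every slot in such a table is kept populated (with a stub if need be) so call sites never have to NULL-check.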
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index e5efcdcca31b..feaeb0d3aa4f 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -22,6 +22,8 @@ | |||
22 | #include <asm/msr.h> | 22 | #include <asm/msr.h> |
23 | #include <asm/apic.h> | 23 | #include <asm/apic.h> |
24 | #include <linux/percpu.h> | 24 | #include <linux/percpu.h> |
25 | |||
26 | #include <asm/x86_init.h> | ||
25 | #include <asm/reboot.h> | 27 | #include <asm/reboot.h> |
26 | 28 | ||
27 | #define KVM_SCALE 22 | 29 | #define KVM_SCALE 22 |
@@ -182,12 +184,13 @@ void __init kvmclock_init(void) | |||
182 | if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) { | 184 | if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) { |
183 | if (kvm_register_clock("boot clock")) | 185 | if (kvm_register_clock("boot clock")) |
184 | return; | 186 | return; |
185 | pv_time_ops.get_wallclock = kvm_get_wallclock; | ||
186 | pv_time_ops.set_wallclock = kvm_set_wallclock; | ||
187 | pv_time_ops.sched_clock = kvm_clock_read; | 187 | pv_time_ops.sched_clock = kvm_clock_read; |
188 | pv_time_ops.get_tsc_khz = kvm_get_tsc_khz; | 188 | x86_platform.calibrate_tsc = kvm_get_tsc_khz; |
189 | x86_platform.get_wallclock = kvm_get_wallclock; | ||
190 | x86_platform.set_wallclock = kvm_set_wallclock; | ||
189 | #ifdef CONFIG_X86_LOCAL_APIC | 191 | #ifdef CONFIG_X86_LOCAL_APIC |
190 | pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock; | 192 | x86_cpuinit.setup_percpu_clockev = |
193 | kvm_setup_secondary_clock; | ||
191 | #endif | 194 | #endif |
192 | #ifdef CONFIG_SMP | 195 | #ifdef CONFIG_SMP |
193 | smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; | 196 | smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; |
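kvmclock now registers its wallclock and TSC-calibration routines with x86_platform, and its secondary-CPU clock setup with x86_cpuinit, rather than patching pv_time_ops/pv_apic_ops. A sketch of the hook signatures this implies (assumed, inferred from how they are assigned above):

/* assumed shapes of the x86_platform hooks used by kvmclock */
struct x86_platform_ops {
	unsigned long (*calibrate_tsc)(void);		/* TSC rate in kHz */
	unsigned long (*get_wallclock)(void);		/* seconds since epoch */
	int (*set_wallclock)(unsigned long nowtime);
};

Any hypervisor clocksource can plug in the same way, which is the point of moving these hooks out of the paravirt-only ops.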
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 71f1d99a635d..ec6ef60cbd17 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c | |||
@@ -67,8 +67,8 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) | |||
67 | #ifdef CONFIG_SMP | 67 | #ifdef CONFIG_SMP |
68 | preempt_disable(); | 68 | preempt_disable(); |
69 | load_LDT(pc); | 69 | load_LDT(pc); |
70 | if (!cpus_equal(current->mm->cpu_vm_mask, | 70 | if (!cpumask_equal(mm_cpumask(current->mm), |
71 | cpumask_of_cpu(smp_processor_id()))) | 71 | cpumask_of(smp_processor_id()))) |
72 | smp_call_function(flush_ldt, current->mm, 1); | 72 | smp_call_function(flush_ldt, current->mm, 1); |
73 | preempt_enable(); | 73 | preempt_enable(); |
74 | #else | 74 | #else |
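The conversion from cpus_equal()/cpumask_of_cpu() to cpumask_equal()/cpumask_of() switches from passing struct cpumask by value to passing const pointers, which matters once NR_CPUS grows large. mm_cpumask() is assumed to be a trivial accessor, roughly:

/* illustrative; the real accessor lives in linux/sched.h */
static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
{
	return &mm->cpu_vm_mask;
}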
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 9371448290ac..378e9a8f1bf8 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c | |||
@@ -210,8 +210,8 @@ static ssize_t microcode_write(struct file *file, const char __user *buf, | |||
210 | { | 210 | { |
211 | ssize_t ret = -EINVAL; | 211 | ssize_t ret = -EINVAL; |
212 | 212 | ||
213 | if ((len >> PAGE_SHIFT) > num_physpages) { | 213 | if ((len >> PAGE_SHIFT) > totalram_pages) { |
214 | pr_err("microcode: too much data (max %ld pages)\n", num_physpages); | 214 | pr_err("microcode: too much data (max %ld pages)\n", totalram_pages); |
215 | return ret; | 215 | return ret; |
216 | } | 216 | } |
217 | 217 | ||
@@ -236,7 +236,7 @@ static const struct file_operations microcode_fops = { | |||
236 | static struct miscdevice microcode_dev = { | 236 | static struct miscdevice microcode_dev = { |
237 | .minor = MICROCODE_MINOR, | 237 | .minor = MICROCODE_MINOR, |
238 | .name = "microcode", | 238 | .name = "microcode", |
239 | .devnode = "cpu/microcode", | 239 | .nodename = "cpu/microcode", |
240 | .fops = &microcode_fops, | 240 | .fops = &microcode_fops, |
241 | }; | 241 | }; |
242 | 242 | ||
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index fcd513bf2846..5be95ef4ffec 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
@@ -45,6 +45,11 @@ static int __init mpf_checksum(unsigned char *mp, int len) | |||
45 | return sum & 0xFF; | 45 | return sum & 0xFF; |
46 | } | 46 | } |
47 | 47 | ||
48 | int __init default_mpc_apic_id(struct mpc_cpu *m) | ||
49 | { | ||
50 | return m->apicid; | ||
51 | } | ||
52 | |||
48 | static void __init MP_processor_info(struct mpc_cpu *m) | 53 | static void __init MP_processor_info(struct mpc_cpu *m) |
49 | { | 54 | { |
50 | int apicid; | 55 | int apicid; |
@@ -55,10 +60,7 @@ static void __init MP_processor_info(struct mpc_cpu *m) | |||
55 | return; | 60 | return; |
56 | } | 61 | } |
57 | 62 | ||
58 | if (x86_quirks->mpc_apic_id) | 63 | apicid = x86_init.mpparse.mpc_apic_id(m); |
59 | apicid = x86_quirks->mpc_apic_id(m); | ||
60 | else | ||
61 | apicid = m->apicid; | ||
62 | 64 | ||
63 | if (m->cpuflag & CPU_BOOTPROCESSOR) { | 65 | if (m->cpuflag & CPU_BOOTPROCESSOR) { |
64 | bootup_cpu = " (Bootup-CPU)"; | 66 | bootup_cpu = " (Bootup-CPU)"; |
@@ -70,16 +72,18 @@ static void __init MP_processor_info(struct mpc_cpu *m) | |||
70 | } | 72 | } |
71 | 73 | ||
72 | #ifdef CONFIG_X86_IO_APIC | 74 | #ifdef CONFIG_X86_IO_APIC |
73 | static void __init MP_bus_info(struct mpc_bus *m) | 75 | void __init default_mpc_oem_bus_info(struct mpc_bus *m, char *str) |
74 | { | 76 | { |
75 | char str[7]; | ||
76 | memcpy(str, m->bustype, 6); | 77 | memcpy(str, m->bustype, 6); |
77 | str[6] = 0; | 78 | str[6] = 0; |
79 | apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->busid, str); | ||
80 | } | ||
78 | 81 | ||
79 | if (x86_quirks->mpc_oem_bus_info) | 82 | static void __init MP_bus_info(struct mpc_bus *m) |
80 | x86_quirks->mpc_oem_bus_info(m, str); | 83 | { |
81 | else | 84 | char str[7]; |
82 | apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->busid, str); | 85 | |
86 | x86_init.mpparse.mpc_oem_bus_info(m, str); | ||
83 | 87 | ||
84 | #if MAX_MP_BUSSES < 256 | 88 | #if MAX_MP_BUSSES < 256 |
85 | if (m->busid >= MAX_MP_BUSSES) { | 89 | if (m->busid >= MAX_MP_BUSSES) { |
@@ -96,8 +100,8 @@ static void __init MP_bus_info(struct mpc_bus *m) | |||
96 | mp_bus_id_to_type[m->busid] = MP_BUS_ISA; | 100 | mp_bus_id_to_type[m->busid] = MP_BUS_ISA; |
97 | #endif | 101 | #endif |
98 | } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { | 102 | } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { |
99 | if (x86_quirks->mpc_oem_pci_bus) | 103 | if (x86_init.mpparse.mpc_oem_pci_bus) |
100 | x86_quirks->mpc_oem_pci_bus(m); | 104 | x86_init.mpparse.mpc_oem_pci_bus(m); |
101 | 105 | ||
102 | clear_bit(m->busid, mp_bus_not_pci); | 106 | clear_bit(m->busid, mp_bus_not_pci); |
103 | #if defined(CONFIG_EISA) || defined(CONFIG_MCA) | 107 | #if defined(CONFIG_EISA) || defined(CONFIG_MCA) |
@@ -291,6 +295,8 @@ static void __init smp_dump_mptable(struct mpc_table *mpc, unsigned char *mpt) | |||
291 | 1, mpc, mpc->length, 1); | 295 | 1, mpc, mpc->length, 1); |
292 | } | 296 | } |
293 | 297 | ||
298 | void __init default_smp_read_mpc_oem(struct mpc_table *mpc) { } | ||
299 | |||
294 | static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) | 300 | static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) |
295 | { | 301 | { |
296 | char str[16]; | 302 | char str[16]; |
@@ -312,16 +318,13 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) | |||
312 | if (early) | 318 | if (early) |
313 | return 1; | 319 | return 1; |
314 | 320 | ||
315 | if (mpc->oemptr && x86_quirks->smp_read_mpc_oem) { | 321 | if (mpc->oemptr) |
316 | struct mpc_oemtable *oem_table = (void *)(long)mpc->oemptr; | 322 | x86_init.mpparse.smp_read_mpc_oem(mpc); |
317 | x86_quirks->smp_read_mpc_oem(oem_table, mpc->oemsize); | ||
318 | } | ||
319 | 323 | ||
320 | /* | 324 | /* |
321 | * Now process the configuration blocks. | 325 | * Now process the configuration blocks. |
322 | */ | 326 | */ |
323 | if (x86_quirks->mpc_record) | 327 | x86_init.mpparse.mpc_record(0); |
324 | *x86_quirks->mpc_record = 0; | ||
325 | 328 | ||
326 | while (count < mpc->length) { | 329 | while (count < mpc->length) { |
327 | switch (*mpt) { | 330 | switch (*mpt) { |
@@ -353,8 +356,7 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) | |||
353 | count = mpc->length; | 356 | count = mpc->length; |
354 | break; | 357 | break; |
355 | } | 358 | } |
356 | if (x86_quirks->mpc_record) | 359 | x86_init.mpparse.mpc_record(1); |
357 | (*x86_quirks->mpc_record)++; | ||
358 | } | 360 | } |
359 | 361 | ||
360 | #ifdef CONFIG_X86_BIGSMP | 362 | #ifdef CONFIG_X86_BIGSMP |
@@ -608,7 +610,7 @@ static int __init check_physptr(struct mpf_intel *mpf, unsigned int early) | |||
608 | /* | 610 | /* |
609 | * Scan the memory blocks for an SMP configuration block. | 611 | * Scan the memory blocks for an SMP configuration block. |
610 | */ | 612 | */ |
611 | static void __init __get_smp_config(unsigned int early) | 613 | void __init default_get_smp_config(unsigned int early) |
612 | { | 614 | { |
613 | struct mpf_intel *mpf = mpf_found; | 615 | struct mpf_intel *mpf = mpf_found; |
614 | 616 | ||
@@ -625,11 +627,6 @@ static void __init __get_smp_config(unsigned int early) | |||
625 | if (acpi_lapic && acpi_ioapic) | 627 | if (acpi_lapic && acpi_ioapic) |
626 | return; | 628 | return; |
627 | 629 | ||
628 | if (x86_quirks->mach_get_smp_config) { | ||
629 | if (x86_quirks->mach_get_smp_config(early)) | ||
630 | return; | ||
631 | } | ||
632 | |||
633 | printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", | 630 | printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", |
634 | mpf->specification); | 631 | mpf->specification); |
635 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) | 632 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) |
@@ -670,16 +667,6 @@ static void __init __get_smp_config(unsigned int early) | |||
670 | */ | 667 | */ |
671 | } | 668 | } |
672 | 669 | ||
673 | void __init early_get_smp_config(void) | ||
674 | { | ||
675 | __get_smp_config(1); | ||
676 | } | ||
677 | |||
678 | void __init get_smp_config(void) | ||
679 | { | ||
680 | __get_smp_config(0); | ||
681 | } | ||
682 | |||
683 | static void __init smp_reserve_bootmem(struct mpf_intel *mpf) | 670 | static void __init smp_reserve_bootmem(struct mpf_intel *mpf) |
684 | { | 671 | { |
685 | unsigned long size = get_mpc_size(mpf->physptr); | 672 | unsigned long size = get_mpc_size(mpf->physptr); |
@@ -745,14 +732,10 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, | |||
745 | return 0; | 732 | return 0; |
746 | } | 733 | } |
747 | 734 | ||
748 | static void __init __find_smp_config(unsigned int reserve) | 735 | void __init default_find_smp_config(unsigned int reserve) |
749 | { | 736 | { |
750 | unsigned int address; | 737 | unsigned int address; |
751 | 738 | ||
752 | if (x86_quirks->mach_find_smp_config) { | ||
753 | if (x86_quirks->mach_find_smp_config(reserve)) | ||
754 | return; | ||
755 | } | ||
756 | /* | 739 | /* |
757 | * FIXME: Linux assumes you have 640K of base ram.. | 740 | * FIXME: Linux assumes you have 640K of base ram.. |
758 | * this continues the error... | 741 | * this continues the error... |
@@ -787,16 +770,6 @@ static void __init __find_smp_config(unsigned int reserve) | |||
787 | smp_scan_config(address, 0x400, reserve); | 770 | smp_scan_config(address, 0x400, reserve); |
788 | } | 771 | } |
789 | 772 | ||
790 | void __init early_find_smp_config(void) | ||
791 | { | ||
792 | __find_smp_config(0); | ||
793 | } | ||
794 | |||
795 | void __init find_smp_config(void) | ||
796 | { | ||
797 | __find_smp_config(1); | ||
798 | } | ||
799 | |||
800 | #ifdef CONFIG_X86_IO_APIC | 773 | #ifdef CONFIG_X86_IO_APIC |
801 | static u8 __initdata irq_used[MAX_IRQ_SOURCES]; | 774 | static u8 __initdata irq_used[MAX_IRQ_SOURCES]; |
802 | 775 | ||
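All of the MP-table quirk hooks move from optional x86_quirks pointers guarded by NULL checks to always-valid x86_init.mpparse callbacks, with the old inline behaviour factored out into the default_* functions added above. Keeping every slot populated is what lets the call sites shrink; a sketch of the assumed no-op fillers for slots a platform does not care about:

/* assumed no-op defaults so callers never need a NULL check */
void __init x86_init_noop(void) { }
void __init x86_init_uint_noop(unsigned int unused) { }

mpc_record(), for instance, is presumably wired to such a stub on platforms that do not count MP-table records.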
diff --git a/arch/x86/kernel/mrst.c b/arch/x86/kernel/mrst.c new file mode 100644 index 000000000000..3b7078abc871 --- /dev/null +++ b/arch/x86/kernel/mrst.c | |||
@@ -0,0 +1,24 @@ | |||
1 | /* | ||
2 | * mrst.c: Intel Moorestown platform specific setup code | ||
3 | * | ||
4 | * (C) Copyright 2008 Intel Corporation | ||
5 | * Author: Jacob Pan (jacob.jun.pan@intel.com) | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; version 2 | ||
10 | * of the License. | ||
11 | */ | ||
12 | #include <linux/init.h> | ||
13 | |||
14 | #include <asm/setup.h> | ||
15 | |||
16 | /* | ||
17 | * Moorestown specific x86_init function overrides and early setup | ||
18 | * calls. | ||
19 | */ | ||
20 | void __init x86_mrst_early_setup(void) | ||
21 | { | ||
22 | x86_init.resources.probe_roms = x86_init_noop; | ||
23 | x86_init.resources.reserve_resources = x86_init_noop; | ||
24 | } | ||
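Moorestown has no option ROMs or standard legacy resources, so its early setup simply points the corresponding hooks at the no-op stub. The call into x86_mrst_early_setup() itself is assumed to come from the early 32-bit boot path, keyed off the boot protocol's subarchitecture field, roughly:

/* assumed call site; names illustrative, not part of this diff */
if (boot_params.hdr.hardware_subarch == X86_SUBARCH_MRST)
	x86_mrst_early_setup();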
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 7dd950094178..6a3cefc7dda1 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c | |||
@@ -241,7 +241,7 @@ static struct notifier_block __refdata msr_class_cpu_notifier = { | |||
241 | .notifier_call = msr_class_cpu_callback, | 241 | .notifier_call = msr_class_cpu_callback, |
242 | }; | 242 | }; |
243 | 243 | ||
244 | static char *msr_nodename(struct device *dev) | 244 | static char *msr_devnode(struct device *dev, mode_t *mode) |
245 | { | 245 | { |
246 | return kasprintf(GFP_KERNEL, "cpu/%u/msr", MINOR(dev->devt)); | 246 | return kasprintf(GFP_KERNEL, "cpu/%u/msr", MINOR(dev->devt)); |
247 | } | 247 | } |
@@ -262,7 +262,7 @@ static int __init msr_init(void) | |||
262 | err = PTR_ERR(msr_class); | 262 | err = PTR_ERR(msr_class); |
263 | goto out_chrdev; | 263 | goto out_chrdev; |
264 | } | 264 | } |
265 | msr_class->nodename = msr_nodename; | 265 | msr_class->devnode = msr_devnode; |
266 | for_each_online_cpu(i) { | 266 | for_each_online_cpu(i) { |
267 | err = msr_device_create(i); | 267 | err = msr_device_create(i); |
268 | if (err != 0) | 268 | if (err != 0) |
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index f5b0b4a01fb2..1b1739d16310 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -54,17 +54,12 @@ u64 _paravirt_ident_64(u64 x) | |||
54 | return x; | 54 | return x; |
55 | } | 55 | } |
56 | 56 | ||
57 | static void __init default_banner(void) | 57 | void __init default_banner(void) |
58 | { | 58 | { |
59 | printk(KERN_INFO "Booting paravirtualized kernel on %s\n", | 59 | printk(KERN_INFO "Booting paravirtualized kernel on %s\n", |
60 | pv_info.name); | 60 | pv_info.name); |
61 | } | 61 | } |
62 | 62 | ||
63 | char *memory_setup(void) | ||
64 | { | ||
65 | return pv_init_ops.memory_setup(); | ||
66 | } | ||
67 | |||
68 | /* Simple instruction patching code. */ | 63 | /* Simple instruction patching code. */ |
69 | #define DEF_NATIVE(ops, name, code) \ | 64 | #define DEF_NATIVE(ops, name, code) \ |
70 | extern const char start_##ops##_##name[], end_##ops##_##name[]; \ | 65 | extern const char start_##ops##_##name[], end_##ops##_##name[]; \ |
@@ -188,11 +183,6 @@ unsigned paravirt_patch_insns(void *insnbuf, unsigned len, | |||
188 | return insn_len; | 183 | return insn_len; |
189 | } | 184 | } |
190 | 185 | ||
191 | void init_IRQ(void) | ||
192 | { | ||
193 | pv_irq_ops.init_IRQ(); | ||
194 | } | ||
195 | |||
196 | static void native_flush_tlb(void) | 186 | static void native_flush_tlb(void) |
197 | { | 187 | { |
198 | __native_flush_tlb(); | 188 | __native_flush_tlb(); |
@@ -218,13 +208,6 @@ extern void native_irq_enable_sysexit(void); | |||
218 | extern void native_usergs_sysret32(void); | 208 | extern void native_usergs_sysret32(void); |
219 | extern void native_usergs_sysret64(void); | 209 | extern void native_usergs_sysret64(void); |
220 | 210 | ||
221 | static int __init print_banner(void) | ||
222 | { | ||
223 | pv_init_ops.banner(); | ||
224 | return 0; | ||
225 | } | ||
226 | core_initcall(print_banner); | ||
227 | |||
228 | static struct resource reserve_ioports = { | 211 | static struct resource reserve_ioports = { |
229 | .start = 0, | 212 | .start = 0, |
230 | .end = IO_SPACE_LIMIT, | 213 | .end = IO_SPACE_LIMIT, |
@@ -320,21 +303,13 @@ struct pv_info pv_info = { | |||
320 | 303 | ||
321 | struct pv_init_ops pv_init_ops = { | 304 | struct pv_init_ops pv_init_ops = { |
322 | .patch = native_patch, | 305 | .patch = native_patch, |
323 | .banner = default_banner, | ||
324 | .arch_setup = paravirt_nop, | ||
325 | .memory_setup = machine_specific_memory_setup, | ||
326 | }; | 306 | }; |
327 | 307 | ||
328 | struct pv_time_ops pv_time_ops = { | 308 | struct pv_time_ops pv_time_ops = { |
329 | .time_init = hpet_time_init, | ||
330 | .get_wallclock = native_get_wallclock, | ||
331 | .set_wallclock = native_set_wallclock, | ||
332 | .sched_clock = native_sched_clock, | 309 | .sched_clock = native_sched_clock, |
333 | .get_tsc_khz = native_calibrate_tsc, | ||
334 | }; | 310 | }; |
335 | 311 | ||
336 | struct pv_irq_ops pv_irq_ops = { | 312 | struct pv_irq_ops pv_irq_ops = { |
337 | .init_IRQ = native_init_IRQ, | ||
338 | .save_fl = __PV_IS_CALLEE_SAVE(native_save_fl), | 313 | .save_fl = __PV_IS_CALLEE_SAVE(native_save_fl), |
339 | .restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl), | 314 | .restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl), |
340 | .irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable), | 315 | .irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable), |
@@ -409,8 +384,6 @@ struct pv_cpu_ops pv_cpu_ops = { | |||
409 | 384 | ||
410 | struct pv_apic_ops pv_apic_ops = { | 385 | struct pv_apic_ops pv_apic_ops = { |
411 | #ifdef CONFIG_X86_LOCAL_APIC | 386 | #ifdef CONFIG_X86_LOCAL_APIC |
412 | .setup_boot_clock = setup_boot_APIC_clock, | ||
413 | .setup_secondary_clock = setup_secondary_APIC_clock, | ||
414 | .startup_ipi_hook = paravirt_nop, | 387 | .startup_ipi_hook = paravirt_nop, |
415 | #endif | 388 | #endif |
416 | }; | 389 | }; |
@@ -424,13 +397,6 @@ struct pv_apic_ops pv_apic_ops = { | |||
424 | #endif | 397 | #endif |
425 | 398 | ||
426 | struct pv_mmu_ops pv_mmu_ops = { | 399 | struct pv_mmu_ops pv_mmu_ops = { |
427 | #ifndef CONFIG_X86_64 | ||
428 | .pagetable_setup_start = native_pagetable_setup_start, | ||
429 | .pagetable_setup_done = native_pagetable_setup_done, | ||
430 | #else | ||
431 | .pagetable_setup_start = paravirt_nop, | ||
432 | .pagetable_setup_done = paravirt_nop, | ||
433 | #endif | ||
434 | 400 | ||
435 | .read_cr2 = native_read_cr2, | 401 | .read_cr2 = native_read_cr2, |
436 | .write_cr2 = native_write_cr2, | 402 | .write_cr2 = native_write_cr2, |
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index d71c8655905b..b2a71dca5642 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -35,7 +35,7 @@ int iommu_detected __read_mostly = 0; | |||
35 | 35 | ||
36 | /* | 36 | /* |
37 | * This variable becomes 1 if iommu=pt is passed on the kernel command line. | 37 | * This variable becomes 1 if iommu=pt is passed on the kernel command line. |
38 | * If this variable is 1, IOMMU implementations do no DMA ranslation for | 38 | * If this variable is 1, IOMMU implementations do no DMA translation for |
39 | * devices and allow every device to access to whole physical memory. This is | 39 | * devices and allow every device to access to whole physical memory. This is |
40 | * useful if a user want to use an IOMMU only for KVM device assignment to | 40 | * useful if a user want to use an IOMMU only for KVM device assignment to |
41 | * guests and not for driver dma translation. | 41 | * guests and not for driver dma translation. |
@@ -225,10 +225,8 @@ static __init int iommu_setup(char *p) | |||
225 | if (!strncmp(p, "soft", 4)) | 225 | if (!strncmp(p, "soft", 4)) |
226 | swiotlb = 1; | 226 | swiotlb = 1; |
227 | #endif | 227 | #endif |
228 | if (!strncmp(p, "pt", 2)) { | 228 | if (!strncmp(p, "pt", 2)) |
229 | iommu_pass_through = 1; | 229 | iommu_pass_through = 1; |
230 | return 1; | ||
231 | } | ||
232 | 230 | ||
233 | gart_parse_options(p); | 231 | gart_parse_options(p); |
234 | 232 | ||
@@ -313,7 +311,7 @@ void pci_iommu_shutdown(void) | |||
313 | amd_iommu_shutdown(); | 311 | amd_iommu_shutdown(); |
314 | } | 312 | } |
315 | /* Must execute after PCI subsystem */ | 313 | /* Must execute after PCI subsystem */ |
316 | fs_initcall(pci_iommu_init); | 314 | rootfs_initcall(pci_iommu_init); |
317 | 315 | ||
318 | #ifdef CONFIG_PCI | 316 | #ifdef CONFIG_PCI |
319 | /* Many VIA bridges seem to corrupt data for DAC. Disable it here */ | 317 | /* Many VIA bridges seem to corrupt data for DAC. Disable it here */ |
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 98a827ee9ed7..a7f1b64f86e0 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/agp_backend.h> | 16 | #include <linux/agp_backend.h> |
17 | #include <linux/init.h> | 17 | #include <linux/init.h> |
18 | #include <linux/mm.h> | 18 | #include <linux/mm.h> |
19 | #include <linux/sched.h> | ||
19 | #include <linux/string.h> | 20 | #include <linux/string.h> |
20 | #include <linux/spinlock.h> | 21 | #include <linux/spinlock.h> |
21 | #include <linux/pci.h> | 22 | #include <linux/pci.h> |
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index e8a35016115f..aaa6b7839f1e 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c | |||
@@ -46,9 +46,8 @@ void __init pci_swiotlb_init(void) | |||
46 | { | 46 | { |
47 | /* don't initialize swiotlb if iommu=off (no_iommu=1) */ | 47 | /* don't initialize swiotlb if iommu=off (no_iommu=1) */ |
48 | #ifdef CONFIG_X86_64 | 48 | #ifdef CONFIG_X86_64 |
49 | if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) || | 49 | if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN)) |
50 | iommu_pass_through) | 50 | swiotlb = 1; |
51 | swiotlb = 1; | ||
52 | #endif | 51 | #endif |
53 | if (swiotlb_force) | 52 | if (swiotlb_force) |
54 | swiotlb = 1; | 53 | swiotlb = 1; |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 1092a1a2fbe6..2275ce5776de 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -9,7 +9,7 @@ | |||
9 | #include <linux/pm.h> | 9 | #include <linux/pm.h> |
10 | #include <linux/clockchips.h> | 10 | #include <linux/clockchips.h> |
11 | #include <linux/random.h> | 11 | #include <linux/random.h> |
12 | #include <trace/power.h> | 12 | #include <trace/events/power.h> |
13 | #include <asm/system.h> | 13 | #include <asm/system.h> |
14 | #include <asm/apic.h> | 14 | #include <asm/apic.h> |
15 | #include <asm/syscalls.h> | 15 | #include <asm/syscalls.h> |
@@ -27,9 +27,6 @@ EXPORT_SYMBOL(idle_nomwait); | |||
27 | 27 | ||
28 | struct kmem_cache *task_xstate_cachep; | 28 | struct kmem_cache *task_xstate_cachep; |
29 | 29 | ||
30 | DEFINE_TRACE(power_start); | ||
31 | DEFINE_TRACE(power_end); | ||
32 | |||
33 | int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) | 30 | int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) |
34 | { | 31 | { |
35 | *dst = *src; | 32 | *dst = *src; |
@@ -289,9 +286,7 @@ static inline int hlt_use_halt(void) | |||
289 | void default_idle(void) | 286 | void default_idle(void) |
290 | { | 287 | { |
291 | if (hlt_use_halt()) { | 288 | if (hlt_use_halt()) { |
292 | struct power_trace it; | 289 | trace_power_start(POWER_CSTATE, 1); |
293 | |||
294 | trace_power_start(&it, POWER_CSTATE, 1); | ||
295 | current_thread_info()->status &= ~TS_POLLING; | 290 | current_thread_info()->status &= ~TS_POLLING; |
296 | /* | 291 | /* |
297 | * TS_POLLING-cleared state must be visible before we | 292 | * TS_POLLING-cleared state must be visible before we |
@@ -304,7 +299,6 @@ void default_idle(void) | |||
304 | else | 299 | else |
305 | local_irq_enable(); | 300 | local_irq_enable(); |
306 | current_thread_info()->status |= TS_POLLING; | 301 | current_thread_info()->status |= TS_POLLING; |
307 | trace_power_end(&it); | ||
308 | } else { | 302 | } else { |
309 | local_irq_enable(); | 303 | local_irq_enable(); |
310 | /* loop is done by the caller */ | 304 | /* loop is done by the caller */ |
@@ -362,9 +356,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); | |||
362 | */ | 356 | */ |
363 | void mwait_idle_with_hints(unsigned long ax, unsigned long cx) | 357 | void mwait_idle_with_hints(unsigned long ax, unsigned long cx) |
364 | { | 358 | { |
365 | struct power_trace it; | 359 | trace_power_start(POWER_CSTATE, (ax>>4)+1); |
366 | |||
367 | trace_power_start(&it, POWER_CSTATE, (ax>>4)+1); | ||
368 | if (!need_resched()) { | 360 | if (!need_resched()) { |
369 | if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) | 361 | if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) |
370 | clflush((void *)&current_thread_info()->flags); | 362 | clflush((void *)&current_thread_info()->flags); |
@@ -374,15 +366,13 @@ void mwait_idle_with_hints(unsigned long ax, unsigned long cx) | |||
374 | if (!need_resched()) | 366 | if (!need_resched()) |
375 | __mwait(ax, cx); | 367 | __mwait(ax, cx); |
376 | } | 368 | } |
377 | trace_power_end(&it); | ||
378 | } | 369 | } |
379 | 370 | ||
380 | /* Default MONITOR/MWAIT with no hints, used for default C1 state */ | 371 | /* Default MONITOR/MWAIT with no hints, used for default C1 state */ |
381 | static void mwait_idle(void) | 372 | static void mwait_idle(void) |
382 | { | 373 | { |
383 | struct power_trace it; | ||
384 | if (!need_resched()) { | 374 | if (!need_resched()) { |
385 | trace_power_start(&it, POWER_CSTATE, 1); | 375 | trace_power_start(POWER_CSTATE, 1); |
386 | if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) | 376 | if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) |
387 | clflush((void *)&current_thread_info()->flags); | 377 | clflush((void *)&current_thread_info()->flags); |
388 | 378 | ||
@@ -392,7 +382,6 @@ static void mwait_idle(void) | |||
392 | __sti_mwait(0, 0); | 382 | __sti_mwait(0, 0); |
393 | else | 383 | else |
394 | local_irq_enable(); | 384 | local_irq_enable(); |
395 | trace_power_end(&it); | ||
396 | } else | 385 | } else |
397 | local_irq_enable(); | 386 | local_irq_enable(); |
398 | } | 387 | } |
@@ -404,13 +393,11 @@ static void mwait_idle(void) | |||
404 | */ | 393 | */ |
405 | static void poll_idle(void) | 394 | static void poll_idle(void) |
406 | { | 395 | { |
407 | struct power_trace it; | 396 | trace_power_start(POWER_CSTATE, 0); |
408 | |||
409 | trace_power_start(&it, POWER_CSTATE, 0); | ||
410 | local_irq_enable(); | 397 | local_irq_enable(); |
411 | while (!need_resched()) | 398 | while (!need_resched()) |
412 | cpu_relax(); | 399 | cpu_relax(); |
413 | trace_power_end(&it); | 400 | trace_power_end(0); |
414 | } | 401 | } |
415 | 402 | ||
416 | /* | 403 | /* |
@@ -558,10 +545,8 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) | |||
558 | void __init init_c1e_mask(void) | 545 | void __init init_c1e_mask(void) |
559 | { | 546 | { |
560 | /* If we're using c1e_idle, we need to allocate c1e_mask. */ | 547 | /* If we're using c1e_idle, we need to allocate c1e_mask. */ |
561 | if (pm_idle == c1e_idle) { | 548 | if (pm_idle == c1e_idle) |
562 | alloc_cpumask_var(&c1e_mask, GFP_KERNEL); | 549 | zalloc_cpumask_var(&c1e_mask, GFP_KERNEL); |
563 | cpumask_clear(c1e_mask); | ||
564 | } | ||
565 | } | 550 | } |
566 | 551 | ||
567 | static int __init idle_setup(char *str) | 552 | static int __init idle_setup(char *str) |
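The hand-rolled DEFINE_TRACE()/struct power_trace pair is replaced by generated tracepoints from trace/events/power.h: trace_power_start() now takes the event type and target C-state directly, and trace_power_end() takes a dummy argument. A sketch of what such a TRACE_EVENT declaration looks like (assumed; the real one lives in the shared header, not in this diff):

TRACE_EVENT(power_start,
	TP_PROTO(u64 type, u64 state),
	TP_ARGS(type, state),
	TP_STRUCT__entry(
		__field(u64, type)
		__field(u64, state)
	),
	TP_fast_assign(
		__entry->type = type;
		__entry->state = state;
	),
	TP_printk("type=%llu state=%llu",
		  (unsigned long long)__entry->type,
		  (unsigned long long)__entry->state)
);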
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 113b8927c822..267cb85b479c 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -312,16 +312,6 @@ static int putreg(struct task_struct *child, | |||
312 | return set_flags(child, value); | 312 | return set_flags(child, value); |
313 | 313 | ||
314 | #ifdef CONFIG_X86_64 | 314 | #ifdef CONFIG_X86_64 |
315 | /* | ||
316 | * Orig_ax is really just a flag with small positive and | ||
317 | * negative values, so make sure to always sign-extend it | ||
318 | * from 32 bits so that it works correctly regardless of | ||
319 | * whether we come from a 32-bit environment or not. | ||
320 | */ | ||
321 | case offsetof(struct user_regs_struct, orig_ax): | ||
322 | value = (long) (s32) value; | ||
323 | break; | ||
324 | |||
325 | case offsetof(struct user_regs_struct,fs_base): | 315 | case offsetof(struct user_regs_struct,fs_base): |
326 | if (value >= TASK_SIZE_OF(child)) | 316 | if (value >= TASK_SIZE_OF(child)) |
327 | return -EIO; | 317 | return -EIO; |
@@ -1177,10 +1167,15 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value) | |||
1177 | 1167 | ||
1178 | case offsetof(struct user32, regs.orig_eax): | 1168 | case offsetof(struct user32, regs.orig_eax): |
1179 | /* | 1169 | /* |
1180 | * Sign-extend the value so that orig_eax = -1 | 1170 | * A 32-bit debugger setting orig_eax means to restore |
1181 | * causes (long)orig_ax < 0 tests to fire correctly. | 1171 | * the state of the task restarting a 32-bit syscall. |
1172 | * Make sure we interpret the -ERESTART* codes correctly | ||
1173 | * in case the task is not actually still sitting at the | ||
1174 | * exit from a 32-bit syscall with TS_COMPAT still set. | ||
1182 | */ | 1175 | */ |
1183 | regs->orig_ax = (long) (s32) value; | 1176 | regs->orig_ax = value; |
1177 | if (syscall_get_nr(child, regs) >= 0) | ||
1178 | task_thread_info(child)->status |= TS_COMPAT; | ||
1184 | break; | 1179 | break; |
1185 | 1180 | ||
1186 | case offsetof(struct user32, regs.eflags): | 1181 | case offsetof(struct user32, regs.eflags): |
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index af71d06624bf..6c3b2c6fd772 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c | |||
@@ -508,7 +508,7 @@ static void __init quirk_amd_nb_node(struct pci_dev *dev) | |||
508 | 508 | ||
509 | pci_read_config_dword(nb_ht, 0x60, &val); | 509 | pci_read_config_dword(nb_ht, 0x60, &val); |
510 | set_dev_node(&dev->dev, val & 7); | 510 | set_dev_node(&dev->dev, val & 7); |
511 | pci_dev_put(dev); | 511 | pci_dev_put(nb_ht); |
512 | } | 512 | } |
513 | 513 | ||
514 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, | 514 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, |
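The quirk change above is a reference-count correction: pci_get_device() returns its device with an elevated refcount that the caller owns, while dev is the fixup's argument and is not this function's to release. Dropping dev instead of nb_ht both leaked the nb_ht reference and underflowed dev's count. The pairing rule, as a generic sketch (device_id is a placeholder):

/* illustrative pairing for pci_get_device()/pci_dev_put() */
struct pci_dev *pdev = pci_get_device(PCI_VENDOR_ID_AMD, device_id, NULL);
if (pdev) {
	/* ... use pdev ... */
	pci_dev_put(pdev);	/* release the reference taken above */
}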
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index a06e8d101844..a1a3cdda06e1 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -4,6 +4,8 @@ | |||
4 | #include <linux/pm.h> | 4 | #include <linux/pm.h> |
5 | #include <linux/efi.h> | 5 | #include <linux/efi.h> |
6 | #include <linux/dmi.h> | 6 | #include <linux/dmi.h> |
7 | #include <linux/sched.h> | ||
8 | #include <linux/tboot.h> | ||
7 | #include <acpi/reboot.h> | 9 | #include <acpi/reboot.h> |
8 | #include <asm/io.h> | 10 | #include <asm/io.h> |
9 | #include <asm/apic.h> | 11 | #include <asm/apic.h> |
@@ -508,6 +510,8 @@ static void native_machine_emergency_restart(void) | |||
508 | if (reboot_emergency) | 510 | if (reboot_emergency) |
509 | emergency_vmx_disable_all(); | 511 | emergency_vmx_disable_all(); |
510 | 512 | ||
513 | tboot_shutdown(TB_SHUTDOWN_REBOOT); | ||
514 | |||
511 | /* Tell the BIOS if we want cold or warm reboot */ | 515 | /* Tell the BIOS if we want cold or warm reboot */ |
512 | *((unsigned short *)__va(0x472)) = reboot_mode; | 516 | *((unsigned short *)__va(0x472)) = reboot_mode; |
513 | 517 | ||
@@ -634,6 +638,8 @@ static void native_machine_halt(void) | |||
634 | /* stop other cpus and apics */ | 638 | /* stop other cpus and apics */ |
635 | machine_shutdown(); | 639 | machine_shutdown(); |
636 | 640 | ||
641 | tboot_shutdown(TB_SHUTDOWN_HALT); | ||
642 | |||
637 | /* stop this cpu */ | 643 | /* stop this cpu */ |
638 | stop_this_cpu(NULL); | 644 | stop_this_cpu(NULL); |
639 | } | 645 | } |
@@ -645,6 +651,8 @@ static void native_machine_power_off(void) | |||
645 | machine_shutdown(); | 651 | machine_shutdown(); |
646 | pm_power_off(); | 652 | pm_power_off(); |
647 | } | 653 | } |
654 | /* a fallback in case there is no PM info available */ | ||
655 | tboot_shutdown(TB_SHUTDOWN_HALT); | ||
648 | } | 656 | } |
649 | 657 | ||
650 | struct machine_ops machine_ops = { | 658 | struct machine_ops machine_ops = { |
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 5d465b207e72..1cfbbfc3ae26 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/pnp.h> | 8 | #include <linux/pnp.h> |
9 | 9 | ||
10 | #include <asm/vsyscall.h> | 10 | #include <asm/vsyscall.h> |
11 | #include <asm/x86_init.h> | ||
11 | #include <asm/time.h> | 12 | #include <asm/time.h> |
12 | 13 | ||
13 | #ifdef CONFIG_X86_32 | 14 | #ifdef CONFIG_X86_32 |
@@ -165,33 +166,29 @@ void rtc_cmos_write(unsigned char val, unsigned char addr) | |||
165 | } | 166 | } |
166 | EXPORT_SYMBOL(rtc_cmos_write); | 167 | EXPORT_SYMBOL(rtc_cmos_write); |
167 | 168 | ||
168 | static int set_rtc_mmss(unsigned long nowtime) | 169 | int update_persistent_clock(struct timespec now) |
169 | { | 170 | { |
170 | unsigned long flags; | 171 | unsigned long flags; |
171 | int retval; | 172 | int retval; |
172 | 173 | ||
173 | spin_lock_irqsave(&rtc_lock, flags); | 174 | spin_lock_irqsave(&rtc_lock, flags); |
174 | retval = set_wallclock(nowtime); | 175 | retval = x86_platform.set_wallclock(now.tv_sec); |
175 | spin_unlock_irqrestore(&rtc_lock, flags); | 176 | spin_unlock_irqrestore(&rtc_lock, flags); |
176 | 177 | ||
177 | return retval; | 178 | return retval; |
178 | } | 179 | } |
179 | 180 | ||
180 | /* not static: needed by APM */ | 181 | /* not static: needed by APM */ |
181 | unsigned long read_persistent_clock(void) | 182 | void read_persistent_clock(struct timespec *ts) |
182 | { | 183 | { |
183 | unsigned long retval, flags; | 184 | unsigned long retval, flags; |
184 | 185 | ||
185 | spin_lock_irqsave(&rtc_lock, flags); | 186 | spin_lock_irqsave(&rtc_lock, flags); |
186 | retval = get_wallclock(); | 187 | retval = x86_platform.get_wallclock(); |
187 | spin_unlock_irqrestore(&rtc_lock, flags); | 188 | spin_unlock_irqrestore(&rtc_lock, flags); |
188 | 189 | ||
189 | return retval; | 190 | ts->tv_sec = retval; |
190 | } | 191 | ts->tv_nsec = 0; |
191 | |||
192 | int update_persistent_clock(struct timespec now) | ||
193 | { | ||
194 | return set_rtc_mmss(now.tv_sec); | ||
195 | } | 192 | } |
196 | 193 | ||
197 | unsigned long long native_read_tsc(void) | 194 | unsigned long long native_read_tsc(void) |
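read_persistent_clock() changes from returning seconds to filling in a struct timespec, matching the nanosecond-capable generic timekeeping interface; the CMOS RTC only resolves whole seconds, so tv_nsec is pinned to 0. A caller under the new interface is assumed to look like:

/* assumed caller pattern in generic timekeeping code */
struct timespec ts;
read_persistent_clock(&ts);
/* ts.tv_sec carries the RTC time; ts.tv_nsec is 0 on x86 */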
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 63f32d220ef2..e09f0e2c14b5 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/screen_info.h> | 27 | #include <linux/screen_info.h> |
28 | #include <linux/ioport.h> | 28 | #include <linux/ioport.h> |
29 | #include <linux/acpi.h> | 29 | #include <linux/acpi.h> |
30 | #include <linux/sfi.h> | ||
30 | #include <linux/apm_bios.h> | 31 | #include <linux/apm_bios.h> |
31 | #include <linux/initrd.h> | 32 | #include <linux/initrd.h> |
32 | #include <linux/bootmem.h> | 33 | #include <linux/bootmem.h> |
@@ -66,6 +67,7 @@ | |||
66 | 67 | ||
67 | #include <linux/percpu.h> | 68 | #include <linux/percpu.h> |
68 | #include <linux/crash_dump.h> | 69 | #include <linux/crash_dump.h> |
70 | #include <linux/tboot.h> | ||
69 | 71 | ||
70 | #include <video/edid.h> | 72 | #include <video/edid.h> |
71 | 73 | ||
@@ -108,10 +110,6 @@ | |||
108 | #include <asm/numa_64.h> | 110 | #include <asm/numa_64.h> |
109 | #endif | 111 | #endif |
110 | 112 | ||
111 | #ifndef ARCH_SETUP | ||
112 | #define ARCH_SETUP | ||
113 | #endif | ||
114 | |||
115 | /* | 113 | /* |
116 | * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. | 114 | * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. |
117 | * The direct mapping extends to max_pfn_mapped, so that we can directly access | 115 | * The direct mapping extends to max_pfn_mapped, so that we can directly access |
@@ -133,9 +131,9 @@ int default_cpu_present_to_apicid(int mps_cpu) | |||
133 | return __default_cpu_present_to_apicid(mps_cpu); | 131 | return __default_cpu_present_to_apicid(mps_cpu); |
134 | } | 132 | } |
135 | 133 | ||
136 | int default_check_phys_apicid_present(int boot_cpu_physical_apicid) | 134 | int default_check_phys_apicid_present(int phys_apicid) |
137 | { | 135 | { |
138 | return __default_check_phys_apicid_present(boot_cpu_physical_apicid); | 136 | return __default_check_phys_apicid_present(phys_apicid); |
139 | } | 137 | } |
140 | #endif | 138 | #endif |
141 | 139 | ||
@@ -171,13 +169,6 @@ static struct resource bss_resource = { | |||
171 | 169 | ||
172 | 170 | ||
173 | #ifdef CONFIG_X86_32 | 171 | #ifdef CONFIG_X86_32 |
174 | static struct resource video_ram_resource = { | ||
175 | .name = "Video RAM area", | ||
176 | .start = 0xa0000, | ||
177 | .end = 0xbffff, | ||
178 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM | ||
179 | }; | ||
180 | |||
181 | /* cpu data as detected by the assembly code in head.S */ | 172 | /* cpu data as detected by the assembly code in head.S */ |
182 | struct cpuinfo_x86 new_cpu_data __cpuinitdata = {0, 0, 0, 0, -1, 1, 0, 0, -1}; | 173 | struct cpuinfo_x86 new_cpu_data __cpuinitdata = {0, 0, 0, 0, -1, 1, 0, 0, -1}; |
183 | /* common cpu data for all cpus */ | 174 | /* common cpu data for all cpus */ |
@@ -605,7 +596,7 @@ static struct resource standard_io_resources[] = { | |||
605 | .flags = IORESOURCE_BUSY | IORESOURCE_IO } | 596 | .flags = IORESOURCE_BUSY | IORESOURCE_IO } |
606 | }; | 597 | }; |
607 | 598 | ||
608 | static void __init reserve_standard_io_resources(void) | 599 | void __init reserve_standard_io_resources(void) |
609 | { | 600 | { |
610 | int i; | 601 | int i; |
611 | 602 | ||
@@ -637,10 +628,6 @@ static int __init setup_elfcorehdr(char *arg) | |||
637 | early_param("elfcorehdr", setup_elfcorehdr); | 628 | early_param("elfcorehdr", setup_elfcorehdr); |
638 | #endif | 629 | #endif |
639 | 630 | ||
640 | static struct x86_quirks default_x86_quirks __initdata; | ||
641 | |||
642 | struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; | ||
643 | |||
644 | #ifdef CONFIG_X86_RESERVE_LOW_64K | 631 | #ifdef CONFIG_X86_RESERVE_LOW_64K |
645 | static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) | 632 | static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) |
646 | { | 633 | { |
@@ -757,7 +744,7 @@ void __init setup_arch(char **cmdline_p) | |||
757 | } | 744 | } |
758 | #endif | 745 | #endif |
759 | 746 | ||
760 | ARCH_SETUP | 747 | x86_init.oem.arch_setup(); |
761 | 748 | ||
762 | setup_memory_map(); | 749 | setup_memory_map(); |
763 | parse_setup_data(); | 750 | parse_setup_data(); |
@@ -796,6 +783,16 @@ void __init setup_arch(char **cmdline_p) | |||
796 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); | 783 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); |
797 | *cmdline_p = command_line; | 784 | *cmdline_p = command_line; |
798 | 785 | ||
786 | #ifdef CONFIG_X86_64 | ||
787 | /* | ||
788 | * Must call this twice: Once just to detect whether hardware doesn't | ||
789 | * support NX (so that the early EHCI debug console setup can safely | ||
790 | * call set_fixmap()), and then again after parsing early parameters to |
791 | * honor the respective command line option. | ||
792 | */ | ||
793 | check_efer(); | ||
794 | #endif | ||
795 | |||
799 | parse_early_param(); | 796 | parse_early_param(); |
800 | 797 | ||
801 | #ifdef CONFIG_X86_64 | 798 | #ifdef CONFIG_X86_64 |
@@ -833,11 +830,9 @@ void __init setup_arch(char **cmdline_p) | |||
833 | * VMware detection requires dmi to be available, so this | 830 | * VMware detection requires dmi to be available, so this |
834 | * needs to be done after dmi_scan_machine, for the BP. | 831 | * needs to be done after dmi_scan_machine, for the BP. |
835 | */ | 832 | */ |
836 | init_hypervisor(&boot_cpu_data); | 833 | init_hypervisor_platform(); |
837 | 834 | ||
838 | #ifdef CONFIG_X86_32 | 835 | x86_init.resources.probe_roms(); |
839 | probe_roms(); | ||
840 | #endif | ||
841 | 836 | ||
842 | /* after parse_early_param, so could debug it */ | 837 | /* after parse_early_param, so could debug it */ |
843 | insert_resource(&iomem_resource, &code_resource); | 838 | insert_resource(&iomem_resource, &code_resource); |
@@ -972,10 +967,11 @@ void __init setup_arch(char **cmdline_p) | |||
972 | kvmclock_init(); | 967 | kvmclock_init(); |
973 | #endif | 968 | #endif |
974 | 969 | ||
975 | paravirt_pagetable_setup_start(swapper_pg_dir); | 970 | x86_init.paging.pagetable_setup_start(swapper_pg_dir); |
976 | paging_init(); | 971 | paging_init(); |
977 | paravirt_pagetable_setup_done(swapper_pg_dir); | 972 | x86_init.paging.pagetable_setup_done(swapper_pg_dir); |
978 | paravirt_post_allocator_init(); | 973 | |
974 | tboot_probe(); | ||
979 | 975 | ||
980 | #ifdef CONFIG_X86_64 | 976 | #ifdef CONFIG_X86_64 |
981 | map_vsyscall(); | 977 | map_vsyscall(); |
@@ -990,13 +986,13 @@ void __init setup_arch(char **cmdline_p) | |||
990 | */ | 986 | */ |
991 | acpi_boot_init(); | 987 | acpi_boot_init(); |
992 | 988 | ||
993 | #if defined(CONFIG_X86_MPPARSE) || defined(CONFIG_X86_VISWS) | 989 | sfi_init(); |
990 | |||
994 | /* | 991 | /* |
995 | * get boot-time SMP configuration: | 992 | * get boot-time SMP configuration: |
996 | */ | 993 | */ |
997 | if (smp_found_config) | 994 | if (smp_found_config) |
998 | get_smp_config(); | 995 | get_smp_config(); |
999 | #endif | ||
1000 | 996 | ||
1001 | prefill_possible_map(); | 997 | prefill_possible_map(); |
1002 | 998 | ||
@@ -1015,10 +1011,7 @@ void __init setup_arch(char **cmdline_p) | |||
1015 | e820_reserve_resources(); | 1011 | e820_reserve_resources(); |
1016 | e820_mark_nosave_regions(max_low_pfn); | 1012 | e820_mark_nosave_regions(max_low_pfn); |
1017 | 1013 | ||
1018 | #ifdef CONFIG_X86_32 | 1014 | x86_init.resources.reserve_resources(); |
1019 | request_resource(&iomem_resource, &video_ram_resource); | ||
1020 | #endif | ||
1021 | reserve_standard_io_resources(); | ||
1022 | 1015 | ||
1023 | e820_setup_gap(); | 1016 | e820_setup_gap(); |
1024 | 1017 | ||
@@ -1030,78 +1023,22 @@ void __init setup_arch(char **cmdline_p) | |||
1030 | conswitchp = &dummy_con; | 1023 | conswitchp = &dummy_con; |
1031 | #endif | 1024 | #endif |
1032 | #endif | 1025 | #endif |
1026 | x86_init.oem.banner(); | ||
1033 | } | 1027 | } |
1034 | 1028 | ||
1035 | #ifdef CONFIG_X86_32 | 1029 | #ifdef CONFIG_X86_32 |
1036 | 1030 | ||
1037 | /** | 1031 | static struct resource video_ram_resource = { |
1038 | * x86_quirk_intr_init - post gate setup interrupt initialisation | 1032 | .name = "Video RAM area", |
1039 | * | 1033 | .start = 0xa0000, |
1040 | * Description: | 1034 | .end = 0xbffff, |
1041 | * Fill in any interrupts that may have been left out by the general | 1035 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM |
1042 | * init_IRQ() routine. interrupts having to do with the machine rather | ||
1043 | * than the devices on the I/O bus (like APIC interrupts in intel MP | ||
1044 | * systems) are started here. | ||
1045 | **/ | ||
1046 | void __init x86_quirk_intr_init(void) | ||
1047 | { | ||
1048 | if (x86_quirks->arch_intr_init) { | ||
1049 | if (x86_quirks->arch_intr_init()) | ||
1050 | return; | ||
1051 | } | ||
1052 | } | ||
1053 | |||
1054 | /** | ||
1055 | * x86_quirk_trap_init - initialise system specific traps | ||
1056 | * | ||
1057 | * Description: | ||
1058 | * Called as the final act of trap_init(). Used in VISWS to initialise | ||
1059 | * the various board specific APIC traps. | ||
1060 | **/ | ||
1061 | void __init x86_quirk_trap_init(void) | ||
1062 | { | ||
1063 | if (x86_quirks->arch_trap_init) { | ||
1064 | if (x86_quirks->arch_trap_init()) | ||
1065 | return; | ||
1066 | } | ||
1067 | } | ||
1068 | |||
1069 | static struct irqaction irq0 = { | ||
1070 | .handler = timer_interrupt, | ||
1071 | .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER, | ||
1072 | .name = "timer" | ||
1073 | }; | 1036 | }; |
1074 | 1037 | ||
1075 | /** | 1038 | void __init i386_reserve_resources(void) |
1076 | * x86_quirk_pre_time_init - do any specific initialisations before. | ||
1077 | * | ||
1078 | **/ | ||
1079 | void __init x86_quirk_pre_time_init(void) | ||
1080 | { | 1039 | { |
1081 | if (x86_quirks->arch_pre_time_init) | 1040 | request_resource(&iomem_resource, &video_ram_resource); |
1082 | x86_quirks->arch_pre_time_init(); | 1041 | reserve_standard_io_resources(); |
1083 | } | 1042 | } |
1084 | 1043 | ||
1085 | /** | ||
1086 | * x86_quirk_time_init - do any specific initialisations for the system timer. | ||
1087 | * | ||
1088 | * Description: | ||
1089 | * Must plug the system timer interrupt source at HZ into the IRQ listed | ||
1090 | * in irq_vectors.h:TIMER_IRQ | ||
1091 | **/ | ||
1092 | void __init x86_quirk_time_init(void) | ||
1093 | { | ||
1094 | if (x86_quirks->arch_time_init) { | ||
1095 | /* | ||
1096 | * A nonzero return code does not mean failure, it means | ||
1097 | * that the architecture quirk does not want any | ||
1098 | * generic (timer) setup to be performed after this: | ||
1099 | */ | ||
1100 | if (x86_quirks->arch_time_init()) | ||
1101 | return; | ||
1102 | } | ||
1103 | |||
1104 | irq0.mask = cpumask_of_cpu(0); | ||
1105 | setup_irq(0, &irq0); | ||
1106 | } | ||
1107 | #endif /* CONFIG_X86_32 */ | 1044 | #endif /* CONFIG_X86_32 */ |
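setup_arch() now routes every platform-variant step through the x86_init table: OEM setup via x86_init.oem.arch_setup(), ROM probing and resource reservation via x86_init.resources (with the 32-bit video-RAM and I/O-port reservations collected into the new i386_reserve_resources()), and the 32-bit pagetable bracketing via x86_init.paging, whose defaults absorb the hooks deleted from pv_mmu_ops earlier in this diff. A sketch of the assumed default entries for two of those groups (the exact table lives outside this diff):

/* assumed defaults; illustrative only */
.oem = {
	.arch_setup		= x86_init_noop,
	.banner			= default_banner,
},
.paging = {
	.pagetable_setup_start	= native_pagetable_setup_start,
	.pagetable_setup_done	= native_pagetable_setup_done,
},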
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 07d81916f212..d559af913e1f 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -55,6 +55,7 @@ EXPORT_SYMBOL(__per_cpu_offset); | |||
55 | #define PERCPU_FIRST_CHUNK_RESERVE 0 | 55 | #define PERCPU_FIRST_CHUNK_RESERVE 0 |
56 | #endif | 56 | #endif |
57 | 57 | ||
58 | #ifdef CONFIG_X86_32 | ||
58 | /** | 59 | /** |
59 | * pcpu_need_numa - determine percpu allocation needs to consider NUMA | 60 | * pcpu_need_numa - determine percpu allocation needs to consider NUMA |
60 | * | 61 | * |
@@ -83,6 +84,7 @@ static bool __init pcpu_need_numa(void) | |||
83 | #endif | 84 | #endif |
84 | return false; | 85 | return false; |
85 | } | 86 | } |
87 | #endif | ||
86 | 88 | ||
87 | /** | 89 | /** |
88 | * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu | 90 | * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu |
@@ -124,308 +126,35 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, | |||
124 | } | 126 | } |
125 | 127 | ||
126 | /* | 128 | /* |
127 | * Large page remap allocator | 129 | * Helpers for first chunk memory allocation |
128 | * | ||
129 | * This allocator uses PMD page as unit. A PMD page is allocated for | ||
130 | * each cpu and each is remapped into vmalloc area using PMD mapping. | ||
131 | * As PMD page is quite large, only part of it is used for the first | ||
132 | * chunk. Unused part is returned to the bootmem allocator. | ||
133 | * | ||
134 | * So, the PMD pages are mapped twice - once to the physical mapping | ||
135 | * and to the vmalloc area for the first percpu chunk. The double | ||
136 | * mapping does add one more PMD TLB entry pressure but still is much | ||
137 | * better than only using 4k mappings while still being NUMA friendly. | ||
138 | */ | 130 | */ |
139 | #ifdef CONFIG_NEED_MULTIPLE_NODES | 131 | static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) |
140 | struct pcpul_ent { | ||
141 | unsigned int cpu; | ||
142 | void *ptr; | ||
143 | }; | ||
144 | |||
145 | static size_t pcpul_size; | ||
146 | static struct pcpul_ent *pcpul_map; | ||
147 | static struct vm_struct pcpul_vm; | ||
148 | |||
149 | static struct page * __init pcpul_get_page(unsigned int cpu, int pageno) | ||
150 | { | 132 | { |
151 | size_t off = (size_t)pageno << PAGE_SHIFT; | 133 | return pcpu_alloc_bootmem(cpu, size, align); |
152 | |||
153 | if (off >= pcpul_size) | ||
154 | return NULL; | ||
155 | |||
156 | return virt_to_page(pcpul_map[cpu].ptr + off); | ||
157 | } | 134 | } |
158 | 135 | ||
159 | static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) | 136 | static void __init pcpu_fc_free(void *ptr, size_t size) |
160 | { | 137 | { |
161 | size_t map_size, dyn_size; | 138 | free_bootmem(__pa(ptr), size); |
162 | unsigned int cpu; | ||
163 | int i, j; | ||
164 | ssize_t ret; | ||
165 | |||
166 | if (!chosen) { | ||
167 | size_t vm_size = VMALLOC_END - VMALLOC_START; | ||
168 | size_t tot_size = nr_cpu_ids * PMD_SIZE; | ||
169 | |||
170 | /* on non-NUMA, embedding is better */ | ||
171 | if (!pcpu_need_numa()) | ||
172 | return -EINVAL; | ||
173 | |||
174 | /* don't consume more than 20% of vmalloc area */ | ||
175 | if (tot_size > vm_size / 5) { | ||
176 | pr_info("PERCPU: too large chunk size %zuMB for " | ||
177 | "large page remap\n", tot_size >> 20); | ||
178 | return -EINVAL; | ||
179 | } | ||
180 | } | ||
181 | |||
182 | /* need PSE */ | ||
183 | if (!cpu_has_pse) { | ||
184 | pr_warning("PERCPU: lpage allocator requires PSE\n"); | ||
185 | return -EINVAL; | ||
186 | } | ||
187 | |||
188 | /* | ||
189 | * Currently supports only single page. Supporting multiple | ||
190 | * pages won't be too difficult if it ever becomes necessary. | ||
191 | */ | ||
192 | pcpul_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + | ||
193 | PERCPU_DYNAMIC_RESERVE); | ||
194 | if (pcpul_size > PMD_SIZE) { | ||
195 | pr_warning("PERCPU: static data is larger than large page, " | ||
196 | "can't use large page\n"); | ||
197 | return -EINVAL; | ||
198 | } | ||
199 | dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; | ||
200 | |||
201 | /* allocate pointer array and alloc large pages */ | ||
202 | map_size = PFN_ALIGN(nr_cpu_ids * sizeof(pcpul_map[0])); | ||
203 | pcpul_map = alloc_bootmem(map_size); | ||
204 | |||
205 | for_each_possible_cpu(cpu) { | ||
206 | pcpul_map[cpu].cpu = cpu; | ||
207 | pcpul_map[cpu].ptr = pcpu_alloc_bootmem(cpu, PMD_SIZE, | ||
208 | PMD_SIZE); | ||
209 | if (!pcpul_map[cpu].ptr) { | ||
210 | pr_warning("PERCPU: failed to allocate large page " | ||
211 | "for cpu%u\n", cpu); | ||
212 | goto enomem; | ||
213 | } | ||
214 | |||
215 | /* | ||
216 | * Only use pcpul_size bytes and give back the rest. | ||
217 | * | ||
218 | * Ingo: The 2MB up-rounding bootmem is needed to make | ||
219 | * sure the partial 2MB page is still fully RAM - it's | ||
220 | * not well-specified to have a PAT-incompatible area | ||
221 | * (unmapped RAM, device memory, etc.) in that hole. | ||
222 | */ | ||
223 | free_bootmem(__pa(pcpul_map[cpu].ptr + pcpul_size), | ||
224 | PMD_SIZE - pcpul_size); | ||
225 | |||
226 | memcpy(pcpul_map[cpu].ptr, __per_cpu_load, static_size); | ||
227 | } | ||
228 | |||
229 | /* allocate address and map */ | ||
230 | pcpul_vm.flags = VM_ALLOC; | ||
231 | pcpul_vm.size = nr_cpu_ids * PMD_SIZE; | ||
232 | vm_area_register_early(&pcpul_vm, PMD_SIZE); | ||
233 | |||
234 | for_each_possible_cpu(cpu) { | ||
235 | pmd_t *pmd, pmd_v; | ||
236 | |||
237 | pmd = populate_extra_pmd((unsigned long)pcpul_vm.addr + | ||
238 | cpu * PMD_SIZE); | ||
239 | pmd_v = pfn_pmd(page_to_pfn(virt_to_page(pcpul_map[cpu].ptr)), | ||
240 | PAGE_KERNEL_LARGE); | ||
241 | set_pmd(pmd, pmd_v); | ||
242 | } | ||
243 | |||
244 | /* we're ready, commit */ | ||
245 | pr_info("PERCPU: Remapped at %p with large pages, static data " | ||
246 | "%zu bytes\n", pcpul_vm.addr, static_size); | ||
247 | |||
248 | ret = pcpu_setup_first_chunk(pcpul_get_page, static_size, | ||
249 | PERCPU_FIRST_CHUNK_RESERVE, dyn_size, | ||
250 | PMD_SIZE, pcpul_vm.addr, NULL); | ||
251 | |||
252 | /* sort pcpul_map array for pcpu_lpage_remapped() */ | ||
253 | for (i = 0; i < nr_cpu_ids - 1; i++) | ||
254 | for (j = i + 1; j < nr_cpu_ids; j++) | ||
255 | if (pcpul_map[i].ptr > pcpul_map[j].ptr) { | ||
256 | struct pcpul_ent tmp = pcpul_map[i]; | ||
257 | pcpul_map[i] = pcpul_map[j]; | ||
258 | pcpul_map[j] = tmp; | ||
259 | } | ||
260 | |||
261 | return ret; | ||
262 | |||
263 | enomem: | ||
264 | for_each_possible_cpu(cpu) | ||
265 | if (pcpul_map[cpu].ptr) | ||
266 | free_bootmem(__pa(pcpul_map[cpu].ptr), pcpul_size); | ||
267 | free_bootmem(__pa(pcpul_map), map_size); | ||
268 | return -ENOMEM; | ||
269 | } | 139 | } |
270 | 140 | ||
271 | /** | 141 | static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) |
272 | * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area | ||
273 | * @kaddr: the kernel address in question | ||
274 | * | ||
275 | * Determine whether @kaddr falls in the pcpul recycled area. This is | ||
276 | * used by pageattr to detect VM aliases and break up the pcpu PMD | ||
277 | * mapping such that the same physical page is not mapped under | ||
278 | * different attributes. | ||
279 | * | ||
280 | * The recycled area is always at the tail of a partially used PMD | ||
281 | * page. | ||
282 | * | ||
283 | * RETURNS: | ||
284 | * Address of corresponding remapped pcpu address if match is found; | ||
285 | * otherwise, NULL. | ||
286 | */ | ||
287 | void *pcpu_lpage_remapped(void *kaddr) | ||
288 | { | 142 | { |
289 | void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK); | 143 | #ifdef CONFIG_NEED_MULTIPLE_NODES |
290 | unsigned long offset = (unsigned long)kaddr & ~PMD_MASK; | 144 | if (early_cpu_to_node(from) == early_cpu_to_node(to)) |
291 | int left = 0, right = nr_cpu_ids - 1; | 145 | return LOCAL_DISTANCE; |
292 | int pos; | 146 | else |
293 | 147 | return REMOTE_DISTANCE; | |
294 | /* pcpul in use at all? */ | ||
295 | if (!pcpul_map) | ||
296 | return NULL; | ||
297 | |||
298 | /* okay, perform binary search */ | ||
299 | while (left <= right) { | ||
300 | pos = (left + right) / 2; | ||
301 | |||
302 | if (pcpul_map[pos].ptr < pmd_addr) | ||
303 | left = pos + 1; | ||
304 | else if (pcpul_map[pos].ptr > pmd_addr) | ||
305 | right = pos - 1; | ||
306 | else { | ||
307 | /* it shouldn't be in the area for the first chunk */ | ||
308 | WARN_ON(offset < pcpul_size); | ||
309 | |||
310 | return pcpul_vm.addr + | ||
311 | pcpul_map[pos].cpu * PMD_SIZE + offset; | ||
312 | } | ||
313 | } | ||
314 | |||
315 | return NULL; | ||
316 | } | ||
317 | #else | 148 | #else |
318 | static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) | 149 | return LOCAL_DISTANCE; |
319 | { | ||
320 | return -EINVAL; | ||
321 | } | ||
322 | #endif | 150 | #endif |
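
pcpu_lpage_remapped() works because pcpul_map was sorted by pointer right after pcpu_setup_first_chunk(), so any kernel address can be mapped back to its per-cpu PMD with a binary search. A userspace model of that lookup (addresses and cpu count are invented):

    #include <stdio.h>
    #include <stdint.h>

    #define NPMD 4
    /* sorted PMD base addresses, one per cpu; made-up values */
    static uintptr_t pmd_base[NPMD] = { 0x200000, 0x600000, 0xa00000, 0xe00000 };

    static int find_pmd(uintptr_t kaddr)
    {
        uintptr_t base = kaddr & ~(uintptr_t)0x1fffff; /* PMD_MASK for 2MB */
        int left = 0, right = NPMD - 1;

        while (left <= right) {
            int pos = (left + right) / 2;

            if (pmd_base[pos] < base)
                left = pos + 1;
            else if (pmd_base[pos] > base)
                right = pos - 1;
            else
                return pos;                 /* cpu index */
        }
        return -1;                          /* not a recycled pcpu page */
    }

    int main(void)
    {
        printf("cpu for 0xa12345: %d\n", find_pmd(0xa12345)); /* -> 2 */
        return 0;
    }
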
323 | |||
324 | /* | ||
325 | * Embedding allocator | ||
326 | * | ||
327 | * The first chunk is sized to just contain the static area plus | ||
328 | * module and dynamic reserves and embedded into linear physical | ||
329 | * mapping so that it can use PMD mapping without additional TLB | ||
330 | * pressure. | ||
331 | */ | ||
332 | static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen) | ||
333 | { | ||
334 | size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; | ||
335 | |||
336 | /* | ||
337 | * If large page isn't supported, there's no benefit in doing | ||
338 | * this. Also, embedding allocation doesn't play well with | ||
339 | * NUMA. | ||
340 | */ | ||
341 | if (!chosen && (!cpu_has_pse || pcpu_need_numa())) | ||
342 | return -EINVAL; | ||
343 | |||
344 | return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, | ||
345 | reserve - PERCPU_FIRST_CHUNK_RESERVE, -1); | ||
346 | } | 151 | } |
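
The embed path sizes the first chunk as static area plus module and dynamic reserves, with the dynamic part shrunk by whatever the first chunk already reserves. A rough model of that sizing, where the PERCPU_* values are placeholders rather than the real config:

    #include <stdio.h>

    #define PERCPU_MODULE_RESERVE      (8 << 10)
    #define PERCPU_DYNAMIC_RESERVE     (20 << 10)
    #define PERCPU_FIRST_CHUNK_RESERVE PERCPU_MODULE_RESERVE

    int main(void)
    {
        size_t static_size = 40 << 10;  /* hypothetical */
        size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
        size_t dyn     = reserve - PERCPU_FIRST_CHUNK_RESERVE;

        printf("first chunk: %zu static + %zu reserved + %zu dynamic\n",
               static_size, (size_t)PERCPU_FIRST_CHUNK_RESERVE, dyn);
        return 0;
    }
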
347 | 152 | ||
348 | /* | 153 | static void __init pcpup_populate_pte(unsigned long addr) |
349 | * 4k page allocator | ||
350 | * | ||
351 | * This is the basic allocator. Static percpu area is allocated | ||
352 | * page-by-page and most of initialization is done by the generic | ||
353 | * setup function. | ||
354 | */ | ||
355 | static struct page **pcpu4k_pages __initdata; | ||
356 | static int pcpu4k_nr_static_pages __initdata; | ||
357 | |||
358 | static struct page * __init pcpu4k_get_page(unsigned int cpu, int pageno) | ||
359 | { | ||
360 | if (pageno < pcpu4k_nr_static_pages) | ||
361 | return pcpu4k_pages[cpu * pcpu4k_nr_static_pages + pageno]; | ||
362 | return NULL; | ||
363 | } | ||
364 | |||
365 | static void __init pcpu4k_populate_pte(unsigned long addr) | ||
366 | { | 154 | { |
367 | populate_extra_pte(addr); | 155 | populate_extra_pte(addr); |
368 | } | 156 | } |
369 | 157 | ||
370 | static ssize_t __init setup_pcpu_4k(size_t static_size) | ||
371 | { | ||
372 | size_t pages_size; | ||
373 | unsigned int cpu; | ||
374 | int i, j; | ||
375 | ssize_t ret; | ||
376 | |||
377 | pcpu4k_nr_static_pages = PFN_UP(static_size); | ||
378 | |||
379 | /* unaligned allocations can't be freed, round up to page size */ | ||
380 | pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * nr_cpu_ids | ||
381 | * sizeof(pcpu4k_pages[0])); | ||
382 | pcpu4k_pages = alloc_bootmem(pages_size); | ||
383 | |||
384 | /* allocate and copy */ | ||
385 | j = 0; | ||
386 | for_each_possible_cpu(cpu) | ||
387 | for (i = 0; i < pcpu4k_nr_static_pages; i++) { | ||
388 | void *ptr; | ||
389 | |||
390 | ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE); | ||
391 | if (!ptr) { | ||
392 | pr_warning("PERCPU: failed to allocate " | ||
393 | "4k page for cpu%u\n", cpu); | ||
394 | goto enomem; | ||
395 | } | ||
396 | |||
397 | memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE); | ||
398 | pcpu4k_pages[j++] = virt_to_page(ptr); | ||
399 | } | ||
400 | |||
401 | /* we're ready, commit */ | ||
402 | pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n", | ||
403 | pcpu4k_nr_static_pages, static_size); | ||
404 | |||
405 | ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, | ||
406 | PERCPU_FIRST_CHUNK_RESERVE, -1, | ||
407 | -1, NULL, pcpu4k_populate_pte); | ||
408 | goto out_free_ar; | ||
409 | |||
410 | enomem: | ||
411 | while (--j >= 0) | ||
412 | free_bootmem(__pa(page_address(pcpu4k_pages[j])), PAGE_SIZE); | ||
413 | ret = -ENOMEM; | ||
414 | out_free_ar: | ||
415 | free_bootmem(__pa(pcpu4k_pages), pages_size); | ||
416 | return ret; | ||
417 | } | ||
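
The 4k allocator keeps a single flat page array indexed as cpu * nr_static_pages + pageno, which is exactly what pcpu4k_get_page() reads back. A quick model of that layout (counts are made up):

    #include <stdio.h>

    int main(void)
    {
        int nr_static_pages = 3, nr_cpus = 2;

        for (int cpu = 0; cpu < nr_cpus; cpu++)
            for (int pageno = 0; pageno < nr_static_pages; pageno++)
                printf("cpu%d page%d -> slot %d\n", cpu, pageno,
                       cpu * nr_static_pages + pageno);
        return 0;
    }
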
418 | |||
419 | /* for explicit first chunk allocator selection */ | ||
420 | static char pcpu_chosen_alloc[16] __initdata; | ||
421 | |||
422 | static int __init percpu_alloc_setup(char *str) | ||
423 | { | ||
424 | strncpy(pcpu_chosen_alloc, str, sizeof(pcpu_chosen_alloc) - 1); | ||
425 | return 0; | ||
426 | } | ||
427 | early_param("percpu_alloc", percpu_alloc_setup); | ||
428 | |||
429 | static inline void setup_percpu_segment(int cpu) | 158 | static inline void setup_percpu_segment(int cpu) |
430 | { | 159 | { |
431 | #ifdef CONFIG_X86_32 | 160 | #ifdef CONFIG_X86_32 |
@@ -441,52 +170,49 @@ static inline void setup_percpu_segment(int cpu) | |||
441 | 170 | ||
442 | void __init setup_per_cpu_areas(void) | 171 | void __init setup_per_cpu_areas(void) |
443 | { | 172 | { |
444 | size_t static_size = __per_cpu_end - __per_cpu_start; | ||
445 | unsigned int cpu; | 173 | unsigned int cpu; |
446 | unsigned long delta; | 174 | unsigned long delta; |
447 | size_t pcpu_unit_size; | 175 | int rc; |
448 | ssize_t ret; | ||
449 | 176 | ||
450 | pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", | 177 | pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", |
451 | NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); | 178 | NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); |
452 | 179 | ||
453 | /* | 180 | /* |
454 | * Allocate percpu area. If PSE is supported, try to make use | 181 | * Allocate percpu area. Embedding allocator is our favorite; |
455 | * of large page mappings. Please read comments on top of | 182 | * however, on NUMA configurations, it can result in very |
456 | * each allocator for details. | 183 | * sparse unit mapping and vmalloc area isn't spacious enough |
184 | * on 32bit. Use page in that case. | ||
457 | */ | 185 | */ |
458 | ret = -EINVAL; | 186 | #ifdef CONFIG_X86_32 |
459 | if (strlen(pcpu_chosen_alloc)) { | 187 | if (pcpu_chosen_fc == PCPU_FC_AUTO && pcpu_need_numa()) |
460 | if (strcmp(pcpu_chosen_alloc, "4k")) { | 188 | pcpu_chosen_fc = PCPU_FC_PAGE; |
461 | if (!strcmp(pcpu_chosen_alloc, "lpage")) | 189 | #endif |
462 | ret = setup_pcpu_lpage(static_size, true); | 190 | rc = -EINVAL; |
463 | else if (!strcmp(pcpu_chosen_alloc, "embed")) | 191 | if (pcpu_chosen_fc != PCPU_FC_PAGE) { |
464 | ret = setup_pcpu_embed(static_size, true); | 192 | const size_t atom_size = cpu_has_pse ? PMD_SIZE : PAGE_SIZE; |
465 | else | 193 | const size_t dyn_size = PERCPU_MODULE_RESERVE + |
466 | pr_warning("PERCPU: unknown allocator %s " | 194 | PERCPU_DYNAMIC_RESERVE - PERCPU_FIRST_CHUNK_RESERVE; |
467 | "specified\n", pcpu_chosen_alloc); | 195 | |
468 | if (ret < 0) | 196 | rc = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, |
469 | pr_warning("PERCPU: %s allocator failed (%zd), " | 197 | dyn_size, atom_size, |
470 | "falling back to 4k\n", | 198 | pcpu_cpu_distance, |
471 | pcpu_chosen_alloc, ret); | 199 | pcpu_fc_alloc, pcpu_fc_free); |
472 | } | 200 | if (rc < 0) |
473 | } else { | 201 | pr_warning("PERCPU: %s allocator failed (%d), " |
474 | ret = setup_pcpu_lpage(static_size, false); | 202 | "falling back to page size\n", |
475 | if (ret < 0) | 203 | pcpu_fc_names[pcpu_chosen_fc], rc); |
476 | ret = setup_pcpu_embed(static_size, false); | ||
477 | } | 204 | } |
478 | if (ret < 0) | 205 | if (rc < 0) |
479 | ret = setup_pcpu_4k(static_size); | 206 | rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, |
480 | if (ret < 0) | 207 | pcpu_fc_alloc, pcpu_fc_free, |
481 | panic("cannot allocate static percpu area (%zu bytes, err=%zd)", | 208 | pcpup_populate_pte); |
482 | static_size, ret); | 209 | if (rc < 0) |
483 | 210 | panic("cannot initialize percpu area (err=%d)", rc); | |
484 | pcpu_unit_size = ret; | ||
485 | 211 | ||
486 | /* alrighty, percpu areas up and running */ | 212 | /* alrighty, percpu areas up and running */ |
487 | delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; | 213 | delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; |
488 | for_each_possible_cpu(cpu) { | 214 | for_each_possible_cpu(cpu) { |
489 | per_cpu_offset(cpu) = delta + cpu * pcpu_unit_size; | 215 | per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu]; |
490 | per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); | 216 | per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); |
491 | per_cpu(cpu_number, cpu) = cpu; | 217 | per_cpu(cpu_number, cpu) = cpu; |
492 | setup_percpu_segment(cpu); | 218 | setup_percpu_segment(cpu); |
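
Every per-cpu access resolves to the variable's linked address plus per_cpu_offset(cpu), where the offset is the distance from the linked static area to that cpu's unit in the first chunk. A sketch of the fixup with invented addresses:

    #include <stdio.h>

    int main(void)
    {
        unsigned long per_cpu_start = 0x1000;    /* linked address */
        unsigned long base_addr     = 0x200000;  /* first chunk base */
        unsigned long unit_off[2]   = { 0x0, 0x4000 };
        unsigned long delta = base_addr - per_cpu_start;

        for (int cpu = 0; cpu < 2; cpu++)
            printf("cpu%d offset = 0x%lx\n", cpu, delta + unit_off[cpu]);
        return 0;
    }
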
diff --git a/arch/x86/kernel/sfi.c b/arch/x86/kernel/sfi.c new file mode 100644 index 000000000000..34e099382651 --- /dev/null +++ b/arch/x86/kernel/sfi.c | |||
@@ -0,0 +1,122 @@ | |||
1 | /* | ||
2 | * sfi.c - x86 architecture SFI support. | ||
3 | * | ||
4 | * Copyright (c) 2009, Intel Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms and conditions of the GNU General Public License, | ||
8 | * version 2, as published by the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License along with | ||
16 | * this program; if not, write to the Free Software Foundation, Inc., | ||
17 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
18 | * | ||
19 | */ | ||
20 | |||
21 | #define KMSG_COMPONENT "SFI" | ||
22 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt | ||
23 | |||
24 | #include <linux/acpi.h> | ||
25 | #include <linux/init.h> | ||
26 | #include <linux/sfi.h> | ||
27 | #include <linux/io.h> | ||
28 | |||
29 | #include <asm/io_apic.h> | ||
30 | #include <asm/mpspec.h> | ||
31 | #include <asm/setup.h> | ||
32 | #include <asm/apic.h> | ||
33 | |||
34 | #ifdef CONFIG_X86_LOCAL_APIC | ||
35 | static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; | ||
36 | |||
37 | void __init mp_sfi_register_lapic_address(unsigned long address) | ||
38 | { | ||
39 | mp_lapic_addr = address; | ||
40 | |||
41 | set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr); | ||
42 | if (boot_cpu_physical_apicid == -1U) | ||
43 | boot_cpu_physical_apicid = read_apic_id(); | ||
44 | |||
45 | pr_info("Boot CPU = %d\n", boot_cpu_physical_apicid); | ||
46 | } | ||
47 | |||
48 | /* All CPUs enumerated by SFI must be present and enabled */ | ||
49 | void __cpuinit mp_sfi_register_lapic(u8 id) | ||
50 | { | ||
51 | if (id >= MAX_APICS) { | ||
52 | pr_warning("Processor #%d invalid (max %d)\n", | ||
53 | id, MAX_APICS); | ||
54 | return; | ||
55 | } | ||
56 | |||
57 | pr_info("registering lapic[%d]\n", id); | ||
58 | |||
59 | generic_processor_info(id, GET_APIC_VERSION(apic_read(APIC_LVR))); | ||
60 | } | ||
61 | |||
62 | static int __init sfi_parse_cpus(struct sfi_table_header *table) | ||
63 | { | ||
64 | struct sfi_table_simple *sb; | ||
65 | struct sfi_cpu_table_entry *pentry; | ||
66 | int i; | ||
67 | int cpu_num; | ||
68 | |||
69 | sb = (struct sfi_table_simple *)table; | ||
70 | cpu_num = SFI_GET_NUM_ENTRIES(sb, struct sfi_cpu_table_entry); | ||
71 | pentry = (struct sfi_cpu_table_entry *)sb->pentry; | ||
72 | |||
73 | for (i = 0; i < cpu_num; i++) { | ||
74 | mp_sfi_register_lapic(pentry->apic_id); | ||
75 | pentry++; | ||
76 | } | ||
77 | |||
78 | smp_found_config = 1; | ||
79 | return 0; | ||
80 | } | ||
81 | #endif /* CONFIG_X86_LOCAL_APIC */ | ||
82 | |||
83 | #ifdef CONFIG_X86_IO_APIC | ||
84 | static u32 gsi_base; | ||
85 | |||
86 | static int __init sfi_parse_ioapic(struct sfi_table_header *table) | ||
87 | { | ||
88 | struct sfi_table_simple *sb; | ||
89 | struct sfi_apic_table_entry *pentry; | ||
90 | int i, num; | ||
91 | |||
92 | sb = (struct sfi_table_simple *)table; | ||
93 | num = SFI_GET_NUM_ENTRIES(sb, struct sfi_apic_table_entry); | ||
94 | pentry = (struct sfi_apic_table_entry *)sb->pentry; | ||
95 | |||
96 | for (i = 0; i < num; i++) { | ||
97 | mp_register_ioapic(i, pentry->phys_addr, gsi_base); | ||
98 | gsi_base += io_apic_get_redir_entries(i); | ||
99 | pentry++; | ||
100 | } | ||
101 | |||
102 | WARN(pic_mode, KERN_WARNING | ||
103 | "SFI: pic_mod shouldn't be 1 when IOAPIC table is present\n"); | ||
104 | pic_mode = 0; | ||
105 | return 0; | ||
106 | } | ||
107 | #endif /* CONFIG_X86_IO_APIC */ | ||
108 | |||
109 | /* | ||
110 | * sfi_platform_init(): register lapics & io-apics | ||
111 | */ | ||
112 | int __init sfi_platform_init(void) | ||
113 | { | ||
114 | #ifdef CONFIG_X86_LOCAL_APIC | ||
115 | mp_sfi_register_lapic_address(sfi_lapic_addr); | ||
116 | sfi_table_parse(SFI_SIG_CPUS, NULL, NULL, sfi_parse_cpus); | ||
117 | #endif | ||
118 | #ifdef CONFIG_X86_IO_APIC | ||
119 | sfi_table_parse(SFI_SIG_APIC, NULL, NULL, sfi_parse_ioapic); | ||
120 | #endif | ||
121 | return 0; | ||
122 | } | ||
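
sfi_parse_cpus() and sfi_parse_ioapic() size their loops with SFI_GET_NUM_ENTRIES(), which derives the entry count from the table length minus the header. A toy model of that computation; the struct layout below is invented for illustration, not the real SFI ABI:

    #include <stdio.h>
    #include <stdint.h>

    struct sfi_header { char sig[4]; uint32_t len; }; /* invented stand-in */
    struct cpu_entry  { uint32_t apic_id; };

    int main(void)
    {
        struct sfi_header hdr = { {'C', 'P', 'U', 'S'},
                                  sizeof(hdr) + 4 * sizeof(struct cpu_entry) };
        int num = (hdr.len - sizeof(hdr)) / sizeof(struct cpu_entry);

        printf("table advertises %d cpu entries\n", num); /* -> 4 */
        return 0;
    }
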
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index baaf8052f355..fbf3b07c8567 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -847,7 +847,7 @@ static void do_signal(struct pt_regs *regs) | |||
847 | void | 847 | void |
848 | do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | 848 | do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) |
849 | { | 849 | { |
850 | #ifdef CONFIG_X86_NEW_MCE | 850 | #ifdef CONFIG_X86_MCE |
851 | /* notify userspace of pending MCEs */ | 851 | /* notify userspace of pending MCEs */ |
852 | if (thread_info_flags & _TIF_MCE_NOTIFY) | 852 | if (thread_info_flags & _TIF_MCE_NOTIFY) |
853 | mce_notify_process(); | 853 | mce_notify_process(); |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index ec7b64c2df82..213a7a3e4562 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -47,6 +47,7 @@ | |||
47 | #include <linux/bootmem.h> | 47 | #include <linux/bootmem.h> |
48 | #include <linux/err.h> | 48 | #include <linux/err.h> |
49 | #include <linux/nmi.h> | 49 | #include <linux/nmi.h> |
50 | #include <linux/tboot.h> | ||
50 | 51 | ||
51 | #include <asm/acpi.h> | 52 | #include <asm/acpi.h> |
52 | #include <asm/desc.h> | 53 | #include <asm/desc.h> |
@@ -324,7 +325,7 @@ notrace static void __cpuinit start_secondary(void *unused) | |||
324 | /* enable local interrupts */ | 325 | /* enable local interrupts */ |
325 | local_irq_enable(); | 326 | local_irq_enable(); |
326 | 327 | ||
327 | setup_secondary_clock(); | 328 | x86_cpuinit.setup_percpu_clockev(); |
328 | 329 | ||
329 | wmb(); | 330 | wmb(); |
330 | load_debug_registers(); | 331 | load_debug_registers(); |
@@ -1060,12 +1061,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
1060 | #endif | 1061 | #endif |
1061 | current_thread_info()->cpu = 0; /* needed? */ | 1062 | current_thread_info()->cpu = 0; /* needed? */ |
1062 | for_each_possible_cpu(i) { | 1063 | for_each_possible_cpu(i) { |
1063 | alloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); | 1064 | zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); |
1064 | alloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); | 1065 | zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); |
1065 | alloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL); | 1066 | zalloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL); |
1066 | cpumask_clear(per_cpu(cpu_core_map, i)); | ||
1067 | cpumask_clear(per_cpu(cpu_sibling_map, i)); | ||
1068 | cpumask_clear(cpu_data(i).llc_shared_map); | ||
1069 | } | 1067 | } |
1070 | set_cpu_sibling_map(0); | 1068 | set_cpu_sibling_map(0); |
1071 | 1069 | ||
@@ -1115,13 +1113,26 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
1115 | 1113 | ||
1116 | printk(KERN_INFO "CPU%d: ", 0); | 1114 | printk(KERN_INFO "CPU%d: ", 0); |
1117 | print_cpu_info(&cpu_data(0)); | 1115 | print_cpu_info(&cpu_data(0)); |
1118 | setup_boot_clock(); | 1116 | x86_init.timers.setup_percpu_clockev(); |
1119 | 1117 | ||
1120 | if (is_uv_system()) | 1118 | if (is_uv_system()) |
1121 | uv_system_init(); | 1119 | uv_system_init(); |
1120 | |||
1121 | set_mtrr_aps_delayed_init(); | ||
1122 | out: | 1122 | out: |
1123 | preempt_enable(); | 1123 | preempt_enable(); |
1124 | } | 1124 | } |
1125 | |||
1126 | void arch_enable_nonboot_cpus_begin(void) | ||
1127 | { | ||
1128 | set_mtrr_aps_delayed_init(); | ||
1129 | } | ||
1130 | |||
1131 | void arch_enable_nonboot_cpus_end(void) | ||
1132 | { | ||
1133 | mtrr_aps_init(); | ||
1134 | } | ||
1135 | |||
1125 | /* | 1136 | /* |
1126 | * Early setup to make printk work. | 1137 | * Early setup to make printk work. |
1127 | */ | 1138 | */ |
@@ -1143,6 +1154,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus) | |||
1143 | setup_ioapic_dest(); | 1154 | setup_ioapic_dest(); |
1144 | #endif | 1155 | #endif |
1145 | check_nmi_watchdog(); | 1156 | check_nmi_watchdog(); |
1157 | mtrr_aps_init(); | ||
1146 | } | 1158 | } |
1147 | 1159 | ||
1148 | static int __initdata setup_possible_cpus = -1; | 1160 | static int __initdata setup_possible_cpus = -1; |
@@ -1321,6 +1333,7 @@ void play_dead_common(void) | |||
1321 | void native_play_dead(void) | 1333 | void native_play_dead(void) |
1322 | { | 1334 | { |
1323 | play_dead_common(); | 1335 | play_dead_common(); |
1336 | tboot_shutdown(TB_SHUTDOWN_WFS); | ||
1324 | wbinvd_halt(); | 1337 | wbinvd_halt(); |
1325 | } | 1338 | } |
1326 | 1339 | ||
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index d51321ddafda..0157cd26d7cc 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S | |||
@@ -335,4 +335,4 @@ ENTRY(sys_call_table) | |||
335 | .long sys_preadv | 335 | .long sys_preadv |
336 | .long sys_pwritev | 336 | .long sys_pwritev |
337 | .long sys_rt_tgsigqueueinfo /* 335 */ | 337 | .long sys_rt_tgsigqueueinfo /* 335 */ |
338 | .long sys_perf_counter_open | 338 | .long sys_perf_event_open |
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c new file mode 100644 index 000000000000..86c9f91b48ae --- /dev/null +++ b/arch/x86/kernel/tboot.c | |||
@@ -0,0 +1,447 @@ | |||
1 | /* | ||
2 | * tboot.c: main implementation of helper functions used by kernel for | ||
3 | * runtime support of Intel(R) Trusted Execution Technology | ||
4 | * | ||
5 | * Copyright (c) 2006-2009, Intel Corporation | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify it | ||
8 | * under the terms and conditions of the GNU General Public License, | ||
9 | * version 2, as published by the Free Software Foundation. | ||
10 | * | ||
11 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
14 | * more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License along with | ||
17 | * this program; if not, write to the Free Software Foundation, Inc., | ||
18 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
19 | * | ||
20 | */ | ||
21 | |||
22 | #include <linux/dma_remapping.h> | ||
23 | #include <linux/init_task.h> | ||
24 | #include <linux/spinlock.h> | ||
25 | #include <linux/delay.h> | ||
26 | #include <linux/sched.h> | ||
27 | #include <linux/init.h> | ||
28 | #include <linux/dmar.h> | ||
29 | #include <linux/cpu.h> | ||
30 | #include <linux/pfn.h> | ||
31 | #include <linux/mm.h> | ||
32 | #include <linux/tboot.h> | ||
33 | |||
34 | #include <asm/trampoline.h> | ||
35 | #include <asm/processor.h> | ||
36 | #include <asm/bootparam.h> | ||
37 | #include <asm/pgtable.h> | ||
38 | #include <asm/pgalloc.h> | ||
39 | #include <asm/fixmap.h> | ||
40 | #include <asm/proto.h> | ||
41 | #include <asm/setup.h> | ||
42 | #include <asm/e820.h> | ||
43 | #include <asm/io.h> | ||
44 | |||
45 | #include "acpi/realmode/wakeup.h" | ||
46 | |||
47 | /* Global pointer to shared data; NULL means no measured launch. */ | ||
48 | struct tboot *tboot __read_mostly; | ||
49 | |||
50 | /* timeout for APs (in secs) to enter wait-for-SIPI state during shutdown */ | ||
51 | #define AP_WAIT_TIMEOUT 1 | ||
52 | |||
53 | #undef pr_fmt | ||
54 | #define pr_fmt(fmt) "tboot: " fmt | ||
55 | |||
56 | static u8 tboot_uuid[16] __initdata = TBOOT_UUID; | ||
57 | |||
58 | void __init tboot_probe(void) | ||
59 | { | ||
60 | /* Look for valid page-aligned address for shared page. */ | ||
61 | if (!boot_params.tboot_addr) | ||
62 | return; | ||
63 | /* | ||
64 | * Also verify that it is mapped as we expect before calling | ||
65 | * set_fixmap(), to reduce the chance of a garbage value causing a crash. | ||
66 | */ | ||
67 | if (!e820_any_mapped(boot_params.tboot_addr, | ||
68 | boot_params.tboot_addr, E820_RESERVED)) { | ||
69 | pr_warning("non-0 tboot_addr but it is not of type E820_RESERVED\n"); | ||
70 | return; | ||
71 | } | ||
72 | |||
73 | /* only a natively booted kernel should be using TXT */ | ||
74 | if (paravirt_enabled()) { | ||
75 | pr_warning("non-0 tboot_addr but pv_ops is enabled\n"); | ||
76 | return; | ||
77 | } | ||
78 | |||
79 | /* Map and check for tboot UUID. */ | ||
80 | set_fixmap(FIX_TBOOT_BASE, boot_params.tboot_addr); | ||
81 | tboot = (struct tboot *)fix_to_virt(FIX_TBOOT_BASE); | ||
82 | if (memcmp(&tboot_uuid, &tboot->uuid, sizeof(tboot->uuid))) { | ||
83 | pr_warning("tboot at 0x%llx is invalid\n", | ||
84 | boot_params.tboot_addr); | ||
85 | tboot = NULL; | ||
86 | return; | ||
87 | } | ||
88 | if (tboot->version < 5) { | ||
89 | pr_warning("tboot version is invalid: %u\n", tboot->version); | ||
90 | tboot = NULL; | ||
91 | return; | ||
92 | } | ||
93 | |||
94 | pr_info("found shared page at phys addr 0x%llx:\n", | ||
95 | boot_params.tboot_addr); | ||
96 | pr_debug("version: %d\n", tboot->version); | ||
97 | pr_debug("log_addr: 0x%08x\n", tboot->log_addr); | ||
98 | pr_debug("shutdown_entry: 0x%x\n", tboot->shutdown_entry); | ||
99 | pr_debug("tboot_base: 0x%08x\n", tboot->tboot_base); | ||
100 | pr_debug("tboot_size: 0x%x\n", tboot->tboot_size); | ||
101 | } | ||
102 | |||
103 | static pgd_t *tboot_pg_dir; | ||
104 | static struct mm_struct tboot_mm = { | ||
105 | .mm_rb = RB_ROOT, | ||
106 | .pgd = swapper_pg_dir, | ||
107 | .mm_users = ATOMIC_INIT(2), | ||
108 | .mm_count = ATOMIC_INIT(1), | ||
109 | .mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem), | ||
110 | .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock), | ||
111 | .mmlist = LIST_HEAD_INIT(init_mm.mmlist), | ||
112 | .cpu_vm_mask = CPU_MASK_ALL, | ||
113 | }; | ||
114 | |||
115 | static inline void switch_to_tboot_pt(void) | ||
116 | { | ||
117 | write_cr3(virt_to_phys(tboot_pg_dir)); | ||
118 | } | ||
119 | |||
120 | static int map_tboot_page(unsigned long vaddr, unsigned long pfn, | ||
121 | pgprot_t prot) | ||
122 | { | ||
123 | pgd_t *pgd; | ||
124 | pud_t *pud; | ||
125 | pmd_t *pmd; | ||
126 | pte_t *pte; | ||
127 | |||
128 | pgd = pgd_offset(&tboot_mm, vaddr); | ||
129 | pud = pud_alloc(&tboot_mm, pgd, vaddr); | ||
130 | if (!pud) | ||
131 | return -1; | ||
132 | pmd = pmd_alloc(&tboot_mm, pud, vaddr); | ||
133 | if (!pmd) | ||
134 | return -1; | ||
135 | pte = pte_alloc_map(&tboot_mm, pmd, vaddr); | ||
136 | if (!pte) | ||
137 | return -1; | ||
138 | set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot)); | ||
139 | pte_unmap(pte); | ||
140 | return 0; | ||
141 | } | ||
142 | |||
143 | static int map_tboot_pages(unsigned long vaddr, unsigned long start_pfn, | ||
144 | unsigned long nr) | ||
145 | { | ||
146 | /* Reuse the original kernel mapping */ | ||
147 | tboot_pg_dir = pgd_alloc(&tboot_mm); | ||
148 | if (!tboot_pg_dir) | ||
149 | return -1; | ||
150 | |||
151 | for (; nr > 0; nr--, vaddr += PAGE_SIZE, start_pfn++) { | ||
152 | if (map_tboot_page(vaddr, start_pfn, PAGE_KERNEL_EXEC)) | ||
153 | return -1; | ||
154 | } | ||
155 | |||
156 | return 0; | ||
157 | } | ||
158 | |||
159 | static void tboot_create_trampoline(void) | ||
160 | { | ||
161 | u32 map_base, map_size; | ||
162 | |||
163 | /* Create identity map for tboot shutdown code. */ | ||
164 | map_base = PFN_DOWN(tboot->tboot_base); | ||
165 | map_size = PFN_UP(tboot->tboot_size); | ||
166 | if (map_tboot_pages(map_base << PAGE_SHIFT, map_base, map_size)) | ||
167 | panic("tboot: Error mapping tboot pages (mfns) @ 0x%x, 0x%x\n", | ||
168 | map_base, map_size); | ||
169 | } | ||
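
The identity-map bounds come from PFN_DOWN/PFN_UP: the base rounds down to a page frame, the size rounds up to a page count. A standalone check of that arithmetic, assuming 4k pages and invented addresses:

    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
    #define PFN_UP(x)   (((x) + (1UL << PAGE_SHIFT) - 1) >> PAGE_SHIFT)

    int main(void)
    {
        unsigned long tboot_base = 0x8c400000, tboot_size = 0x3000;
        unsigned long map_base = PFN_DOWN(tboot_base);
        unsigned long map_size = PFN_UP(tboot_size); /* pages to map */

        printf("map %lu page(s) at va/pa 0x%lx\n",
               map_size, map_base << PAGE_SHIFT);
        return 0;
    }
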
170 | |||
171 | #ifdef CONFIG_ACPI_SLEEP | ||
172 | |||
173 | static void add_mac_region(phys_addr_t start, unsigned long size) | ||
174 | { | ||
175 | struct tboot_mac_region *mr; | ||
176 | phys_addr_t end = start + size; | ||
177 | |||
178 | if (start && size) { | ||
179 | mr = &tboot->mac_regions[tboot->num_mac_regions++]; | ||
180 | mr->start = round_down(start, PAGE_SIZE); | ||
181 | mr->size = round_up(end, PAGE_SIZE) - mr->start; | ||
182 | } | ||
183 | } | ||
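
add_mac_region() widens each region to whole pages: the start rounds down, the end rounds up, and the size is the difference. The same arithmetic in isolation, with invented addresses:

    #include <stdio.h>

    #define PAGE_SIZE 4096UL
    #define round_down(x, a) ((x) & ~((a) - 1))
    #define round_up(x, a)   (round_down((x) + (a) - 1, (a)))

    int main(void)
    {
        unsigned long start = 0x100234, size = 0x1800;
        unsigned long end = start + size;
        unsigned long mr_start = round_down(start, PAGE_SIZE);
        unsigned long mr_size  = round_up(end, PAGE_SIZE) - mr_start;

        printf("MAC region: 0x%lx + 0x%lx\n", mr_start, mr_size);
        return 0;
    }
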
184 | |||
185 | static int tboot_setup_sleep(void) | ||
186 | { | ||
187 | tboot->num_mac_regions = 0; | ||
188 | |||
189 | /* S3 resume code */ | ||
190 | add_mac_region(acpi_wakeup_address, WAKEUP_SIZE); | ||
191 | |||
192 | #ifdef CONFIG_X86_TRAMPOLINE | ||
193 | /* AP trampoline code */ | ||
194 | add_mac_region(virt_to_phys(trampoline_base), TRAMPOLINE_SIZE); | ||
195 | #endif | ||
196 | |||
197 | /* kernel code + data + bss */ | ||
198 | add_mac_region(virt_to_phys(_text), _end - _text); | ||
199 | |||
200 | tboot->acpi_sinfo.kernel_s3_resume_vector = acpi_wakeup_address; | ||
201 | |||
202 | return 0; | ||
203 | } | ||
204 | |||
205 | #else /* no CONFIG_ACPI_SLEEP */ | ||
206 | |||
207 | static int tboot_setup_sleep(void) | ||
208 | { | ||
209 | /* S3 shutdown requested, but S3 not supported by the kernel... */ | ||
210 | BUG(); | ||
211 | return -1; | ||
212 | } | ||
213 | |||
214 | #endif | ||
215 | |||
216 | void tboot_shutdown(u32 shutdown_type) | ||
217 | { | ||
218 | void (*shutdown)(void); | ||
219 | |||
220 | if (!tboot_enabled()) | ||
221 | return; | ||
222 | |||
223 | /* | ||
224 | * if we're being called before the 1:1 mapping is set up then just | ||
225 | * return and let the normal shutdown happen; this should only be | ||
226 | * due to very early panic() | ||
227 | */ | ||
228 | if (!tboot_pg_dir) | ||
229 | return; | ||
230 | |||
231 | /* if this is S3 then set regions to MAC */ | ||
232 | if (shutdown_type == TB_SHUTDOWN_S3) | ||
233 | if (tboot_setup_sleep()) | ||
234 | return; | ||
235 | |||
236 | tboot->shutdown_type = shutdown_type; | ||
237 | |||
238 | switch_to_tboot_pt(); | ||
239 | |||
240 | shutdown = (void(*)(void))(unsigned long)tboot->shutdown_entry; | ||
241 | shutdown(); | ||
242 | |||
243 | /* should not reach here */ | ||
244 | while (1) | ||
245 | halt(); | ||
246 | } | ||
247 | |||
248 | static void tboot_copy_fadt(const struct acpi_table_fadt *fadt) | ||
249 | { | ||
250 | #define TB_COPY_GAS(tbg, g) \ | ||
251 | tbg.space_id = g.space_id; \ | ||
252 | tbg.bit_width = g.bit_width; \ | ||
253 | tbg.bit_offset = g.bit_offset; \ | ||
254 | tbg.access_width = g.access_width; \ | ||
255 | tbg.address = g.address; | ||
256 | |||
257 | TB_COPY_GAS(tboot->acpi_sinfo.pm1a_cnt_blk, fadt->xpm1a_control_block); | ||
258 | TB_COPY_GAS(tboot->acpi_sinfo.pm1b_cnt_blk, fadt->xpm1b_control_block); | ||
259 | TB_COPY_GAS(tboot->acpi_sinfo.pm1a_evt_blk, fadt->xpm1a_event_block); | ||
260 | TB_COPY_GAS(tboot->acpi_sinfo.pm1b_evt_blk, fadt->xpm1b_event_block); | ||
261 | |||
262 | /* | ||
263 | * We need the phys addr of the waking vector, but can't use | ||
264 | * virt_to_phys() on &acpi_gbl_FACS because it is ioremap'ed, so | ||
265 | * calculate it from the FACS phys addr instead. | ||
266 | */ | ||
267 | tboot->acpi_sinfo.wakeup_vector = fadt->facs + | ||
268 | offsetof(struct acpi_table_facs, firmware_waking_vector); | ||
269 | } | ||
270 | |||
271 | void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control) | ||
272 | { | ||
273 | static u32 acpi_shutdown_map[ACPI_S_STATE_COUNT] = { | ||
274 | /* S0,1,2: */ -1, -1, -1, | ||
275 | /* S3: */ TB_SHUTDOWN_S3, | ||
276 | /* S4: */ TB_SHUTDOWN_S4, | ||
277 | /* S5: */ TB_SHUTDOWN_S5 }; | ||
278 | |||
279 | if (!tboot_enabled()) | ||
280 | return; | ||
281 | |||
282 | tboot_copy_fadt(&acpi_gbl_FADT); | ||
283 | tboot->acpi_sinfo.pm1a_cnt_val = pm1a_control; | ||
284 | tboot->acpi_sinfo.pm1b_cnt_val = pm1b_control; | ||
285 | /* we always use the 32b wakeup vector */ | ||
286 | tboot->acpi_sinfo.vector_width = 32; | ||
287 | |||
288 | if (sleep_state >= ACPI_S_STATE_COUNT || | ||
289 | acpi_shutdown_map[sleep_state] == -1) { | ||
290 | pr_warning("unsupported sleep state 0x%x\n", sleep_state); | ||
291 | return; | ||
292 | } | ||
293 | |||
294 | tboot_shutdown(acpi_shutdown_map[sleep_state]); | ||
295 | } | ||
296 | |||
297 | static atomic_t ap_wfs_count; | ||
298 | |||
299 | static int tboot_wait_for_aps(int num_aps) | ||
300 | { | ||
301 | unsigned long timeout; | ||
302 | |||
303 | timeout = AP_WAIT_TIMEOUT*HZ; | ||
304 | while (atomic_read((atomic_t *)&tboot->num_in_wfs) != num_aps && | ||
305 | timeout) { | ||
306 | mdelay(1); | ||
307 | timeout--; | ||
308 | } | ||
309 | |||
310 | if (!timeout) | ||
311 | pr_warning("tboot wait for APs timeout\n"); | ||
312 | |||
313 | return !(atomic_read((atomic_t *)&tboot->num_in_wfs) == num_aps); | ||
314 | } | ||
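
The wait loop is a countdown poll: the budget is decremented each tick, and the warning belongs on the exhausted side (timeout reaching zero), as in the corrected check above. A generic model of the pattern, with an invented condition and budget:

    #include <stdio.h>

    static int cond_met(int i) { return i > 7; } /* stand-in condition */

    int main(void)
    {
        int timeout = 10, i = 0;

        while (!cond_met(i) && timeout) {
            i++;            /* mdelay(1) in the kernel version */
            timeout--;
        }
        if (!timeout)
            printf("timed out waiting\n");
        else
            printf("condition met with %d ticks to spare\n", timeout);
        return 0;
    }
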
315 | |||
316 | static int __cpuinit tboot_cpu_callback(struct notifier_block *nfb, | ||
317 | unsigned long action, void *hcpu) | ||
318 | { | ||
319 | switch (action) { | ||
320 | case CPU_DYING: | ||
321 | atomic_inc(&ap_wfs_count); | ||
322 | if (num_online_cpus() == 1) | ||
323 | if (tboot_wait_for_aps(atomic_read(&ap_wfs_count))) | ||
324 | return NOTIFY_BAD; | ||
325 | break; | ||
326 | } | ||
327 | return NOTIFY_OK; | ||
328 | } | ||
329 | |||
330 | static struct notifier_block tboot_cpu_notifier __cpuinitdata = | ||
331 | { | ||
332 | .notifier_call = tboot_cpu_callback, | ||
333 | }; | ||
334 | |||
335 | static __init int tboot_late_init(void) | ||
336 | { | ||
337 | if (!tboot_enabled()) | ||
338 | return 0; | ||
339 | |||
340 | tboot_create_trampoline(); | ||
341 | |||
342 | atomic_set(&ap_wfs_count, 0); | ||
343 | register_hotcpu_notifier(&tboot_cpu_notifier); | ||
344 | return 0; | ||
345 | } | ||
346 | |||
347 | late_initcall(tboot_late_init); | ||
348 | |||
349 | /* | ||
350 | * TXT configuration registers (offsets from TXT_{PUB, PRIV}_CONFIG_REGS_BASE) | ||
351 | */ | ||
352 | |||
353 | #define TXT_PUB_CONFIG_REGS_BASE 0xfed30000 | ||
354 | #define TXT_PRIV_CONFIG_REGS_BASE 0xfed20000 | ||
355 | |||
356 | /* # pages for each config regs space - used by fixmap */ | ||
357 | #define NR_TXT_CONFIG_PAGES ((TXT_PUB_CONFIG_REGS_BASE - \ | ||
358 | TXT_PRIV_CONFIG_REGS_BASE) >> PAGE_SHIFT) | ||
359 | |||
360 | /* offsets from pub/priv config space */ | ||
361 | #define TXTCR_HEAP_BASE 0x0300 | ||
362 | #define TXTCR_HEAP_SIZE 0x0308 | ||
363 | |||
364 | #define SHA1_SIZE 20 | ||
365 | |||
366 | struct sha1_hash { | ||
367 | u8 hash[SHA1_SIZE]; | ||
368 | }; | ||
369 | |||
370 | struct sinit_mle_data { | ||
371 | u32 version; /* currently 6 */ | ||
372 | struct sha1_hash bios_acm_id; | ||
373 | u32 edx_senter_flags; | ||
374 | u64 mseg_valid; | ||
375 | struct sha1_hash sinit_hash; | ||
376 | struct sha1_hash mle_hash; | ||
377 | struct sha1_hash stm_hash; | ||
378 | struct sha1_hash lcp_policy_hash; | ||
379 | u32 lcp_policy_control; | ||
380 | u32 rlp_wakeup_addr; | ||
381 | u32 reserved; | ||
382 | u32 num_mdrs; | ||
383 | u32 mdrs_off; | ||
384 | u32 num_vtd_dmars; | ||
385 | u32 vtd_dmars_off; | ||
386 | } __packed; | ||
387 | |||
388 | struct acpi_table_header *tboot_get_dmar_table(struct acpi_table_header *dmar_tbl) | ||
389 | { | ||
390 | void *heap_base, *heap_ptr, *config; | ||
391 | |||
392 | if (!tboot_enabled()) | ||
393 | return dmar_tbl; | ||
394 | |||
395 | /* | ||
396 | * ACPI tables may not be DMA protected by tboot, so use DMAR copy | ||
397 | * SINIT saved in SinitMleData in TXT heap (which is DMA protected) | ||
398 | */ | ||
399 | |||
400 | /* map config space in order to get heap addr */ | ||
401 | config = ioremap(TXT_PUB_CONFIG_REGS_BASE, NR_TXT_CONFIG_PAGES * | ||
402 | PAGE_SIZE); | ||
403 | if (!config) | ||
404 | return NULL; | ||
405 | |||
406 | /* now map TXT heap */ | ||
407 | heap_base = ioremap(*(u64 *)(config + TXTCR_HEAP_BASE), | ||
408 | *(u64 *)(config + TXTCR_HEAP_SIZE)); | ||
409 | iounmap(config); | ||
410 | if (!heap_base) | ||
411 | return NULL; | ||
412 | |||
413 | /* walk heap to SinitMleData */ | ||
414 | /* skip BiosData */ | ||
415 | heap_ptr = heap_base + *(u64 *)heap_base; | ||
416 | /* skip OsMleData */ | ||
417 | heap_ptr += *(u64 *)heap_ptr; | ||
418 | /* skip OsSinitData */ | ||
419 | heap_ptr += *(u64 *)heap_ptr; | ||
420 | /* now points to SinitMleDataSize; set to SinitMleData */ | ||
421 | heap_ptr += sizeof(u64); | ||
422 | /* get addr of DMAR table */ | ||
423 | dmar_tbl = (struct acpi_table_header *)(heap_ptr + | ||
424 | ((struct sinit_mle_data *)heap_ptr)->vtd_dmars_off - | ||
425 | sizeof(u64)); | ||
426 | |||
427 | /* don't unmap heap because dmar.c needs access to this */ | ||
428 | |||
429 | return dmar_tbl; | ||
430 | } | ||
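
The heap walk above works because each TXT heap region is prefixed by its own u64 size (including the size field itself), so skipping a region is simply "ptr += *(u64 *)ptr". A userspace model of the walk; the region sizes below are fabricated:

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    int main(void)
    {
        uint8_t heap[64];
        uint64_t sz;

        /* three size-prefixed regions of 16, 24 and 16 bytes */
        sz = 16; memcpy(heap +  0, &sz, 8);
        sz = 24; memcpy(heap + 16, &sz, 8);
        sz = 16; memcpy(heap + 40, &sz, 8);

        uint8_t *p = heap;
        memcpy(&sz, p, 8); p += sz;     /* skip BiosData    */
        memcpy(&sz, p, 8); p += sz;     /* skip OsMleData   */
        memcpy(&sz, p, 8); p += sz;     /* skip OsSinitData */
        printf("SinitMleData size field at offset %td\n", p - heap);
        return 0;
    }
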
431 | |||
432 | int tboot_force_iommu(void) | ||
433 | { | ||
434 | if (!tboot_enabled()) | ||
435 | return 0; | ||
436 | |||
437 | if (no_iommu || swiotlb || dmar_disabled) | ||
438 | pr_warning("Forcing Intel-IOMMU to enabled\n"); | ||
439 | |||
440 | dmar_disabled = 0; | ||
441 | #ifdef CONFIG_SWIOTLB | ||
442 | swiotlb = 0; | ||
443 | #endif | ||
444 | no_iommu = 0; | ||
445 | |||
446 | return 1; | ||
447 | } | ||
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c new file mode 100644 index 000000000000..be2573448ed9 --- /dev/null +++ b/arch/x86/kernel/time.c | |||
@@ -0,0 +1,121 @@ | |||
1 | /* | ||
2 | * Copyright (c) 1991,1992,1995 Linus Torvalds | ||
3 | * Copyright (c) 1994 Alan Modra | ||
4 | * Copyright (c) 1995 Markus Kuhn | ||
5 | * Copyright (c) 1996 Ingo Molnar | ||
6 | * Copyright (c) 1998 Andrea Arcangeli | ||
7 | * Copyright (c) 2002,2006 Vojtech Pavlik | ||
8 | * Copyright (c) 2003 Andi Kleen | ||
9 | * | ||
10 | */ | ||
11 | |||
12 | #include <linux/clockchips.h> | ||
13 | #include <linux/interrupt.h> | ||
14 | #include <linux/time.h> | ||
15 | #include <linux/mca.h> | ||
16 | |||
17 | #include <asm/vsyscall.h> | ||
18 | #include <asm/x86_init.h> | ||
19 | #include <asm/i8259.h> | ||
20 | #include <asm/i8253.h> | ||
21 | #include <asm/timer.h> | ||
22 | #include <asm/hpet.h> | ||
23 | #include <asm/time.h> | ||
24 | |||
25 | #if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC) | ||
26 | int timer_ack; | ||
27 | #endif | ||
28 | |||
29 | #ifdef CONFIG_X86_64 | ||
30 | volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; | ||
31 | #endif | ||
32 | |||
33 | unsigned long profile_pc(struct pt_regs *regs) | ||
34 | { | ||
35 | unsigned long pc = instruction_pointer(regs); | ||
36 | |||
37 | if (!user_mode_vm(regs) && in_lock_functions(pc)) { | ||
38 | #ifdef CONFIG_FRAME_POINTER | ||
39 | return *(unsigned long *)(regs->bp + sizeof(long)); | ||
40 | #else | ||
41 | unsigned long *sp = | ||
42 | (unsigned long *)kernel_stack_pointer(regs); | ||
43 | /* | ||
44 | * The return address is either directly at the stack pointer | ||
45 | * or just above saved flags. EFLAGS has bits 22-31 zero; | ||
46 | * kernel addresses don't. | ||
47 | */ | ||
48 | if (sp[0] >> 22) | ||
49 | return sp[0]; | ||
50 | if (sp[1] >> 22) | ||
51 | return sp[1]; | ||
52 | #endif | ||
53 | } | ||
54 | return pc; | ||
55 | } | ||
56 | EXPORT_SYMBOL(profile_pc); | ||
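
The sp[0] >> 22 test above discriminates a saved EFLAGS word (bits 22-31 clear) from a kernel return address (high bits set on x86). A quick demonstration with invented values:

    #include <stdio.h>

    int main(void)
    {
        unsigned long eflags = 0x00000246; /* typical saved flags */
        unsigned long kaddr  = 0xc1045678; /* 32-bit kernel text addr */

        printf("eflags >> 22 = %lu (treated as flags)\n", eflags >> 22);
        printf("kaddr  >> 22 = %lu (treated as return address)\n", kaddr >> 22);
        return 0;
    }
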
57 | |||
58 | /* | ||
59 | * Default timer interrupt handler for PIT/HPET | ||
60 | */ | ||
61 | static irqreturn_t timer_interrupt(int irq, void *dev_id) | ||
62 | { | ||
63 | /* Keep nmi watchdog up to date */ | ||
64 | inc_irq_stat(irq0_irqs); | ||
65 | |||
66 | /* Optimized out for !IO_APIC and x86_64 */ | ||
67 | if (timer_ack) { | ||
68 | /* | ||
69 | * Subtle, when I/O APICs are used we have to ack timer IRQ | ||
70 | * manually to deassert NMI lines for the watchdog if run | ||
71 | * on an 82489DX-based system. | ||
72 | */ | ||
73 | spin_lock(&i8259A_lock); | ||
74 | outb(0x0c, PIC_MASTER_OCW3); | ||
75 | /* Ack the IRQ; AEOI will end it automatically. */ | ||
76 | inb(PIC_MASTER_POLL); | ||
77 | spin_unlock(&i8259A_lock); | ||
78 | } | ||
79 | |||
80 | global_clock_event->event_handler(global_clock_event); | ||
81 | |||
82 | /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */ | ||
83 | if (MCA_bus) | ||
84 | outb_p(inb_p(0x61) | 0x80, 0x61); | ||
85 | |||
86 | return IRQ_HANDLED; | ||
87 | } | ||
88 | |||
89 | static struct irqaction irq0 = { | ||
90 | .handler = timer_interrupt, | ||
91 | .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER, | ||
92 | .name = "timer" | ||
93 | }; | ||
94 | |||
95 | void __init setup_default_timer_irq(void) | ||
96 | { | ||
97 | setup_irq(0, &irq0); | ||
98 | } | ||
99 | |||
100 | /* Default timer init function */ | ||
101 | void __init hpet_time_init(void) | ||
102 | { | ||
103 | if (!hpet_enable()) | ||
104 | setup_pit_timer(); | ||
105 | setup_default_timer_irq(); | ||
106 | } | ||
107 | |||
108 | static __init void x86_late_time_init(void) | ||
109 | { | ||
110 | x86_init.timers.timer_init(); | ||
111 | tsc_init(); | ||
112 | } | ||
113 | |||
114 | /* | ||
115 | * Defer TSC init and the periodic timer init to | ||
116 | * x86_late_time_init(), run from late_time_init, so ioremap() works. | ||
117 | */ | ||
118 | void __init time_init(void) | ||
119 | { | ||
120 | late_time_init = x86_late_time_init; | ||
121 | } | ||
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c deleted file mode 100644 index 5c5d87f0b2e1..000000000000 --- a/arch/x86/kernel/time_32.c +++ /dev/null | |||
@@ -1,137 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 1991, 1992, 1995 Linus Torvalds | ||
3 | * | ||
4 | * This file contains the PC-specific time handling details: | ||
5 | * reading the RTC at bootup, etc.. | ||
6 | * 1994-07-02 Alan Modra | ||
7 | * fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime | ||
8 | * 1995-03-26 Markus Kuhn | ||
9 | * fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887 | ||
10 | * precision CMOS clock update | ||
11 | * 1996-05-03 Ingo Molnar | ||
12 | * fixed time warps in do_[slow|fast]_gettimeoffset() | ||
13 | * 1997-09-10 Updated NTP code according to technical memorandum Jan '96 | ||
14 | * "A Kernel Model for Precision Timekeeping" by Dave Mills | ||
15 | * 1998-09-05 (Various) | ||
16 | * More robust do_fast_gettimeoffset() algorithm implemented | ||
17 | * (works with APM, Cyrix 6x86MX and Centaur C6), | ||
18 | * monotonic gettimeofday() with fast_get_timeoffset(), | ||
19 | * drift-proof precision TSC calibration on boot | ||
20 | * (C. Scott Ananian <cananian@alumni.princeton.edu>, Andrew D. | ||
21 | * Balsa <andrebalsa@altern.org>, Philip Gladstone <philip@raptor.com>; | ||
22 | * ported from 2.0.35 Jumbo-9 by Michael Krause <m.krause@tu-harburg.de>). | ||
23 | * 1998-12-16 Andrea Arcangeli | ||
24 | * Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy | ||
25 | * because was not accounting lost_ticks. | ||
26 | * 1998-12-24 Copyright (C) 1998 Andrea Arcangeli | ||
27 | * Fixed a xtime SMP race (we need the xtime_lock rw spinlock to | ||
28 | * serialize accesses to xtime/lost_ticks). | ||
29 | */ | ||
30 | |||
31 | #include <linux/init.h> | ||
32 | #include <linux/interrupt.h> | ||
33 | #include <linux/time.h> | ||
34 | #include <linux/mca.h> | ||
35 | |||
36 | #include <asm/setup.h> | ||
37 | #include <asm/hpet.h> | ||
38 | #include <asm/time.h> | ||
39 | #include <asm/timer.h> | ||
40 | |||
41 | #include <asm/do_timer.h> | ||
42 | |||
43 | int timer_ack; | ||
44 | |||
45 | unsigned long profile_pc(struct pt_regs *regs) | ||
46 | { | ||
47 | unsigned long pc = instruction_pointer(regs); | ||
48 | |||
49 | #ifdef CONFIG_SMP | ||
50 | if (!user_mode_vm(regs) && in_lock_functions(pc)) { | ||
51 | #ifdef CONFIG_FRAME_POINTER | ||
52 | return *(unsigned long *)(regs->bp + sizeof(long)); | ||
53 | #else | ||
54 | unsigned long *sp = (unsigned long *)®s->sp; | ||
55 | |||
56 | /* Return address is either directly at stack pointer | ||
57 | or above a saved flags. Eflags has bits 22-31 zero, | ||
58 | kernel addresses don't. */ | ||
59 | if (sp[0] >> 22) | ||
60 | return sp[0]; | ||
61 | if (sp[1] >> 22) | ||
62 | return sp[1]; | ||
63 | #endif | ||
64 | } | ||
65 | #endif | ||
66 | return pc; | ||
67 | } | ||
68 | EXPORT_SYMBOL(profile_pc); | ||
69 | |||
70 | /* | ||
71 | * This is the same as the above, except we _also_ save the current | ||
72 | * Time Stamp Counter value at the time of the timer interrupt, so that | ||
73 | * we later on can estimate the time of day more exactly. | ||
74 | */ | ||
75 | irqreturn_t timer_interrupt(int irq, void *dev_id) | ||
76 | { | ||
77 | /* Keep nmi watchdog up to date */ | ||
78 | inc_irq_stat(irq0_irqs); | ||
79 | |||
80 | #ifdef CONFIG_X86_IO_APIC | ||
81 | if (timer_ack) { | ||
82 | /* | ||
83 | * Subtle, when I/O APICs are used we have to ack timer IRQ | ||
84 | * manually to deassert NMI lines for the watchdog if run | ||
85 | * on an 82489DX-based system. | ||
86 | */ | ||
87 | spin_lock(&i8259A_lock); | ||
88 | outb(0x0c, PIC_MASTER_OCW3); | ||
89 | /* Ack the IRQ; AEOI will end it automatically. */ | ||
90 | inb(PIC_MASTER_POLL); | ||
91 | spin_unlock(&i8259A_lock); | ||
92 | } | ||
93 | #endif | ||
94 | |||
95 | do_timer_interrupt_hook(); | ||
96 | |||
97 | #ifdef CONFIG_MCA | ||
98 | if (MCA_bus) { | ||
99 | /* The PS/2 uses level-triggered interrupts. You can't | ||
100 | turn them off, nor would you want to (any attempt to | ||
101 | enable edge-triggered interrupts usually gets intercepted by a | ||
102 | special hardware circuit). Hence we have to acknowledge | ||
103 | the timer interrupt. Through some incredibly stupid | ||
104 | design idea, the reset for IRQ 0 is done by setting the | ||
105 | high bit of the PPI port B (0x61). Note that some PS/2s, | ||
106 | notably the 55SX, work fine if this is removed. */ | ||
107 | |||
108 | u8 irq_v = inb_p(0x61); /* read the current state */ | ||
109 | outb_p(irq_v | 0x80, 0x61); /* reset the IRQ */ | ||
110 | } | ||
111 | #endif | ||
112 | |||
113 | return IRQ_HANDLED; | ||
114 | } | ||
115 | |||
116 | /* Duplicate of time_init() below, with hpet_enable part added */ | ||
117 | void __init hpet_time_init(void) | ||
118 | { | ||
119 | if (!hpet_enable()) | ||
120 | setup_pit_timer(); | ||
121 | x86_quirk_time_init(); | ||
122 | } | ||
123 | |||
124 | /* | ||
125 | * This is called directly from init code; we must delay timer setup in the | ||
126 | * HPET case as we can't make the decision to turn on HPET this early in the | ||
127 | * boot process. | ||
128 | * | ||
129 | * The chosen time_init function will usually be hpet_time_init, above, but | ||
130 | * in the case of virtual hardware, an alternative function may be substituted. | ||
131 | */ | ||
132 | void __init time_init(void) | ||
133 | { | ||
134 | x86_quirk_pre_time_init(); | ||
135 | tsc_init(); | ||
136 | late_time_init = choose_time_init(); | ||
137 | } | ||
diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c deleted file mode 100644 index 5ba343e61844..000000000000 --- a/arch/x86/kernel/time_64.c +++ /dev/null | |||
@@ -1,135 +0,0 @@ | |||
1 | /* | ||
2 | * "High Precision Event Timer" based timekeeping. | ||
3 | * | ||
4 | * Copyright (c) 1991,1992,1995 Linus Torvalds | ||
5 | * Copyright (c) 1994 Alan Modra | ||
6 | * Copyright (c) 1995 Markus Kuhn | ||
7 | * Copyright (c) 1996 Ingo Molnar | ||
8 | * Copyright (c) 1998 Andrea Arcangeli | ||
9 | * Copyright (c) 2002,2006 Vojtech Pavlik | ||
10 | * Copyright (c) 2003 Andi Kleen | ||
11 | * RTC support code taken from arch/i386/kernel/timers/time_hpet.c | ||
12 | */ | ||
13 | |||
14 | #include <linux/clockchips.h> | ||
15 | #include <linux/init.h> | ||
16 | #include <linux/interrupt.h> | ||
17 | #include <linux/module.h> | ||
18 | #include <linux/time.h> | ||
19 | #include <linux/mca.h> | ||
20 | #include <linux/nmi.h> | ||
21 | |||
22 | #include <asm/i8253.h> | ||
23 | #include <asm/hpet.h> | ||
24 | #include <asm/vgtod.h> | ||
25 | #include <asm/time.h> | ||
26 | #include <asm/timer.h> | ||
27 | |||
28 | volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; | ||
29 | |||
30 | unsigned long profile_pc(struct pt_regs *regs) | ||
31 | { | ||
32 | unsigned long pc = instruction_pointer(regs); | ||
33 | |||
34 | /* Assume the lock function has either no stack frame or a copy | ||
35 | of flags from PUSHF | ||
36 | Eflags always has bits 22 and up cleared unlike kernel addresses. */ | ||
37 | if (!user_mode_vm(regs) && in_lock_functions(pc)) { | ||
38 | #ifdef CONFIG_FRAME_POINTER | ||
39 | return *(unsigned long *)(regs->bp + sizeof(long)); | ||
40 | #else | ||
41 | unsigned long *sp = (unsigned long *)regs->sp; | ||
42 | if (sp[0] >> 22) | ||
43 | return sp[0]; | ||
44 | if (sp[1] >> 22) | ||
45 | return sp[1]; | ||
46 | #endif | ||
47 | } | ||
48 | return pc; | ||
49 | } | ||
50 | EXPORT_SYMBOL(profile_pc); | ||
51 | |||
52 | static irqreturn_t timer_interrupt(int irq, void *dev_id) | ||
53 | { | ||
54 | inc_irq_stat(irq0_irqs); | ||
55 | |||
56 | global_clock_event->event_handler(global_clock_event); | ||
57 | |||
58 | #ifdef CONFIG_MCA | ||
59 | if (MCA_bus) { | ||
60 | u8 irq_v = inb_p(0x61); /* read the current state */ | ||
61 | outb_p(irq_v|0x80, 0x61); /* reset the IRQ */ | ||
62 | } | ||
63 | #endif | ||
64 | |||
65 | return IRQ_HANDLED; | ||
66 | } | ||
67 | |||
68 | /* calibrate_cpu is used on systems with fixed rate TSCs to determine | ||
69 | * processor frequency */ | ||
70 | #define TICK_COUNT 100000000 | ||
71 | unsigned long __init calibrate_cpu(void) | ||
72 | { | ||
73 | int tsc_start, tsc_now; | ||
74 | int i, no_ctr_free; | ||
75 | unsigned long evntsel3 = 0, pmc3 = 0, pmc_now = 0; | ||
76 | unsigned long flags; | ||
77 | |||
78 | for (i = 0; i < 4; i++) | ||
79 | if (avail_to_resrv_perfctr_nmi_bit(i)) | ||
80 | break; | ||
81 | no_ctr_free = (i == 4); | ||
82 | if (no_ctr_free) { | ||
83 | WARN(1, KERN_WARNING "Warning: AMD perfctrs busy ... " | ||
84 | "cpu_khz value may be incorrect.\n"); | ||
85 | i = 3; | ||
86 | rdmsrl(MSR_K7_EVNTSEL3, evntsel3); | ||
87 | wrmsrl(MSR_K7_EVNTSEL3, 0); | ||
88 | rdmsrl(MSR_K7_PERFCTR3, pmc3); | ||
89 | } else { | ||
90 | reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i); | ||
91 | reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i); | ||
92 | } | ||
93 | local_irq_save(flags); | ||
94 | /* start measuring cycles, incrementing from 0 */ | ||
95 | wrmsrl(MSR_K7_PERFCTR0 + i, 0); | ||
96 | wrmsrl(MSR_K7_EVNTSEL0 + i, 1 << 22 | 3 << 16 | 0x76); | ||
97 | rdtscl(tsc_start); | ||
98 | do { | ||
99 | rdmsrl(MSR_K7_PERFCTR0 + i, pmc_now); | ||
100 | tsc_now = get_cycles(); | ||
101 | } while ((tsc_now - tsc_start) < TICK_COUNT); | ||
102 | |||
103 | local_irq_restore(flags); | ||
104 | if (no_ctr_free) { | ||
105 | wrmsrl(MSR_K7_EVNTSEL3, 0); | ||
106 | wrmsrl(MSR_K7_PERFCTR3, pmc3); | ||
107 | wrmsrl(MSR_K7_EVNTSEL3, evntsel3); | ||
108 | } else { | ||
109 | release_perfctr_nmi(MSR_K7_PERFCTR0 + i); | ||
110 | release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); | ||
111 | } | ||
112 | |||
113 | return pmc_now * tsc_khz / (tsc_now - tsc_start); | ||
114 | } | ||
115 | |||
116 | static struct irqaction irq0 = { | ||
117 | .handler = timer_interrupt, | ||
118 | .flags = IRQF_DISABLED | IRQF_IRQPOLL | IRQF_NOBALANCING | IRQF_TIMER, | ||
119 | .name = "timer" | ||
120 | }; | ||
121 | |||
122 | void __init hpet_time_init(void) | ||
123 | { | ||
124 | if (!hpet_enable()) | ||
125 | setup_pit_timer(); | ||
126 | |||
127 | setup_irq(0, &irq0); | ||
128 | } | ||
129 | |||
130 | void __init time_init(void) | ||
131 | { | ||
132 | tsc_init(); | ||
133 | |||
134 | late_time_init = choose_time_init(); | ||
135 | } | ||
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c index 808031a5ba19..cd022121cab6 100644 --- a/arch/x86/kernel/trampoline.c +++ b/arch/x86/kernel/trampoline.c | |||
@@ -3,8 +3,16 @@ | |||
3 | #include <asm/trampoline.h> | 3 | #include <asm/trampoline.h> |
4 | #include <asm/e820.h> | 4 | #include <asm/e820.h> |
5 | 5 | ||
6 | #if defined(CONFIG_X86_64) && defined(CONFIG_ACPI_SLEEP) | ||
7 | #define __trampinit | ||
8 | #define __trampinitdata | ||
9 | #else | ||
10 | #define __trampinit __cpuinit | ||
11 | #define __trampinitdata __cpuinitdata | ||
12 | #endif | ||
13 | |||
6 | /* ready for x86_64 and x86 */ | 14 | /* ready for x86_64 and x86 */ |
7 | unsigned char *trampoline_base = __va(TRAMPOLINE_BASE); | 15 | unsigned char *__trampinitdata trampoline_base = __va(TRAMPOLINE_BASE); |
8 | 16 | ||
9 | void __init reserve_trampoline_memory(void) | 17 | void __init reserve_trampoline_memory(void) |
10 | { | 18 | { |
@@ -26,7 +34,7 @@ void __init reserve_trampoline_memory(void) | |||
26 | * bootstrap into the page concerned. The caller | 34 | * bootstrap into the page concerned. The caller |
27 | * has made sure it's suitably aligned. | 35 | * has made sure it's suitably aligned. |
28 | */ | 36 | */ |
29 | unsigned long setup_trampoline(void) | 37 | unsigned long __trampinit setup_trampoline(void) |
30 | { | 38 | { |
31 | memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE); | 39 | memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE); |
32 | return virt_to_phys(trampoline_base); | 40 | return virt_to_phys(trampoline_base); |
diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S index 66d874e5404c..8508237e8e43 100644 --- a/arch/x86/kernel/trampoline_32.S +++ b/arch/x86/kernel/trampoline_32.S | |||
@@ -28,16 +28,12 @@ | |||
28 | */ | 28 | */ |
29 | 29 | ||
30 | #include <linux/linkage.h> | 30 | #include <linux/linkage.h> |
31 | #include <linux/init.h> | ||
31 | #include <asm/segment.h> | 32 | #include <asm/segment.h> |
32 | #include <asm/page_types.h> | 33 | #include <asm/page_types.h> |
33 | 34 | ||
34 | /* We can free up trampoline after bootup if cpu hotplug is not supported. */ | 35 | /* We can free up trampoline after bootup if cpu hotplug is not supported. */ |
35 | #ifndef CONFIG_HOTPLUG_CPU | 36 | __CPUINITRODATA |
36 | .section ".cpuinit.data","aw",@progbits | ||
37 | #else | ||
38 | .section .rodata,"a",@progbits | ||
39 | #endif | ||
40 | |||
41 | .code16 | 37 | .code16 |
42 | 38 | ||
43 | ENTRY(trampoline_data) | 39 | ENTRY(trampoline_data) |
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S index cddfb8d386b9..3af2dff58b21 100644 --- a/arch/x86/kernel/trampoline_64.S +++ b/arch/x86/kernel/trampoline_64.S | |||
@@ -25,14 +25,19 @@ | |||
25 | */ | 25 | */ |
26 | 26 | ||
27 | #include <linux/linkage.h> | 27 | #include <linux/linkage.h> |
28 | #include <linux/init.h> | ||
28 | #include <asm/pgtable_types.h> | 29 | #include <asm/pgtable_types.h> |
29 | #include <asm/page_types.h> | 30 | #include <asm/page_types.h> |
30 | #include <asm/msr.h> | 31 | #include <asm/msr.h> |
31 | #include <asm/segment.h> | 32 | #include <asm/segment.h> |
32 | #include <asm/processor-flags.h> | 33 | #include <asm/processor-flags.h> |
33 | 34 | ||
35 | #ifdef CONFIG_ACPI_SLEEP | ||
34 | .section .rodata, "a", @progbits | 36 | .section .rodata, "a", @progbits |
35 | 37 | #else | |
38 | /* We can free up the trampoline after bootup if cpu hotplug is not supported. */ | ||
39 | __CPUINITRODATA | ||
40 | #endif | ||
36 | .code16 | 41 | .code16 |
37 | 42 | ||
38 | ENTRY(trampoline_data) | 43 | ENTRY(trampoline_data) |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 49a401b1d4d7..33399176512a 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -14,7 +14,6 @@ | |||
14 | #include <linux/spinlock.h> | 14 | #include <linux/spinlock.h> |
15 | #include <linux/kprobes.h> | 15 | #include <linux/kprobes.h> |
16 | #include <linux/uaccess.h> | 16 | #include <linux/uaccess.h> |
17 | #include <linux/utsname.h> | ||
18 | #include <linux/kdebug.h> | 17 | #include <linux/kdebug.h> |
19 | #include <linux/kernel.h> | 18 | #include <linux/kernel.h> |
20 | #include <linux/module.h> | 19 | #include <linux/module.h> |
@@ -59,12 +58,12 @@ | |||
59 | #include <asm/mach_traps.h> | 58 | #include <asm/mach_traps.h> |
60 | 59 | ||
61 | #ifdef CONFIG_X86_64 | 60 | #ifdef CONFIG_X86_64 |
61 | #include <asm/x86_init.h> | ||
62 | #include <asm/pgalloc.h> | 62 | #include <asm/pgalloc.h> |
63 | #include <asm/proto.h> | 63 | #include <asm/proto.h> |
64 | #else | 64 | #else |
65 | #include <asm/processor-flags.h> | 65 | #include <asm/processor-flags.h> |
66 | #include <asm/setup.h> | 66 | #include <asm/setup.h> |
67 | #include <asm/traps.h> | ||
68 | 67 | ||
69 | asmlinkage int system_call(void); | 68 | asmlinkage int system_call(void); |
70 | 69 | ||
@@ -73,11 +72,9 @@ char ignore_fpu_irq; | |||
73 | 72 | ||
74 | /* | 73 | /* |
75 | * The IDT has to be page-aligned to simplify the Pentium | 74 | * The IDT has to be page-aligned to simplify the Pentium |
76 | * F0 0F bug workaround.. We have a special link segment | 75 | * F0 0F bug workaround. |
77 | * for this. | ||
78 | */ | 76 | */ |
79 | gate_desc idt_table[NR_VECTORS] | 77 | gate_desc idt_table[NR_VECTORS] __page_aligned_data = { { { { 0, 0 } } }, }; |
80 | __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; | ||
81 | #endif | 78 | #endif |
82 | 79 | ||
83 | DECLARE_BITMAP(used_vectors, NR_VECTORS); | 80 | DECLARE_BITMAP(used_vectors, NR_VECTORS); |
@@ -951,7 +948,5 @@ void __init trap_init(void) | |||
951 | */ | 948 | */ |
952 | cpu_init(); | 949 | cpu_init(); |
953 | 950 | ||
954 | #ifdef CONFIG_X86_32 | 951 | x86_init.irqs.trap_init(); |
955 | x86_quirk_trap_init(); | ||
956 | #endif | ||
957 | } | 952 | } |
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 71f4368b357e..cd982f48e23e 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -17,6 +17,8 @@ | |||
17 | #include <asm/time.h> | 17 | #include <asm/time.h> |
18 | #include <asm/delay.h> | 18 | #include <asm/delay.h> |
19 | #include <asm/hypervisor.h> | 19 | #include <asm/hypervisor.h> |
20 | #include <asm/nmi.h> | ||
21 | #include <asm/x86_init.h> | ||
20 | 22 | ||
21 | unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ | 23 | unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ |
22 | EXPORT_SYMBOL(cpu_khz); | 24 | EXPORT_SYMBOL(cpu_khz); |
@@ -400,15 +402,9 @@ unsigned long native_calibrate_tsc(void) | |||
400 | { | 402 | { |
401 | u64 tsc1, tsc2, delta, ref1, ref2; | 403 | u64 tsc1, tsc2, delta, ref1, ref2; |
402 | unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; | 404 | unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; |
403 | unsigned long flags, latch, ms, fast_calibrate, hv_tsc_khz; | 405 | unsigned long flags, latch, ms, fast_calibrate; |
404 | int hpet = is_hpet_enabled(), i, loopmin; | 406 | int hpet = is_hpet_enabled(), i, loopmin; |
405 | 407 | ||
406 | hv_tsc_khz = get_hypervisor_tsc_freq(); | ||
407 | if (hv_tsc_khz) { | ||
408 | printk(KERN_INFO "TSC: Frequency read from the hypervisor\n"); | ||
409 | return hv_tsc_khz; | ||
410 | } | ||
411 | |||
412 | local_irq_save(flags); | 408 | local_irq_save(flags); |
413 | fast_calibrate = quick_pit_calibrate(); | 409 | fast_calibrate = quick_pit_calibrate(); |
414 | local_irq_restore(flags); | 410 | local_irq_restore(flags); |
@@ -566,7 +562,7 @@ int recalibrate_cpu_khz(void) | |||
566 | unsigned long cpu_khz_old = cpu_khz; | 562 | unsigned long cpu_khz_old = cpu_khz; |
567 | 563 | ||
568 | if (cpu_has_tsc) { | 564 | if (cpu_has_tsc) { |
569 | tsc_khz = calibrate_tsc(); | 565 | tsc_khz = x86_platform.calibrate_tsc(); |
570 | cpu_khz = tsc_khz; | 566 | cpu_khz = tsc_khz; |
571 | cpu_data(0).loops_per_jiffy = | 567 | cpu_data(0).loops_per_jiffy = |
572 | cpufreq_scale(cpu_data(0).loops_per_jiffy, | 568 | cpufreq_scale(cpu_data(0).loops_per_jiffy, |
@@ -670,7 +666,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, | |||
670 | if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || | 666 | if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || |
671 | (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || | 667 | (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || |
672 | (val == CPUFREQ_RESUMECHANGE)) { | 668 | (val == CPUFREQ_RESUMECHANGE)) { |
673 | *lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); | 669 | *lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); |
674 | 670 | ||
675 | tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); | 671 | tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); |
676 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) | 672 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) |
@@ -744,10 +740,16 @@ static cycle_t __vsyscall_fn vread_tsc(void) | |||
744 | } | 740 | } |
745 | #endif | 741 | #endif |
746 | 742 | ||
743 | static void resume_tsc(void) | ||
744 | { | ||
745 | clocksource_tsc.cycle_last = 0; | ||
746 | } | ||
747 | |||
747 | static struct clocksource clocksource_tsc = { | 748 | static struct clocksource clocksource_tsc = { |
748 | .name = "tsc", | 749 | .name = "tsc", |
749 | .rating = 300, | 750 | .rating = 300, |
750 | .read = read_tsc, | 751 | .read = read_tsc, |
752 | .resume = resume_tsc, | ||
751 | .mask = CLOCKSOURCE_MASK(64), | 753 | .mask = CLOCKSOURCE_MASK(64), |
752 | .shift = 22, | 754 | .shift = 22, |
753 | .flags = CLOCK_SOURCE_IS_CONTINUOUS | | 755 | .flags = CLOCK_SOURCE_IS_CONTINUOUS | |
@@ -761,12 +763,14 @@ void mark_tsc_unstable(char *reason) | |||
761 | { | 763 | { |
762 | if (!tsc_unstable) { | 764 | if (!tsc_unstable) { |
763 | tsc_unstable = 1; | 765 | tsc_unstable = 1; |
764 | printk("Marking TSC unstable due to %s\n", reason); | 766 | printk(KERN_INFO "Marking TSC unstable due to %s\n", reason); |
765 | /* Change only the rating, when not registered */ | 767 | /* Change only the rating, when not registered */ |
766 | if (clocksource_tsc.mult) | 768 | if (clocksource_tsc.mult) |
767 | clocksource_change_rating(&clocksource_tsc, 0); | 769 | clocksource_mark_unstable(&clocksource_tsc); |
768 | else | 770 | else { |
771 | clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE; | ||
769 | clocksource_tsc.rating = 0; | 772 | clocksource_tsc.rating = 0; |
773 | } | ||
770 | } | 774 | } |
771 | } | 775 | } |
772 | 776 | ||
@@ -852,15 +856,71 @@ static void __init init_tsc_clocksource(void) | |||
852 | clocksource_register(&clocksource_tsc); | 856 | clocksource_register(&clocksource_tsc); |
853 | } | 857 | } |
854 | 858 | ||
859 | #ifdef CONFIG_X86_64 | ||
860 | /* | ||
861 | * calibrate_cpu is used on systems with fixed rate TSCs to determine | ||
862 | * processor frequency | ||
863 | */ | ||
864 | #define TICK_COUNT 100000000 | ||
865 | static unsigned long __init calibrate_cpu(void) | ||
866 | { | ||
867 | int tsc_start, tsc_now; | ||
868 | int i, no_ctr_free; | ||
869 | unsigned long evntsel3 = 0, pmc3 = 0, pmc_now = 0; | ||
870 | unsigned long flags; | ||
871 | |||
872 | for (i = 0; i < 4; i++) | ||
873 | if (avail_to_resrv_perfctr_nmi_bit(i)) | ||
874 | break; | ||
875 | no_ctr_free = (i == 4); | ||
876 | if (no_ctr_free) { | ||
877 | WARN(1, KERN_WARNING "Warning: AMD perfctrs busy ... " | ||
878 | "cpu_khz value may be incorrect.\n"); | ||
879 | i = 3; | ||
880 | rdmsrl(MSR_K7_EVNTSEL3, evntsel3); | ||
881 | wrmsrl(MSR_K7_EVNTSEL3, 0); | ||
882 | rdmsrl(MSR_K7_PERFCTR3, pmc3); | ||
883 | } else { | ||
884 | reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i); | ||
885 | reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i); | ||
886 | } | ||
887 | local_irq_save(flags); | ||
888 | /* start measuring cycles, incrementing from 0 */ | ||
889 | wrmsrl(MSR_K7_PERFCTR0 + i, 0); | ||
890 | wrmsrl(MSR_K7_EVNTSEL0 + i, 1 << 22 | 3 << 16 | 0x76); | ||
891 | rdtscl(tsc_start); | ||
892 | do { | ||
893 | rdmsrl(MSR_K7_PERFCTR0 + i, pmc_now); | ||
894 | tsc_now = get_cycles(); | ||
895 | } while ((tsc_now - tsc_start) < TICK_COUNT); | ||
896 | |||
897 | local_irq_restore(flags); | ||
898 | if (no_ctr_free) { | ||
899 | wrmsrl(MSR_K7_EVNTSEL3, 0); | ||
900 | wrmsrl(MSR_K7_PERFCTR3, pmc3); | ||
901 | wrmsrl(MSR_K7_EVNTSEL3, evntsel3); | ||
902 | } else { | ||
903 | release_perfctr_nmi(MSR_K7_PERFCTR0 + i); | ||
904 | release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); | ||
905 | } | ||
906 | |||
907 | return pmc_now * tsc_khz / (tsc_now - tsc_start); | ||
908 | } | ||
909 | #else | ||
910 | static inline unsigned long calibrate_cpu(void) { return cpu_khz; } | ||
911 | #endif | ||
912 | |||
855 | void __init tsc_init(void) | 913 | void __init tsc_init(void) |
856 | { | 914 | { |
857 | u64 lpj; | 915 | u64 lpj; |
858 | int cpu; | 916 | int cpu; |
859 | 917 | ||
918 | x86_init.timers.tsc_pre_init(); | ||
919 | |||
860 | if (!cpu_has_tsc) | 920 | if (!cpu_has_tsc) |
861 | return; | 921 | return; |
862 | 922 | ||
863 | tsc_khz = calibrate_tsc(); | 923 | tsc_khz = x86_platform.calibrate_tsc(); |
864 | cpu_khz = tsc_khz; | 924 | cpu_khz = tsc_khz; |
865 | 925 | ||
866 | if (!tsc_khz) { | 926 | if (!tsc_khz) { |
@@ -868,11 +928,9 @@ void __init tsc_init(void) | |||
868 | return; | 928 | return; |
869 | } | 929 | } |
870 | 930 | ||
871 | #ifdef CONFIG_X86_64 | ||
872 | if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) && | 931 | if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) && |
873 | (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)) | 932 | (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)) |
874 | cpu_khz = calibrate_cpu(); | 933 | cpu_khz = calibrate_cpu(); |
875 | #endif | ||
876 | 934 | ||
877 | printk("Detected %lu.%03lu MHz processor.\n", | 935 | printk("Detected %lu.%03lu MHz processor.\n", |
878 | (unsigned long)cpu_khz / 1000, | 936 | (unsigned long)cpu_khz / 1000, |
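The return expression of calibrate_cpu() above is a simple rule of three: over the same interval the selected perfctr (AMD event 0x76, "CPU clocks not halted") counts core cycles while the TSC advances by roughly TICK_COUNT ticks at tsc_khz, so core kHz = pmc_ticks * tsc_khz / tsc_ticks. A standalone sketch of that arithmetic, with illustrative numbers:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t tsc_khz   = 2200000;    /* from calibrate_tsc(), kHz */
            uint64_t tsc_ticks = 100000000;  /* TICK_COUNT: TSC delta */
            uint64_t pmc_ticks = 90909091;   /* core cycles, same interval */

            /* cpu_khz = core cycles / TSC ticks * TSC rate */
            printf("cpu_khz = %llu\n",
                   (unsigned long long)(pmc_ticks * tsc_khz / tsc_ticks));
            /* prints cpu_khz = 2000000 for these illustrative inputs */
            return 0;
    }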
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 027b5b498993..f37930954d15 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c | |||
@@ -114,7 +114,7 @@ void __cpuinit check_tsc_sync_source(int cpu) | |||
114 | return; | 114 | return; |
115 | 115 | ||
116 | if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { | 116 | if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { |
117 | pr_info("Skipping synchronization checks as TSC is reliable.\n"); | 117 | printk_once(KERN_INFO "Skipping synchronization checks as TSC is reliable.\n"); |
118 | return; | 118 | return; |
119 | } | 119 | } |
120 | 120 | ||
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 31ffc24eec4d..f068553a1b17 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <asm/setup.h> | 30 | #include <asm/setup.h> |
31 | #include <asm/apic.h> | 31 | #include <asm/apic.h> |
32 | #include <asm/e820.h> | 32 | #include <asm/e820.h> |
33 | #include <asm/time.h> | ||
33 | #include <asm/io.h> | 34 | #include <asm/io.h> |
34 | 35 | ||
35 | #include <linux/kernel_stat.h> | 36 | #include <linux/kernel_stat.h> |
@@ -53,7 +54,7 @@ int is_visws_box(void) | |||
53 | return visws_board_type >= 0; | 54 | return visws_board_type >= 0; |
54 | } | 55 | } |
55 | 56 | ||
56 | static int __init visws_time_init(void) | 57 | static void __init visws_time_init(void) |
57 | { | 58 | { |
58 | printk(KERN_INFO "Starting Cobalt Timer system clock\n"); | 59 | printk(KERN_INFO "Starting Cobalt Timer system clock\n"); |
59 | 60 | ||
@@ -66,21 +67,13 @@ static int __init visws_time_init(void) | |||
66 | /* Enable (unmask) the timer interrupt */ | 67 | /* Enable (unmask) the timer interrupt */ |
67 | co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) & ~CO_CTRL_TIMEMASK); | 68 | co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) & ~CO_CTRL_TIMEMASK); |
68 | 69 | ||
69 | /* | 70 | setup_default_timer_irq(); |
70 | * Zero return means the generic timer setup code will set up | ||
71 | * the standard vector: | ||
72 | */ | ||
73 | return 0; | ||
74 | } | 71 | } |
75 | 72 | ||
76 | static int __init visws_pre_intr_init(void) | 73 | /* Replaces the default init_ISA_irqs in the generic setup */ |
74 | static void __init visws_pre_intr_init(void) | ||
77 | { | 75 | { |
78 | init_VISWS_APIC_irqs(); | 76 | init_VISWS_APIC_irqs(); |
79 | |||
80 | /* | ||
81 | * We dont want ISA irqs to be set up by the generic code: | ||
82 | */ | ||
83 | return 1; | ||
84 | } | 77 | } |
85 | 78 | ||
86 | /* Quirk for machine specific memory setup. */ | 79 | /* Quirk for machine specific memory setup. */ |
@@ -156,12 +149,8 @@ static void visws_machine_power_off(void) | |||
156 | outl(PIIX_SPECIAL_STOP, 0xCFC); | 149 | outl(PIIX_SPECIAL_STOP, 0xCFC); |
157 | } | 150 | } |
158 | 151 | ||
159 | static int __init visws_get_smp_config(unsigned int early) | 152 | static void __init visws_get_smp_config(unsigned int early) |
160 | { | 153 | { |
161 | /* | ||
162 | * Prevent MP-table parsing by the generic code: | ||
163 | */ | ||
164 | return 1; | ||
165 | } | 154 | } |
166 | 155 | ||
167 | /* | 156 | /* |
@@ -208,7 +197,7 @@ static void __init MP_processor_info(struct mpc_cpu *m) | |||
208 | apic_version[m->apicid] = ver; | 197 | apic_version[m->apicid] = ver; |
209 | } | 198 | } |
210 | 199 | ||
211 | static int __init visws_find_smp_config(unsigned int reserve) | 200 | static void __init visws_find_smp_config(unsigned int reserve) |
212 | { | 201 | { |
213 | struct mpc_cpu *mp = phys_to_virt(CO_CPU_TAB_PHYS); | 202 | struct mpc_cpu *mp = phys_to_virt(CO_CPU_TAB_PHYS); |
214 | unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS)); | 203 | unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS)); |
@@ -230,21 +219,9 @@ static int __init visws_find_smp_config(unsigned int reserve) | |||
230 | MP_processor_info(mp++); | 219 | MP_processor_info(mp++); |
231 | 220 | ||
232 | mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; | 221 | mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; |
233 | |||
234 | return 1; | ||
235 | } | 222 | } |
236 | 223 | ||
237 | static int visws_trap_init(void); | 224 | static void visws_trap_init(void); |
238 | |||
239 | static struct x86_quirks visws_x86_quirks __initdata = { | ||
240 | .arch_time_init = visws_time_init, | ||
241 | .arch_pre_intr_init = visws_pre_intr_init, | ||
242 | .arch_memory_setup = visws_memory_setup, | ||
243 | .arch_intr_init = NULL, | ||
244 | .arch_trap_init = visws_trap_init, | ||
245 | .mach_get_smp_config = visws_get_smp_config, | ||
246 | .mach_find_smp_config = visws_find_smp_config, | ||
247 | }; | ||
248 | 225 | ||
249 | void __init visws_early_detect(void) | 226 | void __init visws_early_detect(void) |
250 | { | 227 | { |
@@ -257,11 +234,14 @@ void __init visws_early_detect(void) | |||
257 | return; | 234 | return; |
258 | 235 | ||
259 | /* | 236 | /* |
260 | * Install special quirks for timer, interrupt and memory setup: | 237 | * Override the default platform setup functions |
261 | * Fall back to generic behavior for traps: | ||
262 | * Override generic MP-table parsing: | ||
263 | */ | 238 | */ |
264 | x86_quirks = &visws_x86_quirks; | 239 | x86_init.resources.memory_setup = visws_memory_setup; |
240 | x86_init.mpparse.get_smp_config = visws_get_smp_config; | ||
241 | x86_init.mpparse.find_smp_config = visws_find_smp_config; | ||
242 | x86_init.irqs.pre_vector_init = visws_pre_intr_init; | ||
243 | x86_init.irqs.trap_init = visws_trap_init; | ||
244 | x86_init.timers.timer_init = visws_time_init; | ||
265 | 245 | ||
266 | /* | 246 | /* |
267 | * Install reboot quirks: | 247 | * Install reboot quirks: |
@@ -400,12 +380,10 @@ static __init void cobalt_init(void) | |||
400 | co_apic_read(CO_APIC_ID)); | 380 | co_apic_read(CO_APIC_ID)); |
401 | } | 381 | } |
402 | 382 | ||
403 | static int __init visws_trap_init(void) | 383 | static void __init visws_trap_init(void) |
404 | { | 384 | { |
405 | lithium_init(); | 385 | lithium_init(); |
406 | cobalt_init(); | 386 | cobalt_init(); |
407 | |||
408 | return 1; | ||
409 | } | 387 | } |
410 | 388 | ||
411 | /* | 389 | /* |
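The conversion pattern in this file is worth spelling out: the old x86_quirks hooks returned an int, where a nonzero value suppressed the corresponding generic code path, while the new x86_init hooks return void and suppression is expressed by installing an empty override (as visws_get_smp_config now does). A minimal sketch of the new idiom, with hypothetical names:

    /* Sketch: suppress the generic MP-table scan under x86_init by
     * installing a no-op, where x86_quirks would have returned 1. */
    static void __init my_find_smp_config(unsigned int reserve) { }

    static void __init my_early_setup(void)
    {
            x86_init.mpparse.find_smp_config = my_find_smp_config;
    }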
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 95a7289e4b0c..d430e4c30193 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c | |||
@@ -648,7 +648,7 @@ static inline int __init activate_vmi(void) | |||
648 | 648 | ||
649 | pv_info.paravirt_enabled = 1; | 649 | pv_info.paravirt_enabled = 1; |
650 | pv_info.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK; | 650 | pv_info.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK; |
651 | pv_info.name = "vmi"; | 651 | pv_info.name = "vmi [deprecated]"; |
652 | 652 | ||
653 | pv_init_ops.patch = vmi_patch; | 653 | pv_init_ops.patch = vmi_patch; |
654 | 654 | ||
@@ -817,15 +817,15 @@ static inline int __init activate_vmi(void) | |||
817 | vmi_timer_ops.set_alarm = vmi_get_function(VMI_CALL_SetAlarm); | 817 | vmi_timer_ops.set_alarm = vmi_get_function(VMI_CALL_SetAlarm); |
818 | vmi_timer_ops.cancel_alarm = | 818 | vmi_timer_ops.cancel_alarm = |
819 | vmi_get_function(VMI_CALL_CancelAlarm); | 819 | vmi_get_function(VMI_CALL_CancelAlarm); |
820 | pv_time_ops.time_init = vmi_time_init; | 820 | x86_init.timers.timer_init = vmi_time_init; |
821 | pv_time_ops.get_wallclock = vmi_get_wallclock; | ||
822 | pv_time_ops.set_wallclock = vmi_set_wallclock; | ||
823 | #ifdef CONFIG_X86_LOCAL_APIC | 821 | #ifdef CONFIG_X86_LOCAL_APIC |
824 | pv_apic_ops.setup_boot_clock = vmi_time_bsp_init; | 822 | x86_init.timers.setup_percpu_clockev = vmi_time_bsp_init; |
825 | pv_apic_ops.setup_secondary_clock = vmi_time_ap_init; | 823 | x86_cpuinit.setup_percpu_clockev = vmi_time_ap_init; |
826 | #endif | 824 | #endif |
827 | pv_time_ops.sched_clock = vmi_sched_clock; | 825 | pv_time_ops.sched_clock = vmi_sched_clock; |
828 | pv_time_ops.get_tsc_khz = vmi_tsc_khz; | 826 | x86_platform.calibrate_tsc = vmi_tsc_khz; |
827 | x86_platform.get_wallclock = vmi_get_wallclock; | ||
828 | x86_platform.set_wallclock = vmi_set_wallclock; | ||
829 | 829 | ||
830 | /* We have true wallclock functions; disable CMOS clock sync */ | 830 | /* We have true wallclock functions; disable CMOS clock sync */ |
831 | no_sync_cmos_clock = 1; | 831 | no_sync_cmos_clock = 1; |
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index 2b3eb82efeeb..611b9e2360d3 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c | |||
@@ -68,7 +68,7 @@ unsigned long long vmi_sched_clock(void) | |||
68 | return cycles_2_ns(vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE)); | 68 | return cycles_2_ns(vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE)); |
69 | } | 69 | } |
70 | 70 | ||
71 | /* paravirt_ops.get_tsc_khz = vmi_tsc_khz */ | 71 | /* x86_platform.calibrate_tsc = vmi_tsc_khz */ |
72 | unsigned long vmi_tsc_khz(void) | 72 | unsigned long vmi_tsc_khz(void) |
73 | { | 73 | { |
74 | unsigned long long khz; | 74 | unsigned long long khz; |
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 9fc178255c04..8d6001ad8d8d 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
@@ -45,9 +45,9 @@ PHDRS { | |||
45 | text PT_LOAD FLAGS(5); /* R_E */ | 45 | text PT_LOAD FLAGS(5); /* R_E */ |
46 | data PT_LOAD FLAGS(7); /* RWE */ | 46 | data PT_LOAD FLAGS(7); /* RWE */ |
47 | #ifdef CONFIG_X86_64 | 47 | #ifdef CONFIG_X86_64 |
48 | user PT_LOAD FLAGS(7); /* RWE */ | 48 | user PT_LOAD FLAGS(5); /* R_E */ |
49 | #ifdef CONFIG_SMP | 49 | #ifdef CONFIG_SMP |
50 | percpu PT_LOAD FLAGS(7); /* RWE */ | 50 | percpu PT_LOAD FLAGS(6); /* RW_ */ |
51 | #endif | 51 | #endif |
52 | init PT_LOAD FLAGS(7); /* RWE */ | 52 | init PT_LOAD FLAGS(7); /* RWE */ |
53 | #endif | 53 | #endif |
@@ -65,17 +65,11 @@ SECTIONS | |||
65 | #endif | 65 | #endif |
66 | 66 | ||
67 | /* Text and read-only data */ | 67 | /* Text and read-only data */ |
68 | |||
69 | /* bootstrapping code */ | ||
70 | .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) { | ||
71 | _text = .; | ||
72 | *(.text.head) | ||
73 | } :text = 0x9090 | ||
74 | |||
75 | /* The rest of the text */ | ||
76 | .text : AT(ADDR(.text) - LOAD_OFFSET) { | 68 | .text : AT(ADDR(.text) - LOAD_OFFSET) { |
69 | _text = .; | ||
70 | /* bootstrapping code */ | ||
71 | HEAD_TEXT | ||
77 | #ifdef CONFIG_X86_32 | 72 | #ifdef CONFIG_X86_32 |
78 | /* not really needed, already page aligned */ | ||
79 | . = ALIGN(PAGE_SIZE); | 73 | . = ALIGN(PAGE_SIZE); |
80 | *(.text.page_aligned) | 74 | *(.text.page_aligned) |
81 | #endif | 75 | #endif |
@@ -94,13 +88,7 @@ SECTIONS | |||
94 | 88 | ||
95 | NOTES :text :note | 89 | NOTES :text :note |
96 | 90 | ||
97 | /* Exception table */ | 91 | EXCEPTION_TABLE(16) :text = 0x9090 |
98 | . = ALIGN(16); | ||
99 | __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { | ||
100 | __start___ex_table = .; | ||
101 | *(__ex_table) | ||
102 | __stop___ex_table = .; | ||
103 | } :text = 0x9090 | ||
104 | 92 | ||
105 | RO_DATA(PAGE_SIZE) | 93 | RO_DATA(PAGE_SIZE) |
106 | 94 | ||
@@ -118,7 +106,6 @@ SECTIONS | |||
118 | #endif | 106 | #endif |
119 | 107 | ||
120 | PAGE_ALIGNED_DATA(PAGE_SIZE) | 108 | PAGE_ALIGNED_DATA(PAGE_SIZE) |
121 | *(.data.idt) | ||
122 | 109 | ||
123 | CACHELINE_ALIGNED_DATA(CONFIG_X86_L1_CACHE_BYTES) | 110 | CACHELINE_ALIGNED_DATA(CONFIG_X86_L1_CACHE_BYTES) |
124 | 111 | ||
@@ -135,24 +122,21 @@ SECTIONS | |||
135 | #ifdef CONFIG_X86_64 | 122 | #ifdef CONFIG_X86_64 |
136 | 123 | ||
137 | #define VSYSCALL_ADDR (-10*1024*1024) | 124 | #define VSYSCALL_ADDR (-10*1024*1024) |
138 | #define VSYSCALL_PHYS_ADDR ((LOADADDR(.data) + SIZEOF(.data) + \ | ||
139 | PAGE_SIZE - 1) & ~(PAGE_SIZE - 1)) | ||
140 | #define VSYSCALL_VIRT_ADDR ((ADDR(.data) + SIZEOF(.data) + \ | ||
141 | PAGE_SIZE - 1) & ~(PAGE_SIZE - 1)) | ||
142 | 125 | ||
143 | #define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR) | 126 | #define VLOAD_OFFSET (VSYSCALL_ADDR - __vsyscall_0 + LOAD_OFFSET) |
144 | #define VLOAD(x) (ADDR(x) - VLOAD_OFFSET) | 127 | #define VLOAD(x) (ADDR(x) - VLOAD_OFFSET) |
145 | 128 | ||
146 | #define VVIRT_OFFSET (VSYSCALL_ADDR - VSYSCALL_VIRT_ADDR) | 129 | #define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0) |
147 | #define VVIRT(x) (ADDR(x) - VVIRT_OFFSET) | 130 | #define VVIRT(x) (ADDR(x) - VVIRT_OFFSET) |
148 | 131 | ||
132 | . = ALIGN(4096); | ||
133 | __vsyscall_0 = .; | ||
134 | |||
149 | . = VSYSCALL_ADDR; | 135 | . = VSYSCALL_ADDR; |
150 | .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { | 136 | .vsyscall_0 : AT(VLOAD(.vsyscall_0)) { |
151 | *(.vsyscall_0) | 137 | *(.vsyscall_0) |
152 | } :user | 138 | } :user |
153 | 139 | ||
154 | __vsyscall_0 = VSYSCALL_VIRT_ADDR; | ||
155 | |||
156 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); | 140 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); |
157 | .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { | 141 | .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { |
158 | *(.vsyscall_fn) | 142 | *(.vsyscall_fn) |
@@ -192,11 +176,9 @@ SECTIONS | |||
192 | *(.vsyscall_3) | 176 | *(.vsyscall_3) |
193 | } | 177 | } |
194 | 178 | ||
195 | . = VSYSCALL_VIRT_ADDR + PAGE_SIZE; | 179 | . = __vsyscall_0 + PAGE_SIZE; |
196 | 180 | ||
197 | #undef VSYSCALL_ADDR | 181 | #undef VSYSCALL_ADDR |
198 | #undef VSYSCALL_PHYS_ADDR | ||
199 | #undef VSYSCALL_VIRT_ADDR | ||
200 | #undef VLOAD_OFFSET | 182 | #undef VLOAD_OFFSET |
201 | #undef VLOAD | 183 | #undef VLOAD |
202 | #undef VVIRT_OFFSET | 184 | #undef VVIRT_OFFSET |
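The rewritten VLOAD/VVIRT macros derive both offsets from the __vsyscall_0 marker, which now records where the vsyscall pages sit in the kernel image, instead of the old LOADADDR/SIZEOF computations. A worked sketch of the address arithmetic (LOAD_OFFSET is __START_KERNEL_map on 64-bit; the __vsyscall_0 value is hypothetical):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t vsyscall_addr = 0xffffffffff600000ull; /* -10*1024*1024 */
            uint64_t load_offset   = 0xffffffff80000000ull; /* LOAD_OFFSET */
            uint64_t vsyscall_0    = 0xffffffff81234000ull; /* hypothetical */

            uint64_t vload_offset = vsyscall_addr - vsyscall_0 + load_offset;
            uint64_t vvirt_offset = vsyscall_addr - vsyscall_0;

            /* ADDR(.vsyscall_0) == VSYSCALL_ADDR, so: */
            printf("VLOAD(.vsyscall_0) = %#llx\n", /* vsyscall_0 - LOAD_OFFSET */
                   (unsigned long long)(vsyscall_addr - vload_offset));
            printf("VVIRT(.vsyscall_0) = %#llx\n", /* back to __vsyscall_0 */
                   (unsigned long long)(vsyscall_addr - vvirt_offset));
            return 0;
    }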
@@ -219,36 +201,12 @@ SECTIONS | |||
219 | PERCPU_VADDR(0, :percpu) | 201 | PERCPU_VADDR(0, :percpu) |
220 | #endif | 202 | #endif |
221 | 203 | ||
222 | .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { | 204 | INIT_TEXT_SECTION(PAGE_SIZE) |
223 | _sinittext = .; | ||
224 | INIT_TEXT | ||
225 | _einittext = .; | ||
226 | } | ||
227 | #ifdef CONFIG_X86_64 | 205 | #ifdef CONFIG_X86_64 |
228 | :init | 206 | :init |
229 | #endif | 207 | #endif |
230 | 208 | ||
231 | .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { | 209 | INIT_DATA_SECTION(16) |
232 | INIT_DATA | ||
233 | } | ||
234 | |||
235 | . = ALIGN(16); | ||
236 | .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { | ||
237 | __setup_start = .; | ||
238 | *(.init.setup) | ||
239 | __setup_end = .; | ||
240 | } | ||
241 | .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { | ||
242 | __initcall_start = .; | ||
243 | INITCALLS | ||
244 | __initcall_end = .; | ||
245 | } | ||
246 | |||
247 | .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { | ||
248 | __con_initcall_start = .; | ||
249 | *(.con_initcall.init) | ||
250 | __con_initcall_end = .; | ||
251 | } | ||
252 | 210 | ||
253 | .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { | 211 | .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { |
254 | __x86_cpu_dev_start = .; | 212 | __x86_cpu_dev_start = .; |
@@ -256,8 +214,6 @@ SECTIONS | |||
256 | __x86_cpu_dev_end = .; | 214 | __x86_cpu_dev_end = .; |
257 | } | 215 | } |
258 | 216 | ||
259 | SECURITY_INIT | ||
260 | |||
261 | . = ALIGN(8); | 217 | . = ALIGN(8); |
262 | .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { | 218 | .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { |
263 | __parainstructions = .; | 219 | __parainstructions = .; |
@@ -288,15 +244,6 @@ SECTIONS | |||
288 | EXIT_DATA | 244 | EXIT_DATA |
289 | } | 245 | } |
290 | 246 | ||
291 | #ifdef CONFIG_BLK_DEV_INITRD | ||
292 | . = ALIGN(PAGE_SIZE); | ||
293 | .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { | ||
294 | __initramfs_start = .; | ||
295 | *(.init.ramfs) | ||
296 | __initramfs_end = .; | ||
297 | } | ||
298 | #endif | ||
299 | |||
300 | #if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) | 247 | #if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) |
301 | PERCPU(PAGE_SIZE) | 248 | PERCPU(PAGE_SIZE) |
302 | #endif | 249 | #endif |
@@ -348,21 +295,18 @@ SECTIONS | |||
348 | _end = .; | 295 | _end = .; |
349 | } | 296 | } |
350 | 297 | ||
351 | /* Sections to be discarded */ | ||
352 | /DISCARD/ : { | ||
353 | *(.exitcall.exit) | ||
354 | *(.eh_frame) | ||
355 | *(.discard) | ||
356 | } | ||
357 | |||
358 | STABS_DEBUG | 298 | STABS_DEBUG |
359 | DWARF_DEBUG | 299 | DWARF_DEBUG |
300 | |||
301 | /* Sections to be discarded */ | ||
302 | DISCARDS | ||
303 | /DISCARD/ : { *(.eh_frame) } | ||
360 | } | 304 | } |
361 | 305 | ||
362 | 306 | ||
363 | #ifdef CONFIG_X86_32 | 307 | #ifdef CONFIG_X86_32 |
364 | . = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), | 308 | ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), |
365 | "kernel image bigger than KERNEL_IMAGE_SIZE"); | 309 | "kernel image bigger than KERNEL_IMAGE_SIZE"); |
366 | #else | 310 | #else |
367 | /* | 311 | /* |
368 | * Per-cpu symbols which need to be offset from __per_cpu_load | 312 | * Per-cpu symbols which need to be offset from __per_cpu_load |
@@ -375,12 +319,12 @@ INIT_PER_CPU(irq_stack_union); | |||
375 | /* | 319 | /* |
376 | * Build-time check on the image size: | 320 | * Build-time check on the image size: |
377 | */ | 321 | */ |
378 | . = ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), | 322 | ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), |
379 | "kernel image bigger than KERNEL_IMAGE_SIZE"); | 323 | "kernel image bigger than KERNEL_IMAGE_SIZE"); |
380 | 324 | ||
381 | #ifdef CONFIG_SMP | 325 | #ifdef CONFIG_SMP |
382 | . = ASSERT((per_cpu__irq_stack_union == 0), | 326 | ASSERT((per_cpu__irq_stack_union == 0), |
383 | "irq_stack_union is not at start of per-cpu area"); | 327 | "irq_stack_union is not at start of per-cpu area"); |
384 | #endif | 328 | #endif |
385 | 329 | ||
386 | #endif /* CONFIG_X86_32 */ | 330 | #endif /* CONFIG_X86_32 */ |
@@ -388,7 +332,6 @@ INIT_PER_CPU(irq_stack_union); | |||
388 | #ifdef CONFIG_KEXEC | 332 | #ifdef CONFIG_KEXEC |
389 | #include <asm/kexec.h> | 333 | #include <asm/kexec.h> |
390 | 334 | ||
391 | . = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, | 335 | ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, |
392 | "kexec control code size is too big"); | 336 | "kexec control code size is too big"); |
393 | #endif | 337 | #endif |
394 | |||
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 25ee06a80aad..8cb4974ff599 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -87,6 +87,7 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) | |||
87 | vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; | 87 | vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; |
88 | vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; | 88 | vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; |
89 | vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic; | 89 | vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic; |
90 | vsyscall_gtod_data.wall_time_coarse = __current_kernel_time(); | ||
90 | write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); | 91 | write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); |
91 | } | 92 | } |
92 | 93 | ||
@@ -227,19 +228,11 @@ static long __vsyscall(3) venosys_1(void) | |||
227 | } | 228 | } |
228 | 229 | ||
229 | #ifdef CONFIG_SYSCTL | 230 | #ifdef CONFIG_SYSCTL |
230 | |||
231 | static int | ||
232 | vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp, | ||
233 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
234 | { | ||
235 | return proc_dointvec(ctl, write, filp, buffer, lenp, ppos); | ||
236 | } | ||
237 | |||
238 | static ctl_table kernel_table2[] = { | 231 | static ctl_table kernel_table2[] = { |
239 | { .procname = "vsyscall64", | 232 | { .procname = "vsyscall64", |
240 | .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int), | 233 | .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int), |
241 | .mode = 0644, | 234 | .mode = 0644, |
242 | .proc_handler = vsyscall_sysctl_change }, | 235 | .proc_handler = proc_dointvec }, |
243 | {} | 236 | {} |
244 | }; | 237 | }; |
245 | 238 | ||
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c new file mode 100644 index 000000000000..4449a4a2c2ed --- /dev/null +++ b/arch/x86/kernel/x86_init.c | |||
@@ -0,0 +1,75 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2009 Thomas Gleixner <tglx@linutronix.de> | ||
3 | * | ||
4 | * For licencing details see kernel-base/COPYING | ||
5 | */ | ||
6 | #include <linux/init.h> | ||
7 | |||
8 | #include <asm/bios_ebda.h> | ||
9 | #include <asm/paravirt.h> | ||
10 | #include <asm/mpspec.h> | ||
11 | #include <asm/setup.h> | ||
12 | #include <asm/apic.h> | ||
13 | #include <asm/e820.h> | ||
14 | #include <asm/time.h> | ||
15 | #include <asm/irq.h> | ||
16 | #include <asm/tsc.h> | ||
17 | |||
18 | void __cpuinit x86_init_noop(void) { } | ||
19 | void __init x86_init_uint_noop(unsigned int unused) { } | ||
20 | void __init x86_init_pgd_noop(pgd_t *unused) { } | ||
21 | |||
22 | /* | ||
23 | * The platform setup functions are preset with the default functions | ||
24 | * for standard PC hardware. | ||
25 | */ | ||
26 | struct x86_init_ops x86_init __initdata = { | ||
27 | |||
28 | .resources = { | ||
29 | .probe_roms = x86_init_noop, | ||
30 | .reserve_resources = reserve_standard_io_resources, | ||
31 | .memory_setup = default_machine_specific_memory_setup, | ||
32 | }, | ||
33 | |||
34 | .mpparse = { | ||
35 | .mpc_record = x86_init_uint_noop, | ||
36 | .setup_ioapic_ids = x86_init_noop, | ||
37 | .mpc_apic_id = default_mpc_apic_id, | ||
38 | .smp_read_mpc_oem = default_smp_read_mpc_oem, | ||
39 | .mpc_oem_bus_info = default_mpc_oem_bus_info, | ||
40 | .find_smp_config = default_find_smp_config, | ||
41 | .get_smp_config = default_get_smp_config, | ||
42 | }, | ||
43 | |||
44 | .irqs = { | ||
45 | .pre_vector_init = init_ISA_irqs, | ||
46 | .intr_init = native_init_IRQ, | ||
47 | .trap_init = x86_init_noop, | ||
48 | }, | ||
49 | |||
50 | .oem = { | ||
51 | .arch_setup = x86_init_noop, | ||
52 | .banner = default_banner, | ||
53 | }, | ||
54 | |||
55 | .paging = { | ||
56 | .pagetable_setup_start = native_pagetable_setup_start, | ||
57 | .pagetable_setup_done = native_pagetable_setup_done, | ||
58 | }, | ||
59 | |||
60 | .timers = { | ||
61 | .setup_percpu_clockev = setup_boot_APIC_clock, | ||
62 | .tsc_pre_init = x86_init_noop, | ||
63 | .timer_init = hpet_time_init, | ||
64 | }, | ||
65 | }; | ||
66 | |||
67 | struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { | ||
68 | .setup_percpu_clockev = setup_secondary_APIC_clock, | ||
69 | }; | ||
70 | |||
71 | struct x86_platform_ops x86_platform = { | ||
72 | .calibrate_tsc = native_calibrate_tsc, | ||
73 | .get_wallclock = mach_get_cmos_time, | ||
74 | .set_wallclock = mach_set_rtc_mmss, | ||
75 | }; | ||
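These three structures collect hooks that were previously spread across x86_quirks, pv_time_ops and pv_init_ops; the visws, VMI and lguest hunks in this series override individual members in exactly this way. A hedged sketch of the pattern for a hypothetical platform (my_timer_init and my_calibrate_tsc are placeholders):

    /* Sketch only: a platform replacing two defaults during early boot. */
    static void __init my_timer_init(void)
    {
            /* program the platform timer instead of the HPET/PIT path */
    }

    static unsigned long my_calibrate_tsc(void)
    {
            return 1000000; /* kHz, e.g. read from firmware (illustrative) */
    }

    static void __init my_platform_setup(void)
    {
            x86_init.timers.timer_init = my_timer_init;
            x86_platform.calibrate_tsc = my_calibrate_tsc;
    }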
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 1ae5ceba7eb2..7024224f0fc8 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -664,7 +664,7 @@ static void start_apic_timer(struct kvm_lapic *apic) | |||
664 | { | 664 | { |
665 | ktime_t now = apic->lapic_timer.timer.base->get_time(); | 665 | ktime_t now = apic->lapic_timer.timer.base->get_time(); |
666 | 666 | ||
667 | apic->lapic_timer.period = apic_get_reg(apic, APIC_TMICT) * | 667 | apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) * |
668 | APIC_BUS_CYCLE_NS * apic->divide_count; | 668 | APIC_BUS_CYCLE_NS * apic->divide_count; |
669 | atomic_set(&apic->lapic_timer.pending, 0); | 669 | atomic_set(&apic->lapic_timer.pending, 0); |
670 | 670 | ||
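The added (u64) cast is an overflow fix: APIC_TMICT is a 32-bit register value and divide_count can reach 128, so without the cast the product is evaluated in 32-bit arithmetic and wraps before being widened into the 64-bit period. A standalone demonstration (APIC_BUS_CYCLE_NS is 1 in this tree):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t tmict        = 0x80000000u; /* large initial count */
            uint32_t bus_cycle_ns = 1;           /* APIC_BUS_CYCLE_NS */
            uint32_t divide       = 16;

            uint64_t wrapped = tmict * bus_cycle_ns * divide;           /* 32-bit */
            uint64_t correct = (uint64_t)tmict * bus_cycle_ns * divide;

            printf("wrapped=%llu correct=%llu\n",
                   (unsigned long long)wrapped, (unsigned long long)correct);
            return 0;
    }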
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index eca41ae9f453..685a4ffac8e6 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -156,6 +156,8 @@ module_param(oos_shadow, bool, 0644); | |||
156 | #define CREATE_TRACE_POINTS | 156 | #define CREATE_TRACE_POINTS |
157 | #include "mmutrace.h" | 157 | #include "mmutrace.h" |
158 | 158 | ||
159 | #define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) | ||
160 | |||
159 | #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) | 161 | #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) |
160 | 162 | ||
161 | struct kvm_rmap_desc { | 163 | struct kvm_rmap_desc { |
@@ -634,9 +636,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) | |||
634 | if (*spte & shadow_accessed_mask) | 636 | if (*spte & shadow_accessed_mask) |
635 | kvm_set_pfn_accessed(pfn); | 637 | kvm_set_pfn_accessed(pfn); |
636 | if (is_writeble_pte(*spte)) | 638 | if (is_writeble_pte(*spte)) |
637 | kvm_release_pfn_dirty(pfn); | 639 | kvm_set_pfn_dirty(pfn); |
638 | else | ||
639 | kvm_release_pfn_clean(pfn); | ||
640 | rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level); | 640 | rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level); |
641 | if (!*rmapp) { | 641 | if (!*rmapp) { |
642 | printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); | 642 | printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte); |
@@ -748,7 +748,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
748 | return write_protected; | 748 | return write_protected; |
749 | } | 749 | } |
750 | 750 | ||
751 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) | 751 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data) |
752 | { | 752 | { |
753 | u64 *spte; | 753 | u64 *spte; |
754 | int need_tlb_flush = 0; | 754 | int need_tlb_flush = 0; |
@@ -763,8 +763,45 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) | |||
763 | return need_tlb_flush; | 763 | return need_tlb_flush; |
764 | } | 764 | } |
765 | 765 | ||
766 | static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | 766 | static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data) |
767 | int (*handler)(struct kvm *kvm, unsigned long *rmapp)) | 767 | { |
768 | int need_flush = 0; | ||
769 | u64 *spte, new_spte; | ||
770 | pte_t *ptep = (pte_t *)data; | ||
771 | pfn_t new_pfn; | ||
772 | |||
773 | WARN_ON(pte_huge(*ptep)); | ||
774 | new_pfn = pte_pfn(*ptep); | ||
775 | spte = rmap_next(kvm, rmapp, NULL); | ||
776 | while (spte) { | ||
777 | BUG_ON(!is_shadow_present_pte(*spte)); | ||
778 | rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte); | ||
779 | need_flush = 1; | ||
780 | if (pte_write(*ptep)) { | ||
781 | rmap_remove(kvm, spte); | ||
782 | __set_spte(spte, shadow_trap_nonpresent_pte); | ||
783 | spte = rmap_next(kvm, rmapp, NULL); | ||
784 | } else { | ||
785 | new_spte = *spte &~ (PT64_BASE_ADDR_MASK); | ||
786 | new_spte |= (u64)new_pfn << PAGE_SHIFT; | ||
787 | |||
788 | new_spte &= ~PT_WRITABLE_MASK; | ||
789 | new_spte &= ~SPTE_HOST_WRITEABLE; | ||
790 | if (is_writeble_pte(*spte)) | ||
791 | kvm_set_pfn_dirty(spte_to_pfn(*spte)); | ||
792 | __set_spte(spte, new_spte); | ||
793 | spte = rmap_next(kvm, rmapp, spte); | ||
794 | } | ||
795 | } | ||
796 | if (need_flush) | ||
797 | kvm_flush_remote_tlbs(kvm); | ||
798 | |||
799 | return 0; | ||
800 | } | ||
801 | |||
802 | static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, u64 data, | ||
803 | int (*handler)(struct kvm *kvm, unsigned long *rmapp, | ||
804 | u64 data)) | ||
768 | { | 805 | { |
769 | int i, j; | 806 | int i, j; |
770 | int retval = 0; | 807 | int retval = 0; |
@@ -786,13 +823,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | |||
786 | if (hva >= start && hva < end) { | 823 | if (hva >= start && hva < end) { |
787 | gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; | 824 | gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; |
788 | 825 | ||
789 | retval |= handler(kvm, &memslot->rmap[gfn_offset]); | 826 | retval |= handler(kvm, &memslot->rmap[gfn_offset], |
827 | data); | ||
790 | 828 | ||
791 | for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { | 829 | for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { |
792 | int idx = gfn_offset; | 830 | int idx = gfn_offset; |
793 | idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j); | 831 | idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j); |
794 | retval |= handler(kvm, | 832 | retval |= handler(kvm, |
795 | &memslot->lpage_info[j][idx].rmap_pde); | 833 | &memslot->lpage_info[j][idx].rmap_pde, |
834 | data); | ||
796 | } | 835 | } |
797 | } | 836 | } |
798 | } | 837 | } |
@@ -802,10 +841,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | |||
802 | 841 | ||
803 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) | 842 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) |
804 | { | 843 | { |
805 | return kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); | 844 | return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp); |
806 | } | 845 | } |
807 | 846 | ||
808 | static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp) | 847 | void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) |
848 | { | ||
849 | kvm_handle_hva(kvm, hva, (u64)&pte, kvm_set_pte_rmapp); | ||
850 | } | ||
851 | |||
852 | static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data) | ||
809 | { | 853 | { |
810 | u64 *spte; | 854 | u64 *spte; |
811 | int young = 0; | 855 | int young = 0; |
@@ -841,13 +885,13 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) | |||
841 | gfn = unalias_gfn(vcpu->kvm, gfn); | 885 | gfn = unalias_gfn(vcpu->kvm, gfn); |
842 | rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); | 886 | rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); |
843 | 887 | ||
844 | kvm_unmap_rmapp(vcpu->kvm, rmapp); | 888 | kvm_unmap_rmapp(vcpu->kvm, rmapp, 0); |
845 | kvm_flush_remote_tlbs(vcpu->kvm); | 889 | kvm_flush_remote_tlbs(vcpu->kvm); |
846 | } | 890 | } |
847 | 891 | ||
848 | int kvm_age_hva(struct kvm *kvm, unsigned long hva) | 892 | int kvm_age_hva(struct kvm *kvm, unsigned long hva) |
849 | { | 893 | { |
850 | return kvm_handle_hva(kvm, hva, kvm_age_rmapp); | 894 | return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp); |
851 | } | 895 | } |
852 | 896 | ||
853 | #ifdef MMU_DEBUG | 897 | #ifdef MMU_DEBUG |
@@ -1756,7 +1800,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1756 | unsigned pte_access, int user_fault, | 1800 | unsigned pte_access, int user_fault, |
1757 | int write_fault, int dirty, int level, | 1801 | int write_fault, int dirty, int level, |
1758 | gfn_t gfn, pfn_t pfn, bool speculative, | 1802 | gfn_t gfn, pfn_t pfn, bool speculative, |
1759 | bool can_unsync) | 1803 | bool can_unsync, bool reset_host_protection) |
1760 | { | 1804 | { |
1761 | u64 spte; | 1805 | u64 spte; |
1762 | int ret = 0; | 1806 | int ret = 0; |
@@ -1783,6 +1827,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1783 | spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn, | 1827 | spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn, |
1784 | kvm_is_mmio_pfn(pfn)); | 1828 | kvm_is_mmio_pfn(pfn)); |
1785 | 1829 | ||
1830 | if (reset_host_protection) | ||
1831 | spte |= SPTE_HOST_WRITEABLE; | ||
1832 | |||
1786 | spte |= (u64)pfn << PAGE_SHIFT; | 1833 | spte |= (u64)pfn << PAGE_SHIFT; |
1787 | 1834 | ||
1788 | if ((pte_access & ACC_WRITE_MASK) | 1835 | if ((pte_access & ACC_WRITE_MASK) |
@@ -1828,7 +1875,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1828 | unsigned pt_access, unsigned pte_access, | 1875 | unsigned pt_access, unsigned pte_access, |
1829 | int user_fault, int write_fault, int dirty, | 1876 | int user_fault, int write_fault, int dirty, |
1830 | int *ptwrite, int level, gfn_t gfn, | 1877 | int *ptwrite, int level, gfn_t gfn, |
1831 | pfn_t pfn, bool speculative) | 1878 | pfn_t pfn, bool speculative, |
1879 | bool reset_host_protection) | ||
1832 | { | 1880 | { |
1833 | int was_rmapped = 0; | 1881 | int was_rmapped = 0; |
1834 | int was_writeble = is_writeble_pte(*sptep); | 1882 | int was_writeble = is_writeble_pte(*sptep); |
@@ -1860,7 +1908,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1860 | } | 1908 | } |
1861 | 1909 | ||
1862 | if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault, | 1910 | if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault, |
1863 | dirty, level, gfn, pfn, speculative, true)) { | 1911 | dirty, level, gfn, pfn, speculative, true, |
1912 | reset_host_protection)) { | ||
1864 | if (write_fault) | 1913 | if (write_fault) |
1865 | *ptwrite = 1; | 1914 | *ptwrite = 1; |
1866 | kvm_x86_ops->tlb_flush(vcpu); | 1915 | kvm_x86_ops->tlb_flush(vcpu); |
@@ -1877,8 +1926,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1877 | page_header_update_slot(vcpu->kvm, sptep, gfn); | 1926 | page_header_update_slot(vcpu->kvm, sptep, gfn); |
1878 | if (!was_rmapped) { | 1927 | if (!was_rmapped) { |
1879 | rmap_count = rmap_add(vcpu, sptep, gfn); | 1928 | rmap_count = rmap_add(vcpu, sptep, gfn); |
1880 | if (!is_rmap_spte(*sptep)) | 1929 | kvm_release_pfn_clean(pfn); |
1881 | kvm_release_pfn_clean(pfn); | ||
1882 | if (rmap_count > RMAP_RECYCLE_THRESHOLD) | 1930 | if (rmap_count > RMAP_RECYCLE_THRESHOLD) |
1883 | rmap_recycle(vcpu, sptep, gfn); | 1931 | rmap_recycle(vcpu, sptep, gfn); |
1884 | } else { | 1932 | } else { |
@@ -1909,7 +1957,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | |||
1909 | if (iterator.level == level) { | 1957 | if (iterator.level == level) { |
1910 | mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL, | 1958 | mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL, |
1911 | 0, write, 1, &pt_write, | 1959 | 0, write, 1, &pt_write, |
1912 | level, gfn, pfn, false); | 1960 | level, gfn, pfn, false, true); |
1913 | ++vcpu->stat.pf_fixed; | 1961 | ++vcpu->stat.pf_fixed; |
1914 | break; | 1962 | break; |
1915 | } | 1963 | } |
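The new data argument and kvm_set_spte_hva() exist so the MMU-notifier change_pte event can be routed down to the rmap walkers: when the host changes a PTE under a guest page (KSM substituting a write-protected shared copy, for instance), KVM rewrites the affected sptes in place instead of zapping them. A sketch of the generic-side caller, assuming the virt/kvm notifier glue of this series (locking and the sequence counter simplified):

    /* Sketch: mmu_notifier ->change_pte handler funneling host PTE
     * changes into kvm_set_spte_hva(); simplified from the real
     * virt/kvm implementation. */
    static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
                                            struct mm_struct *mm,
                                            unsigned long address,
                                            pte_t pte)
    {
            struct kvm *kvm = container_of(mn, struct kvm, mmu_notifier);

            spin_lock(&kvm->mmu_lock);
            kvm->mmu_notifier_seq++;
            kvm_set_spte_hva(kvm, address, pte);
            spin_unlock(&kvm->mmu_lock);
    }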
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index d2fec9c12d22..72558f8ff3f5 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -273,9 +273,13 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | |||
273 | if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq)) | 273 | if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq)) |
274 | return; | 274 | return; |
275 | kvm_get_pfn(pfn); | 275 | kvm_get_pfn(pfn); |
276 | /* | ||
277 | * we call mmu_set_spte() with reset_host_protection = true because | ||
278 | * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1). | ||
279 | */ | ||
276 | mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, | 280 | mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, |
277 | gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL, | 281 | gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL, |
278 | gpte_to_gfn(gpte), pfn, true); | 282 | gpte_to_gfn(gpte), pfn, true, true); |
279 | } | 283 | } |
280 | 284 | ||
281 | /* | 285 | /* |
@@ -308,7 +312,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
308 | user_fault, write_fault, | 312 | user_fault, write_fault, |
309 | gw->ptes[gw->level-1] & PT_DIRTY_MASK, | 313 | gw->ptes[gw->level-1] & PT_DIRTY_MASK, |
310 | ptwrite, level, | 314 | ptwrite, level, |
311 | gw->gfn, pfn, false); | 315 | gw->gfn, pfn, false, true); |
312 | break; | 316 | break; |
313 | } | 317 | } |
314 | 318 | ||
@@ -558,6 +562,7 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu, | |||
558 | static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 562 | static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) |
559 | { | 563 | { |
560 | int i, offset, nr_present; | 564 | int i, offset, nr_present; |
565 | bool reset_host_protection; | ||
561 | 566 | ||
562 | offset = nr_present = 0; | 567 | offset = nr_present = 0; |
563 | 568 | ||
@@ -595,9 +600,16 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
595 | 600 | ||
596 | nr_present++; | 601 | nr_present++; |
597 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); | 602 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); |
603 | if (!(sp->spt[i] & SPTE_HOST_WRITEABLE)) { | ||
604 | pte_access &= ~ACC_WRITE_MASK; | ||
605 | reset_host_protection = 0; | ||
606 | } else { | ||
607 | reset_host_protection = 1; | ||
608 | } | ||
598 | set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, | 609 | set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, |
599 | is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn, | 610 | is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn, |
600 | spte_to_pfn(sp->spt[i]), true, false); | 611 | spte_to_pfn(sp->spt[i]), true, false, |
612 | reset_host_protection); | ||
601 | } | 613 | } |
602 | 614 | ||
603 | return !nr_present; | 615 | return !nr_present; |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 944cc9c04b3c..c17404add91f 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -767,6 +767,8 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
767 | rdtscll(tsc_this); | 767 | rdtscll(tsc_this); |
768 | delta = vcpu->arch.host_tsc - tsc_this; | 768 | delta = vcpu->arch.host_tsc - tsc_this; |
769 | svm->vmcb->control.tsc_offset += delta; | 769 | svm->vmcb->control.tsc_offset += delta; |
770 | if (is_nested(svm)) | ||
771 | svm->nested.hsave->control.tsc_offset += delta; | ||
770 | vcpu->cpu = cpu; | 772 | vcpu->cpu = cpu; |
771 | kvm_migrate_timers(vcpu); | 773 | kvm_migrate_timers(vcpu); |
772 | svm->asid_generation = 0; | 774 | svm->asid_generation = 0; |
@@ -2057,10 +2059,14 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) | |||
2057 | 2059 | ||
2058 | switch (ecx) { | 2060 | switch (ecx) { |
2059 | case MSR_IA32_TSC: { | 2061 | case MSR_IA32_TSC: { |
2060 | u64 tsc; | 2062 | u64 tsc_offset; |
2063 | |||
2064 | if (is_nested(svm)) | ||
2065 | tsc_offset = svm->nested.hsave->control.tsc_offset; | ||
2066 | else | ||
2067 | tsc_offset = svm->vmcb->control.tsc_offset; | ||
2061 | 2068 | ||
2062 | rdtscll(tsc); | 2069 | *data = tsc_offset + native_read_tsc(); |
2063 | *data = svm->vmcb->control.tsc_offset + tsc; | ||
2064 | break; | 2070 | break; |
2065 | } | 2071 | } |
2066 | case MSR_K6_STAR: | 2072 | case MSR_K6_STAR: |
@@ -2146,10 +2152,17 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) | |||
2146 | 2152 | ||
2147 | switch (ecx) { | 2153 | switch (ecx) { |
2148 | case MSR_IA32_TSC: { | 2154 | case MSR_IA32_TSC: { |
2149 | u64 tsc; | 2155 | u64 tsc_offset = data - native_read_tsc(); |
2156 | u64 g_tsc_offset = 0; | ||
2157 | |||
2158 | if (is_nested(svm)) { | ||
2159 | g_tsc_offset = svm->vmcb->control.tsc_offset - | ||
2160 | svm->nested.hsave->control.tsc_offset; | ||
2161 | svm->nested.hsave->control.tsc_offset = tsc_offset; | ||
2162 | } | ||
2163 | |||
2164 | svm->vmcb->control.tsc_offset = tsc_offset + g_tsc_offset; | ||
2150 | 2165 | ||
2151 | rdtscll(tsc); | ||
2152 | svm->vmcb->control.tsc_offset = data - tsc; | ||
2153 | break; | 2166 | break; |
2154 | } | 2167 | } |
2155 | case MSR_K6_STAR: | 2168 | case MSR_K6_STAR: |
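The guest-visible counter is always host TSC plus the offset in the active VMCB, so with a nested guest running there are two offsets in play: the L1 value saved in hsave and the L2 value in the current VMCB. The rewritten MSR write rebases both while keeping their difference (g_tsc_offset) constant. A small model of that bookkeeping (names illustrative):

    #include <stdint.h>

    struct offsets { int64_t l1, l2; };

    /* Guest writes MSR_IA32_TSC with 'data' while L2 is active. */
    static struct offsets write_guest_tsc(int64_t data, int64_t host_tsc,
                                          struct offsets cur)
    {
            int64_t g = cur.l2 - cur.l1;  /* L1<->L2 delta to preserve */
            struct offsets next;

            next.l1 = data - host_tsc;    /* hsave->control.tsc_offset */
            next.l2 = next.l1 + g;        /* vmcb->control.tsc_offset */
            return next;
    }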
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f3812014bd0b..ed53b42caba1 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -709,7 +709,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
709 | if (vcpu->cpu != cpu) { | 709 | if (vcpu->cpu != cpu) { |
710 | vcpu_clear(vmx); | 710 | vcpu_clear(vmx); |
711 | kvm_migrate_timers(vcpu); | 711 | kvm_migrate_timers(vcpu); |
712 | vpid_sync_vcpu_all(vmx); | 712 | set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests); |
713 | local_irq_disable(); | 713 | local_irq_disable(); |
714 | list_add(&vmx->local_vcpus_link, | 714 | list_add(&vmx->local_vcpus_link, |
715 | &per_cpu(vcpus_on_cpu, cpu)); | 715 | &per_cpu(vcpus_on_cpu, cpu)); |
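Setting KVM_REQ_TLB_FLUSH instead of calling vpid_sync_vcpu_all() directly defers the flush to the next guest entry, after the vcpu has actually been loaded on the new CPU. A sketch of the consuming side, as it appears in the arch entry path of this era:

    /* Sketch: requests are consumed just before re-entering the guest
     * (cf. vcpu_enter_guest() in arch/x86/kvm/x86.c). */
    if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
            kvm_x86_ops->tlb_flush(vcpu);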
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 74029f50b26a..fc2974adf9b6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -1591,6 +1591,8 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, | |||
1591 | 1591 | ||
1592 | if (cpuid->nent < 1) | 1592 | if (cpuid->nent < 1) |
1593 | goto out; | 1593 | goto out; |
1594 | if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) | ||
1595 | cpuid->nent = KVM_MAX_CPUID_ENTRIES; | ||
1594 | r = -ENOMEM; | 1596 | r = -ENOMEM; |
1595 | cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent); | 1597 | cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent); |
1596 | if (!cpuid_entries) | 1598 | if (!cpuid_entries) |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index d677fa9ca650..7e59dc1d3fc2 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -1135,11 +1135,6 @@ static struct notifier_block paniced = { | |||
1135 | /* Setting up memory is fairly easy. */ | 1135 | /* Setting up memory is fairly easy. */ |
1136 | static __init char *lguest_memory_setup(void) | 1136 | static __init char *lguest_memory_setup(void) |
1137 | { | 1137 | { |
1138 | /* We do this here and not earlier because lockcheck used to barf if we | ||
1139 | * did it before start_kernel(). I think we fixed that, so it'd be | ||
1140 | * nice to move it back to lguest_init. Patch welcome... */ | ||
1141 | atomic_notifier_chain_register(&panic_notifier_list, &paniced); | ||
1142 | |||
1143 | /* | 1138 | /* |
1144 | *The Linux bootloader header contains an "e820" memory map: the | 1139 | *The Linux bootloader header contains an "e820" memory map: the |
1145 | * Launcher populated the first entry with our memory limit. | 1140 | * Launcher populated the first entry with our memory limit. |
@@ -1262,7 +1257,6 @@ __init void lguest_init(void) | |||
1262 | */ | 1257 | */ |
1263 | 1258 | ||
1264 | /* Interrupt-related operations */ | 1259 | /* Interrupt-related operations */ |
1265 | pv_irq_ops.init_IRQ = lguest_init_IRQ; | ||
1266 | pv_irq_ops.save_fl = PV_CALLEE_SAVE(save_fl); | 1260 | pv_irq_ops.save_fl = PV_CALLEE_SAVE(save_fl); |
1267 | pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(lg_restore_fl); | 1261 | pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(lg_restore_fl); |
1268 | pv_irq_ops.irq_disable = PV_CALLEE_SAVE(irq_disable); | 1262 | pv_irq_ops.irq_disable = PV_CALLEE_SAVE(irq_disable); |
@@ -1270,7 +1264,6 @@ __init void lguest_init(void) | |||
1270 | pv_irq_ops.safe_halt = lguest_safe_halt; | 1264 | pv_irq_ops.safe_halt = lguest_safe_halt; |
1271 | 1265 | ||
1272 | /* Setup operations */ | 1266 | /* Setup operations */ |
1273 | pv_init_ops.memory_setup = lguest_memory_setup; | ||
1274 | pv_init_ops.patch = lguest_patch; | 1267 | pv_init_ops.patch = lguest_patch; |
1275 | 1268 | ||
1276 | /* Intercepts of various CPU instructions */ | 1269 | /* Intercepts of various CPU instructions */ |
@@ -1320,10 +1313,11 @@ __init void lguest_init(void) | |||
1320 | set_lguest_basic_apic_ops(); | 1313 | set_lguest_basic_apic_ops(); |
1321 | #endif | 1314 | #endif |
1322 | 1315 | ||
1323 | /* Time operations */ | 1316 | x86_init.resources.memory_setup = lguest_memory_setup; |
1324 | pv_time_ops.get_wallclock = lguest_get_wallclock; | 1317 | x86_init.irqs.intr_init = lguest_init_IRQ; |
1325 | pv_time_ops.time_init = lguest_time_init; | 1318 | x86_init.timers.timer_init = lguest_time_init; |
1326 | pv_time_ops.get_tsc_khz = lguest_tsc_khz; | 1319 | x86_platform.calibrate_tsc = lguest_tsc_khz; |
1320 | x86_platform.get_wallclock = lguest_get_wallclock; | ||
1327 | 1321 | ||
1328 | /* | 1322 | /* |
1329 | * Now is a good time to look at the implementations of these functions | 1323 | * Now is a good time to look at the implementations of these functions |
@@ -1365,10 +1359,13 @@ __init void lguest_init(void) | |||
1365 | 1359 | ||
1366 | /* | 1360 | /* |
1367 | * If we don't initialize the lock dependency checker now, it crashes | 1361 | * If we don't initialize the lock dependency checker now, it crashes |
1368 | * paravirt_disable_iospace. | 1362 | * atomic_notifier_chain_register, then paravirt_disable_iospace. |
1369 | */ | 1363 | */ |
1370 | lockdep_init(); | 1364 | lockdep_init(); |
1371 | 1365 | ||
1366 | /* Hook in our special panic hypercall code. */ | ||
1367 | atomic_notifier_chain_register(&panic_notifier_list, &paniced); | ||
1368 | |||
1372 | /* | 1369 | /* |
1373 | * The IDE code spends about 3 seconds probing for disks: if we reserve | 1370 | * The IDE code spends about 3 seconds probing for disks: if we reserve |
1374 | * all the I/O ports up front it can't get them and so doesn't probe. | 1371 | * all the I/O ports up front it can't get them and so doesn't probe. |
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 9e609206fac9..85f5db95c60f 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile | |||
@@ -16,7 +16,9 @@ ifeq ($(CONFIG_X86_32),y) | |||
16 | lib-y += checksum_32.o | 16 | lib-y += checksum_32.o |
17 | lib-y += strstr_32.o | 17 | lib-y += strstr_32.o |
18 | lib-y += semaphore_32.o string_32.o | 18 | lib-y += semaphore_32.o string_32.o |
19 | 19 | ifneq ($(CONFIG_X86_CMPXCHG64),y) | |
20 | lib-y += cmpxchg8b_emu.o | ||
21 | endif | ||
20 | lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o | 22 | lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o |
21 | else | 23 | else |
22 | obj-y += io_64.o iomap_copy_64.o | 24 | obj-y += io_64.o iomap_copy_64.o |
diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S new file mode 100644 index 000000000000..828cb710dec2 --- /dev/null +++ b/arch/x86/lib/cmpxchg8b_emu.S | |||
@@ -0,0 +1,57 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or | ||
3 | * modify it under the terms of the GNU General Public License | ||
4 | * as published by the Free Software Foundation; version 2 | ||
5 | * of the License. | ||
6 | * | ||
7 | */ | ||
8 | |||
9 | #include <linux/linkage.h> | ||
10 | #include <asm/alternative-asm.h> | ||
11 | #include <asm/frame.h> | ||
12 | #include <asm/dwarf2.h> | ||
13 | |||
14 | |||
15 | .text | ||
16 | |||
17 | /* | ||
18 | * Inputs: | ||
19 | * %esi : memory location to compare | ||
20 | * %eax : low 32 bits of old value | ||
21 | * %edx : high 32 bits of old value | ||
22 | * %ebx : low 32 bits of new value | ||
23 | * %ecx : high 32 bits of new value | ||
24 | */ | ||
25 | ENTRY(cmpxchg8b_emu) | ||
26 | CFI_STARTPROC | ||
27 | |||
28 | # | ||
29 | # Emulate 'cmpxchg8b (%esi)' on UP except we don't | ||
30 | # set the whole ZF thing (caller will just compare | ||
31 | # eax:edx with the expected value) | ||
32 | # | ||
33 | cmpxchg8b_emu: | ||
34 | pushfl | ||
35 | cli | ||
36 | |||
37 | cmpl (%esi), %eax | ||
38 | jne not_same | ||
39 | cmpl 4(%esi), %edx | ||
40 | jne half_same | ||
41 | |||
42 | movl %ebx, (%esi) | ||
43 | movl %ecx, 4(%esi) | ||
44 | |||
45 | popfl | ||
46 | ret | ||
47 | |||
48 | not_same: | ||
49 | movl (%esi), %eax | ||
50 | half_same: | ||
51 | movl 4(%esi), %edx | ||
52 | |||
53 | popfl | ||
54 | ret | ||
55 | |||
56 | CFI_ENDPROC | ||
57 | ENDPROC(cmpxchg8b_emu) | ||
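On its own the routine is only half the story; the caller has to pick between it and the native instruction at runtime, which is also why the Makefile hunk above only builds it when CONFIG_X86_CMPXCHG64 is unset. A hedged sketch of a cmpxchg64() wrapper using alternative_io(), along the lines of what this series adds to asm/cmpxchg_32.h (treat the exact constraints as illustrative):

    /* Sketch: patched to the native instruction on CX8-capable CPUs,
     * otherwise calls cmpxchg8b_emu with %esi pointing at the target. */
    #define cmpxchg64(ptr, o, n)                                              \
    ({                                                                        \
            __typeof__(*(ptr)) __ret;                                         \
            alternative_io("call cmpxchg8b_emu",                              \
                           "lock; cmpxchg8b (%%esi)",                         \
                           X86_FEATURE_CX8,                                   \
                           "=A" (__ret),                                      \
                           "S" ((ptr)), "0" ((unsigned long long)(o)),        \
                           "b" ((unsigned int)(n)),                           \
                           "c" ((unsigned int)((unsigned long long)(n) >> 32))); \
            __ret;                                                            \
    })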
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 9b5a9f59a478..06630d26e56d 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile | |||
@@ -1,9 +1,10 @@ | |||
1 | obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ | 1 | obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ |
2 | pat.o pgtable.o physaddr.o gup.o | 2 | pat.o pgtable.o physaddr.o gup.o setup_nx.o |
3 | 3 | ||
4 | # Make sure __phys_addr has no stackprotector | 4 | # Make sure __phys_addr has no stackprotector |
5 | nostackp := $(call cc-option, -fno-stack-protector) | 5 | nostackp := $(call cc-option, -fno-stack-protector) |
6 | CFLAGS_physaddr.o := $(nostackp) | 6 | CFLAGS_physaddr.o := $(nostackp) |
7 | CFLAGS_setup_nx.o := $(nostackp) | ||
7 | 8 | ||
8 | obj-$(CONFIG_SMP) += tlb.o | 9 | obj-$(CONFIG_SMP) += tlb.o |
9 | 10 | ||
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 775a020990a5..f4cee9028cf0 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -10,7 +10,7 @@ | |||
10 | #include <linux/bootmem.h> /* max_low_pfn */ | 10 | #include <linux/bootmem.h> /* max_low_pfn */ |
11 | #include <linux/kprobes.h> /* __kprobes, ... */ | 11 | #include <linux/kprobes.h> /* __kprobes, ... */ |
12 | #include <linux/mmiotrace.h> /* kmmio_handler, ... */ | 12 | #include <linux/mmiotrace.h> /* kmmio_handler, ... */ |
13 | #include <linux/perf_counter.h> /* perf_swcounter_event */ | 13 | #include <linux/perf_event.h> /* perf_sw_event */ |
14 | 14 | ||
15 | #include <asm/traps.h> /* dotraplinkage, ... */ | 15 | #include <asm/traps.h> /* dotraplinkage, ... */ |
16 | #include <asm/pgalloc.h> /* pgd_*(), ... */ | 16 | #include <asm/pgalloc.h> /* pgd_*(), ... */ |
@@ -167,6 +167,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address, | |||
167 | info.si_errno = 0; | 167 | info.si_errno = 0; |
168 | info.si_code = si_code; | 168 | info.si_code = si_code; |
169 | info.si_addr = (void __user *)address; | 169 | info.si_addr = (void __user *)address; |
170 | info.si_addr_lsb = si_code == BUS_MCEERR_AR ? PAGE_SHIFT : 0; | ||
170 | 171 | ||
171 | force_sig_info(si_signo, &info, tsk); | 172 | force_sig_info(si_signo, &info, tsk); |
172 | } | 173 | } |
@@ -790,10 +791,12 @@ out_of_memory(struct pt_regs *regs, unsigned long error_code, | |||
790 | } | 791 | } |
791 | 792 | ||
792 | static void | 793 | static void |
793 | do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address) | 794 | do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, |
795 | unsigned int fault) | ||
794 | { | 796 | { |
795 | struct task_struct *tsk = current; | 797 | struct task_struct *tsk = current; |
796 | struct mm_struct *mm = tsk->mm; | 798 | struct mm_struct *mm = tsk->mm; |
799 | int code = BUS_ADRERR; | ||
797 | 800 | ||
798 | up_read(&mm->mmap_sem); | 801 | up_read(&mm->mmap_sem); |
799 | 802 | ||
@@ -809,7 +812,15 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address) | |||
809 | tsk->thread.error_code = error_code; | 812 | tsk->thread.error_code = error_code; |
810 | tsk->thread.trap_no = 14; | 813 | tsk->thread.trap_no = 14; |
811 | 814 | ||
812 | force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); | 815 | #ifdef CONFIG_MEMORY_FAILURE |
816 | if (fault & VM_FAULT_HWPOISON) { | ||
817 | printk(KERN_ERR | ||
818 | "MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n", | ||
819 | tsk->comm, tsk->pid, address); | ||
820 | code = BUS_MCEERR_AR; | ||
821 | } | ||
822 | #endif | ||
823 | force_sig_info_fault(SIGBUS, code, address, tsk); | ||
813 | } | 824 | } |
814 | 825 | ||
815 | static noinline void | 826 | static noinline void |
@@ -819,8 +830,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, | |||
819 | if (fault & VM_FAULT_OOM) { | 830 | if (fault & VM_FAULT_OOM) { |
820 | out_of_memory(regs, error_code, address); | 831 | out_of_memory(regs, error_code, address); |
821 | } else { | 832 | } else { |
822 | if (fault & VM_FAULT_SIGBUS) | 833 | if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON)) |
823 | do_sigbus(regs, error_code, address); | 834 | do_sigbus(regs, error_code, address, fault); |
824 | else | 835 | else |
825 | BUG(); | 836 | BUG(); |
826 | } | 837 | } |
@@ -1017,7 +1028,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
1017 | if (unlikely(error_code & PF_RSVD)) | 1028 | if (unlikely(error_code & PF_RSVD)) |
1018 | pgtable_bad(regs, error_code, address); | 1029 | pgtable_bad(regs, error_code, address); |
1019 | 1030 | ||
1020 | perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); | 1031 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); |
1021 | 1032 | ||
1022 | /* | 1033 | /* |
1023 | * If we're in an interrupt, have no user context or are running | 1034 | * If we're in an interrupt, have no user context or are running |
@@ -1114,11 +1125,11 @@ good_area: | |||
1114 | 1125 | ||
1115 | if (fault & VM_FAULT_MAJOR) { | 1126 | if (fault & VM_FAULT_MAJOR) { |
1116 | tsk->maj_flt++; | 1127 | tsk->maj_flt++; |
1117 | perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, | 1128 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, |
1118 | regs, address); | 1129 | regs, address); |
1119 | } else { | 1130 | } else { |
1120 | tsk->min_flt++; | 1131 | tsk->min_flt++; |
1121 | perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, | 1132 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, |
1122 | regs, address); | 1133 | regs, address); |
1123 | } | 1134 | } |
1124 | 1135 | ||
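From userspace, the hwpoison path added above surfaces as a SIGBUS whose si_code is BUS_MCEERR_AR and whose si_addr_lsb encodes the granularity of the poisoned region (PAGE_SHIFT for a whole page, per the force_sig_info_fault hunk). A minimal sketch of a consumer (a hypothetical test program, not part of this patch, assuming the libc exposes the si_addr_lsb field):

    #include <signal.h>
    #include <stdio.h>
    #include <stdlib.h>

    #ifndef BUS_MCEERR_AR
    #define BUS_MCEERR_AR 4    /* "action required", from asm-generic/siginfo.h */
    #endif

    static void sigbus_handler(int sig, siginfo_t *info, void *ctx)
    {
        if (info->si_code == BUS_MCEERR_AR)
            /* si_addr is the poisoned address; si_addr_lsb is
             * PAGE_SHIFT when a whole page is poisoned */
            fprintf(stderr, "hw poison at %p (lsb %d)\n",
                    info->si_addr, (int)info->si_addr_lsb);
        _exit(1);
    }

    int main(void)
    {
        struct sigaction sa = {
            .sa_sigaction = sigbus_handler,
            .sa_flags     = SA_SIGINFO,
        };

        sigaction(SIGBUS, &sa, NULL);
        /* ... touch memory the kernel may have marked poisoned ... */
        return 0;
    }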
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 0607119cef94..73ffd5536f62 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
@@ -28,69 +28,6 @@ int direct_gbpages | |||
28 | #endif | 28 | #endif |
29 | ; | 29 | ; |
30 | 30 | ||
31 | int nx_enabled; | ||
32 | |||
33 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) | ||
34 | static int disable_nx __cpuinitdata; | ||
35 | |||
36 | /* | ||
37 | * noexec = on|off | ||
38 | * | ||
39 | * Control non-executable mappings for processes. | ||
40 | * | ||
41 | * on Enable | ||
42 | * off Disable | ||
43 | */ | ||
44 | static int __init noexec_setup(char *str) | ||
45 | { | ||
46 | if (!str) | ||
47 | return -EINVAL; | ||
48 | if (!strncmp(str, "on", 2)) { | ||
49 | __supported_pte_mask |= _PAGE_NX; | ||
50 | disable_nx = 0; | ||
51 | } else if (!strncmp(str, "off", 3)) { | ||
52 | disable_nx = 1; | ||
53 | __supported_pte_mask &= ~_PAGE_NX; | ||
54 | } | ||
55 | return 0; | ||
56 | } | ||
57 | early_param("noexec", noexec_setup); | ||
58 | #endif | ||
59 | |||
60 | #ifdef CONFIG_X86_PAE | ||
61 | static void __init set_nx(void) | ||
62 | { | ||
63 | unsigned int v[4], l, h; | ||
64 | |||
65 | if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) { | ||
66 | cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]); | ||
67 | |||
68 | if ((v[3] & (1 << 20)) && !disable_nx) { | ||
69 | rdmsr(MSR_EFER, l, h); | ||
70 | l |= EFER_NX; | ||
71 | wrmsr(MSR_EFER, l, h); | ||
72 | nx_enabled = 1; | ||
73 | __supported_pte_mask |= _PAGE_NX; | ||
74 | } | ||
75 | } | ||
76 | } | ||
77 | #else | ||
78 | static inline void set_nx(void) | ||
79 | { | ||
80 | } | ||
81 | #endif | ||
82 | |||
83 | #ifdef CONFIG_X86_64 | ||
84 | void __cpuinit check_efer(void) | ||
85 | { | ||
86 | unsigned long efer; | ||
87 | |||
88 | rdmsrl(MSR_EFER, efer); | ||
89 | if (!(efer & EFER_NX) || disable_nx) | ||
90 | __supported_pte_mask &= ~_PAGE_NX; | ||
91 | } | ||
92 | #endif | ||
93 | |||
94 | static void __init find_early_table_space(unsigned long end, int use_pse, | 31 | static void __init find_early_table_space(unsigned long end, int use_pse, |
95 | int use_gbpages) | 32 | int use_gbpages) |
96 | { | 33 | { |
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 3cd7711bb949..30938c1d8d5d 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -84,7 +84,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd) | |||
84 | #ifdef CONFIG_X86_PAE | 84 | #ifdef CONFIG_X86_PAE |
85 | if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { | 85 | if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { |
86 | if (after_bootmem) | 86 | if (after_bootmem) |
87 | pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); | 87 | pmd_table = (pmd_t *)alloc_bootmem_pages(PAGE_SIZE); |
88 | else | 88 | else |
89 | pmd_table = (pmd_t *)alloc_low_page(); | 89 | pmd_table = (pmd_t *)alloc_low_page(); |
90 | paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); | 90 | paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); |
@@ -116,7 +116,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) | |||
116 | #endif | 116 | #endif |
117 | if (!page_table) | 117 | if (!page_table) |
118 | page_table = | 118 | page_table = |
119 | (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); | 119 | (pte_t *)alloc_bootmem_pages(PAGE_SIZE); |
120 | } else | 120 | } else |
121 | page_table = (pte_t *)alloc_low_page(); | 121 | page_table = (pte_t *)alloc_low_page(); |
122 | 122 | ||
@@ -857,8 +857,6 @@ static void __init test_wp_bit(void) | |||
857 | } | 857 | } |
858 | } | 858 | } |
859 | 859 | ||
860 | static struct kcore_list kcore_mem, kcore_vmalloc; | ||
861 | |||
862 | void __init mem_init(void) | 860 | void __init mem_init(void) |
863 | { | 861 | { |
864 | int codesize, reservedpages, datasize, initsize; | 862 | int codesize, reservedpages, datasize, initsize; |
@@ -886,13 +884,9 @@ void __init mem_init(void) | |||
886 | datasize = (unsigned long) &_edata - (unsigned long) &_etext; | 884 | datasize = (unsigned long) &_edata - (unsigned long) &_etext; |
887 | initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; | 885 | initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; |
888 | 886 | ||
889 | kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT); | ||
890 | kclist_add(&kcore_vmalloc, (void *)VMALLOC_START, | ||
891 | VMALLOC_END-VMALLOC_START); | ||
892 | |||
893 | printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, " | 887 | printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, " |
894 | "%dk reserved, %dk data, %dk init, %ldk highmem)\n", | 888 | "%dk reserved, %dk data, %dk init, %ldk highmem)\n", |
895 | (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), | 889 | nr_free_pages() << (PAGE_SHIFT-10), |
896 | num_physpages << (PAGE_SHIFT-10), | 890 | num_physpages << (PAGE_SHIFT-10), |
897 | codesize >> 10, | 891 | codesize >> 10, |
898 | reservedpages << (PAGE_SHIFT-10), | 892 | reservedpages << (PAGE_SHIFT-10), |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index ea56b8cbb6a6..5a4398a6006b 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -647,8 +647,7 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); | |||
647 | 647 | ||
648 | #endif /* CONFIG_MEMORY_HOTPLUG */ | 648 | #endif /* CONFIG_MEMORY_HOTPLUG */ |
649 | 649 | ||
650 | static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, | 650 | static struct kcore_list kcore_vsyscall; |
651 | kcore_modules, kcore_vsyscall; | ||
652 | 651 | ||
653 | void __init mem_init(void) | 652 | void __init mem_init(void) |
654 | { | 653 | { |
@@ -677,17 +676,12 @@ void __init mem_init(void) | |||
677 | initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; | 676 | initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; |
678 | 677 | ||
679 | /* Register memory areas for /proc/kcore */ | 678 | /* Register memory areas for /proc/kcore */ |
680 | kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT); | ||
681 | kclist_add(&kcore_vmalloc, (void *)VMALLOC_START, | ||
682 | VMALLOC_END-VMALLOC_START); | ||
683 | kclist_add(&kcore_kernel, &_stext, _end - _stext); | ||
684 | kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN); | ||
685 | kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START, | 679 | kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START, |
686 | VSYSCALL_END - VSYSCALL_START); | 680 | VSYSCALL_END - VSYSCALL_START, KCORE_OTHER); |
687 | 681 | ||
688 | printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, " | 682 | printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, " |
689 | "%ldk absent, %ldk reserved, %ldk data, %ldk init)\n", | 683 | "%ldk absent, %ldk reserved, %ldk data, %ldk init)\n", |
690 | (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), | 684 | nr_free_pages() << (PAGE_SHIFT-10), |
691 | max_pfn << (PAGE_SHIFT-10), | 685 | max_pfn << (PAGE_SHIFT-10), |
692 | codesize >> 10, | 686 | codesize >> 10, |
693 | absent_pages << (PAGE_SHIFT-10), | 687 | absent_pages << (PAGE_SHIFT-10), |
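The kcore hunks above track an interface change: the generic /proc/kcore code now registers RAM, vmalloc, kernel text and modules itself, and kclist_add() grew a type argument, so the arch only registers truly arch-specific regions. A sketch of the new call shape (assuming the KCORE_* classifiers from this series; KCORE_OTHER marks a region the generic code cannot classify on its own):

    static struct kcore_list kcore_vsyscall;

    static void __init register_vsyscall_kcore(void)
    {
        /* fourth argument is the new classification consumed by
         * fs/proc/kcore.c (KCORE_RAM, KCORE_TEXT, KCORE_VMALLOC, ...) */
        kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
                   VSYSCALL_END - VSYSCALL_START, KCORE_OTHER);
    }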
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index fe6f84ca121e..84e236ce76ba 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c | |||
@@ -21,7 +21,7 @@ | |||
21 | #include <linux/module.h> | 21 | #include <linux/module.h> |
22 | #include <linux/highmem.h> | 22 | #include <linux/highmem.h> |
23 | 23 | ||
24 | int is_io_mapping_possible(resource_size_t base, unsigned long size) | 24 | static int is_io_mapping_possible(resource_size_t base, unsigned long size) |
25 | { | 25 | { |
26 | #if !defined(CONFIG_X86_PAE) && defined(CONFIG_PHYS_ADDR_T_64BIT) | 26 | #if !defined(CONFIG_X86_PAE) && defined(CONFIG_PHYS_ADDR_T_64BIT) |
27 | /* There is no way to map greater than 1 << 32 address without PAE */ | 27 | /* There is no way to map greater than 1 << 32 address without PAE */ |
@@ -30,7 +30,30 @@ int is_io_mapping_possible(resource_size_t base, unsigned long size) | |||
30 | #endif | 30 | #endif |
31 | return 1; | 31 | return 1; |
32 | } | 32 | } |
33 | EXPORT_SYMBOL_GPL(is_io_mapping_possible); | 33 | |
34 | int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot) | ||
35 | { | ||
36 | unsigned long flag = _PAGE_CACHE_WC; | ||
37 | int ret; | ||
38 | |||
39 | if (!is_io_mapping_possible(base, size)) | ||
40 | return -EINVAL; | ||
41 | |||
42 | ret = io_reserve_memtype(base, base + size, &flag); | ||
43 | if (ret) | ||
44 | return ret; | ||
45 | |||
46 | *prot = __pgprot(__PAGE_KERNEL | flag); | ||
47 | return 0; | ||
48 | } | ||
49 | EXPORT_SYMBOL_GPL(iomap_create_wc); | ||
50 | |||
51 | void | ||
52 | iomap_free(resource_size_t base, unsigned long size) | ||
53 | { | ||
54 | io_free_memtype(base, base + size); | ||
55 | } | ||
56 | EXPORT_SYMBOL_GPL(iomap_free); | ||
34 | 57 | ||
35 | void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) | 58 | void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) |
36 | { | 59 | { |
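A sketch of how a driver would pair the two new exports (assumed usage; kmap_atomic_prot_pfn() is the existing mapper whose opening brace closes this hunk):

    static void *map_one_wc_page(resource_size_t base, unsigned long size,
                                 unsigned long pfn)
    {
        pgprot_t prot;

        /* reserve the whole range as WC; PAT may demote it to UC- */
        if (iomap_create_wc(base, size, &prot))
            return NULL;

        /* short-lived, per-CPU mapping using the reserved protection */
        return kmap_atomic_prot_pfn(pfn, KM_USER0, prot);
    }

    /* later: kunmap_atomic(vaddr, KM_USER0); iomap_free(base, size); */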
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 04e1ad60c63a..334e63ca7b2b 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c | |||
@@ -158,24 +158,14 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, | |||
158 | retval = reserve_memtype(phys_addr, (u64)phys_addr + size, | 158 | retval = reserve_memtype(phys_addr, (u64)phys_addr + size, |
159 | prot_val, &new_prot_val); | 159 | prot_val, &new_prot_val); |
160 | if (retval) { | 160 | if (retval) { |
161 | pr_debug("Warning: reserve_memtype returned %d\n", retval); | 161 | printk(KERN_ERR "ioremap reserve_memtype failed %d\n", retval); |
162 | return NULL; | 162 | return NULL; |
163 | } | 163 | } |
164 | 164 | ||
165 | if (prot_val != new_prot_val) { | 165 | if (prot_val != new_prot_val) { |
166 | /* | 166 | if (!is_new_memtype_allowed(phys_addr, size, |
167 | * Do not fallback to certain memory types with certain | 167 | prot_val, new_prot_val)) { |
168 | * requested type: | 168 | printk(KERN_ERR |
169 | * - request is uc-, return cannot be write-back | ||
170 | * - request is uc-, return cannot be write-combine | ||
171 | * - request is write-combine, return cannot be write-back | ||
172 | */ | ||
173 | if ((prot_val == _PAGE_CACHE_UC_MINUS && | ||
174 | (new_prot_val == _PAGE_CACHE_WB || | ||
175 | new_prot_val == _PAGE_CACHE_WC)) || | ||
176 | (prot_val == _PAGE_CACHE_WC && | ||
177 | new_prot_val == _PAGE_CACHE_WB)) { | ||
178 | pr_debug( | ||
179 | "ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n", | 169 | "ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n", |
180 | (unsigned long long)phys_addr, | 170 | (unsigned long long)phys_addr, |
181 | (unsigned long long)(phys_addr + size), | 171 | (unsigned long long)(phys_addr + size), |
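is_new_memtype_allowed() itself is not in this diff; going by the rules in the comment this hunk deletes, the consolidated check has roughly this shape (a sketch, not the actual helper):

    static inline int is_new_memtype_allowed_sketch(unsigned long req,
                                                    unsigned long got)
    {
        /* request UC-: result must not be WB or WC */
        if (req == _PAGE_CACHE_UC_MINUS &&
            (got == _PAGE_CACHE_WB || got == _PAGE_CACHE_WC))
            return 0;
        /* request WC: result must not be WB */
        if (req == _PAGE_CACHE_WC && got == _PAGE_CACHE_WB)
            return 0;
        return 1;
    }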
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c index 528bf954eb74..8cc183344140 100644 --- a/arch/x86/mm/kmemcheck/kmemcheck.c +++ b/arch/x86/mm/kmemcheck/kmemcheck.c | |||
@@ -225,9 +225,6 @@ void kmemcheck_hide(struct pt_regs *regs) | |||
225 | 225 | ||
226 | BUG_ON(!irqs_disabled()); | 226 | BUG_ON(!irqs_disabled()); |
227 | 227 | ||
228 | if (data->balance == 0) | ||
229 | return; | ||
230 | |||
231 | if (unlikely(data->balance != 1)) { | 228 | if (unlikely(data->balance != 1)) { |
232 | kmemcheck_show_all(); | 229 | kmemcheck_show_all(); |
233 | kmemcheck_error_save_bug(regs); | 230 | kmemcheck_error_save_bug(regs); |
diff --git a/arch/x86/mm/kmemcheck/shadow.c b/arch/x86/mm/kmemcheck/shadow.c index e773b6bd0079..3f66b82076a3 100644 --- a/arch/x86/mm/kmemcheck/shadow.c +++ b/arch/x86/mm/kmemcheck/shadow.c | |||
@@ -1,7 +1,6 @@ | |||
1 | #include <linux/kmemcheck.h> | 1 | #include <linux/kmemcheck.h> |
2 | #include <linux/module.h> | 2 | #include <linux/module.h> |
3 | #include <linux/mm.h> | 3 | #include <linux/mm.h> |
4 | #include <linux/module.h> | ||
5 | 4 | ||
6 | #include <asm/page.h> | 5 | #include <asm/page.h> |
7 | #include <asm/pgtable.h> | 6 | #include <asm/pgtable.h> |
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 165829600566..c8191defc38a 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c | |||
@@ -29,13 +29,26 @@ | |||
29 | #include <linux/random.h> | 29 | #include <linux/random.h> |
30 | #include <linux/limits.h> | 30 | #include <linux/limits.h> |
31 | #include <linux/sched.h> | 31 | #include <linux/sched.h> |
32 | #include <asm/elf.h> | ||
33 | |||
34 | static unsigned int stack_maxrandom_size(void) | ||
35 | { | ||
36 | unsigned int max = 0; | ||
37 | if ((current->flags & PF_RANDOMIZE) && | ||
38 | !(current->personality & ADDR_NO_RANDOMIZE)) { | ||
39 | max = ((-1U) & STACK_RND_MASK) << PAGE_SHIFT; | ||
40 | } | ||
41 | |||
42 | return max; | ||
43 | } | ||
44 | |||
32 | 45 | ||
33 | /* | 46 | /* |
34 | * Top of mmap area (just below the process stack). | 47 | * Top of mmap area (just below the process stack). |
35 | * | 48 | * |
36 | * Leave an at least ~128 MB hole. | 49 | * Leave at least a ~128 MB hole, plus room for stack randomization. |
37 | */ | 50 | */ |
38 | #define MIN_GAP (128*1024*1024) | 51 | #define MIN_GAP (128*1024*1024UL + stack_maxrandom_size()) |
39 | #define MAX_GAP (TASK_SIZE/6*5) | 52 | #define MAX_GAP (TASK_SIZE/6*5) |
40 | 53 | ||
41 | /* | 54 | /* |
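A worked example of what the new MIN_GAP buys, assuming the 32-bit values STACK_RND_MASK == 0x7ff and PAGE_SHIFT == 12 (the 64-bit mask is larger):

    #include <stdio.h>

    int main(void)
    {
        unsigned long stack_rnd_mask = 0x7ff;  /* assumed 32-bit value */
        unsigned long page_shift = 12;         /* assumed */
        unsigned long max_rnd = stack_rnd_mask << page_shift;

        /* up to 2047 randomized pages, just under 8 MB */
        printf("stack_maxrandom_size: %lu bytes\n", max_rnd);
        /* the mmap top keeps at least this much below the stack */
        printf("MIN_GAP: %lu bytes\n", (128UL << 20) + max_rnd);
        return 0;
    }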
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 7e600c1962db..dd38bfbefd1f 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/seq_file.h> | 12 | #include <linux/seq_file.h> |
13 | #include <linux/debugfs.h> | 13 | #include <linux/debugfs.h> |
14 | #include <linux/pfn.h> | 14 | #include <linux/pfn.h> |
15 | #include <linux/percpu.h> | ||
15 | 16 | ||
16 | #include <asm/e820.h> | 17 | #include <asm/e820.h> |
17 | #include <asm/processor.h> | 18 | #include <asm/processor.h> |
@@ -143,6 +144,7 @@ void clflush_cache_range(void *vaddr, unsigned int size) | |||
143 | 144 | ||
144 | mb(); | 145 | mb(); |
145 | } | 146 | } |
147 | EXPORT_SYMBOL_GPL(clflush_cache_range); | ||
146 | 148 | ||
147 | static void __cpa_flush_all(void *arg) | 149 | static void __cpa_flush_all(void *arg) |
148 | { | 150 | { |
@@ -686,7 +688,7 @@ static int cpa_process_alias(struct cpa_data *cpa) | |||
686 | { | 688 | { |
687 | struct cpa_data alias_cpa; | 689 | struct cpa_data alias_cpa; |
688 | unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT); | 690 | unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT); |
689 | unsigned long vaddr, remapped; | 691 | unsigned long vaddr; |
690 | int ret; | 692 | int ret; |
691 | 693 | ||
692 | if (cpa->pfn >= max_pfn_mapped) | 694 | if (cpa->pfn >= max_pfn_mapped) |
@@ -744,24 +746,6 @@ static int cpa_process_alias(struct cpa_data *cpa) | |||
744 | } | 746 | } |
745 | #endif | 747 | #endif |
746 | 748 | ||
747 | /* | ||
748 | * If the PMD page was partially used for per-cpu remapping, | ||
749 | * the recycled area needs to be split and modified. Because | ||
750 | * the area is always proper subset of a PMD page | ||
751 | * cpa->numpages is guaranteed to be 1 for these areas, so | ||
752 | * there's no need to loop over and check for further remaps. | ||
753 | */ | ||
754 | remapped = (unsigned long)pcpu_lpage_remapped((void *)laddr); | ||
755 | if (remapped) { | ||
756 | WARN_ON(cpa->numpages > 1); | ||
757 | alias_cpa = *cpa; | ||
758 | alias_cpa.vaddr = &remapped; | ||
759 | alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); | ||
760 | ret = __change_page_attr_set_clr(&alias_cpa, 0); | ||
761 | if (ret) | ||
762 | return ret; | ||
763 | } | ||
764 | |||
765 | return 0; | 749 | return 0; |
766 | } | 750 | } |
767 | 751 | ||
@@ -822,6 +806,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, | |||
822 | { | 806 | { |
823 | struct cpa_data cpa; | 807 | struct cpa_data cpa; |
824 | int ret, cache, checkalias; | 808 | int ret, cache, checkalias; |
809 | unsigned long baddr = 0; | ||
825 | 810 | ||
826 | /* | 811 | /* |
827 | * Check, if we are requested to change a not supported | 812 | * Check, if we are requested to change a not supported |
@@ -853,6 +838,11 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, | |||
853 | */ | 838 | */ |
854 | WARN_ON_ONCE(1); | 839 | WARN_ON_ONCE(1); |
855 | } | 840 | } |
841 | /* | ||
842 | * Save address for cache flush. *addr is modified in the call | ||
843 | * to __change_page_attr_set_clr() below. | ||
844 | */ | ||
845 | baddr = *addr; | ||
856 | } | 846 | } |
857 | 847 | ||
858 | /* Must avoid aliasing mappings in the highmem code */ | 848 | /* Must avoid aliasing mappings in the highmem code */ |
@@ -900,7 +890,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, | |||
900 | cpa_flush_array(addr, numpages, cache, | 890 | cpa_flush_array(addr, numpages, cache, |
901 | cpa.flags, pages); | 891 | cpa.flags, pages); |
902 | } else | 892 | } else |
903 | cpa_flush_range(*addr, numpages, cache); | 893 | cpa_flush_range(baddr, numpages, cache); |
904 | } else | 894 | } else |
905 | cpa_flush_all(cache); | 895 | cpa_flush_all(cache); |
906 | 896 | ||
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index b2f7d3e59b86..e78cd0ec2bcf 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/gfp.h> | 15 | #include <linux/gfp.h> |
16 | #include <linux/mm.h> | 16 | #include <linux/mm.h> |
17 | #include <linux/fs.h> | 17 | #include <linux/fs.h> |
18 | #include <linux/rbtree.h> | ||
18 | 19 | ||
19 | #include <asm/cacheflush.h> | 20 | #include <asm/cacheflush.h> |
20 | #include <asm/processor.h> | 21 | #include <asm/processor.h> |
@@ -80,6 +81,7 @@ enum { | |||
80 | void pat_init(void) | 81 | void pat_init(void) |
81 | { | 82 | { |
82 | u64 pat; | 83 | u64 pat; |
84 | bool boot_cpu = !boot_pat_state; | ||
83 | 85 | ||
84 | if (!pat_enabled) | 86 | if (!pat_enabled) |
85 | return; | 87 | return; |
@@ -121,8 +123,10 @@ void pat_init(void) | |||
121 | rdmsrl(MSR_IA32_CR_PAT, boot_pat_state); | 123 | rdmsrl(MSR_IA32_CR_PAT, boot_pat_state); |
122 | 124 | ||
123 | wrmsrl(MSR_IA32_CR_PAT, pat); | 125 | wrmsrl(MSR_IA32_CR_PAT, pat); |
124 | printk(KERN_INFO "x86 PAT enabled: cpu %d, old 0x%Lx, new 0x%Lx\n", | 126 | |
125 | smp_processor_id(), boot_pat_state, pat); | 127 | if (boot_cpu) |
128 | printk(KERN_INFO "x86 PAT enabled: cpu %d, old 0x%Lx, new 0x%Lx\n", | ||
129 | smp_processor_id(), boot_pat_state, pat); | ||
126 | } | 130 | } |
127 | 131 | ||
128 | #undef PAT | 132 | #undef PAT |
@@ -148,11 +152,10 @@ static char *cattr_name(unsigned long flags) | |||
148 | * areas). All the aliases have the same cache attributes of course. | 152 | * areas). All the aliases have the same cache attributes of course. |
149 | * Zero attributes are represented as holes. | 153 | * Zero attributes are represented as holes. |
150 | * | 154 | * |
151 | * Currently the data structure is a list because the number of mappings | 155 | * The data structure is a list that is also organized as an rbtree |
152 | * are expected to be relatively small. If this should be a problem | 156 | * sorted on the start address of memtype range. |
153 | * it could be changed to a rbtree or similar. | ||
154 | * | 157 | * |
155 | * memtype_lock protects the whole list. | 158 | * memtype_lock protects both the linear list and rbtree. |
156 | */ | 159 | */ |
157 | 160 | ||
158 | struct memtype { | 161 | struct memtype { |
@@ -160,11 +163,53 @@ struct memtype { | |||
160 | u64 end; | 163 | u64 end; |
161 | unsigned long type; | 164 | unsigned long type; |
162 | struct list_head nd; | 165 | struct list_head nd; |
166 | struct rb_node rb; | ||
163 | }; | 167 | }; |
164 | 168 | ||
169 | static struct rb_root memtype_rbroot = RB_ROOT; | ||
165 | static LIST_HEAD(memtype_list); | 170 | static LIST_HEAD(memtype_list); |
166 | static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */ | 171 | static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */ |
167 | 172 | ||
173 | static struct memtype *memtype_rb_search(struct rb_root *root, u64 start) | ||
174 | { | ||
175 | struct rb_node *node = root->rb_node; | ||
176 | struct memtype *last_lower = NULL; | ||
177 | |||
178 | while (node) { | ||
179 | struct memtype *data = container_of(node, struct memtype, rb); | ||
180 | |||
181 | if (data->start < start) { | ||
182 | last_lower = data; | ||
183 | node = node->rb_right; | ||
184 | } else if (data->start > start) { | ||
185 | node = node->rb_left; | ||
186 | } else | ||
187 | return data; | ||
188 | } | ||
189 | |||
190 | /* Returns NULL if no entry has a start address <= the requested start */ ||
191 | return last_lower; | ||
192 | } | ||
193 | |||
194 | static void memtype_rb_insert(struct rb_root *root, struct memtype *data) | ||
195 | { | ||
196 | struct rb_node **new = &(root->rb_node); | ||
197 | struct rb_node *parent = NULL; | ||
198 | |||
199 | while (*new) { | ||
200 | struct memtype *this = container_of(*new, struct memtype, rb); | ||
201 | |||
202 | parent = *new; | ||
203 | if (data->start <= this->start) | ||
204 | new = &((*new)->rb_left); | ||
205 | else if (data->start > this->start) | ||
206 | new = &((*new)->rb_right); | ||
207 | } | ||
208 | |||
209 | rb_link_node(&data->rb, parent, new); | ||
210 | rb_insert_color(&data->rb, root); | ||
211 | } | ||
212 | |||
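memtype_rb_search() returns either the exact entry or the nearest entry whose start lies below the requested address, which is what free_memtype() and lookup_memtype() below depend on. A sketch of the lookup pattern (hypothetical caller, holding memtype_lock):

    static unsigned long memtype_lookup_sketch(u64 paddr)
    {
        struct memtype *entry;

        entry = memtype_rb_search(&memtype_rbroot, paddr);
        if (entry && entry->start <= paddr && paddr < entry->end)
            return entry->type;

        return _PAGE_CACHE_UC_MINUS;  /* untracked: be conservative */
    }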
168 | /* | 213 | /* |
169 | * Takes the intersection of the PAT and MTRR memory types and returns | 214 | * Takes the intersection of the PAT and MTRR memory types and returns |
170 | * the resulting memory type as PAT understands it. | 215 | * the resulting memory type as PAT understands it. |
@@ -218,9 +263,6 @@ chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type) | |||
218 | return -EBUSY; | 263 | return -EBUSY; |
219 | } | 264 | } |
220 | 265 | ||
221 | static struct memtype *cached_entry; | ||
222 | static u64 cached_start; | ||
223 | |||
224 | static int pat_pagerange_is_ram(unsigned long start, unsigned long end) | 266 | static int pat_pagerange_is_ram(unsigned long start, unsigned long end) |
225 | { | 267 | { |
226 | int ram_page = 0, not_rampage = 0; | 268 | int ram_page = 0, not_rampage = 0; |
@@ -249,63 +291,61 @@ static int pat_pagerange_is_ram(unsigned long start, unsigned long end) | |||
249 | } | 291 | } |
250 | 292 | ||
251 | /* | 293 | /* |
252 | * For RAM pages, mark the pages as non WB memory type using | 294 | * For RAM pages, we use page flags to mark the pages with appropriate type. |
253 | * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or | 295 | * Here we make two passes: |
254 | * set_memory_wc() on a RAM page at a time before marking it as WB again. | 296 | * - Find the memtype of all the pages in the range, look for any conflicts |
255 | * This is ok, because only one driver will be owning the page and | 297 | * - In case of no conflicts, set the new memtype for pages in the range |
256 | * doing set_memory_*() calls. | ||
257 | * | 298 | * |
258 | * For now, we use PageNonWB to track that the RAM page is being mapped | 299 | * Caller must hold memtype_lock for atomicity. |
259 | * as non WB. In future, we will have to use one more flag | ||
260 | * (or some other mechanism in page_struct) to distinguish between | ||
261 | * UC and WC mapping. | ||
262 | */ | 300 | */ |
263 | static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type, | 301 | static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type, |
264 | unsigned long *new_type) | 302 | unsigned long *new_type) |
265 | { | 303 | { |
266 | struct page *page; | 304 | struct page *page; |
267 | u64 pfn, end_pfn; | 305 | u64 pfn; |
306 | |||
307 | if (req_type == _PAGE_CACHE_UC) { | ||
308 | /* We do not support strong UC */ | ||
309 | WARN_ON_ONCE(1); | ||
310 | req_type = _PAGE_CACHE_UC_MINUS; | ||
311 | } | ||
268 | 312 | ||
269 | for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { | 313 | for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { |
270 | page = pfn_to_page(pfn); | 314 | unsigned long type; |
271 | if (page_mapped(page) || PageNonWB(page)) | ||
272 | goto out; | ||
273 | 315 | ||
274 | SetPageNonWB(page); | 316 | page = pfn_to_page(pfn); |
317 | type = get_page_memtype(page); | ||
318 | if (type != -1) { | ||
319 | printk(KERN_INFO "reserve_ram_pages_type failed " | ||
320 | "0x%Lx-0x%Lx, track 0x%lx, req 0x%lx\n", | ||
321 | start, end, type, req_type); | ||
322 | if (new_type) | ||
323 | *new_type = type; | ||
324 | |||
325 | return -EBUSY; | ||
326 | } | ||
275 | } | 327 | } |
276 | return 0; | ||
277 | 328 | ||
278 | out: | 329 | if (new_type) |
279 | end_pfn = pfn; | 330 | *new_type = req_type; |
280 | for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) { | 331 | |
332 | for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { | ||
281 | page = pfn_to_page(pfn); | 333 | page = pfn_to_page(pfn); |
282 | ClearPageNonWB(page); | 334 | set_page_memtype(page, req_type); |
283 | } | 335 | } |
284 | 336 | return 0; | |
285 | return -EINVAL; | ||
286 | } | 337 | } |
287 | 338 | ||
288 | static int free_ram_pages_type(u64 start, u64 end) | 339 | static int free_ram_pages_type(u64 start, u64 end) |
289 | { | 340 | { |
290 | struct page *page; | 341 | struct page *page; |
291 | u64 pfn, end_pfn; | 342 | u64 pfn; |
292 | 343 | ||
293 | for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { | 344 | for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { |
294 | page = pfn_to_page(pfn); | 345 | page = pfn_to_page(pfn); |
295 | if (page_mapped(page) || !PageNonWB(page)) | 346 | set_page_memtype(page, -1); |
296 | goto out; | ||
297 | |||
298 | ClearPageNonWB(page); | ||
299 | } | 347 | } |
300 | return 0; | 348 | return 0; |
301 | |||
302 | out: | ||
303 | end_pfn = pfn; | ||
304 | for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) { | ||
305 | page = pfn_to_page(pfn); | ||
306 | SetPageNonWB(page); | ||
307 | } | ||
308 | return -EINVAL; | ||
309 | } | 349 | } |
310 | 350 | ||
311 | /* | 351 | /* |
@@ -339,6 +379,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
339 | if (new_type) { | 379 | if (new_type) { |
340 | if (req_type == -1) | 380 | if (req_type == -1) |
341 | *new_type = _PAGE_CACHE_WB; | 381 | *new_type = _PAGE_CACHE_WB; |
382 | else if (req_type == _PAGE_CACHE_WC) | ||
383 | *new_type = _PAGE_CACHE_UC_MINUS; | ||
342 | else | 384 | else |
343 | *new_type = req_type & _PAGE_CACHE_MASK; | 385 | *new_type = req_type & _PAGE_CACHE_MASK; |
344 | } | 386 | } |
@@ -364,11 +406,16 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
364 | *new_type = actual_type; | 406 | *new_type = actual_type; |
365 | 407 | ||
366 | is_range_ram = pat_pagerange_is_ram(start, end); | 408 | is_range_ram = pat_pagerange_is_ram(start, end); |
367 | if (is_range_ram == 1) | 409 | if (is_range_ram == 1) { |
368 | return reserve_ram_pages_type(start, end, req_type, | 410 | |
369 | new_type); | 411 | spin_lock(&memtype_lock); |
370 | else if (is_range_ram < 0) | 412 | err = reserve_ram_pages_type(start, end, req_type, new_type); |
413 | spin_unlock(&memtype_lock); | ||
414 | |||
415 | return err; | ||
416 | } else if (is_range_ram < 0) { | ||
371 | return -EINVAL; | 417 | return -EINVAL; |
418 | } | ||
372 | 419 | ||
373 | new = kmalloc(sizeof(struct memtype), GFP_KERNEL); | 420 | new = kmalloc(sizeof(struct memtype), GFP_KERNEL); |
374 | if (!new) | 421 | if (!new) |
@@ -380,17 +427,11 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
380 | 427 | ||
381 | spin_lock(&memtype_lock); | 428 | spin_lock(&memtype_lock); |
382 | 429 | ||
383 | if (cached_entry && start >= cached_start) | ||
384 | entry = cached_entry; | ||
385 | else | ||
386 | entry = list_entry(&memtype_list, struct memtype, nd); | ||
387 | |||
388 | /* Search for existing mapping that overlaps the current range */ | 430 | /* Search for existing mapping that overlaps the current range */ |
389 | where = NULL; | 431 | where = NULL; |
390 | list_for_each_entry_continue(entry, &memtype_list, nd) { | 432 | list_for_each_entry(entry, &memtype_list, nd) { |
391 | if (end <= entry->start) { | 433 | if (end <= entry->start) { |
392 | where = entry->nd.prev; | 434 | where = entry->nd.prev; |
393 | cached_entry = list_entry(where, struct memtype, nd); | ||
394 | break; | 435 | break; |
395 | } else if (start <= entry->start) { /* end > entry->start */ | 436 | } else if (start <= entry->start) { /* end > entry->start */ |
396 | err = chk_conflict(new, entry, new_type); | 437 | err = chk_conflict(new, entry, new_type); |
@@ -398,8 +439,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
398 | dprintk("Overlap at 0x%Lx-0x%Lx\n", | 439 | dprintk("Overlap at 0x%Lx-0x%Lx\n", |
399 | entry->start, entry->end); | 440 | entry->start, entry->end); |
400 | where = entry->nd.prev; | 441 | where = entry->nd.prev; |
401 | cached_entry = list_entry(where, | ||
402 | struct memtype, nd); | ||
403 | } | 442 | } |
404 | break; | 443 | break; |
405 | } else if (start < entry->end) { /* start > entry->start */ | 444 | } else if (start < entry->end) { /* start > entry->start */ |
@@ -407,8 +446,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
407 | if (!err) { | 446 | if (!err) { |
408 | dprintk("Overlap at 0x%Lx-0x%Lx\n", | 447 | dprintk("Overlap at 0x%Lx-0x%Lx\n", |
409 | entry->start, entry->end); | 448 | entry->start, entry->end); |
410 | cached_entry = list_entry(entry->nd.prev, | ||
411 | struct memtype, nd); | ||
412 | 449 | ||
413 | /* | 450 | /* |
414 | * Move to right position in the linked | 451 | * Move to right position in the linked |
@@ -436,13 +473,13 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
436 | return err; | 473 | return err; |
437 | } | 474 | } |
438 | 475 | ||
439 | cached_start = start; | ||
440 | |||
441 | if (where) | 476 | if (where) |
442 | list_add(&new->nd, where); | 477 | list_add(&new->nd, where); |
443 | else | 478 | else |
444 | list_add_tail(&new->nd, &memtype_list); | 479 | list_add_tail(&new->nd, &memtype_list); |
445 | 480 | ||
481 | memtype_rb_insert(&memtype_rbroot, new); | ||
482 | |||
446 | spin_unlock(&memtype_lock); | 483 | spin_unlock(&memtype_lock); |
447 | 484 | ||
448 | dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", | 485 | dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", |
@@ -454,7 +491,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
454 | 491 | ||
455 | int free_memtype(u64 start, u64 end) | 492 | int free_memtype(u64 start, u64 end) |
456 | { | 493 | { |
457 | struct memtype *entry; | 494 | struct memtype *entry, *saved_entry; |
458 | int err = -EINVAL; | 495 | int err = -EINVAL; |
459 | int is_range_ram; | 496 | int is_range_ram; |
460 | 497 | ||
@@ -466,23 +503,58 @@ int free_memtype(u64 start, u64 end) | |||
466 | return 0; | 503 | return 0; |
467 | 504 | ||
468 | is_range_ram = pat_pagerange_is_ram(start, end); | 505 | is_range_ram = pat_pagerange_is_ram(start, end); |
469 | if (is_range_ram == 1) | 506 | if (is_range_ram == 1) { |
470 | return free_ram_pages_type(start, end); | 507 | |
471 | else if (is_range_ram < 0) | 508 | spin_lock(&memtype_lock); |
509 | err = free_ram_pages_type(start, end); | ||
510 | spin_unlock(&memtype_lock); | ||
511 | |||
512 | return err; | ||
513 | } else if (is_range_ram < 0) { | ||
472 | return -EINVAL; | 514 | return -EINVAL; |
515 | } | ||
473 | 516 | ||
474 | spin_lock(&memtype_lock); | 517 | spin_lock(&memtype_lock); |
475 | list_for_each_entry(entry, &memtype_list, nd) { | 518 | |
519 | entry = memtype_rb_search(&memtype_rbroot, start); | ||
520 | if (unlikely(entry == NULL)) | ||
521 | goto unlock_ret; | ||
522 | |||
523 | /* | ||
524 | * The saved entry points to an entry whose start is the same as or ||
525 | * less than the one we searched for. Now go through the list in both ||
526 | * directions to look for the entry that matches both start and end; ||
527 | * the list is kept sorted by start address. ||
528 | */ | ||
529 | saved_entry = entry; | ||
530 | list_for_each_entry_from(entry, &memtype_list, nd) { | ||
476 | if (entry->start == start && entry->end == end) { | 531 | if (entry->start == start && entry->end == end) { |
477 | if (cached_entry == entry || cached_start == start) | 532 | rb_erase(&entry->rb, &memtype_rbroot); |
478 | cached_entry = NULL; | 533 | list_del(&entry->nd); |
534 | kfree(entry); | ||
535 | err = 0; | ||
536 | break; | ||
537 | } else if (entry->start > start) { | ||
538 | break; | ||
539 | } | ||
540 | } | ||
479 | 541 | ||
542 | if (!err) | ||
543 | goto unlock_ret; | ||
544 | |||
545 | entry = saved_entry; | ||
546 | list_for_each_entry_reverse(entry, &memtype_list, nd) { | ||
547 | if (entry->start == start && entry->end == end) { | ||
548 | rb_erase(&entry->rb, &memtype_rbroot); | ||
480 | list_del(&entry->nd); | 549 | list_del(&entry->nd); |
481 | kfree(entry); | 550 | kfree(entry); |
482 | err = 0; | 551 | err = 0; |
483 | break; | 552 | break; |
553 | } else if (entry->start < start) { | ||
554 | break; | ||
484 | } | 555 | } |
485 | } | 556 | } |
557 | unlock_ret: | ||
486 | spin_unlock(&memtype_lock); | 558 | spin_unlock(&memtype_lock); |
487 | 559 | ||
488 | if (err) { | 560 | if (err) { |
@@ -496,6 +568,101 @@ int free_memtype(u64 start, u64 end) | |||
496 | } | 568 | } |
497 | 569 | ||
498 | 570 | ||
571 | /** | ||
572 | * lookup_memtype - Looks up the memory type for a physical address ||
573 | * @paddr: physical address of which memory type needs to be looked up | ||
574 | * | ||
575 | * Only to be called when PAT is enabled | ||
576 | * | ||
577 | * Returns _PAGE_CACHE_WB, _PAGE_CACHE_WC, _PAGE_CACHE_UC_MINUS or | ||
578 | * _PAGE_CACHE_UC | ||
579 | */ | ||
580 | static unsigned long lookup_memtype(u64 paddr) | ||
581 | { | ||
582 | int rettype = _PAGE_CACHE_WB; | ||
583 | struct memtype *entry; | ||
584 | |||
585 | if (is_ISA_range(paddr, paddr + PAGE_SIZE - 1)) | ||
586 | return rettype; | ||
587 | |||
588 | if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) { | ||
589 | struct page *page; | ||
590 | spin_lock(&memtype_lock); | ||
591 | page = pfn_to_page(paddr >> PAGE_SHIFT); | ||
592 | rettype = get_page_memtype(page); | ||
593 | spin_unlock(&memtype_lock); | ||
594 | /* | ||
595 | * -1 from get_page_memtype() implies RAM page is in its | ||
596 | * default state and not reserved, and hence of type WB | ||
597 | */ | ||
598 | if (rettype == -1) | ||
599 | rettype = _PAGE_CACHE_WB; | ||
600 | |||
601 | return rettype; | ||
602 | } | ||
603 | |||
604 | spin_lock(&memtype_lock); | ||
605 | |||
606 | entry = memtype_rb_search(&memtype_rbroot, paddr); | ||
607 | if (entry != NULL) | ||
608 | rettype = entry->type; | ||
609 | else | ||
610 | rettype = _PAGE_CACHE_UC_MINUS; | ||
611 | |||
612 | spin_unlock(&memtype_lock); | ||
613 | return rettype; | ||
614 | } | ||
615 | |||
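A hypothetical caller, deriving page protection for a single pfn the same way track_pfn_vma_new() does further down in this patch:

    /* sketch, mirroring the vm_insert_pfn path below */
    static pgprot_t prot_for_pfn(struct vm_area_struct *vma, unsigned long pfn)
    {
        unsigned long flags = lookup_memtype((u64)pfn << PAGE_SHIFT);

        return __pgprot((pgprot_val(vma->vm_page_prot) &
                         ~_PAGE_CACHE_MASK) | flags);
    }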
616 | /** | ||
617 | * io_reserve_memtype - Request a memory type mapping for a region of memory | ||
618 | * @start: start (physical address) of the region | ||
619 | * @end: end (physical address) of the region | ||
620 | * @type: Pointer to the requested memory type. On success it holds the ||
621 | * requested type, or a compatible type that was available for the region ||
622 | * | ||
623 | * On success, returns 0 | ||
624 | * On failure, returns non-zero | ||
625 | */ | ||
626 | int io_reserve_memtype(resource_size_t start, resource_size_t end, | ||
627 | unsigned long *type) | ||
628 | { | ||
629 | resource_size_t size = end - start; | ||
630 | unsigned long req_type = *type; | ||
631 | unsigned long new_type; | ||
632 | int ret; | ||
633 | |||
634 | WARN_ON_ONCE(iomem_map_sanity_check(start, size)); | ||
635 | |||
636 | ret = reserve_memtype(start, end, req_type, &new_type); | ||
637 | if (ret) | ||
638 | goto out_err; | ||
639 | |||
640 | if (!is_new_memtype_allowed(start, size, req_type, new_type)) | ||
641 | goto out_free; | ||
642 | |||
643 | if (kernel_map_sync_memtype(start, size, new_type) < 0) | ||
644 | goto out_free; | ||
645 | |||
646 | *type = new_type; | ||
647 | return 0; | ||
648 | |||
649 | out_free: | ||
650 | free_memtype(start, end); | ||
651 | ret = -EBUSY; | ||
652 | out_err: | ||
653 | return ret; | ||
654 | } | ||
655 | |||
656 | /** | ||
657 | * io_free_memtype - Release a memory type mapping for a region of memory | ||
658 | * @start: start (physical address) of the region | ||
659 | * @end: end (physical address) of the region | ||
660 | */ | ||
661 | void io_free_memtype(resource_size_t start, resource_size_t end) | ||
662 | { | ||
663 | free_memtype(start, end); | ||
664 | } | ||
665 | |||
499 | pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, | 666 | pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, |
500 | unsigned long size, pgprot_t vma_prot) | 667 | unsigned long size, pgprot_t vma_prot) |
501 | { | 668 | { |
@@ -577,7 +744,7 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags) | |||
577 | { | 744 | { |
578 | unsigned long id_sz; | 745 | unsigned long id_sz; |
579 | 746 | ||
580 | if (!pat_enabled || base >= __pa(high_memory)) | 747 | if (base >= __pa(high_memory)) |
581 | return 0; | 748 | return 0; |
582 | 749 | ||
583 | id_sz = (__pa(high_memory) < base + size) ? | 750 | id_sz = (__pa(high_memory) < base + size) ? |
@@ -612,11 +779,29 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, | |||
612 | is_ram = pat_pagerange_is_ram(paddr, paddr + size); | 779 | is_ram = pat_pagerange_is_ram(paddr, paddr + size); |
613 | 780 | ||
614 | /* | 781 | /* |
615 | * reserve_pfn_range() doesn't support RAM pages. Maintain the current | 782 | * reserve_pfn_range() for RAM pages. We do not refcount to keep track |
616 | * behavior with RAM pages by returning success. | 783 | * of the number of mappings of RAM pages. We can assert that the type |
784 | * requested matches the type of the first page in the range. ||
617 | */ | 785 | */ |
618 | if (is_ram != 0) | 786 | if (is_ram) { |
787 | if (!pat_enabled) | ||
788 | return 0; | ||
789 | |||
790 | flags = lookup_memtype(paddr); | ||
791 | if (want_flags != flags) { | ||
792 | printk(KERN_WARNING | ||
793 | "%s:%d map pfn RAM range req %s for %Lx-%Lx, got %s\n", | ||
794 | current->comm, current->pid, | ||
795 | cattr_name(want_flags), | ||
796 | (unsigned long long)paddr, | ||
797 | (unsigned long long)(paddr + size), | ||
798 | cattr_name(flags)); | ||
799 | *vma_prot = __pgprot((pgprot_val(*vma_prot) & | ||
800 | (~_PAGE_CACHE_MASK)) | | ||
801 | flags); | ||
802 | } | ||
619 | return 0; | 803 | return 0; |
804 | } | ||
620 | 805 | ||
621 | ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); | 806 | ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); |
622 | if (ret) | 807 | if (ret) |
@@ -678,14 +863,6 @@ int track_pfn_vma_copy(struct vm_area_struct *vma) | |||
678 | unsigned long vma_size = vma->vm_end - vma->vm_start; | 863 | unsigned long vma_size = vma->vm_end - vma->vm_start; |
679 | pgprot_t pgprot; | 864 | pgprot_t pgprot; |
680 | 865 | ||
681 | if (!pat_enabled) | ||
682 | return 0; | ||
683 | |||
684 | /* | ||
685 | * For now, only handle remap_pfn_range() vmas where | ||
686 | * is_linear_pfn_mapping() == TRUE. Handling of | ||
687 | * vm_insert_pfn() is TBD. | ||
688 | */ | ||
689 | if (is_linear_pfn_mapping(vma)) { | 866 | if (is_linear_pfn_mapping(vma)) { |
690 | /* | 867 | /* |
691 | * reserve the whole chunk covered by vma. We need the | 868 | * reserve the whole chunk covered by vma. We need the |
@@ -713,23 +890,24 @@ int track_pfn_vma_copy(struct vm_area_struct *vma) | |||
713 | int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot, | 890 | int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot, |
714 | unsigned long pfn, unsigned long size) | 891 | unsigned long pfn, unsigned long size) |
715 | { | 892 | { |
893 | unsigned long flags; | ||
716 | resource_size_t paddr; | 894 | resource_size_t paddr; |
717 | unsigned long vma_size = vma->vm_end - vma->vm_start; | 895 | unsigned long vma_size = vma->vm_end - vma->vm_start; |
718 | 896 | ||
719 | if (!pat_enabled) | ||
720 | return 0; | ||
721 | |||
722 | /* | ||
723 | * For now, only handle remap_pfn_range() vmas where | ||
724 | * is_linear_pfn_mapping() == TRUE. Handling of | ||
725 | * vm_insert_pfn() is TBD. | ||
726 | */ | ||
727 | if (is_linear_pfn_mapping(vma)) { | 897 | if (is_linear_pfn_mapping(vma)) { |
728 | /* reserve the whole chunk starting from vm_pgoff */ | 898 | /* reserve the whole chunk starting from vm_pgoff */ |
729 | paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; | 899 | paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; |
730 | return reserve_pfn_range(paddr, vma_size, prot, 0); | 900 | return reserve_pfn_range(paddr, vma_size, prot, 0); |
731 | } | 901 | } |
732 | 902 | ||
903 | if (!pat_enabled) | ||
904 | return 0; | ||
905 | |||
906 | /* for vm_insert_pfn and friends, we set prot based on lookup */ | ||
907 | flags = lookup_memtype(pfn << PAGE_SHIFT); | ||
908 | *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) | | ||
909 | flags); | ||
910 | |||
733 | return 0; | 911 | return 0; |
734 | } | 912 | } |
735 | 913 | ||
@@ -744,14 +922,6 @@ void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn, | |||
744 | resource_size_t paddr; | 922 | resource_size_t paddr; |
745 | unsigned long vma_size = vma->vm_end - vma->vm_start; | 923 | unsigned long vma_size = vma->vm_end - vma->vm_start; |
746 | 924 | ||
747 | if (!pat_enabled) | ||
748 | return; | ||
749 | |||
750 | /* | ||
751 | * For now, only handle remap_pfn_range() vmas where | ||
752 | * is_linear_pfn_mapping() == TRUE. Handling of | ||
753 | * vm_insert_pfn() is TBD. | ||
754 | */ | ||
755 | if (is_linear_pfn_mapping(vma)) { | 925 | if (is_linear_pfn_mapping(vma)) { |
756 | /* free the whole chunk starting from vm_pgoff */ | 926 | /* free the whole chunk starting from vm_pgoff */ |
757 | paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; | 927 | paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; |
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c new file mode 100644 index 000000000000..513d8ed5d2ec --- /dev/null +++ b/arch/x86/mm/setup_nx.c | |||
@@ -0,0 +1,69 @@ | |||
1 | #include <linux/spinlock.h> | ||
2 | #include <linux/errno.h> | ||
3 | #include <linux/init.h> | ||
4 | |||
5 | #include <asm/pgtable.h> | ||
6 | |||
7 | int nx_enabled; | ||
8 | |||
9 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) | ||
10 | static int disable_nx __cpuinitdata; | ||
11 | |||
12 | /* | ||
13 | * noexec = on|off | ||
14 | * | ||
15 | * Control non-executable mappings for processes. | ||
16 | * | ||
17 | * on Enable | ||
18 | * off Disable | ||
19 | */ | ||
20 | static int __init noexec_setup(char *str) | ||
21 | { | ||
22 | if (!str) | ||
23 | return -EINVAL; | ||
24 | if (!strncmp(str, "on", 2)) { | ||
25 | __supported_pte_mask |= _PAGE_NX; | ||
26 | disable_nx = 0; | ||
27 | } else if (!strncmp(str, "off", 3)) { | ||
28 | disable_nx = 1; | ||
29 | __supported_pte_mask &= ~_PAGE_NX; | ||
30 | } | ||
31 | return 0; | ||
32 | } | ||
33 | early_param("noexec", noexec_setup); | ||
34 | #endif | ||
35 | |||
36 | #ifdef CONFIG_X86_PAE | ||
37 | void __init set_nx(void) | ||
38 | { | ||
39 | unsigned int v[4], l, h; | ||
40 | |||
41 | if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) { | ||
42 | cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]); | ||
43 | |||
44 | if ((v[3] & (1 << 20)) && !disable_nx) { | ||
45 | rdmsr(MSR_EFER, l, h); | ||
46 | l |= EFER_NX; | ||
47 | wrmsr(MSR_EFER, l, h); | ||
48 | nx_enabled = 1; | ||
49 | __supported_pte_mask |= _PAGE_NX; | ||
50 | } | ||
51 | } | ||
52 | } | ||
53 | #else | ||
54 | void set_nx(void) | ||
55 | { | ||
56 | } | ||
57 | #endif | ||
58 | |||
59 | #ifdef CONFIG_X86_64 | ||
60 | void __cpuinit check_efer(void) | ||
61 | { | ||
62 | unsigned long efer; | ||
63 | |||
64 | rdmsrl(MSR_EFER, efer); | ||
65 | if (!(efer & EFER_NX) || disable_nx) | ||
66 | __supported_pte_mask &= ~_PAGE_NX; | ||
67 | } | ||
68 | #endif | ||
69 | |||
diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c index 427fd1b56df5..8565d944f7cf 100644 --- a/arch/x86/mm/testmmiotrace.c +++ b/arch/x86/mm/testmmiotrace.c | |||
@@ -1,12 +1,13 @@ | |||
1 | /* | 1 | /* |
2 | * Written by Pekka Paalanen, 2008-2009 <pq@iki.fi> | 2 | * Written by Pekka Paalanen, 2008-2009 <pq@iki.fi> |
3 | */ | 3 | */ |
4 | |||
5 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
6 | |||
4 | #include <linux/module.h> | 7 | #include <linux/module.h> |
5 | #include <linux/io.h> | 8 | #include <linux/io.h> |
6 | #include <linux/mmiotrace.h> | 9 | #include <linux/mmiotrace.h> |
7 | 10 | ||
8 | #define MODULE_NAME "testmmiotrace" | ||
9 | |||
10 | static unsigned long mmio_address; | 11 | static unsigned long mmio_address; |
11 | module_param(mmio_address, ulong, 0); | 12 | module_param(mmio_address, ulong, 0); |
12 | MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB " | 13 | MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB " |
@@ -30,7 +31,7 @@ static unsigned v32(unsigned i) | |||
30 | static void do_write_test(void __iomem *p) | 31 | static void do_write_test(void __iomem *p) |
31 | { | 32 | { |
32 | unsigned int i; | 33 | unsigned int i; |
33 | pr_info(MODULE_NAME ": write test.\n"); | 34 | pr_info("write test.\n"); |
34 | mmiotrace_printk("Write test.\n"); | 35 | mmiotrace_printk("Write test.\n"); |
35 | 36 | ||
36 | for (i = 0; i < 256; i++) | 37 | for (i = 0; i < 256; i++) |
@@ -47,7 +48,7 @@ static void do_read_test(void __iomem *p) | |||
47 | { | 48 | { |
48 | unsigned int i; | 49 | unsigned int i; |
49 | unsigned errs[3] = { 0 }; | 50 | unsigned errs[3] = { 0 }; |
50 | pr_info(MODULE_NAME ": read test.\n"); | 51 | pr_info("read test.\n"); |
51 | mmiotrace_printk("Read test.\n"); | 52 | mmiotrace_printk("Read test.\n"); |
52 | 53 | ||
53 | for (i = 0; i < 256; i++) | 54 | for (i = 0; i < 256; i++) |
@@ -68,7 +69,7 @@ static void do_read_test(void __iomem *p) | |||
68 | 69 | ||
69 | static void do_read_far_test(void __iomem *p) | 70 | static void do_read_far_test(void __iomem *p) |
70 | { | 71 | { |
71 | pr_info(MODULE_NAME ": read far test.\n"); | 72 | pr_info("read far test.\n"); |
72 | mmiotrace_printk("Read far test.\n"); | 73 | mmiotrace_printk("Read far test.\n"); |
73 | 74 | ||
74 | ioread32(p + read_far); | 75 | ioread32(p + read_far); |
@@ -78,7 +79,7 @@ static void do_test(unsigned long size) | |||
78 | { | 79 | { |
79 | void __iomem *p = ioremap_nocache(mmio_address, size); | 80 | void __iomem *p = ioremap_nocache(mmio_address, size); |
80 | if (!p) { | 81 | if (!p) { |
81 | pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); | 82 | pr_err("could not ioremap, aborting.\n"); |
82 | return; | 83 | return; |
83 | } | 84 | } |
84 | mmiotrace_printk("ioremap returned %p.\n", p); | 85 | mmiotrace_printk("ioremap returned %p.\n", p); |
@@ -94,24 +95,22 @@ static int __init init(void) | |||
94 | unsigned long size = (read_far) ? (8 << 20) : (16 << 10); | 95 | unsigned long size = (read_far) ? (8 << 20) : (16 << 10); |
95 | 96 | ||
96 | if (mmio_address == 0) { | 97 | if (mmio_address == 0) { |
97 | pr_err(MODULE_NAME ": you have to use the module argument " | 98 | pr_err("you have to use the module argument mmio_address.\n"); |
98 | "mmio_address.\n"); | 99 | pr_err("DO NOT LOAD THIS MODULE UNLESS YOU REALLY KNOW WHAT YOU ARE DOING!\n"); |
99 | pr_err(MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS" | ||
100 | " YOU REALLY KNOW WHAT YOU ARE DOING!\n"); | ||
101 | return -ENXIO; | 100 | return -ENXIO; |
102 | } | 101 | } |
103 | 102 | ||
104 | pr_warning(MODULE_NAME ": WARNING: mapping %lu kB @ 0x%08lx in PCI " | 103 | pr_warning("WARNING: mapping %lu kB @ 0x%08lx in PCI address space, " |
105 | "address space, and writing 16 kB of rubbish in there.\n", | 104 | "and writing 16 kB of rubbish in there.\n", |
106 | size >> 10, mmio_address); | 105 | size >> 10, mmio_address); |
107 | do_test(size); | 106 | do_test(size); |
108 | pr_info(MODULE_NAME ": All done.\n"); | 107 | pr_info("All done.\n"); |
109 | return 0; | 108 | return 0; |
110 | } | 109 | } |
111 | 110 | ||
112 | static void __exit cleanup(void) | 111 | static void __exit cleanup(void) |
113 | { | 112 | { |
114 | pr_debug(MODULE_NAME ": unloaded.\n"); | 113 | pr_debug("unloaded.\n"); |
115 | } | 114 | } |
116 | 115 | ||
117 | module_init(init); | 116 | module_init(init); |
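The conversion works because printk.h expands pr_info(), pr_err() and friends through pr_fmt(fmt), which defaults to the bare format; defining pr_fmt before the first include therefore prefixes every message in the file at compile time. In sketch form:

    /* must precede any header that pulls in <linux/kernel.h> */
    #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
    #include <linux/module.h>

    static int __init demo_init(void)
    {
        pr_info("write test.\n");  /* logs "testmmiotrace: write test." */
        return 0;
    }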
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index c814e144a3f0..36fe08eeb5c3 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c | |||
@@ -59,7 +59,8 @@ void leave_mm(int cpu) | |||
59 | { | 59 | { |
60 | if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) | 60 | if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) |
61 | BUG(); | 61 | BUG(); |
62 | cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask); | 62 | cpumask_clear_cpu(cpu, |
63 | mm_cpumask(percpu_read(cpu_tlbstate.active_mm))); | ||
63 | load_cr3(swapper_pg_dir); | 64 | load_cr3(swapper_pg_dir); |
64 | } | 65 | } |
65 | EXPORT_SYMBOL_GPL(leave_mm); | 66 | EXPORT_SYMBOL_GPL(leave_mm); |
@@ -234,8 +235,8 @@ void flush_tlb_current_task(void) | |||
234 | preempt_disable(); | 235 | preempt_disable(); |
235 | 236 | ||
236 | local_flush_tlb(); | 237 | local_flush_tlb(); |
237 | if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) | 238 | if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) |
238 | flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); | 239 | flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL); |
239 | preempt_enable(); | 240 | preempt_enable(); |
240 | } | 241 | } |
241 | 242 | ||
@@ -249,8 +250,8 @@ void flush_tlb_mm(struct mm_struct *mm) | |||
249 | else | 250 | else |
250 | leave_mm(smp_processor_id()); | 251 | leave_mm(smp_processor_id()); |
251 | } | 252 | } |
252 | if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) | 253 | if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) |
253 | flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); | 254 | flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL); |
254 | 255 | ||
255 | preempt_enable(); | 256 | preempt_enable(); |
256 | } | 257 | } |
@@ -268,8 +269,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) | |||
268 | leave_mm(smp_processor_id()); | 269 | leave_mm(smp_processor_id()); |
269 | } | 270 | } |
270 | 271 | ||
271 | if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) | 272 | if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) |
272 | flush_tlb_others(&mm->cpu_vm_mask, mm, va); | 273 | flush_tlb_others(mm_cpumask(mm), mm, va); |
273 | 274 | ||
274 | preempt_enable(); | 275 | preempt_enable(); |
275 | } | 276 | } |
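mm_cpumask() is the accessor this file is converted to; at this point in the series it is roughly the following (a sketch; the real definition lives in linux/mm_types.h):

    /* Future-safe accessor: callers stop touching cpu_vm_mask directly,
     * so its representation can later change under them. */
    #define mm_cpumask(mm) (&(mm)->cpu_vm_mask)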
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 4899215999de..8eb05878554c 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c | |||
@@ -234,11 +234,11 @@ static void arch_perfmon_setup_counters(void) | |||
234 | if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 && | 234 | if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 && |
235 | current_cpu_data.x86_model == 15) { | 235 | current_cpu_data.x86_model == 15) { |
236 | eax.split.version_id = 2; | 236 | eax.split.version_id = 2; |
237 | eax.split.num_counters = 2; | 237 | eax.split.num_events = 2; |
238 | eax.split.bit_width = 40; | 238 | eax.split.bit_width = 40; |
239 | } | 239 | } |
240 | 240 | ||
241 | num_counters = eax.split.num_counters; | 241 | num_counters = eax.split.num_events; |
242 | 242 | ||
243 | op_arch_perfmon_spec.num_counters = num_counters; | 243 | op_arch_perfmon_spec.num_counters = num_counters; |
244 | op_arch_perfmon_spec.num_controls = num_counters; | 244 | op_arch_perfmon_spec.num_controls = num_counters; |
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index b83776180c7f..7b8e75d16081 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h | |||
@@ -13,7 +13,7 @@ | |||
13 | #define OP_X86_MODEL_H | 13 | #define OP_X86_MODEL_H |
14 | 14 | ||
15 | #include <asm/types.h> | 15 | #include <asm/types.h> |
16 | #include <asm/perf_counter.h> | 16 | #include <asm/perf_event.h> |
17 | 17 | ||
18 | struct op_msr { | 18 | struct op_msr { |
19 | unsigned long addr; | 19 | unsigned long addr; |
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index 3ffa10df20b9..572ee9782f2a 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c | |||
@@ -15,63 +15,6 @@ | |||
15 | * also get peer root bus resource for io,mmio | 15 | * also get peer root bus resource for io,mmio |
16 | */ | 16 | */ |
17 | 17 | ||
18 | #ifdef CONFIG_NUMA | ||
19 | |||
20 | #define BUS_NR 256 | ||
21 | |||
22 | #ifdef CONFIG_X86_64 | ||
23 | |||
24 | static int mp_bus_to_node[BUS_NR]; | ||
25 | |||
26 | void set_mp_bus_to_node(int busnum, int node) | ||
27 | { | ||
28 | if (busnum >= 0 && busnum < BUS_NR) | ||
29 | mp_bus_to_node[busnum] = node; | ||
30 | } | ||
31 | |||
32 | int get_mp_bus_to_node(int busnum) | ||
33 | { | ||
34 | int node = -1; | ||
35 | |||
36 | if (busnum < 0 || busnum > (BUS_NR - 1)) | ||
37 | return node; | ||
38 | |||
39 | node = mp_bus_to_node[busnum]; | ||
40 | |||
41 | /* | ||
42 | * let numa_node_id to decide it later in dma_alloc_pages | ||
43 | * if there is no ram on that node | ||
44 | */ | ||
45 | if (node != -1 && !node_online(node)) | ||
46 | node = -1; | ||
47 | |||
48 | return node; | ||
49 | } | ||
50 | |||
51 | #else /* CONFIG_X86_32 */ | ||
52 | |||
53 | static unsigned char mp_bus_to_node[BUS_NR]; | ||
54 | |||
55 | void set_mp_bus_to_node(int busnum, int node) | ||
56 | { | ||
57 | if (busnum >= 0 && busnum < BUS_NR) | ||
58 | mp_bus_to_node[busnum] = (unsigned char) node; | ||
59 | } | ||
60 | |||
61 | int get_mp_bus_to_node(int busnum) | ||
62 | { | ||
63 | int node; | ||
64 | |||
65 | if (busnum < 0 || busnum > (BUS_NR - 1)) | ||
66 | return 0; | ||
67 | node = mp_bus_to_node[busnum]; | ||
68 | return node; | ||
69 | } | ||
70 | |||
71 | #endif /* CONFIG_X86_32 */ | ||
72 | |||
73 | #endif /* CONFIG_NUMA */ | ||
74 | |||
75 | #ifdef CONFIG_X86_64 | 18 | #ifdef CONFIG_X86_64 |
76 | 19 | ||
77 | /* | 20 | /* |
@@ -301,11 +244,6 @@ static int __init early_fill_mp_bus_info(void) | |||
301 | u64 val; | 244 | u64 val; |
302 | u32 address; | 245 | u32 address; |
303 | 246 | ||
304 | #ifdef CONFIG_NUMA | ||
305 | for (i = 0; i < BUS_NR; i++) | ||
306 | mp_bus_to_node[i] = -1; | ||
307 | #endif | ||
308 | |||
309 | if (!early_pci_allowed()) | 247 | if (!early_pci_allowed()) |
310 | return -1; | 248 | return -1; |
311 | 249 | ||
@@ -346,7 +284,7 @@ static int __init early_fill_mp_bus_info(void) | |||
346 | node = (reg >> 4) & 0x07; | 284 | node = (reg >> 4) & 0x07; |
347 | #ifdef CONFIG_NUMA | 285 | #ifdef CONFIG_NUMA |
348 | for (j = min_bus; j <= max_bus; j++) | 286 | for (j = min_bus; j <= max_bus; j++) |
349 | mp_bus_to_node[j] = (unsigned char) node; | 287 | set_mp_bus_to_node(j, node); |
350 | #endif | 288 | #endif |
351 | link = (reg >> 8) & 0x03; | 289 | link = (reg >> 8) & 0x03; |
352 | 290 | ||
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 2202b6257b82..1331fcf26143 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c | |||
@@ -600,3 +600,72 @@ struct pci_bus * __devinit pci_scan_bus_with_sysdata(int busno) | |||
600 | { | 600 | { |
601 | return pci_scan_bus_on_node(busno, &pci_root_ops, -1); | 601 | return pci_scan_bus_on_node(busno, &pci_root_ops, -1); |
602 | } | 602 | } |
603 | |||
604 | /* | ||
605 | * NUMA info for PCI busses | ||
606 | * | ||
607 | * Early arch code is responsible for filling in reasonable values here. | ||
608 | * A node id of "-1" means "use current node". In other words, if a bus | ||
609 | * has a -1 node id, it's not tightly coupled to any particular chunk | ||
610 | * of memory (as is the case on some Nehalem systems). | ||
611 | */ | ||
612 | #ifdef CONFIG_NUMA | ||
613 | |||
614 | #define BUS_NR 256 | ||
615 | |||
616 | #ifdef CONFIG_X86_64 | ||
617 | |||
618 | static int mp_bus_to_node[BUS_NR] = { | ||
619 | [0 ... BUS_NR - 1] = -1 | ||
620 | }; | ||
621 | |||
622 | void set_mp_bus_to_node(int busnum, int node) | ||
623 | { | ||
624 | if (busnum >= 0 && busnum < BUS_NR) | ||
625 | mp_bus_to_node[busnum] = node; | ||
626 | } | ||
627 | |||
628 | int get_mp_bus_to_node(int busnum) | ||
629 | { | ||
630 | int node = -1; | ||
631 | |||
632 | if (busnum < 0 || busnum > (BUS_NR - 1)) | ||
633 | return node; | ||
634 | |||
635 | node = mp_bus_to_node[busnum]; | ||
636 | |||
637 | /* | ||
638 | * let numa_node_id to decide it later in dma_alloc_pages | ||
639 | * if there is no ram on that node | ||
640 | */ | ||
641 | if (node != -1 && !node_online(node)) | ||
642 | node = -1; | ||
643 | |||
644 | return node; | ||
645 | } | ||
646 | |||
647 | #else /* CONFIG_X86_32 */ | ||
648 | |||
649 | static int mp_bus_to_node[BUS_NR] = { | ||
650 | [0 ... BUS_NR - 1] = -1 | ||
651 | }; | ||
652 | |||
653 | void set_mp_bus_to_node(int busnum, int node) | ||
654 | { | ||
655 | if (busnum >= 0 && busnum < BUS_NR) | ||
656 | mp_bus_to_node[busnum] = (unsigned char) node; | ||
657 | } | ||
658 | |||
659 | int get_mp_bus_to_node(int busnum) | ||
660 | { | ||
661 | int node; | ||
662 | |||
663 | if (busnum < 0 || busnum > (BUS_NR - 1)) | ||
664 | return 0; | ||
665 | node = mp_bus_to_node[busnum]; | ||
666 | return node; | ||
667 | } | ||
668 | |||
669 | #endif /* CONFIG_X86_32 */ | ||
670 | |||
671 | #endif /* CONFIG_NUMA */ | ||
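The consolidated table above leans on GCC's range-designated initializer ([0 ... BUS_NR - 1] = -1) so every bus defaults to "no node" at compile time, which is what lets the amd_bus.c hunk drop its runtime initialization loop and call set_mp_bus_to_node() instead of poking the array directly. A minimal standalone sketch of the same pattern (hypothetical names, plain userspace C; the range designator is a GCC extension):

#include <stdio.h>

#define BUS_NR 256

/* GCC range designator: every slot is -1 ("no node") at compile time,
 * so no boot-time init loop is needed. */
static int bus_to_node[BUS_NR] = {
	[0 ... BUS_NR - 1] = -1
};

static void set_bus_node(int bus, int node)
{
	if (bus >= 0 && bus < BUS_NR)	/* reject out-of-range bus numbers */
		bus_to_node[bus] = node;
}

static int get_bus_node(int bus)
{
	if (bus < 0 || bus >= BUS_NR)
		return -1;		/* -1 means "use current node" */
	return bus_to_node[bus];
}

int main(void)
{
	set_bus_node(4, 1);
	printf("bus 4 -> node %d, bus 200 -> node %d\n",
	       get_bus_node(4), get_bus_node(200));	/* prints 1 and -1 */
	return 0;
}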
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 52e62e57fedd..b22d13b0c71d 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c | |||
@@ -266,7 +266,7 @@ void pcibios_set_master(struct pci_dev *dev) | |||
266 | pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat); | 266 | pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat); |
267 | } | 267 | } |
268 | 268 | ||
269 | static struct vm_operations_struct pci_mmap_ops = { | 269 | static const struct vm_operations_struct pci_mmap_ops = { |
270 | .access = generic_access_phys, | 270 | .access = generic_access_phys, |
271 | }; | 271 | }; |
272 | 272 | ||
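The one-word change above (and the matching file_operations change in xen/debugfs.c further down) is the usual constification of an ops table: once the struct of function pointers is const, the linker can place it in read-only memory, so a stray write faults instead of silently redirecting a function pointer. A stand-alone illustration with made-up names:

#include <stdio.h>

struct demo_ops {
	int (*access)(int arg);
};

static int demo_access(int arg)
{
	return arg * 2;
}

/* const => the table can live in .rodata; assigning to demo.access at
 * run time is a compile error here, and a fault if forced via a cast. */
static const struct demo_ops demo = {
	.access = demo_access,
};

int main(void)
{
	printf("%d\n", demo.access(21));	/* 42 */
	return 0;
}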
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 712443ec6d43..602c172d3bd5 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c | |||
@@ -13,10 +13,14 @@ | |||
13 | #include <linux/pci.h> | 13 | #include <linux/pci.h> |
14 | #include <linux/init.h> | 14 | #include <linux/init.h> |
15 | #include <linux/acpi.h> | 15 | #include <linux/acpi.h> |
16 | #include <linux/sfi_acpi.h> | ||
16 | #include <linux/bitmap.h> | 17 | #include <linux/bitmap.h> |
17 | #include <linux/sort.h> | 18 | #include <linux/sort.h> |
18 | #include <asm/e820.h> | 19 | #include <asm/e820.h> |
19 | #include <asm/pci_x86.h> | 20 | #include <asm/pci_x86.h> |
21 | #include <asm/acpi.h> | ||
22 | |||
23 | #define PREFIX "PCI: " | ||
20 | 24 | ||
21 | /* aperture is up to 256MB but BIOS may reserve less */ | 25 | /* aperture is up to 256MB but BIOS may reserve less */ |
22 | #define MMCONFIG_APER_MIN (2 * 1024*1024) | 26 | #define MMCONFIG_APER_MIN (2 * 1024*1024) |
@@ -491,7 +495,7 @@ static void __init pci_mmcfg_reject_broken(int early) | |||
491 | (unsigned int)cfg->start_bus_number, | 495 | (unsigned int)cfg->start_bus_number, |
492 | (unsigned int)cfg->end_bus_number); | 496 | (unsigned int)cfg->end_bus_number); |
493 | 497 | ||
494 | if (!early) | 498 | if (!early && !acpi_disabled) |
495 | valid = is_mmconf_reserved(is_acpi_reserved, addr, size, i, cfg, 0); | 499 | valid = is_mmconf_reserved(is_acpi_reserved, addr, size, i, cfg, 0); |
496 | 500 | ||
497 | if (valid) | 501 | if (valid) |
@@ -606,7 +610,7 @@ static void __init __pci_mmcfg_init(int early) | |||
606 | } | 610 | } |
607 | 611 | ||
608 | if (!known_bridge) | 612 | if (!known_bridge) |
609 | acpi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg); | 613 | acpi_sfi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg); |
610 | 614 | ||
611 | pci_mmcfg_reject_broken(early); | 615 | pci_mmcfg_reject_broken(early); |
612 | 616 | ||
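Switching to acpi_sfi_table_parse() lets the MCFG lookup fall back to SFI-style firmware tables on ACPI-less Moorestown systems, and the new !acpi_disabled check keeps the reservation test from consulting ACPI when it is turned off. As a rough sketch of that try-one-source-then-the-other shape (the function names and signatures below are simplified assumptions for illustration, not the kernel API):

#include <stdio.h>

typedef int (*table_handler)(const char *sig);

/* Stubs standing in for the real parsers; 0 means success. */
static int parse_acpi_table(const char *sig, table_handler h)
{
	(void)sig; (void)h;
	return -1;	/* pretend ACPI is disabled or lacks the table */
}

static int parse_sfi_table(const char *sig, table_handler h)
{
	return h(sig);
}

static int handle_mcfg(const char *sig)
{
	printf("parsed %s from the fallback source\n", sig);
	return 0;
}

/* Prefer ACPI; consult SFI only when ACPI yields nothing. */
static int parse_firmware_table(const char *sig, table_handler h)
{
	if (!parse_acpi_table(sig, h))
		return 0;
	return parse_sfi_table(sig, h);
}

int main(void)
{
	return parse_firmware_table("MCFG", handle_mcfg);
}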
diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c index 8b2d561046a3..f10a7e94a84c 100644 --- a/arch/x86/pci/mmconfig_32.c +++ b/arch/x86/pci/mmconfig_32.c | |||
@@ -11,9 +11,9 @@ | |||
11 | 11 | ||
12 | #include <linux/pci.h> | 12 | #include <linux/pci.h> |
13 | #include <linux/init.h> | 13 | #include <linux/init.h> |
14 | #include <linux/acpi.h> | ||
15 | #include <asm/e820.h> | 14 | #include <asm/e820.h> |
16 | #include <asm/pci_x86.h> | 15 | #include <asm/pci_x86.h> |
16 | #include <acpi/acpi.h> | ||
17 | 17 | ||
18 | /* Assume systems with more busses have correct MCFG */ | 18 | /* Assume systems with more busses have correct MCFG */ |
19 | #define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG)) | 19 | #define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG)) |
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 9e63db8cdee4..e09a44fc4664 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c | |||
@@ -224,11 +224,7 @@ static void __restore_processor_state(struct saved_context *ctxt) | |||
224 | fix_processor_context(); | 224 | fix_processor_context(); |
225 | 225 | ||
226 | do_fpu_end(); | 226 | do_fpu_end(); |
227 | mtrr_ap_init(); | 227 | mtrr_bp_restore(); |
228 | |||
229 | #ifdef CONFIG_X86_OLD_MCE | ||
230 | mcheck_init(&boot_cpu_data); | ||
231 | #endif | ||
232 | } | 228 | } |
233 | 229 | ||
234 | /* Needed by apm.c */ | 230 | /* Needed by apm.c */ |
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index 88112b49f02c..6b4ffedb93c9 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile | |||
@@ -122,7 +122,7 @@ quiet_cmd_vdso = VDSO $@ | |||
122 | $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \ | 122 | $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \ |
123 | -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) | 123 | -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) |
124 | 124 | ||
125 | VDSO_LDFLAGS = -fPIC -shared $(call ld-option, -Wl$(comma)--hash-style=sysv) | 125 | VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) |
126 | GCOV_PROFILE := n | 126 | GCOV_PROFILE := n |
127 | 127 | ||
128 | # | 128 | # |
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 6a40b78b46aa..ee55754cc3c5 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c | |||
@@ -86,14 +86,47 @@ notrace static noinline int do_monotonic(struct timespec *ts) | |||
86 | return 0; | 86 | return 0; |
87 | } | 87 | } |
88 | 88 | ||
89 | notrace static noinline int do_realtime_coarse(struct timespec *ts) | ||
90 | { | ||
91 | unsigned long seq; | ||
92 | do { | ||
93 | seq = read_seqbegin(&gtod->lock); | ||
94 | ts->tv_sec = gtod->wall_time_coarse.tv_sec; | ||
95 | ts->tv_nsec = gtod->wall_time_coarse.tv_nsec; | ||
96 | } while (unlikely(read_seqretry(&gtod->lock, seq))); | ||
97 | return 0; | ||
98 | } | ||
99 | |||
100 | notrace static noinline int do_monotonic_coarse(struct timespec *ts) | ||
101 | { | ||
102 | unsigned long seq, ns, secs; | ||
103 | do { | ||
104 | seq = read_seqbegin(&gtod->lock); | ||
105 | secs = gtod->wall_time_coarse.tv_sec; | ||
106 | ns = gtod->wall_time_coarse.tv_nsec; | ||
107 | secs += gtod->wall_to_monotonic.tv_sec; | ||
108 | ns += gtod->wall_to_monotonic.tv_nsec; | ||
109 | } while (unlikely(read_seqretry(&gtod->lock, seq))); | ||
110 | vset_normalized_timespec(ts, secs, ns); | ||
111 | return 0; | ||
112 | } | ||
113 | |||
89 | notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) | 114 | notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) |
90 | { | 115 | { |
91 | if (likely(gtod->sysctl_enabled && gtod->clock.vread)) | 116 | if (likely(gtod->sysctl_enabled)) |
92 | switch (clock) { | 117 | switch (clock) { |
93 | case CLOCK_REALTIME: | 118 | case CLOCK_REALTIME: |
94 | return do_realtime(ts); | 119 | if (likely(gtod->clock.vread)) |
120 | return do_realtime(ts); | ||
121 | break; | ||
95 | case CLOCK_MONOTONIC: | 122 | case CLOCK_MONOTONIC: |
96 | return do_monotonic(ts); | 123 | if (likely(gtod->clock.vread)) |
124 | return do_monotonic(ts); | ||
125 | break; | ||
126 | case CLOCK_REALTIME_COARSE: | ||
127 | return do_realtime_coarse(ts); | ||
128 | case CLOCK_MONOTONIC_COARSE: | ||
129 | return do_monotonic_coarse(ts); | ||
97 | } | 130 | } |
98 | return vdso_fallback_gettime(clock, ts); | 131 | return vdso_fallback_gettime(clock, ts); |
99 | } | 132 | } |
diff --git a/arch/x86/xen/debugfs.c b/arch/x86/xen/debugfs.c index b53225d2cac3..e133ce25e290 100644 --- a/arch/x86/xen/debugfs.c +++ b/arch/x86/xen/debugfs.c | |||
@@ -100,7 +100,7 @@ static int xen_array_release(struct inode *inode, struct file *file) | |||
100 | return 0; | 100 | return 0; |
101 | } | 101 | } |
102 | 102 | ||
103 | static struct file_operations u32_array_fops = { | 103 | static const struct file_operations u32_array_fops = { |
104 | .owner = THIS_MODULE, | 104 | .owner = THIS_MODULE, |
105 | .open = u32_array_open, | 105 | .open = u32_array_open, |
106 | .release= xen_array_release, | 106 | .release= xen_array_release, |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 0dd0c2c6cae0..3439616d69f1 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -912,19 +912,9 @@ static const struct pv_info xen_info __initdata = { | |||
912 | 912 | ||
913 | static const struct pv_init_ops xen_init_ops __initdata = { | 913 | static const struct pv_init_ops xen_init_ops __initdata = { |
914 | .patch = xen_patch, | 914 | .patch = xen_patch, |
915 | |||
916 | .banner = xen_banner, | ||
917 | .memory_setup = xen_memory_setup, | ||
918 | .arch_setup = xen_arch_setup, | ||
919 | .post_allocator_init = xen_post_allocator_init, | ||
920 | }; | 915 | }; |
921 | 916 | ||
922 | static const struct pv_time_ops xen_time_ops __initdata = { | 917 | static const struct pv_time_ops xen_time_ops __initdata = { |
923 | .time_init = xen_time_init, | ||
924 | |||
925 | .set_wallclock = xen_set_wallclock, | ||
926 | .get_wallclock = xen_get_wallclock, | ||
927 | .get_tsc_khz = xen_tsc_khz, | ||
928 | .sched_clock = xen_sched_clock, | 918 | .sched_clock = xen_sched_clock, |
929 | }; | 919 | }; |
930 | 920 | ||
@@ -990,8 +980,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { | |||
990 | 980 | ||
991 | static const struct pv_apic_ops xen_apic_ops __initdata = { | 981 | static const struct pv_apic_ops xen_apic_ops __initdata = { |
992 | #ifdef CONFIG_X86_LOCAL_APIC | 982 | #ifdef CONFIG_X86_LOCAL_APIC |
993 | .setup_boot_clock = paravirt_nop, | ||
994 | .setup_secondary_clock = paravirt_nop, | ||
995 | .startup_ipi_hook = paravirt_nop, | 983 | .startup_ipi_hook = paravirt_nop, |
996 | #endif | 984 | #endif |
997 | }; | 985 | }; |
@@ -1070,7 +1058,18 @@ asmlinkage void __init xen_start_kernel(void) | |||
1070 | pv_time_ops = xen_time_ops; | 1058 | pv_time_ops = xen_time_ops; |
1071 | pv_cpu_ops = xen_cpu_ops; | 1059 | pv_cpu_ops = xen_cpu_ops; |
1072 | pv_apic_ops = xen_apic_ops; | 1060 | pv_apic_ops = xen_apic_ops; |
1073 | pv_mmu_ops = xen_mmu_ops; | 1061 | |
1062 | x86_init.resources.memory_setup = xen_memory_setup; | ||
1063 | x86_init.oem.arch_setup = xen_arch_setup; | ||
1064 | x86_init.oem.banner = xen_banner; | ||
1065 | |||
1066 | x86_init.timers.timer_init = xen_time_init; | ||
1067 | x86_init.timers.setup_percpu_clockev = x86_init_noop; | ||
1068 | x86_cpuinit.setup_percpu_clockev = x86_init_noop; | ||
1069 | |||
1070 | x86_platform.calibrate_tsc = xen_tsc_khz; | ||
1071 | x86_platform.get_wallclock = xen_get_wallclock; | ||
1072 | x86_platform.set_wallclock = xen_set_wallclock; | ||
1074 | 1073 | ||
1075 | /* | 1074 | /* |
1076 | * Set up some pagetable state before starting to set any ptes. | 1075 | * Set up some pagetable state before starting to set any ptes. |
@@ -1083,6 +1082,11 @@ asmlinkage void __init xen_start_kernel(void) | |||
1083 | 1082 | ||
1084 | __supported_pte_mask |= _PAGE_IOMAP; | 1083 | __supported_pte_mask |= _PAGE_IOMAP; |
1085 | 1084 | ||
1085 | #ifdef CONFIG_X86_64 | ||
1086 | /* Work out if we support NX */ | ||
1087 | check_efer(); | ||
1088 | #endif | ||
1089 | |||
1086 | xen_setup_features(); | 1090 | xen_setup_features(); |
1087 | 1091 | ||
1088 | /* Get mfn list */ | 1092 | /* Get mfn list */ |
@@ -1095,6 +1099,7 @@ asmlinkage void __init xen_start_kernel(void) | |||
1095 | */ | 1099 | */ |
1096 | xen_setup_stackprotector(); | 1100 | xen_setup_stackprotector(); |
1097 | 1101 | ||
1102 | xen_init_mmu_ops(); | ||
1098 | xen_init_irq_ops(); | 1103 | xen_init_irq_ops(); |
1099 | xen_init_cpuid_mask(); | 1104 | xen_init_cpuid_mask(); |
1100 | 1105 | ||
@@ -1123,11 +1128,6 @@ asmlinkage void __init xen_start_kernel(void) | |||
1123 | 1128 | ||
1124 | pgd = (pgd_t *)xen_start_info->pt_base; | 1129 | pgd = (pgd_t *)xen_start_info->pt_base; |
1125 | 1130 | ||
1126 | #ifdef CONFIG_X86_64 | ||
1127 | /* Work out if we support NX */ | ||
1128 | check_efer(); | ||
1129 | #endif | ||
1130 | |||
1131 | /* Don't do the full vcpu_info placement stuff until we have a | 1131 | /* Don't do the full vcpu_info placement stuff until we have a |
1132 | possible map and a non-dummy shared_info. */ | 1132 | possible map and a non-dummy shared_info. */ |
1133 | per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; | 1133 | per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; |
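Most of this enlighten.c hunk is mechanical: instead of filling paravirt slots (banner, memory_setup, time_init, ...) that every pv guest had to provide, Xen now overrides the generic x86_init / x86_platform hook tables once at boot, and the dead slots disappear from the pv_*_ops structs. The shape of that pattern, with illustrative stand-in names only; xen/irq.c and xen/mmu.c below do the same for intr_init and the pagetable setup hooks:

#include <stdio.h>

struct platform_setup {
	void (*banner)(void);
	unsigned long (*calibrate_tsc)(void);
};

static void default_banner(void)	{ puts("generic x86 boot"); }
static unsigned long default_tsc(void)	{ return 2000000; /* kHz, native */ }

/* Writable hook table prefilled with sane native defaults. */
static struct platform_setup setup = {
	.banner        = default_banner,
	.calibrate_tsc = default_tsc,
};

static void guest_banner(void)		{ puts("booting as a guest"); }
static unsigned long guest_tsc(void)	{ return 1000000; /* from hypervisor */ }

int main(void)
{
	/* A platform overrides only the hooks it actually needs. */
	setup.banner = guest_banner;
	setup.calibrate_tsc = guest_tsc;

	setup.banner();
	printf("tsc: %lu kHz\n", setup.calibrate_tsc());
	return 0;
}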
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index cfd17799bd6d..9d30105a0c4a 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c | |||
@@ -1,5 +1,7 @@ | |||
1 | #include <linux/hardirq.h> | 1 | #include <linux/hardirq.h> |
2 | 2 | ||
3 | #include <asm/x86_init.h> | ||
4 | |||
3 | #include <xen/interface/xen.h> | 5 | #include <xen/interface/xen.h> |
4 | #include <xen/interface/sched.h> | 6 | #include <xen/interface/sched.h> |
5 | #include <xen/interface/vcpu.h> | 7 | #include <xen/interface/vcpu.h> |
@@ -112,8 +114,6 @@ static void xen_halt(void) | |||
112 | } | 114 | } |
113 | 115 | ||
114 | static const struct pv_irq_ops xen_irq_ops __initdata = { | 116 | static const struct pv_irq_ops xen_irq_ops __initdata = { |
115 | .init_IRQ = xen_init_IRQ, | ||
116 | |||
117 | .save_fl = PV_CALLEE_SAVE(xen_save_fl), | 117 | .save_fl = PV_CALLEE_SAVE(xen_save_fl), |
118 | .restore_fl = PV_CALLEE_SAVE(xen_restore_fl), | 118 | .restore_fl = PV_CALLEE_SAVE(xen_restore_fl), |
119 | .irq_disable = PV_CALLEE_SAVE(xen_irq_disable), | 119 | .irq_disable = PV_CALLEE_SAVE(xen_irq_disable), |
@@ -129,4 +129,5 @@ static const struct pv_irq_ops xen_irq_ops __initdata = { | |||
129 | void __init xen_init_irq_ops() | 129 | void __init xen_init_irq_ops() |
130 | { | 130 | { |
131 | pv_irq_ops = xen_irq_ops; | 131 | pv_irq_ops = xen_irq_ops; |
132 | x86_init.irqs.intr_init = xen_init_IRQ; | ||
132 | } | 133 | } |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 4ceb28581652..3bf7b1d250ce 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -1165,14 +1165,14 @@ static void xen_drop_mm_ref(struct mm_struct *mm) | |||
1165 | /* Get the "official" set of cpus referring to our pagetable. */ | 1165 | /* Get the "official" set of cpus referring to our pagetable. */ |
1166 | if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) { | 1166 | if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) { |
1167 | for_each_online_cpu(cpu) { | 1167 | for_each_online_cpu(cpu) { |
1168 | if (!cpumask_test_cpu(cpu, &mm->cpu_vm_mask) | 1168 | if (!cpumask_test_cpu(cpu, mm_cpumask(mm)) |
1169 | && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd)) | 1169 | && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd)) |
1170 | continue; | 1170 | continue; |
1171 | smp_call_function_single(cpu, drop_other_mm_ref, mm, 1); | 1171 | smp_call_function_single(cpu, drop_other_mm_ref, mm, 1); |
1172 | } | 1172 | } |
1173 | return; | 1173 | return; |
1174 | } | 1174 | } |
1175 | cpumask_copy(mask, &mm->cpu_vm_mask); | 1175 | cpumask_copy(mask, mm_cpumask(mm)); |
1176 | 1176 | ||
1177 | /* It's possible that a vcpu may have a stale reference to our | 1177 | /* It's possible that a vcpu may have a stale reference to our |
1178 | cr3, because it's in lazy mode, and it hasn't yet flushed | 1178 | cr3, because it's in lazy mode, and it hasn't yet flushed |
@@ -1229,9 +1229,12 @@ static __init void xen_pagetable_setup_start(pgd_t *base) | |||
1229 | { | 1229 | { |
1230 | } | 1230 | } |
1231 | 1231 | ||
1232 | static void xen_post_allocator_init(void); | ||
1233 | |||
1232 | static __init void xen_pagetable_setup_done(pgd_t *base) | 1234 | static __init void xen_pagetable_setup_done(pgd_t *base) |
1233 | { | 1235 | { |
1234 | xen_setup_shared_info(); | 1236 | xen_setup_shared_info(); |
1237 | xen_post_allocator_init(); | ||
1235 | } | 1238 | } |
1236 | 1239 | ||
1237 | static void xen_write_cr2(unsigned long cr2) | 1240 | static void xen_write_cr2(unsigned long cr2) |
@@ -1841,7 +1844,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) | |||
1841 | #endif | 1844 | #endif |
1842 | } | 1845 | } |
1843 | 1846 | ||
1844 | __init void xen_post_allocator_init(void) | 1847 | static __init void xen_post_allocator_init(void) |
1845 | { | 1848 | { |
1846 | pv_mmu_ops.set_pte = xen_set_pte; | 1849 | pv_mmu_ops.set_pte = xen_set_pte; |
1847 | pv_mmu_ops.set_pmd = xen_set_pmd; | 1850 | pv_mmu_ops.set_pmd = xen_set_pmd; |
@@ -1875,10 +1878,7 @@ static void xen_leave_lazy_mmu(void) | |||
1875 | preempt_enable(); | 1878 | preempt_enable(); |
1876 | } | 1879 | } |
1877 | 1880 | ||
1878 | const struct pv_mmu_ops xen_mmu_ops __initdata = { | 1881 | static const struct pv_mmu_ops xen_mmu_ops __initdata = { |
1879 | .pagetable_setup_start = xen_pagetable_setup_start, | ||
1880 | .pagetable_setup_done = xen_pagetable_setup_done, | ||
1881 | |||
1882 | .read_cr2 = xen_read_cr2, | 1882 | .read_cr2 = xen_read_cr2, |
1883 | .write_cr2 = xen_write_cr2, | 1883 | .write_cr2 = xen_write_cr2, |
1884 | 1884 | ||
@@ -1954,6 +1954,12 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = { | |||
1954 | .set_fixmap = xen_set_fixmap, | 1954 | .set_fixmap = xen_set_fixmap, |
1955 | }; | 1955 | }; |
1956 | 1956 | ||
1957 | void __init xen_init_mmu_ops(void) | ||
1958 | { | ||
1959 | x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start; | ||
1960 | x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done; | ||
1961 | pv_mmu_ops = xen_mmu_ops; | ||
1962 | } | ||
1957 | 1963 | ||
1958 | #ifdef CONFIG_XEN_DEBUG_FS | 1964 | #ifdef CONFIG_XEN_DEBUG_FS |
1959 | 1965 | ||
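Making xen_mmu_ops static and exporting only xen_init_mmu_ops() (see the mmu.h hunk next) is straightforward encapsulation: callers get a single installation entry point and can no longer reach the table itself. A compressed single-file sketch of that design, names invented:

#include <stdio.h>

struct mmu_ops {
	void (*flush)(void);
};

/* Live dispatch table, analogous to pv_mmu_ops. */
static struct mmu_ops active_mmu_ops;

static void demo_flush(void)	{ puts("demo flush"); }

/* File-local implementation table, analogous to the now-static
 * xen_mmu_ops; nothing outside this file can name it. */
static const struct mmu_ops demo_mmu_ops = {
	.flush = demo_flush,
};

/* The only entry point a header would export: installs the ops exactly
 * once, in one visible place. */
static void demo_init_mmu_ops(void)
{
	active_mmu_ops = demo_mmu_ops;
}

int main(void)
{
	demo_init_mmu_ops();
	active_mmu_ops.flush();
	return 0;
}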
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index da7302624897..5fe6bc7f5ecf 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h | |||
@@ -59,5 +59,5 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, | |||
59 | 59 | ||
60 | unsigned long xen_read_cr2_direct(void); | 60 | unsigned long xen_read_cr2_direct(void); |
61 | 61 | ||
62 | extern const struct pv_mmu_ops xen_mmu_ops; | 62 | extern void xen_init_mmu_ops(void); |
63 | #endif /* _XEN_MMU_H */ | 63 | #endif /* _XEN_MMU_H */ |
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 22494fd4c9b5..355fa6b99c9c 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -30,8 +30,6 @@ pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); | |||
30 | void xen_ident_map_ISA(void); | 30 | void xen_ident_map_ISA(void); |
31 | void xen_reserve_top(void); | 31 | void xen_reserve_top(void); |
32 | 32 | ||
33 | void xen_post_allocator_init(void); | ||
34 | |||
35 | char * __init xen_memory_setup(void); | 33 | char * __init xen_memory_setup(void); |
36 | void __init xen_arch_setup(void); | 34 | void __init xen_arch_setup(void); |
37 | void __init xen_init_IRQ(void); | 35 | void __init xen_init_IRQ(void); |