Diffstat (limited to 'arch/x86')
111 files changed, 5780 insertions, 4822 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9458685902bd..a2d3a5fbeeda 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
| @@ -53,11 +53,15 @@ config X86 | |||
| 53 | select HAVE_KERNEL_LZMA | 53 | select HAVE_KERNEL_LZMA |
| 54 | select HAVE_KERNEL_LZO | 54 | select HAVE_KERNEL_LZO |
| 55 | select HAVE_HW_BREAKPOINT | 55 | select HAVE_HW_BREAKPOINT |
| 56 | select HAVE_MIXED_BREAKPOINTS_REGS | ||
| 56 | select PERF_EVENTS | 57 | select PERF_EVENTS |
| 57 | select ANON_INODES | 58 | select ANON_INODES |
| 58 | select HAVE_ARCH_KMEMCHECK | 59 | select HAVE_ARCH_KMEMCHECK |
| 59 | select HAVE_USER_RETURN_NOTIFIER | 60 | select HAVE_USER_RETURN_NOTIFIER |
| 60 | 61 | ||
| 62 | config INSTRUCTION_DECODER | ||
| 63 | def_bool (KPROBES || PERF_EVENTS) | ||
| 64 | |||
| 61 | config OUTPUT_FORMAT | 65 | config OUTPUT_FORMAT |
| 62 | string | 66 | string |
| 63 | default "elf32-i386" if X86_32 | 67 | default "elf32-i386" if X86_32 |
| @@ -197,20 +201,17 @@ config HAVE_INTEL_TXT | |||
| 197 | 201 | ||
| 198 | # Use the generic interrupt handling code in kernel/irq/: | 202 | # Use the generic interrupt handling code in kernel/irq/: |
| 199 | config GENERIC_HARDIRQS | 203 | config GENERIC_HARDIRQS |
| 200 | bool | 204 | def_bool y |
| 201 | default y | ||
| 202 | 205 | ||
| 203 | config GENERIC_HARDIRQS_NO__DO_IRQ | 206 | config GENERIC_HARDIRQS_NO__DO_IRQ |
| 204 | def_bool y | 207 | def_bool y |
| 205 | 208 | ||
| 206 | config GENERIC_IRQ_PROBE | 209 | config GENERIC_IRQ_PROBE |
| 207 | bool | 210 | def_bool y |
| 208 | default y | ||
| 209 | 211 | ||
| 210 | config GENERIC_PENDING_IRQ | 212 | config GENERIC_PENDING_IRQ |
| 211 | bool | 213 | def_bool y |
| 212 | depends on GENERIC_HARDIRQS && SMP | 214 | depends on GENERIC_HARDIRQS && SMP |
| 213 | default y | ||
| 214 | 215 | ||
| 215 | config USE_GENERIC_SMP_HELPERS | 216 | config USE_GENERIC_SMP_HELPERS |
| 216 | def_bool y | 217 | def_bool y |
| @@ -225,19 +226,22 @@ config X86_64_SMP | |||
| 225 | depends on X86_64 && SMP | 226 | depends on X86_64 && SMP |
| 226 | 227 | ||
| 227 | config X86_HT | 228 | config X86_HT |
| 228 | bool | 229 | def_bool y |
| 229 | depends on SMP | 230 | depends on SMP |
| 230 | default y | ||
| 231 | 231 | ||
| 232 | config X86_TRAMPOLINE | 232 | config X86_TRAMPOLINE |
| 233 | bool | 233 | def_bool y |
| 234 | depends on SMP || (64BIT && ACPI_SLEEP) | 234 | depends on SMP || (64BIT && ACPI_SLEEP) |
| 235 | default y | ||
| 236 | 235 | ||
| 237 | config X86_32_LAZY_GS | 236 | config X86_32_LAZY_GS |
| 238 | def_bool y | 237 | def_bool y |
| 239 | depends on X86_32 && !CC_STACKPROTECTOR | 238 | depends on X86_32 && !CC_STACKPROTECTOR |
| 240 | 239 | ||
| 240 | config ARCH_HWEIGHT_CFLAGS | ||
| 241 | string | ||
| 242 | default "-fcall-saved-ecx -fcall-saved-edx" if X86_32 | ||
| 243 | default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64 | ||
| 244 | |||
| 241 | config KTIME_SCALAR | 245 | config KTIME_SCALAR |
| 242 | def_bool X86_32 | 246 | def_bool X86_32 |
| 243 | source "init/Kconfig" | 247 | source "init/Kconfig" |
| @@ -447,7 +451,7 @@ config X86_NUMAQ | |||
| 447 | firmware with - send email to <Martin.Bligh@us.ibm.com>. | 451 | firmware with - send email to <Martin.Bligh@us.ibm.com>. |
| 448 | 452 | ||
| 449 | config X86_SUPPORTS_MEMORY_FAILURE | 453 | config X86_SUPPORTS_MEMORY_FAILURE |
| 450 | bool | 454 | def_bool y |
| 451 | # MCE code calls memory_failure(): | 455 | # MCE code calls memory_failure(): |
| 452 | depends on X86_MCE | 456 | depends on X86_MCE |
| 453 | # On 32-bit this adds too big of NODES_SHIFT and we run out of page flags: | 457 | # On 32-bit this adds too big of NODES_SHIFT and we run out of page flags: |
| @@ -455,7 +459,6 @@ config X86_SUPPORTS_MEMORY_FAILURE | |||
| 455 | # On 32-bit SPARSEMEM adds too big of SECTIONS_WIDTH: | 459 | # On 32-bit SPARSEMEM adds too big of SECTIONS_WIDTH: |
| 456 | depends on X86_64 || !SPARSEMEM | 460 | depends on X86_64 || !SPARSEMEM |
| 457 | select ARCH_SUPPORTS_MEMORY_FAILURE | 461 | select ARCH_SUPPORTS_MEMORY_FAILURE |
| 458 | default y | ||
| 459 | 462 | ||
| 460 | config X86_VISWS | 463 | config X86_VISWS |
| 461 | bool "SGI 320/540 (Visual Workstation)" | 464 | bool "SGI 320/540 (Visual Workstation)" |
| @@ -570,7 +573,6 @@ config PARAVIRT_SPINLOCKS | |||
| 570 | 573 | ||
| 571 | config PARAVIRT_CLOCK | 574 | config PARAVIRT_CLOCK |
| 572 | bool | 575 | bool |
| 573 | default n | ||
| 574 | 576 | ||
| 575 | endif | 577 | endif |
| 576 | 578 | ||
| @@ -749,7 +751,6 @@ config MAXSMP | |||
| 749 | bool "Configure Maximum number of SMP Processors and NUMA Nodes" | 751 | bool "Configure Maximum number of SMP Processors and NUMA Nodes" |
| 750 | depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL | 752 | depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL |
| 751 | select CPUMASK_OFFSTACK | 753 | select CPUMASK_OFFSTACK |
| 752 | default n | ||
| 753 | ---help--- | 754 | ---help--- |
| 754 | Configure maximum number of CPUS and NUMA Nodes for this architecture. | 755 | Configure maximum number of CPUS and NUMA Nodes for this architecture. |
| 755 | If unsure, say N. | 756 | If unsure, say N. |
| @@ -829,7 +830,6 @@ config X86_VISWS_APIC | |||
| 829 | 830 | ||
| 830 | config X86_REROUTE_FOR_BROKEN_BOOT_IRQS | 831 | config X86_REROUTE_FOR_BROKEN_BOOT_IRQS |
| 831 | bool "Reroute for broken boot IRQs" | 832 | bool "Reroute for broken boot IRQs" |
| 832 | default n | ||
| 833 | depends on X86_IO_APIC | 833 | depends on X86_IO_APIC |
| 834 | ---help--- | 834 | ---help--- |
| 835 | This option enables a workaround that fixes a source of | 835 | This option enables a workaround that fixes a source of |
| @@ -876,9 +876,8 @@ config X86_MCE_AMD | |||
| 876 | the DRAM Error Threshold. | 876 | the DRAM Error Threshold. |
| 877 | 877 | ||
| 878 | config X86_ANCIENT_MCE | 878 | config X86_ANCIENT_MCE |
| 879 | def_bool n | 879 | bool "Support for old Pentium 5 / WinChip machine checks" |
| 880 | depends on X86_32 && X86_MCE | 880 | depends on X86_32 && X86_MCE |
| 881 | prompt "Support for old Pentium 5 / WinChip machine checks" | ||
| 882 | ---help--- | 881 | ---help--- |
| 883 | Include support for machine check handling on old Pentium 5 or WinChip | 882 | Include support for machine check handling on old Pentium 5 or WinChip |
| 884 | systems. These typically need to be enabled explicitely on the command | 883 | systems. These typically need to be enabled explicitely on the command |
| @@ -886,8 +885,7 @@ config X86_ANCIENT_MCE | |||
| 886 | 885 | ||
| 887 | config X86_MCE_THRESHOLD | 886 | config X86_MCE_THRESHOLD |
| 888 | depends on X86_MCE_AMD || X86_MCE_INTEL | 887 | depends on X86_MCE_AMD || X86_MCE_INTEL |
| 889 | bool | 888 | def_bool y |
| 890 | default y | ||
| 891 | 889 | ||
| 892 | config X86_MCE_INJECT | 890 | config X86_MCE_INJECT |
| 893 | depends on X86_MCE | 891 | depends on X86_MCE |
| @@ -1026,8 +1024,8 @@ config X86_CPUID | |||
| 1026 | 1024 | ||
| 1027 | choice | 1025 | choice |
| 1028 | prompt "High Memory Support" | 1026 | prompt "High Memory Support" |
| 1029 | default HIGHMEM4G if !X86_NUMAQ | ||
| 1030 | default HIGHMEM64G if X86_NUMAQ | 1027 | default HIGHMEM64G if X86_NUMAQ |
| 1028 | default HIGHMEM4G | ||
| 1031 | depends on X86_32 | 1029 | depends on X86_32 |
| 1032 | 1030 | ||
| 1033 | config NOHIGHMEM | 1031 | config NOHIGHMEM |
| @@ -1285,7 +1283,7 @@ source "mm/Kconfig" | |||
| 1285 | 1283 | ||
| 1286 | config HIGHPTE | 1284 | config HIGHPTE |
| 1287 | bool "Allocate 3rd-level pagetables from highmem" | 1285 | bool "Allocate 3rd-level pagetables from highmem" |
| 1288 | depends on X86_32 && (HIGHMEM4G || HIGHMEM64G) | 1286 | depends on HIGHMEM |
| 1289 | ---help--- | 1287 | ---help--- |
| 1290 | The VM uses one page table entry for each page of physical memory. | 1288 | The VM uses one page table entry for each page of physical memory. |
| 1291 | For systems with a lot of RAM, this can be wasteful of precious | 1289 | For systems with a lot of RAM, this can be wasteful of precious |
| @@ -1369,8 +1367,7 @@ config MATH_EMULATION | |||
| 1369 | kernel, it won't hurt. | 1367 | kernel, it won't hurt. |
| 1370 | 1368 | ||
| 1371 | config MTRR | 1369 | config MTRR |
| 1372 | bool | 1370 | def_bool y |
| 1373 | default y | ||
| 1374 | prompt "MTRR (Memory Type Range Register) support" if EMBEDDED | 1371 | prompt "MTRR (Memory Type Range Register) support" if EMBEDDED |
| 1375 | ---help--- | 1372 | ---help--- |
| 1376 | On Intel P6 family processors (Pentium Pro, Pentium II and later) | 1373 | On Intel P6 family processors (Pentium Pro, Pentium II and later) |
| @@ -1436,8 +1433,7 @@ config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT | |||
| 1436 | mtrr_spare_reg_nr=N on the kernel command line. | 1433 | mtrr_spare_reg_nr=N on the kernel command line. |
| 1437 | 1434 | ||
| 1438 | config X86_PAT | 1435 | config X86_PAT |
| 1439 | bool | 1436 | def_bool y |
| 1440 | default y | ||
| 1441 | prompt "x86 PAT support" if EMBEDDED | 1437 | prompt "x86 PAT support" if EMBEDDED |
| 1442 | depends on MTRR | 1438 | depends on MTRR |
| 1443 | ---help--- | 1439 | ---help--- |
| @@ -1605,8 +1601,7 @@ config X86_NEED_RELOCS | |||
| 1605 | depends on X86_32 && RELOCATABLE | 1601 | depends on X86_32 && RELOCATABLE |
| 1606 | 1602 | ||
| 1607 | config PHYSICAL_ALIGN | 1603 | config PHYSICAL_ALIGN |
| 1608 | hex | 1604 | hex "Alignment value to which kernel should be aligned" if X86_32 |
| 1609 | prompt "Alignment value to which kernel should be aligned" if X86_32 | ||
| 1610 | default "0x1000000" | 1605 | default "0x1000000" |
| 1611 | range 0x2000 0x1000000 | 1606 | range 0x2000 0x1000000 |
| 1612 | ---help--- | 1607 | ---help--- |
| @@ -1653,7 +1648,6 @@ config COMPAT_VDSO | |||
| 1653 | 1648 | ||
| 1654 | config CMDLINE_BOOL | 1649 | config CMDLINE_BOOL |
| 1655 | bool "Built-in kernel command line" | 1650 | bool "Built-in kernel command line" |
| 1656 | default n | ||
| 1657 | ---help--- | 1651 | ---help--- |
| 1658 | Allow for specifying boot arguments to the kernel at | 1652 | Allow for specifying boot arguments to the kernel at |
| 1659 | build time. On some systems (e.g. embedded ones), it is | 1653 | build time. On some systems (e.g. embedded ones), it is |
| @@ -1687,7 +1681,6 @@ config CMDLINE | |||
| 1687 | 1681 | ||
| 1688 | config CMDLINE_OVERRIDE | 1682 | config CMDLINE_OVERRIDE |
| 1689 | bool "Built-in command line overrides boot loader arguments" | 1683 | bool "Built-in command line overrides boot loader arguments" |
| 1690 | default n | ||
| 1691 | depends on CMDLINE_BOOL | 1684 | depends on CMDLINE_BOOL |
| 1692 | ---help--- | 1685 | ---help--- |
| 1693 | Set this option to 'Y' to have the kernel ignore the boot loader | 1686 | Set this option to 'Y' to have the kernel ignore the boot loader |
| @@ -1723,8 +1716,7 @@ source "drivers/acpi/Kconfig" | |||
| 1723 | source "drivers/sfi/Kconfig" | 1716 | source "drivers/sfi/Kconfig" |
| 1724 | 1717 | ||
| 1725 | config X86_APM_BOOT | 1718 | config X86_APM_BOOT |
| 1726 | bool | 1719 | def_bool y |
| 1727 | default y | ||
| 1728 | depends on APM || APM_MODULE | 1720 | depends on APM || APM_MODULE |
| 1729 | 1721 | ||
| 1730 | menuconfig APM | 1722 | menuconfig APM |
| @@ -1953,8 +1945,7 @@ config DMAR_DEFAULT_ON | |||
| 1953 | experimental. | 1945 | experimental. |
| 1954 | 1946 | ||
| 1955 | config DMAR_BROKEN_GFX_WA | 1947 | config DMAR_BROKEN_GFX_WA |
| 1956 | def_bool n | 1948 | bool "Workaround broken graphics drivers (going away soon)" |
| 1957 | prompt "Workaround broken graphics drivers (going away soon)" | ||
| 1958 | depends on DMAR && BROKEN | 1949 | depends on DMAR && BROKEN |
| 1959 | ---help--- | 1950 | ---help--- |
| 1960 | Current Graphics drivers tend to use physical address | 1951 | Current Graphics drivers tend to use physical address |
| @@ -2052,7 +2043,6 @@ config SCx200HR_TIMER | |||
| 2052 | config OLPC | 2043 | config OLPC |
| 2053 | bool "One Laptop Per Child support" | 2044 | bool "One Laptop Per Child support" |
| 2054 | select GPIOLIB | 2045 | select GPIOLIB |
| 2055 | default n | ||
| 2056 | ---help--- | 2046 | ---help--- |
| 2057 | Add support for detecting the unique features of the OLPC | 2047 | Add support for detecting the unique features of the OLPC |
| 2058 | XO hardware. | 2048 | XO hardware. |
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index a19829374e6a..2ac9069890cd 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
| @@ -338,6 +338,10 @@ config X86_F00F_BUG | |||
| 338 | def_bool y | 338 | def_bool y |
| 339 | depends on M586MMX || M586TSC || M586 || M486 || M386 | 339 | depends on M586MMX || M586TSC || M586 || M486 || M386 |
| 340 | 340 | ||
| 341 | config X86_INVD_BUG | ||
| 342 | def_bool y | ||
| 343 | depends on M486 || M386 | ||
| 344 | |||
| 341 | config X86_WP_WORKS_OK | 345 | config X86_WP_WORKS_OK |
| 342 | def_bool y | 346 | def_bool y |
| 343 | depends on !M386 | 347 | depends on !M386 |
| @@ -502,23 +506,3 @@ config CPU_SUP_UMC_32 | |||
| 502 | CPU might render the kernel unbootable. | 506 | CPU might render the kernel unbootable. |
| 503 | 507 | ||
| 504 | If unsure, say N. | 508 | If unsure, say N. |
| 505 | |||
| 506 | config X86_DS | ||
| 507 | def_bool X86_PTRACE_BTS | ||
| 508 | depends on X86_DEBUGCTLMSR | ||
| 509 | select HAVE_HW_BRANCH_TRACER | ||
| 510 | |||
| 511 | config X86_PTRACE_BTS | ||
| 512 | bool "Branch Trace Store" | ||
| 513 | default y | ||
| 514 | depends on X86_DEBUGCTLMSR | ||
| 515 | depends on BROKEN | ||
| 516 | ---help--- | ||
| 517 | This adds a ptrace interface to the hardware's branch trace store. | ||
| 518 | |||
| 519 | Debuggers may use it to collect an execution trace of the debugged | ||
| 520 | application in order to answer the question 'how did I get here?'. | ||
| 521 | Debuggers may trace user mode as well as kernel mode. | ||
| 522 | |||
| 523 | Say Y unless there is no application development on this machine | ||
| 524 | and you want to save a small amount of code size. | ||
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index bc01e3ebfeb2..75085080b63e 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
| @@ -45,7 +45,6 @@ config EARLY_PRINTK | |||
| 45 | 45 | ||
| 46 | config EARLY_PRINTK_DBGP | 46 | config EARLY_PRINTK_DBGP |
| 47 | bool "Early printk via EHCI debug port" | 47 | bool "Early printk via EHCI debug port" |
| 48 | default n | ||
| 49 | depends on EARLY_PRINTK && PCI | 48 | depends on EARLY_PRINTK && PCI |
| 50 | ---help--- | 49 | ---help--- |
| 51 | Write kernel log output directly into the EHCI debug port. | 50 | Write kernel log output directly into the EHCI debug port. |
| @@ -76,7 +75,6 @@ config DEBUG_PER_CPU_MAPS | |||
| 76 | bool "Debug access to per_cpu maps" | 75 | bool "Debug access to per_cpu maps" |
| 77 | depends on DEBUG_KERNEL | 76 | depends on DEBUG_KERNEL |
| 78 | depends on SMP | 77 | depends on SMP |
| 79 | default n | ||
| 80 | ---help--- | 78 | ---help--- |
| 81 | Say Y to verify that the per_cpu map being accessed has | 79 | Say Y to verify that the per_cpu map being accessed has |
| 82 | been setup. Adds a fair amount of code to kernel memory | 80 | been setup. Adds a fair amount of code to kernel memory |
| @@ -174,15 +172,6 @@ config IOMMU_LEAK | |||
| 174 | Add a simple leak tracer to the IOMMU code. This is useful when you | 172 | Add a simple leak tracer to the IOMMU code. This is useful when you |
| 175 | are debugging a buggy device driver that leaks IOMMU mappings. | 173 | are debugging a buggy device driver that leaks IOMMU mappings. |
| 176 | 174 | ||
| 177 | config X86_DS_SELFTEST | ||
| 178 | bool "DS selftest" | ||
| 179 | default y | ||
| 180 | depends on DEBUG_KERNEL | ||
| 181 | depends on X86_DS | ||
| 182 | ---help--- | ||
| 183 | Perform Debug Store selftests at boot time. | ||
| 184 | If in doubt, say "N". | ||
| 185 | |||
| 186 | config HAVE_MMIOTRACE_SUPPORT | 175 | config HAVE_MMIOTRACE_SUPPORT |
| 187 | def_bool y | 176 | def_bool y |
| 188 | 177 | ||
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 0a43dc515e4c..8aa1b59b9074 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
| @@ -95,8 +95,9 @@ sp-$(CONFIG_X86_64) := rsp | |||
| 95 | cfi := $(call as-instr,.cfi_startproc\n.cfi_rel_offset $(sp-y)$(comma)0\n.cfi_endproc,-DCONFIG_AS_CFI=1) | 95 | cfi := $(call as-instr,.cfi_startproc\n.cfi_rel_offset $(sp-y)$(comma)0\n.cfi_endproc,-DCONFIG_AS_CFI=1) |
| 96 | # is .cfi_signal_frame supported too? | 96 | # is .cfi_signal_frame supported too? |
| 97 | cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1) | 97 | cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1) |
| 98 | KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) | 98 | cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1) |
| 99 | KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) | 99 | KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) |
| 100 | KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) | ||
| 100 | 101 | ||
| 101 | LDFLAGS := -m elf_$(UTS_MACHINE) | 102 | LDFLAGS := -m elf_$(UTS_MACHINE) |
| 102 | 103 | ||
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index b97f786a48d5..a63a68be1cce 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
| @@ -6,8 +6,8 @@ | |||
| 6 | .macro LOCK_PREFIX | 6 | .macro LOCK_PREFIX |
| 7 | 1: lock | 7 | 1: lock |
| 8 | .section .smp_locks,"a" | 8 | .section .smp_locks,"a" |
| 9 | _ASM_ALIGN | 9 | .balign 4 |
| 10 | _ASM_PTR 1b | 10 | .long 1b - . |
| 11 | .previous | 11 | .previous |
| 12 | .endm | 12 | .endm |
| 13 | #else | 13 | #else |
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index b09ec55650b3..03b6bb5394a0 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
| @@ -28,20 +28,20 @@ | |||
| 28 | */ | 28 | */ |
| 29 | 29 | ||
| 30 | #ifdef CONFIG_SMP | 30 | #ifdef CONFIG_SMP |
| 31 | #define LOCK_PREFIX \ | 31 | #define LOCK_PREFIX_HERE \ |
| 32 | ".section .smp_locks,\"a\"\n" \ | 32 | ".section .smp_locks,\"a\"\n" \ |
| 33 | _ASM_ALIGN "\n" \ | 33 | ".balign 4\n" \ |
| 34 | _ASM_PTR "661f\n" /* address */ \ | 34 | ".long 671f - .\n" /* offset */ \ |
| 35 | ".previous\n" \ | 35 | ".previous\n" \ |
| 36 | "661:\n\tlock; " | 36 | "671:" |
| 37 | |||
| 38 | #define LOCK_PREFIX LOCK_PREFIX_HERE "\n\tlock; " | ||
| 37 | 39 | ||
| 38 | #else /* ! CONFIG_SMP */ | 40 | #else /* ! CONFIG_SMP */ |
| 41 | #define LOCK_PREFIX_HERE "" | ||
| 39 | #define LOCK_PREFIX "" | 42 | #define LOCK_PREFIX "" |
| 40 | #endif | 43 | #endif |
| 41 | 44 | ||
| 42 | /* This must be included *after* the definition of LOCK_PREFIX */ | ||
| 43 | #include <asm/cpufeature.h> | ||
| 44 | |||
| 45 | struct alt_instr { | 45 | struct alt_instr { |
| 46 | u8 *instr; /* original instruction */ | 46 | u8 *instr; /* original instruction */ |
| 47 | u8 *replacement; | 47 | u8 *replacement; |
| @@ -96,6 +96,12 @@ static inline int alternatives_text_reserved(void *start, void *end) | |||
| 96 | ".previous" | 96 | ".previous" |
| 97 | 97 | ||
| 98 | /* | 98 | /* |
| 99 | * This must be included *after* the definition of ALTERNATIVE due to | ||
| 100 | * <asm/arch_hweight.h> | ||
| 101 | */ | ||
| 102 | #include <asm/cpufeature.h> | ||
| 103 | |||
| 104 | /* | ||
| 99 | * Alternative instructions for different CPU types or capabilities. | 105 | * Alternative instructions for different CPU types or capabilities. |
| 100 | * | 106 | * |
| 101 | * This allows to use optimized instructions even on generic binary | 107 | * This allows to use optimized instructions even on generic binary |
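Both LOCK_PREFIX variants above now record each patched lock prefix in .smp_locks as a 32-bit self-relative offset (".long 671f - ." / ".long 1b - .") instead of an absolute _ASM_PTR. The recorded site is recovered by adding the stored value to the address of the record itself, so an entry stays 4 bytes even on x86-64 and remains valid wherever the image ends up. A small user-space sketch of that addressing trick (the table below is built by hand purely for illustration):

/* sketch: self-relative records, the same trick as ".long 1b - ." --
 * each 32-bit slot stores the distance from the slot itself to the
 * location it describes, so no 64-bit pointers are needed.
 */
#include <stdint.h>
#include <stdio.h>

static char lock_site_a, lock_site_b;   /* stand-ins for patched code bytes */

/* normally emitted by the assembler; filled in by hand here */
static int32_t smp_locks[2];

static void *resolve(const int32_t *slot)
{
	/* absolute address = address of the slot + stored offset */
	return (char *)slot + *slot;
}

int main(void)
{
	/* assumes the objects sit within 2 GiB of each other, as kernel
	 * text and data do */
	smp_locks[0] = (int32_t)((char *)&lock_site_a - (char *)&smp_locks[0]);
	smp_locks[1] = (int32_t)((char *)&lock_site_b - (char *)&smp_locks[1]);

	printf("a: %p -> %p\n", (void *)&lock_site_a, resolve(&smp_locks[0]));
	printf("b: %p -> %p\n", (void *)&lock_site_b, resolve(&smp_locks[1]));
	return 0;
}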
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 86a0ff0aeac7..7014e88bc779 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
| @@ -174,6 +174,40 @@ | |||
| 174 | (~((1ULL << (12 + ((lvl) * 9))) - 1))) | 174 | (~((1ULL << (12 + ((lvl) * 9))) - 1))) |
| 175 | #define PM_ALIGNED(lvl, addr) ((PM_MAP_MASK(lvl) & (addr)) == (addr)) | 175 | #define PM_ALIGNED(lvl, addr) ((PM_MAP_MASK(lvl) & (addr)) == (addr)) |
| 176 | 176 | ||
| 177 | /* | ||
| 178 | * Returns the page table level to use for a given page size | ||
| 179 | * Pagesize is expected to be a power-of-two | ||
| 180 | */ | ||
| 181 | #define PAGE_SIZE_LEVEL(pagesize) \ | ||
| 182 | ((__ffs(pagesize) - 12) / 9) | ||
| 183 | /* | ||
| 184 | * Returns the number of ptes to use for a given page size | ||
| 185 | * Pagesize is expected to be a power-of-two | ||
| 186 | */ | ||
| 187 | #define PAGE_SIZE_PTE_COUNT(pagesize) \ | ||
| 188 | (1ULL << ((__ffs(pagesize) - 12) % 9)) | ||
| 189 | |||
| 190 | /* | ||
| 191 | * Aligns a given io-virtual address to a given page size | ||
| 192 | * Pagesize is expected to be a power-of-two | ||
| 193 | */ | ||
| 194 | #define PAGE_SIZE_ALIGN(address, pagesize) \ | ||
| 195 | ((address) & ~((pagesize) - 1)) | ||
| 196 | /* | ||
| 197 | * Creates an IOMMU PTE for an address an a given pagesize | ||
| 198 | * The PTE has no permission bits set | ||
| 199 | * Pagesize is expected to be a power-of-two larger than 4096 | ||
| 200 | */ | ||
| 201 | #define PAGE_SIZE_PTE(address, pagesize) \ | ||
| 202 | (((address) | ((pagesize) - 1)) & \ | ||
| 203 | (~(pagesize >> 1)) & PM_ADDR_MASK) | ||
| 204 | |||
| 205 | /* | ||
| 206 | * Takes a PTE value with mode=0x07 and returns the page size it maps | ||
| 207 | */ | ||
| 208 | #define PTE_PAGE_SIZE(pte) \ | ||
| 209 | (1ULL << (1 + ffz(((pte) | 0xfffULL)))) | ||
| 210 | |||
| 177 | #define IOMMU_PTE_P (1ULL << 0) | 211 | #define IOMMU_PTE_P (1ULL << 0) |
| 178 | #define IOMMU_PTE_TV (1ULL << 1) | 212 | #define IOMMU_PTE_TV (1ULL << 1) |
| 179 | #define IOMMU_PTE_U (1ULL << 59) | 213 | #define IOMMU_PTE_U (1ULL << 59) |
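The new macros encode the IOMMU page-table geometry: starting from 4 KiB at level 0, each level covers nine more address bits (consistent with the (1ULL << (12 + lvl*9)) masks just above), so a power-of-two page size maps to a level via (__ffs(size) - 12) / 9 and, when it is not a native level size, to a run of contiguous PTEs via the remainder. A quick user-space check of those two macros, with __builtin_ctzll standing in for the kernel's __ffs():

/* sketch: what PAGE_SIZE_LEVEL() and PAGE_SIZE_PTE_COUNT() evaluate to
 * for a few power-of-two page sizes; __builtin_ctzll stands in for the
 * kernel's __ffs() here.
 */
#include <stdio.h>

#define PAGE_SIZE_LEVEL(ps)     ((__builtin_ctzll(ps) - 12) / 9)
#define PAGE_SIZE_PTE_COUNT(ps) (1ULL << ((__builtin_ctzll(ps) - 12) % 9))

int main(void)
{
	unsigned long long sizes[] = {
		4ULL << 10,	/*   4 KiB: level 0,  1 pte  */
		64ULL << 10,	/*  64 KiB: level 0, 16 ptes */
		2ULL << 20,	/*   2 MiB: level 1,  1 pte  */
		1ULL << 30,	/*   1 GiB: level 2,  1 pte  */
	};

	for (unsigned i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
		printf("%10llu bytes -> level %d, %llu pte(s)\n",
		       sizes[i], PAGE_SIZE_LEVEL(sizes[i]),
		       PAGE_SIZE_PTE_COUNT(sizes[i]));
	return 0;
}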
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index b4ac2cdcb64f..1fa03e04ae44 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
| @@ -373,6 +373,7 @@ extern atomic_t init_deasserted; | |||
| 373 | extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); | 373 | extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); |
| 374 | #endif | 374 | #endif |
| 375 | 375 | ||
| 376 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 376 | static inline u32 apic_read(u32 reg) | 377 | static inline u32 apic_read(u32 reg) |
| 377 | { | 378 | { |
| 378 | return apic->read(reg); | 379 | return apic->read(reg); |
| @@ -403,10 +404,19 @@ static inline u32 safe_apic_wait_icr_idle(void) | |||
| 403 | return apic->safe_wait_icr_idle(); | 404 | return apic->safe_wait_icr_idle(); |
| 404 | } | 405 | } |
| 405 | 406 | ||
| 407 | #else /* CONFIG_X86_LOCAL_APIC */ | ||
| 408 | |||
| 409 | static inline u32 apic_read(u32 reg) { return 0; } | ||
| 410 | static inline void apic_write(u32 reg, u32 val) { } | ||
| 411 | static inline u64 apic_icr_read(void) { return 0; } | ||
| 412 | static inline void apic_icr_write(u32 low, u32 high) { } | ||
| 413 | static inline void apic_wait_icr_idle(void) { } | ||
| 414 | static inline u32 safe_apic_wait_icr_idle(void) { return 0; } | ||
| 415 | |||
| 416 | #endif /* CONFIG_X86_LOCAL_APIC */ | ||
| 406 | 417 | ||
| 407 | static inline void ack_APIC_irq(void) | 418 | static inline void ack_APIC_irq(void) |
| 408 | { | 419 | { |
| 409 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 410 | /* | 420 | /* |
| 411 | * ack_APIC_irq() actually gets compiled as a single instruction | 421 | * ack_APIC_irq() actually gets compiled as a single instruction |
| 412 | * ... yummie. | 422 | * ... yummie. |
| @@ -414,7 +424,6 @@ static inline void ack_APIC_irq(void) | |||
| 414 | 424 | ||
| 415 | /* Docs say use 0 for future compatibility */ | 425 | /* Docs say use 0 for future compatibility */ |
| 416 | apic_write(APIC_EOI, 0); | 426 | apic_write(APIC_EOI, 0); |
| 417 | #endif | ||
| 418 | } | 427 | } |
| 419 | 428 | ||
| 420 | static inline unsigned default_get_apic_id(unsigned long x) | 429 | static inline unsigned default_get_apic_id(unsigned long x) |
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
new file mode 100644
index 000000000000..9686c3d9ff73
--- /dev/null
+++ b/arch/x86/include/asm/arch_hweight.h
| @@ -0,0 +1,61 @@ | |||
| 1 | #ifndef _ASM_X86_HWEIGHT_H | ||
| 2 | #define _ASM_X86_HWEIGHT_H | ||
| 3 | |||
| 4 | #ifdef CONFIG_64BIT | ||
| 5 | /* popcnt %edi, %eax -- redundant REX prefix for alignment */ | ||
| 6 | #define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7" | ||
| 7 | /* popcnt %rdi, %rax */ | ||
| 8 | #define POPCNT64 ".byte 0xf3,0x48,0x0f,0xb8,0xc7" | ||
| 9 | #define REG_IN "D" | ||
| 10 | #define REG_OUT "a" | ||
| 11 | #else | ||
| 12 | /* popcnt %eax, %eax */ | ||
| 13 | #define POPCNT32 ".byte 0xf3,0x0f,0xb8,0xc0" | ||
| 14 | #define REG_IN "a" | ||
| 15 | #define REG_OUT "a" | ||
| 16 | #endif | ||
| 17 | |||
| 18 | /* | ||
| 19 | * __sw_hweightXX are called from within the alternatives below | ||
| 20 | * and callee-clobbered registers need to be taken care of. See | ||
| 21 | * ARCH_HWEIGHT_CFLAGS in <arch/x86/Kconfig> for the respective | ||
| 22 | * compiler switches. | ||
| 23 | */ | ||
| 24 | static inline unsigned int __arch_hweight32(unsigned int w) | ||
| 25 | { | ||
| 26 | unsigned int res = 0; | ||
| 27 | |||
| 28 | asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT) | ||
| 29 | : "="REG_OUT (res) | ||
| 30 | : REG_IN (w)); | ||
| 31 | |||
| 32 | return res; | ||
| 33 | } | ||
| 34 | |||
| 35 | static inline unsigned int __arch_hweight16(unsigned int w) | ||
| 36 | { | ||
| 37 | return __arch_hweight32(w & 0xffff); | ||
| 38 | } | ||
| 39 | |||
| 40 | static inline unsigned int __arch_hweight8(unsigned int w) | ||
| 41 | { | ||
| 42 | return __arch_hweight32(w & 0xff); | ||
| 43 | } | ||
| 44 | |||
| 45 | static inline unsigned long __arch_hweight64(__u64 w) | ||
| 46 | { | ||
| 47 | unsigned long res = 0; | ||
| 48 | |||
| 49 | #ifdef CONFIG_X86_32 | ||
| 50 | return __arch_hweight32((u32)w) + | ||
| 51 | __arch_hweight32((u32)(w >> 32)); | ||
| 52 | #else | ||
| 53 | asm (ALTERNATIVE("call __sw_hweight64", POPCNT64, X86_FEATURE_POPCNT) | ||
| 54 | : "="REG_OUT (res) | ||
| 55 | : REG_IN (w)); | ||
| 56 | #endif /* CONFIG_X86_32 */ | ||
| 57 | |||
| 58 | return res; | ||
| 59 | } | ||
| 60 | |||
| 61 | #endif | ||
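The ALTERNATIVE() in __arch_hweight32/64 emits a call to __sw_hweight32/__sw_hweight64 by default and lets the boot-time alternatives code overwrite that call with the hard-coded POPCNT encoding on CPUs advertising X86_FEATURE_POPCNT; the ARCH_HWEIGHT_CFLAGS entry added to Kconfig above builds the software fallback so it preserves the registers the inline asm does not mark as clobbered. For reference, a stand-alone sketch of the classic parallel bit-count that such a software fallback is based on, checked against the compiler builtin (function and test names here are illustrative):

/* sketch: a plain-C population count of the kind the __sw_hweight32
 * fallback computes, verified against the compiler's builtin (which
 * itself emits popcnt when the target supports it).
 */
#include <stdio.h>

static unsigned int sw_hweight32(unsigned int w)
{
	w -= (w >> 1) & 0x55555555;			/* pairs of bits */
	w  = (w & 0x33333333) + ((w >> 2) & 0x33333333);/* nibbles       */
	w  = (w + (w >> 4)) & 0x0f0f0f0f;		/* bytes         */
	return (w * 0x01010101) >> 24;			/* sum the bytes */
}

int main(void)
{
	unsigned int tests[] = { 0, 1, 0x80000000u, 0xdeadbeefu, 0xffffffffu };

	for (unsigned i = 0; i < sizeof(tests) / sizeof(tests[0]); i++)
		printf("%#10x -> %u (builtin says %d)\n", tests[i],
		       sw_hweight32(tests[i]), __builtin_popcount(tests[i]));
	return 0;
}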
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 8f8217b9bdac..952a826ac4e5 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
| @@ -22,7 +22,7 @@ | |||
| 22 | */ | 22 | */ |
| 23 | static inline int atomic_read(const atomic_t *v) | 23 | static inline int atomic_read(const atomic_t *v) |
| 24 | { | 24 | { |
| 25 | return v->counter; | 25 | return (*(volatile int *)&(v)->counter); |
| 26 | } | 26 | } |
| 27 | 27 | ||
| 28 | /** | 28 | /** |
| @@ -246,6 +246,29 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) | |||
| 246 | 246 | ||
| 247 | #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) | 247 | #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) |
| 248 | 248 | ||
| 249 | /* | ||
| 250 | * atomic_dec_if_positive - decrement by 1 if old value positive | ||
| 251 | * @v: pointer of type atomic_t | ||
| 252 | * | ||
| 253 | * The function returns the old value of *v minus 1, even if | ||
| 254 | * the atomic variable, v, was not decremented. | ||
| 255 | */ | ||
| 256 | static inline int atomic_dec_if_positive(atomic_t *v) | ||
| 257 | { | ||
| 258 | int c, old, dec; | ||
| 259 | c = atomic_read(v); | ||
| 260 | for (;;) { | ||
| 261 | dec = c - 1; | ||
| 262 | if (unlikely(dec < 0)) | ||
| 263 | break; | ||
| 264 | old = atomic_cmpxchg((v), c, dec); | ||
| 265 | if (likely(old == c)) | ||
| 266 | break; | ||
| 267 | c = old; | ||
| 268 | } | ||
| 269 | return dec; | ||
| 270 | } | ||
| 271 | |||
| 249 | /** | 272 | /** |
| 250 | * atomic_inc_short - increment of a short integer | 273 | * atomic_inc_short - increment of a short integer |
| 251 | * @v: pointer to type int | 274 | * @v: pointer to type int |
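The new atomic_dec_if_positive() is a plain cmpxchg() retry loop: take a snapshot, refuse to go below zero, and only commit the decrement if no other CPU changed the counter in the meantime; the return value is the old value minus one whether or not the store happened. The same pattern in stand-alone C11 form, used here to hand out "tokens" from a counter that must never go negative (the example names are made up):

/* sketch: the compare-and-swap retry pattern of atomic_dec_if_positive(),
 * written with C11 atomics.
 */
#include <stdatomic.h>
#include <stdio.h>

static int dec_if_positive(atomic_int *v)
{
	int c = atomic_load(v);

	for (;;) {
		int dec = c - 1;

		if (dec < 0)			/* would go negative: give up */
			return dec;
		/* commit only if *v is still c; on failure c is refreshed */
		if (atomic_compare_exchange_weak(v, &c, dec))
			return dec;
	}
}

int main(void)
{
	atomic_int tokens = 2;

	for (int i = 0; i < 4; i++) {
		int left = dec_if_positive(&tokens);
		printf("attempt %d: %s (returned %d)\n", i,
		       left >= 0 ? "took a token" : "none left", left);
	}
	return 0;
}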
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 03027bf28de5..2a934aa19a43 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
| @@ -14,109 +14,193 @@ typedef struct { | |||
| 14 | 14 | ||
| 15 | #define ATOMIC64_INIT(val) { (val) } | 15 | #define ATOMIC64_INIT(val) { (val) } |
| 16 | 16 | ||
| 17 | extern u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old_val, u64 new_val); | 17 | #ifdef CONFIG_X86_CMPXCHG64 |
| 18 | #define ATOMIC64_ALTERNATIVE_(f, g) "call atomic64_" #g "_cx8" | ||
| 19 | #else | ||
| 20 | #define ATOMIC64_ALTERNATIVE_(f, g) ALTERNATIVE("call atomic64_" #f "_386", "call atomic64_" #g "_cx8", X86_FEATURE_CX8) | ||
| 21 | #endif | ||
| 22 | |||
| 23 | #define ATOMIC64_ALTERNATIVE(f) ATOMIC64_ALTERNATIVE_(f, f) | ||
| 24 | |||
| 25 | /** | ||
| 26 | * atomic64_cmpxchg - cmpxchg atomic64 variable | ||
| 27 | * @p: pointer to type atomic64_t | ||
| 28 | * @o: expected value | ||
| 29 | * @n: new value | ||
| 30 | * | ||
| 31 | * Atomically sets @v to @n if it was equal to @o and returns | ||
| 32 | * the old value. | ||
| 33 | */ | ||
| 34 | |||
| 35 | static inline long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n) | ||
| 36 | { | ||
| 37 | return cmpxchg64(&v->counter, o, n); | ||
| 38 | } | ||
| 18 | 39 | ||
| 19 | /** | 40 | /** |
| 20 | * atomic64_xchg - xchg atomic64 variable | 41 | * atomic64_xchg - xchg atomic64 variable |
| 21 | * @ptr: pointer to type atomic64_t | 42 | * @v: pointer to type atomic64_t |
| 22 | * @new_val: value to assign | 43 | * @n: value to assign |
| 23 | * | 44 | * |
| 24 | * Atomically xchgs the value of @ptr to @new_val and returns | 45 | * Atomically xchgs the value of @v to @n and returns |
| 25 | * the old value. | 46 | * the old value. |
| 26 | */ | 47 | */ |
| 27 | extern u64 atomic64_xchg(atomic64_t *ptr, u64 new_val); | 48 | static inline long long atomic64_xchg(atomic64_t *v, long long n) |
| 49 | { | ||
| 50 | long long o; | ||
| 51 | unsigned high = (unsigned)(n >> 32); | ||
| 52 | unsigned low = (unsigned)n; | ||
| 53 | asm volatile(ATOMIC64_ALTERNATIVE(xchg) | ||
| 54 | : "=A" (o), "+b" (low), "+c" (high) | ||
| 55 | : "S" (v) | ||
| 56 | : "memory" | ||
| 57 | ); | ||
| 58 | return o; | ||
| 59 | } | ||
| 28 | 60 | ||
| 29 | /** | 61 | /** |
| 30 | * atomic64_set - set atomic64 variable | 62 | * atomic64_set - set atomic64 variable |
| 31 | * @ptr: pointer to type atomic64_t | 63 | * @v: pointer to type atomic64_t |
| 32 | * @new_val: value to assign | 64 | * @n: value to assign |
| 33 | * | 65 | * |
| 34 | * Atomically sets the value of @ptr to @new_val. | 66 | * Atomically sets the value of @v to @n. |
| 35 | */ | 67 | */ |
| 36 | extern void atomic64_set(atomic64_t *ptr, u64 new_val); | 68 | static inline void atomic64_set(atomic64_t *v, long long i) |
| 69 | { | ||
| 70 | unsigned high = (unsigned)(i >> 32); | ||
| 71 | unsigned low = (unsigned)i; | ||
| 72 | asm volatile(ATOMIC64_ALTERNATIVE(set) | ||
| 73 | : "+b" (low), "+c" (high) | ||
| 74 | : "S" (v) | ||
| 75 | : "eax", "edx", "memory" | ||
| 76 | ); | ||
| 77 | } | ||
| 37 | 78 | ||
| 38 | /** | 79 | /** |
| 39 | * atomic64_read - read atomic64 variable | 80 | * atomic64_read - read atomic64 variable |
| 40 | * @ptr: pointer to type atomic64_t | 81 | * @v: pointer to type atomic64_t |
| 41 | * | 82 | * |
| 42 | * Atomically reads the value of @ptr and returns it. | 83 | * Atomically reads the value of @v and returns it. |
| 43 | */ | 84 | */ |
| 44 | static inline u64 atomic64_read(atomic64_t *ptr) | 85 | static inline long long atomic64_read(atomic64_t *v) |
| 45 | { | 86 | { |
| 46 | u64 res; | 87 | long long r; |
| 47 | 88 | asm volatile(ATOMIC64_ALTERNATIVE(read) | |
| 48 | /* | 89 | : "=A" (r), "+c" (v) |
| 49 | * Note, we inline this atomic64_t primitive because | 90 | : : "memory" |
| 50 | * it only clobbers EAX/EDX and leaves the others | 91 | ); |
| 51 | * untouched. We also (somewhat subtly) rely on the | 92 | return r; |
| 52 | * fact that cmpxchg8b returns the current 64-bit value | 93 | } |
| 53 | * of the memory location we are touching: | ||
| 54 | */ | ||
| 55 | asm volatile( | ||
| 56 | "mov %%ebx, %%eax\n\t" | ||
| 57 | "mov %%ecx, %%edx\n\t" | ||
| 58 | LOCK_PREFIX "cmpxchg8b %1\n" | ||
| 59 | : "=&A" (res) | ||
| 60 | : "m" (*ptr) | ||
| 61 | ); | ||
| 62 | |||
| 63 | return res; | ||
| 64 | } | ||
| 65 | |||
| 66 | extern u64 atomic64_read(atomic64_t *ptr); | ||
| 67 | 94 | ||
| 68 | /** | 95 | /** |
| 69 | * atomic64_add_return - add and return | 96 | * atomic64_add_return - add and return |
| 70 | * @delta: integer value to add | 97 | * @i: integer value to add |
| 71 | * @ptr: pointer to type atomic64_t | 98 | * @v: pointer to type atomic64_t |
| 72 | * | 99 | * |
| 73 | * Atomically adds @delta to @ptr and returns @delta + *@ptr | 100 | * Atomically adds @i to @v and returns @i + *@v |
| 74 | */ | 101 | */ |
| 75 | extern u64 atomic64_add_return(u64 delta, atomic64_t *ptr); | 102 | static inline long long atomic64_add_return(long long i, atomic64_t *v) |
| 103 | { | ||
| 104 | asm volatile(ATOMIC64_ALTERNATIVE(add_return) | ||
| 105 | : "+A" (i), "+c" (v) | ||
| 106 | : : "memory" | ||
| 107 | ); | ||
| 108 | return i; | ||
| 109 | } | ||
| 76 | 110 | ||
| 77 | /* | 111 | /* |
| 78 | * Other variants with different arithmetic operators: | 112 | * Other variants with different arithmetic operators: |
| 79 | */ | 113 | */ |
| 80 | extern u64 atomic64_sub_return(u64 delta, atomic64_t *ptr); | 114 | static inline long long atomic64_sub_return(long long i, atomic64_t *v) |
| 81 | extern u64 atomic64_inc_return(atomic64_t *ptr); | 115 | { |
| 82 | extern u64 atomic64_dec_return(atomic64_t *ptr); | 116 | asm volatile(ATOMIC64_ALTERNATIVE(sub_return) |
| 117 | : "+A" (i), "+c" (v) | ||
| 118 | : : "memory" | ||
| 119 | ); | ||
| 120 | return i; | ||
| 121 | } | ||
| 122 | |||
| 123 | static inline long long atomic64_inc_return(atomic64_t *v) | ||
| 124 | { | ||
| 125 | long long a; | ||
| 126 | asm volatile(ATOMIC64_ALTERNATIVE(inc_return) | ||
| 127 | : "=A" (a) | ||
| 128 | : "S" (v) | ||
| 129 | : "memory", "ecx" | ||
| 130 | ); | ||
| 131 | return a; | ||
| 132 | } | ||
| 133 | |||
| 134 | static inline long long atomic64_dec_return(atomic64_t *v) | ||
| 135 | { | ||
| 136 | long long a; | ||
| 137 | asm volatile(ATOMIC64_ALTERNATIVE(dec_return) | ||
| 138 | : "=A" (a) | ||
| 139 | : "S" (v) | ||
| 140 | : "memory", "ecx" | ||
| 141 | ); | ||
| 142 | return a; | ||
| 143 | } | ||
| 83 | 144 | ||
| 84 | /** | 145 | /** |
| 85 | * atomic64_add - add integer to atomic64 variable | 146 | * atomic64_add - add integer to atomic64 variable |
| 86 | * @delta: integer value to add | 147 | * @i: integer value to add |
| 87 | * @ptr: pointer to type atomic64_t | 148 | * @v: pointer to type atomic64_t |
| 88 | * | 149 | * |
| 89 | * Atomically adds @delta to @ptr. | 150 | * Atomically adds @i to @v. |
| 90 | */ | 151 | */ |
| 91 | extern void atomic64_add(u64 delta, atomic64_t *ptr); | 152 | static inline long long atomic64_add(long long i, atomic64_t *v) |
| 153 | { | ||
| 154 | asm volatile(ATOMIC64_ALTERNATIVE_(add, add_return) | ||
| 155 | : "+A" (i), "+c" (v) | ||
| 156 | : : "memory" | ||
| 157 | ); | ||
| 158 | return i; | ||
| 159 | } | ||
| 92 | 160 | ||
| 93 | /** | 161 | /** |
| 94 | * atomic64_sub - subtract the atomic64 variable | 162 | * atomic64_sub - subtract the atomic64 variable |
| 95 | * @delta: integer value to subtract | 163 | * @i: integer value to subtract |
| 96 | * @ptr: pointer to type atomic64_t | 164 | * @v: pointer to type atomic64_t |
| 97 | * | 165 | * |
| 98 | * Atomically subtracts @delta from @ptr. | 166 | * Atomically subtracts @i from @v. |
| 99 | */ | 167 | */ |
| 100 | extern void atomic64_sub(u64 delta, atomic64_t *ptr); | 168 | static inline long long atomic64_sub(long long i, atomic64_t *v) |
| 169 | { | ||
| 170 | asm volatile(ATOMIC64_ALTERNATIVE_(sub, sub_return) | ||
| 171 | : "+A" (i), "+c" (v) | ||
| 172 | : : "memory" | ||
| 173 | ); | ||
| 174 | return i; | ||
| 175 | } | ||
| 101 | 176 | ||
| 102 | /** | 177 | /** |
| 103 | * atomic64_sub_and_test - subtract value from variable and test result | 178 | * atomic64_sub_and_test - subtract value from variable and test result |
| 104 | * @delta: integer value to subtract | 179 | * @i: integer value to subtract |
| 105 | * @ptr: pointer to type atomic64_t | 180 | * @v: pointer to type atomic64_t |
| 106 | * | 181 | * |
| 107 | * Atomically subtracts @delta from @ptr and returns | 182 | * Atomically subtracts @i from @v and returns |
| 108 | * true if the result is zero, or false for all | 183 | * true if the result is zero, or false for all |
| 109 | * other cases. | 184 | * other cases. |
| 110 | */ | 185 | */ |
| 111 | extern int atomic64_sub_and_test(u64 delta, atomic64_t *ptr); | 186 | static inline int atomic64_sub_and_test(long long i, atomic64_t *v) |
| 187 | { | ||
| 188 | return atomic64_sub_return(i, v) == 0; | ||
| 189 | } | ||
| 112 | 190 | ||
| 113 | /** | 191 | /** |
| 114 | * atomic64_inc - increment atomic64 variable | 192 | * atomic64_inc - increment atomic64 variable |
| 115 | * @ptr: pointer to type atomic64_t | 193 | * @v: pointer to type atomic64_t |
| 116 | * | 194 | * |
| 117 | * Atomically increments @ptr by 1. | 195 | * Atomically increments @v by 1. |
| 118 | */ | 196 | */ |
| 119 | extern void atomic64_inc(atomic64_t *ptr); | 197 | static inline void atomic64_inc(atomic64_t *v) |
| 198 | { | ||
| 199 | asm volatile(ATOMIC64_ALTERNATIVE_(inc, inc_return) | ||
| 200 | : : "S" (v) | ||
| 201 | : "memory", "eax", "ecx", "edx" | ||
| 202 | ); | ||
| 203 | } | ||
| 120 | 204 | ||
| 121 | /** | 205 | /** |
| 122 | * atomic64_dec - decrement atomic64 variable | 206 | * atomic64_dec - decrement atomic64 variable |
| @@ -124,37 +208,97 @@ extern void atomic64_inc(atomic64_t *ptr); | |||
| 124 | * | 208 | * |
| 125 | * Atomically decrements @ptr by 1. | 209 | * Atomically decrements @ptr by 1. |
| 126 | */ | 210 | */ |
| 127 | extern void atomic64_dec(atomic64_t *ptr); | 211 | static inline void atomic64_dec(atomic64_t *v) |
| 212 | { | ||
| 213 | asm volatile(ATOMIC64_ALTERNATIVE_(dec, dec_return) | ||
| 214 | : : "S" (v) | ||
| 215 | : "memory", "eax", "ecx", "edx" | ||
| 216 | ); | ||
| 217 | } | ||
| 128 | 218 | ||
| 129 | /** | 219 | /** |
| 130 | * atomic64_dec_and_test - decrement and test | 220 | * atomic64_dec_and_test - decrement and test |
| 131 | * @ptr: pointer to type atomic64_t | 221 | * @v: pointer to type atomic64_t |
| 132 | * | 222 | * |
| 133 | * Atomically decrements @ptr by 1 and | 223 | * Atomically decrements @v by 1 and |
| 134 | * returns true if the result is 0, or false for all other | 224 | * returns true if the result is 0, or false for all other |
| 135 | * cases. | 225 | * cases. |
| 136 | */ | 226 | */ |
| 137 | extern int atomic64_dec_and_test(atomic64_t *ptr); | 227 | static inline int atomic64_dec_and_test(atomic64_t *v) |
| 228 | { | ||
| 229 | return atomic64_dec_return(v) == 0; | ||
| 230 | } | ||
| 138 | 231 | ||
| 139 | /** | 232 | /** |
| 140 | * atomic64_inc_and_test - increment and test | 233 | * atomic64_inc_and_test - increment and test |
| 141 | * @ptr: pointer to type atomic64_t | 234 | * @v: pointer to type atomic64_t |
| 142 | * | 235 | * |
| 143 | * Atomically increments @ptr by 1 | 236 | * Atomically increments @v by 1 |
| 144 | * and returns true if the result is zero, or false for all | 237 | * and returns true if the result is zero, or false for all |
| 145 | * other cases. | 238 | * other cases. |
| 146 | */ | 239 | */ |
| 147 | extern int atomic64_inc_and_test(atomic64_t *ptr); | 240 | static inline int atomic64_inc_and_test(atomic64_t *v) |
| 241 | { | ||
| 242 | return atomic64_inc_return(v) == 0; | ||
| 243 | } | ||
| 148 | 244 | ||
| 149 | /** | 245 | /** |
| 150 | * atomic64_add_negative - add and test if negative | 246 | * atomic64_add_negative - add and test if negative |
| 151 | * @delta: integer value to add | 247 | * @i: integer value to add |
| 152 | * @ptr: pointer to type atomic64_t | 248 | * @v: pointer to type atomic64_t |
| 153 | * | 249 | * |
| 154 | * Atomically adds @delta to @ptr and returns true | 250 | * Atomically adds @i to @v and returns true |
| 155 | * if the result is negative, or false when | 251 | * if the result is negative, or false when |
| 156 | * result is greater than or equal to zero. | 252 | * result is greater than or equal to zero. |
| 157 | */ | 253 | */ |
| 158 | extern int atomic64_add_negative(u64 delta, atomic64_t *ptr); | 254 | static inline int atomic64_add_negative(long long i, atomic64_t *v) |
| 255 | { | ||
| 256 | return atomic64_add_return(i, v) < 0; | ||
| 257 | } | ||
| 258 | |||
| 259 | /** | ||
| 260 | * atomic64_add_unless - add unless the number is a given value | ||
| 261 | * @v: pointer of type atomic64_t | ||
| 262 | * @a: the amount to add to v... | ||
| 263 | * @u: ...unless v is equal to u. | ||
| 264 | * | ||
| 265 | * Atomically adds @a to @v, so long as it was not @u. | ||
| 266 | * Returns non-zero if @v was not @u, and zero otherwise. | ||
| 267 | */ | ||
| 268 | static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u) | ||
| 269 | { | ||
| 270 | unsigned low = (unsigned)u; | ||
| 271 | unsigned high = (unsigned)(u >> 32); | ||
| 272 | asm volatile(ATOMIC64_ALTERNATIVE(add_unless) "\n\t" | ||
| 273 | : "+A" (a), "+c" (v), "+S" (low), "+D" (high) | ||
| 274 | : : "memory"); | ||
| 275 | return (int)a; | ||
| 276 | } | ||
| 277 | |||
| 278 | |||
| 279 | static inline int atomic64_inc_not_zero(atomic64_t *v) | ||
| 280 | { | ||
| 281 | int r; | ||
| 282 | asm volatile(ATOMIC64_ALTERNATIVE(inc_not_zero) | ||
| 283 | : "=a" (r) | ||
| 284 | : "S" (v) | ||
| 285 | : "ecx", "edx", "memory" | ||
| 286 | ); | ||
| 287 | return r; | ||
| 288 | } | ||
| 289 | |||
| 290 | static inline long long atomic64_dec_if_positive(atomic64_t *v) | ||
| 291 | { | ||
| 292 | long long r; | ||
| 293 | asm volatile(ATOMIC64_ALTERNATIVE(dec_if_positive) | ||
| 294 | : "=A" (r) | ||
| 295 | : "S" (v) | ||
| 296 | : "ecx", "memory" | ||
| 297 | ); | ||
| 298 | return r; | ||
| 299 | } | ||
| 300 | |||
| 301 | #undef ATOMIC64_ALTERNATIVE | ||
| 302 | #undef ATOMIC64_ALTERNATIVE_ | ||
| 159 | 303 | ||
| 160 | #endif /* _ASM_X86_ATOMIC64_32_H */ | 304 | #endif /* _ASM_X86_ATOMIC64_32_H */ |
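With this rewrite, every 32-bit atomic64_t operation becomes a call to an out-of-line helper chosen by the alternatives machinery: an atomic64_*_cx8 routine (built on cmpxchg8b) when X86_FEATURE_CX8 is present, or an atomic64_*_386 fallback otherwise; when CONFIG_X86_CMPXCHG64 already guarantees cmpxchg8b, the cx8 routine is called unconditionally. The unusual constraint lists ("=A", "+b", "+c", "S", ...) reflect the fixed register calling convention those helpers use. The heart of the cx8 variants is a 64-bit compare-and-swap retry loop; a user-space sketch of add-and-return done that way (the GCC __sync builtin below compiles to cmpxchg8b on 32-bit x86 targets that have it):

/* sketch: 64-bit add-and-return built from a compare-and-swap retry
 * loop -- the same idea the out-of-line cx8 routines implement with
 * the cmpxchg8b instruction.
 */
#include <stdio.h>

static long long add_return64(long long i, long long *v)
{
	long long old, new;

	do {
		old = *v;		/* snapshot; validated by the CAS below */
		new = old + i;
	} while (!__sync_bool_compare_and_swap(v, old, new));

	return new;
}

int main(void)
{
	long long counter = 3;

	printf("%lld\n", add_return64(4, &counter));	/* prints 7  */
	printf("%lld\n", add_return64(-10, &counter));	/* prints -3 */
	return 0;
}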
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 51c5b4056929..49fd1ea22951 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
| @@ -18,7 +18,7 @@ | |||
| 18 | */ | 18 | */ |
| 19 | static inline long atomic64_read(const atomic64_t *v) | 19 | static inline long atomic64_read(const atomic64_t *v) |
| 20 | { | 20 | { |
| 21 | return v->counter; | 21 | return (*(volatile long *)&(v)->counter); |
| 22 | } | 22 | } |
| 23 | 23 | ||
| 24 | /** | 24 | /** |
| @@ -221,4 +221,27 @@ static inline int atomic64_add_unless(atomic64_t *v, long a, long u) | |||
| 221 | 221 | ||
| 222 | #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) | 222 | #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) |
| 223 | 223 | ||
| 224 | /* | ||
| 225 | * atomic64_dec_if_positive - decrement by 1 if old value positive | ||
| 226 | * @v: pointer of type atomic_t | ||
| 227 | * | ||
| 228 | * The function returns the old value of *v minus 1, even if | ||
| 229 | * the atomic variable, v, was not decremented. | ||
| 230 | */ | ||
| 231 | static inline long atomic64_dec_if_positive(atomic64_t *v) | ||
| 232 | { | ||
| 233 | long c, old, dec; | ||
| 234 | c = atomic64_read(v); | ||
| 235 | for (;;) { | ||
| 236 | dec = c - 1; | ||
| 237 | if (unlikely(dec < 0)) | ||
| 238 | break; | ||
| 239 | old = atomic64_cmpxchg((v), c, dec); | ||
| 240 | if (likely(old == c)) | ||
| 241 | break; | ||
| 242 | c = old; | ||
| 243 | } | ||
| 244 | return dec; | ||
| 245 | } | ||
| 246 | |||
| 224 | #endif /* _ASM_X86_ATOMIC64_64_H */ | 247 | #endif /* _ASM_X86_ATOMIC64_64_H */ |
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 02b47a603fc8..545776efeb16 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
| @@ -444,7 +444,9 @@ static inline int fls(int x) | |||
| 444 | 444 | ||
| 445 | #define ARCH_HAS_FAST_MULTIPLIER 1 | 445 | #define ARCH_HAS_FAST_MULTIPLIER 1 |
| 446 | 446 | ||
| 447 | #include <asm-generic/bitops/hweight.h> | 447 | #include <asm/arch_hweight.h> |
| 448 | |||
| 449 | #include <asm-generic/bitops/const_hweight.h> | ||
| 448 | 450 | ||
| 449 | #endif /* __KERNEL__ */ | 451 | #endif /* __KERNEL__ */ |
| 450 | 452 | ||
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 7a1065958ba9..3b62ab56c7a0 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
| @@ -24,7 +24,7 @@ | |||
| 24 | #define MIN_KERNEL_ALIGN (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2) | 24 | #define MIN_KERNEL_ALIGN (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2) |
| 25 | 25 | ||
| 26 | #if (CONFIG_PHYSICAL_ALIGN & (CONFIG_PHYSICAL_ALIGN-1)) || \ | 26 | #if (CONFIG_PHYSICAL_ALIGN & (CONFIG_PHYSICAL_ALIGN-1)) || \ |
| 27 | (CONFIG_PHYSICAL_ALIGN < (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2)) | 27 | (CONFIG_PHYSICAL_ALIGN < MIN_KERNEL_ALIGN) |
| 28 | #error "Invalid value for CONFIG_PHYSICAL_ALIGN" | 28 | #error "Invalid value for CONFIG_PHYSICAL_ALIGN" |
| 29 | #endif | 29 | #endif |
| 30 | 30 | ||
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index ffb9bb6b6c37..8859e12dd3cf 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
| @@ -271,7 +271,8 @@ extern unsigned long long cmpxchg_486_u64(volatile void *, u64, u64); | |||
| 271 | __typeof__(*(ptr)) __ret; \ | 271 | __typeof__(*(ptr)) __ret; \ |
| 272 | __typeof__(*(ptr)) __old = (o); \ | 272 | __typeof__(*(ptr)) __old = (o); \ |
| 273 | __typeof__(*(ptr)) __new = (n); \ | 273 | __typeof__(*(ptr)) __new = (n); \ |
| 274 | alternative_io("call cmpxchg8b_emu", \ | 274 | alternative_io(LOCK_PREFIX_HERE \ |
| 275 | "call cmpxchg8b_emu", \ | ||
| 275 | "lock; cmpxchg8b (%%esi)" , \ | 276 | "lock; cmpxchg8b (%%esi)" , \ |
| 276 | X86_FEATURE_CX8, \ | 277 | X86_FEATURE_CX8, \ |
| 277 | "=A" (__ret), \ | 278 | "=A" (__ret), \ |
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 0cd82d068613..dca9c545f44e 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
| @@ -161,6 +161,7 @@ | |||
| 161 | */ | 161 | */ |
| 162 | #define X86_FEATURE_IDA (7*32+ 0) /* Intel Dynamic Acceleration */ | 162 | #define X86_FEATURE_IDA (7*32+ 0) /* Intel Dynamic Acceleration */ |
| 163 | #define X86_FEATURE_ARAT (7*32+ 1) /* Always Running APIC Timer */ | 163 | #define X86_FEATURE_ARAT (7*32+ 1) /* Always Running APIC Timer */ |
| 164 | #define X86_FEATURE_CPB (7*32+ 2) /* AMD Core Performance Boost */ | ||
| 164 | 165 | ||
| 165 | /* Virtualization flags: Linux defined */ | 166 | /* Virtualization flags: Linux defined */ |
| 166 | #define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ | 167 | #define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ |
| @@ -175,6 +176,7 @@ | |||
| 175 | 176 | ||
| 176 | #if defined(__KERNEL__) && !defined(__ASSEMBLY__) | 177 | #if defined(__KERNEL__) && !defined(__ASSEMBLY__) |
| 177 | 178 | ||
| 179 | #include <asm/asm.h> | ||
| 178 | #include <linux/bitops.h> | 180 | #include <linux/bitops.h> |
| 179 | 181 | ||
| 180 | extern const char * const x86_cap_flags[NCAPINTS*32]; | 182 | extern const char * const x86_cap_flags[NCAPINTS*32]; |
| @@ -283,6 +285,62 @@ extern const char * const x86_power_flags[32]; | |||
| 283 | 285 | ||
| 284 | #endif /* CONFIG_X86_64 */ | 286 | #endif /* CONFIG_X86_64 */ |
| 285 | 287 | ||
| 288 | /* | ||
| 289 | * Static testing of CPU features. Used the same as boot_cpu_has(). | ||
| 290 | * These are only valid after alternatives have run, but will statically | ||
| 291 | * patch the target code for additional performance. | ||
| 292 | * | ||
| 293 | */ | ||
| 294 | static __always_inline __pure bool __static_cpu_has(u8 bit) | ||
| 295 | { | ||
| 296 | #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5) | ||
| 297 | asm goto("1: jmp %l[t_no]\n" | ||
| 298 | "2:\n" | ||
| 299 | ".section .altinstructions,\"a\"\n" | ||
| 300 | _ASM_ALIGN "\n" | ||
| 301 | _ASM_PTR "1b\n" | ||
| 302 | _ASM_PTR "0\n" /* no replacement */ | ||
| 303 | " .byte %P0\n" /* feature bit */ | ||
| 304 | " .byte 2b - 1b\n" /* source len */ | ||
| 305 | " .byte 0\n" /* replacement len */ | ||
| 306 | " .byte 0xff + 0 - (2b-1b)\n" /* padding */ | ||
| 307 | ".previous\n" | ||
| 308 | : : "i" (bit) : : t_no); | ||
| 309 | return true; | ||
| 310 | t_no: | ||
| 311 | return false; | ||
| 312 | #else | ||
| 313 | u8 flag; | ||
| 314 | /* Open-coded due to __stringify() in ALTERNATIVE() */ | ||
| 315 | asm volatile("1: movb $0,%0\n" | ||
| 316 | "2:\n" | ||
| 317 | ".section .altinstructions,\"a\"\n" | ||
| 318 | _ASM_ALIGN "\n" | ||
| 319 | _ASM_PTR "1b\n" | ||
| 320 | _ASM_PTR "3f\n" | ||
| 321 | " .byte %P1\n" /* feature bit */ | ||
| 322 | " .byte 2b - 1b\n" /* source len */ | ||
| 323 | " .byte 4f - 3f\n" /* replacement len */ | ||
| 324 | " .byte 0xff + (4f-3f) - (2b-1b)\n" /* padding */ | ||
| 325 | ".previous\n" | ||
| 326 | ".section .altinstr_replacement,\"ax\"\n" | ||
| 327 | "3: movb $1,%0\n" | ||
| 328 | "4:\n" | ||
| 329 | ".previous\n" | ||
| 330 | : "=qm" (flag) : "i" (bit)); | ||
| 331 | return flag; | ||
| 332 | #endif | ||
| 333 | } | ||
| 334 | |||
| 335 | #define static_cpu_has(bit) \ | ||
| 336 | ( \ | ||
| 337 | __builtin_constant_p(boot_cpu_has(bit)) ? \ | ||
| 338 | boot_cpu_has(bit) : \ | ||
| 339 | (__builtin_constant_p(bit) && !((bit) & ~0xff)) ? \ | ||
| 340 | __static_cpu_has(bit) : \ | ||
| 341 | boot_cpu_has(bit) \ | ||
| 342 | ) | ||
| 343 | |||
| 286 | #endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */ | 344 | #endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */ |
| 287 | 345 | ||
| 288 | #endif /* _ASM_X86_CPUFEATURE_H */ | 346 | #endif /* _ASM_X86_CPUFEATURE_H */ |
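static_cpu_has() gives hot paths a feature test whose branch is resolved by the alternatives code once the boot CPU's capabilities are known: with gcc 4.5+ it relies on "asm goto", so the initial jmp to the false label can simply be patched away when the feature bit is set, and no flag lookup or output register is needed at run time. A minimal stand-alone demonstration of the asm goto construct itself (it shows only the jump-to-C-label feature, not the boot-time patching, so it always prints 0):

/* sketch: the "asm goto" construct (gcc >= 4.5) that __static_cpu_has()
 * is built on -- the asm body may branch straight to a C label, which is
 * what lets the patched code select the true or false path directly.
 */
#include <stdio.h>

static int feature_enabled(void)
{
	/* in the kernel, this jmp is what apply_alternatives() removes
	 * when the feature bit is set; nothing patches it here */
	asm goto("jmp %l[t_no]" : : : : t_no);
	return 1;
t_no:
	return 0;
}

int main(void)
{
	printf("feature path taken: %d\n", feature_enabled());
	return 0;
}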
diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h
deleted file mode 100644
index 70dac199b093..000000000000
--- a/arch/x86/include/asm/ds.h
+++ /dev/null
| @@ -1,302 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Debug Store (DS) support | ||
| 3 | * | ||
| 4 | * This provides a low-level interface to the hardware's Debug Store | ||
| 5 | * feature that is used for branch trace store (BTS) and | ||
| 6 | * precise-event based sampling (PEBS). | ||
| 7 | * | ||
| 8 | * It manages: | ||
| 9 | * - DS and BTS hardware configuration | ||
| 10 | * - buffer overflow handling (to be done) | ||
| 11 | * - buffer access | ||
| 12 | * | ||
| 13 | * It does not do: | ||
| 14 | * - security checking (is the caller allowed to trace the task) | ||
| 15 | * - buffer allocation (memory accounting) | ||
| 16 | * | ||
| 17 | * | ||
| 18 | * Copyright (C) 2007-2009 Intel Corporation. | ||
| 19 | * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009 | ||
| 20 | */ | ||
| 21 | |||
| 22 | #ifndef _ASM_X86_DS_H | ||
| 23 | #define _ASM_X86_DS_H | ||
| 24 | |||
| 25 | |||
| 26 | #include <linux/types.h> | ||
| 27 | #include <linux/init.h> | ||
| 28 | #include <linux/err.h> | ||
| 29 | |||
| 30 | |||
| 31 | #ifdef CONFIG_X86_DS | ||
| 32 | |||
| 33 | struct task_struct; | ||
| 34 | struct ds_context; | ||
| 35 | struct ds_tracer; | ||
| 36 | struct bts_tracer; | ||
| 37 | struct pebs_tracer; | ||
| 38 | |||
| 39 | typedef void (*bts_ovfl_callback_t)(struct bts_tracer *); | ||
| 40 | typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *); | ||
| 41 | |||
| 42 | |||
| 43 | /* | ||
| 44 | * A list of features plus corresponding macros to talk about them in | ||
| 45 | * the ds_request function's flags parameter. | ||
| 46 | * | ||
| 47 | * We use the enum to index an array of corresponding control bits; | ||
| 48 | * we use the macro to index a flags bit-vector. | ||
| 49 | */ | ||
| 50 | enum ds_feature { | ||
| 51 | dsf_bts = 0, | ||
| 52 | dsf_bts_kernel, | ||
| 53 | #define BTS_KERNEL (1 << dsf_bts_kernel) | ||
| 54 | /* trace kernel-mode branches */ | ||
| 55 | |||
| 56 | dsf_bts_user, | ||
| 57 | #define BTS_USER (1 << dsf_bts_user) | ||
| 58 | /* trace user-mode branches */ | ||
| 59 | |||
| 60 | dsf_bts_overflow, | ||
| 61 | dsf_bts_max, | ||
| 62 | dsf_pebs = dsf_bts_max, | ||
| 63 | |||
| 64 | dsf_pebs_max, | ||
| 65 | dsf_ctl_max = dsf_pebs_max, | ||
| 66 | dsf_bts_timestamps = dsf_ctl_max, | ||
| 67 | #define BTS_TIMESTAMPS (1 << dsf_bts_timestamps) | ||
| 68 | /* add timestamps into BTS trace */ | ||
| 69 | |||
| 70 | #define BTS_USER_FLAGS (BTS_KERNEL | BTS_USER | BTS_TIMESTAMPS) | ||
| 71 | }; | ||
| 72 | |||
| 73 | |||
| 74 | /* | ||
| 75 | * Request BTS or PEBS | ||
| 76 | * | ||
| 77 | * Due to alignement constraints, the actual buffer may be slightly | ||
| 78 | * smaller than the requested or provided buffer. | ||
| 79 | * | ||
| 80 | * Returns a pointer to a tracer structure on success, or | ||
| 81 | * ERR_PTR(errcode) on failure. | ||
| 82 | * | ||
| 83 | * The interrupt threshold is independent from the overflow callback | ||
| 84 | * to allow users to use their own overflow interrupt handling mechanism. | ||
| 85 | * | ||
| 86 | * The function might sleep. | ||
| 87 | * | ||
| 88 | * task: the task to request recording for | ||
| 89 | * cpu: the cpu to request recording for | ||
| 90 | * base: the base pointer for the (non-pageable) buffer; | ||
| 91 | * size: the size of the provided buffer in bytes | ||
| 92 | * ovfl: pointer to a function to be called on buffer overflow; | ||
| 93 | * NULL if cyclic buffer requested | ||
| 94 | * th: the interrupt threshold in records from the end of the buffer; | ||
| 95 | * -1 if no interrupt threshold is requested. | ||
| 96 | * flags: a bit-mask of the above flags | ||
| 97 | */ | ||
| 98 | extern struct bts_tracer *ds_request_bts_task(struct task_struct *task, | ||
| 99 | void *base, size_t size, | ||
| 100 | bts_ovfl_callback_t ovfl, | ||
| 101 | size_t th, unsigned int flags); | ||
| 102 | extern struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size, | ||
| 103 | bts_ovfl_callback_t ovfl, | ||
| 104 | size_t th, unsigned int flags); | ||
| 105 | extern struct pebs_tracer *ds_request_pebs_task(struct task_struct *task, | ||
| 106 | void *base, size_t size, | ||
| 107 | pebs_ovfl_callback_t ovfl, | ||
| 108 | size_t th, unsigned int flags); | ||
| 109 | extern struct pebs_tracer *ds_request_pebs_cpu(int cpu, | ||
| 110 | void *base, size_t size, | ||
| 111 | pebs_ovfl_callback_t ovfl, | ||
| 112 | size_t th, unsigned int flags); | ||
| 113 | |||
| 114 | /* | ||
| 115 | * Release BTS or PEBS resources | ||
| 116 | * Suspend and resume BTS or PEBS tracing | ||
| 117 | * | ||
| 118 | * Must be called with irq's enabled. | ||
| 119 | * | ||
| 120 | * tracer: the tracer handle returned from ds_request_~() | ||
| 121 | */ | ||
| 122 | extern void ds_release_bts(struct bts_tracer *tracer); | ||
| 123 | extern void ds_suspend_bts(struct bts_tracer *tracer); | ||
| 124 | extern void ds_resume_bts(struct bts_tracer *tracer); | ||
| 125 | extern void ds_release_pebs(struct pebs_tracer *tracer); | ||
| 126 | extern void ds_suspend_pebs(struct pebs_tracer *tracer); | ||
| 127 | extern void ds_resume_pebs(struct pebs_tracer *tracer); | ||
| 128 | |||
| 129 | /* | ||
| 130 | * Release BTS or PEBS resources | ||
| 131 | * Suspend and resume BTS or PEBS tracing | ||
| 132 | * | ||
| 133 | * Cpu tracers must call this on the traced cpu. | ||
| 134 | * Task tracers must call ds_release_~_noirq() for themselves. | ||
| 135 | * | ||
| 136 | * May be called with irq's disabled. | ||
| 137 | * | ||
| 138 | * Returns 0 if successful; | ||
| 139 | * -EPERM if the cpu tracer does not trace the current cpu. | ||
| 140 | * -EPERM if the task tracer does not trace itself. | ||
| 141 | * | ||
| 142 | * tracer: the tracer handle returned from ds_request_~() | ||
| 143 | */ | ||
| 144 | extern int ds_release_bts_noirq(struct bts_tracer *tracer); | ||
| 145 | extern int ds_suspend_bts_noirq(struct bts_tracer *tracer); | ||
| 146 | extern int ds_resume_bts_noirq(struct bts_tracer *tracer); | ||
| 147 | extern int ds_release_pebs_noirq(struct pebs_tracer *tracer); | ||
| 148 | extern int ds_suspend_pebs_noirq(struct pebs_tracer *tracer); | ||
| 149 | extern int ds_resume_pebs_noirq(struct pebs_tracer *tracer); | ||
| 150 | |||
| 151 | |||
| 152 | /* | ||
| 153 | * The raw DS buffer state as it is used for BTS and PEBS recording. | ||
| 154 | * | ||
| 155 | * This is the low-level, arch-dependent interface for working | ||
| 156 | * directly on the raw trace data. | ||
| 157 | */ | ||
| 158 | struct ds_trace { | ||
| 159 | /* the number of bts/pebs records */ | ||
| 160 | size_t n; | ||
| 161 | /* the size of a bts/pebs record in bytes */ | ||
| 162 | size_t size; | ||
| 163 | /* pointers into the raw buffer: | ||
| 164 | - to the first entry */ | ||
| 165 | void *begin; | ||
| 166 | /* - one beyond the last entry */ | ||
| 167 | void *end; | ||
| 168 | /* - one beyond the newest entry */ | ||
| 169 | void *top; | ||
| 170 | /* - the interrupt threshold */ | ||
| 171 | void *ith; | ||
| 172 | /* flags given on ds_request() */ | ||
| 173 | unsigned int flags; | ||
| 174 | }; | ||
| 175 | |||
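As a reading aid (not taken from the patch), the bookkeeping above describes a flat record array: begin/end bound the buffer, top points one past the newest record, and each record is size bytes. A sketch of deriving the current fill level, assuming the buffer has not wrapped yet:

	/* Illustrative only: records written since the last reset (no wrap). */
	static size_t ds_trace_filled(const struct ds_trace *trace)
	{
		return ((const char *)trace->top - (const char *)trace->begin) /
			trace->size;
	}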
| 176 | /* | ||
| 177 | * An arch-independent view on branch trace data. | ||
| 178 | */ | ||
| 179 | enum bts_qualifier { | ||
| 180 | bts_invalid, | ||
| 181 | #define BTS_INVALID bts_invalid | ||
| 182 | |||
| 183 | bts_branch, | ||
| 184 | #define BTS_BRANCH bts_branch | ||
| 185 | |||
| 186 | bts_task_arrives, | ||
| 187 | #define BTS_TASK_ARRIVES bts_task_arrives | ||
| 188 | |||
| 189 | bts_task_departs, | ||
| 190 | #define BTS_TASK_DEPARTS bts_task_departs | ||
| 191 | |||
| 192 | bts_qual_bit_size = 4, | ||
| 193 | bts_qual_max = (1 << bts_qual_bit_size), | ||
| 194 | }; | ||
| 195 | |||
| 196 | struct bts_struct { | ||
| 197 | __u64 qualifier; | ||
| 198 | union { | ||
| 199 | /* BTS_BRANCH */ | ||
| 200 | struct { | ||
| 201 | __u64 from; | ||
| 202 | __u64 to; | ||
| 203 | } lbr; | ||
| 204 | /* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */ | ||
| 205 | struct { | ||
| 206 | __u64 clock; | ||
| 207 | pid_t pid; | ||
| 208 | } event; | ||
| 209 | } variant; | ||
| 210 | }; | ||
| 211 | |||
| 212 | |||
| 213 | /* | ||
| 214 | * The BTS state. | ||
| 215 | * | ||
| 216 | * This gives access to the raw DS state and adds functions to provide | ||
| 217 | * an arch-independent view of the BTS data. | ||
| 218 | */ | ||
| 219 | struct bts_trace { | ||
| 220 | struct ds_trace ds; | ||
| 221 | |||
| 222 | int (*read)(struct bts_tracer *tracer, const void *at, | ||
| 223 | struct bts_struct *out); | ||
| 224 | int (*write)(struct bts_tracer *tracer, const struct bts_struct *in); | ||
| 225 | }; | ||
| 226 | |||
| 227 | |||
| 228 | /* | ||
| 229 | * The PEBS state. | ||
| 230 | * | ||
| 231 | * This gives access to the raw DS state and the PEBS-specific counter | ||
| 232 | * reset value. | ||
| 233 | */ | ||
| 234 | struct pebs_trace { | ||
| 235 | struct ds_trace ds; | ||
| 236 | |||
| 237 | /* the number of valid counters in the below array */ | ||
| 238 | unsigned int counters; | ||
| 239 | |||
| 240 | #define MAX_PEBS_COUNTERS 4 | ||
| 241 | /* the counter reset value */ | ||
| 242 | unsigned long long counter_reset[MAX_PEBS_COUNTERS]; | ||
| 243 | }; | ||
| 244 | |||
| 245 | |||
| 246 | /* | ||
| 247 | * Read the BTS or PEBS trace. | ||
| 248 | * | ||
| 249 | * Returns a view on the trace collected for the parameter tracer. | ||
| 250 | * | ||
| 251 | * The view remains valid as long as the traced task is not running or | ||
| 252 | * the tracer is suspended. | ||
| 253 | * Writes into the trace buffer are not reflected. | ||
| 254 | * | ||
| 255 | * tracer: the tracer handle returned from ds_request_~() | ||
| 256 | */ | ||
| 257 | extern const struct bts_trace *ds_read_bts(struct bts_tracer *tracer); | ||
| 258 | extern const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer); | ||
| 259 | |||
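A hedged sketch of how these pieces combine (not part of the patch): the tracer is suspended so the returned view stays valid per the comment above, raw entries between begin and top are decoded with the read() callback of struct bts_trace, and only BTS_BRANCH records are printed. Older data that wrapped past top is ignored here; pr_info() needs <linux/kernel.h>.

	static void dump_new_branches(struct bts_tracer *tracer)
	{
		const struct bts_trace *trace;
		const unsigned char *at, *top;

		ds_suspend_bts(tracer);

		trace = ds_read_bts(tracer);
		if (!trace)
			goto out;

		top = trace->ds.top;
		for (at = trace->ds.begin; at < top; at += trace->ds.size) {
			struct bts_struct bts;

			if (trace->read(tracer, at, &bts) < 0)
				continue;
			if (bts.qualifier == BTS_BRANCH)
				pr_info("branch %llx -> %llx\n",
					bts.variant.lbr.from, bts.variant.lbr.to);
		}
	out:
		ds_resume_bts(tracer);
	}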
| 260 | |||
| 261 | /* | ||
| 262 | * Reset the write pointer of the BTS/PEBS buffer. | ||
| 263 | * | ||
| 264 | * Returns 0 on success; -Eerrno on error | ||
| 265 | * | ||
| 266 | * tracer: the tracer handle returned from ds_request_~() | ||
| 267 | */ | ||
| 268 | extern int ds_reset_bts(struct bts_tracer *tracer); | ||
| 269 | extern int ds_reset_pebs(struct pebs_tracer *tracer); | ||
| 270 | |||
| 271 | /* | ||
| 272 | * Set the PEBS counter reset value. | ||
| 273 | * | ||
| 274 | * Returns 0 on success; -Eerrno on error | ||
| 275 | * | ||
| 276 | * tracer: the tracer handle returned from ds_request_pebs() | ||
| 277 | * counter: the index of the counter | ||
| 278 | * value: the new counter reset value | ||
| 279 | */ | ||
| 280 | extern int ds_set_pebs_reset(struct pebs_tracer *tracer, | ||
| 281 | unsigned int counter, u64 value); | ||
| 282 | |||
| 283 | /* | ||
| 284 | * Initialization | ||
| 285 | */ | ||
| 286 | struct cpuinfo_x86; | ||
| 287 | extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *); | ||
| 288 | |||
| 289 | /* | ||
| 290 | * Context switch work | ||
| 291 | */ | ||
| 292 | extern void ds_switch_to(struct task_struct *prev, struct task_struct *next); | ||
| 293 | |||
| 294 | #else /* CONFIG_X86_DS */ | ||
| 295 | |||
| 296 | struct cpuinfo_x86; | ||
| 297 | static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {} | ||
| 298 | static inline void ds_switch_to(struct task_struct *prev, | ||
| 299 | struct task_struct *next) {} | ||
| 300 | |||
| 301 | #endif /* CONFIG_X86_DS */ | ||
| 302 | #endif /* _ASM_X86_DS_H */ | ||
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h index ae6253ab9029..733f7e91e7a9 100644 --- a/arch/x86/include/asm/dwarf2.h +++ b/arch/x86/include/asm/dwarf2.h | |||
| @@ -34,6 +34,18 @@ | |||
| 34 | #define CFI_SIGNAL_FRAME | 34 | #define CFI_SIGNAL_FRAME |
| 35 | #endif | 35 | #endif |
| 36 | 36 | ||
| 37 | #if defined(CONFIG_AS_CFI_SECTIONS) && defined(__ASSEMBLY__) | ||
| 38 | /* | ||
| 39 | * Emit CFI data in .debug_frame sections, not .eh_frame sections. | ||
| 40 | * The latter we currently just discard since we don't do DWARF | ||
| 41 | * unwinding at runtime. So only the offline DWARF information is | ||
| 42 | * useful to anyone. Note we should not use this directive if this | ||
| 43 | * file is used in the vDSO assembly, or if vmlinux.lds.S gets | ||
| 44 | * changed so it doesn't discard .eh_frame. | ||
| 45 | */ | ||
| 46 | .cfi_sections .debug_frame | ||
| 47 | #endif | ||
| 48 | |||
| 37 | #else | 49 | #else |
| 38 | 50 | ||
| 39 | /* | 51 | /* |
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 0f8576427cfe..aeab29aee617 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h | |||
| @@ -35,7 +35,7 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); | |||
| 35 | 35 | ||
| 36 | #define __ARCH_IRQ_STAT | 36 | #define __ARCH_IRQ_STAT |
| 37 | 37 | ||
| 38 | #define inc_irq_stat(member) percpu_add(irq_stat.member, 1) | 38 | #define inc_irq_stat(member) percpu_inc(irq_stat.member) |
| 39 | 39 | ||
| 40 | #define local_softirq_pending() percpu_read(irq_stat.__softirq_pending) | 40 | #define local_softirq_pending() percpu_read(irq_stat.__softirq_pending) |
| 41 | 41 | ||
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index 2a1bd8f4f23a..942255310e6a 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h | |||
| @@ -41,12 +41,16 @@ struct arch_hw_breakpoint { | |||
| 41 | /* Total number of available HW breakpoint registers */ | 41 | /* Total number of available HW breakpoint registers */ |
| 42 | #define HBP_NUM 4 | 42 | #define HBP_NUM 4 |
| 43 | 43 | ||
| 44 | static inline int hw_breakpoint_slots(int type) | ||
| 45 | { | ||
| 46 | return HBP_NUM; | ||
| 47 | } | ||
| 48 | |||
| 44 | struct perf_event; | 49 | struct perf_event; |
| 45 | struct pmu; | 50 | struct pmu; |
| 46 | 51 | ||
| 47 | extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len); | 52 | extern int arch_check_bp_in_kernelspace(struct perf_event *bp); |
| 48 | extern int arch_validate_hwbkpt_settings(struct perf_event *bp, | 53 | extern int arch_validate_hwbkpt_settings(struct perf_event *bp); |
| 49 | struct task_struct *tsk); | ||
| 50 | extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, | 54 | extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, |
| 51 | unsigned long val, void *data); | 55 | unsigned long val, void *data); |
| 52 | 56 | ||
diff --git a/arch/x86/include/asm/hyperv.h b/arch/x86/include/asm/hyperv.h index e153a2b3889a..5df477ac3af7 100644 --- a/arch/x86/include/asm/hyperv.h +++ b/arch/x86/include/asm/hyperv.h | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | #ifndef _ASM_X86_KVM_HYPERV_H | 1 | #ifndef _ASM_X86_HYPERV_H |
| 2 | #define _ASM_X86_KVM_HYPERV_H | 2 | #define _ASM_X86_HYPERV_H |
| 3 | 3 | ||
| 4 | #include <linux/types.h> | 4 | #include <linux/types.h> |
| 5 | 5 | ||
| @@ -14,6 +14,10 @@ | |||
| 14 | #define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004 | 14 | #define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004 |
| 15 | #define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005 | 15 | #define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005 |
| 16 | 16 | ||
| 17 | #define HYPERV_HYPERVISOR_PRESENT_BIT 0x80000000 | ||
| 18 | #define HYPERV_CPUID_MIN 0x40000005 | ||
| 19 | #define HYPERV_CPUID_MAX 0x4000ffff | ||
| 20 | |||
| 17 | /* | 21 | /* |
| 18 | * Feature identification. EAX indicates which features are available | 22 | * Feature identification. EAX indicates which features are available |
| 19 | * to the partition based upon the current partition privileges. | 23 | * to the partition based upon the current partition privileges. |
| @@ -129,6 +133,9 @@ | |||
| 129 | /* MSR used to provide vcpu index */ | 133 | /* MSR used to provide vcpu index */ |
| 130 | #define HV_X64_MSR_VP_INDEX 0x40000002 | 134 | #define HV_X64_MSR_VP_INDEX 0x40000002 |
| 131 | 135 | ||
| 136 | /* MSR used to read the per-partition time reference counter */ | ||
| 137 | #define HV_X64_MSR_TIME_REF_COUNT 0x40000020 | ||
| 138 | |||
| 132 | /* Define the virtual APIC registers */ | 139 | /* Define the virtual APIC registers */ |
| 133 | #define HV_X64_MSR_EOI 0x40000070 | 140 | #define HV_X64_MSR_EOI 0x40000070 |
| 134 | #define HV_X64_MSR_ICR 0x40000071 | 141 | #define HV_X64_MSR_ICR 0x40000071 |
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index b78c0941e422..70abda7058c8 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h | |||
| @@ -17,10 +17,33 @@ | |||
| 17 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | 17 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 18 | * | 18 | * |
| 19 | */ | 19 | */ |
| 20 | #ifndef ASM_X86__HYPERVISOR_H | 20 | #ifndef _ASM_X86_HYPERVISOR_H |
| 21 | #define ASM_X86__HYPERVISOR_H | 21 | #define _ASM_X86_HYPERVISOR_H |
| 22 | 22 | ||
| 23 | extern void init_hypervisor(struct cpuinfo_x86 *c); | 23 | extern void init_hypervisor(struct cpuinfo_x86 *c); |
| 24 | extern void init_hypervisor_platform(void); | 24 | extern void init_hypervisor_platform(void); |
| 25 | 25 | ||
| 26 | /* | ||
| 27 | * x86 hypervisor information | ||
| 28 | */ | ||
| 29 | struct hypervisor_x86 { | ||
| 30 | /* Hypervisor name */ | ||
| 31 | const char *name; | ||
| 32 | |||
| 33 | /* Detection routine */ | ||
| 34 | bool (*detect)(void); | ||
| 35 | |||
| 36 | /* Adjust CPU feature bits (run once per CPU) */ | ||
| 37 | void (*set_cpu_features)(struct cpuinfo_x86 *); | ||
| 38 | |||
| 39 | /* Platform setup (run once per boot) */ | ||
| 40 | void (*init_platform)(void); | ||
| 41 | }; | ||
| 42 | |||
| 43 | extern const struct hypervisor_x86 *x86_hyper; | ||
| 44 | |||
| 45 | /* Recognized hypervisors */ | ||
| 46 | extern const struct hypervisor_x86 x86_hyper_vmware; | ||
| 47 | extern const struct hypervisor_x86 x86_hyper_ms_hyperv; | ||
| 48 | |||
| 26 | #endif | 49 | #endif |
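For illustration, a hypothetical descriptor in the shape this interface expects; the real instances (x86_hyper_vmware, x86_hyper_ms_hyperv) are defined elsewhere in this series, and every "example" name below is made up.

	static bool example_detect(void)
	{
		/* e.g. probe a vendor CPUID leaf here */
		return false;
	}

	static void example_init_platform(void)
	{
		/* one-time platform setup, run once per boot */
	}

	const struct hypervisor_x86 x86_hyper_example = {
		.name		= "Example Hypervisor",
		.detect		= example_detect,
		.init_platform	= example_init_platform,
		/* .set_cpu_features left NULL: no per-CPU feature fixup */
	};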
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index da2930924501..c991b3a7b904 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h | |||
| @@ -16,7 +16,9 @@ | |||
| 16 | #include <linux/kernel_stat.h> | 16 | #include <linux/kernel_stat.h> |
| 17 | #include <linux/regset.h> | 17 | #include <linux/regset.h> |
| 18 | #include <linux/hardirq.h> | 18 | #include <linux/hardirq.h> |
| 19 | #include <linux/slab.h> | ||
| 19 | #include <asm/asm.h> | 20 | #include <asm/asm.h> |
| 21 | #include <asm/cpufeature.h> | ||
| 20 | #include <asm/processor.h> | 22 | #include <asm/processor.h> |
| 21 | #include <asm/sigcontext.h> | 23 | #include <asm/sigcontext.h> |
| 22 | #include <asm/user.h> | 24 | #include <asm/user.h> |
| @@ -56,6 +58,11 @@ extern int restore_i387_xstate_ia32(void __user *buf); | |||
| 56 | 58 | ||
| 57 | #define X87_FSW_ES (1 << 7) /* Exception Summary */ | 59 | #define X87_FSW_ES (1 << 7) /* Exception Summary */ |
| 58 | 60 | ||
| 61 | static __always_inline __pure bool use_xsave(void) | ||
| 62 | { | ||
| 63 | return static_cpu_has(X86_FEATURE_XSAVE); | ||
| 64 | } | ||
| 65 | |||
| 59 | #ifdef CONFIG_X86_64 | 66 | #ifdef CONFIG_X86_64 |
| 60 | 67 | ||
| 61 | /* Ignore delayed exceptions from user space */ | 68 | /* Ignore delayed exceptions from user space */ |
| @@ -91,15 +98,15 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) | |||
| 91 | values. The kernel data segment can be sometimes 0 and sometimes | 98 | values. The kernel data segment can be sometimes 0 and sometimes |
| 92 | new user value. Both should be ok. | 99 | new user value. Both should be ok. |
| 93 | Use the PDA as safe address because it should be already in L1. */ | 100 | Use the PDA as safe address because it should be already in L1. */ |
| 94 | static inline void clear_fpu_state(struct task_struct *tsk) | 101 | static inline void fpu_clear(struct fpu *fpu) |
| 95 | { | 102 | { |
| 96 | struct xsave_struct *xstate = &tsk->thread.xstate->xsave; | 103 | struct xsave_struct *xstate = &fpu->state->xsave; |
| 97 | struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; | 104 | struct i387_fxsave_struct *fx = &fpu->state->fxsave; |
| 98 | 105 | ||
| 99 | /* | 106 | /* |
| 100 | * xsave header may indicate the init state of the FP. | 107 | * xsave header may indicate the init state of the FP. |
| 101 | */ | 108 | */ |
| 102 | if ((task_thread_info(tsk)->status & TS_XSAVE) && | 109 | if (use_xsave() && |
| 103 | !(xstate->xsave_hdr.xstate_bv & XSTATE_FP)) | 110 | !(xstate->xsave_hdr.xstate_bv & XSTATE_FP)) |
| 104 | return; | 111 | return; |
| 105 | 112 | ||
| @@ -111,6 +118,11 @@ static inline void clear_fpu_state(struct task_struct *tsk) | |||
| 111 | X86_FEATURE_FXSAVE_LEAK); | 118 | X86_FEATURE_FXSAVE_LEAK); |
| 112 | } | 119 | } |
| 113 | 120 | ||
| 121 | static inline void clear_fpu_state(struct task_struct *tsk) | ||
| 122 | { | ||
| 123 | fpu_clear(&tsk->thread.fpu); | ||
| 124 | } | ||
| 125 | |||
| 114 | static inline int fxsave_user(struct i387_fxsave_struct __user *fx) | 126 | static inline int fxsave_user(struct i387_fxsave_struct __user *fx) |
| 115 | { | 127 | { |
| 116 | int err; | 128 | int err; |
| @@ -135,7 +147,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx) | |||
| 135 | return err; | 147 | return err; |
| 136 | } | 148 | } |
| 137 | 149 | ||
| 138 | static inline void fxsave(struct task_struct *tsk) | 150 | static inline void fpu_fxsave(struct fpu *fpu) |
| 139 | { | 151 | { |
| 140 | /* Using "rex64; fxsave %0" is broken because, if the memory operand | 152 | /* Using "rex64; fxsave %0" is broken because, if the memory operand |
| 141 | uses any extended registers for addressing, a second REX prefix | 153 | uses any extended registers for addressing, a second REX prefix |
| @@ -145,42 +157,45 @@ static inline void fxsave(struct task_struct *tsk) | |||
| 145 | /* Using "fxsaveq %0" would be the ideal choice, but is only supported | 157 | /* Using "fxsaveq %0" would be the ideal choice, but is only supported |
| 146 | starting with gas 2.16. */ | 158 | starting with gas 2.16. */ |
| 147 | __asm__ __volatile__("fxsaveq %0" | 159 | __asm__ __volatile__("fxsaveq %0" |
| 148 | : "=m" (tsk->thread.xstate->fxsave)); | 160 | : "=m" (fpu->state->fxsave)); |
| 149 | #elif 0 | 161 | #elif 0 |
| 150 | /* Using, as a workaround, the properly prefixed form below isn't | 162 | /* Using, as a workaround, the properly prefixed form below isn't |
| 151 | accepted by any binutils version so far released, complaining that | 163 | accepted by any binutils version so far released, complaining that |
| 152 | the same type of prefix is used twice if an extended register is | 164 | the same type of prefix is used twice if an extended register is |
| 153 | needed for addressing (fix submitted to mainline 2005-11-21). */ | 165 | needed for addressing (fix submitted to mainline 2005-11-21). */ |
| 154 | __asm__ __volatile__("rex64/fxsave %0" | 166 | __asm__ __volatile__("rex64/fxsave %0" |
| 155 | : "=m" (tsk->thread.xstate->fxsave)); | 167 | : "=m" (fpu->state->fxsave)); |
| 156 | #else | 168 | #else |
| 157 | /* This, however, we can work around by forcing the compiler to select | 169 | /* This, however, we can work around by forcing the compiler to select |
| 158 | an addressing mode that doesn't require extended registers. */ | 170 | an addressing mode that doesn't require extended registers. */ |
| 159 | __asm__ __volatile__("rex64/fxsave (%1)" | 171 | __asm__ __volatile__("rex64/fxsave (%1)" |
| 160 | : "=m" (tsk->thread.xstate->fxsave) | 172 | : "=m" (fpu->state->fxsave) |
| 161 | : "cdaSDb" (&tsk->thread.xstate->fxsave)); | 173 | : "cdaSDb" (&fpu->state->fxsave)); |
| 162 | #endif | 174 | #endif |
| 163 | } | 175 | } |
| 164 | 176 | ||
| 165 | static inline void __save_init_fpu(struct task_struct *tsk) | 177 | static inline void fpu_save_init(struct fpu *fpu) |
| 166 | { | 178 | { |
| 167 | if (task_thread_info(tsk)->status & TS_XSAVE) | 179 | if (use_xsave()) |
| 168 | xsave(tsk); | 180 | fpu_xsave(fpu); |
| 169 | else | 181 | else |
| 170 | fxsave(tsk); | 182 | fpu_fxsave(fpu); |
| 183 | |||
| 184 | fpu_clear(fpu); | ||
| 185 | } | ||
| 171 | 186 | ||
| 172 | clear_fpu_state(tsk); | 187 | static inline void __save_init_fpu(struct task_struct *tsk) |
| 188 | { | ||
| 189 | fpu_save_init(&tsk->thread.fpu); | ||
| 173 | task_thread_info(tsk)->status &= ~TS_USEDFPU; | 190 | task_thread_info(tsk)->status &= ~TS_USEDFPU; |
| 174 | } | 191 | } |
| 175 | 192 | ||
| 176 | #else /* CONFIG_X86_32 */ | 193 | #else /* CONFIG_X86_32 */ |
| 177 | 194 | ||
| 178 | #ifdef CONFIG_MATH_EMULATION | 195 | #ifdef CONFIG_MATH_EMULATION |
| 179 | extern void finit_task(struct task_struct *tsk); | 196 | extern void finit_soft_fpu(struct i387_soft_struct *soft); |
| 180 | #else | 197 | #else |
| 181 | static inline void finit_task(struct task_struct *tsk) | 198 | static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} |
| 182 | { | ||
| 183 | } | ||
| 184 | #endif | 199 | #endif |
| 185 | 200 | ||
| 186 | static inline void tolerant_fwait(void) | 201 | static inline void tolerant_fwait(void) |
| @@ -216,13 +231,13 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) | |||
| 216 | /* | 231 | /* |
| 217 | * These must be called with preempt disabled | 232 | * These must be called with preempt disabled |
| 218 | */ | 233 | */ |
| 219 | static inline void __save_init_fpu(struct task_struct *tsk) | 234 | static inline void fpu_save_init(struct fpu *fpu) |
| 220 | { | 235 | { |
| 221 | if (task_thread_info(tsk)->status & TS_XSAVE) { | 236 | if (use_xsave()) { |
| 222 | struct xsave_struct *xstate = &tsk->thread.xstate->xsave; | 237 | struct xsave_struct *xstate = &fpu->state->xsave; |
| 223 | struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; | 238 | struct i387_fxsave_struct *fx = &fpu->state->fxsave; |
| 224 | 239 | ||
| 225 | xsave(tsk); | 240 | fpu_xsave(fpu); |
| 226 | 241 | ||
| 227 | /* | 242 | /* |
| 228 | * xsave header may indicate the init state of the FP. | 243 | * xsave header may indicate the init state of the FP. |
| @@ -246,8 +261,8 @@ static inline void __save_init_fpu(struct task_struct *tsk) | |||
| 246 | "fxsave %[fx]\n" | 261 | "fxsave %[fx]\n" |
| 247 | "bt $7,%[fsw] ; jnc 1f ; fnclex\n1:", | 262 | "bt $7,%[fsw] ; jnc 1f ; fnclex\n1:", |
| 248 | X86_FEATURE_FXSR, | 263 | X86_FEATURE_FXSR, |
| 249 | [fx] "m" (tsk->thread.xstate->fxsave), | 264 | [fx] "m" (fpu->state->fxsave), |
| 250 | [fsw] "m" (tsk->thread.xstate->fxsave.swd) : "memory"); | 265 | [fsw] "m" (fpu->state->fxsave.swd) : "memory"); |
| 251 | clear_state: | 266 | clear_state: |
| 252 | /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception | 267 | /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception |
| 253 | is pending. Clear the x87 state here by setting it to fixed | 268 | is pending. Clear the x87 state here by setting it to fixed |
| @@ -259,17 +274,34 @@ clear_state: | |||
| 259 | X86_FEATURE_FXSAVE_LEAK, | 274 | X86_FEATURE_FXSAVE_LEAK, |
| 260 | [addr] "m" (safe_address)); | 275 | [addr] "m" (safe_address)); |
| 261 | end: | 276 | end: |
| 277 | ; | ||
| 278 | } | ||
| 279 | |||
| 280 | static inline void __save_init_fpu(struct task_struct *tsk) | ||
| 281 | { | ||
| 282 | fpu_save_init(&tsk->thread.fpu); | ||
| 262 | task_thread_info(tsk)->status &= ~TS_USEDFPU; | 283 | task_thread_info(tsk)->status &= ~TS_USEDFPU; |
| 263 | } | 284 | } |
| 264 | 285 | ||
| 286 | |||
| 265 | #endif /* CONFIG_X86_64 */ | 287 | #endif /* CONFIG_X86_64 */ |
| 266 | 288 | ||
| 267 | static inline int restore_fpu_checking(struct task_struct *tsk) | 289 | static inline int fpu_fxrstor_checking(struct fpu *fpu) |
| 268 | { | 290 | { |
| 269 | if (task_thread_info(tsk)->status & TS_XSAVE) | 291 | return fxrstor_checking(&fpu->state->fxsave); |
| 270 | return xrstor_checking(&tsk->thread.xstate->xsave); | 292 | } |
| 293 | |||
| 294 | static inline int fpu_restore_checking(struct fpu *fpu) | ||
| 295 | { | ||
| 296 | if (use_xsave()) | ||
| 297 | return fpu_xrstor_checking(fpu); | ||
| 271 | else | 298 | else |
| 272 | return fxrstor_checking(&tsk->thread.xstate->fxsave); | 299 | return fpu_fxrstor_checking(fpu); |
| 300 | } | ||
| 301 | |||
| 302 | static inline int restore_fpu_checking(struct task_struct *tsk) | ||
| 303 | { | ||
| 304 | return fpu_restore_checking(&tsk->thread.fpu); | ||
| 273 | } | 305 | } |
| 274 | 306 | ||
| 275 | /* | 307 | /* |
| @@ -397,30 +429,59 @@ static inline void clear_fpu(struct task_struct *tsk) | |||
| 397 | static inline unsigned short get_fpu_cwd(struct task_struct *tsk) | 429 | static inline unsigned short get_fpu_cwd(struct task_struct *tsk) |
| 398 | { | 430 | { |
| 399 | if (cpu_has_fxsr) { | 431 | if (cpu_has_fxsr) { |
| 400 | return tsk->thread.xstate->fxsave.cwd; | 432 | return tsk->thread.fpu.state->fxsave.cwd; |
| 401 | } else { | 433 | } else { |
| 402 | return (unsigned short)tsk->thread.xstate->fsave.cwd; | 434 | return (unsigned short)tsk->thread.fpu.state->fsave.cwd; |
| 403 | } | 435 | } |
| 404 | } | 436 | } |
| 405 | 437 | ||
| 406 | static inline unsigned short get_fpu_swd(struct task_struct *tsk) | 438 | static inline unsigned short get_fpu_swd(struct task_struct *tsk) |
| 407 | { | 439 | { |
| 408 | if (cpu_has_fxsr) { | 440 | if (cpu_has_fxsr) { |
| 409 | return tsk->thread.xstate->fxsave.swd; | 441 | return tsk->thread.fpu.state->fxsave.swd; |
| 410 | } else { | 442 | } else { |
| 411 | return (unsigned short)tsk->thread.xstate->fsave.swd; | 443 | return (unsigned short)tsk->thread.fpu.state->fsave.swd; |
| 412 | } | 444 | } |
| 413 | } | 445 | } |
| 414 | 446 | ||
| 415 | static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) | 447 | static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) |
| 416 | { | 448 | { |
| 417 | if (cpu_has_xmm) { | 449 | if (cpu_has_xmm) { |
| 418 | return tsk->thread.xstate->fxsave.mxcsr; | 450 | return tsk->thread.fpu.state->fxsave.mxcsr; |
| 419 | } else { | 451 | } else { |
| 420 | return MXCSR_DEFAULT; | 452 | return MXCSR_DEFAULT; |
| 421 | } | 453 | } |
| 422 | } | 454 | } |
| 423 | 455 | ||
| 456 | static bool fpu_allocated(struct fpu *fpu) | ||
| 457 | { | ||
| 458 | return fpu->state != NULL; | ||
| 459 | } | ||
| 460 | |||
| 461 | static inline int fpu_alloc(struct fpu *fpu) | ||
| 462 | { | ||
| 463 | if (fpu_allocated(fpu)) | ||
| 464 | return 0; | ||
| 465 | fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL); | ||
| 466 | if (!fpu->state) | ||
| 467 | return -ENOMEM; | ||
| 468 | WARN_ON((unsigned long)fpu->state & 15); | ||
| 469 | return 0; | ||
| 470 | } | ||
| 471 | |||
| 472 | static inline void fpu_free(struct fpu *fpu) | ||
| 473 | { | ||
| 474 | if (fpu->state) { | ||
| 475 | kmem_cache_free(task_xstate_cachep, fpu->state); | ||
| 476 | fpu->state = NULL; | ||
| 477 | } | ||
| 478 | } | ||
| 479 | |||
| 480 | static inline void fpu_copy(struct fpu *dst, struct fpu *src) | ||
| 481 | { | ||
| 482 | memcpy(dst->state, src->state, xstate_size); | ||
| 483 | } | ||
| 484 | |||
| 424 | #endif /* __ASSEMBLY__ */ | 485 | #endif /* __ASSEMBLY__ */ |
| 425 | 486 | ||
| 426 | #define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 | 487 | #define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 |
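A rough sketch of how the new fpu container helpers above (fpu_alloc/fpu_copy/fpu_free) are meant to be combined on fork-style duplication; the function name is illustrative, not the actual caller, and it assumes the source fpu state is already allocated.

	static int example_dup_fpu(struct task_struct *dst, struct task_struct *src)
	{
		int err;

		dst->thread.fpu.state = NULL;	/* child starts without a state block */
		err = fpu_alloc(&dst->thread.fpu);
		if (err)
			return err;

		fpu_copy(&dst->thread.fpu, &src->thread.fpu);
		return 0;
	}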
diff --git a/arch/x86/include/asm/i8253.h b/arch/x86/include/asm/i8253.h index 1edbf89680fd..fc1f579fb965 100644 --- a/arch/x86/include/asm/i8253.h +++ b/arch/x86/include/asm/i8253.h | |||
| @@ -6,7 +6,7 @@ | |||
| 6 | #define PIT_CH0 0x40 | 6 | #define PIT_CH0 0x40 |
| 7 | #define PIT_CH2 0x42 | 7 | #define PIT_CH2 0x42 |
| 8 | 8 | ||
| 9 | extern spinlock_t i8253_lock; | 9 | extern raw_spinlock_t i8253_lock; |
| 10 | 10 | ||
| 11 | extern struct clock_event_device *global_clock_event; | 11 | extern struct clock_event_device *global_clock_event; |
| 12 | 12 | ||
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 96c2e0ad04ca..88c765e16410 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h | |||
| @@ -68,6 +68,8 @@ struct insn { | |||
| 68 | const insn_byte_t *next_byte; | 68 | const insn_byte_t *next_byte; |
| 69 | }; | 69 | }; |
| 70 | 70 | ||
| 71 | #define MAX_INSN_SIZE 16 | ||
| 72 | |||
| 71 | #define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) | 73 | #define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) |
| 72 | #define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) | 74 | #define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) |
| 73 | #define X86_MODRM_RM(modrm) ((modrm) & 0x07) | 75 | #define X86_MODRM_RM(modrm) ((modrm) & 0x07) |
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 35832a03a515..63cb4096c3dc 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h | |||
| @@ -159,7 +159,6 @@ struct io_apic_irq_attr; | |||
| 159 | extern int io_apic_set_pci_routing(struct device *dev, int irq, | 159 | extern int io_apic_set_pci_routing(struct device *dev, int irq, |
| 160 | struct io_apic_irq_attr *irq_attr); | 160 | struct io_apic_irq_attr *irq_attr); |
| 161 | void setup_IO_APIC_irq_extra(u32 gsi); | 161 | void setup_IO_APIC_irq_extra(u32 gsi); |
| 162 | extern int (*ioapic_renumber_irq)(int ioapic, int irq); | ||
| 163 | extern void ioapic_init_mappings(void); | 162 | extern void ioapic_init_mappings(void); |
| 164 | extern void ioapic_insert_resources(void); | 163 | extern void ioapic_insert_resources(void); |
| 165 | 164 | ||
| @@ -180,12 +179,13 @@ extern void ioapic_write_entry(int apic, int pin, | |||
| 180 | extern void setup_ioapic_ids_from_mpc(void); | 179 | extern void setup_ioapic_ids_from_mpc(void); |
| 181 | 180 | ||
| 182 | struct mp_ioapic_gsi{ | 181 | struct mp_ioapic_gsi{ |
| 183 | int gsi_base; | 182 | u32 gsi_base; |
| 184 | int gsi_end; | 183 | u32 gsi_end; |
| 185 | }; | 184 | }; |
| 186 | extern struct mp_ioapic_gsi mp_gsi_routing[]; | 185 | extern struct mp_ioapic_gsi mp_gsi_routing[]; |
| 187 | int mp_find_ioapic(int gsi); | 186 | extern u32 gsi_end; |
| 188 | int mp_find_ioapic_pin(int ioapic, int gsi); | 187 | int mp_find_ioapic(u32 gsi); |
| 188 | int mp_find_ioapic_pin(int ioapic, u32 gsi); | ||
| 189 | void __init mp_register_ioapic(int id, u32 address, u32 gsi_base); | 189 | void __init mp_register_ioapic(int id, u32 address, u32 gsi_base); |
| 190 | extern void __init pre_init_apic_IRQ0(void); | 190 | extern void __init pre_init_apic_IRQ0(void); |
| 191 | 191 | ||
| @@ -197,7 +197,8 @@ static const int timer_through_8259 = 0; | |||
| 197 | static inline void ioapic_init_mappings(void) { } | 197 | static inline void ioapic_init_mappings(void) { } |
| 198 | static inline void ioapic_insert_resources(void) { } | 198 | static inline void ioapic_insert_resources(void) { } |
| 199 | static inline void probe_nr_irqs_gsi(void) { } | 199 | static inline void probe_nr_irqs_gsi(void) { } |
| 200 | static inline int mp_find_ioapic(int gsi) { return 0; } | 200 | #define gsi_end (NR_IRQS_LEGACY - 1) |
| 201 | static inline int mp_find_ioapic(u32 gsi) { return 0; } | ||
| 201 | 202 | ||
| 202 | struct io_apic_irq_attr; | 203 | struct io_apic_irq_attr; |
| 203 | static inline int io_apic_set_pci_routing(struct device *dev, int irq, | 204 | static inline int io_apic_set_pci_routing(struct device *dev, int irq, |
diff --git a/arch/x86/include/asm/k8.h b/arch/x86/include/asm/k8.h index f70e60071fe8..af00bd1d2089 100644 --- a/arch/x86/include/asm/k8.h +++ b/arch/x86/include/asm/k8.h | |||
| @@ -16,11 +16,16 @@ extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn); | |||
| 16 | extern int k8_scan_nodes(void); | 16 | extern int k8_scan_nodes(void); |
| 17 | 17 | ||
| 18 | #ifdef CONFIG_K8_NB | 18 | #ifdef CONFIG_K8_NB |
| 19 | extern int num_k8_northbridges; | ||
| 20 | |||
| 19 | static inline struct pci_dev *node_to_k8_nb_misc(int node) | 21 | static inline struct pci_dev *node_to_k8_nb_misc(int node) |
| 20 | { | 22 | { |
| 21 | return (node < num_k8_northbridges) ? k8_northbridges[node] : NULL; | 23 | return (node < num_k8_northbridges) ? k8_northbridges[node] : NULL; |
| 22 | } | 24 | } |
| 25 | |||
| 23 | #else | 26 | #else |
| 27 | #define num_k8_northbridges 0 | ||
| 28 | |||
| 24 | static inline struct pci_dev *node_to_k8_nb_misc(int node) | 29 | static inline struct pci_dev *node_to_k8_nb_misc(int node) |
| 25 | { | 30 | { |
| 26 | return NULL; | 31 | return NULL; |
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 4ffa345a8ccb..547882539157 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h | |||
| @@ -24,6 +24,7 @@ | |||
| 24 | #include <linux/types.h> | 24 | #include <linux/types.h> |
| 25 | #include <linux/ptrace.h> | 25 | #include <linux/ptrace.h> |
| 26 | #include <linux/percpu.h> | 26 | #include <linux/percpu.h> |
| 27 | #include <asm/insn.h> | ||
| 27 | 28 | ||
| 28 | #define __ARCH_WANT_KPROBES_INSN_SLOT | 29 | #define __ARCH_WANT_KPROBES_INSN_SLOT |
| 29 | 30 | ||
| @@ -36,7 +37,6 @@ typedef u8 kprobe_opcode_t; | |||
| 36 | #define RELATIVEJUMP_SIZE 5 | 37 | #define RELATIVEJUMP_SIZE 5 |
| 37 | #define RELATIVECALL_OPCODE 0xe8 | 38 | #define RELATIVECALL_OPCODE 0xe8 |
| 38 | #define RELATIVE_ADDR_SIZE 4 | 39 | #define RELATIVE_ADDR_SIZE 4 |
| 39 | #define MAX_INSN_SIZE 16 | ||
| 40 | #define MAX_STACK_SIZE 64 | 40 | #define MAX_STACK_SIZE 64 |
| 41 | #define MIN_STACK_SIZE(ADDR) \ | 41 | #define MIN_STACK_SIZE(ADDR) \ |
| 42 | (((MAX_STACK_SIZE) < (((unsigned long)current_thread_info()) + \ | 42 | (((MAX_STACK_SIZE) < (((unsigned long)current_thread_info()) + \ |
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index d8bf23a88d05..c82868e9f905 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h | |||
| @@ -105,16 +105,6 @@ extern void mp_config_acpi_legacy_irqs(void); | |||
| 105 | struct device; | 105 | struct device; |
| 106 | extern int mp_register_gsi(struct device *dev, u32 gsi, int edge_level, | 106 | extern int mp_register_gsi(struct device *dev, u32 gsi, int edge_level, |
| 107 | int active_high_low); | 107 | int active_high_low); |
| 108 | extern int acpi_probe_gsi(void); | ||
| 109 | #ifdef CONFIG_X86_IO_APIC | ||
| 110 | extern int mp_find_ioapic(int gsi); | ||
| 111 | extern int mp_find_ioapic_pin(int ioapic, int gsi); | ||
| 112 | #endif | ||
| 113 | #else /* !CONFIG_ACPI: */ | ||
| 114 | static inline int acpi_probe_gsi(void) | ||
| 115 | { | ||
| 116 | return 0; | ||
| 117 | } | ||
| 118 | #endif /* CONFIG_ACPI */ | 108 | #endif /* CONFIG_ACPI */ |
| 119 | 109 | ||
| 120 | #define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_APICS) | 110 | #define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_APICS) |
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h new file mode 100644 index 000000000000..79ce5685ab64 --- /dev/null +++ b/arch/x86/include/asm/mshyperv.h | |||
| @@ -0,0 +1,14 @@ | |||
| 1 | #ifndef _ASM_X86_MSHYPER_H | ||
| 2 | #define _ASM_X86_MSHYPER_H | ||
| 3 | |||
| 4 | #include <linux/types.h> | ||
| 5 | #include <asm/hyperv.h> | ||
| 6 | |||
| 7 | struct ms_hyperv_info { | ||
| 8 | u32 features; | ||
| 9 | u32 hints; | ||
| 10 | }; | ||
| 11 | |||
| 12 | extern struct ms_hyperv_info ms_hyperv; | ||
| 13 | |||
| 14 | #endif | ||
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 4604e6a54d36..bc473acfa7f9 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
| @@ -71,11 +71,14 @@ | |||
| 71 | #define MSR_IA32_LASTINTTOIP 0x000001de | 71 | #define MSR_IA32_LASTINTTOIP 0x000001de |
| 72 | 72 | ||
| 73 | /* DEBUGCTLMSR bits (others vary by model): */ | 73 | /* DEBUGCTLMSR bits (others vary by model): */ |
| 74 | #define _DEBUGCTLMSR_LBR 0 /* last branch recording */ | 74 | #define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ |
| 75 | #define _DEBUGCTLMSR_BTF 1 /* single-step on branches */ | 75 | #define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ |
| 76 | 76 | #define DEBUGCTLMSR_TR (1UL << 6) | |
| 77 | #define DEBUGCTLMSR_LBR (1UL << _DEBUGCTLMSR_LBR) | 77 | #define DEBUGCTLMSR_BTS (1UL << 7) |
| 78 | #define DEBUGCTLMSR_BTF (1UL << _DEBUGCTLMSR_BTF) | 78 | #define DEBUGCTLMSR_BTINT (1UL << 8) |
| 79 | #define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9) | ||
| 80 | #define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10) | ||
| 81 | #define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11) | ||
| 79 | 82 | ||
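Purely as orientation (not from the patch): a BTS user composes its DEBUGCTLMSR value from the newly named bits, for example enabling the branch trace store with an interrupt on reaching the buffer threshold.

	/* Illustrative value; real users also manage BTS_OFF_OS/BTS_OFF_USR. */
	static unsigned long example_bts_debugctl(void)
	{
		return DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT;
	}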
| 80 | #define MSR_IA32_MC0_CTL 0x00000400 | 83 | #define MSR_IA32_MC0_CTL 0x00000400 |
| 81 | #define MSR_IA32_MC0_STATUS 0x00000401 | 84 | #define MSR_IA32_MC0_STATUS 0x00000401 |
| @@ -359,6 +362,8 @@ | |||
| 359 | #define MSR_P4_U2L_ESCR0 0x000003b0 | 362 | #define MSR_P4_U2L_ESCR0 0x000003b0 |
| 360 | #define MSR_P4_U2L_ESCR1 0x000003b1 | 363 | #define MSR_P4_U2L_ESCR1 0x000003b1 |
| 361 | 364 | ||
| 365 | #define MSR_P4_PEBS_MATRIX_VERT 0x000003f2 | ||
| 366 | |||
| 362 | /* Intel Core-based CPU performance counters */ | 367 | /* Intel Core-based CPU performance counters */ |
| 363 | #define MSR_CORE_PERF_FIXED_CTR0 0x00000309 | 368 | #define MSR_CORE_PERF_FIXED_CTR0 0x00000309 |
| 364 | #define MSR_CORE_PERF_FIXED_CTR1 0x0000030a | 369 | #define MSR_CORE_PERF_FIXED_CTR1 0x0000030a |
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 66a272dfd8b8..0ec6d12d84e6 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h | |||
| @@ -190,6 +190,29 @@ do { \ | |||
| 190 | pfo_ret__; \ | 190 | pfo_ret__; \ |
| 191 | }) | 191 | }) |
| 192 | 192 | ||
| 193 | #define percpu_unary_op(op, var) \ | ||
| 194 | ({ \ | ||
| 195 | switch (sizeof(var)) { \ | ||
| 196 | case 1: \ | ||
| 197 | asm(op "b "__percpu_arg(0) \ | ||
| 198 | : "+m" (var)); \ | ||
| 199 | break; \ | ||
| 200 | case 2: \ | ||
| 201 | asm(op "w "__percpu_arg(0) \ | ||
| 202 | : "+m" (var)); \ | ||
| 203 | break; \ | ||
| 204 | case 4: \ | ||
| 205 | asm(op "l "__percpu_arg(0) \ | ||
| 206 | : "+m" (var)); \ | ||
| 207 | break; \ | ||
| 208 | case 8: \ | ||
| 209 | asm(op "q "__percpu_arg(0) \ | ||
| 210 | : "+m" (var)); \ | ||
| 211 | break; \ | ||
| 212 | default: __bad_percpu_size(); \ | ||
| 213 | } \ | ||
| 214 | }) | ||
| 215 | |||
| 193 | /* | 216 | /* |
| 194 | * percpu_read() makes gcc load the percpu variable every time it is | 217 | * percpu_read() makes gcc load the percpu variable every time it is |
| 195 | * accessed while percpu_read_stable() allows the value to be cached. | 218 | * accessed while percpu_read_stable() allows the value to be cached. |
| @@ -207,6 +230,7 @@ do { \ | |||
| 207 | #define percpu_and(var, val) percpu_to_op("and", var, val) | 230 | #define percpu_and(var, val) percpu_to_op("and", var, val) |
| 208 | #define percpu_or(var, val) percpu_to_op("or", var, val) | 231 | #define percpu_or(var, val) percpu_to_op("or", var, val) |
| 209 | #define percpu_xor(var, val) percpu_to_op("xor", var, val) | 232 | #define percpu_xor(var, val) percpu_to_op("xor", var, val) |
| 233 | #define percpu_inc(var) percpu_unary_op("inc", var) | ||
| 210 | 234 | ||
| 211 | #define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) | 235 | #define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) |
| 212 | #define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) | 236 | #define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) |
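A small usage sketch of the new percpu_inc() helper, matching the inc_irq_stat() change in hardirq.h above; the per-cpu variable name is made up and DEFINE_PER_CPU() comes from <linux/percpu.h>.

	DEFINE_PER_CPU(unsigned int, example_counter);

	static void example_bump(void)
	{
		/* expands to a single size-suited "inc" on the per-cpu slot */
		percpu_inc(example_counter);
	}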
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index db6109a885a7..254883d0c7e0 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h | |||
| @@ -5,7 +5,7 @@ | |||
| 5 | * Performance event hw details: | 5 | * Performance event hw details: |
| 6 | */ | 6 | */ |
| 7 | 7 | ||
| 8 | #define X86_PMC_MAX_GENERIC 8 | 8 | #define X86_PMC_MAX_GENERIC 32 |
| 9 | #define X86_PMC_MAX_FIXED 3 | 9 | #define X86_PMC_MAX_FIXED 3 |
| 10 | 10 | ||
| 11 | #define X86_PMC_IDX_GENERIC 0 | 11 | #define X86_PMC_IDX_GENERIC 0 |
| @@ -18,39 +18,31 @@ | |||
| 18 | #define MSR_ARCH_PERFMON_EVENTSEL0 0x186 | 18 | #define MSR_ARCH_PERFMON_EVENTSEL0 0x186 |
| 19 | #define MSR_ARCH_PERFMON_EVENTSEL1 0x187 | 19 | #define MSR_ARCH_PERFMON_EVENTSEL1 0x187 |
| 20 | 20 | ||
| 21 | #define ARCH_PERFMON_EVENTSEL_ENABLE (1 << 22) | 21 | #define ARCH_PERFMON_EVENTSEL_EVENT 0x000000FFULL |
| 22 | #define ARCH_PERFMON_EVENTSEL_ANY (1 << 21) | 22 | #define ARCH_PERFMON_EVENTSEL_UMASK 0x0000FF00ULL |
| 23 | #define ARCH_PERFMON_EVENTSEL_INT (1 << 20) | 23 | #define ARCH_PERFMON_EVENTSEL_USR (1ULL << 16) |
| 24 | #define ARCH_PERFMON_EVENTSEL_OS (1 << 17) | 24 | #define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17) |
| 25 | #define ARCH_PERFMON_EVENTSEL_USR (1 << 16) | 25 | #define ARCH_PERFMON_EVENTSEL_EDGE (1ULL << 18) |
| 26 | 26 | #define ARCH_PERFMON_EVENTSEL_INT (1ULL << 20) | |
| 27 | /* | 27 | #define ARCH_PERFMON_EVENTSEL_ANY (1ULL << 21) |
| 28 | * Includes eventsel and unit mask as well: | 28 | #define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22) |
| 29 | */ | 29 | #define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23) |
| 30 | 30 | #define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL | |
| 31 | 31 | ||
| 32 | #define INTEL_ARCH_EVTSEL_MASK 0x000000FFULL | 32 | #define AMD64_EVENTSEL_EVENT \ |
| 33 | #define INTEL_ARCH_UNIT_MASK 0x0000FF00ULL | 33 | (ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32)) |
| 34 | #define INTEL_ARCH_EDGE_MASK 0x00040000ULL | 34 | #define INTEL_ARCH_EVENT_MASK \ |
| 35 | #define INTEL_ARCH_INV_MASK 0x00800000ULL | 35 | (ARCH_PERFMON_EVENTSEL_UMASK | ARCH_PERFMON_EVENTSEL_EVENT) |
| 36 | #define INTEL_ARCH_CNT_MASK 0xFF000000ULL | 36 | |
| 37 | #define INTEL_ARCH_EVENT_MASK (INTEL_ARCH_UNIT_MASK|INTEL_ARCH_EVTSEL_MASK) | 37 | #define X86_RAW_EVENT_MASK \ |
| 38 | 38 | (ARCH_PERFMON_EVENTSEL_EVENT | \ | |
| 39 | /* | 39 | ARCH_PERFMON_EVENTSEL_UMASK | \ |
| 40 | * filter mask to validate fixed counter events. | 40 | ARCH_PERFMON_EVENTSEL_EDGE | \ |
| 41 | * the following filters disqualify for fixed counters: | 41 | ARCH_PERFMON_EVENTSEL_INV | \ |
| 42 | * - inv | 42 | ARCH_PERFMON_EVENTSEL_CMASK) |
| 43 | * - edge | 43 | #define AMD64_RAW_EVENT_MASK \ |
| 44 | * - cnt-mask | 44 | (X86_RAW_EVENT_MASK | \ |
| 45 | * The other filters are supported by fixed counters. | 45 | AMD64_EVENTSEL_EVENT) |
| 46 | * The any-thread option is supported starting with v3. | ||
| 47 | */ | ||
| 48 | #define INTEL_ARCH_FIXED_MASK \ | ||
| 49 | (INTEL_ARCH_CNT_MASK| \ | ||
| 50 | INTEL_ARCH_INV_MASK| \ | ||
| 51 | INTEL_ARCH_EDGE_MASK|\ | ||
| 52 | INTEL_ARCH_UNIT_MASK|\ | ||
| 53 | INTEL_ARCH_EVENT_MASK) | ||
| 54 | 46 | ||
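As a sketch of how the reshuffled masks above are meant to be used (not part of the patch): a raw counter config is built from an event select, a unit mask and the privilege bits, and X86_RAW_EVENT_MASK is what the perf core can later use to filter user-supplied raw configs. The 0x3c/0x00 pair is the architected unhalted-core-cycles encoding defined just below.

	static u64 example_core_cycles_config(void)
	{
		return (0x3cULL & ARCH_PERFMON_EVENTSEL_EVENT) |	/* event select */
		       ((0x00ULL << 8) & ARCH_PERFMON_EVENTSEL_UMASK) |	/* unit mask */
		       ARCH_PERFMON_EVENTSEL_USR |
		       ARCH_PERFMON_EVENTSEL_OS;
	}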
| 55 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c | 47 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c |
| 56 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) | 48 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) |
| @@ -67,7 +59,7 @@ | |||
| 67 | union cpuid10_eax { | 59 | union cpuid10_eax { |
| 68 | struct { | 60 | struct { |
| 69 | unsigned int version_id:8; | 61 | unsigned int version_id:8; |
| 70 | unsigned int num_events:8; | 62 | unsigned int num_counters:8; |
| 71 | unsigned int bit_width:8; | 63 | unsigned int bit_width:8; |
| 72 | unsigned int mask_length:8; | 64 | unsigned int mask_length:8; |
| 73 | } split; | 65 | } split; |
| @@ -76,7 +68,7 @@ union cpuid10_eax { | |||
| 76 | 68 | ||
| 77 | union cpuid10_edx { | 69 | union cpuid10_edx { |
| 78 | struct { | 70 | struct { |
| 79 | unsigned int num_events_fixed:4; | 71 | unsigned int num_counters_fixed:4; |
| 80 | unsigned int reserved:28; | 72 | unsigned int reserved:28; |
| 81 | } split; | 73 | } split; |
| 82 | unsigned int full; | 74 | unsigned int full; |
| @@ -136,6 +128,18 @@ extern void perf_events_lapic_init(void); | |||
| 136 | 128 | ||
| 137 | #define PERF_EVENT_INDEX_OFFSET 0 | 129 | #define PERF_EVENT_INDEX_OFFSET 0 |
| 138 | 130 | ||
| 131 | /* | ||
| 132 | * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups. | ||
| 133 | * This flag is otherwise unused and ABI specified to be 0, so nobody should | ||
| 134 | * care what we do with it. | ||
| 135 | */ | ||
| 136 | #define PERF_EFLAGS_EXACT (1UL << 3) | ||
| 137 | |||
| 138 | struct pt_regs; | ||
| 139 | extern unsigned long perf_instruction_pointer(struct pt_regs *regs); | ||
| 140 | extern unsigned long perf_misc_flags(struct pt_regs *regs); | ||
| 141 | #define perf_misc_flags(regs) perf_misc_flags(regs) | ||
| 142 | |||
| 139 | #else | 143 | #else |
| 140 | static inline void init_hw_perf_events(void) { } | 144 | static inline void init_hw_perf_events(void) { } |
| 141 | static inline void perf_events_lapic_init(void) { } | 145 | static inline void perf_events_lapic_init(void) { } |
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h new file mode 100644 index 000000000000..b05400a542ff --- /dev/null +++ b/arch/x86/include/asm/perf_event_p4.h | |||
| @@ -0,0 +1,794 @@ | |||
| 1 | /* | ||
| 2 | * Netburst Performance Events (P4, old Xeon) | ||
| 3 | */ | ||
| 4 | |||
| 5 | #ifndef PERF_EVENT_P4_H | ||
| 6 | #define PERF_EVENT_P4_H | ||
| 7 | |||
| 8 | #include <linux/cpu.h> | ||
| 9 | #include <linux/bitops.h> | ||
| 10 | |||
| 11 | /* | ||
| 12 | * NetBurst has performance MSRs shared between | ||
| 13 | * threads if HT is turned on, i.e. for both logical | ||
| 14 | * processors (note: in Atom with HT support, by contrast, | ||
| 15 | * perf MSRs are not shared and every thread has its | ||
| 16 | * own perf-MSR set) | ||
| 17 | */ | ||
| 18 | #define ARCH_P4_TOTAL_ESCR (46) | ||
| 19 | #define ARCH_P4_RESERVED_ESCR (2) /* IQ_ESCR(0,1) not always present */ | ||
| 20 | #define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR) | ||
| 21 | #define ARCH_P4_MAX_CCCR (18) | ||
| 22 | #define ARCH_P4_MAX_COUNTER (ARCH_P4_MAX_CCCR / 2) | ||
| 23 | |||
| 24 | #define P4_ESCR_EVENT_MASK 0x7e000000U | ||
| 25 | #define P4_ESCR_EVENT_SHIFT 25 | ||
| 26 | #define P4_ESCR_EVENTMASK_MASK 0x01fffe00U | ||
| 27 | #define P4_ESCR_EVENTMASK_SHIFT 9 | ||
| 28 | #define P4_ESCR_TAG_MASK 0x000001e0U | ||
| 29 | #define P4_ESCR_TAG_SHIFT 5 | ||
| 30 | #define P4_ESCR_TAG_ENABLE 0x00000010U | ||
| 31 | #define P4_ESCR_T0_OS 0x00000008U | ||
| 32 | #define P4_ESCR_T0_USR 0x00000004U | ||
| 33 | #define P4_ESCR_T1_OS 0x00000002U | ||
| 34 | #define P4_ESCR_T1_USR 0x00000001U | ||
| 35 | |||
| 36 | #define P4_ESCR_EVENT(v) ((v) << P4_ESCR_EVENT_SHIFT) | ||
| 37 | #define P4_ESCR_EMASK(v) ((v) << P4_ESCR_EVENTMASK_SHIFT) | ||
| 38 | #define P4_ESCR_TAG(v) ((v) << P4_ESCR_TAG_SHIFT) | ||
| 39 | |||
| 40 | /* Non HT mask */ | ||
| 41 | #define P4_ESCR_MASK \ | ||
| 42 | (P4_ESCR_EVENT_MASK | \ | ||
| 43 | P4_ESCR_EVENTMASK_MASK | \ | ||
| 44 | P4_ESCR_TAG_MASK | \ | ||
| 45 | P4_ESCR_TAG_ENABLE | \ | ||
| 46 | P4_ESCR_T0_OS | \ | ||
| 47 | P4_ESCR_T0_USR) | ||
| 48 | |||
| 49 | /* HT mask */ | ||
| 50 | #define P4_ESCR_MASK_HT \ | ||
| 51 | (P4_ESCR_MASK | P4_ESCR_T1_OS | P4_ESCR_T1_USR) | ||
| 52 | |||
| 53 | #define P4_CCCR_OVF 0x80000000U | ||
| 54 | #define P4_CCCR_CASCADE 0x40000000U | ||
| 55 | #define P4_CCCR_OVF_PMI_T0 0x04000000U | ||
| 56 | #define P4_CCCR_OVF_PMI_T1 0x08000000U | ||
| 57 | #define P4_CCCR_FORCE_OVF 0x02000000U | ||
| 58 | #define P4_CCCR_EDGE 0x01000000U | ||
| 59 | #define P4_CCCR_THRESHOLD_MASK 0x00f00000U | ||
| 60 | #define P4_CCCR_THRESHOLD_SHIFT 20 | ||
| 61 | #define P4_CCCR_COMPLEMENT 0x00080000U | ||
| 62 | #define P4_CCCR_COMPARE 0x00040000U | ||
| 63 | #define P4_CCCR_ESCR_SELECT_MASK 0x0000e000U | ||
| 64 | #define P4_CCCR_ESCR_SELECT_SHIFT 13 | ||
| 65 | #define P4_CCCR_ENABLE 0x00001000U | ||
| 66 | #define P4_CCCR_THREAD_SINGLE 0x00010000U | ||
| 67 | #define P4_CCCR_THREAD_BOTH 0x00020000U | ||
| 68 | #define P4_CCCR_THREAD_ANY 0x00030000U | ||
| 69 | #define P4_CCCR_RESERVED 0x00000fffU | ||
| 70 | |||
| 71 | #define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) | ||
| 72 | #define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT) | ||
| 73 | |||
| 74 | /* Custom bits in reserved CCCR area */ | ||
| 75 | #define P4_CCCR_CACHE_OPS_MASK 0x0000003fU | ||
| 76 | |||
| 77 | |||
| 78 | /* Non HT mask */ | ||
| 79 | #define P4_CCCR_MASK \ | ||
| 80 | (P4_CCCR_OVF | \ | ||
| 81 | P4_CCCR_CASCADE | \ | ||
| 82 | P4_CCCR_OVF_PMI_T0 | \ | ||
| 83 | P4_CCCR_FORCE_OVF | \ | ||
| 84 | P4_CCCR_EDGE | \ | ||
| 85 | P4_CCCR_THRESHOLD_MASK | \ | ||
| 86 | P4_CCCR_COMPLEMENT | \ | ||
| 87 | P4_CCCR_COMPARE | \ | ||
| 88 | P4_CCCR_ESCR_SELECT_MASK | \ | ||
| 89 | P4_CCCR_ENABLE) | ||
| 90 | |||
| 91 | /* HT mask */ | ||
| 92 | #define P4_CCCR_MASK_HT (P4_CCCR_MASK | P4_CCCR_THREAD_ANY) | ||
| 93 | |||
| 94 | #define P4_GEN_ESCR_EMASK(class, name, bit) \ | ||
| 95 | class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT) | ||
| 96 | #define P4_ESCR_EMASK_BIT(class, name) class##__##name | ||
| 97 | |||
| 98 | /* | ||
| 99 | * The config field is 64 bits wide and consists of | ||
| 100 | * HT << 63 | ESCR << 32 | CCCR | ||
| 101 | * where HT is the HyperThreading bit (ESCR has it | ||
| 102 | * reserved, so we may use it for our own purpose). | ||
| 103 | * | ||
| 104 | * Note that these are NOT the addresses of the respective | ||
| 105 | * ESCR and CCCR; it is only a packed value that must be | ||
| 106 | * unpacked and written to the proper addresses. | ||
| 107 | * | ||
| 108 | * The basic idea is to pack as much info as | ||
| 109 | * possible. | ||
| 110 | */ | ||
| 111 | #define p4_config_pack_escr(v) (((u64)(v)) << 32) | ||
| 112 | #define p4_config_pack_cccr(v) (((u64)(v)) & 0xffffffffULL) | ||
| 113 | #define p4_config_unpack_escr(v) (((u64)(v)) >> 32) | ||
| 114 | #define p4_config_unpack_cccr(v) (((u64)(v)) & 0xffffffffULL) | ||
| 115 | |||
| 116 | #define p4_config_unpack_emask(v) \ | ||
| 117 | ({ \ | ||
| 118 | u32 t = p4_config_unpack_escr((v)); \ | ||
| 119 | t = t & P4_ESCR_EVENTMASK_MASK; \ | ||
| 120 | t = t >> P4_ESCR_EVENTMASK_SHIFT; \ | ||
| 121 | t; \ | ||
| 122 | }) | ||
| 123 | |||
| 124 | #define p4_config_unpack_event(v) \ | ||
| 125 | ({ \ | ||
| 126 | u32 t = p4_config_unpack_escr((v)); \ | ||
| 127 | t = t & P4_ESCR_EVENT_MASK; \ | ||
| 128 | t = t >> P4_ESCR_EVENT_SHIFT; \ | ||
| 129 | t; \ | ||
| 130 | }) | ||
| 131 | |||
| 132 | #define p4_config_unpack_cache_event(v) (((u64)(v)) & P4_CCCR_CACHE_OPS_MASK) | ||
| 133 | |||
| 134 | #define P4_CONFIG_HT_SHIFT 63 | ||
| 135 | #define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) | ||
| 136 | |||
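To make the packing concrete, a hedged example (not from the patch): an ESCR value carrying the event code and one event-mask bit is packed into the upper half of the config, the matching CCCR (ESCR select plus enable) into the lower half, and the unpack helpers recover the pieces. The 0x01/0x01 pair mirrors P4_OPCODE(P4_EVENT_TC_DELIVER_MODE) defined further down.

	static u64 p4_example_config(void)
	{
		u32 escr = P4_ESCR_EVENT(0x01) |	/* event code */
			   P4_ESCR_EMASK(0x01);		/* one event-mask bit */
		u32 cccr = P4_CCCR_ESEL(0x01) |		/* ESCR select */
			   P4_CCCR_ENABLE;

		return p4_config_pack_escr(escr) | p4_config_pack_cccr(cccr);
	}

	/* p4_config_unpack_event(p4_example_config()) == 0x01 */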
| 137 | static inline bool p4_is_event_cascaded(u64 config) | ||
| 138 | { | ||
| 139 | u32 cccr = p4_config_unpack_cccr(config); | ||
| 140 | return !!(cccr & P4_CCCR_CASCADE); | ||
| 141 | } | ||
| 142 | |||
| 143 | static inline int p4_ht_config_thread(u64 config) | ||
| 144 | { | ||
| 145 | return !!(config & P4_CONFIG_HT); | ||
| 146 | } | ||
| 147 | |||
| 148 | static inline u64 p4_set_ht_bit(u64 config) | ||
| 149 | { | ||
| 150 | return config | P4_CONFIG_HT; | ||
| 151 | } | ||
| 152 | |||
| 153 | static inline u64 p4_clear_ht_bit(u64 config) | ||
| 154 | { | ||
| 155 | return config & ~P4_CONFIG_HT; | ||
| 156 | } | ||
| 157 | |||
| 158 | static inline int p4_ht_active(void) | ||
| 159 | { | ||
| 160 | #ifdef CONFIG_SMP | ||
| 161 | return smp_num_siblings > 1; | ||
| 162 | #endif | ||
| 163 | return 0; | ||
| 164 | } | ||
| 165 | |||
| 166 | static inline int p4_ht_thread(int cpu) | ||
| 167 | { | ||
| 168 | #ifdef CONFIG_SMP | ||
| 169 | if (smp_num_siblings == 2) | ||
| 170 | return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map)); | ||
| 171 | #endif | ||
| 172 | return 0; | ||
| 173 | } | ||
| 174 | |||
| 175 | static inline int p4_should_swap_ts(u64 config, int cpu) | ||
| 176 | { | ||
| 177 | return p4_ht_config_thread(config) ^ p4_ht_thread(cpu); | ||
| 178 | } | ||
| 179 | |||
| 180 | static inline u32 p4_default_cccr_conf(int cpu) | ||
| 181 | { | ||
| 182 | /* | ||
| 183 | * Note that P4_CCCR_THREAD_ANY is "required" on | ||
| 184 | * non-HT machines (on HT machines we count TS events | ||
| 185 | * regardless of the state of the second logical processor). | ||
| 186 | */ | ||
| 187 | u32 cccr = P4_CCCR_THREAD_ANY; | ||
| 188 | |||
| 189 | if (!p4_ht_thread(cpu)) | ||
| 190 | cccr |= P4_CCCR_OVF_PMI_T0; | ||
| 191 | else | ||
| 192 | cccr |= P4_CCCR_OVF_PMI_T1; | ||
| 193 | |||
| 194 | return cccr; | ||
| 195 | } | ||
| 196 | |||
| 197 | static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr) | ||
| 198 | { | ||
| 199 | u32 escr = 0; | ||
| 200 | |||
| 201 | if (!p4_ht_thread(cpu)) { | ||
| 202 | if (!exclude_os) | ||
| 203 | escr |= P4_ESCR_T0_OS; | ||
| 204 | if (!exclude_usr) | ||
| 205 | escr |= P4_ESCR_T0_USR; | ||
| 206 | } else { | ||
| 207 | if (!exclude_os) | ||
| 208 | escr |= P4_ESCR_T1_OS; | ||
| 209 | if (!exclude_usr) | ||
| 210 | escr |= P4_ESCR_T1_USR; | ||
| 211 | } | ||
| 212 | |||
| 213 | return escr; | ||
| 214 | } | ||
| 215 | |||
| 216 | enum P4_EVENTS { | ||
| 217 | P4_EVENT_TC_DELIVER_MODE, | ||
| 218 | P4_EVENT_BPU_FETCH_REQUEST, | ||
| 219 | P4_EVENT_ITLB_REFERENCE, | ||
| 220 | P4_EVENT_MEMORY_CANCEL, | ||
| 221 | P4_EVENT_MEMORY_COMPLETE, | ||
| 222 | P4_EVENT_LOAD_PORT_REPLAY, | ||
| 223 | P4_EVENT_STORE_PORT_REPLAY, | ||
| 224 | P4_EVENT_MOB_LOAD_REPLAY, | ||
| 225 | P4_EVENT_PAGE_WALK_TYPE, | ||
| 226 | P4_EVENT_BSQ_CACHE_REFERENCE, | ||
| 227 | P4_EVENT_IOQ_ALLOCATION, | ||
| 228 | P4_EVENT_IOQ_ACTIVE_ENTRIES, | ||
| 229 | P4_EVENT_FSB_DATA_ACTIVITY, | ||
| 230 | P4_EVENT_BSQ_ALLOCATION, | ||
| 231 | P4_EVENT_BSQ_ACTIVE_ENTRIES, | ||
| 232 | P4_EVENT_SSE_INPUT_ASSIST, | ||
| 233 | P4_EVENT_PACKED_SP_UOP, | ||
| 234 | P4_EVENT_PACKED_DP_UOP, | ||
| 235 | P4_EVENT_SCALAR_SP_UOP, | ||
| 236 | P4_EVENT_SCALAR_DP_UOP, | ||
| 237 | P4_EVENT_64BIT_MMX_UOP, | ||
| 238 | P4_EVENT_128BIT_MMX_UOP, | ||
| 239 | P4_EVENT_X87_FP_UOP, | ||
| 240 | P4_EVENT_TC_MISC, | ||
| 241 | P4_EVENT_GLOBAL_POWER_EVENTS, | ||
| 242 | P4_EVENT_TC_MS_XFER, | ||
| 243 | P4_EVENT_UOP_QUEUE_WRITES, | ||
| 244 | P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, | ||
| 245 | P4_EVENT_RETIRED_BRANCH_TYPE, | ||
| 246 | P4_EVENT_RESOURCE_STALL, | ||
| 247 | P4_EVENT_WC_BUFFER, | ||
| 248 | P4_EVENT_B2B_CYCLES, | ||
| 249 | P4_EVENT_BNR, | ||
| 250 | P4_EVENT_SNOOP, | ||
| 251 | P4_EVENT_RESPONSE, | ||
| 252 | P4_EVENT_FRONT_END_EVENT, | ||
| 253 | P4_EVENT_EXECUTION_EVENT, | ||
| 254 | P4_EVENT_REPLAY_EVENT, | ||
| 255 | P4_EVENT_INSTR_RETIRED, | ||
| 256 | P4_EVENT_UOPS_RETIRED, | ||
| 257 | P4_EVENT_UOP_TYPE, | ||
| 258 | P4_EVENT_BRANCH_RETIRED, | ||
| 259 | P4_EVENT_MISPRED_BRANCH_RETIRED, | ||
| 260 | P4_EVENT_X87_ASSIST, | ||
| 261 | P4_EVENT_MACHINE_CLEAR, | ||
| 262 | P4_EVENT_INSTR_COMPLETED, | ||
| 263 | }; | ||
| 264 | |||
| 265 | #define P4_OPCODE(event) event##_OPCODE | ||
| 266 | #define P4_OPCODE_ESEL(opcode) ((opcode & 0x00ff) >> 0) | ||
| 267 | #define P4_OPCODE_EVNT(opcode) ((opcode & 0xff00) >> 8) | ||
| 268 | #define P4_OPCODE_PACK(event, sel) (((event) << 8) | sel) | ||
| 269 | |||
| 270 | /* | ||
| 271 | * The comments below each event list the ESCR restrictions | ||
| 272 | * for that event and the counter indices per ESCR. | ||
| 273 | * | ||
| 274 | * MSR_P4_IQ_ESCR0 and MSR_P4_IQ_ESCR1 are available only on early | ||
| 275 | * processor builds (family 0FH, models 01H-02H). These MSRs | ||
| 276 | * are not available on later versions, so we don't use | ||
| 277 | * them at all. | ||
| 278 | * | ||
| 279 | * Also note that CCCR1 does not have the P4_CCCR_ENABLE bit | ||
| 280 | * working properly, so we should not use this CCCR or the | ||
| 281 | * respective counter as a result. | ||
| 282 | */ | ||
| 283 | enum P4_EVENT_OPCODES { | ||
| 284 | P4_OPCODE(P4_EVENT_TC_DELIVER_MODE) = P4_OPCODE_PACK(0x01, 0x01), | ||
| 285 | /* | ||
| 286 | * MSR_P4_TC_ESCR0: 4, 5 | ||
| 287 | * MSR_P4_TC_ESCR1: 6, 7 | ||
| 288 | */ | ||
| 289 | |||
| 290 | P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST) = P4_OPCODE_PACK(0x03, 0x00), | ||
| 291 | /* | ||
| 292 | * MSR_P4_BPU_ESCR0: 0, 1 | ||
| 293 | * MSR_P4_BPU_ESCR1: 2, 3 | ||
| 294 | */ | ||
| 295 | |||
| 296 | P4_OPCODE(P4_EVENT_ITLB_REFERENCE) = P4_OPCODE_PACK(0x18, 0x03), | ||
| 297 | /* | ||
| 298 | * MSR_P4_ITLB_ESCR0: 0, 1 | ||
| 299 | * MSR_P4_ITLB_ESCR1: 2, 3 | ||
| 300 | */ | ||
| 301 | |||
| 302 | P4_OPCODE(P4_EVENT_MEMORY_CANCEL) = P4_OPCODE_PACK(0x02, 0x05), | ||
| 303 | /* | ||
| 304 | * MSR_P4_DAC_ESCR0: 8, 9 | ||
| 305 | * MSR_P4_DAC_ESCR1: 10, 11 | ||
| 306 | */ | ||
| 307 | |||
| 308 | P4_OPCODE(P4_EVENT_MEMORY_COMPLETE) = P4_OPCODE_PACK(0x08, 0x02), | ||
| 309 | /* | ||
| 310 | * MSR_P4_SAAT_ESCR0: 8, 9 | ||
| 311 | * MSR_P4_SAAT_ESCR1: 10, 11 | ||
| 312 | */ | ||
| 313 | |||
| 314 | P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY) = P4_OPCODE_PACK(0x04, 0x02), | ||
| 315 | /* | ||
| 316 | * MSR_P4_SAAT_ESCR0: 8, 9 | ||
| 317 | * MSR_P4_SAAT_ESCR1: 10, 11 | ||
| 318 | */ | ||
| 319 | |||
| 320 | P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY) = P4_OPCODE_PACK(0x05, 0x02), | ||
| 321 | /* | ||
| 322 | * MSR_P4_SAAT_ESCR0: 8, 9 | ||
| 323 | * MSR_P4_SAAT_ESCR1: 10, 11 | ||
| 324 | */ | ||
| 325 | |||
| 326 | P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY) = P4_OPCODE_PACK(0x03, 0x02), | ||
| 327 | /* | ||
| 328 | * MSR_P4_MOB_ESCR0: 0, 1 | ||
| 329 | * MSR_P4_MOB_ESCR1: 2, 3 | ||
| 330 | */ | ||
| 331 | |||
| 332 | P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE) = P4_OPCODE_PACK(0x01, 0x04), | ||
| 333 | /* | ||
| 334 | * MSR_P4_PMH_ESCR0: 0, 1 | ||
| 335 | * MSR_P4_PMH_ESCR1: 2, 3 | ||
| 336 | */ | ||
| 337 | |||
| 338 | P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE) = P4_OPCODE_PACK(0x0c, 0x07), | ||
| 339 | /* | ||
| 340 | * MSR_P4_BSU_ESCR0: 0, 1 | ||
| 341 | * MSR_P4_BSU_ESCR1: 2, 3 | ||
| 342 | */ | ||
| 343 | |||
| 344 | P4_OPCODE(P4_EVENT_IOQ_ALLOCATION) = P4_OPCODE_PACK(0x03, 0x06), | ||
| 345 | /* | ||
| 346 | * MSR_P4_FSB_ESCR0: 0, 1 | ||
| 347 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
| 348 | */ | ||
| 349 | |||
| 350 | P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES) = P4_OPCODE_PACK(0x1a, 0x06), | ||
| 351 | /* | ||
| 352 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
| 353 | */ | ||
| 354 | |||
| 355 | P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY) = P4_OPCODE_PACK(0x17, 0x06), | ||
| 356 | /* | ||
| 357 | * MSR_P4_FSB_ESCR0: 0, 1 | ||
| 358 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
| 359 | */ | ||
| 360 | |||
| 361 | P4_OPCODE(P4_EVENT_BSQ_ALLOCATION) = P4_OPCODE_PACK(0x05, 0x07), | ||
| 362 | /* | ||
| 363 | * MSR_P4_BSU_ESCR0: 0, 1 | ||
| 364 | */ | ||
| 365 | |||
| 366 | P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES) = P4_OPCODE_PACK(0x06, 0x07), | ||
| 367 | /* | ||
| 368 | * NOTE: no ESCR name in docs, it's guessed | ||
| 369 | * MSR_P4_BSU_ESCR1: 2, 3 | ||
| 370 | */ | ||
| 371 | |||
| 372 | P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST) = P4_OPCODE_PACK(0x34, 0x01), | ||
| 373 | /* | ||
| 374 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
| 375 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
| 376 | */ | ||
| 377 | |||
| 378 | P4_OPCODE(P4_EVENT_PACKED_SP_UOP) = P4_OPCODE_PACK(0x08, 0x01), | ||
| 379 | /* | ||
| 380 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
| 381 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
| 382 | */ | ||
| 383 | |||
| 384 | P4_OPCODE(P4_EVENT_PACKED_DP_UOP) = P4_OPCODE_PACK(0x0c, 0x01), | ||
| 385 | /* | ||
| 386 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
| 387 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
| 388 | */ | ||
| 389 | |||
| 390 | P4_OPCODE(P4_EVENT_SCALAR_SP_UOP) = P4_OPCODE_PACK(0x0a, 0x01), | ||
| 391 | /* | ||
| 392 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
| 393 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
| 394 | */ | ||
| 395 | |||
| 396 | P4_OPCODE(P4_EVENT_SCALAR_DP_UOP) = P4_OPCODE_PACK(0x0e, 0x01), | ||
| 397 | /* | ||
| 398 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
| 399 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
| 400 | */ | ||
| 401 | |||
| 402 | P4_OPCODE(P4_EVENT_64BIT_MMX_UOP) = P4_OPCODE_PACK(0x02, 0x01), | ||
| 403 | /* | ||
| 404 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
| 405 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
| 406 | */ | ||
| 407 | |||
| 408 | P4_OPCODE(P4_EVENT_128BIT_MMX_UOP) = P4_OPCODE_PACK(0x1a, 0x01), | ||
| 409 | /* | ||
| 410 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
| 411 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
| 412 | */ | ||
| 413 | |||
| 414 | P4_OPCODE(P4_EVENT_X87_FP_UOP) = P4_OPCODE_PACK(0x04, 0x01), | ||
| 415 | /* | ||
| 416 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
| 417 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
| 418 | */ | ||
| 419 | |||
| 420 | P4_OPCODE(P4_EVENT_TC_MISC) = P4_OPCODE_PACK(0x06, 0x01), | ||
| 421 | /* | ||
| 422 | * MSR_P4_TC_ESCR0: 4, 5 | ||
| 423 | * MSR_P4_TC_ESCR1: 6, 7 | ||
| 424 | */ | ||
| 425 | |||
| 426 | P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS) = P4_OPCODE_PACK(0x13, 0x06), | ||
| 427 | /* | ||
| 428 | * MSR_P4_FSB_ESCR0: 0, 1 | ||
| 429 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
| 430 | */ | ||
| 431 | |||
| 432 | P4_OPCODE(P4_EVENT_TC_MS_XFER) = P4_OPCODE_PACK(0x05, 0x00), | ||
| 433 | /* | ||
| 434 | * MSR_P4_MS_ESCR0: 4, 5 | ||
| 435 | * MSR_P4_MS_ESCR1: 6, 7 | ||
| 436 | */ | ||
| 437 | |||
| 438 | P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES) = P4_OPCODE_PACK(0x09, 0x00), | ||
| 439 | /* | ||
| 440 | * MSR_P4_MS_ESCR0: 4, 5 | ||
| 441 | * MSR_P4_MS_ESCR1: 6, 7 | ||
| 442 | */ | ||
| 443 | |||
| 444 | P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE) = P4_OPCODE_PACK(0x05, 0x02), | ||
| 445 | /* | ||
| 446 | * MSR_P4_TBPU_ESCR0: 4, 5 | ||
| 447 | * MSR_P4_TBPU_ESCR1: 6, 7 | ||
| 448 | */ | ||
| 449 | |||
| 450 | P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE) = P4_OPCODE_PACK(0x04, 0x02), | ||
| 451 | /* | ||
| 452 | * MSR_P4_TBPU_ESCR0: 4, 5 | ||
| 453 | * MSR_P4_TBPU_ESCR1: 6, 7 | ||
| 454 | */ | ||
| 455 | |||
| 456 | P4_OPCODE(P4_EVENT_RESOURCE_STALL) = P4_OPCODE_PACK(0x01, 0x01), | ||
| 457 | /* | ||
| 458 | * MSR_P4_ALF_ESCR0: 12, 13, 16 | ||
| 459 | * MSR_P4_ALF_ESCR1: 14, 15, 17 | ||
| 460 | */ | ||
| 461 | |||
| 462 | P4_OPCODE(P4_EVENT_WC_BUFFER) = P4_OPCODE_PACK(0x05, 0x05), | ||
| 463 | /* | ||
| 464 | * MSR_P4_DAC_ESCR0: 8, 9 | ||
| 465 | * MSR_P4_DAC_ESCR1: 10, 11 | ||
| 466 | */ | ||
| 467 | |||
| 468 | P4_OPCODE(P4_EVENT_B2B_CYCLES) = P4_OPCODE_PACK(0x16, 0x03), | ||
| 469 | /* | ||
| 470 | * MSR_P4_FSB_ESCR0: 0, 1 | ||
| 471 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
| 472 | */ | ||
| 473 | |||
| 474 | P4_OPCODE(P4_EVENT_BNR) = P4_OPCODE_PACK(0x08, 0x03), | ||
| 475 | /* | ||
| 476 | * MSR_P4_FSB_ESCR0: 0, 1 | ||
| 477 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
| 478 | */ | ||
| 479 | |||
| 480 | P4_OPCODE(P4_EVENT_SNOOP) = P4_OPCODE_PACK(0x06, 0x03), | ||
| 481 | /* | ||
| 482 | * MSR_P4_FSB_ESCR0: 0, 1 | ||
| 483 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
| 484 | */ | ||
| 485 | |||
| 486 | P4_OPCODE(P4_EVENT_RESPONSE) = P4_OPCODE_PACK(0x04, 0x03), | ||
| 487 | /* | ||
| 488 | * MSR_P4_FSB_ESCR0: 0, 1 | ||
| 489 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
| 490 | */ | ||
| 491 | |||
| 492 | P4_OPCODE(P4_EVENT_FRONT_END_EVENT) = P4_OPCODE_PACK(0x08, 0x05), | ||
| 493 | /* | ||
| 494 | * MSR_P4_CRU_ESCR2: 12, 13, 16 | ||
| 495 | * MSR_P4_CRU_ESCR3: 14, 15, 17 | ||
| 496 | */ | ||
| 497 | |||
| 498 | P4_OPCODE(P4_EVENT_EXECUTION_EVENT) = P4_OPCODE_PACK(0x0c, 0x05), | ||
| 499 | /* | ||
| 500 | * MSR_P4_CRU_ESCR2: 12, 13, 16 | ||
| 501 | * MSR_P4_CRU_ESCR3: 14, 15, 17 | ||
| 502 | */ | ||
| 503 | |||
| 504 | P4_OPCODE(P4_EVENT_REPLAY_EVENT) = P4_OPCODE_PACK(0x09, 0x05), | ||
| 505 | /* | ||
| 506 | * MSR_P4_CRU_ESCR2: 12, 13, 16 | ||
| 507 | * MSR_P4_CRU_ESCR3: 14, 15, 17 | ||
| 508 | */ | ||
| 509 | |||
| 510 | P4_OPCODE(P4_EVENT_INSTR_RETIRED) = P4_OPCODE_PACK(0x02, 0x04), | ||
| 511 | /* | ||
| 512 | * MSR_P4_CRU_ESCR0: 12, 13, 16 | ||
| 513 | * MSR_P4_CRU_ESCR1: 14, 15, 17 | ||
| 514 | */ | ||
| 515 | |||
| 516 | P4_OPCODE(P4_EVENT_UOPS_RETIRED) = P4_OPCODE_PACK(0x01, 0x04), | ||
| 517 | /* | ||
| 518 | * MSR_P4_CRU_ESCR0: 12, 13, 16 | ||
| 519 | * MSR_P4_CRU_ESCR1: 14, 15, 17 | ||
| 520 | */ | ||
| 521 | |||
| 522 | P4_OPCODE(P4_EVENT_UOP_TYPE) = P4_OPCODE_PACK(0x02, 0x02), | ||
| 523 | /* | ||
| 524 | * MSR_P4_RAT_ESCR0: 12, 13, 16 | ||
| 525 | * MSR_P4_RAT_ESCR1: 14, 15, 17 | ||
| 526 | */ | ||
| 527 | |||
| 528 | P4_OPCODE(P4_EVENT_BRANCH_RETIRED) = P4_OPCODE_PACK(0x06, 0x05), | ||
| 529 | /* | ||
| 530 | * MSR_P4_CRU_ESCR2: 12, 13, 16 | ||
| 531 | * MSR_P4_CRU_ESCR3: 14, 15, 17 | ||
| 532 | */ | ||
| 533 | |||
| 534 | P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED) = P4_OPCODE_PACK(0x03, 0x04), | ||
| 535 | /* | ||
| 536 | * MSR_P4_CRU_ESCR0: 12, 13, 16 | ||
| 537 | * MSR_P4_CRU_ESCR1: 14, 15, 17 | ||
| 538 | */ | ||
| 539 | |||
| 540 | P4_OPCODE(P4_EVENT_X87_ASSIST) = P4_OPCODE_PACK(0x03, 0x05), | ||
| 541 | /* | ||
| 542 | * MSR_P4_CRU_ESCR2: 12, 13, 16 | ||
| 543 | * MSR_P4_CRU_ESCR3: 14, 15, 17 | ||
| 544 | */ | ||
| 545 | |||
| 546 | P4_OPCODE(P4_EVENT_MACHINE_CLEAR) = P4_OPCODE_PACK(0x02, 0x05), | ||
| 547 | /* | ||
| 548 | * MSR_P4_CRU_ESCR2: 12, 13, 16 | ||
| 549 | * MSR_P4_CRU_ESCR3: 14, 15, 17 | ||
| 550 | */ | ||
| 551 | |||
| 552 | P4_OPCODE(P4_EVENT_INSTR_COMPLETED) = P4_OPCODE_PACK(0x07, 0x04), | ||
| 553 | /* | ||
| 554 | * MSR_P4_CRU_ESCR0: 12, 13, 16 | ||
| 555 | * MSR_P4_CRU_ESCR1: 14, 15, 17 | ||
| 556 | */ | ||
| 557 | }; | ||
| 558 | |||
| 559 | /* | ||
| 560 | * a caller should use P4_ESCR_EMASK_NAME helper to | ||
| 561 | * pick the EventMask needed, for example | ||
| 562 | * | ||
| 563 | * P4_ESCR_EMASK_NAME(P4_EVENT_TC_DELIVER_MODE, DD) | ||
| 564 | */ | ||
| 565 | enum P4_ESCR_EMASKS { | ||
| 566 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DD, 0), | ||
| 567 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DB, 1), | ||
| 568 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DI, 2), | ||
| 569 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, BD, 3), | ||
| 570 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, BB, 4), | ||
| 571 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, BI, 5), | ||
| 572 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, ID, 6), | ||
| 573 | |||
| 574 | P4_GEN_ESCR_EMASK(P4_EVENT_BPU_FETCH_REQUEST, TCMISS, 0), | ||
| 575 | |||
| 576 | P4_GEN_ESCR_EMASK(P4_EVENT_ITLB_REFERENCE, HIT, 0), | ||
| 577 | P4_GEN_ESCR_EMASK(P4_EVENT_ITLB_REFERENCE, MISS, 1), | ||
| 578 | P4_GEN_ESCR_EMASK(P4_EVENT_ITLB_REFERENCE, HIT_UK, 2), | ||
| 579 | |||
| 580 | P4_GEN_ESCR_EMASK(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL, 2), | ||
| 581 | P4_GEN_ESCR_EMASK(P4_EVENT_MEMORY_CANCEL, 64K_CONF, 3), | ||
| 582 | |||
| 583 | P4_GEN_ESCR_EMASK(P4_EVENT_MEMORY_COMPLETE, LSC, 0), | ||
| 584 | P4_GEN_ESCR_EMASK(P4_EVENT_MEMORY_COMPLETE, SSC, 1), | ||
| 585 | |||
| 586 | P4_GEN_ESCR_EMASK(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD, 1), | ||
| 587 | |||
| 588 | P4_GEN_ESCR_EMASK(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST, 1), | ||
| 589 | |||
| 590 | P4_GEN_ESCR_EMASK(P4_EVENT_MOB_LOAD_REPLAY, NO_STA, 1), | ||
| 591 | P4_GEN_ESCR_EMASK(P4_EVENT_MOB_LOAD_REPLAY, NO_STD, 3), | ||
| 592 | P4_GEN_ESCR_EMASK(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA, 4), | ||
| 593 | P4_GEN_ESCR_EMASK(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR, 5), | ||
| 594 | |||
| 595 | P4_GEN_ESCR_EMASK(P4_EVENT_PAGE_WALK_TYPE, DTMISS, 0), | ||
| 596 | P4_GEN_ESCR_EMASK(P4_EVENT_PAGE_WALK_TYPE, ITMISS, 1), | ||
| 597 | |||
| 598 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS, 0), | ||
| 599 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE, 1), | ||
| 600 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM, 2), | ||
| 601 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS, 3), | ||
| 602 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE, 4), | ||
| 603 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM, 5), | ||
| 604 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS, 8), | ||
| 605 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS, 9), | ||
| 606 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS, 10), | ||
| 607 | |||
| 608 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, DEFAULT, 0), | ||
| 609 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, ALL_READ, 5), | ||
| 610 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE, 6), | ||
| 611 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_UC, 7), | ||
| 612 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_WC, 8), | ||
| 613 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_WT, 9), | ||
| 614 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_WP, 10), | ||
| 615 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_WB, 11), | ||
| 616 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, OWN, 13), | ||
| 617 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, OTHER, 14), | ||
| 618 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, PREFETCH, 15), | ||
| 619 | |||
| 620 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT, 0), | ||
| 621 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ, 5), | ||
| 622 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE, 6), | ||
| 623 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC, 7), | ||
| 624 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC, 8), | ||
| 625 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT, 9), | ||
| 626 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP, 10), | ||
| 627 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB, 11), | ||
| 628 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN, 13), | ||
| 629 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER, 14), | ||
| 630 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH, 15), | ||
| 631 | |||
| 632 | P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV, 0), | ||
| 633 | P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN, 1), | ||
| 634 | P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER, 2), | ||
| 635 | P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV, 3), | ||
| 636 | P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN, 4), | ||
| 637 | P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER, 5), | ||
| 638 | |||
| 639 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0, 0), | ||
| 640 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1, 1), | ||
| 641 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0, 2), | ||
| 642 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1, 3), | ||
| 643 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE, 5), | ||
| 644 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE, 6), | ||
| 645 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE, 7), | ||
| 646 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE, 8), | ||
| 647 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE, 9), | ||
| 648 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE, 10), | ||
| 649 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0, 11), | ||
| 650 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1, 12), | ||
| 651 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2, 13), | ||
| 652 | |||
| 653 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0, 0), | ||
| 654 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1, 1), | ||
| 655 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0, 2), | ||
| 656 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1, 3), | ||
| 657 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE, 5), | ||
| 658 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE, 6), | ||
| 659 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE, 7), | ||
| 660 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE, 8), | ||
| 661 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE, 9), | ||
| 662 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE, 10), | ||
| 663 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0, 11), | ||
| 664 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1, 12), | ||
| 665 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2, 13), | ||
| 666 | |||
| 667 | P4_GEN_ESCR_EMASK(P4_EVENT_SSE_INPUT_ASSIST, ALL, 15), | ||
| 668 | |||
| 669 | P4_GEN_ESCR_EMASK(P4_EVENT_PACKED_SP_UOP, ALL, 15), | ||
| 670 | |||
| 671 | P4_GEN_ESCR_EMASK(P4_EVENT_PACKED_DP_UOP, ALL, 15), | ||
| 672 | |||
| 673 | P4_GEN_ESCR_EMASK(P4_EVENT_SCALAR_SP_UOP, ALL, 15), | ||
| 674 | |||
| 675 | P4_GEN_ESCR_EMASK(P4_EVENT_SCALAR_DP_UOP, ALL, 15), | ||
| 676 | |||
| 677 | P4_GEN_ESCR_EMASK(P4_EVENT_64BIT_MMX_UOP, ALL, 15), | ||
| 678 | |||
| 679 | P4_GEN_ESCR_EMASK(P4_EVENT_128BIT_MMX_UOP, ALL, 15), | ||
| 680 | |||
| 681 | P4_GEN_ESCR_EMASK(P4_EVENT_X87_FP_UOP, ALL, 15), | ||
| 682 | |||
| 683 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_MISC, FLUSH, 4), | ||
| 684 | |||
| 685 | P4_GEN_ESCR_EMASK(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING, 0), | ||
| 686 | |||
| 687 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_MS_XFER, CISC, 0), | ||
| 688 | |||
| 689 | P4_GEN_ESCR_EMASK(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD, 0), | ||
| 690 | P4_GEN_ESCR_EMASK(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER, 1), | ||
| 691 | P4_GEN_ESCR_EMASK(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM, 2), | ||
| 692 | |||
| 693 | P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL, 1), | ||
| 694 | P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL, 2), | ||
| 695 | P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN, 3), | ||
| 696 | P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT, 4), | ||
| 697 | |||
| 698 | P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL, 1), | ||
| 699 | P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_BRANCH_TYPE, CALL, 2), | ||
| 700 | P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN, 3), | ||
| 701 | P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT, 4), | ||
| 702 | |||
| 703 | P4_GEN_ESCR_EMASK(P4_EVENT_RESOURCE_STALL, SBFULL, 5), | ||
| 704 | |||
| 705 | P4_GEN_ESCR_EMASK(P4_EVENT_WC_BUFFER, WCB_EVICTS, 0), | ||
| 706 | P4_GEN_ESCR_EMASK(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS, 1), | ||
| 707 | |||
| 708 | P4_GEN_ESCR_EMASK(P4_EVENT_FRONT_END_EVENT, NBOGUS, 0), | ||
| 709 | P4_GEN_ESCR_EMASK(P4_EVENT_FRONT_END_EVENT, BOGUS, 1), | ||
| 710 | |||
| 711 | P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, NBOGUS0, 0), | ||
| 712 | P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, NBOGUS1, 1), | ||
| 713 | P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, NBOGUS2, 2), | ||
| 714 | P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, NBOGUS3, 3), | ||
| 715 | P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, BOGUS0, 4), | ||
| 716 | P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, BOGUS1, 5), | ||
| 717 | P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, BOGUS2, 6), | ||
| 718 | P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, BOGUS3, 7), | ||
| 719 | |||
| 720 | P4_GEN_ESCR_EMASK(P4_EVENT_REPLAY_EVENT, NBOGUS, 0), | ||
| 721 | P4_GEN_ESCR_EMASK(P4_EVENT_REPLAY_EVENT, BOGUS, 1), | ||
| 722 | |||
| 723 | P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG, 0), | ||
| 724 | P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_RETIRED, NBOGUSTAG, 1), | ||
| 725 | P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_RETIRED, BOGUSNTAG, 2), | ||
| 726 | P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_RETIRED, BOGUSTAG, 3), | ||
| 727 | |||
| 728 | P4_GEN_ESCR_EMASK(P4_EVENT_UOPS_RETIRED, NBOGUS, 0), | ||
| 729 | P4_GEN_ESCR_EMASK(P4_EVENT_UOPS_RETIRED, BOGUS, 1), | ||
| 730 | |||
| 731 | P4_GEN_ESCR_EMASK(P4_EVENT_UOP_TYPE, TAGLOADS, 1), | ||
| 732 | P4_GEN_ESCR_EMASK(P4_EVENT_UOP_TYPE, TAGSTORES, 2), | ||
| 733 | |||
| 734 | P4_GEN_ESCR_EMASK(P4_EVENT_BRANCH_RETIRED, MMNP, 0), | ||
| 735 | P4_GEN_ESCR_EMASK(P4_EVENT_BRANCH_RETIRED, MMNM, 1), | ||
| 736 | P4_GEN_ESCR_EMASK(P4_EVENT_BRANCH_RETIRED, MMTP, 2), | ||
| 737 | P4_GEN_ESCR_EMASK(P4_EVENT_BRANCH_RETIRED, MMTM, 3), | ||
| 738 | |||
| 739 | P4_GEN_ESCR_EMASK(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS, 0), | ||
| 740 | |||
| 741 | P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, FPSU, 0), | ||
| 742 | P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, FPSO, 1), | ||
| 743 | P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, POAO, 2), | ||
| 744 | P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, POAU, 3), | ||
| 745 | P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, PREA, 4), | ||
| 746 | |||
| 747 | P4_GEN_ESCR_EMASK(P4_EVENT_MACHINE_CLEAR, CLEAR, 0), | ||
| 748 | P4_GEN_ESCR_EMASK(P4_EVENT_MACHINE_CLEAR, MOCLEAR, 1), | ||
| 749 | P4_GEN_ESCR_EMASK(P4_EVENT_MACHINE_CLEAR, SMCLEAR, 2), | ||
| 750 | |||
| 751 | P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, NBOGUS, 0), | ||
| 752 | P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, BOGUS, 1), | ||
| 753 | }; | ||
| 754 | |||
| 755 | /* P4 PEBS: stale for a while */ | ||
| 756 | #define P4_PEBS_METRIC_MASK 0x00001fffU | ||
| 757 | #define P4_PEBS_UOB_TAG 0x01000000U | ||
| 758 | #define P4_PEBS_ENABLE 0x02000000U | ||
| 759 | |||
| 760 | /* Replay metrics for MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT */ | ||
| 761 | #define P4_PEBS__1stl_cache_load_miss_retired 0x3000001 | ||
| 762 | #define P4_PEBS__2ndl_cache_load_miss_retired 0x3000002 | ||
| 763 | #define P4_PEBS__dtlb_load_miss_retired 0x3000004 | ||
| 764 | #define P4_PEBS__dtlb_store_miss_retired 0x3000004 | ||
| 765 | #define P4_PEBS__dtlb_all_miss_retired 0x3000004 | ||
| 766 | #define P4_PEBS__tagged_mispred_branch 0x3018000 | ||
| 767 | #define P4_PEBS__mob_load_replay_retired 0x3000200 | ||
| 768 | #define P4_PEBS__split_load_retired 0x3000400 | ||
| 769 | #define P4_PEBS__split_store_retired 0x3000400 | ||
| 770 | |||
| 771 | #define P4_VERT__1stl_cache_load_miss_retired 0x0000001 | ||
| 772 | #define P4_VERT__2ndl_cache_load_miss_retired 0x0000001 | ||
| 773 | #define P4_VERT__dtlb_load_miss_retired 0x0000001 | ||
| 774 | #define P4_VERT__dtlb_store_miss_retired 0x0000002 | ||
| 775 | #define P4_VERT__dtlb_all_miss_retired 0x0000003 | ||
| 776 | #define P4_VERT__tagged_mispred_branch 0x0000010 | ||
| 777 | #define P4_VERT__mob_load_replay_retired 0x0000001 | ||
| 778 | #define P4_VERT__split_load_retired 0x0000001 | ||
| 779 | #define P4_VERT__split_store_retired 0x0000002 | ||
| 780 | |||
| 781 | enum P4_CACHE_EVENTS { | ||
| 782 | P4_CACHE__NONE, | ||
| 783 | |||
| 784 | P4_CACHE__1stl_cache_load_miss_retired, | ||
| 785 | P4_CACHE__2ndl_cache_load_miss_retired, | ||
| 786 | P4_CACHE__dtlb_load_miss_retired, | ||
| 787 | P4_CACHE__dtlb_store_miss_retired, | ||
| 788 | P4_CACHE__itlb_reference_hit, | ||
| 789 | P4_CACHE__itlb_reference_miss, | ||
| 790 | |||
| 791 | P4_CACHE__MAX | ||
| 792 | }; | ||
| 793 | |||
| 794 | #endif /* PERF_EVENT_P4_H */ | ||
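For orientation (not part of the patch): the two enum families above are meant to be used together, with P4_OPCODE() naming the packed event/select pair and P4_ESCR_EMASK_NAME() naming an individual EventMask bit, as the comment before the emask enum suggests. A minimal, hypothetical C sketch (variable names are not from the patch):

	unsigned int opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE);
	unsigned int emask  = P4_ESCR_EMASK_NAME(P4_EVENT_TC_DELIVER_MODE, DD);
	/* how these get folded into a perf event config is handled by the
	   P4 PMU code and lies outside this hunk */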
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index b753ea59703a..5a51379dcbe4 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
| @@ -21,7 +21,6 @@ struct mm_struct; | |||
| 21 | #include <asm/msr.h> | 21 | #include <asm/msr.h> |
| 22 | #include <asm/desc_defs.h> | 22 | #include <asm/desc_defs.h> |
| 23 | #include <asm/nops.h> | 23 | #include <asm/nops.h> |
| 24 | #include <asm/ds.h> | ||
| 25 | 24 | ||
| 26 | #include <linux/personality.h> | 25 | #include <linux/personality.h> |
| 27 | #include <linux/cpumask.h> | 26 | #include <linux/cpumask.h> |
| @@ -29,6 +28,7 @@ struct mm_struct; | |||
| 29 | #include <linux/threads.h> | 28 | #include <linux/threads.h> |
| 30 | #include <linux/math64.h> | 29 | #include <linux/math64.h> |
| 31 | #include <linux/init.h> | 30 | #include <linux/init.h> |
| 31 | #include <linux/err.h> | ||
| 32 | 32 | ||
| 33 | #define HBP_NUM 4 | 33 | #define HBP_NUM 4 |
| 34 | /* | 34 | /* |
| @@ -113,7 +113,6 @@ struct cpuinfo_x86 { | |||
| 113 | /* Index into per_cpu list: */ | 113 | /* Index into per_cpu list: */ |
| 114 | u16 cpu_index; | 114 | u16 cpu_index; |
| 115 | #endif | 115 | #endif |
| 116 | unsigned int x86_hyper_vendor; | ||
| 117 | } __attribute__((__aligned__(SMP_CACHE_BYTES))); | 116 | } __attribute__((__aligned__(SMP_CACHE_BYTES))); |
| 118 | 117 | ||
| 119 | #define X86_VENDOR_INTEL 0 | 118 | #define X86_VENDOR_INTEL 0 |
| @@ -127,9 +126,6 @@ struct cpuinfo_x86 { | |||
| 127 | 126 | ||
| 128 | #define X86_VENDOR_UNKNOWN 0xff | 127 | #define X86_VENDOR_UNKNOWN 0xff |
| 129 | 128 | ||
| 130 | #define X86_HYPER_VENDOR_NONE 0 | ||
| 131 | #define X86_HYPER_VENDOR_VMWARE 1 | ||
| 132 | |||
| 133 | /* | 129 | /* |
| 134 | * capabilities of CPUs | 130 | * capabilities of CPUs |
| 135 | */ | 131 | */ |
| @@ -380,6 +376,10 @@ union thread_xstate { | |||
| 380 | struct xsave_struct xsave; | 376 | struct xsave_struct xsave; |
| 381 | }; | 377 | }; |
| 382 | 378 | ||
| 379 | struct fpu { | ||
| 380 | union thread_xstate *state; | ||
| 381 | }; | ||
| 382 | |||
| 383 | #ifdef CONFIG_X86_64 | 383 | #ifdef CONFIG_X86_64 |
| 384 | DECLARE_PER_CPU(struct orig_ist, orig_ist); | 384 | DECLARE_PER_CPU(struct orig_ist, orig_ist); |
| 385 | 385 | ||
| @@ -457,7 +457,7 @@ struct thread_struct { | |||
| 457 | unsigned long trap_no; | 457 | unsigned long trap_no; |
| 458 | unsigned long error_code; | 458 | unsigned long error_code; |
| 459 | /* floating point and extended processor state */ | 459 | /* floating point and extended processor state */ |
| 460 | union thread_xstate *xstate; | 460 | struct fpu fpu; |
| 461 | #ifdef CONFIG_X86_32 | 461 | #ifdef CONFIG_X86_32 |
| 462 | /* Virtual 86 mode info */ | 462 | /* Virtual 86 mode info */ |
| 463 | struct vm86_struct __user *vm86_info; | 463 | struct vm86_struct __user *vm86_info; |
| @@ -473,10 +473,6 @@ struct thread_struct { | |||
| 473 | unsigned long iopl; | 473 | unsigned long iopl; |
| 474 | /* Max allowed port in the bitmap, in bytes: */ | 474 | /* Max allowed port in the bitmap, in bytes: */ |
| 475 | unsigned io_bitmap_max; | 475 | unsigned io_bitmap_max; |
| 476 | /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */ | ||
| 477 | unsigned long debugctlmsr; | ||
| 478 | /* Debug Store context; see asm/ds.h */ | ||
| 479 | struct ds_context *ds_ctx; | ||
| 480 | }; | 476 | }; |
| 481 | 477 | ||
| 482 | static inline unsigned long native_get_debugreg(int regno) | 478 | static inline unsigned long native_get_debugreg(int regno) |
| @@ -803,7 +799,7 @@ extern void cpu_init(void); | |||
| 803 | 799 | ||
| 804 | static inline unsigned long get_debugctlmsr(void) | 800 | static inline unsigned long get_debugctlmsr(void) |
| 805 | { | 801 | { |
| 806 | unsigned long debugctlmsr = 0; | 802 | unsigned long debugctlmsr = 0; |
| 807 | 803 | ||
| 808 | #ifndef CONFIG_X86_DEBUGCTLMSR | 804 | #ifndef CONFIG_X86_DEBUGCTLMSR |
| 809 | if (boot_cpu_data.x86 < 6) | 805 | if (boot_cpu_data.x86 < 6) |
| @@ -811,21 +807,6 @@ static inline unsigned long get_debugctlmsr(void) | |||
| 811 | #endif | 807 | #endif |
| 812 | rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); | 808 | rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); |
| 813 | 809 | ||
| 814 | return debugctlmsr; | ||
| 815 | } | ||
| 816 | |||
| 817 | static inline unsigned long get_debugctlmsr_on_cpu(int cpu) | ||
| 818 | { | ||
| 819 | u64 debugctlmsr = 0; | ||
| 820 | u32 val1, val2; | ||
| 821 | |||
| 822 | #ifndef CONFIG_X86_DEBUGCTLMSR | ||
| 823 | if (boot_cpu_data.x86 < 6) | ||
| 824 | return 0; | ||
| 825 | #endif | ||
| 826 | rdmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, &val1, &val2); | ||
| 827 | debugctlmsr = val1 | ((u64)val2 << 32); | ||
| 828 | |||
| 829 | return debugctlmsr; | 810 | return debugctlmsr; |
| 830 | } | 811 | } |
| 831 | 812 | ||
| @@ -838,18 +819,6 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr) | |||
| 838 | wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); | 819 | wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); |
| 839 | } | 820 | } |
| 840 | 821 | ||
| 841 | static inline void update_debugctlmsr_on_cpu(int cpu, | ||
| 842 | unsigned long debugctlmsr) | ||
| 843 | { | ||
| 844 | #ifndef CONFIG_X86_DEBUGCTLMSR | ||
| 845 | if (boot_cpu_data.x86 < 6) | ||
| 846 | return; | ||
| 847 | #endif | ||
| 848 | wrmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, | ||
| 849 | (u32)((u64)debugctlmsr), | ||
| 850 | (u32)((u64)debugctlmsr >> 32)); | ||
| 851 | } | ||
| 852 | |||
| 853 | /* | 822 | /* |
| 854 | * from system description table in BIOS. Mostly for MCA use, but | 823 | * from system description table in BIOS. Mostly for MCA use, but |
| 855 | * others may find it useful: | 824 | * others may find it useful: |
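To make the thread_struct change above concrete: the bare 'union thread_xstate *xstate' pointer is replaced by an embedded 'struct fpu', so extended state is now reached through one extra member. A hypothetical access sketch (not from the patch; see also the fpu_xsave() hunk in xsave.h below):

	/* old path: tsk->thread.xstate->xsave
	 * new path: tsk->thread.fpu.state->xsave */
	struct xsave_struct *xs = &current->thread.fpu.state->xsave;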
diff --git a/arch/x86/include/asm/ptrace-abi.h b/arch/x86/include/asm/ptrace-abi.h index 86723035a515..52b098a6eebb 100644 --- a/arch/x86/include/asm/ptrace-abi.h +++ b/arch/x86/include/asm/ptrace-abi.h | |||
| @@ -82,61 +82,6 @@ | |||
| 82 | 82 | ||
| 83 | #ifndef __ASSEMBLY__ | 83 | #ifndef __ASSEMBLY__ |
| 84 | #include <linux/types.h> | 84 | #include <linux/types.h> |
| 85 | 85 | #endif | |
| 86 | /* configuration/status structure used in PTRACE_BTS_CONFIG and | ||
| 87 | PTRACE_BTS_STATUS commands. | ||
| 88 | */ | ||
| 89 | struct ptrace_bts_config { | ||
| 90 | /* requested or actual size of BTS buffer in bytes */ | ||
| 91 | __u32 size; | ||
| 92 | /* bitmask of below flags */ | ||
| 93 | __u32 flags; | ||
| 94 | /* buffer overflow signal */ | ||
| 95 | __u32 signal; | ||
| 96 | /* actual size of bts_struct in bytes */ | ||
| 97 | __u32 bts_size; | ||
| 98 | }; | ||
| 99 | #endif /* __ASSEMBLY__ */ | ||
| 100 | |||
| 101 | #define PTRACE_BTS_O_TRACE 0x1 /* branch trace */ | ||
| 102 | #define PTRACE_BTS_O_SCHED 0x2 /* scheduling events w/ jiffies */ | ||
| 103 | #define PTRACE_BTS_O_SIGNAL 0x4 /* send SIG<signal> on buffer overflow | ||
| 104 | instead of wrapping around */ | ||
| 105 | #define PTRACE_BTS_O_ALLOC 0x8 /* (re)allocate buffer */ | ||
| 106 | |||
| 107 | #define PTRACE_BTS_CONFIG 40 | ||
| 108 | /* Configure branch trace recording. | ||
| 109 | ADDR points to a struct ptrace_bts_config. | ||
| 110 | DATA gives the size of that buffer. | ||
| 111 | A new buffer is allocated, if requested in the flags. | ||
| 112 | An overflow signal may only be requested for new buffers. | ||
| 113 | Returns the number of bytes read. | ||
| 114 | */ | ||
| 115 | #define PTRACE_BTS_STATUS 41 | ||
| 116 | /* Return the current configuration in a struct ptrace_bts_config | ||
| 117 | pointed to by ADDR; DATA gives the size of that buffer. | ||
| 118 | Returns the number of bytes written. | ||
| 119 | */ | ||
| 120 | #define PTRACE_BTS_SIZE 42 | ||
| 121 | /* Return the number of available BTS records for draining. | ||
| 122 | DATA and ADDR are ignored. | ||
| 123 | */ | ||
| 124 | #define PTRACE_BTS_GET 43 | ||
| 125 | /* Get a single BTS record. | ||
| 126 | DATA defines the index into the BTS array, where 0 is the newest | ||
| 127 | entry, and higher indices refer to older entries. | ||
| 128 | ADDR is pointing to struct bts_struct (see asm/ds.h). | ||
| 129 | */ | ||
| 130 | #define PTRACE_BTS_CLEAR 44 | ||
| 131 | /* Clear the BTS buffer. | ||
| 132 | DATA and ADDR are ignored. | ||
| 133 | */ | ||
| 134 | #define PTRACE_BTS_DRAIN 45 | ||
| 135 | /* Read all available BTS records and clear the buffer. | ||
| 136 | ADDR points to an array of struct bts_struct. | ||
| 137 | DATA gives the size of that buffer. | ||
| 138 | BTS records are read from oldest to newest. | ||
| 139 | Returns number of BTS records drained. | ||
| 140 | */ | ||
| 141 | 86 | ||
| 142 | #endif /* _ASM_X86_PTRACE_ABI_H */ | 87 | #endif /* _ASM_X86_PTRACE_ABI_H */ |
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 69a686a7dff0..78cd1ea94500 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h | |||
| @@ -289,12 +289,6 @@ extern int do_get_thread_area(struct task_struct *p, int idx, | |||
| 289 | extern int do_set_thread_area(struct task_struct *p, int idx, | 289 | extern int do_set_thread_area(struct task_struct *p, int idx, |
| 290 | struct user_desc __user *info, int can_allocate); | 290 | struct user_desc __user *info, int can_allocate); |
| 291 | 291 | ||
| 292 | #ifdef CONFIG_X86_PTRACE_BTS | ||
| 293 | extern void ptrace_bts_untrace(struct task_struct *tsk); | ||
| 294 | |||
| 295 | #define arch_ptrace_untrace(tsk) ptrace_bts_untrace(tsk) | ||
| 296 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
| 297 | |||
| 298 | #endif /* __KERNEL__ */ | 292 | #endif /* __KERNEL__ */ |
| 299 | 293 | ||
| 300 | #endif /* !__ASSEMBLY__ */ | 294 | #endif /* !__ASSEMBLY__ */ |
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index e0d28901e969..d4092fac226b 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h | |||
| @@ -92,8 +92,7 @@ struct thread_info { | |||
| 92 | #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ | 92 | #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ |
| 93 | #define TIF_FREEZE 23 /* is freezing for suspend */ | 93 | #define TIF_FREEZE 23 /* is freezing for suspend */ |
| 94 | #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ | 94 | #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ |
| 95 | #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ | 95 | #define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */ |
| 96 | #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ | ||
| 97 | #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ | 96 | #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ |
| 98 | #define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */ | 97 | #define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */ |
| 99 | 98 | ||
| @@ -115,8 +114,7 @@ struct thread_info { | |||
| 115 | #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) | 114 | #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) |
| 116 | #define _TIF_FREEZE (1 << TIF_FREEZE) | 115 | #define _TIF_FREEZE (1 << TIF_FREEZE) |
| 117 | #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) | 116 | #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) |
| 118 | #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) | 117 | #define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP) |
| 119 | #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) | ||
| 120 | #define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) | 118 | #define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) |
| 121 | #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) | 119 | #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) |
| 122 | 120 | ||
| @@ -147,7 +145,7 @@ struct thread_info { | |||
| 147 | 145 | ||
| 148 | /* flags to check in __switch_to() */ | 146 | /* flags to check in __switch_to() */ |
| 149 | #define _TIF_WORK_CTXSW \ | 147 | #define _TIF_WORK_CTXSW \ |
| 150 | (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC) | 148 | (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP) |
| 151 | 149 | ||
| 152 | #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) | 150 | #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) |
| 153 | #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) | 151 | #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) |
| @@ -244,7 +242,6 @@ static inline struct thread_info *current_thread_info(void) | |||
| 244 | #define TS_POLLING 0x0004 /* true if in idle loop | 242 | #define TS_POLLING 0x0004 /* true if in idle loop |
| 245 | and not sleeping */ | 243 | and not sleeping */ |
| 246 | #define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */ | 244 | #define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */ |
| 247 | #define TS_XSAVE 0x0010 /* Use xsave/xrstor */ | ||
| 248 | 245 | ||
| 249 | #define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) | 246 | #define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) |
| 250 | 247 | ||
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 4da91ad69e0d..f66cda56781d 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h | |||
| @@ -79,7 +79,7 @@ static inline int get_si_code(unsigned long condition) | |||
| 79 | 79 | ||
| 80 | extern int panic_on_unrecovered_nmi; | 80 | extern int panic_on_unrecovered_nmi; |
| 81 | 81 | ||
| 82 | void math_error(void __user *); | 82 | void math_error(struct pt_regs *, int, int); |
| 83 | void math_emulate(struct math_emu_info *); | 83 | void math_emulate(struct math_emu_info *); |
| 84 | #ifndef CONFIG_X86_32 | 84 | #ifndef CONFIG_X86_32 |
| 85 | asmlinkage void smp_thermal_interrupt(void); | 85 | asmlinkage void smp_thermal_interrupt(void); |
diff --git a/arch/x86/include/asm/vmware.h b/arch/x86/include/asm/vmware.h deleted file mode 100644 index e49ed6d2fd4e..000000000000 --- a/arch/x86/include/asm/vmware.h +++ /dev/null | |||
| @@ -1,27 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2008, VMware, Inc. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify | ||
| 5 | * it under the terms of the GNU General Public License as published by | ||
| 6 | * the Free Software Foundation; either version 2 of the License, or | ||
| 7 | * (at your option) any later version. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope that it will be useful, but | ||
| 10 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 11 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
| 12 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
| 13 | * details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License | ||
| 16 | * along with this program; if not, write to the Free Software | ||
| 17 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | ||
| 18 | * | ||
| 19 | */ | ||
| 20 | #ifndef ASM_X86__VMWARE_H | ||
| 21 | #define ASM_X86__VMWARE_H | ||
| 22 | |||
| 23 | extern void vmware_platform_setup(void); | ||
| 24 | extern int vmware_platform(void); | ||
| 25 | extern void vmware_set_feature_bits(struct cpuinfo_x86 *c); | ||
| 26 | |||
| 27 | #endif | ||
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index ddc04ccad03b..2c4390cae228 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h | |||
| @@ -37,8 +37,9 @@ extern int check_for_xstate(struct i387_fxsave_struct __user *buf, | |||
| 37 | void __user *fpstate, | 37 | void __user *fpstate, |
| 38 | struct _fpx_sw_bytes *sw); | 38 | struct _fpx_sw_bytes *sw); |
| 39 | 39 | ||
| 40 | static inline int xrstor_checking(struct xsave_struct *fx) | 40 | static inline int fpu_xrstor_checking(struct fpu *fpu) |
| 41 | { | 41 | { |
| 42 | struct xsave_struct *fx = &fpu->state->xsave; | ||
| 42 | int err; | 43 | int err; |
| 43 | 44 | ||
| 44 | asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" | 45 | asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" |
| @@ -110,12 +111,12 @@ static inline void xrstor_state(struct xsave_struct *fx, u64 mask) | |||
| 110 | : "memory"); | 111 | : "memory"); |
| 111 | } | 112 | } |
| 112 | 113 | ||
| 113 | static inline void xsave(struct task_struct *tsk) | 114 | static inline void fpu_xsave(struct fpu *fpu) |
| 114 | { | 115 | { |
| 115 | /* This, however, we can work around by forcing the compiler to select | 116 | /* This, however, we can work around by forcing the compiler to select |
| 116 | an addressing mode that doesn't require extended registers. */ | 117 | an addressing mode that doesn't require extended registers. */ |
| 117 | __asm__ __volatile__(".byte " REX_PREFIX "0x0f,0xae,0x27" | 118 | __asm__ __volatile__(".byte " REX_PREFIX "0x0f,0xae,0x27" |
| 118 | : : "D" (&(tsk->thread.xstate->xsave)), | 119 | : : "D" (&(fpu->state->xsave)), |
| 119 | "a" (-1), "d"(-1) : "memory"); | 120 | "a" (-1), "d"(-1) : "memory"); |
| 120 | } | 121 | } |
| 121 | #endif | 122 | #endif |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 4c58352209e0..e77b22083721 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
| @@ -47,8 +47,6 @@ obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o | |||
| 47 | obj-y += process.o | 47 | obj-y += process.o |
| 48 | obj-y += i387.o xsave.o | 48 | obj-y += i387.o xsave.o |
| 49 | obj-y += ptrace.o | 49 | obj-y += ptrace.o |
| 50 | obj-$(CONFIG_X86_DS) += ds.o | ||
| 51 | obj-$(CONFIG_X86_DS_SELFTEST) += ds_selftest.o | ||
| 52 | obj-$(CONFIG_X86_32) += tls.o | 50 | obj-$(CONFIG_X86_32) += tls.o |
| 53 | obj-$(CONFIG_IA32_EMULATION) += tls.o | 51 | obj-$(CONFIG_IA32_EMULATION) += tls.o |
| 54 | obj-y += step.o | 52 | obj-y += step.o |
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index cd40aba6aa95..9a5ed58f09dc 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
| @@ -94,6 +94,53 @@ enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC; | |||
| 94 | 94 | ||
| 95 | 95 | ||
| 96 | /* | 96 | /* |
| 97 | * ISA irqs by default are the first 16 gsis but can be | ||
| 98 | * any gsi as specified by an interrupt source override. | ||
| 99 | */ | ||
| 100 | static u32 isa_irq_to_gsi[NR_IRQS_LEGACY] __read_mostly = { | ||
| 101 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 | ||
| 102 | }; | ||
| 103 | |||
| 104 | static unsigned int gsi_to_irq(unsigned int gsi) | ||
| 105 | { | ||
| 106 | unsigned int irq = gsi + NR_IRQS_LEGACY; | ||
| 107 | unsigned int i; | ||
| 108 | |||
| 109 | for (i = 0; i < NR_IRQS_LEGACY; i++) { | ||
| 110 | if (isa_irq_to_gsi[i] == gsi) { | ||
| 111 | return i; | ||
| 112 | } | ||
| 113 | } | ||
| 114 | |||
| 115 | /* Provide an identity mapping of gsi == irq | ||
| 116 | * except on truly weird platforms that have | ||
| 117 | * non-ISA IRQs in the first 16 gsis. | ||
| 118 | */ | ||
| 119 | if (gsi >= NR_IRQS_LEGACY) | ||
| 120 | irq = gsi; | ||
| 121 | else | ||
| 122 | irq = gsi_end + 1 + gsi; | ||
| 123 | |||
| 124 | return irq; | ||
| 125 | } | ||
| 126 | |||
| 127 | static u32 irq_to_gsi(int irq) | ||
| 128 | { | ||
| 129 | unsigned int gsi; | ||
| 130 | |||
| 131 | if (irq < NR_IRQS_LEGACY) | ||
| 132 | gsi = isa_irq_to_gsi[irq]; | ||
| 133 | else if (irq <= gsi_end) | ||
| 134 | gsi = irq; | ||
| 135 | else if (irq <= (gsi_end + NR_IRQS_LEGACY)) | ||
| 136 | gsi = irq - gsi_end; | ||
| 137 | else | ||
| 138 | gsi = 0xffffffff; | ||
| 139 | |||
| 140 | return gsi; | ||
| 141 | } | ||
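A worked example for the two helpers above (illustrative, not in the patch): if an MADT interrupt source override maps ISA IRQ 9 to GSI 20, mp_override_legacy_irq() records isa_irq_to_gsi[9] = 20 further down in this file, so irq_to_gsi(9) returns 20 and gsi_to_irq(20) finds that table entry and returns 9. A GSI with no override keeps the identity mapping when it is at or above NR_IRQS_LEGACY, while a low GSI not claimed by any ISA IRQ is remapped past gsi_end so it cannot collide with the legacy IRQ numbers.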
| 142 | |||
| 143 | /* | ||
| 97 | * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END, | 144 | * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END, |
| 98 | * to map the target physical address. The problem is that set_fixmap() | 145 | * to map the target physical address. The problem is that set_fixmap() |
| 99 | * provides a single page, and it is possible that the page is not | 146 | * provides a single page, and it is possible that the page is not |
| @@ -313,7 +360,7 @@ acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) | |||
| 313 | /* | 360 | /* |
| 314 | * Parse Interrupt Source Override for the ACPI SCI | 361 | * Parse Interrupt Source Override for the ACPI SCI |
| 315 | */ | 362 | */ |
| 316 | static void __init acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger) | 363 | static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger, u32 gsi) |
| 317 | { | 364 | { |
| 318 | if (trigger == 0) /* compatible SCI trigger is level */ | 365 | if (trigger == 0) /* compatible SCI trigger is level */ |
| 319 | trigger = 3; | 366 | trigger = 3; |
| @@ -333,7 +380,7 @@ static void __init acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger) | |||
| 333 | * If GSI is < 16, this will update its flags, | 380 | * If GSI is < 16, this will update its flags, |
| 334 | * else it will create a new mp_irqs[] entry. | 381 | * else it will create a new mp_irqs[] entry. |
| 335 | */ | 382 | */ |
| 336 | mp_override_legacy_irq(gsi, polarity, trigger, gsi); | 383 | mp_override_legacy_irq(bus_irq, polarity, trigger, gsi); |
| 337 | 384 | ||
| 338 | /* | 385 | /* |
| 339 | * stash over-ride to indicate we've been here | 386 | * stash over-ride to indicate we've been here |
| @@ -357,9 +404,10 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header, | |||
| 357 | acpi_table_print_madt_entry(header); | 404 | acpi_table_print_madt_entry(header); |
| 358 | 405 | ||
| 359 | if (intsrc->source_irq == acpi_gbl_FADT.sci_interrupt) { | 406 | if (intsrc->source_irq == acpi_gbl_FADT.sci_interrupt) { |
| 360 | acpi_sci_ioapic_setup(intsrc->global_irq, | 407 | acpi_sci_ioapic_setup(intsrc->source_irq, |
| 361 | intsrc->inti_flags & ACPI_MADT_POLARITY_MASK, | 408 | intsrc->inti_flags & ACPI_MADT_POLARITY_MASK, |
| 362 | (intsrc->inti_flags & ACPI_MADT_TRIGGER_MASK) >> 2); | 409 | (intsrc->inti_flags & ACPI_MADT_TRIGGER_MASK) >> 2, |
| 410 | intsrc->global_irq); | ||
| 363 | return 0; | 411 | return 0; |
| 364 | } | 412 | } |
| 365 | 413 | ||
| @@ -448,7 +496,7 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) | |||
| 448 | 496 | ||
| 449 | int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) | 497 | int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) |
| 450 | { | 498 | { |
| 451 | *irq = gsi; | 499 | *irq = gsi_to_irq(gsi); |
| 452 | 500 | ||
| 453 | #ifdef CONFIG_X86_IO_APIC | 501 | #ifdef CONFIG_X86_IO_APIC |
| 454 | if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) | 502 | if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) |
| @@ -458,6 +506,14 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) | |||
| 458 | return 0; | 506 | return 0; |
| 459 | } | 507 | } |
| 460 | 508 | ||
| 509 | int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi) | ||
| 510 | { | ||
| 511 | if (isa_irq >= 16) | ||
| 512 | return -1; | ||
| 513 | *gsi = irq_to_gsi(isa_irq); | ||
| 514 | return 0; | ||
| 515 | } | ||
| 516 | |||
| 461 | /* | 517 | /* |
| 462 | * success: return IRQ number (>=0) | 518 | * success: return IRQ number (>=0) |
| 463 | * failure: return < 0 | 519 | * failure: return < 0 |
| @@ -482,7 +538,7 @@ int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) | |||
| 482 | plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity); | 538 | plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity); |
| 483 | } | 539 | } |
| 484 | #endif | 540 | #endif |
| 485 | irq = plat_gsi; | 541 | irq = gsi_to_irq(plat_gsi); |
| 486 | 542 | ||
| 487 | return irq; | 543 | return irq; |
| 488 | } | 544 | } |
| @@ -867,29 +923,6 @@ static int __init acpi_parse_madt_lapic_entries(void) | |||
| 867 | extern int es7000_plat; | 923 | extern int es7000_plat; |
| 868 | #endif | 924 | #endif |
| 869 | 925 | ||
| 870 | int __init acpi_probe_gsi(void) | ||
| 871 | { | ||
| 872 | int idx; | ||
| 873 | int gsi; | ||
| 874 | int max_gsi = 0; | ||
| 875 | |||
| 876 | if (acpi_disabled) | ||
| 877 | return 0; | ||
| 878 | |||
| 879 | if (!acpi_ioapic) | ||
| 880 | return 0; | ||
| 881 | |||
| 882 | max_gsi = 0; | ||
| 883 | for (idx = 0; idx < nr_ioapics; idx++) { | ||
| 884 | gsi = mp_gsi_routing[idx].gsi_end; | ||
| 885 | |||
| 886 | if (gsi > max_gsi) | ||
| 887 | max_gsi = gsi; | ||
| 888 | } | ||
| 889 | |||
| 890 | return max_gsi + 1; | ||
| 891 | } | ||
| 892 | |||
| 893 | static void assign_to_mp_irq(struct mpc_intsrc *m, | 926 | static void assign_to_mp_irq(struct mpc_intsrc *m, |
| 894 | struct mpc_intsrc *mp_irq) | 927 | struct mpc_intsrc *mp_irq) |
| 895 | { | 928 | { |
| @@ -947,13 +980,13 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) | |||
| 947 | mp_irq.dstirq = pin; /* INTIN# */ | 980 | mp_irq.dstirq = pin; /* INTIN# */ |
| 948 | 981 | ||
| 949 | save_mp_irq(&mp_irq); | 982 | save_mp_irq(&mp_irq); |
| 983 | |||
| 984 | isa_irq_to_gsi[bus_irq] = gsi; | ||
| 950 | } | 985 | } |
| 951 | 986 | ||
| 952 | void __init mp_config_acpi_legacy_irqs(void) | 987 | void __init mp_config_acpi_legacy_irqs(void) |
| 953 | { | 988 | { |
| 954 | int i; | 989 | int i; |
| 955 | int ioapic; | ||
| 956 | unsigned int dstapic; | ||
| 957 | struct mpc_intsrc mp_irq; | 990 | struct mpc_intsrc mp_irq; |
| 958 | 991 | ||
| 959 | #if defined (CONFIG_MCA) || defined (CONFIG_EISA) | 992 | #if defined (CONFIG_MCA) || defined (CONFIG_EISA) |
| @@ -974,19 +1007,27 @@ void __init mp_config_acpi_legacy_irqs(void) | |||
| 974 | #endif | 1007 | #endif |
| 975 | 1008 | ||
| 976 | /* | 1009 | /* |
| 977 | * Locate the IOAPIC that manages the ISA IRQs (0-15). | ||
| 978 | */ | ||
| 979 | ioapic = mp_find_ioapic(0); | ||
| 980 | if (ioapic < 0) | ||
| 981 | return; | ||
| 982 | dstapic = mp_ioapics[ioapic].apicid; | ||
| 983 | |||
| 984 | /* | ||
| 985 | * Use the default configuration for the IRQs 0-15. Unless | 1010 | * Use the default configuration for the IRQs 0-15. Unless |
| 986 | * overridden by (MADT) interrupt source override entries. | 1011 | * overridden by (MADT) interrupt source override entries. |
| 987 | */ | 1012 | */ |
| 988 | for (i = 0; i < 16; i++) { | 1013 | for (i = 0; i < 16; i++) { |
| 1014 | int ioapic, pin; | ||
| 1015 | unsigned int dstapic; | ||
| 989 | int idx; | 1016 | int idx; |
| 1017 | u32 gsi; | ||
| 1018 | |||
| 1019 | /* Locate the gsi that irq i maps to. */ | ||
| 1020 | if (acpi_isa_irq_to_gsi(i, &gsi)) | ||
| 1021 | continue; | ||
| 1022 | |||
| 1023 | /* | ||
| 1024 | * Locate the IOAPIC that manages the ISA IRQ. | ||
| 1025 | */ | ||
| 1026 | ioapic = mp_find_ioapic(gsi); | ||
| 1027 | if (ioapic < 0) | ||
| 1028 | continue; | ||
| 1029 | pin = mp_find_ioapic_pin(ioapic, gsi); | ||
| 1030 | dstapic = mp_ioapics[ioapic].apicid; | ||
| 990 | 1031 | ||
| 991 | for (idx = 0; idx < mp_irq_entries; idx++) { | 1032 | for (idx = 0; idx < mp_irq_entries; idx++) { |
| 992 | struct mpc_intsrc *irq = mp_irqs + idx; | 1033 | struct mpc_intsrc *irq = mp_irqs + idx; |
| @@ -996,7 +1037,7 @@ void __init mp_config_acpi_legacy_irqs(void) | |||
| 996 | break; | 1037 | break; |
| 997 | 1038 | ||
| 998 | /* Do we already have a mapping for this IOAPIC pin */ | 1039 | /* Do we already have a mapping for this IOAPIC pin */ |
| 999 | if (irq->dstapic == dstapic && irq->dstirq == i) | 1040 | if (irq->dstapic == dstapic && irq->dstirq == pin) |
| 1000 | break; | 1041 | break; |
| 1001 | } | 1042 | } |
| 1002 | 1043 | ||
| @@ -1011,7 +1052,7 @@ void __init mp_config_acpi_legacy_irqs(void) | |||
| 1011 | mp_irq.dstapic = dstapic; | 1052 | mp_irq.dstapic = dstapic; |
| 1012 | mp_irq.irqtype = mp_INT; | 1053 | mp_irq.irqtype = mp_INT; |
| 1013 | mp_irq.srcbusirq = i; /* Identity mapped */ | 1054 | mp_irq.srcbusirq = i; /* Identity mapped */ |
| 1014 | mp_irq.dstirq = i; | 1055 | mp_irq.dstirq = pin; |
| 1015 | 1056 | ||
| 1016 | save_mp_irq(&mp_irq); | 1057 | save_mp_irq(&mp_irq); |
| 1017 | } | 1058 | } |
| @@ -1076,11 +1117,6 @@ int mp_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) | |||
| 1076 | 1117 | ||
| 1077 | ioapic_pin = mp_find_ioapic_pin(ioapic, gsi); | 1118 | ioapic_pin = mp_find_ioapic_pin(ioapic, gsi); |
| 1078 | 1119 | ||
| 1079 | #ifdef CONFIG_X86_32 | ||
| 1080 | if (ioapic_renumber_irq) | ||
| 1081 | gsi = ioapic_renumber_irq(ioapic, gsi); | ||
| 1082 | #endif | ||
| 1083 | |||
| 1084 | if (ioapic_pin > MP_MAX_IOAPIC_PIN) { | 1120 | if (ioapic_pin > MP_MAX_IOAPIC_PIN) { |
| 1085 | printk(KERN_ERR "Invalid reference to IOAPIC pin " | 1121 | printk(KERN_ERR "Invalid reference to IOAPIC pin " |
| 1086 | "%d-%d\n", mp_ioapics[ioapic].apicid, | 1122 | "%d-%d\n", mp_ioapics[ioapic].apicid, |
| @@ -1094,7 +1130,7 @@ int mp_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) | |||
| 1094 | set_io_apic_irq_attr(&irq_attr, ioapic, ioapic_pin, | 1130 | set_io_apic_irq_attr(&irq_attr, ioapic, ioapic_pin, |
| 1095 | trigger == ACPI_EDGE_SENSITIVE ? 0 : 1, | 1131 | trigger == ACPI_EDGE_SENSITIVE ? 0 : 1, |
| 1096 | polarity == ACPI_ACTIVE_HIGH ? 0 : 1); | 1132 | polarity == ACPI_ACTIVE_HIGH ? 0 : 1); |
| 1097 | io_apic_set_pci_routing(dev, gsi, &irq_attr); | 1133 | io_apic_set_pci_routing(dev, gsi_to_irq(gsi), &irq_attr); |
| 1098 | 1134 | ||
| 1099 | return gsi; | 1135 | return gsi; |
| 1100 | } | 1136 | } |
| @@ -1154,7 +1190,8 @@ static int __init acpi_parse_madt_ioapic_entries(void) | |||
| 1154 | * pretend we got one so we can set the SCI flags. | 1190 | * pretend we got one so we can set the SCI flags. |
| 1155 | */ | 1191 | */ |
| 1156 | if (!acpi_sci_override_gsi) | 1192 | if (!acpi_sci_override_gsi) |
| 1157 | acpi_sci_ioapic_setup(acpi_gbl_FADT.sci_interrupt, 0, 0); | 1193 | acpi_sci_ioapic_setup(acpi_gbl_FADT.sci_interrupt, 0, 0, |
| 1194 | acpi_gbl_FADT.sci_interrupt); | ||
| 1158 | 1195 | ||
| 1159 | /* Fill in identity legacy mappings where no override */ | 1196 | /* Fill in identity legacy mappings where no override */ |
| 1160 | mp_config_acpi_legacy_irqs(); | 1197 | mp_config_acpi_legacy_irqs(); |
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 1a160d5d44d0..70237732a6c7 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c | |||
| @@ -194,7 +194,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len) | |||
| 194 | } | 194 | } |
| 195 | 195 | ||
| 196 | extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; | 196 | extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; |
| 197 | extern u8 *__smp_locks[], *__smp_locks_end[]; | 197 | extern s32 __smp_locks[], __smp_locks_end[]; |
| 198 | static void *text_poke_early(void *addr, const void *opcode, size_t len); | 198 | static void *text_poke_early(void *addr, const void *opcode, size_t len); |
| 199 | 199 | ||
| 200 | /* Replace instructions with better alternatives for this CPU type. | 200 | /* Replace instructions with better alternatives for this CPU type. |
| @@ -235,37 +235,41 @@ void __init_or_module apply_alternatives(struct alt_instr *start, | |||
| 235 | 235 | ||
| 236 | #ifdef CONFIG_SMP | 236 | #ifdef CONFIG_SMP |
| 237 | 237 | ||
| 238 | static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) | 238 | static void alternatives_smp_lock(const s32 *start, const s32 *end, |
| 239 | u8 *text, u8 *text_end) | ||
| 239 | { | 240 | { |
| 240 | u8 **ptr; | 241 | const s32 *poff; |
| 241 | 242 | ||
| 242 | mutex_lock(&text_mutex); | 243 | mutex_lock(&text_mutex); |
| 243 | for (ptr = start; ptr < end; ptr++) { | 244 | for (poff = start; poff < end; poff++) { |
| 244 | if (*ptr < text) | 245 | u8 *ptr = (u8 *)poff + *poff; |
| 245 | continue; | 246 | |
| 246 | if (*ptr > text_end) | 247 | if (!*poff || ptr < text || ptr >= text_end) |
| 247 | continue; | 248 | continue; |
| 248 | /* turn DS segment override prefix into lock prefix */ | 249 | /* turn DS segment override prefix into lock prefix */ |
| 249 | text_poke(*ptr, ((unsigned char []){0xf0}), 1); | 250 | if (*ptr == 0x3e) |
| 251 | text_poke(ptr, ((unsigned char []){0xf0}), 1); | ||
| 250 | }; | 252 | }; |
| 251 | mutex_unlock(&text_mutex); | 253 | mutex_unlock(&text_mutex); |
| 252 | } | 254 | } |
| 253 | 255 | ||
| 254 | static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end) | 256 | static void alternatives_smp_unlock(const s32 *start, const s32 *end, |
| 257 | u8 *text, u8 *text_end) | ||
| 255 | { | 258 | { |
| 256 | u8 **ptr; | 259 | const s32 *poff; |
| 257 | 260 | ||
| 258 | if (noreplace_smp) | 261 | if (noreplace_smp) |
| 259 | return; | 262 | return; |
| 260 | 263 | ||
| 261 | mutex_lock(&text_mutex); | 264 | mutex_lock(&text_mutex); |
| 262 | for (ptr = start; ptr < end; ptr++) { | 265 | for (poff = start; poff < end; poff++) { |
| 263 | if (*ptr < text) | 266 | u8 *ptr = (u8 *)poff + *poff; |
| 264 | continue; | 267 | |
| 265 | if (*ptr > text_end) | 268 | if (!*poff || ptr < text || ptr >= text_end) |
| 266 | continue; | 269 | continue; |
| 267 | /* turn lock prefix into DS segment override prefix */ | 270 | /* turn lock prefix into DS segment override prefix */ |
| 268 | text_poke(*ptr, ((unsigned char []){0x3E}), 1); | 271 | if (*ptr == 0xf0) |
| 272 | text_poke(ptr, ((unsigned char []){0x3E}), 1); | ||
| 269 | }; | 273 | }; |
| 270 | mutex_unlock(&text_mutex); | 274 | mutex_unlock(&text_mutex); |
| 271 | } | 275 | } |
| @@ -276,8 +280,8 @@ struct smp_alt_module { | |||
| 276 | char *name; | 280 | char *name; |
| 277 | 281 | ||
| 278 | /* ptrs to lock prefixes */ | 282 | /* ptrs to lock prefixes */ |
| 279 | u8 **locks; | 283 | const s32 *locks; |
| 280 | u8 **locks_end; | 284 | const s32 *locks_end; |
| 281 | 285 | ||
| 282 | /* .text segment, needed to avoid patching init code ;) */ | 286 | /* .text segment, needed to avoid patching init code ;) */ |
| 283 | u8 *text; | 287 | u8 *text; |
| @@ -398,16 +402,19 @@ void alternatives_smp_switch(int smp) | |||
| 398 | int alternatives_text_reserved(void *start, void *end) | 402 | int alternatives_text_reserved(void *start, void *end) |
| 399 | { | 403 | { |
| 400 | struct smp_alt_module *mod; | 404 | struct smp_alt_module *mod; |
| 401 | u8 **ptr; | 405 | const s32 *poff; |
| 402 | u8 *text_start = start; | 406 | u8 *text_start = start; |
| 403 | u8 *text_end = end; | 407 | u8 *text_end = end; |
| 404 | 408 | ||
| 405 | list_for_each_entry(mod, &smp_alt_modules, next) { | 409 | list_for_each_entry(mod, &smp_alt_modules, next) { |
| 406 | if (mod->text > text_end || mod->text_end < text_start) | 410 | if (mod->text > text_end || mod->text_end < text_start) |
| 407 | continue; | 411 | continue; |
| 408 | for (ptr = mod->locks; ptr < mod->locks_end; ptr++) | 412 | for (poff = mod->locks; poff < mod->locks_end; poff++) { |
| 409 | if (text_start <= *ptr && text_end >= *ptr) | 413 | const u8 *ptr = (const u8 *)poff + *poff; |
| 414 | |||
| 415 | if (text_start <= ptr && text_end > ptr) | ||
| 410 | return 1; | 416 | return 1; |
| 417 | } | ||
| 411 | } | 418 | } |
| 412 | 419 | ||
| 413 | return 0; | 420 | return 0; |
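The s32 conversion in this file turns __smp_locks from an array of absolute pointers into 32-bit self-relative offsets: each entry stores the distance from the entry itself to the lock-prefix byte, and zero entries are skipped. On 64-bit kernels this halves each table entry. A sketch of the decode used by the loops above (illustrative only):

	static inline u8 *smp_lock_target(s32 *poff)
	{
		/* entry address + stored offset == address of the prefix byte */
		return *poff ? (u8 *)poff + *poff : NULL;
	}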
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index f854d89b7edf..fa5a1474cd18 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c | |||
| @@ -731,18 +731,22 @@ static bool increase_address_space(struct protection_domain *domain, | |||
| 731 | 731 | ||
| 732 | static u64 *alloc_pte(struct protection_domain *domain, | 732 | static u64 *alloc_pte(struct protection_domain *domain, |
| 733 | unsigned long address, | 733 | unsigned long address, |
| 734 | int end_lvl, | 734 | unsigned long page_size, |
| 735 | u64 **pte_page, | 735 | u64 **pte_page, |
| 736 | gfp_t gfp) | 736 | gfp_t gfp) |
| 737 | { | 737 | { |
| 738 | int level, end_lvl; | ||
| 738 | u64 *pte, *page; | 739 | u64 *pte, *page; |
| 739 | int level; | 740 | |
| 741 | BUG_ON(!is_power_of_2(page_size)); | ||
| 740 | 742 | ||
| 741 | while (address > PM_LEVEL_SIZE(domain->mode)) | 743 | while (address > PM_LEVEL_SIZE(domain->mode)) |
| 742 | increase_address_space(domain, gfp); | 744 | increase_address_space(domain, gfp); |
| 743 | 745 | ||
| 744 | level = domain->mode - 1; | 746 | level = domain->mode - 1; |
| 745 | pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; | 747 | pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; |
| 748 | address = PAGE_SIZE_ALIGN(address, page_size); | ||
| 749 | end_lvl = PAGE_SIZE_LEVEL(page_size); | ||
| 746 | 750 | ||
| 747 | while (level > end_lvl) { | 751 | while (level > end_lvl) { |
| 748 | if (!IOMMU_PTE_PRESENT(*pte)) { | 752 | if (!IOMMU_PTE_PRESENT(*pte)) { |
| @@ -752,6 +756,10 @@ static u64 *alloc_pte(struct protection_domain *domain, | |||
| 752 | *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); | 756 | *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); |
| 753 | } | 757 | } |
| 754 | 758 | ||
| 759 | /* No level skipping support yet */ | ||
| 760 | if (PM_PTE_LEVEL(*pte) != level) | ||
| 761 | return NULL; | ||
| 762 | |||
| 755 | level -= 1; | 763 | level -= 1; |
| 756 | 764 | ||
| 757 | pte = IOMMU_PTE_PAGE(*pte); | 765 | pte = IOMMU_PTE_PAGE(*pte); |
| @@ -769,28 +777,47 @@ static u64 *alloc_pte(struct protection_domain *domain, | |||
| 769 | * This function checks if there is a PTE for a given dma address. If | 777 | * This function checks if there is a PTE for a given dma address. If |
| 770 | * there is one, it returns the pointer to it. | 778 | * there is one, it returns the pointer to it. |
| 771 | */ | 779 | */ |
| 772 | static u64 *fetch_pte(struct protection_domain *domain, | 780 | static u64 *fetch_pte(struct protection_domain *domain, unsigned long address) |
| 773 | unsigned long address, int map_size) | ||
| 774 | { | 781 | { |
| 775 | int level; | 782 | int level; |
| 776 | u64 *pte; | 783 | u64 *pte; |
| 777 | 784 | ||
| 778 | level = domain->mode - 1; | 785 | if (address > PM_LEVEL_SIZE(domain->mode)) |
| 779 | pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; | 786 | return NULL; |
| 787 | |||
| 788 | level = domain->mode - 1; | ||
| 789 | pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; | ||
| 780 | 790 | ||
| 781 | while (level > map_size) { | 791 | while (level > 0) { |
| 792 | |||
| 793 | /* Not Present */ | ||
| 782 | if (!IOMMU_PTE_PRESENT(*pte)) | 794 | if (!IOMMU_PTE_PRESENT(*pte)) |
| 783 | return NULL; | 795 | return NULL; |
| 784 | 796 | ||
| 797 | /* Large PTE */ | ||
| 798 | if (PM_PTE_LEVEL(*pte) == 0x07) { | ||
| 799 | unsigned long pte_mask, __pte; | ||
| 800 | |||
| 801 | /* | ||
| 802 | * If we have a series of large PTEs, make | ||
| 803 | * sure to return a pointer to the first one. | ||
| 804 | */ | ||
| 805 | pte_mask = PTE_PAGE_SIZE(*pte); | ||
| 806 | pte_mask = ~((PAGE_SIZE_PTE_COUNT(pte_mask) << 3) - 1); | ||
| 807 | __pte = ((unsigned long)pte) & pte_mask; | ||
| 808 | |||
| 809 | return (u64 *)__pte; | ||
| 810 | } | ||
| 811 | |||
| 812 | /* No level skipping support yet */ | ||
| 813 | if (PM_PTE_LEVEL(*pte) != level) | ||
| 814 | return NULL; | ||
| 815 | |||
| 785 | level -= 1; | 816 | level -= 1; |
| 786 | 817 | ||
| 818 | /* Walk to the next level */ | ||
| 787 | pte = IOMMU_PTE_PAGE(*pte); | 819 | pte = IOMMU_PTE_PAGE(*pte); |
| 788 | pte = &pte[PM_LEVEL_INDEX(level, address)]; | 820 | pte = &pte[PM_LEVEL_INDEX(level, address)]; |
| 789 | |||
| 790 | if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) { | ||
| 791 | pte = NULL; | ||
| 792 | break; | ||
| 793 | } | ||
| 794 | } | 821 | } |
| 795 | 822 | ||
| 796 | return pte; | 823 | return pte; |
| @@ -807,44 +834,84 @@ static int iommu_map_page(struct protection_domain *dom, | |||
| 807 | unsigned long bus_addr, | 834 | unsigned long bus_addr, |
| 808 | unsigned long phys_addr, | 835 | unsigned long phys_addr, |
| 809 | int prot, | 836 | int prot, |
| 810 | int map_size) | 837 | unsigned long page_size) |
| 811 | { | 838 | { |
| 812 | u64 __pte, *pte; | 839 | u64 __pte, *pte; |
| 813 | 840 | int i, count; | |
| 814 | bus_addr = PAGE_ALIGN(bus_addr); | ||
| 815 | phys_addr = PAGE_ALIGN(phys_addr); | ||
| 816 | |||
| 817 | BUG_ON(!PM_ALIGNED(map_size, bus_addr)); | ||
| 818 | BUG_ON(!PM_ALIGNED(map_size, phys_addr)); | ||
| 819 | 841 | ||
| 820 | if (!(prot & IOMMU_PROT_MASK)) | 842 | if (!(prot & IOMMU_PROT_MASK)) |
| 821 | return -EINVAL; | 843 | return -EINVAL; |
| 822 | 844 | ||
| 823 | pte = alloc_pte(dom, bus_addr, map_size, NULL, GFP_KERNEL); | 845 | bus_addr = PAGE_ALIGN(bus_addr); |
| 846 | phys_addr = PAGE_ALIGN(phys_addr); | ||
| 847 | count = PAGE_SIZE_PTE_COUNT(page_size); | ||
| 848 | pte = alloc_pte(dom, bus_addr, page_size, NULL, GFP_KERNEL); | ||
| 849 | |||
| 850 | for (i = 0; i < count; ++i) | ||
| 851 | if (IOMMU_PTE_PRESENT(pte[i])) | ||
| 852 | return -EBUSY; | ||
| 824 | 853 | ||
| 825 | if (IOMMU_PTE_PRESENT(*pte)) | 854 | if (page_size > PAGE_SIZE) { |
| 826 | return -EBUSY; | 855 | __pte = PAGE_SIZE_PTE(phys_addr, page_size); |
| 856 | __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_P | IOMMU_PTE_FC; | ||
| 857 | } else | ||
| 858 | __pte = phys_addr | IOMMU_PTE_P | IOMMU_PTE_FC; | ||
| 827 | 859 | ||
| 828 | __pte = phys_addr | IOMMU_PTE_P; | ||
| 829 | if (prot & IOMMU_PROT_IR) | 860 | if (prot & IOMMU_PROT_IR) |
| 830 | __pte |= IOMMU_PTE_IR; | 861 | __pte |= IOMMU_PTE_IR; |
| 831 | if (prot & IOMMU_PROT_IW) | 862 | if (prot & IOMMU_PROT_IW) |
| 832 | __pte |= IOMMU_PTE_IW; | 863 | __pte |= IOMMU_PTE_IW; |
| 833 | 864 | ||
| 834 | *pte = __pte; | 865 | for (i = 0; i < count; ++i) |
| 866 | pte[i] = __pte; | ||
| 835 | 867 | ||
| 836 | update_domain(dom); | 868 | update_domain(dom); |
| 837 | 869 | ||
| 838 | return 0; | 870 | return 0; |
| 839 | } | 871 | } |
| 840 | 872 | ||
| 841 | static void iommu_unmap_page(struct protection_domain *dom, | 873 | static unsigned long iommu_unmap_page(struct protection_domain *dom, |
| 842 | unsigned long bus_addr, int map_size) | 874 | unsigned long bus_addr, |
| 875 | unsigned long page_size) | ||
| 843 | { | 876 | { |
| 844 | u64 *pte = fetch_pte(dom, bus_addr, map_size); | 877 | unsigned long long unmap_size, unmapped; |
| 878 | u64 *pte; | ||
| 879 | |||
| 880 | BUG_ON(!is_power_of_2(page_size)); | ||
| 881 | |||
| 882 | unmapped = 0; | ||
| 845 | 883 | ||
| 846 | if (pte) | 884 | while (unmapped < page_size) { |
| 847 | *pte = 0; | 885 | |
| 886 | pte = fetch_pte(dom, bus_addr); | ||
| 887 | |||
| 888 | if (!pte) { | ||
| 889 | /* | ||
| 890 | * No PTE for this address | ||
| 891 | * move forward in 4kb steps | ||
| 892 | */ | ||
| 893 | unmap_size = PAGE_SIZE; | ||
| 894 | } else if (PM_PTE_LEVEL(*pte) == 0) { | ||
| 895 | /* 4kb PTE found for this address */ | ||
| 896 | unmap_size = PAGE_SIZE; | ||
| 897 | *pte = 0ULL; | ||
| 898 | } else { | ||
| 899 | int count, i; | ||
| 900 | |||
| 901 | /* Large PTE found which maps this address */ | ||
| 902 | unmap_size = PTE_PAGE_SIZE(*pte); | ||
| 903 | count = PAGE_SIZE_PTE_COUNT(unmap_size); | ||
| 904 | for (i = 0; i < count; i++) | ||
| 905 | pte[i] = 0ULL; | ||
| 906 | } | ||
| 907 | |||
| 908 | bus_addr = (bus_addr & ~(unmap_size - 1)) + unmap_size; | ||
| 909 | unmapped += unmap_size; | ||
| 910 | } | ||
| 911 | |||
| 912 | BUG_ON(!is_power_of_2(unmapped)); | ||
| 913 | |||
| 914 | return unmapped; | ||
| 848 | } | 915 | } |
| 849 | 916 | ||
| 850 | /* | 917 | /* |
| @@ -878,7 +945,7 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, | |||
| 878 | for (addr = e->address_start; addr < e->address_end; | 945 | for (addr = e->address_start; addr < e->address_end; |
| 879 | addr += PAGE_SIZE) { | 946 | addr += PAGE_SIZE) { |
| 880 | ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot, | 947 | ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot, |
| 881 | PM_MAP_4k); | 948 | PAGE_SIZE); |
| 882 | if (ret) | 949 | if (ret) |
| 883 | return ret; | 950 | return ret; |
| 884 | /* | 951 | /* |
| @@ -1006,7 +1073,7 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom, | |||
| 1006 | u64 *pte, *pte_page; | 1073 | u64 *pte, *pte_page; |
| 1007 | 1074 | ||
| 1008 | for (i = 0; i < num_ptes; ++i) { | 1075 | for (i = 0; i < num_ptes; ++i) { |
| 1009 | pte = alloc_pte(&dma_dom->domain, address, PM_MAP_4k, | 1076 | pte = alloc_pte(&dma_dom->domain, address, PAGE_SIZE, |
| 1010 | &pte_page, gfp); | 1077 | &pte_page, gfp); |
| 1011 | if (!pte) | 1078 | if (!pte) |
| 1012 | goto out_free; | 1079 | goto out_free; |
| @@ -1042,7 +1109,7 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom, | |||
| 1042 | for (i = dma_dom->aperture[index]->offset; | 1109 | for (i = dma_dom->aperture[index]->offset; |
| 1043 | i < dma_dom->aperture_size; | 1110 | i < dma_dom->aperture_size; |
| 1044 | i += PAGE_SIZE) { | 1111 | i += PAGE_SIZE) { |
| 1045 | u64 *pte = fetch_pte(&dma_dom->domain, i, PM_MAP_4k); | 1112 | u64 *pte = fetch_pte(&dma_dom->domain, i); |
| 1046 | if (!pte || !IOMMU_PTE_PRESENT(*pte)) | 1113 | if (!pte || !IOMMU_PTE_PRESENT(*pte)) |
| 1047 | continue; | 1114 | continue; |
| 1048 | 1115 | ||
| @@ -1712,7 +1779,7 @@ static u64* dma_ops_get_pte(struct dma_ops_domain *dom, | |||
| 1712 | 1779 | ||
| 1713 | pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; | 1780 | pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; |
| 1714 | if (!pte) { | 1781 | if (!pte) { |
| 1715 | pte = alloc_pte(&dom->domain, address, PM_MAP_4k, &pte_page, | 1782 | pte = alloc_pte(&dom->domain, address, PAGE_SIZE, &pte_page, |
| 1716 | GFP_ATOMIC); | 1783 | GFP_ATOMIC); |
| 1717 | aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page; | 1784 | aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page; |
| 1718 | } else | 1785 | } else |
| @@ -2439,12 +2506,11 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, | |||
| 2439 | return ret; | 2506 | return ret; |
| 2440 | } | 2507 | } |
| 2441 | 2508 | ||
| 2442 | static int amd_iommu_map_range(struct iommu_domain *dom, | 2509 | static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, |
| 2443 | unsigned long iova, phys_addr_t paddr, | 2510 | phys_addr_t paddr, int gfp_order, int iommu_prot) |
| 2444 | size_t size, int iommu_prot) | ||
| 2445 | { | 2511 | { |
| 2512 | unsigned long page_size = 0x1000UL << gfp_order; | ||
| 2446 | struct protection_domain *domain = dom->priv; | 2513 | struct protection_domain *domain = dom->priv; |
| 2447 | unsigned long i, npages = iommu_num_pages(paddr, size, PAGE_SIZE); | ||
| 2448 | int prot = 0; | 2514 | int prot = 0; |
| 2449 | int ret; | 2515 | int ret; |
| 2450 | 2516 | ||
| @@ -2453,61 +2519,50 @@ static int amd_iommu_map_range(struct iommu_domain *dom, | |||
| 2453 | if (iommu_prot & IOMMU_WRITE) | 2519 | if (iommu_prot & IOMMU_WRITE) |
| 2454 | prot |= IOMMU_PROT_IW; | 2520 | prot |= IOMMU_PROT_IW; |
| 2455 | 2521 | ||
| 2456 | iova &= PAGE_MASK; | ||
| 2457 | paddr &= PAGE_MASK; | ||
| 2458 | |||
| 2459 | mutex_lock(&domain->api_lock); | 2522 | mutex_lock(&domain->api_lock); |
| 2460 | 2523 | ret = iommu_map_page(domain, iova, paddr, prot, page_size); | |
| 2461 | for (i = 0; i < npages; ++i) { | ||
| 2462 | ret = iommu_map_page(domain, iova, paddr, prot, PM_MAP_4k); | ||
| 2463 | if (ret) | ||
| 2464 | return ret; | ||
| 2465 | |||
| 2466 | iova += PAGE_SIZE; | ||
| 2467 | paddr += PAGE_SIZE; | ||
| 2468 | } | ||
| 2469 | |||
| 2470 | mutex_unlock(&domain->api_lock); | 2524 | mutex_unlock(&domain->api_lock); |
| 2471 | 2525 | ||
| 2472 | return 0; | 2526 | return ret; |
| 2473 | } | 2527 | } |
| 2474 | 2528 | ||
| 2475 | static void amd_iommu_unmap_range(struct iommu_domain *dom, | 2529 | static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, |
| 2476 | unsigned long iova, size_t size) | 2530 | int gfp_order) |
| 2477 | { | 2531 | { |
| 2478 | |||
| 2479 | struct protection_domain *domain = dom->priv; | 2532 | struct protection_domain *domain = dom->priv; |
| 2480 | unsigned long i, npages = iommu_num_pages(iova, size, PAGE_SIZE); | 2533 | unsigned long page_size, unmap_size; |
| 2481 | 2534 | ||
| 2482 | iova &= PAGE_MASK; | 2535 | page_size = 0x1000UL << gfp_order; |
| 2483 | 2536 | ||
| 2484 | mutex_lock(&domain->api_lock); | 2537 | mutex_lock(&domain->api_lock); |
| 2485 | 2538 | unmap_size = iommu_unmap_page(domain, iova, page_size); | |
| 2486 | for (i = 0; i < npages; ++i) { | 2539 | mutex_unlock(&domain->api_lock); |
| 2487 | iommu_unmap_page(domain, iova, PM_MAP_4k); | ||
| 2488 | iova += PAGE_SIZE; | ||
| 2489 | } | ||
| 2490 | 2540 | ||
| 2491 | iommu_flush_tlb_pde(domain); | 2541 | iommu_flush_tlb_pde(domain); |
| 2492 | 2542 | ||
| 2493 | mutex_unlock(&domain->api_lock); | 2543 | return get_order(unmap_size); |
| 2494 | } | 2544 | } |
| 2495 | 2545 | ||
| 2496 | static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, | 2546 | static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, |
| 2497 | unsigned long iova) | 2547 | unsigned long iova) |
| 2498 | { | 2548 | { |
| 2499 | struct protection_domain *domain = dom->priv; | 2549 | struct protection_domain *domain = dom->priv; |
| 2500 | unsigned long offset = iova & ~PAGE_MASK; | 2550 | unsigned long offset_mask; |
| 2501 | phys_addr_t paddr; | 2551 | phys_addr_t paddr; |
| 2502 | u64 *pte; | 2552 | u64 *pte, __pte; |
| 2503 | 2553 | ||
| 2504 | pte = fetch_pte(domain, iova, PM_MAP_4k); | 2554 | pte = fetch_pte(domain, iova); |
| 2505 | 2555 | ||
| 2506 | if (!pte || !IOMMU_PTE_PRESENT(*pte)) | 2556 | if (!pte || !IOMMU_PTE_PRESENT(*pte)) |
| 2507 | return 0; | 2557 | return 0; |
| 2508 | 2558 | ||
| 2509 | paddr = *pte & IOMMU_PAGE_MASK; | 2559 | if (PM_PTE_LEVEL(*pte) == 0) |
| 2510 | paddr |= offset; | 2560 | offset_mask = PAGE_SIZE - 1; |
| 2561 | else | ||
| 2562 | offset_mask = PTE_PAGE_SIZE(*pte) - 1; | ||
| 2563 | |||
| 2564 | __pte = *pte & PM_ADDR_MASK; | ||
| 2565 | paddr = (__pte & ~offset_mask) | (iova & offset_mask); | ||
| 2511 | 2566 | ||
| 2512 | return paddr; | 2567 | return paddr; |
| 2513 | } | 2568 | } |
| @@ -2523,8 +2578,8 @@ static struct iommu_ops amd_iommu_ops = { | |||
| 2523 | .domain_destroy = amd_iommu_domain_destroy, | 2578 | .domain_destroy = amd_iommu_domain_destroy, |
| 2524 | .attach_dev = amd_iommu_attach_device, | 2579 | .attach_dev = amd_iommu_attach_device, |
| 2525 | .detach_dev = amd_iommu_detach_device, | 2580 | .detach_dev = amd_iommu_detach_device, |
| 2526 | .map = amd_iommu_map_range, | 2581 | .map = amd_iommu_map, |
| 2527 | .unmap = amd_iommu_unmap_range, | 2582 | .unmap = amd_iommu_unmap, |
| 2528 | .iova_to_phys = amd_iommu_iova_to_phys, | 2583 | .iova_to_phys = amd_iommu_iova_to_phys, |
| 2529 | .domain_has_cap = amd_iommu_domain_has_cap, | 2584 | .domain_has_cap = amd_iommu_domain_has_cap, |
| 2530 | }; | 2585 | }; |
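
The amd_iommu.c changes teach alloc_pte(), fetch_pte(), iommu_map_page() and iommu_unmap_page() about page sizes larger than 4 KiB: a large mapping sits at a higher page-table level and is replicated across several consecutive PTE slots, which is what PAGE_SIZE_LEVEL() and PAGE_SIZE_PTE_COUNT() compute in the hunks above. A rough worked example of that arithmetic, assuming the usual 4 KiB base page and 9 translation bits per level (the helpers below only mirror the idea; they are not the kernel macros):

#include <stdio.h>

/* For a mapping of 2^order bytes: the target level is (order - 12) / 9
 * and the mapping occupies 1 << ((order - 12) % 9) consecutive PTEs at
 * that level. Reconstructed for illustration. */
static unsigned int page_size_level(unsigned int order)
{
    return (order - 12) / 9;
}

static unsigned long page_size_pte_count(unsigned int order)
{
    return 1UL << ((order - 12) % 9);
}

int main(void)
{
    unsigned int orders[] = { 12, 16, 21, 30 }; /* 4K, 64K, 2M, 1G */
    unsigned int i;

    for (i = 0; i < 4; i++)
        printf("2^%-2u bytes -> level %u, %lu replicated PTE(s)\n",
               orders[i], page_size_level(orders[i]),
               page_size_pte_count(orders[i]));
    return 0;
}
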
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 6360abf993d4..3bacb4d0844c 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c | |||
| @@ -120,6 +120,7 @@ struct ivmd_header { | |||
| 120 | bool amd_iommu_dump; | 120 | bool amd_iommu_dump; |
| 121 | 121 | ||
| 122 | static int __initdata amd_iommu_detected; | 122 | static int __initdata amd_iommu_detected; |
| 123 | static bool __initdata amd_iommu_disabled; | ||
| 123 | 124 | ||
| 124 | u16 amd_iommu_last_bdf; /* largest PCI device id we have | 125 | u16 amd_iommu_last_bdf; /* largest PCI device id we have |
| 125 | to handle */ | 126 | to handle */ |
| @@ -1372,6 +1373,9 @@ void __init amd_iommu_detect(void) | |||
| 1372 | if (no_iommu || (iommu_detected && !gart_iommu_aperture)) | 1373 | if (no_iommu || (iommu_detected && !gart_iommu_aperture)) |
| 1373 | return; | 1374 | return; |
| 1374 | 1375 | ||
| 1376 | if (amd_iommu_disabled) | ||
| 1377 | return; | ||
| 1378 | |||
| 1375 | if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { | 1379 | if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { |
| 1376 | iommu_detected = 1; | 1380 | iommu_detected = 1; |
| 1377 | amd_iommu_detected = 1; | 1381 | amd_iommu_detected = 1; |
| @@ -1401,6 +1405,8 @@ static int __init parse_amd_iommu_options(char *str) | |||
| 1401 | for (; *str; ++str) { | 1405 | for (; *str; ++str) { |
| 1402 | if (strncmp(str, "fullflush", 9) == 0) | 1406 | if (strncmp(str, "fullflush", 9) == 0) |
| 1403 | amd_iommu_unmap_flush = true; | 1407 | amd_iommu_unmap_flush = true; |
| 1408 | if (strncmp(str, "off", 3) == 0) | ||
| 1409 | amd_iommu_disabled = true; | ||
| 1404 | } | 1410 | } |
| 1405 | 1411 | ||
| 1406 | return 1; | 1412 | return 1; |
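
The init hunk adds an "off" keyword next to the existing "fullflush" one, so the whole driver can now be kept out of the way from the kernel command line (amd_iommu=off). The parser is a simple substring scan over the option string; a stand-alone sketch of the same loop, handy for seeing how a combined string such as "fullflush,off" is treated:

#include <stdio.h>
#include <string.h>
#include <stdbool.h>

static bool unmap_flush;
static bool iommu_disabled;

/* Mirrors the scan in parse_amd_iommu_options(): every position in the
 * string is tested against the known keywords. */
static void parse_options(const char *str)
{
    for (; *str; ++str) {
        if (strncmp(str, "fullflush", 9) == 0)
            unmap_flush = true;
        if (strncmp(str, "off", 3) == 0)
            iommu_disabled = true;
    }
}

int main(void)
{
    parse_options("fullflush,off");
    printf("fullflush=%d off=%d\n", unmap_flush, iommu_disabled);
    return 0;
}
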
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 03ba1b895f5e..425e53a87feb 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c | |||
| @@ -131,24 +131,6 @@ int es7000_plat; | |||
| 131 | 131 | ||
| 132 | static unsigned int base; | 132 | static unsigned int base; |
| 133 | 133 | ||
| 134 | static int | ||
| 135 | es7000_rename_gsi(int ioapic, int gsi) | ||
| 136 | { | ||
| 137 | if (es7000_plat == ES7000_ZORRO) | ||
| 138 | return gsi; | ||
| 139 | |||
| 140 | if (!base) { | ||
| 141 | int i; | ||
| 142 | for (i = 0; i < nr_ioapics; i++) | ||
| 143 | base += nr_ioapic_registers[i]; | ||
| 144 | } | ||
| 145 | |||
| 146 | if (!ioapic && (gsi < 16)) | ||
| 147 | gsi += base; | ||
| 148 | |||
| 149 | return gsi; | ||
| 150 | } | ||
| 151 | |||
| 152 | static int __cpuinit wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) | 134 | static int __cpuinit wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) |
| 153 | { | 135 | { |
| 154 | unsigned long vect = 0, psaival = 0; | 136 | unsigned long vect = 0, psaival = 0; |
| @@ -190,7 +172,6 @@ static void setup_unisys(void) | |||
| 190 | es7000_plat = ES7000_ZORRO; | 172 | es7000_plat = ES7000_ZORRO; |
| 191 | else | 173 | else |
| 192 | es7000_plat = ES7000_CLASSIC; | 174 | es7000_plat = ES7000_CLASSIC; |
| 193 | ioapic_renumber_irq = es7000_rename_gsi; | ||
| 194 | } | 175 | } |
| 195 | 176 | ||
| 196 | /* | 177 | /* |
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index eb2789c3f721..33f3563a2a52 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
| @@ -89,6 +89,9 @@ int nr_ioapics; | |||
| 89 | /* IO APIC gsi routing info */ | 89 | /* IO APIC gsi routing info */ |
| 90 | struct mp_ioapic_gsi mp_gsi_routing[MAX_IO_APICS]; | 90 | struct mp_ioapic_gsi mp_gsi_routing[MAX_IO_APICS]; |
| 91 | 91 | ||
| 92 | /* The last gsi number used */ | ||
| 93 | u32 gsi_end; | ||
| 94 | |||
| 92 | /* MP IRQ source entries */ | 95 | /* MP IRQ source entries */ |
| 93 | struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; | 96 | struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; |
| 94 | 97 | ||
| @@ -1013,10 +1016,9 @@ static inline int irq_trigger(int idx) | |||
| 1013 | return MPBIOS_trigger(idx); | 1016 | return MPBIOS_trigger(idx); |
| 1014 | } | 1017 | } |
| 1015 | 1018 | ||
| 1016 | int (*ioapic_renumber_irq)(int ioapic, int irq); | ||
| 1017 | static int pin_2_irq(int idx, int apic, int pin) | 1019 | static int pin_2_irq(int idx, int apic, int pin) |
| 1018 | { | 1020 | { |
| 1019 | int irq, i; | 1021 | int irq; |
| 1020 | int bus = mp_irqs[idx].srcbus; | 1022 | int bus = mp_irqs[idx].srcbus; |
| 1021 | 1023 | ||
| 1022 | /* | 1024 | /* |
| @@ -1028,18 +1030,12 @@ static int pin_2_irq(int idx, int apic, int pin) | |||
| 1028 | if (test_bit(bus, mp_bus_not_pci)) { | 1030 | if (test_bit(bus, mp_bus_not_pci)) { |
| 1029 | irq = mp_irqs[idx].srcbusirq; | 1031 | irq = mp_irqs[idx].srcbusirq; |
| 1030 | } else { | 1032 | } else { |
| 1031 | /* | 1033 | u32 gsi = mp_gsi_routing[apic].gsi_base + pin; |
| 1032 | * PCI IRQs are mapped in order | 1034 | |
| 1033 | */ | 1035 | if (gsi >= NR_IRQS_LEGACY) |
| 1034 | i = irq = 0; | 1036 | irq = gsi; |
| 1035 | while (i < apic) | 1037 | else |
| 1036 | irq += nr_ioapic_registers[i++]; | 1038 | irq = gsi_end + 1 + gsi; |
| 1037 | irq += pin; | ||
| 1038 | /* | ||
| 1039 | * For MPS mode, so far only needed by ES7000 platform | ||
| 1040 | */ | ||
| 1041 | if (ioapic_renumber_irq) | ||
| 1042 | irq = ioapic_renumber_irq(apic, irq); | ||
| 1043 | } | 1039 | } |
| 1044 | 1040 | ||
| 1045 | #ifdef CONFIG_X86_32 | 1041 | #ifdef CONFIG_X86_32 |
| @@ -1950,20 +1946,8 @@ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; | |||
| 1950 | 1946 | ||
| 1951 | void __init enable_IO_APIC(void) | 1947 | void __init enable_IO_APIC(void) |
| 1952 | { | 1948 | { |
| 1953 | union IO_APIC_reg_01 reg_01; | ||
| 1954 | int i8259_apic, i8259_pin; | 1949 | int i8259_apic, i8259_pin; |
| 1955 | int apic; | 1950 | int apic; |
| 1956 | unsigned long flags; | ||
| 1957 | |||
| 1958 | /* | ||
| 1959 | * The number of IO-APIC IRQ registers (== #pins): | ||
| 1960 | */ | ||
| 1961 | for (apic = 0; apic < nr_ioapics; apic++) { | ||
| 1962 | raw_spin_lock_irqsave(&ioapic_lock, flags); | ||
| 1963 | reg_01.raw = io_apic_read(apic, 1); | ||
| 1964 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | ||
| 1965 | nr_ioapic_registers[apic] = reg_01.bits.entries+1; | ||
| 1966 | } | ||
| 1967 | 1951 | ||
| 1968 | if (!legacy_pic->nr_legacy_irqs) | 1952 | if (!legacy_pic->nr_legacy_irqs) |
| 1969 | return; | 1953 | return; |
| @@ -3858,27 +3842,20 @@ int __init io_apic_get_redir_entries (int ioapic) | |||
| 3858 | reg_01.raw = io_apic_read(ioapic, 1); | 3842 | reg_01.raw = io_apic_read(ioapic, 1); |
| 3859 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | 3843 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
| 3860 | 3844 | ||
| 3861 | return reg_01.bits.entries; | 3845 | /* The register returns the maximum redir index |
| 3846 | * supported, which is one less than the total number of redir | ||
| 3847 | * entries. | ||
| 3848 | */ | ||
| 3849 | return reg_01.bits.entries + 1; | ||
| 3862 | } | 3850 | } |
| 3863 | 3851 | ||
| 3864 | void __init probe_nr_irqs_gsi(void) | 3852 | void __init probe_nr_irqs_gsi(void) |
| 3865 | { | 3853 | { |
| 3866 | int nr = 0; | 3854 | int nr; |
| 3867 | 3855 | ||
| 3868 | nr = acpi_probe_gsi(); | 3856 | nr = gsi_end + 1 + NR_IRQS_LEGACY; |
| 3869 | if (nr > nr_irqs_gsi) { | 3857 | if (nr > nr_irqs_gsi) |
| 3870 | nr_irqs_gsi = nr; | 3858 | nr_irqs_gsi = nr; |
| 3871 | } else { | ||
| 3872 | /* for acpi=off or acpi is not compiled in */ | ||
| 3873 | int idx; | ||
| 3874 | |||
| 3875 | nr = 0; | ||
| 3876 | for (idx = 0; idx < nr_ioapics; idx++) | ||
| 3877 | nr += io_apic_get_redir_entries(idx) + 1; | ||
| 3878 | |||
| 3879 | if (nr > nr_irqs_gsi) | ||
| 3880 | nr_irqs_gsi = nr; | ||
| 3881 | } | ||
| 3882 | 3859 | ||
| 3883 | printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); | 3860 | printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); |
| 3884 | } | 3861 | } |
| @@ -4085,22 +4062,27 @@ int __init io_apic_get_version(int ioapic) | |||
| 4085 | return reg_01.bits.version; | 4062 | return reg_01.bits.version; |
| 4086 | } | 4063 | } |
| 4087 | 4064 | ||
| 4088 | int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) | 4065 | int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity) |
| 4089 | { | 4066 | { |
| 4090 | int i; | 4067 | int ioapic, pin, idx; |
| 4091 | 4068 | ||
| 4092 | if (skip_ioapic_setup) | 4069 | if (skip_ioapic_setup) |
| 4093 | return -1; | 4070 | return -1; |
| 4094 | 4071 | ||
| 4095 | for (i = 0; i < mp_irq_entries; i++) | 4072 | ioapic = mp_find_ioapic(gsi); |
| 4096 | if (mp_irqs[i].irqtype == mp_INT && | 4073 | if (ioapic < 0) |
| 4097 | mp_irqs[i].srcbusirq == bus_irq) | ||
| 4098 | break; | ||
| 4099 | if (i >= mp_irq_entries) | ||
| 4100 | return -1; | 4074 | return -1; |
| 4101 | 4075 | ||
| 4102 | *trigger = irq_trigger(i); | 4076 | pin = mp_find_ioapic_pin(ioapic, gsi); |
| 4103 | *polarity = irq_polarity(i); | 4077 | if (pin < 0) |
| 4078 | return -1; | ||
| 4079 | |||
| 4080 | idx = find_irq_entry(ioapic, pin, mp_INT); | ||
| 4081 | if (idx < 0) | ||
| 4082 | return -1; | ||
| 4083 | |||
| 4084 | *trigger = irq_trigger(idx); | ||
| 4085 | *polarity = irq_polarity(idx); | ||
| 4104 | return 0; | 4086 | return 0; |
| 4105 | } | 4087 | } |
| 4106 | 4088 | ||
| @@ -4241,7 +4223,7 @@ void __init ioapic_insert_resources(void) | |||
| 4241 | } | 4223 | } |
| 4242 | } | 4224 | } |
| 4243 | 4225 | ||
| 4244 | int mp_find_ioapic(int gsi) | 4226 | int mp_find_ioapic(u32 gsi) |
| 4245 | { | 4227 | { |
| 4246 | int i = 0; | 4228 | int i = 0; |
| 4247 | 4229 | ||
| @@ -4256,7 +4238,7 @@ int mp_find_ioapic(int gsi) | |||
| 4256 | return -1; | 4238 | return -1; |
| 4257 | } | 4239 | } |
| 4258 | 4240 | ||
| 4259 | int mp_find_ioapic_pin(int ioapic, int gsi) | 4241 | int mp_find_ioapic_pin(int ioapic, u32 gsi) |
| 4260 | { | 4242 | { |
| 4261 | if (WARN_ON(ioapic == -1)) | 4243 | if (WARN_ON(ioapic == -1)) |
| 4262 | return -1; | 4244 | return -1; |
| @@ -4284,6 +4266,7 @@ static int bad_ioapic(unsigned long address) | |||
| 4284 | void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) | 4266 | void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) |
| 4285 | { | 4267 | { |
| 4286 | int idx = 0; | 4268 | int idx = 0; |
| 4269 | int entries; | ||
| 4287 | 4270 | ||
| 4288 | if (bad_ioapic(address)) | 4271 | if (bad_ioapic(address)) |
| 4289 | return; | 4272 | return; |
| @@ -4302,9 +4285,17 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) | |||
| 4302 | * Build basic GSI lookup table to facilitate gsi->io_apic lookups | 4285 | * Build basic GSI lookup table to facilitate gsi->io_apic lookups |
| 4303 | * and to prevent reprogramming of IOAPIC pins (PCI GSIs). | 4286 | * and to prevent reprogramming of IOAPIC pins (PCI GSIs). |
| 4304 | */ | 4287 | */ |
| 4288 | entries = io_apic_get_redir_entries(idx); | ||
| 4305 | mp_gsi_routing[idx].gsi_base = gsi_base; | 4289 | mp_gsi_routing[idx].gsi_base = gsi_base; |
| 4306 | mp_gsi_routing[idx].gsi_end = gsi_base + | 4290 | mp_gsi_routing[idx].gsi_end = gsi_base + entries - 1; |
| 4307 | io_apic_get_redir_entries(idx); | 4291 | |
| 4292 | /* | ||
| 4293 | * The number of IO-APIC IRQ registers (== #pins): | ||
| 4294 | */ | ||
| 4295 | nr_ioapic_registers[idx] = entries; | ||
| 4296 | |||
| 4297 | if (mp_gsi_routing[idx].gsi_end > gsi_end) | ||
| 4298 | gsi_end = mp_gsi_routing[idx].gsi_end; | ||
| 4308 | 4299 | ||
| 4309 | printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " | 4300 | printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " |
| 4310 | "GSI %d-%d\n", idx, mp_ioapics[idx].apicid, | 4301 | "GSI %d-%d\n", idx, mp_ioapics[idx].apicid, |
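
With the ES7000 renumbering hook gone, pin_2_irq() derives the IRQ number purely from the pin's GSI: a GSI at or above the legacy range is used as the IRQ directly, while a GSI that would collide with the 16 legacy IRQ numbers is pushed above the highest GSI registered so far (the new gsi_end). A hedged restatement of that policy, not the kernel function itself:

#include <stdio.h>

#define NR_IRQS_LEGACY 16       /* assumed: the 16 ISA IRQs */

static unsigned int gsi_to_irq(unsigned int gsi, unsigned int gsi_end)
{
    if (gsi >= NR_IRQS_LEGACY)
        return gsi;             /* 1:1 mapping */
    return gsi_end + 1 + gsi;   /* remap above all known GSIs */
}

int main(void)
{
    unsigned int gsi_end = 47;  /* e.g. two 24-pin IO-APICs: GSIs 0..47 */

    printf("gsi 20 -> irq %u\n", gsi_to_irq(20, gsi_end)); /* 20 */
    printf("gsi 3  -> irq %u\n", gsi_to_irq(3, gsi_end));  /* 51 */
    return 0;
}
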
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 031aa887b0eb..c4f9182ca3ac 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
| @@ -1224,7 +1224,7 @@ static void reinit_timer(void) | |||
| 1224 | #ifdef INIT_TIMER_AFTER_SUSPEND | 1224 | #ifdef INIT_TIMER_AFTER_SUSPEND |
| 1225 | unsigned long flags; | 1225 | unsigned long flags; |
| 1226 | 1226 | ||
| 1227 | spin_lock_irqsave(&i8253_lock, flags); | 1227 | raw_spin_lock_irqsave(&i8253_lock, flags); |
| 1228 | /* set the clock to HZ */ | 1228 | /* set the clock to HZ */ |
| 1229 | outb_pit(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */ | 1229 | outb_pit(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */ |
| 1230 | udelay(10); | 1230 | udelay(10); |
| @@ -1232,7 +1232,7 @@ static void reinit_timer(void) | |||
| 1232 | udelay(10); | 1232 | udelay(10); |
| 1233 | outb_pit(LATCH >> 8, PIT_CH0); /* MSB */ | 1233 | outb_pit(LATCH >> 8, PIT_CH0); /* MSB */ |
| 1234 | udelay(10); | 1234 | udelay(10); |
| 1235 | spin_unlock_irqrestore(&i8253_lock, flags); | 1235 | raw_spin_unlock_irqrestore(&i8253_lock, flags); |
| 1236 | #endif | 1236 | #endif |
| 1237 | } | 1237 | } |
| 1238 | 1238 | ||
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index c202b62f3671..3a785da34b6f 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
| @@ -14,7 +14,7 @@ CFLAGS_common.o := $(nostackp) | |||
| 14 | 14 | ||
| 15 | obj-y := intel_cacheinfo.o addon_cpuid_features.o | 15 | obj-y := intel_cacheinfo.o addon_cpuid_features.o |
| 16 | obj-y += proc.o capflags.o powerflags.o common.o | 16 | obj-y += proc.o capflags.o powerflags.o common.o |
| 17 | obj-y += vmware.o hypervisor.o sched.o | 17 | obj-y += vmware.o hypervisor.o sched.o mshyperv.o |
| 18 | 18 | ||
| 19 | obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o | 19 | obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o |
| 20 | obj-$(CONFIG_X86_64) += bugs_64.o | 20 | obj-$(CONFIG_X86_64) += bugs_64.o |
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 97ad79cdf688..10fa5684a662 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c | |||
| @@ -30,12 +30,14 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) | |||
| 30 | const struct cpuid_bit *cb; | 30 | const struct cpuid_bit *cb; |
| 31 | 31 | ||
| 32 | static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { | 32 | static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { |
| 33 | { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 }, | 33 | { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 }, |
| 34 | { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 }, | 34 | { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 }, |
| 35 | { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a }, | 35 | { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006 }, |
| 36 | { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a }, | 36 | { X86_FEATURE_CPB, CR_EDX, 9, 0x80000007 }, |
| 37 | { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a }, | 37 | { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a }, |
| 38 | { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a }, | 38 | { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a }, |
| 39 | { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a }, | ||
| 40 | { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a }, | ||
| 39 | { 0, 0, 0, 0 } | 41 | { 0, 0, 0, 0 } |
| 40 | }; | 42 | }; |
| 41 | 43 | ||
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4868e4a951ee..c1c00d0b1692 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
| @@ -1243,10 +1243,7 @@ void __cpuinit cpu_init(void) | |||
| 1243 | /* | 1243 | /* |
| 1244 | * Force FPU initialization: | 1244 | * Force FPU initialization: |
| 1245 | */ | 1245 | */ |
| 1246 | if (cpu_has_xsave) | 1246 | current_thread_info()->status = 0; |
| 1247 | current_thread_info()->status = TS_XSAVE; | ||
| 1248 | else | ||
| 1249 | current_thread_info()->status = 0; | ||
| 1250 | clear_used_math(); | 1247 | clear_used_math(); |
| 1251 | mxcsr_feature_mask_init(); | 1248 | mxcsr_feature_mask_init(); |
| 1252 | 1249 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/Makefile b/arch/x86/kernel/cpu/cpufreq/Makefile index 1840c0a5170b..bd54bf67e6fb 100644 --- a/arch/x86/kernel/cpu/cpufreq/Makefile +++ b/arch/x86/kernel/cpu/cpufreq/Makefile | |||
| @@ -2,8 +2,8 @@ | |||
| 2 | # K8 systems. ACPI is preferred to all other hardware-specific drivers. | 2 | # K8 systems. ACPI is preferred to all other hardware-specific drivers. |
| 3 | # speedstep-* is preferred over p4-clockmod. | 3 | # speedstep-* is preferred over p4-clockmod. |
| 4 | 4 | ||
| 5 | obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o | 5 | obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o mperf.o |
| 6 | obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o | 6 | obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o mperf.o |
| 7 | obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o | 7 | obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o |
| 8 | obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o | 8 | obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o |
| 9 | obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o | 9 | obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o |
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 459168083b77..1d3cddaa40ee 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | |||
| @@ -46,6 +46,7 @@ | |||
| 46 | #include <asm/msr.h> | 46 | #include <asm/msr.h> |
| 47 | #include <asm/processor.h> | 47 | #include <asm/processor.h> |
| 48 | #include <asm/cpufeature.h> | 48 | #include <asm/cpufeature.h> |
| 49 | #include "mperf.h" | ||
| 49 | 50 | ||
| 50 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ | 51 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ |
| 51 | "acpi-cpufreq", msg) | 52 | "acpi-cpufreq", msg) |
| @@ -71,8 +72,6 @@ struct acpi_cpufreq_data { | |||
| 71 | 72 | ||
| 72 | static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data); | 73 | static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data); |
| 73 | 74 | ||
| 74 | static DEFINE_PER_CPU(struct aperfmperf, acfreq_old_perf); | ||
| 75 | |||
| 76 | /* acpi_perf_data is a pointer to percpu data. */ | 75 | /* acpi_perf_data is a pointer to percpu data. */ |
| 77 | static struct acpi_processor_performance *acpi_perf_data; | 76 | static struct acpi_processor_performance *acpi_perf_data; |
| 78 | 77 | ||
| @@ -240,45 +239,6 @@ static u32 get_cur_val(const struct cpumask *mask) | |||
| 240 | return cmd.val; | 239 | return cmd.val; |
| 241 | } | 240 | } |
| 242 | 241 | ||
| 243 | /* Called via smp_call_function_single(), on the target CPU */ | ||
| 244 | static void read_measured_perf_ctrs(void *_cur) | ||
| 245 | { | ||
| 246 | struct aperfmperf *am = _cur; | ||
| 247 | |||
| 248 | get_aperfmperf(am); | ||
| 249 | } | ||
| 250 | |||
| 251 | /* | ||
| 252 | * Return the measured active (C0) frequency on this CPU since last call | ||
| 253 | * to this function. | ||
| 254 | * Input: cpu number | ||
| 255 | * Return: Average CPU frequency in terms of max frequency (zero on error) | ||
| 256 | * | ||
| 257 | * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance | ||
| 258 | * over a period of time, while CPU is in C0 state. | ||
| 259 | * IA32_MPERF counts at the rate of max advertised frequency | ||
| 260 | * IA32_APERF counts at the rate of actual CPU frequency | ||
| 261 | * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and | ||
| 262 | * no meaning should be associated with absolute values of these MSRs. | ||
| 263 | */ | ||
| 264 | static unsigned int get_measured_perf(struct cpufreq_policy *policy, | ||
| 265 | unsigned int cpu) | ||
| 266 | { | ||
| 267 | struct aperfmperf perf; | ||
| 268 | unsigned long ratio; | ||
| 269 | unsigned int retval; | ||
| 270 | |||
| 271 | if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1)) | ||
| 272 | return 0; | ||
| 273 | |||
| 274 | ratio = calc_aperfmperf_ratio(&per_cpu(acfreq_old_perf, cpu), &perf); | ||
| 275 | per_cpu(acfreq_old_perf, cpu) = perf; | ||
| 276 | |||
| 277 | retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT; | ||
| 278 | |||
| 279 | return retval; | ||
| 280 | } | ||
| 281 | |||
| 282 | static unsigned int get_cur_freq_on_cpu(unsigned int cpu) | 242 | static unsigned int get_cur_freq_on_cpu(unsigned int cpu) |
| 283 | { | 243 | { |
| 284 | struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu); | 244 | struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu); |
| @@ -702,7 +662,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) | |||
| 702 | 662 | ||
| 703 | /* Check for APERF/MPERF support in hardware */ | 663 | /* Check for APERF/MPERF support in hardware */ |
| 704 | if (cpu_has(c, X86_FEATURE_APERFMPERF)) | 664 | if (cpu_has(c, X86_FEATURE_APERFMPERF)) |
| 705 | acpi_cpufreq_driver.getavg = get_measured_perf; | 665 | acpi_cpufreq_driver.getavg = cpufreq_get_measured_perf; |
| 706 | 666 | ||
| 707 | dprintk("CPU%u - ACPI performance management activated.\n", cpu); | 667 | dprintk("CPU%u - ACPI performance management activated.\n", cpu); |
| 708 | for (i = 0; i < perf->state_count; i++) | 668 | for (i = 0; i < perf->state_count; i++) |
diff --git a/arch/x86/kernel/cpu/cpufreq/mperf.c b/arch/x86/kernel/cpu/cpufreq/mperf.c new file mode 100644 index 000000000000..911e193018ae --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/mperf.c | |||
| @@ -0,0 +1,51 @@ | |||
| 1 | #include <linux/kernel.h> | ||
| 2 | #include <linux/smp.h> | ||
| 3 | #include <linux/module.h> | ||
| 4 | #include <linux/init.h> | ||
| 5 | #include <linux/cpufreq.h> | ||
| 6 | #include <linux/slab.h> | ||
| 7 | |||
| 8 | #include "mperf.h" | ||
| 9 | |||
| 10 | static DEFINE_PER_CPU(struct aperfmperf, acfreq_old_perf); | ||
| 11 | |||
| 12 | /* Called via smp_call_function_single(), on the target CPU */ | ||
| 13 | static void read_measured_perf_ctrs(void *_cur) | ||
| 14 | { | ||
| 15 | struct aperfmperf *am = _cur; | ||
| 16 | |||
| 17 | get_aperfmperf(am); | ||
| 18 | } | ||
| 19 | |||
| 20 | /* | ||
| 21 | * Return the measured active (C0) frequency on this CPU since last call | ||
| 22 | * to this function. | ||
| 23 | * Input: cpu number | ||
| 24 | * Return: Average CPU frequency in terms of max frequency (zero on error) | ||
| 25 | * | ||
| 26 | * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance | ||
| 27 | * over a period of time, while CPU is in C0 state. | ||
| 28 | * IA32_MPERF counts at the rate of max advertised frequency | ||
| 29 | * IA32_APERF counts at the rate of actual CPU frequency | ||
| 30 | * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and | ||
| 31 | * no meaning should be associated with absolute values of these MSRs. | ||
| 32 | */ | ||
| 33 | unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy, | ||
| 34 | unsigned int cpu) | ||
| 35 | { | ||
| 36 | struct aperfmperf perf; | ||
| 37 | unsigned long ratio; | ||
| 38 | unsigned int retval; | ||
| 39 | |||
| 40 | if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1)) | ||
| 41 | return 0; | ||
| 42 | |||
| 43 | ratio = calc_aperfmperf_ratio(&per_cpu(acfreq_old_perf, cpu), &perf); | ||
| 44 | per_cpu(acfreq_old_perf, cpu) = perf; | ||
| 45 | |||
| 46 | retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT; | ||
| 47 | |||
| 48 | return retval; | ||
| 49 | } | ||
| 50 | EXPORT_SYMBOL_GPL(cpufreq_get_measured_perf); | ||
| 51 | MODULE_LICENSE("GPL"); | ||
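
mperf.c factors the APERF/MPERF-based frequency estimate out of acpi-cpufreq so that powernow-k8 can reuse it through the shared getavg hook. The core idea is a delta ratio scaled by the advertised maximum frequency; a toy illustration follows (the fixed-point details of calc_aperfmperf_ratio() are glossed over and the numbers are invented):

#include <stdio.h>
#include <stdint.h>

struct sample { uint64_t aperf, mperf; };

/* APERF counts at the actual frequency, MPERF at the maximum one, so
 * delta(APERF)/delta(MPERF) scales max_freq down to the average
 * frequency while the CPU was in C0. */
static unsigned int measured_khz(struct sample old, struct sample cur,
                                 unsigned int max_freq_khz)
{
    uint64_t da = cur.aperf - old.aperf;
    uint64_t dm = cur.mperf - old.mperf;

    if (!dm)
        return 0;
    return (unsigned int)(max_freq_khz * da / dm);
}

int main(void)
{
    struct sample t0 = { 1000000, 1000000 };
    struct sample t1 = { 1800000, 2000000 };    /* ran at ~80% of max */

    printf("%u kHz\n", measured_khz(t0, t1, 2600000));  /* 2080000 */
    return 0;
}
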
diff --git a/arch/x86/kernel/cpu/cpufreq/mperf.h b/arch/x86/kernel/cpu/cpufreq/mperf.h new file mode 100644 index 000000000000..5dbf2950dc22 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/mperf.h | |||
| @@ -0,0 +1,9 @@ | |||
| 1 | /* | ||
| 2 | * (c) 2010 Advanced Micro Devices, Inc. | ||
| 3 | * Your use of this code is subject to the terms and conditions of the | ||
| 4 | * GNU general public license version 2. See "COPYING" or | ||
| 5 | * http://www.gnu.org/licenses/gpl.html | ||
| 6 | */ | ||
| 7 | |||
| 8 | unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy, | ||
| 9 | unsigned int cpu); | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index b6215b9798e2..6f3dc8fbbfdc 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c | |||
| @@ -1,6 +1,5 @@ | |||
| 1 | |||
| 2 | /* | 1 | /* |
| 3 | * (c) 2003-2006 Advanced Micro Devices, Inc. | 2 | * (c) 2003-2010 Advanced Micro Devices, Inc. |
| 4 | * Your use of this code is subject to the terms and conditions of the | 3 | * Your use of this code is subject to the terms and conditions of the |
| 5 | * GNU general public license version 2. See "COPYING" or | 4 | * GNU general public license version 2. See "COPYING" or |
| 6 | * http://www.gnu.org/licenses/gpl.html | 5 | * http://www.gnu.org/licenses/gpl.html |
| @@ -46,6 +45,7 @@ | |||
| 46 | #define PFX "powernow-k8: " | 45 | #define PFX "powernow-k8: " |
| 47 | #define VERSION "version 2.20.00" | 46 | #define VERSION "version 2.20.00" |
| 48 | #include "powernow-k8.h" | 47 | #include "powernow-k8.h" |
| 48 | #include "mperf.h" | ||
| 49 | 49 | ||
| 50 | /* serialize freq changes */ | 50 | /* serialize freq changes */ |
| 51 | static DEFINE_MUTEX(fidvid_mutex); | 51 | static DEFINE_MUTEX(fidvid_mutex); |
| @@ -54,6 +54,12 @@ static DEFINE_PER_CPU(struct powernow_k8_data *, powernow_data); | |||
| 54 | 54 | ||
| 55 | static int cpu_family = CPU_OPTERON; | 55 | static int cpu_family = CPU_OPTERON; |
| 56 | 56 | ||
| 57 | /* core performance boost */ | ||
| 58 | static bool cpb_capable, cpb_enabled; | ||
| 59 | static struct msr __percpu *msrs; | ||
| 60 | |||
| 61 | static struct cpufreq_driver cpufreq_amd64_driver; | ||
| 62 | |||
| 57 | #ifndef CONFIG_SMP | 63 | #ifndef CONFIG_SMP |
| 58 | static inline const struct cpumask *cpu_core_mask(int cpu) | 64 | static inline const struct cpumask *cpu_core_mask(int cpu) |
| 59 | { | 65 | { |
| @@ -1249,6 +1255,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | |||
| 1249 | struct powernow_k8_data *data; | 1255 | struct powernow_k8_data *data; |
| 1250 | struct init_on_cpu init_on_cpu; | 1256 | struct init_on_cpu init_on_cpu; |
| 1251 | int rc; | 1257 | int rc; |
| 1258 | struct cpuinfo_x86 *c = &cpu_data(pol->cpu); | ||
| 1252 | 1259 | ||
| 1253 | if (!cpu_online(pol->cpu)) | 1260 | if (!cpu_online(pol->cpu)) |
| 1254 | return -ENODEV; | 1261 | return -ENODEV; |
| @@ -1323,6 +1330,10 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | |||
| 1323 | return -EINVAL; | 1330 | return -EINVAL; |
| 1324 | } | 1331 | } |
| 1325 | 1332 | ||
| 1333 | /* Check for APERF/MPERF support in hardware */ | ||
| 1334 | if (cpu_has(c, X86_FEATURE_APERFMPERF)) | ||
| 1335 | cpufreq_amd64_driver.getavg = cpufreq_get_measured_perf; | ||
| 1336 | |||
| 1326 | cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); | 1337 | cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); |
| 1327 | 1338 | ||
| 1328 | if (cpu_family == CPU_HW_PSTATE) | 1339 | if (cpu_family == CPU_HW_PSTATE) |
| @@ -1394,8 +1405,77 @@ out: | |||
| 1394 | return khz; | 1405 | return khz; |
| 1395 | } | 1406 | } |
| 1396 | 1407 | ||
| 1408 | static void _cpb_toggle_msrs(bool t) | ||
| 1409 | { | ||
| 1410 | int cpu; | ||
| 1411 | |||
| 1412 | get_online_cpus(); | ||
| 1413 | |||
| 1414 | rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs); | ||
| 1415 | |||
| 1416 | for_each_cpu(cpu, cpu_online_mask) { | ||
| 1417 | struct msr *reg = per_cpu_ptr(msrs, cpu); | ||
| 1418 | if (t) | ||
| 1419 | reg->l &= ~BIT(25); | ||
| 1420 | else | ||
| 1421 | reg->l |= BIT(25); | ||
| 1422 | } | ||
| 1423 | wrmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs); | ||
| 1424 | |||
| 1425 | put_online_cpus(); | ||
| 1426 | } | ||
| 1427 | |||
| 1428 | /* | ||
| 1429 | * Switch on/off core performance boosting. | ||
| 1430 | * | ||
| 1431 | * 0=disable | ||
| 1432 | * 1=enable. | ||
| 1433 | */ | ||
| 1434 | static void cpb_toggle(bool t) | ||
| 1435 | { | ||
| 1436 | if (!cpb_capable) | ||
| 1437 | return; | ||
| 1438 | |||
| 1439 | if (t && !cpb_enabled) { | ||
| 1440 | cpb_enabled = true; | ||
| 1441 | _cpb_toggle_msrs(t); | ||
| 1442 | printk(KERN_INFO PFX "Core Boosting enabled.\n"); | ||
| 1443 | } else if (!t && cpb_enabled) { | ||
| 1444 | cpb_enabled = false; | ||
| 1445 | _cpb_toggle_msrs(t); | ||
| 1446 | printk(KERN_INFO PFX "Core Boosting disabled.\n"); | ||
| 1447 | } | ||
| 1448 | } | ||
| 1449 | |||
| 1450 | static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf, | ||
| 1451 | size_t count) | ||
| 1452 | { | ||
| 1453 | int ret = -EINVAL; | ||
| 1454 | unsigned long val = 0; | ||
| 1455 | |||
| 1456 | ret = strict_strtoul(buf, 10, &val); | ||
| 1457 | if (!ret && (val == 0 || val == 1) && cpb_capable) | ||
| 1458 | cpb_toggle(val); | ||
| 1459 | else | ||
| 1460 | return -EINVAL; | ||
| 1461 | |||
| 1462 | return count; | ||
| 1463 | } | ||
| 1464 | |||
| 1465 | static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf) | ||
| 1466 | { | ||
| 1467 | return sprintf(buf, "%u\n", cpb_enabled); | ||
| 1468 | } | ||
| 1469 | |||
| 1470 | #define define_one_rw(_name) \ | ||
| 1471 | static struct freq_attr _name = \ | ||
| 1472 | __ATTR(_name, 0644, show_##_name, store_##_name) | ||
| 1473 | |||
| 1474 | define_one_rw(cpb); | ||
| 1475 | |||
| 1397 | static struct freq_attr *powernow_k8_attr[] = { | 1476 | static struct freq_attr *powernow_k8_attr[] = { |
| 1398 | &cpufreq_freq_attr_scaling_available_freqs, | 1477 | &cpufreq_freq_attr_scaling_available_freqs, |
| 1478 | &cpb, | ||
| 1399 | NULL, | 1479 | NULL, |
| 1400 | }; | 1480 | }; |
| 1401 | 1481 | ||
| @@ -1411,10 +1491,51 @@ static struct cpufreq_driver cpufreq_amd64_driver = { | |||
| 1411 | .attr = powernow_k8_attr, | 1491 | .attr = powernow_k8_attr, |
| 1412 | }; | 1492 | }; |
| 1413 | 1493 | ||
| 1494 | /* | ||
| 1495 | * Clear the boost-disable flag on the CPU_DOWN path so that this cpu | ||
| 1496 | * cannot block the remaining ones from boosting. On the CPU_UP path we | ||
| 1497 | * simply keep the boost-disable flag in sync with the current global | ||
| 1498 | * state. | ||
| 1499 | */ | ||
| 1500 | static int __cpuinit cpb_notify(struct notifier_block *nb, unsigned long action, | ||
| 1501 | void *hcpu) | ||
| 1502 | { | ||
| 1503 | unsigned cpu = (long)hcpu; | ||
| 1504 | u32 lo, hi; | ||
| 1505 | |||
| 1506 | switch (action) { | ||
| 1507 | case CPU_UP_PREPARE: | ||
| 1508 | case CPU_UP_PREPARE_FROZEN: | ||
| 1509 | |||
| 1510 | if (!cpb_enabled) { | ||
| 1511 | rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi); | ||
| 1512 | lo |= BIT(25); | ||
| 1513 | wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi); | ||
| 1514 | } | ||
| 1515 | break; | ||
| 1516 | |||
| 1517 | case CPU_DOWN_PREPARE: | ||
| 1518 | case CPU_DOWN_PREPARE_FROZEN: | ||
| 1519 | rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi); | ||
| 1520 | lo &= ~BIT(25); | ||
| 1521 | wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi); | ||
| 1522 | break; | ||
| 1523 | |||
| 1524 | default: | ||
| 1525 | break; | ||
| 1526 | } | ||
| 1527 | |||
| 1528 | return NOTIFY_OK; | ||
| 1529 | } | ||
| 1530 | |||
| 1531 | static struct notifier_block __cpuinitdata cpb_nb = { | ||
| 1532 | .notifier_call = cpb_notify, | ||
| 1533 | }; | ||
| 1534 | |||
| 1414 | /* driver entry point for init */ | 1535 | /* driver entry point for init */ |
| 1415 | static int __cpuinit powernowk8_init(void) | 1536 | static int __cpuinit powernowk8_init(void) |
| 1416 | { | 1537 | { |
| 1417 | unsigned int i, supported_cpus = 0; | 1538 | unsigned int i, supported_cpus = 0, cpu; |
| 1418 | 1539 | ||
| 1419 | for_each_online_cpu(i) { | 1540 | for_each_online_cpu(i) { |
| 1420 | int rc; | 1541 | int rc; |
| @@ -1423,15 +1544,36 @@ static int __cpuinit powernowk8_init(void) | |||
| 1423 | supported_cpus++; | 1544 | supported_cpus++; |
| 1424 | } | 1545 | } |
| 1425 | 1546 | ||
| 1426 | if (supported_cpus == num_online_cpus()) { | 1547 | if (supported_cpus != num_online_cpus()) |
| 1427 | printk(KERN_INFO PFX "Found %d %s " | 1548 | return -ENODEV; |
| 1428 | "processors (%d cpu cores) (" VERSION ")\n", | 1549 | |
| 1429 | num_online_nodes(), | 1550 | printk(KERN_INFO PFX "Found %d %s (%d cpu cores) (" VERSION ")\n", |
| 1430 | boot_cpu_data.x86_model_id, supported_cpus); | 1551 | num_online_nodes(), boot_cpu_data.x86_model_id, supported_cpus); |
| 1431 | return cpufreq_register_driver(&cpufreq_amd64_driver); | 1552 | |
| 1553 | if (boot_cpu_has(X86_FEATURE_CPB)) { | ||
| 1554 | |||
| 1555 | cpb_capable = true; | ||
| 1556 | |||
| 1557 | register_cpu_notifier(&cpb_nb); | ||
| 1558 | |||
| 1559 | msrs = msrs_alloc(); | ||
| 1560 | if (!msrs) { | ||
| 1561 | printk(KERN_ERR "%s: Error allocating msrs!\n", __func__); | ||
| 1562 | return -ENOMEM; | ||
| 1563 | } | ||
| 1564 | |||
| 1565 | rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs); | ||
| 1566 | |||
| 1567 | for_each_cpu(cpu, cpu_online_mask) { | ||
| 1568 | struct msr *reg = per_cpu_ptr(msrs, cpu); | ||
| 1569 | cpb_enabled |= !(!!(reg->l & BIT(25))); | ||
| 1570 | } | ||
| 1571 | |||
| 1572 | printk(KERN_INFO PFX "Core Performance Boosting: %s.\n", | ||
| 1573 | (cpb_enabled ? "on" : "off")); | ||
| 1432 | } | 1574 | } |
| 1433 | 1575 | ||
| 1434 | return -ENODEV; | 1576 | return cpufreq_register_driver(&cpufreq_amd64_driver); |
| 1435 | } | 1577 | } |
| 1436 | 1578 | ||
| 1437 | /* driver entry point for term */ | 1579 | /* driver entry point for term */ |
| @@ -1439,6 +1581,13 @@ static void __exit powernowk8_exit(void) | |||
| 1439 | { | 1581 | { |
| 1440 | dprintk("exit\n"); | 1582 | dprintk("exit\n"); |
| 1441 | 1583 | ||
| 1584 | if (boot_cpu_has(X86_FEATURE_CPB)) { | ||
| 1585 | msrs_free(msrs); | ||
| 1586 | msrs = NULL; | ||
| 1587 | |||
| 1588 | unregister_cpu_notifier(&cpb_nb); | ||
| 1589 | } | ||
| 1590 | |||
| 1442 | cpufreq_unregister_driver(&cpufreq_amd64_driver); | 1591 | cpufreq_unregister_driver(&cpufreq_amd64_driver); |
| 1443 | } | 1592 | } |
| 1444 | 1593 | ||
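
The powernow-k8 additions wire up Core Performance Boost: capability is taken from the new CPB CPUID flag, the current state is read from bit 25 of MSR_K7_HWCR on every online CPU (bit set means boost disabled), and a writable cpb attribute toggles it. Once the driver is loaded the attribute is expected to appear under each policy's cpufreq directory (for example /sys/devices/system/cpu/cpu0/cpufreq/cpb) and to accept 0 or 1. The MSR bit handling itself is plain bit arithmetic; a sketch of just that part, with no MSR access and the bit position taken from the hunk:

#include <stdio.h>
#include <stdint.h>

#define HWCR_CPB_DIS (1UL << 25)    /* boost-disable bit, as used above */

/* Mirrors _cpb_toggle_msrs(): enabling boost clears the disable bit,
 * disabling boost sets it. */
static uint32_t hwcr_apply_cpb(uint32_t hwcr_lo, int enable_boost)
{
    if (enable_boost)
        return hwcr_lo & ~HWCR_CPB_DIS;
    return hwcr_lo | HWCR_CPB_DIS;
}

int main(void)
{
    uint32_t lo = 0;

    lo = hwcr_apply_cpb(lo, 0);
    printf("boost off: disable bit = %u\n", !!(lo & HWCR_CPB_DIS));
    lo = hwcr_apply_cpb(lo, 1);
    printf("boost on : disable bit = %u\n", !!(lo & HWCR_CPB_DIS));
    return 0;
}
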
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h index 02ce824073cb..df3529b1c02d 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h | |||
| @@ -5,7 +5,6 @@ | |||
| 5 | * http://www.gnu.org/licenses/gpl.html | 5 | * http://www.gnu.org/licenses/gpl.html |
| 6 | */ | 6 | */ |
| 7 | 7 | ||
| 8 | |||
| 9 | enum pstate { | 8 | enum pstate { |
| 10 | HW_PSTATE_INVALID = 0xff, | 9 | HW_PSTATE_INVALID = 0xff, |
| 11 | HW_PSTATE_0 = 0, | 10 | HW_PSTATE_0 = 0, |
| @@ -55,7 +54,6 @@ struct powernow_k8_data { | |||
| 55 | struct cpumask *available_cores; | 54 | struct cpumask *available_cores; |
| 56 | }; | 55 | }; |
| 57 | 56 | ||
| 58 | |||
| 59 | /* processor's cpuid instruction support */ | 57 | /* processor's cpuid instruction support */ |
| 60 | #define CPUID_PROCESSOR_SIGNATURE 1 /* function 1 */ | 58 | #define CPUID_PROCESSOR_SIGNATURE 1 /* function 1 */ |
| 61 | #define CPUID_XFAM 0x0ff00000 /* extended family */ | 59 | #define CPUID_XFAM 0x0ff00000 /* extended family */ |
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index 08be922de33a..dd531cc56a8f 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c | |||
| @@ -21,37 +21,55 @@ | |||
| 21 | * | 21 | * |
| 22 | */ | 22 | */ |
| 23 | 23 | ||
| 24 | #include <linux/module.h> | ||
| 24 | #include <asm/processor.h> | 25 | #include <asm/processor.h> |
| 25 | #include <asm/vmware.h> | ||
| 26 | #include <asm/hypervisor.h> | 26 | #include <asm/hypervisor.h> |
| 27 | 27 | ||
| 28 | static inline void __cpuinit | 28 | /* |
| 29 | detect_hypervisor_vendor(struct cpuinfo_x86 *c) | 29 | * Hypervisor detect order. This is specified explicitly here because |
| 30 | * some hypervisors might implement compatibility modes for other | ||
| 31 | * hypervisors and therefore need to be detected in a specific sequence. | ||
| 32 | */ | ||
| 33 | static const __initconst struct hypervisor_x86 * const hypervisors[] = | ||
| 30 | { | 34 | { |
| 31 | if (vmware_platform()) | 35 | &x86_hyper_vmware, |
| 32 | c->x86_hyper_vendor = X86_HYPER_VENDOR_VMWARE; | 36 | &x86_hyper_ms_hyperv, |
| 33 | else | 37 | }; |
| 34 | c->x86_hyper_vendor = X86_HYPER_VENDOR_NONE; | ||
| 35 | } | ||
| 36 | 38 | ||
| 37 | static inline void __cpuinit | 39 | const struct hypervisor_x86 *x86_hyper; |
| 38 | hypervisor_set_feature_bits(struct cpuinfo_x86 *c) | 40 | EXPORT_SYMBOL(x86_hyper); |
| 41 | |||
| 42 | static inline void __init | ||
| 43 | detect_hypervisor_vendor(void) | ||
| 39 | { | 44 | { |
| 40 | if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE) { | 45 | const struct hypervisor_x86 *h, * const *p; |
| 41 | vmware_set_feature_bits(c); | 46 | |
| 42 | return; | 47 | for (p = hypervisors; p < hypervisors + ARRAY_SIZE(hypervisors); p++) { |
| 48 | h = *p; | ||
| 49 | if (h->detect()) { | ||
| 50 | x86_hyper = h; | ||
| 51 | printk(KERN_INFO "Hypervisor detected: %s\n", h->name); | ||
| 52 | break; | ||
| 53 | } | ||
| 43 | } | 54 | } |
| 44 | } | 55 | } |
| 45 | 56 | ||
| 46 | void __cpuinit init_hypervisor(struct cpuinfo_x86 *c) | 57 | void __cpuinit init_hypervisor(struct cpuinfo_x86 *c) |
| 47 | { | 58 | { |
| 48 | detect_hypervisor_vendor(c); | 59 | if (x86_hyper && x86_hyper->set_cpu_features) |
| 49 | hypervisor_set_feature_bits(c); | 60 | x86_hyper->set_cpu_features(c); |
| 50 | } | 61 | } |
| 51 | 62 | ||
| 52 | void __init init_hypervisor_platform(void) | 63 | void __init init_hypervisor_platform(void) |
| 53 | { | 64 | { |
| 65 | |||
| 66 | detect_hypervisor_vendor(); | ||
| 67 | |||
| 68 | if (!x86_hyper) | ||
| 69 | return; | ||
| 70 | |||
| 54 | init_hypervisor(&boot_cpu_data); | 71 | init_hypervisor(&boot_cpu_data); |
| 55 | if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE) | 72 | |
| 56 | vmware_platform_setup(); | 73 | if (x86_hyper->init_platform) |
| 74 | x86_hyper->init_platform(); | ||
| 57 | } | 75 | } |
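
hypervisor.c now keeps a fixed-order table of hypervisor descriptors and latches the first one whose detect() hook reports a match in the global x86_hyper pointer; feature setup and platform init then become optional callbacks on that descriptor. The shape of that pattern, with stand-in entries rather than the real x86_hyper_vmware / x86_hyper_ms_hyperv descriptors:

#include <stdio.h>
#include <stddef.h>

struct hypervisor_desc {
    const char *name;
    int (*detect)(void);
};

/* Stand-in probes: the first table entry "loses", the second "wins". */
static int detect_first(void)  { return 0; }
static int detect_second(void) { return 1; }

static const struct hypervisor_desc hypervisors[] = {
    { "hyper-one", detect_first },
    { "hyper-two", detect_second },
};

static const struct hypervisor_desc *detected;

int main(void)
{
    size_t i;

    /* Probe in table order and stop at the first match, like
     * detect_hypervisor_vendor() does with x86_hyper. */
    for (i = 0; i < sizeof(hypervisors) / sizeof(hypervisors[0]); i++) {
        if (hypervisors[i].detect()) {
            detected = &hypervisors[i];
            break;
        }
    }

    printf("Hypervisor detected: %s\n", detected ? detected->name : "none");
    return 0;
}
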
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 1366c7cfd483..85f69cdeae10 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
| @@ -12,7 +12,6 @@ | |||
| 12 | #include <asm/processor.h> | 12 | #include <asm/processor.h> |
| 13 | #include <asm/pgtable.h> | 13 | #include <asm/pgtable.h> |
| 14 | #include <asm/msr.h> | 14 | #include <asm/msr.h> |
| 15 | #include <asm/ds.h> | ||
| 16 | #include <asm/bugs.h> | 15 | #include <asm/bugs.h> |
| 17 | #include <asm/cpu.h> | 16 | #include <asm/cpu.h> |
| 18 | 17 | ||
| @@ -373,12 +372,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
| 373 | set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); | 372 | set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); |
| 374 | } | 373 | } |
| 375 | 374 | ||
| 376 | if (c->cpuid_level > 6) { | ||
| 377 | unsigned ecx = cpuid_ecx(6); | ||
| 378 | if (ecx & 0x01) | ||
| 379 | set_cpu_cap(c, X86_FEATURE_APERFMPERF); | ||
| 380 | } | ||
| 381 | |||
| 382 | if (cpu_has_xmm2) | 375 | if (cpu_has_xmm2) |
| 383 | set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); | 376 | set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); |
| 384 | if (cpu_has_ds) { | 377 | if (cpu_has_ds) { |
| @@ -388,7 +381,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
| 388 | set_cpu_cap(c, X86_FEATURE_BTS); | 381 | set_cpu_cap(c, X86_FEATURE_BTS); |
| 389 | if (!(l1 & (1<<12))) | 382 | if (!(l1 & (1<<12))) |
| 390 | set_cpu_cap(c, X86_FEATURE_PEBS); | 383 | set_cpu_cap(c, X86_FEATURE_PEBS); |
| 391 | ds_init_intel(c); | ||
| 392 | } | 384 | } |
| 393 | 385 | ||
| 394 | if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush) | 386 | if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush) |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index b3eeb66c0a51..33eae2062cf5 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
| @@ -148,13 +148,19 @@ union _cpuid4_leaf_ecx { | |||
| 148 | u32 full; | 148 | u32 full; |
| 149 | }; | 149 | }; |
| 150 | 150 | ||
| 151 | struct amd_l3_cache { | ||
| 152 | struct pci_dev *dev; | ||
| 153 | bool can_disable; | ||
| 154 | unsigned indices; | ||
| 155 | u8 subcaches[4]; | ||
| 156 | }; | ||
| 157 | |||
| 151 | struct _cpuid4_info { | 158 | struct _cpuid4_info { |
| 152 | union _cpuid4_leaf_eax eax; | 159 | union _cpuid4_leaf_eax eax; |
| 153 | union _cpuid4_leaf_ebx ebx; | 160 | union _cpuid4_leaf_ebx ebx; |
| 154 | union _cpuid4_leaf_ecx ecx; | 161 | union _cpuid4_leaf_ecx ecx; |
| 155 | unsigned long size; | 162 | unsigned long size; |
| 156 | bool can_disable; | 163 | struct amd_l3_cache *l3; |
| 157 | unsigned int l3_indices; | ||
| 158 | DECLARE_BITMAP(shared_cpu_map, NR_CPUS); | 164 | DECLARE_BITMAP(shared_cpu_map, NR_CPUS); |
| 159 | }; | 165 | }; |
| 160 | 166 | ||
| @@ -164,8 +170,7 @@ struct _cpuid4_info_regs { | |||
| 164 | union _cpuid4_leaf_ebx ebx; | 170 | union _cpuid4_leaf_ebx ebx; |
| 165 | union _cpuid4_leaf_ecx ecx; | 171 | union _cpuid4_leaf_ecx ecx; |
| 166 | unsigned long size; | 172 | unsigned long size; |
| 167 | bool can_disable; | 173 | struct amd_l3_cache *l3; |
| 168 | unsigned int l3_indices; | ||
| 169 | }; | 174 | }; |
| 170 | 175 | ||
| 171 | unsigned short num_cache_leaves; | 176 | unsigned short num_cache_leaves; |
| @@ -302,87 +307,163 @@ struct _cache_attr { | |||
| 302 | }; | 307 | }; |
| 303 | 308 | ||
| 304 | #ifdef CONFIG_CPU_SUP_AMD | 309 | #ifdef CONFIG_CPU_SUP_AMD |
| 305 | static unsigned int __cpuinit amd_calc_l3_indices(void) | 310 | |
| 311 | /* | ||
| 312 | * L3 cache descriptors | ||
| 313 | */ | ||
| 314 | static struct amd_l3_cache **__cpuinitdata l3_caches; | ||
| 315 | |||
| 316 | static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3) | ||
| 306 | { | 317 | { |
| 307 | /* | ||
| 308 | * We're called over smp_call_function_single() and therefore | ||
| 309 | * are on the correct cpu. | ||
| 310 | */ | ||
| 311 | int cpu = smp_processor_id(); | ||
| 312 | int node = cpu_to_node(cpu); | ||
| 313 | struct pci_dev *dev = node_to_k8_nb_misc(node); | ||
| 314 | unsigned int sc0, sc1, sc2, sc3; | 318 | unsigned int sc0, sc1, sc2, sc3; |
| 315 | u32 val = 0; | 319 | u32 val = 0; |
| 316 | 320 | ||
| 317 | pci_read_config_dword(dev, 0x1C4, &val); | 321 | pci_read_config_dword(l3->dev, 0x1C4, &val); |
| 318 | 322 | ||
| 319 | /* calculate subcache sizes */ | 323 | /* calculate subcache sizes */ |
| 320 | sc0 = !(val & BIT(0)); | 324 | l3->subcaches[0] = sc0 = !(val & BIT(0)); |
| 321 | sc1 = !(val & BIT(4)); | 325 | l3->subcaches[1] = sc1 = !(val & BIT(4)); |
| 322 | sc2 = !(val & BIT(8)) + !(val & BIT(9)); | 326 | l3->subcaches[2] = sc2 = !(val & BIT(8)) + !(val & BIT(9)); |
| 323 | sc3 = !(val & BIT(12)) + !(val & BIT(13)); | 327 | l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13)); |
| 324 | 328 | ||
| 325 | return (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1; | 329 | l3->indices = (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1; |
| 330 | } | ||
| 331 | |||
| 332 | static struct amd_l3_cache * __cpuinit amd_init_l3_cache(int node) | ||
| 333 | { | ||
| 334 | struct amd_l3_cache *l3; | ||
| 335 | struct pci_dev *dev = node_to_k8_nb_misc(node); | ||
| 336 | |||
| 337 | l3 = kzalloc(sizeof(struct amd_l3_cache), GFP_ATOMIC); | ||
| 338 | if (!l3) { | ||
| 339 | printk(KERN_WARNING "Error allocating L3 struct\n"); | ||
| 340 | return NULL; | ||
| 341 | } | ||
| 342 | |||
| 343 | l3->dev = dev; | ||
| 344 | |||
| 345 | amd_calc_l3_indices(l3); | ||
| 346 | |||
| 347 | return l3; | ||
| 326 | } | 348 | } |
| 327 | 349 | ||
| 328 | static void __cpuinit | 350 | static void __cpuinit |
| 329 | amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) | 351 | amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) |
| 330 | { | 352 | { |
| 331 | if (index < 3) | 353 | int node; |
| 354 | |||
| 355 | if (boot_cpu_data.x86 != 0x10) | ||
| 332 | return; | 356 | return; |
| 333 | 357 | ||
| 334 | if (boot_cpu_data.x86 == 0x11) | 358 | if (index < 3) |
| 335 | return; | 359 | return; |
| 336 | 360 | ||
| 337 | /* see errata #382 and #388 */ | 361 | /* see errata #382 and #388 */ |
| 338 | if ((boot_cpu_data.x86 == 0x10) && | 362 | if (boot_cpu_data.x86_model < 0x8) |
| 339 | ((boot_cpu_data.x86_model < 0x8) || | 363 | return; |
| 340 | (boot_cpu_data.x86_mask < 0x1))) | 364 | |
| 365 | if ((boot_cpu_data.x86_model == 0x8 || | ||
| 366 | boot_cpu_data.x86_model == 0x9) | ||
| 367 | && | ||
| 368 | boot_cpu_data.x86_mask < 0x1) | ||
| 369 | return; | ||
| 370 | |||
| 371 | /* not in virtualized environments */ | ||
| 372 | if (num_k8_northbridges == 0) | ||
| 341 | return; | 373 | return; |
| 342 | 374 | ||
| 343 | this_leaf->can_disable = true; | 375 | /* |
| 344 | this_leaf->l3_indices = amd_calc_l3_indices(); | 376 | * Strictly speaking, the amount in @size below is leaked since it is |
| 377 | * never freed but this is done only on shutdown so it doesn't matter. | ||
| 378 | */ | ||
| 379 | if (!l3_caches) { | ||
| 380 | int size = num_k8_northbridges * sizeof(struct amd_l3_cache *); | ||
| 381 | |||
| 382 | l3_caches = kzalloc(size, GFP_ATOMIC); | ||
| 383 | if (!l3_caches) | ||
| 384 | return; | ||
| 385 | } | ||
| 386 | |||
| 387 | node = amd_get_nb_id(smp_processor_id()); | ||
| 388 | |||
| 389 | if (!l3_caches[node]) { | ||
| 390 | l3_caches[node] = amd_init_l3_cache(node); | ||
| 391 | l3_caches[node]->can_disable = true; | ||
| 392 | } | ||
| 393 | |||
| 394 | WARN_ON(!l3_caches[node]); | ||
| 395 | |||
| 396 | this_leaf->l3 = l3_caches[node]; | ||
| 345 | } | 397 | } |
| 346 | 398 | ||
| 347 | static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf, | 399 | static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf, |
| 348 | unsigned int index) | 400 | unsigned int slot) |
| 349 | { | 401 | { |
| 350 | int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); | 402 | struct pci_dev *dev = this_leaf->l3->dev; |
| 351 | int node = amd_get_nb_id(cpu); | ||
| 352 | struct pci_dev *dev = node_to_k8_nb_misc(node); | ||
| 353 | unsigned int reg = 0; | 403 | unsigned int reg = 0; |
| 354 | 404 | ||
| 355 | if (!this_leaf->can_disable) | 405 | if (!this_leaf->l3 || !this_leaf->l3->can_disable) |
| 356 | return -EINVAL; | 406 | return -EINVAL; |
| 357 | 407 | ||
| 358 | if (!dev) | 408 | if (!dev) |
| 359 | return -EINVAL; | 409 | return -EINVAL; |
| 360 | 410 | ||
| 361 | pci_read_config_dword(dev, 0x1BC + index * 4, ®); | 411 | pci_read_config_dword(dev, 0x1BC + slot * 4, ®); |
| 362 | return sprintf(buf, "0x%08x\n", reg); | 412 | return sprintf(buf, "0x%08x\n", reg); |
| 363 | } | 413 | } |
| 364 | 414 | ||
| 365 | #define SHOW_CACHE_DISABLE(index) \ | 415 | #define SHOW_CACHE_DISABLE(slot) \ |
| 366 | static ssize_t \ | 416 | static ssize_t \ |
| 367 | show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf) \ | 417 | show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf) \ |
| 368 | { \ | 418 | { \ |
| 369 | return show_cache_disable(this_leaf, buf, index); \ | 419 | return show_cache_disable(this_leaf, buf, slot); \ |
| 370 | } | 420 | } |
| 371 | SHOW_CACHE_DISABLE(0) | 421 | SHOW_CACHE_DISABLE(0) |
| 372 | SHOW_CACHE_DISABLE(1) | 422 | SHOW_CACHE_DISABLE(1) |
| 373 | 423 | ||
| 424 | static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu, | ||
| 425 | unsigned slot, unsigned long idx) | ||
| 426 | { | ||
| 427 | int i; | ||
| 428 | |||
| 429 | idx |= BIT(30); | ||
| 430 | |||
| 431 | /* | ||
| 432 | * disable index in all 4 subcaches | ||
| 433 | */ | ||
| 434 | for (i = 0; i < 4; i++) { | ||
| 435 | u32 reg = idx | (i << 20); | ||
| 436 | |||
| 437 | if (!l3->subcaches[i]) | ||
| 438 | continue; | ||
| 439 | |||
| 440 | pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg); | ||
| 441 | |||
| 442 | /* | ||
| 443 | * We need to WBINVD on a core on the node containing the L3 | ||
| 444 | * cache which indices we disable therefore a simple wbinvd() | ||
| 445 | * is not sufficient. | ||
| 446 | */ | ||
| 447 | wbinvd_on_cpu(cpu); | ||
| 448 | |||
| 449 | reg |= BIT(31); | ||
| 450 | pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg); | ||
| 451 | } | ||
| 452 | } | ||
| 453 | |||
| 454 | |||
| 374 | static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, | 455 | static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, |
| 375 | const char *buf, size_t count, unsigned int index) | 456 | const char *buf, size_t count, |
| 457 | unsigned int slot) | ||
| 376 | { | 458 | { |
| 459 | struct pci_dev *dev = this_leaf->l3->dev; | ||
| 377 | int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); | 460 | int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); |
| 378 | int node = amd_get_nb_id(cpu); | ||
| 379 | struct pci_dev *dev = node_to_k8_nb_misc(node); | ||
| 380 | unsigned long val = 0; | 461 | unsigned long val = 0; |
| 381 | 462 | ||
| 382 | #define SUBCACHE_MASK (3UL << 20) | 463 | #define SUBCACHE_MASK (3UL << 20) |
| 383 | #define SUBCACHE_INDEX 0xfff | 464 | #define SUBCACHE_INDEX 0xfff |
| 384 | 465 | ||
| 385 | if (!this_leaf->can_disable) | 466 | if (!this_leaf->l3 || !this_leaf->l3->can_disable) |
| 386 | return -EINVAL; | 467 | return -EINVAL; |
| 387 | 468 | ||
| 388 | if (!capable(CAP_SYS_ADMIN)) | 469 | if (!capable(CAP_SYS_ADMIN)) |
| @@ -396,26 +477,20 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, | |||
| 396 | 477 | ||
| 397 | /* do not allow writes outside of allowed bits */ | 478 | /* do not allow writes outside of allowed bits */ |
| 398 | if ((val & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) || | 479 | if ((val & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) || |
| 399 | ((val & SUBCACHE_INDEX) > this_leaf->l3_indices)) | 480 | ((val & SUBCACHE_INDEX) > this_leaf->l3->indices)) |
| 400 | return -EINVAL; | 481 | return -EINVAL; |
| 401 | 482 | ||
| 402 | val |= BIT(30); | 483 | amd_l3_disable_index(this_leaf->l3, cpu, slot, val); |
| 403 | pci_write_config_dword(dev, 0x1BC + index * 4, val); | 484 | |
| 404 | /* | ||
| 405 | * We need to WBINVD on a core on the node containing the L3 cache which | ||
| 406 | * indices we disable therefore a simple wbinvd() is not sufficient. | ||
| 407 | */ | ||
| 408 | wbinvd_on_cpu(cpu); | ||
| 409 | pci_write_config_dword(dev, 0x1BC + index * 4, val | BIT(31)); | ||
| 410 | return count; | 485 | return count; |
| 411 | } | 486 | } |
| 412 | 487 | ||
| 413 | #define STORE_CACHE_DISABLE(index) \ | 488 | #define STORE_CACHE_DISABLE(slot) \ |
| 414 | static ssize_t \ | 489 | static ssize_t \ |
| 415 | store_cache_disable_##index(struct _cpuid4_info *this_leaf, \ | 490 | store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \ |
| 416 | const char *buf, size_t count) \ | 491 | const char *buf, size_t count) \ |
| 417 | { \ | 492 | { \ |
| 418 | return store_cache_disable(this_leaf, buf, count, index); \ | 493 | return store_cache_disable(this_leaf, buf, count, slot); \ |
| 419 | } | 494 | } |
| 420 | STORE_CACHE_DISABLE(0) | 495 | STORE_CACHE_DISABLE(0) |
| 421 | STORE_CACHE_DISABLE(1) | 496 | STORE_CACHE_DISABLE(1) |
| @@ -443,8 +518,7 @@ __cpuinit cpuid4_cache_lookup_regs(int index, | |||
| 443 | 518 | ||
| 444 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { | 519 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { |
| 445 | amd_cpuid4(index, &eax, &ebx, &ecx); | 520 | amd_cpuid4(index, &eax, &ebx, &ecx); |
| 446 | if (boot_cpu_data.x86 >= 0x10) | 521 | amd_check_l3_disable(index, this_leaf); |
| 447 | amd_check_l3_disable(index, this_leaf); | ||
| 448 | } else { | 522 | } else { |
| 449 | cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); | 523 | cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); |
| 450 | } | 524 | } |
| @@ -701,6 +775,7 @@ static void __cpuinit free_cache_attributes(unsigned int cpu) | |||
| 701 | for (i = 0; i < num_cache_leaves; i++) | 775 | for (i = 0; i < num_cache_leaves; i++) |
| 702 | cache_remove_shared_cpu_map(cpu, i); | 776 | cache_remove_shared_cpu_map(cpu, i); |
| 703 | 777 | ||
| 778 | kfree(per_cpu(ici_cpuid4_info, cpu)->l3); | ||
| 704 | kfree(per_cpu(ici_cpuid4_info, cpu)); | 779 | kfree(per_cpu(ici_cpuid4_info, cpu)); |
| 705 | per_cpu(ici_cpuid4_info, cpu) = NULL; | 780 | per_cpu(ici_cpuid4_info, cpu) = NULL; |
| 706 | } | 781 | } |
| @@ -985,7 +1060,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) | |||
| 985 | 1060 | ||
| 986 | this_leaf = CPUID4_INFO_IDX(cpu, i); | 1061 | this_leaf = CPUID4_INFO_IDX(cpu, i); |
| 987 | 1062 | ||
| 988 | if (this_leaf->can_disable) | 1063 | if (this_leaf->l3 && this_leaf->l3->can_disable) |
| 989 | ktype_cache.default_attrs = default_l3_attrs; | 1064 | ktype_cache.default_attrs = default_l3_attrs; |
| 990 | else | 1065 | else |
| 991 | ktype_cache.default_attrs = default_attrs; | 1066 | ktype_cache.default_attrs = default_attrs; |
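The cache_disable_0/cache_disable_1 attributes reworked above are normally driven from userspace. A minimal sketch of that side, assuming the usual sysfs location of the L3 leaf (the path and the 0x0c0 index value are illustrative, not taken from this patch):

#include <stdio.h>

/*
 * Write an index to slot 0 of cpu0's L3 cache-disable interface.  The
 * value ends up in store_cache_disable() above, which checks it against
 * l3->indices and then calls amd_l3_disable_index().
 */
int main(void)
{
	FILE *f = fopen("/sys/devices/system/cpu/cpu0/cache/index3/cache_disable_0", "w");

	if (!f)
		return 1;
	fprintf(f, "0x0c0\n");
	return fclose(f) ? 1 : 0;
}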
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 8a6f0afa767e..7a355ddcc64b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
| @@ -539,7 +539,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
| 539 | struct mce m; | 539 | struct mce m; |
| 540 | int i; | 540 | int i; |
| 541 | 541 | ||
| 542 | __get_cpu_var(mce_poll_count)++; | 542 | percpu_inc(mce_poll_count); |
| 543 | 543 | ||
| 544 | mce_setup(&m); | 544 | mce_setup(&m); |
| 545 | 545 | ||
| @@ -934,7 +934,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
| 934 | 934 | ||
| 935 | atomic_inc(&mce_entry); | 935 | atomic_inc(&mce_entry); |
| 936 | 936 | ||
| 937 | __get_cpu_var(mce_exception_count)++; | 937 | percpu_inc(mce_exception_count); |
| 938 | 938 | ||
| 939 | if (notify_die(DIE_NMI, "machine check", regs, error_code, | 939 | if (notify_die(DIE_NMI, "machine check", regs, error_code, |
| 940 | 18, SIGKILL) == NOTIFY_STOP) | 940 | 18, SIGKILL) == NOTIFY_STOP) |
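percpu_inc() here is a drop-in replacement for the open-coded per-CPU increment; the change is about code generation, not semantics. Roughly (a sketch of the intent, not the actual implementation):

/*
 * Old form: compute this CPU's address of the variable, then do a
 * separate load/add/store.  New form: a single memory-destination
 * increment on the %gs-based per-CPU segment, approximately
 *
 *	old:	mov  %gs:this_cpu_off, %rax
 *		incl mce_poll_count(%rax)
 *
 *	new:	incl %gs:mce_poll_count
 */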
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c new file mode 100644 index 000000000000..16f41bbe46b6 --- /dev/null +++ b/arch/x86/kernel/cpu/mshyperv.c | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | /* | ||
| 2 | * HyperV Detection code. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2010, Novell, Inc. | ||
| 5 | * Author : K. Y. Srinivasan <ksrinivasan@novell.com> | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or modify | ||
| 8 | * it under the terms of the GNU General Public License as published by | ||
| 9 | * the Free Software Foundation; version 2 of the License. | ||
| 10 | * | ||
| 11 | */ | ||
| 12 | |||
| 13 | #include <linux/types.h> | ||
| 14 | #include <linux/module.h> | ||
| 15 | #include <asm/processor.h> | ||
| 16 | #include <asm/hypervisor.h> | ||
| 17 | #include <asm/hyperv.h> | ||
| 18 | #include <asm/mshyperv.h> | ||
| 19 | |||
| 20 | struct ms_hyperv_info ms_hyperv; | ||
| 21 | |||
| 22 | static bool __init ms_hyperv_platform(void) | ||
| 23 | { | ||
| 24 | u32 eax; | ||
| 25 | u32 hyp_signature[3]; | ||
| 26 | |||
| 27 | if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) | ||
| 28 | return false; | ||
| 29 | |||
| 30 | cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS, | ||
| 31 | &eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]); | ||
| 32 | |||
| 33 | return eax >= HYPERV_CPUID_MIN && | ||
| 34 | eax <= HYPERV_CPUID_MAX && | ||
| 35 | !memcmp("Microsoft Hv", hyp_signature, 12); | ||
| 36 | } | ||
| 37 | |||
| 38 | static void __init ms_hyperv_init_platform(void) | ||
| 39 | { | ||
| 40 | /* | ||
| 41 | * Extract the features and hints | ||
| 42 | */ | ||
| 43 | ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES); | ||
| 44 | ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO); | ||
| 45 | |||
| 46 | printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n", | ||
| 47 | ms_hyperv.features, ms_hyperv.hints); | ||
| 48 | } | ||
| 49 | |||
| 50 | const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { | ||
| 51 | .name = "Microsoft HyperV", | ||
| 52 | .detect = ms_hyperv_platform, | ||
| 53 | .init_platform = ms_hyperv_init_platform, | ||
| 54 | }; | ||
| 55 | EXPORT_SYMBOL(x86_hyper_ms_hyperv); | ||
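x86_hyper_ms_hyperv only describes the platform; selection happens in the generic hypervisor detection code. A sketch of how such a descriptor is expected to be consumed (the table and function below are illustrative, not part of this file):

static const struct hypervisor_x86 *hypervisors[] = {
	&x86_hyper_ms_hyperv,
	/* ... other guests, e.g. VMware, Xen HVM ... */
};

static void __init detect_hypervisor_vendor(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(hypervisors); i++) {
		const struct hypervisor_x86 *h = hypervisors[i];

		if (h->detect()) {
			printk(KERN_INFO "Hypervisor detected: %s\n", h->name);
			if (h->init_platform)
				h->init_platform();
			break;
		}
	}
}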
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index db5bdc8addf8..fd4db0db3708 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
| @@ -31,46 +31,51 @@ | |||
| 31 | #include <asm/nmi.h> | 31 | #include <asm/nmi.h> |
| 32 | #include <asm/compat.h> | 32 | #include <asm/compat.h> |
| 33 | 33 | ||
| 34 | static u64 perf_event_mask __read_mostly; | 34 | #if 0 |
| 35 | #undef wrmsrl | ||
| 36 | #define wrmsrl(msr, val) \ | ||
| 37 | do { \ | ||
| 38 | trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\ | ||
| 39 | (unsigned long)(val)); \ | ||
| 40 | native_write_msr((msr), (u32)((u64)(val)), \ | ||
| 41 | (u32)((u64)(val) >> 32)); \ | ||
| 42 | } while (0) | ||
| 43 | #endif | ||
| 35 | 44 | ||
| 36 | /* The maximal number of PEBS events: */ | 45 | /* |
| 37 | #define MAX_PEBS_EVENTS 4 | 46 | * best-effort, GUP-based copy_from_user() that assumes IRQ or NMI context |

| 47 | */ | ||
| 48 | static unsigned long | ||
| 49 | copy_from_user_nmi(void *to, const void __user *from, unsigned long n) | ||
| 50 | { | ||
| 51 | unsigned long offset, addr = (unsigned long)from; | ||
| 52 | int type = in_nmi() ? KM_NMI : KM_IRQ0; | ||
| 53 | unsigned long size, len = 0; | ||
| 54 | struct page *page; | ||
| 55 | void *map; | ||
| 56 | int ret; | ||
| 38 | 57 | ||
| 39 | /* The size of a BTS record in bytes: */ | 58 | do { |
| 40 | #define BTS_RECORD_SIZE 24 | 59 | ret = __get_user_pages_fast(addr, 1, 0, &page); |
| 60 | if (!ret) | ||
| 61 | break; | ||
| 41 | 62 | ||
| 42 | /* The size of a per-cpu BTS buffer in bytes: */ | 63 | offset = addr & (PAGE_SIZE - 1); |
| 43 | #define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 2048) | 64 | size = min(PAGE_SIZE - offset, n - len); |
| 44 | 65 | ||
| 45 | /* The BTS overflow threshold in bytes from the end of the buffer: */ | 66 | map = kmap_atomic(page, type); |
| 46 | #define BTS_OVFL_TH (BTS_RECORD_SIZE * 128) | 67 | memcpy(to, map+offset, size); |
| 68 | kunmap_atomic(map, type); | ||
| 69 | put_page(page); | ||
| 47 | 70 | ||
| 71 | len += size; | ||
| 72 | to += size; | ||
| 73 | addr += size; | ||
| 48 | 74 | ||
| 49 | /* | 75 | } while (len < n); |
| 50 | * Bits in the debugctlmsr controlling branch tracing. | ||
| 51 | */ | ||
| 52 | #define X86_DEBUGCTL_TR (1 << 6) | ||
| 53 | #define X86_DEBUGCTL_BTS (1 << 7) | ||
| 54 | #define X86_DEBUGCTL_BTINT (1 << 8) | ||
| 55 | #define X86_DEBUGCTL_BTS_OFF_OS (1 << 9) | ||
| 56 | #define X86_DEBUGCTL_BTS_OFF_USR (1 << 10) | ||
| 57 | 76 | ||
| 58 | /* | 77 | return len; |
| 59 | * A debug store configuration. | 78 | } |
| 60 | * | ||
| 61 | * We only support architectures that use 64bit fields. | ||
| 62 | */ | ||
| 63 | struct debug_store { | ||
| 64 | u64 bts_buffer_base; | ||
| 65 | u64 bts_index; | ||
| 66 | u64 bts_absolute_maximum; | ||
| 67 | u64 bts_interrupt_threshold; | ||
| 68 | u64 pebs_buffer_base; | ||
| 69 | u64 pebs_index; | ||
| 70 | u64 pebs_absolute_maximum; | ||
| 71 | u64 pebs_interrupt_threshold; | ||
| 72 | u64 pebs_event_reset[MAX_PEBS_EVENTS]; | ||
| 73 | }; | ||
| 74 | 79 | ||
| 75 | struct event_constraint { | 80 | struct event_constraint { |
| 76 | union { | 81 | union { |
| @@ -89,18 +94,41 @@ struct amd_nb { | |||
| 89 | struct event_constraint event_constraints[X86_PMC_IDX_MAX]; | 94 | struct event_constraint event_constraints[X86_PMC_IDX_MAX]; |
| 90 | }; | 95 | }; |
| 91 | 96 | ||
| 97 | #define MAX_LBR_ENTRIES 16 | ||
| 98 | |||
| 92 | struct cpu_hw_events { | 99 | struct cpu_hw_events { |
| 100 | /* | ||
| 101 | * Generic x86 PMC bits | ||
| 102 | */ | ||
| 93 | struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ | 103 | struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ |
| 94 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 104 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
| 95 | unsigned long interrupts; | ||
| 96 | int enabled; | 105 | int enabled; |
| 97 | struct debug_store *ds; | ||
| 98 | 106 | ||
| 99 | int n_events; | 107 | int n_events; |
| 100 | int n_added; | 108 | int n_added; |
| 101 | int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ | 109 | int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ |
| 102 | u64 tags[X86_PMC_IDX_MAX]; | 110 | u64 tags[X86_PMC_IDX_MAX]; |
| 103 | struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ | 111 | struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ |
| 112 | |||
| 113 | unsigned int group_flag; | ||
| 114 | |||
| 115 | /* | ||
| 116 | * Intel DebugStore bits | ||
| 117 | */ | ||
| 118 | struct debug_store *ds; | ||
| 119 | u64 pebs_enabled; | ||
| 120 | |||
| 121 | /* | ||
| 122 | * Intel LBR bits | ||
| 123 | */ | ||
| 124 | int lbr_users; | ||
| 125 | void *lbr_context; | ||
| 126 | struct perf_branch_stack lbr_stack; | ||
| 127 | struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; | ||
| 128 | |||
| 129 | /* | ||
| 130 | * AMD specific bits | ||
| 131 | */ | ||
| 104 | struct amd_nb *amd_nb; | 132 | struct amd_nb *amd_nb; |
| 105 | }; | 133 | }; |
| 106 | 134 | ||
| @@ -114,44 +142,75 @@ struct cpu_hw_events { | |||
| 114 | #define EVENT_CONSTRAINT(c, n, m) \ | 142 | #define EVENT_CONSTRAINT(c, n, m) \ |
| 115 | __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) | 143 | __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) |
| 116 | 144 | ||
| 145 | /* | ||
| 146 | * Constraint on the Event code. | ||
| 147 | */ | ||
| 117 | #define INTEL_EVENT_CONSTRAINT(c, n) \ | 148 | #define INTEL_EVENT_CONSTRAINT(c, n) \ |
| 118 | EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK) | 149 | EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT) |
| 119 | 150 | ||
| 151 | /* | ||
| 152 | * Constraint on the Event code + UMask + fixed-mask | ||
| 153 | * | ||
| 154 | * filter mask to validate fixed counter events. | ||
| 155 | * the following filters disqualify for fixed counters: | ||
| 156 | * - inv | ||
| 157 | * - edge | ||
| 158 | * - cnt-mask | ||
| 159 | * The other filters are supported by fixed counters. | ||
| 160 | * The any-thread option is supported starting with v3. | ||
| 161 | */ | ||
| 120 | #define FIXED_EVENT_CONSTRAINT(c, n) \ | 162 | #define FIXED_EVENT_CONSTRAINT(c, n) \ |
| 121 | EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK) | 163 | EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK) |
| 164 | |||
| 165 | /* | ||
| 166 | * Constraint on the Event code + UMask | ||
| 167 | */ | ||
| 168 | #define PEBS_EVENT_CONSTRAINT(c, n) \ | ||
| 169 | EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) | ||
| 122 | 170 | ||
| 123 | #define EVENT_CONSTRAINT_END \ | 171 | #define EVENT_CONSTRAINT_END \ |
| 124 | EVENT_CONSTRAINT(0, 0, 0) | 172 | EVENT_CONSTRAINT(0, 0, 0) |
| 125 | 173 | ||
| 126 | #define for_each_event_constraint(e, c) \ | 174 | #define for_each_event_constraint(e, c) \ |
| 127 | for ((e) = (c); (e)->cmask; (e)++) | 175 | for ((e) = (c); (e)->weight; (e)++) |
| 176 | |||
| 177 | union perf_capabilities { | ||
| 178 | struct { | ||
| 179 | u64 lbr_format : 6; | ||
| 180 | u64 pebs_trap : 1; | ||
| 181 | u64 pebs_arch_reg : 1; | ||
| 182 | u64 pebs_format : 4; | ||
| 183 | u64 smm_freeze : 1; | ||
| 184 | }; | ||
| 185 | u64 capabilities; | ||
| 186 | }; | ||
| 128 | 187 | ||
| 129 | /* | 188 | /* |
| 130 | * struct x86_pmu - generic x86 pmu | 189 | * struct x86_pmu - generic x86 pmu |
| 131 | */ | 190 | */ |
| 132 | struct x86_pmu { | 191 | struct x86_pmu { |
| 192 | /* | ||
| 193 | * Generic x86 PMC bits | ||
| 194 | */ | ||
| 133 | const char *name; | 195 | const char *name; |
| 134 | int version; | 196 | int version; |
| 135 | int (*handle_irq)(struct pt_regs *); | 197 | int (*handle_irq)(struct pt_regs *); |
| 136 | void (*disable_all)(void); | 198 | void (*disable_all)(void); |
| 137 | void (*enable_all)(void); | 199 | void (*enable_all)(int added); |
| 138 | void (*enable)(struct perf_event *); | 200 | void (*enable)(struct perf_event *); |
| 139 | void (*disable)(struct perf_event *); | 201 | void (*disable)(struct perf_event *); |
| 202 | int (*hw_config)(struct perf_event *event); | ||
| 203 | int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); | ||
| 140 | unsigned eventsel; | 204 | unsigned eventsel; |
| 141 | unsigned perfctr; | 205 | unsigned perfctr; |
| 142 | u64 (*event_map)(int); | 206 | u64 (*event_map)(int); |
| 143 | u64 (*raw_event)(u64); | ||
| 144 | int max_events; | 207 | int max_events; |
| 145 | int num_events; | 208 | int num_counters; |
| 146 | int num_events_fixed; | 209 | int num_counters_fixed; |
| 147 | int event_bits; | 210 | int cntval_bits; |
| 148 | u64 event_mask; | 211 | u64 cntval_mask; |
| 149 | int apic; | 212 | int apic; |
| 150 | u64 max_period; | 213 | u64 max_period; |
| 151 | u64 intel_ctrl; | ||
| 152 | void (*enable_bts)(u64 config); | ||
| 153 | void (*disable_bts)(void); | ||
| 154 | |||
| 155 | struct event_constraint * | 214 | struct event_constraint * |
| 156 | (*get_event_constraints)(struct cpu_hw_events *cpuc, | 215 | (*get_event_constraints)(struct cpu_hw_events *cpuc, |
| 157 | struct perf_event *event); | 216 | struct perf_event *event); |
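For orientation, the constraint macros introduced above are meant to populate the model-specific tables that x86_pmu.event_constraints (and pebs_constraints) point at. A simplified, illustrative table, terminated by EVENT_CONSTRAINT_END:

static struct event_constraint demo_event_constraints[] =
{
	FIXED_EVENT_CONSTRAINT(0x00c0, 0),	/* INST_RETIRED.ANY may use fixed counter 0 */
	INTEL_EVENT_CONSTRAINT(0x48, 0x4),	/* event code 0x48 restricted to counter 2 */
	EVENT_CONSTRAINT_END
};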
| @@ -159,11 +218,32 @@ struct x86_pmu { | |||
| 159 | void (*put_event_constraints)(struct cpu_hw_events *cpuc, | 218 | void (*put_event_constraints)(struct cpu_hw_events *cpuc, |
| 160 | struct perf_event *event); | 219 | struct perf_event *event); |
| 161 | struct event_constraint *event_constraints; | 220 | struct event_constraint *event_constraints; |
| 221 | void (*quirks)(void); | ||
| 162 | 222 | ||
| 163 | int (*cpu_prepare)(int cpu); | 223 | int (*cpu_prepare)(int cpu); |
| 164 | void (*cpu_starting)(int cpu); | 224 | void (*cpu_starting)(int cpu); |
| 165 | void (*cpu_dying)(int cpu); | 225 | void (*cpu_dying)(int cpu); |
| 166 | void (*cpu_dead)(int cpu); | 226 | void (*cpu_dead)(int cpu); |
| 227 | |||
| 228 | /* | ||
| 229 | * Intel Arch Perfmon v2+ | ||
| 230 | */ | ||
| 231 | u64 intel_ctrl; | ||
| 232 | union perf_capabilities intel_cap; | ||
| 233 | |||
| 234 | /* | ||
| 235 | * Intel DebugStore bits | ||
| 236 | */ | ||
| 237 | int bts, pebs; | ||
| 238 | int pebs_record_size; | ||
| 239 | void (*drain_pebs)(struct pt_regs *regs); | ||
| 240 | struct event_constraint *pebs_constraints; | ||
| 241 | |||
| 242 | /* | ||
| 243 | * Intel LBR | ||
| 244 | */ | ||
| 245 | unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ | ||
| 246 | int lbr_nr; /* hardware stack size */ | ||
| 167 | }; | 247 | }; |
| 168 | 248 | ||
| 169 | static struct x86_pmu x86_pmu __read_mostly; | 249 | static struct x86_pmu x86_pmu __read_mostly; |
| @@ -198,7 +278,7 @@ static u64 | |||
| 198 | x86_perf_event_update(struct perf_event *event) | 278 | x86_perf_event_update(struct perf_event *event) |
| 199 | { | 279 | { |
| 200 | struct hw_perf_event *hwc = &event->hw; | 280 | struct hw_perf_event *hwc = &event->hw; |
| 201 | int shift = 64 - x86_pmu.event_bits; | 281 | int shift = 64 - x86_pmu.cntval_bits; |
| 202 | u64 prev_raw_count, new_raw_count; | 282 | u64 prev_raw_count, new_raw_count; |
| 203 | int idx = hwc->idx; | 283 | int idx = hwc->idx; |
| 204 | s64 delta; | 284 | s64 delta; |
| @@ -241,33 +321,32 @@ again: | |||
| 241 | static atomic_t active_events; | 321 | static atomic_t active_events; |
| 242 | static DEFINE_MUTEX(pmc_reserve_mutex); | 322 | static DEFINE_MUTEX(pmc_reserve_mutex); |
| 243 | 323 | ||
| 324 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 325 | |||
| 244 | static bool reserve_pmc_hardware(void) | 326 | static bool reserve_pmc_hardware(void) |
| 245 | { | 327 | { |
| 246 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 247 | int i; | 328 | int i; |
| 248 | 329 | ||
| 249 | if (nmi_watchdog == NMI_LOCAL_APIC) | 330 | if (nmi_watchdog == NMI_LOCAL_APIC) |
| 250 | disable_lapic_nmi_watchdog(); | 331 | disable_lapic_nmi_watchdog(); |
| 251 | 332 | ||
| 252 | for (i = 0; i < x86_pmu.num_events; i++) { | 333 | for (i = 0; i < x86_pmu.num_counters; i++) { |
| 253 | if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) | 334 | if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) |
| 254 | goto perfctr_fail; | 335 | goto perfctr_fail; |
| 255 | } | 336 | } |
| 256 | 337 | ||
| 257 | for (i = 0; i < x86_pmu.num_events; i++) { | 338 | for (i = 0; i < x86_pmu.num_counters; i++) { |
| 258 | if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) | 339 | if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) |
| 259 | goto eventsel_fail; | 340 | goto eventsel_fail; |
| 260 | } | 341 | } |
| 261 | #endif | ||
| 262 | 342 | ||
| 263 | return true; | 343 | return true; |
| 264 | 344 | ||
| 265 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 266 | eventsel_fail: | 345 | eventsel_fail: |
| 267 | for (i--; i >= 0; i--) | 346 | for (i--; i >= 0; i--) |
| 268 | release_evntsel_nmi(x86_pmu.eventsel + i); | 347 | release_evntsel_nmi(x86_pmu.eventsel + i); |
| 269 | 348 | ||
| 270 | i = x86_pmu.num_events; | 349 | i = x86_pmu.num_counters; |
| 271 | 350 | ||
| 272 | perfctr_fail: | 351 | perfctr_fail: |
| 273 | for (i--; i >= 0; i--) | 352 | for (i--; i >= 0; i--) |
| @@ -277,128 +356,36 @@ perfctr_fail: | |||
| 277 | enable_lapic_nmi_watchdog(); | 356 | enable_lapic_nmi_watchdog(); |
| 278 | 357 | ||
| 279 | return false; | 358 | return false; |
| 280 | #endif | ||
| 281 | } | 359 | } |
| 282 | 360 | ||
| 283 | static void release_pmc_hardware(void) | 361 | static void release_pmc_hardware(void) |
| 284 | { | 362 | { |
| 285 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 286 | int i; | 363 | int i; |
| 287 | 364 | ||
| 288 | for (i = 0; i < x86_pmu.num_events; i++) { | 365 | for (i = 0; i < x86_pmu.num_counters; i++) { |
| 289 | release_perfctr_nmi(x86_pmu.perfctr + i); | 366 | release_perfctr_nmi(x86_pmu.perfctr + i); |
| 290 | release_evntsel_nmi(x86_pmu.eventsel + i); | 367 | release_evntsel_nmi(x86_pmu.eventsel + i); |
| 291 | } | 368 | } |
| 292 | 369 | ||
| 293 | if (nmi_watchdog == NMI_LOCAL_APIC) | 370 | if (nmi_watchdog == NMI_LOCAL_APIC) |
| 294 | enable_lapic_nmi_watchdog(); | 371 | enable_lapic_nmi_watchdog(); |
| 295 | #endif | ||
| 296 | } | ||
| 297 | |||
| 298 | static inline bool bts_available(void) | ||
| 299 | { | ||
| 300 | return x86_pmu.enable_bts != NULL; | ||
| 301 | } | 372 | } |
| 302 | 373 | ||
| 303 | static void init_debug_store_on_cpu(int cpu) | 374 | #else |
| 304 | { | ||
| 305 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
| 306 | |||
| 307 | if (!ds) | ||
| 308 | return; | ||
| 309 | |||
| 310 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, | ||
| 311 | (u32)((u64)(unsigned long)ds), | ||
| 312 | (u32)((u64)(unsigned long)ds >> 32)); | ||
| 313 | } | ||
| 314 | |||
| 315 | static void fini_debug_store_on_cpu(int cpu) | ||
| 316 | { | ||
| 317 | if (!per_cpu(cpu_hw_events, cpu).ds) | ||
| 318 | return; | ||
| 319 | |||
| 320 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); | ||
| 321 | } | ||
| 322 | |||
| 323 | static void release_bts_hardware(void) | ||
| 324 | { | ||
| 325 | int cpu; | ||
| 326 | |||
| 327 | if (!bts_available()) | ||
| 328 | return; | ||
| 329 | |||
| 330 | get_online_cpus(); | ||
| 331 | |||
| 332 | for_each_online_cpu(cpu) | ||
| 333 | fini_debug_store_on_cpu(cpu); | ||
| 334 | |||
| 335 | for_each_possible_cpu(cpu) { | ||
| 336 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
| 337 | |||
| 338 | if (!ds) | ||
| 339 | continue; | ||
| 340 | |||
| 341 | per_cpu(cpu_hw_events, cpu).ds = NULL; | ||
| 342 | |||
| 343 | kfree((void *)(unsigned long)ds->bts_buffer_base); | ||
| 344 | kfree(ds); | ||
| 345 | } | ||
| 346 | |||
| 347 | put_online_cpus(); | ||
| 348 | } | ||
| 349 | |||
| 350 | static int reserve_bts_hardware(void) | ||
| 351 | { | ||
| 352 | int cpu, err = 0; | ||
| 353 | |||
| 354 | if (!bts_available()) | ||
| 355 | return 0; | ||
| 356 | |||
| 357 | get_online_cpus(); | ||
| 358 | |||
| 359 | for_each_possible_cpu(cpu) { | ||
| 360 | struct debug_store *ds; | ||
| 361 | void *buffer; | ||
| 362 | |||
| 363 | err = -ENOMEM; | ||
| 364 | buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); | ||
| 365 | if (unlikely(!buffer)) | ||
| 366 | break; | ||
| 367 | |||
| 368 | ds = kzalloc(sizeof(*ds), GFP_KERNEL); | ||
| 369 | if (unlikely(!ds)) { | ||
| 370 | kfree(buffer); | ||
| 371 | break; | ||
| 372 | } | ||
| 373 | |||
| 374 | ds->bts_buffer_base = (u64)(unsigned long)buffer; | ||
| 375 | ds->bts_index = ds->bts_buffer_base; | ||
| 376 | ds->bts_absolute_maximum = | ||
| 377 | ds->bts_buffer_base + BTS_BUFFER_SIZE; | ||
| 378 | ds->bts_interrupt_threshold = | ||
| 379 | ds->bts_absolute_maximum - BTS_OVFL_TH; | ||
| 380 | |||
| 381 | per_cpu(cpu_hw_events, cpu).ds = ds; | ||
| 382 | err = 0; | ||
| 383 | } | ||
| 384 | 375 | ||
| 385 | if (err) | 376 | static bool reserve_pmc_hardware(void) { return true; } |
| 386 | release_bts_hardware(); | 377 | static void release_pmc_hardware(void) {} |
| 387 | else { | ||
| 388 | for_each_online_cpu(cpu) | ||
| 389 | init_debug_store_on_cpu(cpu); | ||
| 390 | } | ||
| 391 | 378 | ||
| 392 | put_online_cpus(); | 379 | #endif |
| 393 | 380 | ||
| 394 | return err; | 381 | static int reserve_ds_buffers(void); |
| 395 | } | 382 | static void release_ds_buffers(void); |
| 396 | 383 | ||
| 397 | static void hw_perf_event_destroy(struct perf_event *event) | 384 | static void hw_perf_event_destroy(struct perf_event *event) |
| 398 | { | 385 | { |
| 399 | if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { | 386 | if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { |
| 400 | release_pmc_hardware(); | 387 | release_pmc_hardware(); |
| 401 | release_bts_hardware(); | 388 | release_ds_buffers(); |
| 402 | mutex_unlock(&pmc_reserve_mutex); | 389 | mutex_unlock(&pmc_reserve_mutex); |
| 403 | } | 390 | } |
| 404 | } | 391 | } |
| @@ -441,54 +428,11 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) | |||
| 441 | return 0; | 428 | return 0; |
| 442 | } | 429 | } |
| 443 | 430 | ||
| 444 | /* | 431 | static int x86_setup_perfctr(struct perf_event *event) |
| 445 | * Setup the hardware configuration for a given attr_type | ||
| 446 | */ | ||
| 447 | static int __hw_perf_event_init(struct perf_event *event) | ||
| 448 | { | 432 | { |
| 449 | struct perf_event_attr *attr = &event->attr; | 433 | struct perf_event_attr *attr = &event->attr; |
| 450 | struct hw_perf_event *hwc = &event->hw; | 434 | struct hw_perf_event *hwc = &event->hw; |
| 451 | u64 config; | 435 | u64 config; |
| 452 | int err; | ||
| 453 | |||
| 454 | if (!x86_pmu_initialized()) | ||
| 455 | return -ENODEV; | ||
| 456 | |||
| 457 | err = 0; | ||
| 458 | if (!atomic_inc_not_zero(&active_events)) { | ||
| 459 | mutex_lock(&pmc_reserve_mutex); | ||
| 460 | if (atomic_read(&active_events) == 0) { | ||
| 461 | if (!reserve_pmc_hardware()) | ||
| 462 | err = -EBUSY; | ||
| 463 | else | ||
| 464 | err = reserve_bts_hardware(); | ||
| 465 | } | ||
| 466 | if (!err) | ||
| 467 | atomic_inc(&active_events); | ||
| 468 | mutex_unlock(&pmc_reserve_mutex); | ||
| 469 | } | ||
| 470 | if (err) | ||
| 471 | return err; | ||
| 472 | |||
| 473 | event->destroy = hw_perf_event_destroy; | ||
| 474 | |||
| 475 | /* | ||
| 476 | * Generate PMC IRQs: | ||
| 477 | * (keep 'enabled' bit clear for now) | ||
| 478 | */ | ||
| 479 | hwc->config = ARCH_PERFMON_EVENTSEL_INT; | ||
| 480 | |||
| 481 | hwc->idx = -1; | ||
| 482 | hwc->last_cpu = -1; | ||
| 483 | hwc->last_tag = ~0ULL; | ||
| 484 | |||
| 485 | /* | ||
| 486 | * Count user and OS events unless requested not to. | ||
| 487 | */ | ||
| 488 | if (!attr->exclude_user) | ||
| 489 | hwc->config |= ARCH_PERFMON_EVENTSEL_USR; | ||
| 490 | if (!attr->exclude_kernel) | ||
| 491 | hwc->config |= ARCH_PERFMON_EVENTSEL_OS; | ||
| 492 | 436 | ||
| 493 | if (!hwc->sample_period) { | 437 | if (!hwc->sample_period) { |
| 494 | hwc->sample_period = x86_pmu.max_period; | 438 | hwc->sample_period = x86_pmu.max_period; |
| @@ -505,16 +449,8 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
| 505 | return -EOPNOTSUPP; | 449 | return -EOPNOTSUPP; |
| 506 | } | 450 | } |
| 507 | 451 | ||
| 508 | /* | 452 | if (attr->type == PERF_TYPE_RAW) |
| 509 | * Raw hw_event type provide the config in the hw_event structure | ||
| 510 | */ | ||
| 511 | if (attr->type == PERF_TYPE_RAW) { | ||
| 512 | hwc->config |= x86_pmu.raw_event(attr->config); | ||
| 513 | if ((hwc->config & ARCH_PERFMON_EVENTSEL_ANY) && | ||
| 514 | perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) | ||
| 515 | return -EACCES; | ||
| 516 | return 0; | 453 | return 0; |
| 517 | } | ||
| 518 | 454 | ||
| 519 | if (attr->type == PERF_TYPE_HW_CACHE) | 455 | if (attr->type == PERF_TYPE_HW_CACHE) |
| 520 | return set_ext_hw_attr(hwc, attr); | 456 | return set_ext_hw_attr(hwc, attr); |
| @@ -539,11 +475,11 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
| 539 | if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && | 475 | if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && |
| 540 | (hwc->sample_period == 1)) { | 476 | (hwc->sample_period == 1)) { |
| 541 | /* BTS is not supported by this architecture. */ | 477 | /* BTS is not supported by this architecture. */ |
| 542 | if (!bts_available()) | 478 | if (!x86_pmu.bts) |
| 543 | return -EOPNOTSUPP; | 479 | return -EOPNOTSUPP; |
| 544 | 480 | ||
| 545 | /* BTS is currently only allowed for user-mode. */ | 481 | /* BTS is currently only allowed for user-mode. */ |
| 546 | if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) | 482 | if (!attr->exclude_kernel) |
| 547 | return -EOPNOTSUPP; | 483 | return -EOPNOTSUPP; |
| 548 | } | 484 | } |
| 549 | 485 | ||
| @@ -552,12 +488,87 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
| 552 | return 0; | 488 | return 0; |
| 553 | } | 489 | } |
| 554 | 490 | ||
| 491 | static int x86_pmu_hw_config(struct perf_event *event) | ||
| 492 | { | ||
| 493 | if (event->attr.precise_ip) { | ||
| 494 | int precise = 0; | ||
| 495 | |||
| 496 | /* Support for constant skid */ | ||
| 497 | if (x86_pmu.pebs) | ||
| 498 | precise++; | ||
| 499 | |||
| 500 | /* Support for IP fixup */ | ||
| 501 | if (x86_pmu.lbr_nr) | ||
| 502 | precise++; | ||
| 503 | |||
| 504 | if (event->attr.precise_ip > precise) | ||
| 505 | return -EOPNOTSUPP; | ||
| 506 | } | ||
| 507 | |||
| 508 | /* | ||
| 509 | * Generate PMC IRQs: | ||
| 510 | * (keep 'enabled' bit clear for now) | ||
| 511 | */ | ||
| 512 | event->hw.config = ARCH_PERFMON_EVENTSEL_INT; | ||
| 513 | |||
| 514 | /* | ||
| 515 | * Count user and OS events unless requested not to | ||
| 516 | */ | ||
| 517 | if (!event->attr.exclude_user) | ||
| 518 | event->hw.config |= ARCH_PERFMON_EVENTSEL_USR; | ||
| 519 | if (!event->attr.exclude_kernel) | ||
| 520 | event->hw.config |= ARCH_PERFMON_EVENTSEL_OS; | ||
| 521 | |||
| 522 | if (event->attr.type == PERF_TYPE_RAW) | ||
| 523 | event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK; | ||
| 524 | |||
| 525 | return x86_setup_perfctr(event); | ||
| 526 | } | ||
| 527 | |||
| 528 | /* | ||
| 529 | * Setup the hardware configuration for a given attr_type | ||
| 530 | */ | ||
| 531 | static int __hw_perf_event_init(struct perf_event *event) | ||
| 532 | { | ||
| 533 | int err; | ||
| 534 | |||
| 535 | if (!x86_pmu_initialized()) | ||
| 536 | return -ENODEV; | ||
| 537 | |||
| 538 | err = 0; | ||
| 539 | if (!atomic_inc_not_zero(&active_events)) { | ||
| 540 | mutex_lock(&pmc_reserve_mutex); | ||
| 541 | if (atomic_read(&active_events) == 0) { | ||
| 542 | if (!reserve_pmc_hardware()) | ||
| 543 | err = -EBUSY; | ||
| 544 | else { | ||
| 545 | err = reserve_ds_buffers(); | ||
| 546 | if (err) | ||
| 547 | release_pmc_hardware(); | ||
| 548 | } | ||
| 549 | } | ||
| 550 | if (!err) | ||
| 551 | atomic_inc(&active_events); | ||
| 552 | mutex_unlock(&pmc_reserve_mutex); | ||
| 553 | } | ||
| 554 | if (err) | ||
| 555 | return err; | ||
| 556 | |||
| 557 | event->destroy = hw_perf_event_destroy; | ||
| 558 | |||
| 559 | event->hw.idx = -1; | ||
| 560 | event->hw.last_cpu = -1; | ||
| 561 | event->hw.last_tag = ~0ULL; | ||
| 562 | |||
| 563 | return x86_pmu.hw_config(event); | ||
| 564 | } | ||
| 565 | |||
| 555 | static void x86_pmu_disable_all(void) | 566 | static void x86_pmu_disable_all(void) |
| 556 | { | 567 | { |
| 557 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 568 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 558 | int idx; | 569 | int idx; |
| 559 | 570 | ||
| 560 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 571 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
| 561 | u64 val; | 572 | u64 val; |
| 562 | 573 | ||
| 563 | if (!test_bit(idx, cpuc->active_mask)) | 574 | if (!test_bit(idx, cpuc->active_mask)) |
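The precise_ip checks added in x86_pmu_hw_config() above gate the new PEBS/LBR machinery. From userspace this would be requested roughly as follows (illustrative; whether a given generic event is actually PEBS-capable is decided later by the model-specific constraint tables):

#include <string.h>
#include <linux/perf_event.h>

static void setup_precise_sampling(struct perf_event_attr *attr)
{
	memset(attr, 0, sizeof(*attr));
	attr->size           = sizeof(*attr);
	attr->type           = PERF_TYPE_HARDWARE;
	attr->config         = PERF_COUNT_HW_INSTRUCTIONS;
	attr->precise_ip     = 2;	/* 1 needs PEBS, 2 also needs LBR-based IP fixup */
	attr->sample_period  = 100000;
	attr->exclude_kernel = 1;
}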
| @@ -587,12 +598,12 @@ void hw_perf_disable(void) | |||
| 587 | x86_pmu.disable_all(); | 598 | x86_pmu.disable_all(); |
| 588 | } | 599 | } |
| 589 | 600 | ||
| 590 | static void x86_pmu_enable_all(void) | 601 | static void x86_pmu_enable_all(int added) |
| 591 | { | 602 | { |
| 592 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 603 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 593 | int idx; | 604 | int idx; |
| 594 | 605 | ||
| 595 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 606 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
| 596 | struct perf_event *event = cpuc->events[idx]; | 607 | struct perf_event *event = cpuc->events[idx]; |
| 597 | u64 val; | 608 | u64 val; |
| 598 | 609 | ||
| @@ -667,14 +678,14 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | |||
| 667 | * assign events to counters starting with most | 678 | * assign events to counters starting with most |
| 668 | * constrained events. | 679 | * constrained events. |
| 669 | */ | 680 | */ |
| 670 | wmax = x86_pmu.num_events; | 681 | wmax = x86_pmu.num_counters; |
| 671 | 682 | ||
| 672 | /* | 683 | /* |
| 673 | * when fixed event counters are present, | 684 | * when fixed event counters are present, |
| 674 | * wmax is incremented by 1 to account | 685 | * wmax is incremented by 1 to account |
| 675 | * for one more choice | 686 | * for one more choice |
| 676 | */ | 687 | */ |
| 677 | if (x86_pmu.num_events_fixed) | 688 | if (x86_pmu.num_counters_fixed) |
| 678 | wmax++; | 689 | wmax++; |
| 679 | 690 | ||
| 680 | for (w = 1, num = n; num && w <= wmax; w++) { | 691 | for (w = 1, num = n; num && w <= wmax; w++) { |
| @@ -724,7 +735,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, | |||
| 724 | struct perf_event *event; | 735 | struct perf_event *event; |
| 725 | int n, max_count; | 736 | int n, max_count; |
| 726 | 737 | ||
| 727 | max_count = x86_pmu.num_events + x86_pmu.num_events_fixed; | 738 | max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed; |
| 728 | 739 | ||
| 729 | /* current number of events already accepted */ | 740 | /* current number of events already accepted */ |
| 730 | n = cpuc->n_events; | 741 | n = cpuc->n_events; |
| @@ -795,7 +806,7 @@ void hw_perf_enable(void) | |||
| 795 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 806 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 796 | struct perf_event *event; | 807 | struct perf_event *event; |
| 797 | struct hw_perf_event *hwc; | 808 | struct hw_perf_event *hwc; |
| 798 | int i; | 809 | int i, added = cpuc->n_added; |
| 799 | 810 | ||
| 800 | if (!x86_pmu_initialized()) | 811 | if (!x86_pmu_initialized()) |
| 801 | return; | 812 | return; |
| @@ -847,19 +858,20 @@ void hw_perf_enable(void) | |||
| 847 | cpuc->enabled = 1; | 858 | cpuc->enabled = 1; |
| 848 | barrier(); | 859 | barrier(); |
| 849 | 860 | ||
| 850 | x86_pmu.enable_all(); | 861 | x86_pmu.enable_all(added); |
| 851 | } | 862 | } |
| 852 | 863 | ||
| 853 | static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc) | 864 | static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, |
| 865 | u64 enable_mask) | ||
| 854 | { | 866 | { |
| 855 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | 867 | wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask); |
| 856 | hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE); | ||
| 857 | } | 868 | } |
| 858 | 869 | ||
| 859 | static inline void x86_pmu_disable_event(struct perf_event *event) | 870 | static inline void x86_pmu_disable_event(struct perf_event *event) |
| 860 | { | 871 | { |
| 861 | struct hw_perf_event *hwc = &event->hw; | 872 | struct hw_perf_event *hwc = &event->hw; |
| 862 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, hwc->config); | 873 | |
| 874 | wrmsrl(hwc->config_base + hwc->idx, hwc->config); | ||
| 863 | } | 875 | } |
| 864 | 876 | ||
| 865 | static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); | 877 | static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); |
| @@ -874,7 +886,7 @@ x86_perf_event_set_period(struct perf_event *event) | |||
| 874 | struct hw_perf_event *hwc = &event->hw; | 886 | struct hw_perf_event *hwc = &event->hw; |
| 875 | s64 left = atomic64_read(&hwc->period_left); | 887 | s64 left = atomic64_read(&hwc->period_left); |
| 876 | s64 period = hwc->sample_period; | 888 | s64 period = hwc->sample_period; |
| 877 | int err, ret = 0, idx = hwc->idx; | 889 | int ret = 0, idx = hwc->idx; |
| 878 | 890 | ||
| 879 | if (idx == X86_PMC_IDX_FIXED_BTS) | 891 | if (idx == X86_PMC_IDX_FIXED_BTS) |
| 880 | return 0; | 892 | return 0; |
| @@ -912,8 +924,8 @@ x86_perf_event_set_period(struct perf_event *event) | |||
| 912 | */ | 924 | */ |
| 913 | atomic64_set(&hwc->prev_count, (u64)-left); | 925 | atomic64_set(&hwc->prev_count, (u64)-left); |
| 914 | 926 | ||
| 915 | err = checking_wrmsrl(hwc->event_base + idx, | 927 | wrmsrl(hwc->event_base + idx, |
| 916 | (u64)(-left) & x86_pmu.event_mask); | 928 | (u64)(-left) & x86_pmu.cntval_mask); |
| 917 | 929 | ||
| 918 | perf_event_update_userpage(event); | 930 | perf_event_update_userpage(event); |
| 919 | 931 | ||
| @@ -924,7 +936,8 @@ static void x86_pmu_enable_event(struct perf_event *event) | |||
| 924 | { | 936 | { |
| 925 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 937 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 926 | if (cpuc->enabled) | 938 | if (cpuc->enabled) |
| 927 | __x86_pmu_enable_event(&event->hw); | 939 | __x86_pmu_enable_event(&event->hw, |
| 940 | ARCH_PERFMON_EVENTSEL_ENABLE); | ||
| 928 | } | 941 | } |
| 929 | 942 | ||
| 930 | /* | 943 | /* |
| @@ -950,7 +963,15 @@ static int x86_pmu_enable(struct perf_event *event) | |||
| 950 | if (n < 0) | 963 | if (n < 0) |
| 951 | return n; | 964 | return n; |
| 952 | 965 | ||
| 953 | ret = x86_schedule_events(cpuc, n, assign); | 966 | /* |
| 967 | * If group events scheduling transaction was started, | ||
| 968 | * skip the schedulability test here; it will be performed | ||
| 969 | * at commit time (->commit_txn) as a whole | ||
| 970 | */ | ||
| 971 | if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) | ||
| 972 | goto out; | ||
| 973 | |||
| 974 | ret = x86_pmu.schedule_events(cpuc, n, assign); | ||
| 954 | if (ret) | 975 | if (ret) |
| 955 | return ret; | 976 | return ret; |
| 956 | /* | 977 | /* |
| @@ -959,6 +980,7 @@ static int x86_pmu_enable(struct perf_event *event) | |||
| 959 | */ | 980 | */ |
| 960 | memcpy(cpuc->assign, assign, n*sizeof(int)); | 981 | memcpy(cpuc->assign, assign, n*sizeof(int)); |
| 961 | 982 | ||
| 983 | out: | ||
| 962 | cpuc->n_events = n; | 984 | cpuc->n_events = n; |
| 963 | cpuc->n_added += n - n0; | 985 | cpuc->n_added += n - n0; |
| 964 | 986 | ||
| @@ -991,11 +1013,12 @@ static void x86_pmu_unthrottle(struct perf_event *event) | |||
| 991 | void perf_event_print_debug(void) | 1013 | void perf_event_print_debug(void) |
| 992 | { | 1014 | { |
| 993 | u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; | 1015 | u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; |
| 1016 | u64 pebs; | ||
| 994 | struct cpu_hw_events *cpuc; | 1017 | struct cpu_hw_events *cpuc; |
| 995 | unsigned long flags; | 1018 | unsigned long flags; |
| 996 | int cpu, idx; | 1019 | int cpu, idx; |
| 997 | 1020 | ||
| 998 | if (!x86_pmu.num_events) | 1021 | if (!x86_pmu.num_counters) |
| 999 | return; | 1022 | return; |
| 1000 | 1023 | ||
| 1001 | local_irq_save(flags); | 1024 | local_irq_save(flags); |
| @@ -1008,16 +1031,18 @@ void perf_event_print_debug(void) | |||
| 1008 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); | 1031 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); |
| 1009 | rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); | 1032 | rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); |
| 1010 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); | 1033 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); |
| 1034 | rdmsrl(MSR_IA32_PEBS_ENABLE, pebs); | ||
| 1011 | 1035 | ||
| 1012 | pr_info("\n"); | 1036 | pr_info("\n"); |
| 1013 | pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); | 1037 | pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); |
| 1014 | pr_info("CPU#%d: status: %016llx\n", cpu, status); | 1038 | pr_info("CPU#%d: status: %016llx\n", cpu, status); |
| 1015 | pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); | 1039 | pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); |
| 1016 | pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); | 1040 | pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); |
| 1041 | pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs); | ||
| 1017 | } | 1042 | } |
| 1018 | pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); | 1043 | pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); |
| 1019 | 1044 | ||
| 1020 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 1045 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
| 1021 | rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); | 1046 | rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); |
| 1022 | rdmsrl(x86_pmu.perfctr + idx, pmc_count); | 1047 | rdmsrl(x86_pmu.perfctr + idx, pmc_count); |
| 1023 | 1048 | ||
| @@ -1030,7 +1055,7 @@ void perf_event_print_debug(void) | |||
| 1030 | pr_info("CPU#%d: gen-PMC%d left: %016llx\n", | 1055 | pr_info("CPU#%d: gen-PMC%d left: %016llx\n", |
| 1031 | cpu, idx, prev_left); | 1056 | cpu, idx, prev_left); |
| 1032 | } | 1057 | } |
| 1033 | for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { | 1058 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { |
| 1034 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); | 1059 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); |
| 1035 | 1060 | ||
| 1036 | pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", | 1061 | pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", |
| @@ -1095,7 +1120,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) | |||
| 1095 | 1120 | ||
| 1096 | cpuc = &__get_cpu_var(cpu_hw_events); | 1121 | cpuc = &__get_cpu_var(cpu_hw_events); |
| 1097 | 1122 | ||
| 1098 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 1123 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
| 1099 | if (!test_bit(idx, cpuc->active_mask)) | 1124 | if (!test_bit(idx, cpuc->active_mask)) |
| 1100 | continue; | 1125 | continue; |
| 1101 | 1126 | ||
| @@ -1103,7 +1128,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) | |||
| 1103 | hwc = &event->hw; | 1128 | hwc = &event->hw; |
| 1104 | 1129 | ||
| 1105 | val = x86_perf_event_update(event); | 1130 | val = x86_perf_event_update(event); |
| 1106 | if (val & (1ULL << (x86_pmu.event_bits - 1))) | 1131 | if (val & (1ULL << (x86_pmu.cntval_bits - 1))) |
| 1107 | continue; | 1132 | continue; |
| 1108 | 1133 | ||
| 1109 | /* | 1134 | /* |
| @@ -1146,7 +1171,6 @@ void set_perf_event_pending(void) | |||
| 1146 | 1171 | ||
| 1147 | void perf_events_lapic_init(void) | 1172 | void perf_events_lapic_init(void) |
| 1148 | { | 1173 | { |
| 1149 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 1150 | if (!x86_pmu.apic || !x86_pmu_initialized()) | 1174 | if (!x86_pmu.apic || !x86_pmu_initialized()) |
| 1151 | return; | 1175 | return; |
| 1152 | 1176 | ||
| @@ -1154,7 +1178,6 @@ void perf_events_lapic_init(void) | |||
| 1154 | * Always use NMI for PMU | 1178 | * Always use NMI for PMU |
| 1155 | */ | 1179 | */ |
| 1156 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 1180 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
| 1157 | #endif | ||
| 1158 | } | 1181 | } |
| 1159 | 1182 | ||
| 1160 | static int __kprobes | 1183 | static int __kprobes |
| @@ -1178,9 +1201,7 @@ perf_event_nmi_handler(struct notifier_block *self, | |||
| 1178 | 1201 | ||
| 1179 | regs = args->regs; | 1202 | regs = args->regs; |
| 1180 | 1203 | ||
| 1181 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 1182 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 1204 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
| 1183 | #endif | ||
| 1184 | /* | 1205 | /* |
| 1185 | * Can't rely on the handled return value to say it was our NMI, two | 1206 | * Can't rely on the handled return value to say it was our NMI, two |
| 1186 | * events could trigger 'simultaneously' raising two back-to-back NMIs. | 1207 | * events could trigger 'simultaneously' raising two back-to-back NMIs. |
| @@ -1217,118 +1238,11 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | |||
| 1217 | return &unconstrained; | 1238 | return &unconstrained; |
| 1218 | } | 1239 | } |
| 1219 | 1240 | ||
| 1220 | static int x86_event_sched_in(struct perf_event *event, | ||
| 1221 | struct perf_cpu_context *cpuctx) | ||
| 1222 | { | ||
| 1223 | int ret = 0; | ||
| 1224 | |||
| 1225 | event->state = PERF_EVENT_STATE_ACTIVE; | ||
| 1226 | event->oncpu = smp_processor_id(); | ||
| 1227 | event->tstamp_running += event->ctx->time - event->tstamp_stopped; | ||
| 1228 | |||
| 1229 | if (!is_x86_event(event)) | ||
| 1230 | ret = event->pmu->enable(event); | ||
| 1231 | |||
| 1232 | if (!ret && !is_software_event(event)) | ||
| 1233 | cpuctx->active_oncpu++; | ||
| 1234 | |||
| 1235 | if (!ret && event->attr.exclusive) | ||
| 1236 | cpuctx->exclusive = 1; | ||
| 1237 | |||
| 1238 | return ret; | ||
| 1239 | } | ||
| 1240 | |||
| 1241 | static void x86_event_sched_out(struct perf_event *event, | ||
| 1242 | struct perf_cpu_context *cpuctx) | ||
| 1243 | { | ||
| 1244 | event->state = PERF_EVENT_STATE_INACTIVE; | ||
| 1245 | event->oncpu = -1; | ||
| 1246 | |||
| 1247 | if (!is_x86_event(event)) | ||
| 1248 | event->pmu->disable(event); | ||
| 1249 | |||
| 1250 | event->tstamp_running -= event->ctx->time - event->tstamp_stopped; | ||
| 1251 | |||
| 1252 | if (!is_software_event(event)) | ||
| 1253 | cpuctx->active_oncpu--; | ||
| 1254 | |||
| 1255 | if (event->attr.exclusive || !cpuctx->active_oncpu) | ||
| 1256 | cpuctx->exclusive = 0; | ||
| 1257 | } | ||
| 1258 | |||
| 1259 | /* | ||
| 1260 | * Called to enable a whole group of events. | ||
| 1261 | * Returns 1 if the group was enabled, or -EAGAIN if it could not be. | ||
| 1262 | * Assumes the caller has disabled interrupts and has | ||
| 1263 | * frozen the PMU with hw_perf_save_disable. | ||
| 1264 | * | ||
| 1265 | * called with PMU disabled. If successful and return value 1, | ||
| 1266 | * then guaranteed to call perf_enable() and hw_perf_enable() | ||
| 1267 | */ | ||
| 1268 | int hw_perf_group_sched_in(struct perf_event *leader, | ||
| 1269 | struct perf_cpu_context *cpuctx, | ||
| 1270 | struct perf_event_context *ctx) | ||
| 1271 | { | ||
| 1272 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 1273 | struct perf_event *sub; | ||
| 1274 | int assign[X86_PMC_IDX_MAX]; | ||
| 1275 | int n0, n1, ret; | ||
| 1276 | |||
| 1277 | /* n0 = total number of events */ | ||
| 1278 | n0 = collect_events(cpuc, leader, true); | ||
| 1279 | if (n0 < 0) | ||
| 1280 | return n0; | ||
| 1281 | |||
| 1282 | ret = x86_schedule_events(cpuc, n0, assign); | ||
| 1283 | if (ret) | ||
| 1284 | return ret; | ||
| 1285 | |||
| 1286 | ret = x86_event_sched_in(leader, cpuctx); | ||
| 1287 | if (ret) | ||
| 1288 | return ret; | ||
| 1289 | |||
| 1290 | n1 = 1; | ||
| 1291 | list_for_each_entry(sub, &leader->sibling_list, group_entry) { | ||
| 1292 | if (sub->state > PERF_EVENT_STATE_OFF) { | ||
| 1293 | ret = x86_event_sched_in(sub, cpuctx); | ||
| 1294 | if (ret) | ||
| 1295 | goto undo; | ||
| 1296 | ++n1; | ||
| 1297 | } | ||
| 1298 | } | ||
| 1299 | /* | ||
| 1300 | * copy new assignment, now we know it is possible | ||
| 1301 | * will be used by hw_perf_enable() | ||
| 1302 | */ | ||
| 1303 | memcpy(cpuc->assign, assign, n0*sizeof(int)); | ||
| 1304 | |||
| 1305 | cpuc->n_events = n0; | ||
| 1306 | cpuc->n_added += n1; | ||
| 1307 | ctx->nr_active += n1; | ||
| 1308 | |||
| 1309 | /* | ||
| 1310 | * 1 means successful and events are active | ||
| 1311 | * This is not quite true because we defer | ||
| 1312 | * actual activation until hw_perf_enable() but | ||
| 1313 | * this way we ensure the caller won't try to enable | ||
| 1314 | * individual events | ||
| 1315 | */ | ||
| 1316 | return 1; | ||
| 1317 | undo: | ||
| 1318 | x86_event_sched_out(leader, cpuctx); | ||
| 1319 | n0 = 1; | ||
| 1320 | list_for_each_entry(sub, &leader->sibling_list, group_entry) { | ||
| 1321 | if (sub->state == PERF_EVENT_STATE_ACTIVE) { | ||
| 1322 | x86_event_sched_out(sub, cpuctx); | ||
| 1323 | if (++n0 == n1) | ||
| 1324 | break; | ||
| 1325 | } | ||
| 1326 | } | ||
| 1327 | return ret; | ||
| 1328 | } | ||
| 1329 | |||
| 1330 | #include "perf_event_amd.c" | 1241 | #include "perf_event_amd.c" |
| 1331 | #include "perf_event_p6.c" | 1242 | #include "perf_event_p6.c" |
| 1243 | #include "perf_event_p4.c" | ||
| 1244 | #include "perf_event_intel_lbr.c" | ||
| 1245 | #include "perf_event_intel_ds.c" | ||
| 1332 | #include "perf_event_intel.c" | 1246 | #include "perf_event_intel.c" |
| 1333 | 1247 | ||
| 1334 | static int __cpuinit | 1248 | static int __cpuinit |
| @@ -1402,48 +1316,50 @@ void __init init_hw_perf_events(void) | |||
| 1402 | 1316 | ||
| 1403 | pr_cont("%s PMU driver.\n", x86_pmu.name); | 1317 | pr_cont("%s PMU driver.\n", x86_pmu.name); |
| 1404 | 1318 | ||
| 1405 | if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) { | 1319 | if (x86_pmu.quirks) |
| 1320 | x86_pmu.quirks(); | ||
| 1321 | |||
| 1322 | if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { | ||
| 1406 | WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", | 1323 | WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", |
| 1407 | x86_pmu.num_events, X86_PMC_MAX_GENERIC); | 1324 | x86_pmu.num_counters, X86_PMC_MAX_GENERIC); |
| 1408 | x86_pmu.num_events = X86_PMC_MAX_GENERIC; | 1325 | x86_pmu.num_counters = X86_PMC_MAX_GENERIC; |
| 1409 | } | 1326 | } |
| 1410 | perf_event_mask = (1 << x86_pmu.num_events) - 1; | 1327 | x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; |
| 1411 | perf_max_events = x86_pmu.num_events; | 1328 | perf_max_events = x86_pmu.num_counters; |
| 1412 | 1329 | ||
| 1413 | if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) { | 1330 | if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { |
| 1414 | WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", | 1331 | WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", |
| 1415 | x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED); | 1332 | x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED); |
| 1416 | x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED; | 1333 | x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; |
| 1417 | } | 1334 | } |
| 1418 | 1335 | ||
| 1419 | perf_event_mask |= | 1336 | x86_pmu.intel_ctrl |= |
| 1420 | ((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED; | 1337 | ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; |
| 1421 | x86_pmu.intel_ctrl = perf_event_mask; | ||
| 1422 | 1338 | ||
| 1423 | perf_events_lapic_init(); | 1339 | perf_events_lapic_init(); |
| 1424 | register_die_notifier(&perf_event_nmi_notifier); | 1340 | register_die_notifier(&perf_event_nmi_notifier); |
| 1425 | 1341 | ||
| 1426 | unconstrained = (struct event_constraint) | 1342 | unconstrained = (struct event_constraint) |
| 1427 | __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1, | 1343 | __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, |
| 1428 | 0, x86_pmu.num_events); | 1344 | 0, x86_pmu.num_counters); |
| 1429 | 1345 | ||
| 1430 | if (x86_pmu.event_constraints) { | 1346 | if (x86_pmu.event_constraints) { |
| 1431 | for_each_event_constraint(c, x86_pmu.event_constraints) { | 1347 | for_each_event_constraint(c, x86_pmu.event_constraints) { |
| 1432 | if (c->cmask != INTEL_ARCH_FIXED_MASK) | 1348 | if (c->cmask != X86_RAW_EVENT_MASK) |
| 1433 | continue; | 1349 | continue; |
| 1434 | 1350 | ||
| 1435 | c->idxmsk64 |= (1ULL << x86_pmu.num_events) - 1; | 1351 | c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; |
| 1436 | c->weight += x86_pmu.num_events; | 1352 | c->weight += x86_pmu.num_counters; |
| 1437 | } | 1353 | } |
| 1438 | } | 1354 | } |
| 1439 | 1355 | ||
| 1440 | pr_info("... version: %d\n", x86_pmu.version); | 1356 | pr_info("... version: %d\n", x86_pmu.version); |
| 1441 | pr_info("... bit width: %d\n", x86_pmu.event_bits); | 1357 | pr_info("... bit width: %d\n", x86_pmu.cntval_bits); |
| 1442 | pr_info("... generic registers: %d\n", x86_pmu.num_events); | 1358 | pr_info("... generic registers: %d\n", x86_pmu.num_counters); |
| 1443 | pr_info("... value mask: %016Lx\n", x86_pmu.event_mask); | 1359 | pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask); |
| 1444 | pr_info("... max period: %016Lx\n", x86_pmu.max_period); | 1360 | pr_info("... max period: %016Lx\n", x86_pmu.max_period); |
| 1445 | pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed); | 1361 | pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); |
| 1446 | pr_info("... event mask: %016Lx\n", perf_event_mask); | 1362 | pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); |
| 1447 | 1363 | ||
| 1448 | perf_cpu_notifier(x86_pmu_notifier); | 1364 | perf_cpu_notifier(x86_pmu_notifier); |
| 1449 | } | 1365 | } |
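The intel_ctrl value built above packs the generic counters into the low bits of MSR_CORE_PERF_GLOBAL_CTRL and the fixed counters starting at bit 32. A minimal userspace sketch of the same arithmetic, assuming 4 generic and 3 fixed counters and the usual X86_PMC_IDX_FIXED offset of 32:

#include <stdio.h>
#include <stdint.h>

#define X86_PMC_IDX_FIXED 32	/* fixed-counter bit offset, as in the kernel headers */

int main(void)
{
	int num_counters = 4;		/* generic counters reported by CPUID leaf 0xa */
	int num_counters_fixed = 3;	/* fixed-function counters */
	uint64_t ctrl;

	/* generic counters occupy the low bits of GLOBAL_CTRL */
	ctrl = (1ULL << num_counters) - 1;
	/* fixed counters start at bit 32 */
	ctrl |= ((1ULL << num_counters_fixed) - 1) << X86_PMC_IDX_FIXED;

	/* prints intel_ctrl = 0x000000070000000f */
	printf("intel_ctrl = %#018llx\n", (unsigned long long)ctrl);
	return 0;
}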
| @@ -1453,6 +1369,59 @@ static inline void x86_pmu_read(struct perf_event *event) | |||
| 1453 | x86_perf_event_update(event); | 1369 | x86_perf_event_update(event); |
| 1454 | } | 1370 | } |
| 1455 | 1371 | ||
| 1372 | /* | ||
| 1373 | * Start group events scheduling transaction | ||
| 1374 | * Set the flag to make pmu::enable() not perform the | ||
| 1375 | * schedulability test, it will be performed at commit time | ||
| 1376 | */ | ||
| 1377 | static void x86_pmu_start_txn(const struct pmu *pmu) | ||
| 1378 | { | ||
| 1379 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 1380 | |||
| 1381 | cpuc->group_flag |= PERF_EVENT_TXN_STARTED; | ||
| 1382 | } | ||
| 1383 | |||
| 1384 | /* | ||
| 1385 | * Stop group events scheduling transaction | ||
| 1386 | * Clear the flag and pmu::enable() will perform the | ||
| 1387 | * schedulability test. | ||
| 1388 | */ | ||
| 1389 | static void x86_pmu_cancel_txn(const struct pmu *pmu) | ||
| 1390 | { | ||
| 1391 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 1392 | |||
| 1393 | cpuc->group_flag &= ~PERF_EVENT_TXN_STARTED; | ||
| 1394 | } | ||
| 1395 | |||
| 1396 | /* | ||
| 1397 | * Commit group events scheduling transaction | ||
| 1398 | * Perform the group schedulability test as a whole | ||
| 1399 | * Return 0 if success | ||
| 1400 | */ | ||
| 1401 | static int x86_pmu_commit_txn(const struct pmu *pmu) | ||
| 1402 | { | ||
| 1403 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 1404 | int assign[X86_PMC_IDX_MAX]; | ||
| 1405 | int n, ret; | ||
| 1406 | |||
| 1407 | n = cpuc->n_events; | ||
| 1408 | |||
| 1409 | if (!x86_pmu_initialized()) | ||
| 1410 | return -EAGAIN; | ||
| 1411 | |||
| 1412 | ret = x86_pmu.schedule_events(cpuc, n, assign); | ||
| 1413 | if (ret) | ||
| 1414 | return ret; | ||
| 1415 | |||
| 1416 | /* | ||
| 1417 | * copy new assignment, now we know it is possible | ||
| 1418 | * will be used by hw_perf_enable() | ||
| 1419 | */ | ||
| 1420 | memcpy(cpuc->assign, assign, n*sizeof(int)); | ||
| 1421 | |||
| 1422 | return 0; | ||
| 1423 | } | ||
| 1424 | |||
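The three helpers above implement an optimistic group-scheduling protocol: while a transaction is open, pmu->enable() skips the per-event schedulability test, and commit_txn() validates the whole group with a single x86_pmu.schedule_events() pass. A rough sketch of the intended calling sequence; add_group_events() is a hypothetical stand-in for the generic perf core, not code from this patch:

/*
 * Illustration only: enable a leader and all its siblings under one
 * transaction, then test schedulability of the group as a whole.
 */
static int add_group_events(const struct pmu *pmu, struct perf_event *leader)
{
	struct perf_event *sibling;
	int err;

	pmu->start_txn(pmu);			/* defer the schedulability tests */

	err = pmu->enable(leader);
	if (err)
		goto fail;

	list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
		err = pmu->enable(sibling);
		if (err)
			goto fail;
	}

	err = pmu->commit_txn(pmu);		/* one x86_schedule_events() for the group */
	if (!err)
		return 0;

fail:
	/* the real core also unwinds the events it already enabled */
	pmu->cancel_txn(pmu);
	return err;
}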
| 1456 | static const struct pmu pmu = { | 1425 | static const struct pmu pmu = { |
| 1457 | .enable = x86_pmu_enable, | 1426 | .enable = x86_pmu_enable, |
| 1458 | .disable = x86_pmu_disable, | 1427 | .disable = x86_pmu_disable, |
| @@ -1460,9 +1429,38 @@ static const struct pmu pmu = { | |||
| 1460 | .stop = x86_pmu_stop, | 1429 | .stop = x86_pmu_stop, |
| 1461 | .read = x86_pmu_read, | 1430 | .read = x86_pmu_read, |
| 1462 | .unthrottle = x86_pmu_unthrottle, | 1431 | .unthrottle = x86_pmu_unthrottle, |
| 1432 | .start_txn = x86_pmu_start_txn, | ||
| 1433 | .cancel_txn = x86_pmu_cancel_txn, | ||
| 1434 | .commit_txn = x86_pmu_commit_txn, | ||
| 1463 | }; | 1435 | }; |
| 1464 | 1436 | ||
| 1465 | /* | 1437 | /* |
| 1438 | * validate that we can schedule this event | ||
| 1439 | */ | ||
| 1440 | static int validate_event(struct perf_event *event) | ||
| 1441 | { | ||
| 1442 | struct cpu_hw_events *fake_cpuc; | ||
| 1443 | struct event_constraint *c; | ||
| 1444 | int ret = 0; | ||
| 1445 | |||
| 1446 | fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO); | ||
| 1447 | if (!fake_cpuc) | ||
| 1448 | return -ENOMEM; | ||
| 1449 | |||
| 1450 | c = x86_pmu.get_event_constraints(fake_cpuc, event); | ||
| 1451 | |||
| 1452 | if (!c || !c->weight) | ||
| 1453 | ret = -ENOSPC; | ||
| 1454 | |||
| 1455 | if (x86_pmu.put_event_constraints) | ||
| 1456 | x86_pmu.put_event_constraints(fake_cpuc, event); | ||
| 1457 | |||
| 1458 | kfree(fake_cpuc); | ||
| 1459 | |||
| 1460 | return ret; | ||
| 1461 | } | ||
| 1462 | |||
| 1463 | /* | ||
| 1466 | * validate a single event group | 1464 | * validate a single event group |
| 1467 | * | 1465 | * |
| 1468 | * validation include: | 1466 | * validation include: |
| @@ -1502,7 +1500,7 @@ static int validate_group(struct perf_event *event) | |||
| 1502 | 1500 | ||
| 1503 | fake_cpuc->n_events = n; | 1501 | fake_cpuc->n_events = n; |
| 1504 | 1502 | ||
| 1505 | ret = x86_schedule_events(fake_cpuc, n, NULL); | 1503 | ret = x86_pmu.schedule_events(fake_cpuc, n, NULL); |
| 1506 | 1504 | ||
| 1507 | out_free: | 1505 | out_free: |
| 1508 | kfree(fake_cpuc); | 1506 | kfree(fake_cpuc); |
| @@ -1527,6 +1525,8 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) | |||
| 1527 | 1525 | ||
| 1528 | if (event->group_leader != event) | 1526 | if (event->group_leader != event) |
| 1529 | err = validate_group(event); | 1527 | err = validate_group(event); |
| 1528 | else | ||
| 1529 | err = validate_event(event); | ||
| 1530 | 1530 | ||
| 1531 | event->pmu = tmp; | 1531 | event->pmu = tmp; |
| 1532 | } | 1532 | } |
| @@ -1574,8 +1574,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) | |||
| 1574 | { | 1574 | { |
| 1575 | struct perf_callchain_entry *entry = data; | 1575 | struct perf_callchain_entry *entry = data; |
| 1576 | 1576 | ||
| 1577 | if (reliable) | 1577 | callchain_store(entry, addr); |
| 1578 | callchain_store(entry, addr); | ||
| 1579 | } | 1578 | } |
| 1580 | 1579 | ||
| 1581 | static const struct stacktrace_ops backtrace_ops = { | 1580 | static const struct stacktrace_ops backtrace_ops = { |
| @@ -1597,41 +1596,6 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) | |||
| 1597 | dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); | 1596 | dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); |
| 1598 | } | 1597 | } |
| 1599 | 1598 | ||
| 1600 | /* | ||
| 1601 | * best effort, GUP based copy_from_user() that assumes IRQ or NMI context | ||
| 1602 | */ | ||
| 1603 | static unsigned long | ||
| 1604 | copy_from_user_nmi(void *to, const void __user *from, unsigned long n) | ||
| 1605 | { | ||
| 1606 | unsigned long offset, addr = (unsigned long)from; | ||
| 1607 | int type = in_nmi() ? KM_NMI : KM_IRQ0; | ||
| 1608 | unsigned long size, len = 0; | ||
| 1609 | struct page *page; | ||
| 1610 | void *map; | ||
| 1611 | int ret; | ||
| 1612 | |||
| 1613 | do { | ||
| 1614 | ret = __get_user_pages_fast(addr, 1, 0, &page); | ||
| 1615 | if (!ret) | ||
| 1616 | break; | ||
| 1617 | |||
| 1618 | offset = addr & (PAGE_SIZE - 1); | ||
| 1619 | size = min(PAGE_SIZE - offset, n - len); | ||
| 1620 | |||
| 1621 | map = kmap_atomic(page, type); | ||
| 1622 | memcpy(to, map+offset, size); | ||
| 1623 | kunmap_atomic(map, type); | ||
| 1624 | put_page(page); | ||
| 1625 | |||
| 1626 | len += size; | ||
| 1627 | to += size; | ||
| 1628 | addr += size; | ||
| 1629 | |||
| 1630 | } while (len < n); | ||
| 1631 | |||
| 1632 | return len; | ||
| 1633 | } | ||
| 1634 | |||
| 1635 | #ifdef CONFIG_COMPAT | 1599 | #ifdef CONFIG_COMPAT |
| 1636 | static inline int | 1600 | static inline int |
| 1637 | perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) | 1601 | perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) |
| @@ -1727,6 +1691,11 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | |||
| 1727 | { | 1691 | { |
| 1728 | struct perf_callchain_entry *entry; | 1692 | struct perf_callchain_entry *entry; |
| 1729 | 1693 | ||
| 1694 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { | ||
| 1695 | /* TODO: We don't support guest os callchain now */ | ||
| 1696 | return NULL; | ||
| 1697 | } | ||
| 1698 | |||
| 1730 | if (in_nmi()) | 1699 | if (in_nmi()) |
| 1731 | entry = &__get_cpu_var(pmc_nmi_entry); | 1700 | entry = &__get_cpu_var(pmc_nmi_entry); |
| 1732 | else | 1701 | else |
| @@ -1750,3 +1719,37 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski | |||
| 1750 | regs->cs = __KERNEL_CS; | 1719 | regs->cs = __KERNEL_CS; |
| 1751 | local_save_flags(regs->flags); | 1720 | local_save_flags(regs->flags); |
| 1752 | } | 1721 | } |
| 1722 | |||
| 1723 | unsigned long perf_instruction_pointer(struct pt_regs *regs) | ||
| 1724 | { | ||
| 1725 | unsigned long ip; | ||
| 1726 | |||
| 1727 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) | ||
| 1728 | ip = perf_guest_cbs->get_guest_ip(); | ||
| 1729 | else | ||
| 1730 | ip = instruction_pointer(regs); | ||
| 1731 | |||
| 1732 | return ip; | ||
| 1733 | } | ||
| 1734 | |||
| 1735 | unsigned long perf_misc_flags(struct pt_regs *regs) | ||
| 1736 | { | ||
| 1737 | int misc = 0; | ||
| 1738 | |||
| 1739 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { | ||
| 1740 | if (perf_guest_cbs->is_user_mode()) | ||
| 1741 | misc |= PERF_RECORD_MISC_GUEST_USER; | ||
| 1742 | else | ||
| 1743 | misc |= PERF_RECORD_MISC_GUEST_KERNEL; | ||
| 1744 | } else { | ||
| 1745 | if (user_mode(regs)) | ||
| 1746 | misc |= PERF_RECORD_MISC_USER; | ||
| 1747 | else | ||
| 1748 | misc |= PERF_RECORD_MISC_KERNEL; | ||
| 1749 | } | ||
| 1750 | |||
| 1751 | if (regs->flags & PERF_EFLAGS_EXACT) | ||
| 1752 | misc |= PERF_RECORD_MISC_EXACT_IP; | ||
| 1753 | |||
| 1754 | return misc; | ||
| 1755 | } | ||
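perf_instruction_pointer() and perf_misc_flags() feed the sample headers, so the guest/host split and the exact-IP hint reach userspace through the misc field of each record. A rough consumer-side sketch using the perf ABI flag names; classify_sample() itself is made up for illustration:

#include <linux/perf_event.h>
#include <stdio.h>

/* Hypothetical helper: map a sample's header.misc to its origin. */
static const char *classify_sample(unsigned short misc)
{
	switch (misc & PERF_RECORD_MISC_CPUMODE_MASK) {
	case PERF_RECORD_MISC_KERNEL:		return "host kernel";
	case PERF_RECORD_MISC_USER:		return "host user";
	case PERF_RECORD_MISC_GUEST_KERNEL:	return "guest kernel";
	case PERF_RECORD_MISC_GUEST_USER:	return "guest user";
	default:				return "unknown";
	}
}

int main(void)
{
	unsigned short misc = PERF_RECORD_MISC_GUEST_KERNEL | PERF_RECORD_MISC_EXACT_IP;

	/* prints "guest kernel (exact IP)" */
	printf("%s%s\n", classify_sample(misc),
	       (misc & PERF_RECORD_MISC_EXACT_IP) ? " (exact IP)" : "");
	return 0;
}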
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index db6f7d4056e1..611df11ba15e 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c | |||
| @@ -2,7 +2,7 @@ | |||
| 2 | 2 | ||
| 3 | static DEFINE_RAW_SPINLOCK(amd_nb_lock); | 3 | static DEFINE_RAW_SPINLOCK(amd_nb_lock); |
| 4 | 4 | ||
| 5 | static __initconst u64 amd_hw_cache_event_ids | 5 | static __initconst const u64 amd_hw_cache_event_ids |
| 6 | [PERF_COUNT_HW_CACHE_MAX] | 6 | [PERF_COUNT_HW_CACHE_MAX] |
| 7 | [PERF_COUNT_HW_CACHE_OP_MAX] | 7 | [PERF_COUNT_HW_CACHE_OP_MAX] |
| 8 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 8 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
| @@ -111,22 +111,19 @@ static u64 amd_pmu_event_map(int hw_event) | |||
| 111 | return amd_perfmon_event_map[hw_event]; | 111 | return amd_perfmon_event_map[hw_event]; |
| 112 | } | 112 | } |
| 113 | 113 | ||
| 114 | static u64 amd_pmu_raw_event(u64 hw_event) | 114 | static int amd_pmu_hw_config(struct perf_event *event) |
| 115 | { | 115 | { |
| 116 | #define K7_EVNTSEL_EVENT_MASK 0xF000000FFULL | 116 | int ret = x86_pmu_hw_config(event); |
| 117 | #define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL | 117 | |
| 118 | #define K7_EVNTSEL_EDGE_MASK 0x000040000ULL | 118 | if (ret) |
| 119 | #define K7_EVNTSEL_INV_MASK 0x000800000ULL | 119 | return ret; |
| 120 | #define K7_EVNTSEL_REG_MASK 0x0FF000000ULL | 120 | |
| 121 | 121 | if (event->attr.type != PERF_TYPE_RAW) | |
| 122 | #define K7_EVNTSEL_MASK \ | 122 | return 0; |
| 123 | (K7_EVNTSEL_EVENT_MASK | \ | 123 | |
| 124 | K7_EVNTSEL_UNIT_MASK | \ | 124 | event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK; |
| 125 | K7_EVNTSEL_EDGE_MASK | \ | 125 | |
| 126 | K7_EVNTSEL_INV_MASK | \ | 126 | return 0; |
| 127 | K7_EVNTSEL_REG_MASK) | ||
| 128 | |||
| 129 | return hw_event & K7_EVNTSEL_MASK; | ||
| 130 | } | 127 | } |
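With the per-PMU raw_event() hook gone, amd_pmu_hw_config() only merges the hardware event-selection fields of a raw attr.config into the config that x86_pmu_hw_config() has already seeded with the privilege and interrupt bits. A standalone sketch of that filtering; the mask below is spelled out from the documented AMD64 event-select layout and only illustrates what AMD64_RAW_EVENT_MASK is meant to cover:

#include <stdint.h>
#include <stdio.h>

/* Event-select fields a raw config may specify (AMD64 layout, illustrative). */
#define EVSEL_EVENT	0x0F000000FFULL		/* event select, bits 7:0 and 35:32 */
#define EVSEL_UMASK	0x000000FF00ULL		/* unit mask */
#define EVSEL_EDGE	(1ULL << 18)
#define EVSEL_INV	(1ULL << 23)
#define EVSEL_CMASK	0x00FF000000ULL		/* counter mask */

#define RAW_EVENT_MASK	(EVSEL_EVENT | EVSEL_UMASK | EVSEL_EDGE | EVSEL_INV | EVSEL_CMASK)

int main(void)
{
	/* hypothetical raw config: event 0xc0 with USR|OS|INT|EN already set */
	uint64_t attr_config = 0x5300c0ULL;
	uint64_t merged      = attr_config & RAW_EVENT_MASK;

	/* only the selection fields survive: prints 0x5300c0 -> 0xc0 */
	printf("%#llx -> %#llx\n",
	       (unsigned long long)attr_config, (unsigned long long)merged);
	return 0;
}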
| 131 | 128 | ||
| 132 | /* | 129 | /* |
| @@ -165,7 +162,7 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc, | |||
| 165 | * be removed on one CPU at a time AND PMU is disabled | 162 | * be removed on one CPU at a time AND PMU is disabled |
| 166 | * when we come here | 163 | * when we come here |
| 167 | */ | 164 | */ |
| 168 | for (i = 0; i < x86_pmu.num_events; i++) { | 165 | for (i = 0; i < x86_pmu.num_counters; i++) { |
| 169 | if (nb->owners[i] == event) { | 166 | if (nb->owners[i] == event) { |
| 170 | cmpxchg(nb->owners+i, event, NULL); | 167 | cmpxchg(nb->owners+i, event, NULL); |
| 171 | break; | 168 | break; |
| @@ -215,7 +212,7 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | |||
| 215 | struct hw_perf_event *hwc = &event->hw; | 212 | struct hw_perf_event *hwc = &event->hw; |
| 216 | struct amd_nb *nb = cpuc->amd_nb; | 213 | struct amd_nb *nb = cpuc->amd_nb; |
| 217 | struct perf_event *old = NULL; | 214 | struct perf_event *old = NULL; |
| 218 | int max = x86_pmu.num_events; | 215 | int max = x86_pmu.num_counters; |
| 219 | int i, j, k = -1; | 216 | int i, j, k = -1; |
| 220 | 217 | ||
| 221 | /* | 218 | /* |
| @@ -293,7 +290,7 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) | |||
| 293 | /* | 290 | /* |
| 294 | * initialize all possible NB constraints | 291 | * initialize all possible NB constraints |
| 295 | */ | 292 | */ |
| 296 | for (i = 0; i < x86_pmu.num_events; i++) { | 293 | for (i = 0; i < x86_pmu.num_counters; i++) { |
| 297 | __set_bit(i, nb->event_constraints[i].idxmsk); | 294 | __set_bit(i, nb->event_constraints[i].idxmsk); |
| 298 | nb->event_constraints[i].weight = 1; | 295 | nb->event_constraints[i].weight = 1; |
| 299 | } | 296 | } |
| @@ -371,21 +368,22 @@ static void amd_pmu_cpu_dead(int cpu) | |||
| 371 | raw_spin_unlock(&amd_nb_lock); | 368 | raw_spin_unlock(&amd_nb_lock); |
| 372 | } | 369 | } |
| 373 | 370 | ||
| 374 | static __initconst struct x86_pmu amd_pmu = { | 371 | static __initconst const struct x86_pmu amd_pmu = { |
| 375 | .name = "AMD", | 372 | .name = "AMD", |
| 376 | .handle_irq = x86_pmu_handle_irq, | 373 | .handle_irq = x86_pmu_handle_irq, |
| 377 | .disable_all = x86_pmu_disable_all, | 374 | .disable_all = x86_pmu_disable_all, |
| 378 | .enable_all = x86_pmu_enable_all, | 375 | .enable_all = x86_pmu_enable_all, |
| 379 | .enable = x86_pmu_enable_event, | 376 | .enable = x86_pmu_enable_event, |
| 380 | .disable = x86_pmu_disable_event, | 377 | .disable = x86_pmu_disable_event, |
| 378 | .hw_config = amd_pmu_hw_config, | ||
| 379 | .schedule_events = x86_schedule_events, | ||
| 381 | .eventsel = MSR_K7_EVNTSEL0, | 380 | .eventsel = MSR_K7_EVNTSEL0, |
| 382 | .perfctr = MSR_K7_PERFCTR0, | 381 | .perfctr = MSR_K7_PERFCTR0, |
| 383 | .event_map = amd_pmu_event_map, | 382 | .event_map = amd_pmu_event_map, |
| 384 | .raw_event = amd_pmu_raw_event, | ||
| 385 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), | 383 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), |
| 386 | .num_events = 4, | 384 | .num_counters = 4, |
| 387 | .event_bits = 48, | 385 | .cntval_bits = 48, |
| 388 | .event_mask = (1ULL << 48) - 1, | 386 | .cntval_mask = (1ULL << 48) - 1, |
| 389 | .apic = 1, | 387 | .apic = 1, |
| 390 | /* use highest bit to detect overflow */ | 388 | /* use highest bit to detect overflow */ |
| 391 | .max_period = (1ULL << 47) - 1, | 389 | .max_period = (1ULL << 47) - 1, |
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 9c794ac87837..fdbc652d3feb 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
| @@ -88,7 +88,7 @@ static u64 intel_pmu_event_map(int hw_event) | |||
| 88 | return intel_perfmon_event_map[hw_event]; | 88 | return intel_perfmon_event_map[hw_event]; |
| 89 | } | 89 | } |
| 90 | 90 | ||
| 91 | static __initconst u64 westmere_hw_cache_event_ids | 91 | static __initconst const u64 westmere_hw_cache_event_ids |
| 92 | [PERF_COUNT_HW_CACHE_MAX] | 92 | [PERF_COUNT_HW_CACHE_MAX] |
| 93 | [PERF_COUNT_HW_CACHE_OP_MAX] | 93 | [PERF_COUNT_HW_CACHE_OP_MAX] |
| 94 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 94 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
| @@ -179,7 +179,7 @@ static __initconst u64 westmere_hw_cache_event_ids | |||
| 179 | }, | 179 | }, |
| 180 | }; | 180 | }; |
| 181 | 181 | ||
| 182 | static __initconst u64 nehalem_hw_cache_event_ids | 182 | static __initconst const u64 nehalem_hw_cache_event_ids |
| 183 | [PERF_COUNT_HW_CACHE_MAX] | 183 | [PERF_COUNT_HW_CACHE_MAX] |
| 184 | [PERF_COUNT_HW_CACHE_OP_MAX] | 184 | [PERF_COUNT_HW_CACHE_OP_MAX] |
| 185 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 185 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
| @@ -270,7 +270,7 @@ static __initconst u64 nehalem_hw_cache_event_ids | |||
| 270 | }, | 270 | }, |
| 271 | }; | 271 | }; |
| 272 | 272 | ||
| 273 | static __initconst u64 core2_hw_cache_event_ids | 273 | static __initconst const u64 core2_hw_cache_event_ids |
| 274 | [PERF_COUNT_HW_CACHE_MAX] | 274 | [PERF_COUNT_HW_CACHE_MAX] |
| 275 | [PERF_COUNT_HW_CACHE_OP_MAX] | 275 | [PERF_COUNT_HW_CACHE_OP_MAX] |
| 276 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 276 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
| @@ -361,7 +361,7 @@ static __initconst u64 core2_hw_cache_event_ids | |||
| 361 | }, | 361 | }, |
| 362 | }; | 362 | }; |
| 363 | 363 | ||
| 364 | static __initconst u64 atom_hw_cache_event_ids | 364 | static __initconst const u64 atom_hw_cache_event_ids |
| 365 | [PERF_COUNT_HW_CACHE_MAX] | 365 | [PERF_COUNT_HW_CACHE_MAX] |
| 366 | [PERF_COUNT_HW_CACHE_OP_MAX] | 366 | [PERF_COUNT_HW_CACHE_OP_MAX] |
| 367 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 367 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
| @@ -452,60 +452,6 @@ static __initconst u64 atom_hw_cache_event_ids | |||
| 452 | }, | 452 | }, |
| 453 | }; | 453 | }; |
| 454 | 454 | ||
| 455 | static u64 intel_pmu_raw_event(u64 hw_event) | ||
| 456 | { | ||
| 457 | #define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
| 458 | #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
| 459 | #define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL | ||
| 460 | #define CORE_EVNTSEL_INV_MASK 0x00800000ULL | ||
| 461 | #define CORE_EVNTSEL_REG_MASK 0xFF000000ULL | ||
| 462 | |||
| 463 | #define CORE_EVNTSEL_MASK \ | ||
| 464 | (INTEL_ARCH_EVTSEL_MASK | \ | ||
| 465 | INTEL_ARCH_UNIT_MASK | \ | ||
| 466 | INTEL_ARCH_EDGE_MASK | \ | ||
| 467 | INTEL_ARCH_INV_MASK | \ | ||
| 468 | INTEL_ARCH_CNT_MASK) | ||
| 469 | |||
| 470 | return hw_event & CORE_EVNTSEL_MASK; | ||
| 471 | } | ||
| 472 | |||
| 473 | static void intel_pmu_enable_bts(u64 config) | ||
| 474 | { | ||
| 475 | unsigned long debugctlmsr; | ||
| 476 | |||
| 477 | debugctlmsr = get_debugctlmsr(); | ||
| 478 | |||
| 479 | debugctlmsr |= X86_DEBUGCTL_TR; | ||
| 480 | debugctlmsr |= X86_DEBUGCTL_BTS; | ||
| 481 | debugctlmsr |= X86_DEBUGCTL_BTINT; | ||
| 482 | |||
| 483 | if (!(config & ARCH_PERFMON_EVENTSEL_OS)) | ||
| 484 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; | ||
| 485 | |||
| 486 | if (!(config & ARCH_PERFMON_EVENTSEL_USR)) | ||
| 487 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; | ||
| 488 | |||
| 489 | update_debugctlmsr(debugctlmsr); | ||
| 490 | } | ||
| 491 | |||
| 492 | static void intel_pmu_disable_bts(void) | ||
| 493 | { | ||
| 494 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 495 | unsigned long debugctlmsr; | ||
| 496 | |||
| 497 | if (!cpuc->ds) | ||
| 498 | return; | ||
| 499 | |||
| 500 | debugctlmsr = get_debugctlmsr(); | ||
| 501 | |||
| 502 | debugctlmsr &= | ||
| 503 | ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | | ||
| 504 | X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); | ||
| 505 | |||
| 506 | update_debugctlmsr(debugctlmsr); | ||
| 507 | } | ||
| 508 | |||
| 509 | static void intel_pmu_disable_all(void) | 455 | static void intel_pmu_disable_all(void) |
| 510 | { | 456 | { |
| 511 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 457 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| @@ -514,12 +460,17 @@ static void intel_pmu_disable_all(void) | |||
| 514 | 460 | ||
| 515 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) | 461 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) |
| 516 | intel_pmu_disable_bts(); | 462 | intel_pmu_disable_bts(); |
| 463 | |||
| 464 | intel_pmu_pebs_disable_all(); | ||
| 465 | intel_pmu_lbr_disable_all(); | ||
| 517 | } | 466 | } |
| 518 | 467 | ||
| 519 | static void intel_pmu_enable_all(void) | 468 | static void intel_pmu_enable_all(int added) |
| 520 | { | 469 | { |
| 521 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 470 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 522 | 471 | ||
| 472 | intel_pmu_pebs_enable_all(); | ||
| 473 | intel_pmu_lbr_enable_all(); | ||
| 523 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); | 474 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); |
| 524 | 475 | ||
| 525 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { | 476 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { |
| @@ -533,6 +484,42 @@ static void intel_pmu_enable_all(void) | |||
| 533 | } | 484 | } |
| 534 | } | 485 | } |
| 535 | 486 | ||
| 487 | /* | ||
| 488 | * Workaround for: | ||
| 489 | * Intel Errata AAK100 (model 26) | ||
| 490 | * Intel Errata AAP53 (model 30) | ||
| 491 | * Intel Errata BD53 (model 44) | ||
| 492 | * | ||
| 493 | * These chips need to be 'reset' when adding counters by programming | ||
| 494 | * the magic three (non counting) events 0x4300D2, 0x4300B1 and 0x4300B5 | ||
| 495 | * either in sequence on the same PMC or on different PMCs. | ||
| 496 | */ | ||
| 497 | static void intel_pmu_nhm_enable_all(int added) | ||
| 498 | { | ||
| 499 | if (added) { | ||
| 500 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 501 | int i; | ||
| 502 | |||
| 503 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 0, 0x4300D2); | ||
| 504 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x4300B1); | ||
| 505 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x4300B5); | ||
| 506 | |||
| 507 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x3); | ||
| 508 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0); | ||
| 509 | |||
| 510 | for (i = 0; i < 3; i++) { | ||
| 511 | struct perf_event *event = cpuc->events[i]; | ||
| 512 | |||
| 513 | if (!event) | ||
| 514 | continue; | ||
| 515 | |||
| 516 | __x86_pmu_enable_event(&event->hw, | ||
| 517 | ARCH_PERFMON_EVENTSEL_ENABLE); | ||
| 518 | } | ||
| 519 | } | ||
| 520 | intel_pmu_enable_all(added); | ||
| 521 | } | ||
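The three magic values decode to ordinary event-select programming: 0x43 in bits 23:16 is just USR+OS+EN, so 0x4300B1 is event 0xB1 with a zero unit mask, counting in both rings. A throwaway decoder, assuming the standard IA32_PERFEVTSELx field layout:

#include <stdint.h>
#include <stdio.h>

static void decode(uint64_t evtsel)
{
	printf("event 0x%02llx umask 0x%02llx usr=%llu os=%llu en=%llu\n",
	       (unsigned long long)(evtsel & 0xff),
	       (unsigned long long)((evtsel >> 8) & 0xff),
	       (unsigned long long)((evtsel >> 16) & 1),
	       (unsigned long long)((evtsel >> 17) & 1),
	       (unsigned long long)((evtsel >> 22) & 1));
}

int main(void)
{
	decode(0x4300D2);	/* event 0xd2, umask 0x00, usr=1 os=1 en=1 */
	decode(0x4300B1);
	decode(0x4300B5);
	return 0;
}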
| 522 | |||
| 536 | static inline u64 intel_pmu_get_status(void) | 523 | static inline u64 intel_pmu_get_status(void) |
| 537 | { | 524 | { |
| 538 | u64 status; | 525 | u64 status; |
| @@ -547,8 +534,7 @@ static inline void intel_pmu_ack_status(u64 ack) | |||
| 547 | wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); | 534 | wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); |
| 548 | } | 535 | } |
| 549 | 536 | ||
| 550 | static inline void | 537 | static void intel_pmu_disable_fixed(struct hw_perf_event *hwc) |
| 551 | intel_pmu_disable_fixed(struct hw_perf_event *hwc) | ||
| 552 | { | 538 | { |
| 553 | int idx = hwc->idx - X86_PMC_IDX_FIXED; | 539 | int idx = hwc->idx - X86_PMC_IDX_FIXED; |
| 554 | u64 ctrl_val, mask; | 540 | u64 ctrl_val, mask; |
| @@ -557,71 +543,10 @@ intel_pmu_disable_fixed(struct hw_perf_event *hwc) | |||
| 557 | 543 | ||
| 558 | rdmsrl(hwc->config_base, ctrl_val); | 544 | rdmsrl(hwc->config_base, ctrl_val); |
| 559 | ctrl_val &= ~mask; | 545 | ctrl_val &= ~mask; |
| 560 | (void)checking_wrmsrl(hwc->config_base, ctrl_val); | 546 | wrmsrl(hwc->config_base, ctrl_val); |
| 561 | } | ||
| 562 | |||
| 563 | static void intel_pmu_drain_bts_buffer(void) | ||
| 564 | { | ||
| 565 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 566 | struct debug_store *ds = cpuc->ds; | ||
| 567 | struct bts_record { | ||
| 568 | u64 from; | ||
| 569 | u64 to; | ||
| 570 | u64 flags; | ||
| 571 | }; | ||
| 572 | struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; | ||
| 573 | struct bts_record *at, *top; | ||
| 574 | struct perf_output_handle handle; | ||
| 575 | struct perf_event_header header; | ||
| 576 | struct perf_sample_data data; | ||
| 577 | struct pt_regs regs; | ||
| 578 | |||
| 579 | if (!event) | ||
| 580 | return; | ||
| 581 | |||
| 582 | if (!ds) | ||
| 583 | return; | ||
| 584 | |||
| 585 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; | ||
| 586 | top = (struct bts_record *)(unsigned long)ds->bts_index; | ||
| 587 | |||
| 588 | if (top <= at) | ||
| 589 | return; | ||
| 590 | |||
| 591 | ds->bts_index = ds->bts_buffer_base; | ||
| 592 | |||
| 593 | perf_sample_data_init(&data, 0); | ||
| 594 | |||
| 595 | data.period = event->hw.last_period; | ||
| 596 | regs.ip = 0; | ||
| 597 | |||
| 598 | /* | ||
| 599 | * Prepare a generic sample, i.e. fill in the invariant fields. | ||
| 600 | * We will overwrite the from and to address before we output | ||
| 601 | * the sample. | ||
| 602 | */ | ||
| 603 | perf_prepare_sample(&header, &data, event, ®s); | ||
| 604 | |||
| 605 | if (perf_output_begin(&handle, event, | ||
| 606 | header.size * (top - at), 1, 1)) | ||
| 607 | return; | ||
| 608 | |||
| 609 | for (; at < top; at++) { | ||
| 610 | data.ip = at->from; | ||
| 611 | data.addr = at->to; | ||
| 612 | |||
| 613 | perf_output_sample(&handle, &header, &data, event); | ||
| 614 | } | ||
| 615 | |||
| 616 | perf_output_end(&handle); | ||
| 617 | |||
| 618 | /* There's new data available. */ | ||
| 619 | event->hw.interrupts++; | ||
| 620 | event->pending_kill = POLL_IN; | ||
| 621 | } | 547 | } |
| 622 | 548 | ||
| 623 | static inline void | 549 | static void intel_pmu_disable_event(struct perf_event *event) |
| 624 | intel_pmu_disable_event(struct perf_event *event) | ||
| 625 | { | 550 | { |
| 626 | struct hw_perf_event *hwc = &event->hw; | 551 | struct hw_perf_event *hwc = &event->hw; |
| 627 | 552 | ||
| @@ -637,14 +562,15 @@ intel_pmu_disable_event(struct perf_event *event) | |||
| 637 | } | 562 | } |
| 638 | 563 | ||
| 639 | x86_pmu_disable_event(event); | 564 | x86_pmu_disable_event(event); |
| 565 | |||
| 566 | if (unlikely(event->attr.precise_ip)) | ||
| 567 | intel_pmu_pebs_disable(event); | ||
| 640 | } | 568 | } |
| 641 | 569 | ||
| 642 | static inline void | 570 | static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) |
| 643 | intel_pmu_enable_fixed(struct hw_perf_event *hwc) | ||
| 644 | { | 571 | { |
| 645 | int idx = hwc->idx - X86_PMC_IDX_FIXED; | 572 | int idx = hwc->idx - X86_PMC_IDX_FIXED; |
| 646 | u64 ctrl_val, bits, mask; | 573 | u64 ctrl_val, bits, mask; |
| 647 | int err; | ||
| 648 | 574 | ||
| 649 | /* | 575 | /* |
| 650 | * Enable IRQ generation (0x8), | 576 | * Enable IRQ generation (0x8), |
| @@ -669,7 +595,7 @@ intel_pmu_enable_fixed(struct hw_perf_event *hwc) | |||
| 669 | rdmsrl(hwc->config_base, ctrl_val); | 595 | rdmsrl(hwc->config_base, ctrl_val); |
| 670 | ctrl_val &= ~mask; | 596 | ctrl_val &= ~mask; |
| 671 | ctrl_val |= bits; | 597 | ctrl_val |= bits; |
| 672 | err = checking_wrmsrl(hwc->config_base, ctrl_val); | 598 | wrmsrl(hwc->config_base, ctrl_val); |
| 673 | } | 599 | } |
| 674 | 600 | ||
| 675 | static void intel_pmu_enable_event(struct perf_event *event) | 601 | static void intel_pmu_enable_event(struct perf_event *event) |
| @@ -689,7 +615,10 @@ static void intel_pmu_enable_event(struct perf_event *event) | |||
| 689 | return; | 615 | return; |
| 690 | } | 616 | } |
| 691 | 617 | ||
| 692 | __x86_pmu_enable_event(hwc); | 618 | if (unlikely(event->attr.precise_ip)) |
| 619 | intel_pmu_pebs_enable(event); | ||
| 620 | |||
| 621 | __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); | ||
| 693 | } | 622 | } |
| 694 | 623 | ||
| 695 | /* | 624 | /* |
| @@ -708,20 +637,20 @@ static void intel_pmu_reset(void) | |||
| 708 | unsigned long flags; | 637 | unsigned long flags; |
| 709 | int idx; | 638 | int idx; |
| 710 | 639 | ||
| 711 | if (!x86_pmu.num_events) | 640 | if (!x86_pmu.num_counters) |
| 712 | return; | 641 | return; |
| 713 | 642 | ||
| 714 | local_irq_save(flags); | 643 | local_irq_save(flags); |
| 715 | 644 | ||
| 716 | printk("clearing PMU state on CPU#%d\n", smp_processor_id()); | 645 | printk("clearing PMU state on CPU#%d\n", smp_processor_id()); |
| 717 | 646 | ||
| 718 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 647 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
| 719 | checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); | 648 | checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); |
| 720 | checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); | 649 | checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); |
| 721 | } | 650 | } |
| 722 | for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { | 651 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) |
| 723 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); | 652 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); |
| 724 | } | 653 | |
| 725 | if (ds) | 654 | if (ds) |
| 726 | ds->bts_index = ds->bts_buffer_base; | 655 | ds->bts_index = ds->bts_buffer_base; |
| 727 | 656 | ||
| @@ -747,7 +676,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) | |||
| 747 | intel_pmu_drain_bts_buffer(); | 676 | intel_pmu_drain_bts_buffer(); |
| 748 | status = intel_pmu_get_status(); | 677 | status = intel_pmu_get_status(); |
| 749 | if (!status) { | 678 | if (!status) { |
| 750 | intel_pmu_enable_all(); | 679 | intel_pmu_enable_all(0); |
| 751 | return 0; | 680 | return 0; |
| 752 | } | 681 | } |
| 753 | 682 | ||
| @@ -762,6 +691,15 @@ again: | |||
| 762 | 691 | ||
| 763 | inc_irq_stat(apic_perf_irqs); | 692 | inc_irq_stat(apic_perf_irqs); |
| 764 | ack = status; | 693 | ack = status; |
| 694 | |||
| 695 | intel_pmu_lbr_read(); | ||
| 696 | |||
| 697 | /* | ||
| 698 | * PEBS overflow sets bit 62 in the global status register | ||
| 699 | */ | ||
| 700 | if (__test_and_clear_bit(62, (unsigned long *)&status)) | ||
| 701 | x86_pmu.drain_pebs(regs); | ||
| 702 | |||
| 765 | for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { | 703 | for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { |
| 766 | struct perf_event *event = cpuc->events[bit]; | 704 | struct perf_event *event = cpuc->events[bit]; |
| 767 | 705 | ||
| @@ -787,26 +725,22 @@ again: | |||
| 787 | goto again; | 725 | goto again; |
| 788 | 726 | ||
| 789 | done: | 727 | done: |
| 790 | intel_pmu_enable_all(); | 728 | intel_pmu_enable_all(0); |
| 791 | return 1; | 729 | return 1; |
| 792 | } | 730 | } |
| 793 | 731 | ||
| 794 | static struct event_constraint bts_constraint = | ||
| 795 | EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); | ||
| 796 | |||
| 797 | static struct event_constraint * | 732 | static struct event_constraint * |
| 798 | intel_special_constraints(struct perf_event *event) | 733 | intel_bts_constraints(struct perf_event *event) |
| 799 | { | 734 | { |
| 800 | unsigned int hw_event; | 735 | struct hw_perf_event *hwc = &event->hw; |
| 801 | 736 | unsigned int hw_event, bts_event; | |
| 802 | hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK; | ||
| 803 | 737 | ||
| 804 | if (unlikely((hw_event == | 738 | hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; |
| 805 | x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && | 739 | bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); |
| 806 | (event->hw.sample_period == 1))) { | ||
| 807 | 740 | ||
| 741 | if (unlikely(hw_event == bts_event && hwc->sample_period == 1)) | ||
| 808 | return &bts_constraint; | 742 | return &bts_constraint; |
| 809 | } | 743 | |
| 810 | return NULL; | 744 | return NULL; |
| 811 | } | 745 | } |
| 812 | 746 | ||
| @@ -815,24 +749,53 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event | |||
| 815 | { | 749 | { |
| 816 | struct event_constraint *c; | 750 | struct event_constraint *c; |
| 817 | 751 | ||
| 818 | c = intel_special_constraints(event); | 752 | c = intel_bts_constraints(event); |
| 753 | if (c) | ||
| 754 | return c; | ||
| 755 | |||
| 756 | c = intel_pebs_constraints(event); | ||
| 819 | if (c) | 757 | if (c) |
| 820 | return c; | 758 | return c; |
| 821 | 759 | ||
| 822 | return x86_get_event_constraints(cpuc, event); | 760 | return x86_get_event_constraints(cpuc, event); |
| 823 | } | 761 | } |
| 824 | 762 | ||
| 825 | static __initconst struct x86_pmu core_pmu = { | 763 | static int intel_pmu_hw_config(struct perf_event *event) |
| 764 | { | ||
| 765 | int ret = x86_pmu_hw_config(event); | ||
| 766 | |||
| 767 | if (ret) | ||
| 768 | return ret; | ||
| 769 | |||
| 770 | if (event->attr.type != PERF_TYPE_RAW) | ||
| 771 | return 0; | ||
| 772 | |||
| 773 | if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY)) | ||
| 774 | return 0; | ||
| 775 | |||
| 776 | if (x86_pmu.version < 3) | ||
| 777 | return -EINVAL; | ||
| 778 | |||
| 779 | if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) | ||
| 780 | return -EACCES; | ||
| 781 | |||
| 782 | event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY; | ||
| 783 | |||
| 784 | return 0; | ||
| 785 | } | ||
| 786 | |||
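intel_pmu_hw_config() is also where the AnyThread bit of a raw event gets vetted: it requires architectural perfmon v3 and, because it lets one hyperthread observe counts caused by its sibling, privilege when perf is running in paranoid mode. A rough userspace sketch of requesting such an event; bit 21 is the AnyThread bit in the documented event-select layout, and event 0x3c (unhalted core cycles) is just an example:

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size   = sizeof(attr);
	attr.type   = PERF_TYPE_RAW;
	attr.config = 0x003c | (1ULL << 21);	/* event 0x3c, AnyThread set */

	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		perror("perf_event_open");	/* EINVAL on pre-v3 PMUs, EACCES without privilege */
	else
		close(fd);
	return 0;
}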
| 787 | static __initconst const struct x86_pmu core_pmu = { | ||
| 826 | .name = "core", | 788 | .name = "core", |
| 827 | .handle_irq = x86_pmu_handle_irq, | 789 | .handle_irq = x86_pmu_handle_irq, |
| 828 | .disable_all = x86_pmu_disable_all, | 790 | .disable_all = x86_pmu_disable_all, |
| 829 | .enable_all = x86_pmu_enable_all, | 791 | .enable_all = x86_pmu_enable_all, |
| 830 | .enable = x86_pmu_enable_event, | 792 | .enable = x86_pmu_enable_event, |
| 831 | .disable = x86_pmu_disable_event, | 793 | .disable = x86_pmu_disable_event, |
| 794 | .hw_config = x86_pmu_hw_config, | ||
| 795 | .schedule_events = x86_schedule_events, | ||
| 832 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, | 796 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, |
| 833 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | 797 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, |
| 834 | .event_map = intel_pmu_event_map, | 798 | .event_map = intel_pmu_event_map, |
| 835 | .raw_event = intel_pmu_raw_event, | ||
| 836 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), | 799 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), |
| 837 | .apic = 1, | 800 | .apic = 1, |
| 838 | /* | 801 | /* |
| @@ -845,17 +808,32 @@ static __initconst struct x86_pmu core_pmu = { | |||
| 845 | .event_constraints = intel_core_event_constraints, | 808 | .event_constraints = intel_core_event_constraints, |
| 846 | }; | 809 | }; |
| 847 | 810 | ||
| 848 | static __initconst struct x86_pmu intel_pmu = { | 811 | static void intel_pmu_cpu_starting(int cpu) |
| 812 | { | ||
| 813 | init_debug_store_on_cpu(cpu); | ||
| 814 | /* | ||
| 815 | * Deal with CPUs that don't clear their LBRs on power-up. | ||
| 816 | */ | ||
| 817 | intel_pmu_lbr_reset(); | ||
| 818 | } | ||
| 819 | |||
| 820 | static void intel_pmu_cpu_dying(int cpu) | ||
| 821 | { | ||
| 822 | fini_debug_store_on_cpu(cpu); | ||
| 823 | } | ||
| 824 | |||
| 825 | static __initconst const struct x86_pmu intel_pmu = { | ||
| 849 | .name = "Intel", | 826 | .name = "Intel", |
| 850 | .handle_irq = intel_pmu_handle_irq, | 827 | .handle_irq = intel_pmu_handle_irq, |
| 851 | .disable_all = intel_pmu_disable_all, | 828 | .disable_all = intel_pmu_disable_all, |
| 852 | .enable_all = intel_pmu_enable_all, | 829 | .enable_all = intel_pmu_enable_all, |
| 853 | .enable = intel_pmu_enable_event, | 830 | .enable = intel_pmu_enable_event, |
| 854 | .disable = intel_pmu_disable_event, | 831 | .disable = intel_pmu_disable_event, |
| 832 | .hw_config = intel_pmu_hw_config, | ||
| 833 | .schedule_events = x86_schedule_events, | ||
| 855 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, | 834 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, |
| 856 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | 835 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, |
| 857 | .event_map = intel_pmu_event_map, | 836 | .event_map = intel_pmu_event_map, |
| 858 | .raw_event = intel_pmu_raw_event, | ||
| 859 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), | 837 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), |
| 860 | .apic = 1, | 838 | .apic = 1, |
| 861 | /* | 839 | /* |
| @@ -864,14 +842,38 @@ static __initconst struct x86_pmu intel_pmu = { | |||
| 864 | * the generic event period: | 842 | * the generic event period: |
| 865 | */ | 843 | */ |
| 866 | .max_period = (1ULL << 31) - 1, | 844 | .max_period = (1ULL << 31) - 1, |
| 867 | .enable_bts = intel_pmu_enable_bts, | ||
| 868 | .disable_bts = intel_pmu_disable_bts, | ||
| 869 | .get_event_constraints = intel_get_event_constraints, | 845 | .get_event_constraints = intel_get_event_constraints, |
| 870 | 846 | ||
| 871 | .cpu_starting = init_debug_store_on_cpu, | 847 | .cpu_starting = intel_pmu_cpu_starting, |
| 872 | .cpu_dying = fini_debug_store_on_cpu, | 848 | .cpu_dying = intel_pmu_cpu_dying, |
| 873 | }; | 849 | }; |
| 874 | 850 | ||
| 851 | static void intel_clovertown_quirks(void) | ||
| 852 | { | ||
| 853 | /* | ||
| 854 | * PEBS is unreliable due to: | ||
| 855 | * | ||
| 856 | * AJ67 - PEBS may experience CPL leaks | ||
| 857 | * AJ68 - PEBS PMI may be delayed by one event | ||
| 858 | * AJ69 - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12] | ||
| 859 | * AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS | ||
| 860 | * | ||
| 861 | * AJ67 could be worked around by restricting the OS/USR flags. | ||
| 862 | * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI. | ||
| 863 | * | ||
| 864 | * AJ106 could possibly be worked around by not allowing LBR | ||
| 865 | * usage from PEBS, including the fixup. | ||
| 866 | * AJ68 could possibly be worked around by always programming | ||
| 867 | * a pebs_event_reset[0] value and coping with the lost events. | ||
| 868 | * | ||
| 869 | * But taken together it might just make sense to not enable PEBS on | ||
| 870 | * these chips. | ||
| 871 | */ | ||
| 872 | printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); | ||
| 873 | x86_pmu.pebs = 0; | ||
| 874 | x86_pmu.pebs_constraints = NULL; | ||
| 875 | } | ||
| 876 | |||
| 875 | static __init int intel_pmu_init(void) | 877 | static __init int intel_pmu_init(void) |
| 876 | { | 878 | { |
| 877 | union cpuid10_edx edx; | 879 | union cpuid10_edx edx; |
| @@ -881,12 +883,13 @@ static __init int intel_pmu_init(void) | |||
| 881 | int version; | 883 | int version; |
| 882 | 884 | ||
| 883 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | 885 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { |
| 884 | /* check for P6 processor family */ | 886 | switch (boot_cpu_data.x86) { |
| 885 | if (boot_cpu_data.x86 == 6) { | 887 | case 0x6: |
| 886 | return p6_pmu_init(); | 888 | return p6_pmu_init(); |
| 887 | } else { | 889 | case 0xf: |
| 890 | return p4_pmu_init(); | ||
| 891 | } | ||
| 888 | return -ENODEV; | 892 | return -ENODEV; |
| 889 | } | ||
| 890 | } | 893 | } |
| 891 | 894 | ||
| 892 | /* | 895 | /* |
| @@ -904,16 +907,28 @@ static __init int intel_pmu_init(void) | |||
| 904 | x86_pmu = intel_pmu; | 907 | x86_pmu = intel_pmu; |
| 905 | 908 | ||
| 906 | x86_pmu.version = version; | 909 | x86_pmu.version = version; |
| 907 | x86_pmu.num_events = eax.split.num_events; | 910 | x86_pmu.num_counters = eax.split.num_counters; |
| 908 | x86_pmu.event_bits = eax.split.bit_width; | 911 | x86_pmu.cntval_bits = eax.split.bit_width; |
| 909 | x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1; | 912 | x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; |
| 910 | 913 | ||
| 911 | /* | 914 | /* |
| 912 | * Quirk: v2 perfmon does not report fixed-purpose events, so | 915 | * Quirk: v2 perfmon does not report fixed-purpose events, so |
| 913 | * assume at least 3 events: | 916 | * assume at least 3 events: |
| 914 | */ | 917 | */ |
| 915 | if (version > 1) | 918 | if (version > 1) |
| 916 | x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); | 919 | x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); |
| 920 | |||
| 921 | /* | ||
| 922 | * v2 and above have a perf capabilities MSR | ||
| 923 | */ | ||
| 924 | if (version > 1) { | ||
| 925 | u64 capabilities; | ||
| 926 | |||
| 927 | rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities); | ||
| 928 | x86_pmu.intel_cap.capabilities = capabilities; | ||
| 929 | } | ||
| 930 | |||
| 931 | intel_ds_init(); | ||
| 917 | 932 | ||
| 918 | /* | 933 | /* |
| 919 | * Install the hw-cache-events table: | 934 | * Install the hw-cache-events table: |
| @@ -924,12 +939,15 @@ static __init int intel_pmu_init(void) | |||
| 924 | break; | 939 | break; |
| 925 | 940 | ||
| 926 | case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ | 941 | case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ |
| 942 | x86_pmu.quirks = intel_clovertown_quirks; | ||
| 927 | case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ | 943 | case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ |
| 928 | case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ | 944 | case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ |
| 929 | case 29: /* six-core 45 nm xeon "Dunnington" */ | 945 | case 29: /* six-core 45 nm xeon "Dunnington" */ |
| 930 | memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, | 946 | memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, |
| 931 | sizeof(hw_cache_event_ids)); | 947 | sizeof(hw_cache_event_ids)); |
| 932 | 948 | ||
| 949 | intel_pmu_lbr_init_core(); | ||
| 950 | |||
| 933 | x86_pmu.event_constraints = intel_core2_event_constraints; | 951 | x86_pmu.event_constraints = intel_core2_event_constraints; |
| 934 | pr_cont("Core2 events, "); | 952 | pr_cont("Core2 events, "); |
| 935 | break; | 953 | break; |
| @@ -940,13 +958,19 @@ static __init int intel_pmu_init(void) | |||
| 940 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, | 958 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, |
| 941 | sizeof(hw_cache_event_ids)); | 959 | sizeof(hw_cache_event_ids)); |
| 942 | 960 | ||
| 961 | intel_pmu_lbr_init_nhm(); | ||
| 962 | |||
| 943 | x86_pmu.event_constraints = intel_nehalem_event_constraints; | 963 | x86_pmu.event_constraints = intel_nehalem_event_constraints; |
| 944 | pr_cont("Nehalem/Corei7 events, "); | 964 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; |
| 965 | pr_cont("Nehalem events, "); | ||
| 945 | break; | 966 | break; |
| 967 | |||
| 946 | case 28: /* Atom */ | 968 | case 28: /* Atom */ |
| 947 | memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, | 969 | memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, |
| 948 | sizeof(hw_cache_event_ids)); | 970 | sizeof(hw_cache_event_ids)); |
| 949 | 971 | ||
| 972 | intel_pmu_lbr_init_atom(); | ||
| 973 | |||
| 950 | x86_pmu.event_constraints = intel_gen_event_constraints; | 974 | x86_pmu.event_constraints = intel_gen_event_constraints; |
| 951 | pr_cont("Atom events, "); | 975 | pr_cont("Atom events, "); |
| 952 | break; | 976 | break; |
| @@ -956,7 +980,10 @@ static __init int intel_pmu_init(void) | |||
| 956 | memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, | 980 | memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, |
| 957 | sizeof(hw_cache_event_ids)); | 981 | sizeof(hw_cache_event_ids)); |
| 958 | 982 | ||
| 983 | intel_pmu_lbr_init_nhm(); | ||
| 984 | |||
| 959 | x86_pmu.event_constraints = intel_westmere_event_constraints; | 985 | x86_pmu.event_constraints = intel_westmere_event_constraints; |
| 986 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; | ||
| 960 | pr_cont("Westmere events, "); | 987 | pr_cont("Westmere events, "); |
| 961 | break; | 988 | break; |
| 962 | 989 | ||
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c new file mode 100644 index 000000000000..18018d1311cd --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c | |||
| @@ -0,0 +1,641 @@ | |||
| 1 | #ifdef CONFIG_CPU_SUP_INTEL | ||
| 2 | |||
| 3 | /* The maximal number of PEBS events: */ | ||
| 4 | #define MAX_PEBS_EVENTS 4 | ||
| 5 | |||
| 6 | /* The size of a BTS record in bytes: */ | ||
| 7 | #define BTS_RECORD_SIZE 24 | ||
| 8 | |||
| 9 | #define BTS_BUFFER_SIZE (PAGE_SIZE << 4) | ||
| 10 | #define PEBS_BUFFER_SIZE PAGE_SIZE | ||
| 11 | |||
| 12 | /* | ||
| 13 | * pebs_record_32 for p4 and core not supported | ||
| 14 | |||
| 15 | struct pebs_record_32 { | ||
| 16 | u32 flags, ip; | ||
| 17 | u32 ax, bc, cx, dx; | ||
| 18 | u32 si, di, bp, sp; | ||
| 19 | }; | ||
| 20 | |||
| 21 | */ | ||
| 22 | |||
| 23 | struct pebs_record_core { | ||
| 24 | u64 flags, ip; | ||
| 25 | u64 ax, bx, cx, dx; | ||
| 26 | u64 si, di, bp, sp; | ||
| 27 | u64 r8, r9, r10, r11; | ||
| 28 | u64 r12, r13, r14, r15; | ||
| 29 | }; | ||
| 30 | |||
| 31 | struct pebs_record_nhm { | ||
| 32 | u64 flags, ip; | ||
| 33 | u64 ax, bx, cx, dx; | ||
| 34 | u64 si, di, bp, sp; | ||
| 35 | u64 r8, r9, r10, r11; | ||
| 36 | u64 r12, r13, r14, r15; | ||
| 37 | u64 status, dla, dse, lat; | ||
| 38 | }; | ||
| 39 | |||
| 40 | /* | ||
| 41 | * A debug store configuration. | ||
| 42 | * | ||
| 43 | * We only support architectures that use 64bit fields. | ||
| 44 | */ | ||
| 45 | struct debug_store { | ||
| 46 | u64 bts_buffer_base; | ||
| 47 | u64 bts_index; | ||
| 48 | u64 bts_absolute_maximum; | ||
| 49 | u64 bts_interrupt_threshold; | ||
| 50 | u64 pebs_buffer_base; | ||
| 51 | u64 pebs_index; | ||
| 52 | u64 pebs_absolute_maximum; | ||
| 53 | u64 pebs_interrupt_threshold; | ||
| 54 | u64 pebs_event_reset[MAX_PEBS_EVENTS]; | ||
| 55 | }; | ||
| 56 | |||
| 57 | static void init_debug_store_on_cpu(int cpu) | ||
| 58 | { | ||
| 59 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
| 60 | |||
| 61 | if (!ds) | ||
| 62 | return; | ||
| 63 | |||
| 64 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, | ||
| 65 | (u32)((u64)(unsigned long)ds), | ||
| 66 | (u32)((u64)(unsigned long)ds >> 32)); | ||
| 67 | } | ||
| 68 | |||
| 69 | static void fini_debug_store_on_cpu(int cpu) | ||
| 70 | { | ||
| 71 | if (!per_cpu(cpu_hw_events, cpu).ds) | ||
| 72 | return; | ||
| 73 | |||
| 74 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); | ||
| 75 | } | ||
| 76 | |||
| 77 | static void release_ds_buffers(void) | ||
| 78 | { | ||
| 79 | int cpu; | ||
| 80 | |||
| 81 | if (!x86_pmu.bts && !x86_pmu.pebs) | ||
| 82 | return; | ||
| 83 | |||
| 84 | get_online_cpus(); | ||
| 85 | |||
| 86 | for_each_online_cpu(cpu) | ||
| 87 | fini_debug_store_on_cpu(cpu); | ||
| 88 | |||
| 89 | for_each_possible_cpu(cpu) { | ||
| 90 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
| 91 | |||
| 92 | if (!ds) | ||
| 93 | continue; | ||
| 94 | |||
| 95 | per_cpu(cpu_hw_events, cpu).ds = NULL; | ||
| 96 | |||
| 97 | kfree((void *)(unsigned long)ds->pebs_buffer_base); | ||
| 98 | kfree((void *)(unsigned long)ds->bts_buffer_base); | ||
| 99 | kfree(ds); | ||
| 100 | } | ||
| 101 | |||
| 102 | put_online_cpus(); | ||
| 103 | } | ||
| 104 | |||
| 105 | static int reserve_ds_buffers(void) | ||
| 106 | { | ||
| 107 | int cpu, err = 0; | ||
| 108 | |||
| 109 | if (!x86_pmu.bts && !x86_pmu.pebs) | ||
| 110 | return 0; | ||
| 111 | |||
| 112 | get_online_cpus(); | ||
| 113 | |||
| 114 | for_each_possible_cpu(cpu) { | ||
| 115 | struct debug_store *ds; | ||
| 116 | void *buffer; | ||
| 117 | int max, thresh; | ||
| 118 | |||
| 119 | err = -ENOMEM; | ||
| 120 | ds = kzalloc(sizeof(*ds), GFP_KERNEL); | ||
| 121 | if (unlikely(!ds)) | ||
| 122 | break; | ||
| 123 | per_cpu(cpu_hw_events, cpu).ds = ds; | ||
| 124 | |||
| 125 | if (x86_pmu.bts) { | ||
| 126 | buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); | ||
| 127 | if (unlikely(!buffer)) | ||
| 128 | break; | ||
| 129 | |||
| 130 | max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; | ||
| 131 | thresh = max / 16; | ||
| 132 | |||
| 133 | ds->bts_buffer_base = (u64)(unsigned long)buffer; | ||
| 134 | ds->bts_index = ds->bts_buffer_base; | ||
| 135 | ds->bts_absolute_maximum = ds->bts_buffer_base + | ||
| 136 | max * BTS_RECORD_SIZE; | ||
| 137 | ds->bts_interrupt_threshold = ds->bts_absolute_maximum - | ||
| 138 | thresh * BTS_RECORD_SIZE; | ||
| 139 | } | ||
| 140 | |||
| 141 | if (x86_pmu.pebs) { | ||
| 142 | buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL); | ||
| 143 | if (unlikely(!buffer)) | ||
| 144 | break; | ||
| 145 | |||
| 146 | max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size; | ||
| 147 | |||
| 148 | ds->pebs_buffer_base = (u64)(unsigned long)buffer; | ||
| 149 | ds->pebs_index = ds->pebs_buffer_base; | ||
| 150 | ds->pebs_absolute_maximum = ds->pebs_buffer_base + | ||
| 151 | max * x86_pmu.pebs_record_size; | ||
| 152 | /* | ||
| 153 | * Always use single record PEBS | ||
| 154 | */ | ||
| 155 | ds->pebs_interrupt_threshold = ds->pebs_buffer_base + | ||
| 156 | x86_pmu.pebs_record_size; | ||
| 157 | } | ||
| 158 | |||
| 159 | err = 0; | ||
| 160 | } | ||
| 161 | |||
| 162 | if (err) | ||
| 163 | release_ds_buffers(); | ||
| 164 | else { | ||
| 165 | for_each_online_cpu(cpu) | ||
| 166 | init_debug_store_on_cpu(cpu); | ||
| 167 | } | ||
| 168 | |||
| 169 | put_online_cpus(); | ||
| 170 | |||
| 171 | return err; | ||
| 172 | } | ||
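The sizing above is worth spelling out: assuming 4 KiB pages, the BTS buffer is PAGE_SIZE << 4 = 64 KiB and holds 65536/24 = 2730 records, with the interrupt threshold 2730/16 = 170 records short of the end; the PEBS buffer is one page and could hold about 23 of the 176-byte Nehalem records, but the threshold is pinned to a single record so every PEBS write raises the PMI. The same arithmetic as a quick check:

#include <stdio.h>

int main(void)
{
	const int page_size       = 4096;		/* assumed */
	const int bts_buffer      = page_size << 4;	/* 65536 bytes */
	const int bts_record      = 24;			/* from, to, flags: 3 x u64 */
	const int pebs_buffer     = page_size;
	const int pebs_record_nhm = 22 * 8;		/* sizeof(struct pebs_record_nhm) */

	int bts_max    = bts_buffer / bts_record;	/* 2730 records */
	int bts_thresh = bts_max / 16;			/* interrupt 170 records early */
	int pebs_max   = pebs_buffer / pebs_record_nhm;	/* 23 records would fit */

	printf("BTS:  %d records, threshold %d records before the end\n", bts_max, bts_thresh);
	printf("PEBS: %d records fit, threshold kept at one record\n", pebs_max);
	return 0;
}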
| 173 | |||
| 174 | /* | ||
| 175 | * BTS | ||
| 176 | */ | ||
| 177 | |||
| 178 | static struct event_constraint bts_constraint = | ||
| 179 | EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); | ||
| 180 | |||
| 181 | static void intel_pmu_enable_bts(u64 config) | ||
| 182 | { | ||
| 183 | unsigned long debugctlmsr; | ||
| 184 | |||
| 185 | debugctlmsr = get_debugctlmsr(); | ||
| 186 | |||
| 187 | debugctlmsr |= DEBUGCTLMSR_TR; | ||
| 188 | debugctlmsr |= DEBUGCTLMSR_BTS; | ||
| 189 | debugctlmsr |= DEBUGCTLMSR_BTINT; | ||
| 190 | |||
| 191 | if (!(config & ARCH_PERFMON_EVENTSEL_OS)) | ||
| 192 | debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS; | ||
| 193 | |||
| 194 | if (!(config & ARCH_PERFMON_EVENTSEL_USR)) | ||
| 195 | debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR; | ||
| 196 | |||
| 197 | update_debugctlmsr(debugctlmsr); | ||
| 198 | } | ||
| 199 | |||
| 200 | static void intel_pmu_disable_bts(void) | ||
| 201 | { | ||
| 202 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 203 | unsigned long debugctlmsr; | ||
| 204 | |||
| 205 | if (!cpuc->ds) | ||
| 206 | return; | ||
| 207 | |||
| 208 | debugctlmsr = get_debugctlmsr(); | ||
| 209 | |||
| 210 | debugctlmsr &= | ||
| 211 | ~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT | | ||
| 212 | DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR); | ||
| 213 | |||
| 214 | update_debugctlmsr(debugctlmsr); | ||
| 215 | } | ||
| 216 | |||
| 217 | static void intel_pmu_drain_bts_buffer(void) | ||
| 218 | { | ||
| 219 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 220 | struct debug_store *ds = cpuc->ds; | ||
| 221 | struct bts_record { | ||
| 222 | u64 from; | ||
| 223 | u64 to; | ||
| 224 | u64 flags; | ||
| 225 | }; | ||
| 226 | struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; | ||
| 227 | struct bts_record *at, *top; | ||
| 228 | struct perf_output_handle handle; | ||
| 229 | struct perf_event_header header; | ||
| 230 | struct perf_sample_data data; | ||
| 231 | struct pt_regs regs; | ||
| 232 | |||
| 233 | if (!event) | ||
| 234 | return; | ||
| 235 | |||
| 236 | if (!ds) | ||
| 237 | return; | ||
| 238 | |||
| 239 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; | ||
| 240 | top = (struct bts_record *)(unsigned long)ds->bts_index; | ||
| 241 | |||
| 242 | if (top <= at) | ||
| 243 | return; | ||
| 244 | |||
| 245 | ds->bts_index = ds->bts_buffer_base; | ||
| 246 | |||
| 247 | perf_sample_data_init(&data, 0); | ||
| 248 | data.period = event->hw.last_period; | ||
| 249 | regs.ip = 0; | ||
| 250 | |||
| 251 | /* | ||
| 252 | * Prepare a generic sample, i.e. fill in the invariant fields. | ||
| 253 | * We will overwrite the from and to address before we output | ||
| 254 | * the sample. | ||
| 255 | */ | ||
| 256 | perf_prepare_sample(&header, &data, event, ®s); | ||
| 257 | |||
| 258 | if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1)) | ||
| 259 | return; | ||
| 260 | |||
| 261 | for (; at < top; at++) { | ||
| 262 | data.ip = at->from; | ||
| 263 | data.addr = at->to; | ||
| 264 | |||
| 265 | perf_output_sample(&handle, &header, &data, event); | ||
| 266 | } | ||
| 267 | |||
| 268 | perf_output_end(&handle); | ||
| 269 | |||
| 270 | /* There's new data available. */ | ||
| 271 | event->hw.interrupts++; | ||
| 272 | event->pending_kill = POLL_IN; | ||
| 273 | } | ||
| 274 | |||
| 275 | /* | ||
| 276 | * PEBS | ||
| 277 | */ | ||
| 278 | |||
| 279 | static struct event_constraint intel_core_pebs_events[] = { | ||
| 280 | PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INSTR_RETIRED.ANY */ | ||
| 281 | PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */ | ||
| 282 | PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */ | ||
| 283 | PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */ | ||
| 284 | PEBS_EVENT_CONSTRAINT(0x01cb, 0x1), /* MEM_LOAD_RETIRED.L1D_MISS */ | ||
| 285 | PEBS_EVENT_CONSTRAINT(0x02cb, 0x1), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */ | ||
| 286 | PEBS_EVENT_CONSTRAINT(0x04cb, 0x1), /* MEM_LOAD_RETIRED.L2_MISS */ | ||
| 287 | PEBS_EVENT_CONSTRAINT(0x08cb, 0x1), /* MEM_LOAD_RETIRED.L2_LINE_MISS */ | ||
| 288 | PEBS_EVENT_CONSTRAINT(0x10cb, 0x1), /* MEM_LOAD_RETIRED.DTLB_MISS */ | ||
| 289 | EVENT_CONSTRAINT_END | ||
| 290 | }; | ||
| 291 | |||
| 292 | static struct event_constraint intel_nehalem_pebs_events[] = { | ||
| 293 | PEBS_EVENT_CONSTRAINT(0x00c0, 0xf), /* INSTR_RETIRED.ANY */ | ||
| 294 | PEBS_EVENT_CONSTRAINT(0xfec1, 0xf), /* X87_OPS_RETIRED.ANY */ | ||
| 295 | PEBS_EVENT_CONSTRAINT(0x00c5, 0xf), /* BR_INST_RETIRED.MISPRED */ | ||
| 296 | PEBS_EVENT_CONSTRAINT(0x1fc7, 0xf), /* SIMD_INST_RETIRED.ANY */ | ||
| 297 | PEBS_EVENT_CONSTRAINT(0x01cb, 0xf), /* MEM_LOAD_RETIRED.L1D_MISS */ | ||
| 298 | PEBS_EVENT_CONSTRAINT(0x02cb, 0xf), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */ | ||
| 299 | PEBS_EVENT_CONSTRAINT(0x04cb, 0xf), /* MEM_LOAD_RETIRED.L2_MISS */ | ||
| 300 | PEBS_EVENT_CONSTRAINT(0x08cb, 0xf), /* MEM_LOAD_RETIRED.L2_LINE_MISS */ | ||
| 301 | PEBS_EVENT_CONSTRAINT(0x10cb, 0xf), /* MEM_LOAD_RETIRED.DTLB_MISS */ | ||
| 302 | EVENT_CONSTRAINT_END | ||
| 303 | }; | ||
| 304 | |||
| 305 | static struct event_constraint * | ||
| 306 | intel_pebs_constraints(struct perf_event *event) | ||
| 307 | { | ||
| 308 | struct event_constraint *c; | ||
| 309 | |||
| 310 | if (!event->attr.precise_ip) | ||
| 311 | return NULL; | ||
| 312 | |||
| 313 | if (x86_pmu.pebs_constraints) { | ||
| 314 | for_each_event_constraint(c, x86_pmu.pebs_constraints) { | ||
| 315 | if ((event->hw.config & c->cmask) == c->code) | ||
| 316 | return c; | ||
| 317 | } | ||
| 318 | } | ||
| 319 | |||
| 320 | return &emptyconstraint; | ||
| 321 | } | ||
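The constraint tables key on the event-select and unit-mask fields only, so privilege and enable bits in hw.config do not affect the match; the counter bitmap then says where the event may run (only counter 0 on Core, any of the four generic counters on Nehalem). A small sketch of that matching test, assuming INTEL_ARCH_EVENT_MASK covers bits 15:0 of the event-select value:

#include <stdint.h>
#include <stdio.h>

struct pebs_constraint {
	uint64_t code;		/* event select + unit mask */
	uint64_t cmask;		/* config bits that must match */
	uint64_t idxmsk;	/* counters the event may use */
};

/* Two entries from the Nehalem table above, for illustration. */
static const struct pebs_constraint nhm_pebs[] = {
	{ 0x00c0, 0xffff, 0xf },	/* INSTR_RETIRED.ANY */
	{ 0x01cb, 0xffff, 0xf },	/* MEM_LOAD_RETIRED.L1D_MISS */
	{ 0 }
};

static const struct pebs_constraint *find_pebs(uint64_t config)
{
	const struct pebs_constraint *c;

	for (c = nhm_pebs; c->cmask; c++)
		if ((config & c->cmask) == c->code)
			return c;
	return NULL;		/* no match: the event is not PEBS capable */
}

int main(void)
{
	/* 0x4101cb: MEM_LOAD_RETIRED.L1D_MISS with USR and EN set - still matches */
	printf("%s\n", find_pebs(0x4101cb) ? "pebs" : "no pebs");
	/* 0x412e: LLC misses - not in the PEBS table */
	printf("%s\n", find_pebs(0x412e) ? "pebs" : "no pebs");
	return 0;
}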
| 322 | |||
| 323 | static void intel_pmu_pebs_enable(struct perf_event *event) | ||
| 324 | { | ||
| 325 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 326 | struct hw_perf_event *hwc = &event->hw; | ||
| 327 | |||
| 328 | hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; | ||
| 329 | |||
| 330 | cpuc->pebs_enabled |= 1ULL << hwc->idx; | ||
| 331 | WARN_ON_ONCE(cpuc->enabled); | ||
| 332 | |||
| 333 | if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) | ||
| 334 | intel_pmu_lbr_enable(event); | ||
| 335 | } | ||
| 336 | |||
| 337 | static void intel_pmu_pebs_disable(struct perf_event *event) | ||
| 338 | { | ||
| 339 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 340 | struct hw_perf_event *hwc = &event->hw; | ||
| 341 | |||
| 342 | cpuc->pebs_enabled &= ~(1ULL << hwc->idx); | ||
| 343 | if (cpuc->enabled) | ||
| 344 | wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); | ||
| 345 | |||
| 346 | hwc->config |= ARCH_PERFMON_EVENTSEL_INT; | ||
| 347 | |||
| 348 | if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) | ||
| 349 | intel_pmu_lbr_disable(event); | ||
| 350 | } | ||
| 351 | |||
| 352 | static void intel_pmu_pebs_enable_all(void) | ||
| 353 | { | ||
| 354 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 355 | |||
| 356 | if (cpuc->pebs_enabled) | ||
| 357 | wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); | ||
| 358 | } | ||
| 359 | |||
| 360 | static void intel_pmu_pebs_disable_all(void) | ||
| 361 | { | ||
| 362 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 363 | |||
| 364 | if (cpuc->pebs_enabled) | ||
| 365 | wrmsrl(MSR_IA32_PEBS_ENABLE, 0); | ||
| 366 | } | ||
| 367 | |||
| 368 | #include <asm/insn.h> | ||
| 369 | |||
| 370 | static inline bool kernel_ip(unsigned long ip) | ||
| 371 | { | ||
| 372 | #ifdef CONFIG_X86_32 | ||
| 373 | return ip > PAGE_OFFSET; | ||
| 374 | #else | ||
| 375 | return (long)ip < 0; | ||
| 376 | #endif | ||
| 377 | } | ||
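kernel_ip() only needs to tell the two halves of the address space apart: on 64-bit the kernel lives in the upper canonical half, so the sign bit of the address suffices, while 32-bit compares against PAGE_OFFSET. A tiny standalone illustration of the 64-bit case:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Same test as kernel_ip() on a 64-bit kernel: kernel addresses have the
 * top bit set, so they are negative when viewed as a signed integer. */
static bool is_kernel_ip_64(uint64_t ip)
{
	return (int64_t)ip < 0;
}

int main(void)
{
	printf("%d\n", is_kernel_ip_64(0xffffffff81000000ULL));	/* 1: kernel text */
	printf("%d\n", is_kernel_ip_64(0x0000000000400000ULL));	/* 0: user text */
	return 0;
}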
| 378 | |||
| 379 | static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) | ||
| 380 | { | ||
| 381 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 382 | unsigned long from = cpuc->lbr_entries[0].from; | ||
| 383 | unsigned long old_to, to = cpuc->lbr_entries[0].to; | ||
| 384 | unsigned long ip = regs->ip; | ||
| 385 | |||
| 386 | /* | ||
| 387 | * We don't need to fixup if the PEBS assist is fault like | ||
| 388 | */ | ||
| 389 | if (!x86_pmu.intel_cap.pebs_trap) | ||
| 390 | return 1; | ||
| 391 | |||
| 392 | /* | ||
| 393 | * No LBR entry, no basic block, no rewinding | ||
| 394 | */ | ||
| 395 | if (!cpuc->lbr_stack.nr || !from || !to) | ||
| 396 | return 0; | ||
| 397 | |||
| 398 | /* | ||
| 399 | * Basic blocks should never cross user/kernel boundaries | ||
| 400 | */ | ||
| 401 | if (kernel_ip(ip) != kernel_ip(to)) | ||
| 402 | return 0; | ||
| 403 | |||
| 404 | /* | ||
| 405 | * unsigned math: either ip is before the start (impossible) or | ||
| 406 | * the basic block is larger than 1 page (sanity) | ||
| 407 | */ | ||
| 408 | if ((ip - to) > PAGE_SIZE) | ||
| 409 | return 0; | ||
| 410 | |||
| 411 | /* | ||
| 412 | * We sampled a branch insn, rewind using the LBR stack | ||
| 413 | */ | ||
| 414 | if (ip == to) { | ||
| 415 | regs->ip = from; | ||
| 416 | return 1; | ||
| 417 | } | ||
| 418 | |||
| 419 | do { | ||
| 420 | struct insn insn; | ||
| 421 | u8 buf[MAX_INSN_SIZE]; | ||
| 422 | void *kaddr; | ||
| 423 | |||
| 424 | old_to = to; | ||
| 425 | if (!kernel_ip(ip)) { | ||
| 426 | int bytes, size = MAX_INSN_SIZE; | ||
| 427 | |||
| 428 | bytes = copy_from_user_nmi(buf, (void __user *)to, size); | ||
| 429 | if (bytes != size) | ||
| 430 | return 0; | ||
| 431 | |||
| 432 | kaddr = buf; | ||
| 433 | } else | ||
| 434 | kaddr = (void *)to; | ||
| 435 | |||
| 436 | kernel_insn_init(&insn, kaddr); | ||
| 437 | insn_get_length(&insn); | ||
| 438 | to += insn.length; | ||
| 439 | } while (to < ip); | ||
| 440 | |||
| 441 | if (to == ip) { | ||
| 442 | regs->ip = old_to; | ||
| 443 | return 1; | ||
| 444 | } | ||
| 445 | |||
| 446 | /* | ||
| 447 | * Even though we decoded the basic block, the instruction stream | ||
| 448 | * never matched the given IP; either the TO or the IP got corrupted. | ||
| 449 | */ | ||
| 450 | return 0; | ||
| 451 | } | ||
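To make the rewind above concrete, here is a toy model (editor's illustration, not kernel code): fixed instruction lengths stand in for insn_get_length(), the basic-block start plays the role of the LBR "to" entry, and the walk stops at the boundary just before the PEBS-reported IP, i.e. the instruction that actually triggered the event. All addresses and lengths are made up; the ip == to branch-sample case handled separately above is not modelled.

        #include <stdio.h>

        int main(void)
        {
                /* hypothetical basic block: lengths of five instructions */
                unsigned int len[] = { 3, 2, 7, 4, 2 };
                unsigned long bb_start = 0x1000;  /* LBR "to": branch target             */
                unsigned long pebs_ip  = 0x1010;  /* IP reported by PEBS (one insn late)  */
                unsigned long to = bb_start, old_to = bb_start;
                unsigned int i;

                for (i = 0; i < 5 && to < pebs_ip; i++) {
                        old_to = to;      /* previous instruction boundary        */
                        to += len[i];     /* insn_get_length() in the real code   */
                }

                if (to == pebs_ip)        /* landed exactly on the reported IP    */
                        printf("exact IP of the sampled insn: %#lx\n", old_to);
                else                      /* decode never hit the IP: give up     */
                        printf("no match, keep the unadjusted (skid) IP\n");
                return 0;
        }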
| 452 | |||
| 453 | static int intel_pmu_save_and_restart(struct perf_event *event); | ||
| 454 | |||
| 455 | static void __intel_pmu_pebs_event(struct perf_event *event, | ||
| 456 | struct pt_regs *iregs, void *__pebs) | ||
| 457 | { | ||
| 458 | /* | ||
| 459 | * We cast to pebs_record_core since that is a subset of | ||
| 460 | * both formats and we don't use the other fields in this | ||
| 461 | * routine. | ||
| 462 | */ | ||
| 463 | struct pebs_record_core *pebs = __pebs; | ||
| 464 | struct perf_sample_data data; | ||
| 465 | struct pt_regs regs; | ||
| 466 | |||
| 467 | if (!intel_pmu_save_and_restart(event)) | ||
| 468 | return; | ||
| 469 | |||
| 470 | perf_sample_data_init(&data, 0); | ||
| 471 | data.period = event->hw.last_period; | ||
| 472 | |||
| 473 | /* | ||
| 474 | * We use the interrupt regs as a base because the PEBS record | ||
| 475 | * does not contain a full regs set; specifically, it seems to | ||
| 476 | * lack segment descriptors, which get used by things like | ||
| 477 | * user_mode(). | ||
| 478 | * | ||
| 479 | * In the simple case, fix up only the IP, BP and SP regs, for | ||
| 480 | * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly. | ||
| 481 | * A possible PERF_SAMPLE_REGS will have to transfer all regs. | ||
| 482 | */ | ||
| 483 | regs = *iregs; | ||
| 484 | regs.ip = pebs->ip; | ||
| 485 | regs.bp = pebs->bp; | ||
| 486 | regs.sp = pebs->sp; | ||
| 487 | |||
| 488 | if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(®s)) | ||
| 489 | regs.flags |= PERF_EFLAGS_EXACT; | ||
| 490 | else | ||
| 491 | regs.flags &= ~PERF_EFLAGS_EXACT; | ||
| 492 | |||
| 493 | if (perf_event_overflow(event, 1, &data, ®s)) | ||
| 494 | x86_pmu_stop(event); | ||
| 495 | } | ||
| 496 | |||
| 497 | static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) | ||
| 498 | { | ||
| 499 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 500 | struct debug_store *ds = cpuc->ds; | ||
| 501 | struct perf_event *event = cpuc->events[0]; /* PMC0 only */ | ||
| 502 | struct pebs_record_core *at, *top; | ||
| 503 | int n; | ||
| 504 | |||
| 505 | if (!ds || !x86_pmu.pebs) | ||
| 506 | return; | ||
| 507 | |||
| 508 | at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base; | ||
| 509 | top = (struct pebs_record_core *)(unsigned long)ds->pebs_index; | ||
| 510 | |||
| 511 | /* | ||
| 512 | * Whatever else happens, drain the thing | ||
| 513 | */ | ||
| 514 | ds->pebs_index = ds->pebs_buffer_base; | ||
| 515 | |||
| 516 | if (!test_bit(0, cpuc->active_mask)) | ||
| 517 | return; | ||
| 518 | |||
| 519 | WARN_ON_ONCE(!event); | ||
| 520 | |||
| 521 | if (!event->attr.precise_ip) | ||
| 522 | return; | ||
| 523 | |||
| 524 | n = top - at; | ||
| 525 | if (n <= 0) | ||
| 526 | return; | ||
| 527 | |||
| 528 | /* | ||
| 529 | * Should not happen, we program the threshold at 1 and do not | ||
| 530 | * set a reset value. | ||
| 531 | */ | ||
| 532 | WARN_ON_ONCE(n > 1); | ||
| 533 | at += n - 1; | ||
| 534 | |||
| 535 | __intel_pmu_pebs_event(event, iregs, at); | ||
| 536 | } | ||
| 537 | |||
| 538 | static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) | ||
| 539 | { | ||
| 540 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 541 | struct debug_store *ds = cpuc->ds; | ||
| 542 | struct pebs_record_nhm *at, *top; | ||
| 543 | struct perf_event *event = NULL; | ||
| 544 | u64 status = 0; | ||
| 545 | int bit, n; | ||
| 546 | |||
| 547 | if (!ds || !x86_pmu.pebs) | ||
| 548 | return; | ||
| 549 | |||
| 550 | at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; | ||
| 551 | top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index; | ||
| 552 | |||
| 553 | ds->pebs_index = ds->pebs_buffer_base; | ||
| 554 | |||
| 555 | n = top - at; | ||
| 556 | if (n <= 0) | ||
| 557 | return; | ||
| 558 | |||
| 559 | /* | ||
| 560 | * Should not happen, we program the threshold at 1 and do not | ||
| 561 | * set a reset value. | ||
| 562 | */ | ||
| 563 | WARN_ON_ONCE(n > MAX_PEBS_EVENTS); | ||
| 564 | |||
| 565 | for ( ; at < top; at++) { | ||
| 566 | for_each_set_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) { | ||
| 567 | event = cpuc->events[bit]; | ||
| 568 | if (!test_bit(bit, cpuc->active_mask)) | ||
| 569 | continue; | ||
| 570 | |||
| 571 | WARN_ON_ONCE(!event); | ||
| 572 | |||
| 573 | if (!event->attr.precise_ip) | ||
| 574 | continue; | ||
| 575 | |||
| 576 | if (__test_and_set_bit(bit, (unsigned long *)&status)) | ||
| 577 | continue; | ||
| 578 | |||
| 579 | break; | ||
| 580 | } | ||
| 581 | |||
| 582 | if (!event || bit >= MAX_PEBS_EVENTS) | ||
| 583 | continue; | ||
| 584 | |||
| 585 | __intel_pmu_pebs_event(event, iregs, at); | ||
| 586 | } | ||
| 587 | } | ||
| 588 | |||
| 589 | /* | ||
| 590 | * BTS, PEBS probe and setup | ||
| 591 | */ | ||
| 592 | |||
| 593 | static void intel_ds_init(void) | ||
| 594 | { | ||
| 595 | /* | ||
| 596 | * No support for 32-bit formats | ||
| 597 | */ | ||
| 598 | if (!boot_cpu_has(X86_FEATURE_DTES64)) | ||
| 599 | return; | ||
| 600 | |||
| 601 | x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); | ||
| 602 | x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); | ||
| 603 | if (x86_pmu.pebs) { | ||
| 604 | char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-'; | ||
| 605 | int format = x86_pmu.intel_cap.pebs_format; | ||
| 606 | |||
| 607 | switch (format) { | ||
| 608 | case 0: | ||
| 609 | printk(KERN_CONT "PEBS fmt0%c, ", pebs_type); | ||
| 610 | x86_pmu.pebs_record_size = sizeof(struct pebs_record_core); | ||
| 611 | x86_pmu.drain_pebs = intel_pmu_drain_pebs_core; | ||
| 612 | x86_pmu.pebs_constraints = intel_core_pebs_events; | ||
| 613 | break; | ||
| 614 | |||
| 615 | case 1: | ||
| 616 | printk(KERN_CONT "PEBS fmt1%c, ", pebs_type); | ||
| 617 | x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm); | ||
| 618 | x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; | ||
| 619 | x86_pmu.pebs_constraints = intel_nehalem_pebs_events; | ||
| 620 | break; | ||
| 621 | |||
| 622 | default: | ||
| 623 | printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type); | ||
| 624 | x86_pmu.pebs = 0; | ||
| 625 | break; | ||
| 626 | } | ||
| 627 | } | ||
| 628 | } | ||
| 629 | |||
| 630 | #else /* CONFIG_CPU_SUP_INTEL */ | ||
| 631 | |||
| 632 | static int reserve_ds_buffers(void) | ||
| 633 | { | ||
| 634 | return 0; | ||
| 635 | } | ||
| 636 | |||
| 637 | static void release_ds_buffers(void) | ||
| 638 | { | ||
| 639 | } | ||
| 640 | |||
| 641 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c new file mode 100644 index 000000000000..d202c1bece1a --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c | |||
| @@ -0,0 +1,218 @@ | |||
| 1 | #ifdef CONFIG_CPU_SUP_INTEL | ||
| 2 | |||
| 3 | enum { | ||
| 4 | LBR_FORMAT_32 = 0x00, | ||
| 5 | LBR_FORMAT_LIP = 0x01, | ||
| 6 | LBR_FORMAT_EIP = 0x02, | ||
| 7 | LBR_FORMAT_EIP_FLAGS = 0x03, | ||
| 8 | }; | ||
| 9 | |||
| 10 | /* | ||
| 11 | * We only support LBR implementations that have FREEZE_LBRS_ON_PMI; | ||
| 12 | * otherwise it becomes nearly impossible to get a reliable stack. | ||
| 13 | */ | ||
| 14 | |||
| 15 | static void __intel_pmu_lbr_enable(void) | ||
| 16 | { | ||
| 17 | u64 debugctl; | ||
| 18 | |||
| 19 | rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | ||
| 20 | debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); | ||
| 21 | wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | ||
| 22 | } | ||
| 23 | |||
| 24 | static void __intel_pmu_lbr_disable(void) | ||
| 25 | { | ||
| 26 | u64 debugctl; | ||
| 27 | |||
| 28 | rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | ||
| 29 | debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); | ||
| 30 | wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | ||
| 31 | } | ||
| 32 | |||
| 33 | static void intel_pmu_lbr_reset_32(void) | ||
| 34 | { | ||
| 35 | int i; | ||
| 36 | |||
| 37 | for (i = 0; i < x86_pmu.lbr_nr; i++) | ||
| 38 | wrmsrl(x86_pmu.lbr_from + i, 0); | ||
| 39 | } | ||
| 40 | |||
| 41 | static void intel_pmu_lbr_reset_64(void) | ||
| 42 | { | ||
| 43 | int i; | ||
| 44 | |||
| 45 | for (i = 0; i < x86_pmu.lbr_nr; i++) { | ||
| 46 | wrmsrl(x86_pmu.lbr_from + i, 0); | ||
| 47 | wrmsrl(x86_pmu.lbr_to + i, 0); | ||
| 48 | } | ||
| 49 | } | ||
| 50 | |||
| 51 | static void intel_pmu_lbr_reset(void) | ||
| 52 | { | ||
| 53 | if (!x86_pmu.lbr_nr) | ||
| 54 | return; | ||
| 55 | |||
| 56 | if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) | ||
| 57 | intel_pmu_lbr_reset_32(); | ||
| 58 | else | ||
| 59 | intel_pmu_lbr_reset_64(); | ||
| 60 | } | ||
| 61 | |||
| 62 | static void intel_pmu_lbr_enable(struct perf_event *event) | ||
| 63 | { | ||
| 64 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 65 | |||
| 66 | if (!x86_pmu.lbr_nr) | ||
| 67 | return; | ||
| 68 | |||
| 69 | WARN_ON_ONCE(cpuc->enabled); | ||
| 70 | |||
| 71 | /* | ||
| 72 | * Reset the LBR stack if we changed task context to | ||
| 73 | * avoid data leaks. | ||
| 74 | */ | ||
| 75 | |||
| 76 | if (event->ctx->task && cpuc->lbr_context != event->ctx) { | ||
| 77 | intel_pmu_lbr_reset(); | ||
| 78 | cpuc->lbr_context = event->ctx; | ||
| 79 | } | ||
| 80 | |||
| 81 | cpuc->lbr_users++; | ||
| 82 | } | ||
| 83 | |||
| 84 | static void intel_pmu_lbr_disable(struct perf_event *event) | ||
| 85 | { | ||
| 86 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 87 | |||
| 88 | if (!x86_pmu.lbr_nr) | ||
| 89 | return; | ||
| 90 | |||
| 91 | cpuc->lbr_users--; | ||
| 92 | WARN_ON_ONCE(cpuc->lbr_users < 0); | ||
| 93 | |||
| 94 | if (cpuc->enabled && !cpuc->lbr_users) | ||
| 95 | __intel_pmu_lbr_disable(); | ||
| 96 | } | ||
| 97 | |||
| 98 | static void intel_pmu_lbr_enable_all(void) | ||
| 99 | { | ||
| 100 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 101 | |||
| 102 | if (cpuc->lbr_users) | ||
| 103 | __intel_pmu_lbr_enable(); | ||
| 104 | } | ||
| 105 | |||
| 106 | static void intel_pmu_lbr_disable_all(void) | ||
| 107 | { | ||
| 108 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 109 | |||
| 110 | if (cpuc->lbr_users) | ||
| 111 | __intel_pmu_lbr_disable(); | ||
| 112 | } | ||
| 113 | |||
| 114 | static inline u64 intel_pmu_lbr_tos(void) | ||
| 115 | { | ||
| 116 | u64 tos; | ||
| 117 | |||
| 118 | rdmsrl(x86_pmu.lbr_tos, tos); | ||
| 119 | |||
| 120 | return tos; | ||
| 121 | } | ||
| 122 | |||
| 123 | static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) | ||
| 124 | { | ||
| 125 | unsigned long mask = x86_pmu.lbr_nr - 1; | ||
| 126 | u64 tos = intel_pmu_lbr_tos(); | ||
| 127 | int i; | ||
| 128 | |||
| 129 | for (i = 0; i < x86_pmu.lbr_nr; i++) { | ||
| 130 | unsigned long lbr_idx = (tos - i) & mask; | ||
| 131 | union { | ||
| 132 | struct { | ||
| 133 | u32 from; | ||
| 134 | u32 to; | ||
| 135 | }; | ||
| 136 | u64 lbr; | ||
| 137 | } msr_lastbranch; | ||
| 138 | |||
| 139 | rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr); | ||
| 140 | |||
| 141 | cpuc->lbr_entries[i].from = msr_lastbranch.from; | ||
| 142 | cpuc->lbr_entries[i].to = msr_lastbranch.to; | ||
| 143 | cpuc->lbr_entries[i].flags = 0; | ||
| 144 | } | ||
| 145 | cpuc->lbr_stack.nr = i; | ||
| 146 | } | ||
| 147 | |||
| 148 | #define LBR_FROM_FLAG_MISPRED (1ULL << 63) | ||
| 149 | |||
| 150 | /* | ||
| 151 | * Due to the lack of segmentation in Linux, the effective address (offset) | ||
| 152 | * is the same as the linear address, allowing us to merge the LIP and EIP | ||
| 153 | * LBR formats. | ||
| 154 | */ | ||
| 155 | static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) | ||
| 156 | { | ||
| 157 | unsigned long mask = x86_pmu.lbr_nr - 1; | ||
| 158 | int lbr_format = x86_pmu.intel_cap.lbr_format; | ||
| 159 | u64 tos = intel_pmu_lbr_tos(); | ||
| 160 | int i; | ||
| 161 | |||
| 162 | for (i = 0; i < x86_pmu.lbr_nr; i++) { | ||
| 163 | unsigned long lbr_idx = (tos - i) & mask; | ||
| 164 | u64 from, to, flags = 0; | ||
| 165 | |||
| 166 | rdmsrl(x86_pmu.lbr_from + lbr_idx, from); | ||
| 167 | rdmsrl(x86_pmu.lbr_to + lbr_idx, to); | ||
| 168 | |||
| 169 | if (lbr_format == LBR_FORMAT_EIP_FLAGS) { | ||
| 170 | flags = !!(from & LBR_FROM_FLAG_MISPRED); | ||
| 171 | from = (u64)((((s64)from) << 1) >> 1); | ||
| 172 | } | ||
| 173 | |||
| 174 | cpuc->lbr_entries[i].from = from; | ||
| 175 | cpuc->lbr_entries[i].to = to; | ||
| 176 | cpuc->lbr_entries[i].flags = flags; | ||
| 177 | } | ||
| 178 | cpuc->lbr_stack.nr = i; | ||
| 179 | } | ||
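A small standalone illustration (assumed values, not from the patch) of the LBR_FORMAT_EIP_FLAGS handling above: bit 63 of the raw FROM value carries the mispredict flag, and the shift-left/arithmetic-shift-right pair sign-extends the remaining 63-bit address, mirroring the kernel expression (including its reliance on arithmetic right shift of signed values).

        #include <stdio.h>
        #include <stdint.h>

        #define LBR_FROM_FLAG_MISPRED  (1ULL << 63)

        static void decode(uint64_t raw)
        {
                int mispred = !!(raw & LBR_FROM_FLAG_MISPRED);
                /* drop bit 63, then the arithmetic shift replicates bit 62 upward */
                uint64_t from = (uint64_t)(((int64_t)raw << 1) >> 1);

                printf("raw=%#018llx mispred=%d from=%#018llx\n",
                       (unsigned long long)raw, mispred,
                       (unsigned long long)from);
        }

        int main(void)
        {
                decode(0x7fffffff81000000ULL);                /* kernel address, predicted  */
                decode(LBR_FROM_FLAG_MISPRED | 0x401000ULL);  /* user address, mispredicted */
                return 0;
        }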
| 180 | |||
| 181 | static void intel_pmu_lbr_read(void) | ||
| 182 | { | ||
| 183 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 184 | |||
| 185 | if (!cpuc->lbr_users) | ||
| 186 | return; | ||
| 187 | |||
| 188 | if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) | ||
| 189 | intel_pmu_lbr_read_32(cpuc); | ||
| 190 | else | ||
| 191 | intel_pmu_lbr_read_64(cpuc); | ||
| 192 | } | ||
| 193 | |||
| 194 | static void intel_pmu_lbr_init_core(void) | ||
| 195 | { | ||
| 196 | x86_pmu.lbr_nr = 4; | ||
| 197 | x86_pmu.lbr_tos = 0x01c9; | ||
| 198 | x86_pmu.lbr_from = 0x40; | ||
| 199 | x86_pmu.lbr_to = 0x60; | ||
| 200 | } | ||
| 201 | |||
| 202 | static void intel_pmu_lbr_init_nhm(void) | ||
| 203 | { | ||
| 204 | x86_pmu.lbr_nr = 16; | ||
| 205 | x86_pmu.lbr_tos = 0x01c9; | ||
| 206 | x86_pmu.lbr_from = 0x680; | ||
| 207 | x86_pmu.lbr_to = 0x6c0; | ||
| 208 | } | ||
| 209 | |||
| 210 | static void intel_pmu_lbr_init_atom(void) | ||
| 211 | { | ||
| 212 | x86_pmu.lbr_nr = 8; | ||
| 213 | x86_pmu.lbr_tos = 0x01c9; | ||
| 214 | x86_pmu.lbr_from = 0x40; | ||
| 215 | x86_pmu.lbr_to = 0x60; | ||
| 216 | } | ||
| 217 | |||
| 218 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c new file mode 100644 index 000000000000..424fc8de68e4 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_p4.c | |||
| @@ -0,0 +1,857 @@ | |||
| 1 | /* | ||
| 2 | * Netburst Performance Events (P4, old Xeon) | ||
| 3 | * | ||
| 4 | * Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org> | ||
| 5 | * Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com> | ||
| 6 | * | ||
| 7 | * For licensing details see kernel-base/COPYING | ||
| 8 | */ | ||
| 9 | |||
| 10 | #ifdef CONFIG_CPU_SUP_INTEL | ||
| 11 | |||
| 12 | #include <asm/perf_event_p4.h> | ||
| 13 | |||
| 14 | #define P4_CNTR_LIMIT 3 | ||
| 15 | /* | ||
| 16 | * array indices: 0,1 - HT threads, used with an HT-enabled CPU | ||
| 17 | */ | ||
| 18 | struct p4_event_bind { | ||
| 19 | unsigned int opcode; /* Event code and ESCR selector */ | ||
| 20 | unsigned int escr_msr[2]; /* ESCR MSR for this event */ | ||
| 21 | char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on absence */ | ||
| 22 | }; | ||
| 23 | |||
| 24 | struct p4_cache_event_bind { | ||
| 25 | unsigned int metric_pebs; | ||
| 26 | unsigned int metric_vert; | ||
| 27 | }; | ||
| 28 | |||
| 29 | #define P4_GEN_CACHE_EVENT_BIND(name) \ | ||
| 30 | [P4_CACHE__##name] = { \ | ||
| 31 | .metric_pebs = P4_PEBS__##name, \ | ||
| 32 | .metric_vert = P4_VERT__##name, \ | ||
| 33 | } | ||
| 34 | |||
| 35 | static struct p4_cache_event_bind p4_cache_event_bind_map[] = { | ||
| 36 | P4_GEN_CACHE_EVENT_BIND(1stl_cache_load_miss_retired), | ||
| 37 | P4_GEN_CACHE_EVENT_BIND(2ndl_cache_load_miss_retired), | ||
| 38 | P4_GEN_CACHE_EVENT_BIND(dtlb_load_miss_retired), | ||
| 39 | P4_GEN_CACHE_EVENT_BIND(dtlb_store_miss_retired), | ||
| 40 | }; | ||
| 41 | |||
| 42 | /* | ||
| 43 | * Note that we don't use CCCR1 here; there is an | ||
| 44 | * exception for P4_BSQ_ALLOCATION, but we simply have | ||
| 45 | * no workaround for it. | ||
| 46 | * | ||
| 47 | * Consider this binding as the resources a particular | ||
| 48 | * event may borrow; it doesn't contain the EventMask, | ||
| 49 | * Tags and friends -- those are left to the caller | ||
| 50 | */ | ||
| 51 | static struct p4_event_bind p4_event_bind_map[] = { | ||
| 52 | [P4_EVENT_TC_DELIVER_MODE] = { | ||
| 53 | .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE), | ||
| 54 | .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, | ||
| 55 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
| 56 | }, | ||
| 57 | [P4_EVENT_BPU_FETCH_REQUEST] = { | ||
| 58 | .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST), | ||
| 59 | .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 }, | ||
| 60 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
| 61 | }, | ||
| 62 | [P4_EVENT_ITLB_REFERENCE] = { | ||
| 63 | .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE), | ||
| 64 | .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, | ||
| 65 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
| 66 | }, | ||
| 67 | [P4_EVENT_MEMORY_CANCEL] = { | ||
| 68 | .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL), | ||
| 69 | .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, | ||
| 70 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
| 71 | }, | ||
| 72 | [P4_EVENT_MEMORY_COMPLETE] = { | ||
| 73 | .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE), | ||
| 74 | .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, | ||
| 75 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
| 76 | }, | ||
| 77 | [P4_EVENT_LOAD_PORT_REPLAY] = { | ||
| 78 | .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY), | ||
| 79 | .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 }, | ||
| 80 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
| 81 | }, | ||
| 82 | [P4_EVENT_STORE_PORT_REPLAY] = { | ||
| 83 | .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY), | ||
| 84 | .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, | ||
| 85 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
| 86 | }, | ||
| 87 | [P4_EVENT_MOB_LOAD_REPLAY] = { | ||
| 88 | .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY), | ||
| 89 | .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 }, | ||
| 90 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
| 91 | }, | ||
| 92 | [P4_EVENT_PAGE_WALK_TYPE] = { | ||
| 93 | .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE), | ||
| 94 | .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 }, | ||
| 95 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
| 96 | }, | ||
| 97 | [P4_EVENT_BSQ_CACHE_REFERENCE] = { | ||
| 98 | .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE), | ||
| 99 | .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, | ||
| 100 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
| 101 | }, | ||
| 102 | [P4_EVENT_IOQ_ALLOCATION] = { | ||
| 103 | .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION), | ||
| 104 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
| 105 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
| 106 | }, | ||
| 107 | [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */ | ||
| 108 | .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES), | ||
| 109 | .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 }, | ||
| 110 | .cntr = { {2, -1, -1}, {3, -1, -1} }, | ||
| 111 | }, | ||
| 112 | [P4_EVENT_FSB_DATA_ACTIVITY] = { | ||
| 113 | .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY), | ||
| 114 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
| 115 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
| 116 | }, | ||
| 117 | [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */ | ||
| 118 | .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION), | ||
| 119 | .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 }, | ||
| 120 | .cntr = { {0, -1, -1}, {1, -1, -1} }, | ||
| 121 | }, | ||
| 122 | [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */ | ||
| 123 | .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES), | ||
| 124 | .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 }, | ||
| 125 | .cntr = { {2, -1, -1}, {3, -1, -1} }, | ||
| 126 | }, | ||
| 127 | [P4_EVENT_SSE_INPUT_ASSIST] = { | ||
| 128 | .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST), | ||
| 129 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
| 130 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
| 131 | }, | ||
| 132 | [P4_EVENT_PACKED_SP_UOP] = { | ||
| 133 | .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP), | ||
| 134 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
| 135 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
| 136 | }, | ||
| 137 | [P4_EVENT_PACKED_DP_UOP] = { | ||
| 138 | .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP), | ||
| 139 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
| 140 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
| 141 | }, | ||
| 142 | [P4_EVENT_SCALAR_SP_UOP] = { | ||
| 143 | .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP), | ||
| 144 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
| 145 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
| 146 | }, | ||
| 147 | [P4_EVENT_SCALAR_DP_UOP] = { | ||
| 148 | .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP), | ||
| 149 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
| 150 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
| 151 | }, | ||
| 152 | [P4_EVENT_64BIT_MMX_UOP] = { | ||
| 153 | .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP), | ||
| 154 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
| 155 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
| 156 | }, | ||
| 157 | [P4_EVENT_128BIT_MMX_UOP] = { | ||
| 158 | .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP), | ||
| 159 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
| 160 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
| 161 | }, | ||
| 162 | [P4_EVENT_X87_FP_UOP] = { | ||
| 163 | .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP), | ||
| 164 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
| 165 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
| 166 | }, | ||
| 167 | [P4_EVENT_TC_MISC] = { | ||
| 168 | .opcode = P4_OPCODE(P4_EVENT_TC_MISC), | ||
| 169 | .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, | ||
| 170 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
| 171 | }, | ||
| 172 | [P4_EVENT_GLOBAL_POWER_EVENTS] = { | ||
| 173 | .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS), | ||
| 174 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
| 175 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
| 176 | }, | ||
| 177 | [P4_EVENT_TC_MS_XFER] = { | ||
| 178 | .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER), | ||
| 179 | .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, | ||
| 180 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
| 181 | }, | ||
| 182 | [P4_EVENT_UOP_QUEUE_WRITES] = { | ||
| 183 | .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES), | ||
| 184 | .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, | ||
| 185 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
| 186 | }, | ||
| 187 | [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = { | ||
| 188 | .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE), | ||
| 189 | .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 }, | ||
| 190 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
| 191 | }, | ||
| 192 | [P4_EVENT_RETIRED_BRANCH_TYPE] = { | ||
| 193 | .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE), | ||
| 194 | .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 }, | ||
| 195 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
| 196 | }, | ||
| 197 | [P4_EVENT_RESOURCE_STALL] = { | ||
| 198 | .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL), | ||
| 199 | .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 }, | ||
| 200 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
| 201 | }, | ||
| 202 | [P4_EVENT_WC_BUFFER] = { | ||
| 203 | .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER), | ||
| 204 | .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, | ||
| 205 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
| 206 | }, | ||
| 207 | [P4_EVENT_B2B_CYCLES] = { | ||
| 208 | .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES), | ||
| 209 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
| 210 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
| 211 | }, | ||
| 212 | [P4_EVENT_BNR] = { | ||
| 213 | .opcode = P4_OPCODE(P4_EVENT_BNR), | ||
| 214 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
| 215 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
| 216 | }, | ||
| 217 | [P4_EVENT_SNOOP] = { | ||
| 218 | .opcode = P4_OPCODE(P4_EVENT_SNOOP), | ||
| 219 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
| 220 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
| 221 | }, | ||
| 222 | [P4_EVENT_RESPONSE] = { | ||
| 223 | .opcode = P4_OPCODE(P4_EVENT_RESPONSE), | ||
| 224 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
| 225 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
| 226 | }, | ||
| 227 | [P4_EVENT_FRONT_END_EVENT] = { | ||
| 228 | .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT), | ||
| 229 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
| 230 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
| 231 | }, | ||
| 232 | [P4_EVENT_EXECUTION_EVENT] = { | ||
| 233 | .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT), | ||
| 234 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
| 235 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
| 236 | }, | ||
| 237 | [P4_EVENT_REPLAY_EVENT] = { | ||
| 238 | .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT), | ||
| 239 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
| 240 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
| 241 | }, | ||
| 242 | [P4_EVENT_INSTR_RETIRED] = { | ||
| 243 | .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED), | ||
| 244 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | ||
| 245 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
| 246 | }, | ||
| 247 | [P4_EVENT_UOPS_RETIRED] = { | ||
| 248 | .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED), | ||
| 249 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | ||
| 250 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
| 251 | }, | ||
| 252 | [P4_EVENT_UOP_TYPE] = { | ||
| 253 | .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE), | ||
| 254 | .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 }, | ||
| 255 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
| 256 | }, | ||
| 257 | [P4_EVENT_BRANCH_RETIRED] = { | ||
| 258 | .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED), | ||
| 259 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
| 260 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
| 261 | }, | ||
| 262 | [P4_EVENT_MISPRED_BRANCH_RETIRED] = { | ||
| 263 | .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED), | ||
| 264 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | ||
| 265 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
| 266 | }, | ||
| 267 | [P4_EVENT_X87_ASSIST] = { | ||
| 268 | .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST), | ||
| 269 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
| 270 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
| 271 | }, | ||
| 272 | [P4_EVENT_MACHINE_CLEAR] = { | ||
| 273 | .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR), | ||
| 274 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
| 275 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
| 276 | }, | ||
| 277 | [P4_EVENT_INSTR_COMPLETED] = { | ||
| 278 | .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED), | ||
| 279 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | ||
| 280 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
| 281 | }, | ||
| 282 | }; | ||
| 283 | |||
| 284 | #define P4_GEN_CACHE_EVENT(event, bit, cache_event) \ | ||
| 285 | p4_config_pack_escr(P4_ESCR_EVENT(event) | \ | ||
| 286 | P4_ESCR_EMASK_BIT(event, bit)) | \ | ||
| 287 | p4_config_pack_cccr(cache_event | \ | ||
| 288 | P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event)))) | ||
| 289 | |||
| 290 | static __initconst const u64 p4_hw_cache_event_ids | ||
| 291 | [PERF_COUNT_HW_CACHE_MAX] | ||
| 292 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
| 293 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
| 294 | { | ||
| 295 | [ C(L1D ) ] = { | ||
| 296 | [ C(OP_READ) ] = { | ||
| 297 | [ C(RESULT_ACCESS) ] = 0x0, | ||
| 298 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | ||
| 299 | P4_CACHE__1stl_cache_load_miss_retired), | ||
| 300 | }, | ||
| 301 | }, | ||
| 302 | [ C(LL ) ] = { | ||
| 303 | [ C(OP_READ) ] = { | ||
| 304 | [ C(RESULT_ACCESS) ] = 0x0, | ||
| 305 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | ||
| 306 | P4_CACHE__2ndl_cache_load_miss_retired), | ||
| 307 | }, | ||
| 308 | }, | ||
| 309 | [ C(DTLB) ] = { | ||
| 310 | [ C(OP_READ) ] = { | ||
| 311 | [ C(RESULT_ACCESS) ] = 0x0, | ||
| 312 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | ||
| 313 | P4_CACHE__dtlb_load_miss_retired), | ||
| 314 | }, | ||
| 315 | [ C(OP_WRITE) ] = { | ||
| 316 | [ C(RESULT_ACCESS) ] = 0x0, | ||
| 317 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | ||
| 318 | P4_CACHE__dtlb_store_miss_retired), | ||
| 319 | }, | ||
| 320 | }, | ||
| 321 | [ C(ITLB) ] = { | ||
| 322 | [ C(OP_READ) ] = { | ||
| 323 | [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT, | ||
| 324 | P4_CACHE__itlb_reference_hit), | ||
| 325 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS, | ||
| 326 | P4_CACHE__itlb_reference_miss), | ||
| 327 | }, | ||
| 328 | [ C(OP_WRITE) ] = { | ||
| 329 | [ C(RESULT_ACCESS) ] = -1, | ||
| 330 | [ C(RESULT_MISS) ] = -1, | ||
| 331 | }, | ||
| 332 | [ C(OP_PREFETCH) ] = { | ||
| 333 | [ C(RESULT_ACCESS) ] = -1, | ||
| 334 | [ C(RESULT_MISS) ] = -1, | ||
| 335 | }, | ||
| 336 | }, | ||
| 337 | }; | ||
| 338 | |||
| 339 | static u64 p4_general_events[PERF_COUNT_HW_MAX] = { | ||
| 340 | /* non-halted CPU clocks */ | ||
| 341 | [PERF_COUNT_HW_CPU_CYCLES] = | ||
| 342 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) | | ||
| 343 | P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)), | ||
| 344 | |||
| 345 | /* | ||
| 346 | * retired instructions | ||
| 347 | * for the sake of simplicity we don't use the FSB tagging | ||
| 348 | */ | ||
| 349 | [PERF_COUNT_HW_INSTRUCTIONS] = | ||
| 350 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_INSTR_RETIRED) | | ||
| 351 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) | | ||
| 352 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)), | ||
| 353 | |||
| 354 | /* cache hits */ | ||
| 355 | [PERF_COUNT_HW_CACHE_REFERENCES] = | ||
| 356 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) | | ||
| 357 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) | | ||
| 358 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) | | ||
| 359 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) | | ||
| 360 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) | | ||
| 361 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) | | ||
| 362 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)), | ||
| 363 | |||
| 364 | /* cache misses */ | ||
| 365 | [PERF_COUNT_HW_CACHE_MISSES] = | ||
| 366 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) | | ||
| 367 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) | | ||
| 368 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) | | ||
| 369 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS)), | ||
| 370 | |||
| 371 | /* branch instructions retired */ | ||
| 372 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = | ||
| 373 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_RETIRED_BRANCH_TYPE) | | ||
| 374 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) | | ||
| 375 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) | | ||
| 376 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) | | ||
| 377 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT)), | ||
| 378 | |||
| 379 | /* mispredicted branches retired */ | ||
| 380 | [PERF_COUNT_HW_BRANCH_MISSES] = | ||
| 381 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_MISPRED_BRANCH_RETIRED) | | ||
| 382 | P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS)), | ||
| 383 | |||
| 384 | /* bus ready clocks (cpu is driving #DRDY_DRV/#DRDY_OWN): */ | ||
| 385 | [PERF_COUNT_HW_BUS_CYCLES] = | ||
| 386 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_FSB_DATA_ACTIVITY) | | ||
| 387 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) | | ||
| 388 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN)) | | ||
| 389 | p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE), | ||
| 390 | }; | ||
| 391 | |||
| 392 | static struct p4_event_bind *p4_config_get_bind(u64 config) | ||
| 393 | { | ||
| 394 | unsigned int evnt = p4_config_unpack_event(config); | ||
| 395 | struct p4_event_bind *bind = NULL; | ||
| 396 | |||
| 397 | if (evnt < ARRAY_SIZE(p4_event_bind_map)) | ||
| 398 | bind = &p4_event_bind_map[evnt]; | ||
| 399 | |||
| 400 | return bind; | ||
| 401 | } | ||
| 402 | |||
| 403 | static u64 p4_pmu_event_map(int hw_event) | ||
| 404 | { | ||
| 405 | struct p4_event_bind *bind; | ||
| 406 | unsigned int esel; | ||
| 407 | u64 config; | ||
| 408 | |||
| 409 | config = p4_general_events[hw_event]; | ||
| 410 | bind = p4_config_get_bind(config); | ||
| 411 | esel = P4_OPCODE_ESEL(bind->opcode); | ||
| 412 | config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel)); | ||
| 413 | |||
| 414 | return config; | ||
| 415 | } | ||
| 416 | |||
| 417 | static int p4_hw_config(struct perf_event *event) | ||
| 418 | { | ||
| 419 | int cpu = get_cpu(); | ||
| 420 | int rc = 0; | ||
| 421 | unsigned int evnt; | ||
| 422 | u32 escr, cccr; | ||
| 423 | |||
| 424 | /* | ||
| 425 | * the reason we grab the cpu this early is that if we get scheduled | ||
| 426 | * on the same cpu for the first time, we will not need to swap the | ||
| 427 | * thread-specific flags in the config (and will save some cpu cycles) | ||
| 428 | */ | ||
| 429 | |||
| 430 | cccr = p4_default_cccr_conf(cpu); | ||
| 431 | escr = p4_default_escr_conf(cpu, event->attr.exclude_kernel, | ||
| 432 | event->attr.exclude_user); | ||
| 433 | event->hw.config = p4_config_pack_escr(escr) | | ||
| 434 | p4_config_pack_cccr(cccr); | ||
| 435 | |||
| 436 | if (p4_ht_active() && p4_ht_thread(cpu)) | ||
| 437 | event->hw.config = p4_set_ht_bit(event->hw.config); | ||
| 438 | |||
| 439 | if (event->attr.type == PERF_TYPE_RAW) { | ||
| 440 | |||
| 441 | /* user data may have an out-of-bounds event index */ | ||
| 442 | evnt = p4_config_unpack_event(event->attr.config); | ||
| 443 | if (evnt >= ARRAY_SIZE(p4_event_bind_map)) { | ||
| 444 | rc = -EINVAL; | ||
| 445 | goto out; | ||
| 446 | } | ||
| 447 | |||
| 448 | /* | ||
| 449 | * We don't control raw events so it's up to the caller | ||
| 450 | * to pass sane values (and we don't count the thread number | ||
| 451 | * on an HT machine but allow HT-compatible specifics to be | ||
| 452 | * passed on) | ||
| 453 | * | ||
| 454 | * XXX: HT wide things should check perf_paranoid_cpu() && | ||
| 455 | * CAP_SYS_ADMIN | ||
| 456 | */ | ||
| 457 | event->hw.config |= event->attr.config & | ||
| 458 | (p4_config_pack_escr(P4_ESCR_MASK_HT) | | ||
| 459 | p4_config_pack_cccr(P4_CCCR_MASK_HT)); | ||
| 460 | } | ||
| 461 | |||
| 462 | rc = x86_setup_perfctr(event); | ||
| 463 | out: | ||
| 464 | put_cpu(); | ||
| 465 | return rc; | ||
| 466 | } | ||
| 467 | |||
| 468 | static inline void p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) | ||
| 469 | { | ||
| 470 | unsigned long dummy; | ||
| 471 | |||
| 472 | rdmsrl(hwc->config_base + hwc->idx, dummy); | ||
| 473 | if (dummy & P4_CCCR_OVF) { | ||
| 474 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | ||
| 475 | ((u64)dummy) & ~P4_CCCR_OVF); | ||
| 476 | } | ||
| 477 | } | ||
| 478 | |||
| 479 | static inline void p4_pmu_disable_event(struct perf_event *event) | ||
| 480 | { | ||
| 481 | struct hw_perf_event *hwc = &event->hw; | ||
| 482 | |||
| 483 | /* | ||
| 484 | * If the event gets disabled while the counter is in an overflowed | ||
| 485 | * state we need to clear P4_CCCR_OVF, otherwise the interrupt gets | ||
| 486 | * asserted again and again | ||
| 487 | */ | ||
| 488 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | ||
| 489 | (u64)(p4_config_unpack_cccr(hwc->config)) & | ||
| 490 | ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED); | ||
| 491 | } | ||
| 492 | |||
| 493 | static void p4_pmu_disable_all(void) | ||
| 494 | { | ||
| 495 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 496 | int idx; | ||
| 497 | |||
| 498 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
| 499 | struct perf_event *event = cpuc->events[idx]; | ||
| 500 | if (!test_bit(idx, cpuc->active_mask)) | ||
| 501 | continue; | ||
| 502 | p4_pmu_disable_event(event); | ||
| 503 | } | ||
| 504 | } | ||
| 505 | |||
| 506 | static void p4_pmu_enable_event(struct perf_event *event) | ||
| 507 | { | ||
| 508 | struct hw_perf_event *hwc = &event->hw; | ||
| 509 | int thread = p4_ht_config_thread(hwc->config); | ||
| 510 | u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config)); | ||
| 511 | unsigned int idx = p4_config_unpack_event(hwc->config); | ||
| 512 | unsigned int idx_cache = p4_config_unpack_cache_event(hwc->config); | ||
| 513 | struct p4_event_bind *bind; | ||
| 514 | struct p4_cache_event_bind *bind_cache; | ||
| 515 | u64 escr_addr, cccr; | ||
| 516 | |||
| 517 | bind = &p4_event_bind_map[idx]; | ||
| 518 | escr_addr = (u64)bind->escr_msr[thread]; | ||
| 519 | |||
| 520 | /* | ||
| 521 | * - we don't support cascaded counters yet | ||
| 522 | * - and counter 1 is broken (erratum) | ||
| 523 | */ | ||
| 524 | WARN_ON_ONCE(p4_is_event_cascaded(hwc->config)); | ||
| 525 | WARN_ON_ONCE(hwc->idx == 1); | ||
| 526 | |||
| 527 | /* we need a real Event value */ | ||
| 528 | escr_conf &= ~P4_ESCR_EVENT_MASK; | ||
| 529 | escr_conf |= P4_ESCR_EVENT(P4_OPCODE_EVNT(bind->opcode)); | ||
| 530 | |||
| 531 | cccr = p4_config_unpack_cccr(hwc->config); | ||
| 532 | |||
| 533 | /* | ||
| 534 | * it could be a cache event, in which case we need to | ||
| 535 | * set the metrics in additional MSRs | ||
| 536 | */ | ||
| 537 | BUILD_BUG_ON(P4_CACHE__MAX > P4_CCCR_CACHE_OPS_MASK); | ||
| 538 | if (idx_cache > P4_CACHE__NONE && | ||
| 539 | idx_cache < ARRAY_SIZE(p4_cache_event_bind_map)) { | ||
| 540 | bind_cache = &p4_cache_event_bind_map[idx_cache]; | ||
| 541 | (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind_cache->metric_pebs); | ||
| 542 | (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind_cache->metric_vert); | ||
| 543 | } | ||
| 544 | |||
| 545 | (void)checking_wrmsrl(escr_addr, escr_conf); | ||
| 546 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | ||
| 547 | (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE); | ||
| 548 | } | ||
| 549 | |||
| 550 | static void p4_pmu_enable_all(int added) | ||
| 551 | { | ||
| 552 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 553 | int idx; | ||
| 554 | |||
| 555 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
| 556 | struct perf_event *event = cpuc->events[idx]; | ||
| 557 | if (!test_bit(idx, cpuc->active_mask)) | ||
| 558 | continue; | ||
| 559 | p4_pmu_enable_event(event); | ||
| 560 | } | ||
| 561 | } | ||
| 562 | |||
| 563 | static int p4_pmu_handle_irq(struct pt_regs *regs) | ||
| 564 | { | ||
| 565 | struct perf_sample_data data; | ||
| 566 | struct cpu_hw_events *cpuc; | ||
| 567 | struct perf_event *event; | ||
| 568 | struct hw_perf_event *hwc; | ||
| 569 | int idx, handled = 0; | ||
| 570 | u64 val; | ||
| 571 | |||
| 572 | data.addr = 0; | ||
| 573 | data.raw = NULL; | ||
| 574 | |||
| 575 | cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 576 | |||
| 577 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
| 578 | |||
| 579 | if (!test_bit(idx, cpuc->active_mask)) | ||
| 580 | continue; | ||
| 581 | |||
| 582 | event = cpuc->events[idx]; | ||
| 583 | hwc = &event->hw; | ||
| 584 | |||
| 585 | WARN_ON_ONCE(hwc->idx != idx); | ||
| 586 | |||
| 587 | /* | ||
| 588 | * FIXME: Redundant call, actually not needed | ||
| 589 | * but just to check if we're screwed | ||
| 590 | */ | ||
| 591 | p4_pmu_clear_cccr_ovf(hwc); | ||
| 592 | |||
| 593 | val = x86_perf_event_update(event); | ||
| 594 | if (val & (1ULL << (x86_pmu.cntval_bits - 1))) | ||
| 595 | continue; | ||
| 596 | |||
| 597 | /* | ||
| 598 | * event overflow | ||
| 599 | */ | ||
| 600 | handled = 1; | ||
| 601 | data.period = event->hw.last_period; | ||
| 602 | |||
| 603 | if (!x86_perf_event_set_period(event)) | ||
| 604 | continue; | ||
| 605 | if (perf_event_overflow(event, 1, &data, regs)) | ||
| 606 | p4_pmu_disable_event(event); | ||
| 607 | } | ||
| 608 | |||
| 609 | if (handled) { | ||
| 610 | /* p4 quirk: unmask it again */ | ||
| 611 | apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); | ||
| 612 | inc_irq_stat(apic_perf_irqs); | ||
| 613 | } | ||
| 614 | |||
| 615 | return handled; | ||
| 616 | } | ||
| 617 | |||
| 618 | /* | ||
| 619 | * swap thread-specific fields according to the thread | ||
| 620 | * we are going to run on | ||
| 621 | */ | ||
| 622 | static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu) | ||
| 623 | { | ||
| 624 | u32 escr, cccr; | ||
| 625 | |||
| 626 | /* | ||
| 627 | * either we are lucky and continue on the same cpu, or there is no HT support | ||
| 628 | */ | ||
| 629 | if (!p4_should_swap_ts(hwc->config, cpu)) | ||
| 630 | return; | ||
| 631 | |||
| 632 | /* | ||
| 633 | * the event was migrated from another logical | ||
| 634 | * cpu, so we need to swap the thread-specific flags | ||
| 635 | */ | ||
| 636 | |||
| 637 | escr = p4_config_unpack_escr(hwc->config); | ||
| 638 | cccr = p4_config_unpack_cccr(hwc->config); | ||
| 639 | |||
| 640 | if (p4_ht_thread(cpu)) { | ||
| 641 | cccr &= ~P4_CCCR_OVF_PMI_T0; | ||
| 642 | cccr |= P4_CCCR_OVF_PMI_T1; | ||
| 643 | if (escr & P4_ESCR_T0_OS) { | ||
| 644 | escr &= ~P4_ESCR_T0_OS; | ||
| 645 | escr |= P4_ESCR_T1_OS; | ||
| 646 | } | ||
| 647 | if (escr & P4_ESCR_T0_USR) { | ||
| 648 | escr &= ~P4_ESCR_T0_USR; | ||
| 649 | escr |= P4_ESCR_T1_USR; | ||
| 650 | } | ||
| 651 | hwc->config = p4_config_pack_escr(escr); | ||
| 652 | hwc->config |= p4_config_pack_cccr(cccr); | ||
| 653 | hwc->config |= P4_CONFIG_HT; | ||
| 654 | } else { | ||
| 655 | cccr &= ~P4_CCCR_OVF_PMI_T1; | ||
| 656 | cccr |= P4_CCCR_OVF_PMI_T0; | ||
| 657 | if (escr & P4_ESCR_T1_OS) { | ||
| 658 | escr &= ~P4_ESCR_T1_OS; | ||
| 659 | escr |= P4_ESCR_T0_OS; | ||
| 660 | } | ||
| 661 | if (escr & P4_ESCR_T1_USR) { | ||
| 662 | escr &= ~P4_ESCR_T1_USR; | ||
| 663 | escr |= P4_ESCR_T0_USR; | ||
| 664 | } | ||
| 665 | hwc->config = p4_config_pack_escr(escr); | ||
| 666 | hwc->config |= p4_config_pack_cccr(cccr); | ||
| 667 | hwc->config &= ~P4_CONFIG_HT; | ||
| 668 | } | ||
| 669 | } | ||
| 670 | |||
| 671 | /* | ||
| 672 | * ESCR address hashing is tricky: the ESCRs are not sequential | ||
| 673 | * in memory, but they all start from MSR_P4_BSU_ESCR0 (0x3a0) and | ||
| 674 | * the low byte of every ESCR address lies in the range [0xa0, 0xe1] | ||
| 675 | * | ||
| 676 | * so we end up with a ~70% filled hash table | ||
| 677 | */ | ||
| 678 | |||
| 679 | #define P4_ESCR_MSR_BASE 0x000003a0 | ||
| 680 | #define P4_ESCR_MSR_MAX 0x000003e1 | ||
| 681 | #define P4_ESCR_MSR_TABLE_SIZE (P4_ESCR_MSR_MAX - P4_ESCR_MSR_BASE + 1) | ||
| 682 | #define P4_ESCR_MSR_IDX(msr) (msr - P4_ESCR_MSR_BASE) | ||
| 683 | #define P4_ESCR_MSR_TABLE_ENTRY(msr) [P4_ESCR_MSR_IDX(msr)] = msr | ||
| 684 | |||
| 685 | static const unsigned int p4_escr_table[P4_ESCR_MSR_TABLE_SIZE] = { | ||
| 686 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR0), | ||
| 687 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR1), | ||
| 688 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR0), | ||
| 689 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR1), | ||
| 690 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR0), | ||
| 691 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR1), | ||
| 692 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR0), | ||
| 693 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR1), | ||
| 694 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR2), | ||
| 695 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR3), | ||
| 696 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR4), | ||
| 697 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR5), | ||
| 698 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR0), | ||
| 699 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR1), | ||
| 700 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR0), | ||
| 701 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR1), | ||
| 702 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR0), | ||
| 703 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR1), | ||
| 704 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR0), | ||
| 705 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR1), | ||
| 706 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR0), | ||
| 707 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR1), | ||
| 708 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR0), | ||
| 709 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR1), | ||
| 710 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR0), | ||
| 711 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR1), | ||
| 712 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR0), | ||
| 713 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR1), | ||
| 714 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR0), | ||
| 715 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR1), | ||
| 716 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR0), | ||
| 717 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR1), | ||
| 718 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR0), | ||
| 719 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR1), | ||
| 720 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR0), | ||
| 721 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR1), | ||
| 722 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR0), | ||
| 723 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR1), | ||
| 724 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR0), | ||
| 725 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR1), | ||
| 726 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR0), | ||
| 727 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR1), | ||
| 728 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR0), | ||
| 729 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR1), | ||
| 730 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR0), | ||
| 731 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR1), | ||
| 732 | }; | ||
| 733 | |||
| 734 | static int p4_get_escr_idx(unsigned int addr) | ||
| 735 | { | ||
| 736 | unsigned int idx = P4_ESCR_MSR_IDX(addr); | ||
| 737 | |||
| 738 | if (unlikely(idx >= P4_ESCR_MSR_TABLE_SIZE || | ||
| 739 | !p4_escr_table[idx])) { | ||
| 740 | WARN_ONCE(1, "P4 PMU: Wrong address passed: %x\n", addr); | ||
| 741 | return -1; | ||
| 742 | } | ||
| 743 | |||
| 744 | return idx; | ||
| 745 | } | ||
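The "hashing" above is really a direct-mapped table keyed by the MSR address offset from P4_ESCR_MSR_BASE. A standalone sketch of the index computation follows, reusing the constants defined above; 0x3b8 is just an arbitrary address inside the range, chosen for illustration.

        #include <stdio.h>

        #define P4_ESCR_MSR_BASE        0x000003a0
        #define P4_ESCR_MSR_MAX         0x000003e1
        #define P4_ESCR_MSR_TABLE_SIZE  (P4_ESCR_MSR_MAX - P4_ESCR_MSR_BASE + 1)
        #define P4_ESCR_MSR_IDX(msr)    ((msr) - P4_ESCR_MSR_BASE)

        int main(void)
        {
                /* first, an arbitrary in-range, and the last possible ESCR address */
                unsigned int addrs[] = { 0x3a0, 0x3b8, 0x3e1 };
                unsigned int i;

                for (i = 0; i < 3; i++) {
                        unsigned int idx = P4_ESCR_MSR_IDX(addrs[i]);

                        if (idx >= P4_ESCR_MSR_TABLE_SIZE)
                                printf("msr %#x: outside the ESCR range\n", addrs[i]);
                        else
                                printf("msr %#x -> slot %u of %u\n", addrs[i],
                                       idx, (unsigned int)P4_ESCR_MSR_TABLE_SIZE);
                }
                return 0;
        }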
| 746 | |||
| 747 | static int p4_next_cntr(int thread, unsigned long *used_mask, | ||
| 748 | struct p4_event_bind *bind) | ||
| 749 | { | ||
| 750 | int i, j; | ||
| 751 | |||
| 752 | for (i = 0; i < P4_CNTR_LIMIT; i++) { | ||
| 753 | j = bind->cntr[thread][i]; | ||
| 754 | if (j != -1 && !test_bit(j, used_mask)) | ||
| 755 | return j; | ||
| 756 | } | ||
| 757 | |||
| 758 | return -1; | ||
| 759 | } | ||
| 760 | |||
| 761 | static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | ||
| 762 | { | ||
| 763 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
| 764 | unsigned long escr_mask[BITS_TO_LONGS(P4_ESCR_MSR_TABLE_SIZE)]; | ||
| 765 | int cpu = raw_smp_processor_id(); | ||
| 766 | struct hw_perf_event *hwc; | ||
| 767 | struct p4_event_bind *bind; | ||
| 768 | unsigned int i, thread, num; | ||
| 769 | int cntr_idx, escr_idx; | ||
| 770 | |||
| 771 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); | ||
| 772 | bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE); | ||
| 773 | |||
| 774 | for (i = 0, num = n; i < n; i++, num--) { | ||
| 775 | |||
| 776 | hwc = &cpuc->event_list[i]->hw; | ||
| 777 | thread = p4_ht_thread(cpu); | ||
| 778 | bind = p4_config_get_bind(hwc->config); | ||
| 779 | escr_idx = p4_get_escr_idx(bind->escr_msr[thread]); | ||
| 780 | if (unlikely(escr_idx == -1)) | ||
| 781 | goto done; | ||
| 782 | |||
| 783 | if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) { | ||
| 784 | cntr_idx = hwc->idx; | ||
| 785 | if (assign) | ||
| 786 | assign[i] = hwc->idx; | ||
| 787 | goto reserve; | ||
| 788 | } | ||
| 789 | |||
| 790 | cntr_idx = p4_next_cntr(thread, used_mask, bind); | ||
| 791 | if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) | ||
| 792 | goto done; | ||
| 793 | |||
| 794 | p4_pmu_swap_config_ts(hwc, cpu); | ||
| 795 | if (assign) | ||
| 796 | assign[i] = cntr_idx; | ||
| 797 | reserve: | ||
| 798 | set_bit(cntr_idx, used_mask); | ||
| 799 | set_bit(escr_idx, escr_mask); | ||
| 800 | } | ||
| 801 | |||
| 802 | done: | ||
| 803 | return num ? -ENOSPC : 0; | ||
| 804 | } | ||
| 805 | |||
| 806 | static __initconst const struct x86_pmu p4_pmu = { | ||
| 807 | .name = "Netburst P4/Xeon", | ||
| 808 | .handle_irq = p4_pmu_handle_irq, | ||
| 809 | .disable_all = p4_pmu_disable_all, | ||
| 810 | .enable_all = p4_pmu_enable_all, | ||
| 811 | .enable = p4_pmu_enable_event, | ||
| 812 | .disable = p4_pmu_disable_event, | ||
| 813 | .eventsel = MSR_P4_BPU_CCCR0, | ||
| 814 | .perfctr = MSR_P4_BPU_PERFCTR0, | ||
| 815 | .event_map = p4_pmu_event_map, | ||
| 816 | .max_events = ARRAY_SIZE(p4_general_events), | ||
| 817 | .get_event_constraints = x86_get_event_constraints, | ||
| 818 | /* | ||
| 819 | * If HT is disabled we may need to use all | ||
| 820 | * ARCH_P4_MAX_CCCR counters simultaneously, | ||
| 821 | * though for the moment we leave it restricted, | ||
| 822 | * assuming HT is on | ||
| 823 | */ | ||
| 824 | .num_counters = ARCH_P4_MAX_CCCR, | ||
| 825 | .apic = 1, | ||
| 826 | .cntval_bits = 40, | ||
| 827 | .cntval_mask = (1ULL << 40) - 1, | ||
| 828 | .max_period = (1ULL << 39) - 1, | ||
| 829 | .hw_config = p4_hw_config, | ||
| 830 | .schedule_events = p4_pmu_schedule_events, | ||
| 831 | }; | ||
| 832 | |||
| 833 | static __init int p4_pmu_init(void) | ||
| 834 | { | ||
| 835 | unsigned int low, high; | ||
| 836 | |||
| 837 | /* If we get stripped -- indexing fails */ | ||
| 838 | BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC); | ||
| 839 | |||
| 840 | rdmsr(MSR_IA32_MISC_ENABLE, low, high); | ||
| 841 | if (!(low & (1 << 7))) { | ||
| 842 | pr_cont("unsupported Netburst CPU model %d ", | ||
| 843 | boot_cpu_data.x86_model); | ||
| 844 | return -ENODEV; | ||
| 845 | } | ||
| 846 | |||
| 847 | memcpy(hw_cache_event_ids, p4_hw_cache_event_ids, | ||
| 848 | sizeof(hw_cache_event_ids)); | ||
| 849 | |||
| 850 | pr_cont("Netburst events, "); | ||
| 851 | |||
| 852 | x86_pmu = p4_pmu; | ||
| 853 | |||
| 854 | return 0; | ||
| 855 | } | ||
| 856 | |||
| 857 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index a330485d14da..34ba07be2cda 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c | |||
| @@ -27,24 +27,6 @@ static u64 p6_pmu_event_map(int hw_event) | |||
| 27 | */ | 27 | */ |
| 28 | #define P6_NOP_EVENT 0x0000002EULL | 28 | #define P6_NOP_EVENT 0x0000002EULL |
| 29 | 29 | ||
| 30 | static u64 p6_pmu_raw_event(u64 hw_event) | ||
| 31 | { | ||
| 32 | #define P6_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
| 33 | #define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
| 34 | #define P6_EVNTSEL_EDGE_MASK 0x00040000ULL | ||
| 35 | #define P6_EVNTSEL_INV_MASK 0x00800000ULL | ||
| 36 | #define P6_EVNTSEL_REG_MASK 0xFF000000ULL | ||
| 37 | |||
| 38 | #define P6_EVNTSEL_MASK \ | ||
| 39 | (P6_EVNTSEL_EVENT_MASK | \ | ||
| 40 | P6_EVNTSEL_UNIT_MASK | \ | ||
| 41 | P6_EVNTSEL_EDGE_MASK | \ | ||
| 42 | P6_EVNTSEL_INV_MASK | \ | ||
| 43 | P6_EVNTSEL_REG_MASK) | ||
| 44 | |||
| 45 | return hw_event & P6_EVNTSEL_MASK; | ||
| 46 | } | ||
| 47 | |||
| 48 | static struct event_constraint p6_event_constraints[] = | 30 | static struct event_constraint p6_event_constraints[] = |
| 49 | { | 31 | { |
| 50 | INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ | 32 | INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ |
| @@ -66,7 +48,7 @@ static void p6_pmu_disable_all(void) | |||
| 66 | wrmsrl(MSR_P6_EVNTSEL0, val); | 48 | wrmsrl(MSR_P6_EVNTSEL0, val); |
| 67 | } | 49 | } |
| 68 | 50 | ||
| 69 | static void p6_pmu_enable_all(void) | 51 | static void p6_pmu_enable_all(int added) |
| 70 | { | 52 | { |
| 71 | unsigned long val; | 53 | unsigned long val; |
| 72 | 54 | ||
| @@ -102,22 +84,23 @@ static void p6_pmu_enable_event(struct perf_event *event) | |||
| 102 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); | 84 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); |
| 103 | } | 85 | } |
| 104 | 86 | ||
| 105 | static __initconst struct x86_pmu p6_pmu = { | 87 | static __initconst const struct x86_pmu p6_pmu = { |
| 106 | .name = "p6", | 88 | .name = "p6", |
| 107 | .handle_irq = x86_pmu_handle_irq, | 89 | .handle_irq = x86_pmu_handle_irq, |
| 108 | .disable_all = p6_pmu_disable_all, | 90 | .disable_all = p6_pmu_disable_all, |
| 109 | .enable_all = p6_pmu_enable_all, | 91 | .enable_all = p6_pmu_enable_all, |
| 110 | .enable = p6_pmu_enable_event, | 92 | .enable = p6_pmu_enable_event, |
| 111 | .disable = p6_pmu_disable_event, | 93 | .disable = p6_pmu_disable_event, |
| 94 | .hw_config = x86_pmu_hw_config, | ||
| 95 | .schedule_events = x86_schedule_events, | ||
| 112 | .eventsel = MSR_P6_EVNTSEL0, | 96 | .eventsel = MSR_P6_EVNTSEL0, |
| 113 | .perfctr = MSR_P6_PERFCTR0, | 97 | .perfctr = MSR_P6_PERFCTR0, |
| 114 | .event_map = p6_pmu_event_map, | 98 | .event_map = p6_pmu_event_map, |
| 115 | .raw_event = p6_pmu_raw_event, | ||
| 116 | .max_events = ARRAY_SIZE(p6_perfmon_event_map), | 99 | .max_events = ARRAY_SIZE(p6_perfmon_event_map), |
| 117 | .apic = 1, | 100 | .apic = 1, |
| 118 | .max_period = (1ULL << 31) - 1, | 101 | .max_period = (1ULL << 31) - 1, |
| 119 | .version = 0, | 102 | .version = 0, |
| 120 | .num_events = 2, | 103 | .num_counters = 2, |
| 121 | /* | 104 | /* |
| 122 | * Events have 40 bits implemented. However they are designed such | 105 | * Events have 40 bits implemented. However they are designed such |
| 123 | * that bits [32-39] are sign extensions of bit 31. As such the | 106 | * that bits [32-39] are sign extensions of bit 31. As such the |
| @@ -125,8 +108,8 @@ static __initconst struct x86_pmu p6_pmu = { | |||
| 125 | * | 108 | * |
| 126 | * See IA-32 Intel Architecture Software developer manual Vol 3B | 109 | * See IA-32 Intel Architecture Software developer manual Vol 3B |
| 127 | */ | 110 | */ |
| 128 | .event_bits = 32, | 111 | .cntval_bits = 32, |
| 129 | .event_mask = (1ULL << 32) - 1, | 112 | .cntval_mask = (1ULL << 32) - 1, |
| 130 | .get_event_constraints = x86_get_event_constraints, | 113 | .get_event_constraints = x86_get_event_constraints, |
| 131 | .event_constraints = p6_event_constraints, | 114 | .event_constraints = p6_event_constraints, |
| 132 | }; | 115 | }; |
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index dfdb4dba2320..b9d1ff588445 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c | |||
| @@ -24,8 +24,8 @@ | |||
| 24 | #include <linux/dmi.h> | 24 | #include <linux/dmi.h> |
| 25 | #include <linux/module.h> | 25 | #include <linux/module.h> |
| 26 | #include <asm/div64.h> | 26 | #include <asm/div64.h> |
| 27 | #include <asm/vmware.h> | ||
| 28 | #include <asm/x86_init.h> | 27 | #include <asm/x86_init.h> |
| 28 | #include <asm/hypervisor.h> | ||
| 29 | 29 | ||
| 30 | #define CPUID_VMWARE_INFO_LEAF 0x40000000 | 30 | #define CPUID_VMWARE_INFO_LEAF 0x40000000 |
| 31 | #define VMWARE_HYPERVISOR_MAGIC 0x564D5868 | 31 | #define VMWARE_HYPERVISOR_MAGIC 0x564D5868 |
| @@ -65,7 +65,7 @@ static unsigned long vmware_get_tsc_khz(void) | |||
| 65 | return tsc_hz; | 65 | return tsc_hz; |
| 66 | } | 66 | } |
| 67 | 67 | ||
| 68 | void __init vmware_platform_setup(void) | 68 | static void __init vmware_platform_setup(void) |
| 69 | { | 69 | { |
| 70 | uint32_t eax, ebx, ecx, edx; | 70 | uint32_t eax, ebx, ecx, edx; |
| 71 | 71 | ||
| @@ -83,26 +83,22 @@ void __init vmware_platform_setup(void) | |||
| 83 | * serial key should be enough, as this will always have a VMware | 83 | * serial key should be enough, as this will always have a VMware |
| 84 | * specific string when running under VMware hypervisor. | 84 | * specific string when running under VMware hypervisor. |
| 85 | */ | 85 | */ |
| 86 | int vmware_platform(void) | 86 | static bool __init vmware_platform(void) |
| 87 | { | 87 | { |
| 88 | if (cpu_has_hypervisor) { | 88 | if (cpu_has_hypervisor) { |
| 89 | unsigned int eax, ebx, ecx, edx; | 89 | unsigned int eax; |
| 90 | char hyper_vendor_id[13]; | 90 | unsigned int hyper_vendor_id[3]; |
| 91 | 91 | ||
| 92 | cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &ebx, &ecx, &edx); | 92 | cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &hyper_vendor_id[0], |
| 93 | memcpy(hyper_vendor_id + 0, &ebx, 4); | 93 | &hyper_vendor_id[1], &hyper_vendor_id[2]); |
| 94 | memcpy(hyper_vendor_id + 4, &ecx, 4); | 94 | if (!memcmp(hyper_vendor_id, "VMwareVMware", 12)) |
| 95 | memcpy(hyper_vendor_id + 8, &edx, 4); | 95 | return true; |
| 96 | hyper_vendor_id[12] = '\0'; | ||
| 97 | if (!strcmp(hyper_vendor_id, "VMwareVMware")) | ||
| 98 | return 1; | ||
| 99 | } else if (dmi_available && dmi_name_in_serial("VMware") && | 96 | } else if (dmi_available && dmi_name_in_serial("VMware") && |
| 100 | __vmware_platform()) | 97 | __vmware_platform()) |
| 101 | return 1; | 98 | return true; |
| 102 | 99 | ||
| 103 | return 0; | 100 | return false; |
| 104 | } | 101 | } |
| 105 | EXPORT_SYMBOL(vmware_platform); | ||
| 106 | 102 | ||
| 107 | /* | 103 | /* |
| 108 | * VMware hypervisor takes care of exporting a reliable TSC to the guest. | 104 | * VMware hypervisor takes care of exporting a reliable TSC to the guest. |
| @@ -116,8 +112,16 @@ EXPORT_SYMBOL(vmware_platform); | |||
| 116 | * so that the kernel could just trust the hypervisor with providing a | 112 | * so that the kernel could just trust the hypervisor with providing a |
| 117 | * reliable virtual TSC that is suitable for timekeeping. | 113 | * reliable virtual TSC that is suitable for timekeeping. |
| 118 | */ | 114 | */ |
| 119 | void __cpuinit vmware_set_feature_bits(struct cpuinfo_x86 *c) | 115 | static void __cpuinit vmware_set_cpu_features(struct cpuinfo_x86 *c) |
| 120 | { | 116 | { |
| 121 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 117 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
| 122 | set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); | 118 | set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); |
| 123 | } | 119 | } |
| 120 | |||
| 121 | const __refconst struct hypervisor_x86 x86_hyper_vmware = { | ||
| 122 | .name = "VMware", | ||
| 123 | .detect = vmware_platform, | ||
| 124 | .set_cpu_features = vmware_set_cpu_features, | ||
| 125 | .init_platform = vmware_platform_setup, | ||
| 126 | }; | ||
| 127 | EXPORT_SYMBOL(x86_hyper_vmware); | ||
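The rewritten detection above relies on the CPUID hypervisor leaf: leaf 0x40000000 returns a 12-byte vendor signature in EBX/ECX/EDX, which is compared against "VMwareVMware". A hedged user-space sketch of the same check follows; it is illustrative only, not kernel code, uses GCC's <cpuid.h> helper, and skips the cpu_has_hypervisor pre-check the kernel performs via CPUID.1:ECX bit 31:

/* Illustrative sketch of the vendor-signature check done by vmware_platform().
 * Build with gcc on x86; __cpuid() comes from GCC's <cpuid.h>. */
#include <cpuid.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	unsigned int eax, vendor[3];

	/* Hypervisors expose their signature at leaf 0x40000000. */
	__cpuid(0x40000000, eax, vendor[0], vendor[1], vendor[2]);
	(void)eax;

	if (!memcmp(vendor, "VMwareVMware", 12))
		puts("running on VMware");
	else
		puts("not VMware (or no hypervisor leaf exposed)");
	return 0;
}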
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c deleted file mode 100644 index 1c47390dd0e5..000000000000 --- a/arch/x86/kernel/ds.c +++ /dev/null | |||
| @@ -1,1437 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Debug Store support | ||
| 3 | * | ||
| 4 | * This provides a low-level interface to the hardware's Debug Store | ||
| 5 | * feature that is used for branch trace store (BTS) and | ||
| 6 | * precise-event based sampling (PEBS). | ||
| 7 | * | ||
| 8 | * It manages: | ||
| 9 | * - DS and BTS hardware configuration | ||
| 10 | * - buffer overflow handling (to be done) | ||
| 11 | * - buffer access | ||
| 12 | * | ||
| 13 | * It does not do: | ||
| 14 | * - security checking (is the caller allowed to trace the task) | ||
| 15 | * - buffer allocation (memory accounting) | ||
| 16 | * | ||
| 17 | * | ||
| 18 | * Copyright (C) 2007-2009 Intel Corporation. | ||
| 19 | * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009 | ||
| 20 | */ | ||
| 21 | |||
| 22 | #include <linux/kernel.h> | ||
| 23 | #include <linux/string.h> | ||
| 24 | #include <linux/errno.h> | ||
| 25 | #include <linux/sched.h> | ||
| 26 | #include <linux/slab.h> | ||
| 27 | #include <linux/mm.h> | ||
| 28 | #include <linux/trace_clock.h> | ||
| 29 | |||
| 30 | #include <asm/ds.h> | ||
| 31 | |||
| 32 | #include "ds_selftest.h" | ||
| 33 | |||
| 34 | /* | ||
| 35 | * The configuration for a particular DS hardware implementation: | ||
| 36 | */ | ||
| 37 | struct ds_configuration { | ||
| 38 | /* The name of the configuration: */ | ||
| 39 | const char *name; | ||
| 40 | |||
| 41 | /* The size of pointer-typed fields in DS, BTS, and PEBS: */ | ||
| 42 | unsigned char sizeof_ptr_field; | ||
| 43 | |||
| 44 | /* The size of a BTS/PEBS record in bytes: */ | ||
| 45 | unsigned char sizeof_rec[2]; | ||
| 46 | |||
| 47 | /* The number of pebs counter reset values in the DS structure. */ | ||
| 48 | unsigned char nr_counter_reset; | ||
| 49 | |||
| 50 | /* Control bit-masks indexed by enum ds_feature: */ | ||
| 51 | unsigned long ctl[dsf_ctl_max]; | ||
| 52 | }; | ||
| 53 | static struct ds_configuration ds_cfg __read_mostly; | ||
| 54 | |||
| 55 | |||
| 56 | /* Maximal size of a DS configuration: */ | ||
| 57 | #define MAX_SIZEOF_DS 0x80 | ||
| 58 | |||
| 59 | /* Maximal size of a BTS record: */ | ||
| 60 | #define MAX_SIZEOF_BTS (3 * 8) | ||
| 61 | |||
| 62 | /* BTS and PEBS buffer alignment: */ | ||
| 63 | #define DS_ALIGNMENT (1 << 3) | ||
| 64 | |||
| 65 | /* Number of buffer pointers in DS: */ | ||
| 66 | #define NUM_DS_PTR_FIELDS 8 | ||
| 67 | |||
| 68 | /* Size of a pebs reset value in DS: */ | ||
| 69 | #define PEBS_RESET_FIELD_SIZE 8 | ||
| 70 | |||
| 71 | /* Mask of control bits in the DS MSR register: */ | ||
| 72 | #define BTS_CONTROL \ | ||
| 73 | ( ds_cfg.ctl[dsf_bts] | \ | ||
| 74 | ds_cfg.ctl[dsf_bts_kernel] | \ | ||
| 75 | ds_cfg.ctl[dsf_bts_user] | \ | ||
| 76 | ds_cfg.ctl[dsf_bts_overflow] ) | ||
| 77 | |||
| 78 | /* | ||
| 79 | * A BTS or PEBS tracer. | ||
| 80 | * | ||
| 81 | * This holds the configuration of the tracer and serves as a handle | ||
| 82 | * to identify tracers. | ||
| 83 | */ | ||
| 84 | struct ds_tracer { | ||
| 85 | /* The DS context (partially) owned by this tracer. */ | ||
| 86 | struct ds_context *context; | ||
| 87 | /* The buffer provided on ds_request() and its size in bytes. */ | ||
| 88 | void *buffer; | ||
| 89 | size_t size; | ||
| 90 | }; | ||
| 91 | |||
| 92 | struct bts_tracer { | ||
| 93 | /* The common DS part: */ | ||
| 94 | struct ds_tracer ds; | ||
| 95 | |||
| 96 | /* The trace including the DS configuration: */ | ||
| 97 | struct bts_trace trace; | ||
| 98 | |||
| 99 | /* Buffer overflow notification function: */ | ||
| 100 | bts_ovfl_callback_t ovfl; | ||
| 101 | |||
| 102 | /* Active flags affecting trace collection. */ | ||
| 103 | unsigned int flags; | ||
| 104 | }; | ||
| 105 | |||
| 106 | struct pebs_tracer { | ||
| 107 | /* The common DS part: */ | ||
| 108 | struct ds_tracer ds; | ||
| 109 | |||
| 110 | /* The trace including the DS configuration: */ | ||
| 111 | struct pebs_trace trace; | ||
| 112 | |||
| 113 | /* Buffer overflow notification function: */ | ||
| 114 | pebs_ovfl_callback_t ovfl; | ||
| 115 | }; | ||
| 116 | |||
| 117 | /* | ||
| 118 | * Debug Store (DS) save area configuration (see Intel64 and IA32 | ||
| 119 | * Architectures Software Developer's Manual, section 18.5) | ||
| 120 | * | ||
| 121 | * The DS configuration consists of the following fields; different | ||
| 122 | * architectures vary in the size of those fields. | ||
| 123 | * | ||
| 124 | * - double-word aligned base linear address of the BTS buffer | ||
| 125 | * - write pointer into the BTS buffer | ||
| 126 | * - end linear address of the BTS buffer (one byte beyond the end of | ||
| 127 | * the buffer) | ||
| 128 | * - interrupt pointer into BTS buffer | ||
| 129 | * (interrupt occurs when write pointer passes interrupt pointer) | ||
| 130 | * - double-word aligned base linear address of the PEBS buffer | ||
| 131 | * - write pointer into the PEBS buffer | ||
| 132 | * - end linear address of the PEBS buffer (one byte beyond the end of | ||
| 133 | * the buffer) | ||
| 134 | * - interrupt pointer into PEBS buffer | ||
| 135 | * (interrupt occurs when write pointer passes interrupt pointer) | ||
| 136 | * - value to which counter is reset following counter overflow | ||
| 137 | * | ||
| 138 | * Later architectures use 64bit pointers throughout, whereas earlier | ||
| 139 | * architectures use 32bit pointers in 32bit mode. | ||
| 140 | * | ||
| 141 | * | ||
| 142 | * We compute the base address for the first 8 fields based on: | ||
| 143 | * - the field size stored in the DS configuration | ||
| 144 | * - the relative field position | ||
| 145 | * - an offset giving the start of the respective region | ||
| 146 | * | ||
| 147 | * This offset is further used to index various arrays holding | ||
| 148 | * information for BTS and PEBS at the respective index. | ||
| 149 | * | ||
| 150 | * On later 32bit processors, we only access the lower 32bit of the | ||
| 151 | * 64bit pointer fields. The upper halves will be zeroed out. | ||
| 152 | */ | ||
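As a worked example of the layout just described, assuming the 64-bit variant (8-byte pointer fields, the X86_FEATURE_DTES64 case handled in ds_configure() below): each pointer-sized field sits at offset sizeof_ptr_field * (field + 4 * qual), so the PEBS index is at 8 * (1 + 4) = 40 bytes into the DS area and the PEBS counter-reset values begin at 8 * 8 = 64 bytes. A tiny sketch of that arithmetic, matching ds_get()/ds_set() below:

/* Illustrative offset arithmetic for the DS save area described above.
 * qual: 0 = BTS region, 1 = PEBS region (matching enum ds_qualifier below);
 * field: 0 = buffer base, 1 = index, 2 = absolute maximum, 3 = threshold. */
enum { SIZEOF_PTR_FIELD = 8, NUM_PTR_FIELDS = 8 };	/* 64-bit (DTES64) case */

static unsigned int ds_field_offset(unsigned int qual, unsigned int field)
{
	return SIZEOF_PTR_FIELD * (field + 4 * qual);
}
/* ds_field_offset(1, 1) == 40: byte offset of the PEBS index;
 * the counter-reset area starts at SIZEOF_PTR_FIELD * NUM_PTR_FIELDS == 64. */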
| 153 | |||
| 154 | enum ds_field { | ||
| 155 | ds_buffer_base = 0, | ||
| 156 | ds_index, | ||
| 157 | ds_absolute_maximum, | ||
| 158 | ds_interrupt_threshold, | ||
| 159 | }; | ||
| 160 | |||
| 161 | enum ds_qualifier { | ||
| 162 | ds_bts = 0, | ||
| 163 | ds_pebs | ||
| 164 | }; | ||
| 165 | |||
| 166 | static inline unsigned long | ||
| 167 | ds_get(const unsigned char *base, enum ds_qualifier qual, enum ds_field field) | ||
| 168 | { | ||
| 169 | base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual))); | ||
| 170 | return *(unsigned long *)base; | ||
| 171 | } | ||
| 172 | |||
| 173 | static inline void | ||
| 174 | ds_set(unsigned char *base, enum ds_qualifier qual, enum ds_field field, | ||
| 175 | unsigned long value) | ||
| 176 | { | ||
| 177 | base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual))); | ||
| 178 | (*(unsigned long *)base) = value; | ||
| 179 | } | ||
| 180 | |||
| 181 | |||
| 182 | /* | ||
| 183 | * Locking is done only for allocating BTS or PEBS resources. | ||
| 184 | */ | ||
| 185 | static DEFINE_SPINLOCK(ds_lock); | ||
| 186 | |||
| 187 | /* | ||
| 188 | * We either support (system-wide) per-cpu or per-thread allocation. | ||
| 189 | * We distinguish the two based on the task_struct pointer, where a | ||
| 190 | * NULL pointer indicates per-cpu allocation for the current cpu. | ||
| 191 | * | ||
| 192 | * Allocations are use-counted. As soon as resources are allocated, | ||
| 193 | * further allocations must be of the same type (per-cpu or | ||
| 194 | * per-thread). We model this by counting allocations (i.e. the number | ||
| 195 | * of tracers of a certain type) for one type negatively: | ||
| 196 | * =0 no tracers | ||
| 197 | * >0 number of per-thread tracers | ||
| 198 | * <0 number of per-cpu tracers | ||
| 199 | * | ||
| 200 | * Tracers essentially give the number of ds contexts for a certain | ||
| 201 | * type of allocation. | ||
| 202 | */ | ||
| 203 | static atomic_t tracers = ATOMIC_INIT(0); | ||
| 204 | |||
| 205 | static inline int get_tracer(struct task_struct *task) | ||
| 206 | { | ||
| 207 | int error; | ||
| 208 | |||
| 209 | spin_lock_irq(&ds_lock); | ||
| 210 | |||
| 211 | if (task) { | ||
| 212 | error = -EPERM; | ||
| 213 | if (atomic_read(&tracers) < 0) | ||
| 214 | goto out; | ||
| 215 | atomic_inc(&tracers); | ||
| 216 | } else { | ||
| 217 | error = -EPERM; | ||
| 218 | if (atomic_read(&tracers) > 0) | ||
| 219 | goto out; | ||
| 220 | atomic_dec(&tracers); | ||
| 221 | } | ||
| 222 | |||
| 223 | error = 0; | ||
| 224 | out: | ||
| 225 | spin_unlock_irq(&ds_lock); | ||
| 226 | return error; | ||
| 227 | } | ||
| 228 | |||
| 229 | static inline void put_tracer(struct task_struct *task) | ||
| 230 | { | ||
| 231 | if (task) | ||
| 232 | atomic_dec(&tracers); | ||
| 233 | else | ||
| 234 | atomic_inc(&tracers); | ||
| 235 | } | ||
| 236 | |||
| 237 | /* | ||
| 238 | * The DS context is either attached to a thread or to a cpu: | ||
| 239 | * - in the former case, the thread_struct contains a pointer to the | ||
| 240 | * attached context. | ||
| 241 | * - in the latter case, we use a static array of per-cpu context | ||
| 242 | * pointers. | ||
| 243 | * | ||
| 244 | * Contexts are use-counted. They are allocated on first access and | ||
| 245 | * deallocated when the last user puts the context. | ||
| 246 | */ | ||
| 247 | struct ds_context { | ||
| 248 | /* The DS configuration; goes into MSR_IA32_DS_AREA: */ | ||
| 249 | unsigned char ds[MAX_SIZEOF_DS]; | ||
| 250 | |||
| 251 | /* The owner of the BTS and PEBS configuration, respectively: */ | ||
| 252 | struct bts_tracer *bts_master; | ||
| 253 | struct pebs_tracer *pebs_master; | ||
| 254 | |||
| 255 | /* Use count: */ | ||
| 256 | unsigned long count; | ||
| 257 | |||
| 258 | /* Pointer to the context pointer field: */ | ||
| 259 | struct ds_context **this; | ||
| 260 | |||
| 261 | /* The traced task; NULL for cpu tracing: */ | ||
| 262 | struct task_struct *task; | ||
| 263 | |||
| 264 | /* The traced cpu; only valid if task is NULL: */ | ||
| 265 | int cpu; | ||
| 266 | }; | ||
| 267 | |||
| 268 | static DEFINE_PER_CPU(struct ds_context *, cpu_ds_context); | ||
| 269 | |||
| 270 | |||
| 271 | static struct ds_context *ds_get_context(struct task_struct *task, int cpu) | ||
| 272 | { | ||
| 273 | struct ds_context **p_context = | ||
| 274 | (task ? &task->thread.ds_ctx : &per_cpu(cpu_ds_context, cpu)); | ||
| 275 | struct ds_context *context = NULL; | ||
| 276 | struct ds_context *new_context = NULL; | ||
| 277 | |||
| 278 | /* Chances are small that we already have a context. */ | ||
| 279 | new_context = kzalloc(sizeof(*new_context), GFP_KERNEL); | ||
| 280 | if (!new_context) | ||
| 281 | return NULL; | ||
| 282 | |||
| 283 | spin_lock_irq(&ds_lock); | ||
| 284 | |||
| 285 | context = *p_context; | ||
| 286 | if (likely(!context)) { | ||
| 287 | context = new_context; | ||
| 288 | |||
| 289 | context->this = p_context; | ||
| 290 | context->task = task; | ||
| 291 | context->cpu = cpu; | ||
| 292 | context->count = 0; | ||
| 293 | |||
| 294 | *p_context = context; | ||
| 295 | } | ||
| 296 | |||
| 297 | context->count++; | ||
| 298 | |||
| 299 | spin_unlock_irq(&ds_lock); | ||
| 300 | |||
| 301 | if (context != new_context) | ||
| 302 | kfree(new_context); | ||
| 303 | |||
| 304 | return context; | ||
| 305 | } | ||
| 306 | |||
| 307 | static void ds_put_context(struct ds_context *context) | ||
| 308 | { | ||
| 309 | struct task_struct *task; | ||
| 310 | unsigned long irq; | ||
| 311 | |||
| 312 | if (!context) | ||
| 313 | return; | ||
| 314 | |||
| 315 | spin_lock_irqsave(&ds_lock, irq); | ||
| 316 | |||
| 317 | if (--context->count) { | ||
| 318 | spin_unlock_irqrestore(&ds_lock, irq); | ||
| 319 | return; | ||
| 320 | } | ||
| 321 | |||
| 322 | *(context->this) = NULL; | ||
| 323 | |||
| 324 | task = context->task; | ||
| 325 | |||
| 326 | if (task) | ||
| 327 | clear_tsk_thread_flag(task, TIF_DS_AREA_MSR); | ||
| 328 | |||
| 329 | /* | ||
| 330 | * We leave the (now dangling) pointer to the DS configuration in | ||
| 331 | * the DS_AREA msr. This is as good or as bad as replacing it with | ||
| 332 | * NULL - the hardware would crash if we enabled tracing. | ||
| 333 | * | ||
| 334 | * This saves us some problems with having to write an msr on a | ||
| 335 | * different cpu while preventing others from doing the same for the | ||
| 336 | * next context for that same cpu. | ||
| 337 | */ | ||
| 338 | |||
| 339 | spin_unlock_irqrestore(&ds_lock, irq); | ||
| 340 | |||
| 341 | /* The context might still be in use for context switching. */ | ||
| 342 | if (task && (task != current)) | ||
| 343 | wait_task_context_switch(task); | ||
| 344 | |||
| 345 | kfree(context); | ||
| 346 | } | ||
| 347 | |||
| 348 | static void ds_install_ds_area(struct ds_context *context) | ||
| 349 | { | ||
| 350 | unsigned long ds; | ||
| 351 | |||
| 352 | ds = (unsigned long)context->ds; | ||
| 353 | |||
| 354 | /* | ||
| 355 | * There is a race between the bts master and the pebs master. | ||
| 356 | * | ||
| 357 | * The thread/cpu access is synchronized via get/put_cpu() for | ||
| 358 | * task tracing and via wrmsr_on_cpu for cpu tracing. | ||
| 359 | * | ||
| 360 | * If bts and pebs are collected for the same task or same cpu, | ||
| 361 | * the same configuration is written twice. | ||
| 362 | */ | ||
| 363 | if (context->task) { | ||
| 364 | get_cpu(); | ||
| 365 | if (context->task == current) | ||
| 366 | wrmsrl(MSR_IA32_DS_AREA, ds); | ||
| 367 | set_tsk_thread_flag(context->task, TIF_DS_AREA_MSR); | ||
| 368 | put_cpu(); | ||
| 369 | } else | ||
| 370 | wrmsr_on_cpu(context->cpu, MSR_IA32_DS_AREA, | ||
| 371 | (u32)((u64)ds), (u32)((u64)ds >> 32)); | ||
| 372 | } | ||
| 373 | |||
| 374 | /* | ||
| 375 | * Call the tracer's callback on a buffer overflow. | ||
| 376 | * | ||
| 377 | * context: the ds context | ||
| 378 | * qual: the buffer type | ||
| 379 | */ | ||
| 380 | static void ds_overflow(struct ds_context *context, enum ds_qualifier qual) | ||
| 381 | { | ||
| 382 | switch (qual) { | ||
| 383 | case ds_bts: | ||
| 384 | if (context->bts_master && | ||
| 385 | context->bts_master->ovfl) | ||
| 386 | context->bts_master->ovfl(context->bts_master); | ||
| 387 | break; | ||
| 388 | case ds_pebs: | ||
| 389 | if (context->pebs_master && | ||
| 390 | context->pebs_master->ovfl) | ||
| 391 | context->pebs_master->ovfl(context->pebs_master); | ||
| 392 | break; | ||
| 393 | } | ||
| 394 | } | ||
| 395 | |||
| 396 | |||
| 397 | /* | ||
| 398 | * Write raw data into the BTS or PEBS buffer. | ||
| 399 | * | ||
| 400 | * The remainder of any partially written record is zeroed out. | ||
| 401 | * | ||
| 402 | * context: the DS context | ||
| 403 | * qual: the buffer type | ||
| 404 | * record: the data to write | ||
| 405 | * size: the size of the data | ||
| 406 | */ | ||
| 407 | static int ds_write(struct ds_context *context, enum ds_qualifier qual, | ||
| 408 | const void *record, size_t size) | ||
| 409 | { | ||
| 410 | int bytes_written = 0; | ||
| 411 | |||
| 412 | if (!record) | ||
| 413 | return -EINVAL; | ||
| 414 | |||
| 415 | while (size) { | ||
| 416 | unsigned long base, index, end, write_end, int_th; | ||
| 417 | unsigned long write_size, adj_write_size; | ||
| 418 | |||
| 419 | /* | ||
| 420 | * Write as much as possible without producing an | ||
| 421 | * overflow interrupt. | ||
| 422 | * | ||
| 423 | * Interrupt_threshold must either be | ||
| 424 | * - bigger than absolute_maximum or | ||
| 425 | * - point to a record between buffer_base and absolute_maximum | ||
| 426 | * | ||
| 427 | * Index points to a valid record. | ||
| 428 | */ | ||
| 429 | base = ds_get(context->ds, qual, ds_buffer_base); | ||
| 430 | index = ds_get(context->ds, qual, ds_index); | ||
| 431 | end = ds_get(context->ds, qual, ds_absolute_maximum); | ||
| 432 | int_th = ds_get(context->ds, qual, ds_interrupt_threshold); | ||
| 433 | |||
| 434 | write_end = min(end, int_th); | ||
| 435 | |||
| 436 | /* | ||
| 437 | * If we are already beyond the interrupt threshold, | ||
| 438 | * we fill the entire buffer. | ||
| 439 | */ | ||
| 440 | if (write_end <= index) | ||
| 441 | write_end = end; | ||
| 442 | |||
| 443 | if (write_end <= index) | ||
| 444 | break; | ||
| 445 | |||
| 446 | write_size = min((unsigned long) size, write_end - index); | ||
| 447 | memcpy((void *)index, record, write_size); | ||
| 448 | |||
| 449 | record = (const char *)record + write_size; | ||
| 450 | size -= write_size; | ||
| 451 | bytes_written += write_size; | ||
| 452 | |||
| 453 | adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; | ||
| 454 | adj_write_size *= ds_cfg.sizeof_rec[qual]; | ||
| 455 | |||
| 456 | /* Zero out trailing bytes. */ | ||
| 457 | memset((char *)index + write_size, 0, | ||
| 458 | adj_write_size - write_size); | ||
| 459 | index += adj_write_size; | ||
| 460 | |||
| 461 | if (index >= end) | ||
| 462 | index = base; | ||
| 463 | ds_set(context->ds, qual, ds_index, index); | ||
| 464 | |||
| 465 | if (index >= int_th) | ||
| 466 | ds_overflow(context, qual); | ||
| 467 | } | ||
| 468 | |||
| 469 | return bytes_written; | ||
| 470 | } | ||
| 471 | |||
| 472 | |||
| 473 | /* | ||
| 474 | * Branch Trace Store (BTS) uses the following format. Different | ||
| 475 | * architectures vary in the size of those fields. | ||
| 476 | * - source linear address | ||
| 477 | * - destination linear address | ||
| 478 | * - flags | ||
| 479 | * | ||
| 480 | * Later architectures use 64bit pointers throughout, whereas earlier | ||
| 481 | * architectures use 32bit pointers in 32bit mode. | ||
| 482 | * | ||
| 483 | * We compute the base address for the fields based on: | ||
| 484 | * - the field size stored in the DS configuration | ||
| 485 | * - the relative field position | ||
| 486 | * | ||
| 487 | * In order to store additional information in the BTS buffer, we use | ||
| 488 | * a special source address to indicate that the record requires | ||
| 489 | * special interpretation. | ||
| 490 | * | ||
| 491 | * Netburst indicated via a bit in the flags field whether the branch | ||
| 492 | * was predicted; this is ignored. | ||
| 493 | * | ||
| 494 | * We use two levels of abstraction: | ||
| 495 | * - the raw data level defined here | ||
| 496 | * - an arch-independent level defined in ds.h | ||
| 497 | */ | ||
| 498 | |||
| 499 | enum bts_field { | ||
| 500 | bts_from, | ||
| 501 | bts_to, | ||
| 502 | bts_flags, | ||
| 503 | |||
| 504 | bts_qual = bts_from, | ||
| 505 | bts_clock = bts_to, | ||
| 506 | bts_pid = bts_flags, | ||
| 507 | |||
| 508 | bts_qual_mask = (bts_qual_max - 1), | ||
| 509 | bts_escape = ((unsigned long)-1 & ~bts_qual_mask) | ||
| 510 | }; | ||
| 511 | |||
| 512 | static inline unsigned long bts_get(const char *base, unsigned long field) | ||
| 513 | { | ||
| 514 | base += (ds_cfg.sizeof_ptr_field * field); | ||
| 515 | return *(unsigned long *)base; | ||
| 516 | } | ||
| 517 | |||
| 518 | static inline void bts_set(char *base, unsigned long field, unsigned long val) | ||
| 519 | { | ||
| 520 | base += (ds_cfg.sizeof_ptr_field * field); | ||
| 521 | (*(unsigned long *)base) = val; | ||
| 522 | } | ||
| 523 | |||
| 524 | |||
| 525 | /* | ||
| 526 | * The raw BTS data is architecture dependent. | ||
| 527 | * | ||
| 528 | * For higher-level users, we give an arch-independent view. | ||
| 529 | * - ds.h defines struct bts_struct | ||
| 530 | * - bts_read translates one raw bts record into a bts_struct | ||
| 531 | * - bts_write translates one bts_struct into the raw format and | ||
| 532 | * writes it into the top of the parameter tracer's buffer. | ||
| 533 | * | ||
| 534 | * return: bytes read/written on success; -Eerrno, otherwise | ||
| 535 | */ | ||
| 536 | static int | ||
| 537 | bts_read(struct bts_tracer *tracer, const void *at, struct bts_struct *out) | ||
| 538 | { | ||
| 539 | if (!tracer) | ||
| 540 | return -EINVAL; | ||
| 541 | |||
| 542 | if (at < tracer->trace.ds.begin) | ||
| 543 | return -EINVAL; | ||
| 544 | |||
| 545 | if (tracer->trace.ds.end < (at + tracer->trace.ds.size)) | ||
| 546 | return -EINVAL; | ||
| 547 | |||
| 548 | memset(out, 0, sizeof(*out)); | ||
| 549 | if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) { | ||
| 550 | out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask); | ||
| 551 | out->variant.event.clock = bts_get(at, bts_clock); | ||
| 552 | out->variant.event.pid = bts_get(at, bts_pid); | ||
| 553 | } else { | ||
| 554 | out->qualifier = bts_branch; | ||
| 555 | out->variant.lbr.from = bts_get(at, bts_from); | ||
| 556 | out->variant.lbr.to = bts_get(at, bts_to); | ||
| 557 | |||
| 558 | if (!out->variant.lbr.from && !out->variant.lbr.to) | ||
| 559 | out->qualifier = bts_invalid; | ||
| 560 | } | ||
| 561 | |||
| 562 | return ds_cfg.sizeof_rec[ds_bts]; | ||
| 563 | } | ||
| 564 | |||
| 565 | static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in) | ||
| 566 | { | ||
| 567 | unsigned char raw[MAX_SIZEOF_BTS]; | ||
| 568 | |||
| 569 | if (!tracer) | ||
| 570 | return -EINVAL; | ||
| 571 | |||
| 572 | if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts]) | ||
| 573 | return -EOVERFLOW; | ||
| 574 | |||
| 575 | switch (in->qualifier) { | ||
| 576 | case bts_invalid: | ||
| 577 | bts_set(raw, bts_from, 0); | ||
| 578 | bts_set(raw, bts_to, 0); | ||
| 579 | bts_set(raw, bts_flags, 0); | ||
| 580 | break; | ||
| 581 | case bts_branch: | ||
| 582 | bts_set(raw, bts_from, in->variant.lbr.from); | ||
| 583 | bts_set(raw, bts_to, in->variant.lbr.to); | ||
| 584 | bts_set(raw, bts_flags, 0); | ||
| 585 | break; | ||
| 586 | case bts_task_arrives: | ||
| 587 | case bts_task_departs: | ||
| 588 | bts_set(raw, bts_qual, (bts_escape | in->qualifier)); | ||
| 589 | bts_set(raw, bts_clock, in->variant.event.clock); | ||
| 590 | bts_set(raw, bts_pid, in->variant.event.pid); | ||
| 591 | break; | ||
| 592 | default: | ||
| 593 | return -EINVAL; | ||
| 594 | } | ||
| 595 | |||
| 596 | return ds_write(tracer->ds.context, ds_bts, raw, | ||
| 597 | ds_cfg.sizeof_rec[ds_bts]); | ||
| 598 | } | ||
| 599 | |||
| 600 | |||
| 601 | static void ds_write_config(struct ds_context *context, | ||
| 602 | struct ds_trace *cfg, enum ds_qualifier qual) | ||
| 603 | { | ||
| 604 | unsigned char *ds = context->ds; | ||
| 605 | |||
| 606 | ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin); | ||
| 607 | ds_set(ds, qual, ds_index, (unsigned long)cfg->top); | ||
| 608 | ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end); | ||
| 609 | ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith); | ||
| 610 | } | ||
| 611 | |||
| 612 | static void ds_read_config(struct ds_context *context, | ||
| 613 | struct ds_trace *cfg, enum ds_qualifier qual) | ||
| 614 | { | ||
| 615 | unsigned char *ds = context->ds; | ||
| 616 | |||
| 617 | cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base); | ||
| 618 | cfg->top = (void *)ds_get(ds, qual, ds_index); | ||
| 619 | cfg->end = (void *)ds_get(ds, qual, ds_absolute_maximum); | ||
| 620 | cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold); | ||
| 621 | } | ||
| 622 | |||
| 623 | static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, | ||
| 624 | void *base, size_t size, size_t ith, | ||
| 625 | unsigned int flags) { | ||
| 626 | unsigned long buffer, adj; | ||
| 627 | |||
| 628 | /* | ||
| 629 | * Adjust the buffer address and size to meet alignment | ||
| 630 | * constraints: | ||
| 631 | * - buffer is double-word aligned | ||
| 632 | * - size is multiple of record size | ||
| 633 | * | ||
| 634 | * We checked the size at the very beginning; we have enough | ||
| 635 | * space to do the adjustment. | ||
| 636 | */ | ||
| 637 | buffer = (unsigned long)base; | ||
| 638 | |||
| 639 | adj = ALIGN(buffer, DS_ALIGNMENT) - buffer; | ||
| 640 | buffer += adj; | ||
| 641 | size -= adj; | ||
| 642 | |||
| 643 | trace->n = size / ds_cfg.sizeof_rec[qual]; | ||
| 644 | trace->size = ds_cfg.sizeof_rec[qual]; | ||
| 645 | |||
| 646 | size = (trace->n * trace->size); | ||
| 647 | |||
| 648 | trace->begin = (void *)buffer; | ||
| 649 | trace->top = trace->begin; | ||
| 650 | trace->end = (void *)(buffer + size); | ||
| 651 | /* | ||
| 652 | * The value for 'no threshold' is -1, which will set the | ||
| 653 | * threshold outside of the buffer, just like we want it. | ||
| 654 | */ | ||
| 655 | ith *= ds_cfg.sizeof_rec[qual]; | ||
| 656 | trace->ith = (void *)(buffer + size - ith); | ||
| 657 | |||
| 658 | trace->flags = flags; | ||
| 659 | } | ||
| 660 | |||
| 661 | |||
| 662 | static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, | ||
| 663 | enum ds_qualifier qual, struct task_struct *task, | ||
| 664 | int cpu, void *base, size_t size, size_t th) | ||
| 665 | { | ||
| 666 | struct ds_context *context; | ||
| 667 | int error; | ||
| 668 | size_t req_size; | ||
| 669 | |||
| 670 | error = -EOPNOTSUPP; | ||
| 671 | if (!ds_cfg.sizeof_rec[qual]) | ||
| 672 | goto out; | ||
| 673 | |||
| 674 | error = -EINVAL; | ||
| 675 | if (!base) | ||
| 676 | goto out; | ||
| 677 | |||
| 678 | req_size = ds_cfg.sizeof_rec[qual]; | ||
| 679 | /* We might need space for alignment adjustments. */ | ||
| 680 | if (!IS_ALIGNED((unsigned long)base, DS_ALIGNMENT)) | ||
| 681 | req_size += DS_ALIGNMENT; | ||
| 682 | |||
| 683 | error = -EINVAL; | ||
| 684 | if (size < req_size) | ||
| 685 | goto out; | ||
| 686 | |||
| 687 | if (th != (size_t)-1) { | ||
| 688 | th *= ds_cfg.sizeof_rec[qual]; | ||
| 689 | |||
| 690 | error = -EINVAL; | ||
| 691 | if (size <= th) | ||
| 692 | goto out; | ||
| 693 | } | ||
| 694 | |||
| 695 | tracer->buffer = base; | ||
| 696 | tracer->size = size; | ||
| 697 | |||
| 698 | error = -ENOMEM; | ||
| 699 | context = ds_get_context(task, cpu); | ||
| 700 | if (!context) | ||
| 701 | goto out; | ||
| 702 | tracer->context = context; | ||
| 703 | |||
| 704 | /* | ||
| 705 | * Defer any tracer-specific initialization work for the context until | ||
| 706 | * context ownership has been clarified. | ||
| 707 | */ | ||
| 708 | |||
| 709 | error = 0; | ||
| 710 | out: | ||
| 711 | return error; | ||
| 712 | } | ||
| 713 | |||
| 714 | static struct bts_tracer *ds_request_bts(struct task_struct *task, int cpu, | ||
| 715 | void *base, size_t size, | ||
| 716 | bts_ovfl_callback_t ovfl, size_t th, | ||
| 717 | unsigned int flags) | ||
| 718 | { | ||
| 719 | struct bts_tracer *tracer; | ||
| 720 | int error; | ||
| 721 | |||
| 722 | /* Buffer overflow notification is not yet implemented. */ | ||
| 723 | error = -EOPNOTSUPP; | ||
| 724 | if (ovfl) | ||
| 725 | goto out; | ||
| 726 | |||
| 727 | error = get_tracer(task); | ||
| 728 | if (error < 0) | ||
| 729 | goto out; | ||
| 730 | |||
| 731 | error = -ENOMEM; | ||
| 732 | tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); | ||
| 733 | if (!tracer) | ||
| 734 | goto out_put_tracer; | ||
| 735 | tracer->ovfl = ovfl; | ||
| 736 | |||
| 737 | /* Do some more error checking and acquire a tracing context. */ | ||
| 738 | error = ds_request(&tracer->ds, &tracer->trace.ds, | ||
| 739 | ds_bts, task, cpu, base, size, th); | ||
| 740 | if (error < 0) | ||
| 741 | goto out_tracer; | ||
| 742 | |||
| 743 | /* Claim the bts part of the tracing context we acquired above. */ | ||
| 744 | spin_lock_irq(&ds_lock); | ||
| 745 | |||
| 746 | error = -EPERM; | ||
| 747 | if (tracer->ds.context->bts_master) | ||
| 748 | goto out_unlock; | ||
| 749 | tracer->ds.context->bts_master = tracer; | ||
| 750 | |||
| 751 | spin_unlock_irq(&ds_lock); | ||
| 752 | |||
| 753 | /* | ||
| 754 | * Now that we own the bts part of the context, let's complete the | ||
| 755 | * initialization for that part. | ||
| 756 | */ | ||
| 757 | ds_init_ds_trace(&tracer->trace.ds, ds_bts, base, size, th, flags); | ||
| 758 | ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); | ||
| 759 | ds_install_ds_area(tracer->ds.context); | ||
| 760 | |||
| 761 | tracer->trace.read = bts_read; | ||
| 762 | tracer->trace.write = bts_write; | ||
| 763 | |||
| 764 | /* Start tracing. */ | ||
| 765 | ds_resume_bts(tracer); | ||
| 766 | |||
| 767 | return tracer; | ||
| 768 | |||
| 769 | out_unlock: | ||
| 770 | spin_unlock_irq(&ds_lock); | ||
| 771 | ds_put_context(tracer->ds.context); | ||
| 772 | out_tracer: | ||
| 773 | kfree(tracer); | ||
| 774 | out_put_tracer: | ||
| 775 | put_tracer(task); | ||
| 776 | out: | ||
| 777 | return ERR_PTR(error); | ||
| 778 | } | ||
| 779 | |||
| 780 | struct bts_tracer *ds_request_bts_task(struct task_struct *task, | ||
| 781 | void *base, size_t size, | ||
| 782 | bts_ovfl_callback_t ovfl, | ||
| 783 | size_t th, unsigned int flags) | ||
| 784 | { | ||
| 785 | return ds_request_bts(task, 0, base, size, ovfl, th, flags); | ||
| 786 | } | ||
| 787 | |||
| 788 | struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size, | ||
| 789 | bts_ovfl_callback_t ovfl, | ||
| 790 | size_t th, unsigned int flags) | ||
| 791 | { | ||
| 792 | return ds_request_bts(NULL, cpu, base, size, ovfl, th, flags); | ||
| 793 | } | ||
| 794 | |||
| 795 | static struct pebs_tracer *ds_request_pebs(struct task_struct *task, int cpu, | ||
| 796 | void *base, size_t size, | ||
| 797 | pebs_ovfl_callback_t ovfl, size_t th, | ||
| 798 | unsigned int flags) | ||
| 799 | { | ||
| 800 | struct pebs_tracer *tracer; | ||
| 801 | int error; | ||
| 802 | |||
| 803 | /* Buffer overflow notification is not yet implemented. */ | ||
| 804 | error = -EOPNOTSUPP; | ||
| 805 | if (ovfl) | ||
| 806 | goto out; | ||
| 807 | |||
| 808 | error = get_tracer(task); | ||
| 809 | if (error < 0) | ||
| 810 | goto out; | ||
| 811 | |||
| 812 | error = -ENOMEM; | ||
| 813 | tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); | ||
| 814 | if (!tracer) | ||
| 815 | goto out_put_tracer; | ||
| 816 | tracer->ovfl = ovfl; | ||
| 817 | |||
| 818 | /* Do some more error checking and acquire a tracing context. */ | ||
| 819 | error = ds_request(&tracer->ds, &tracer->trace.ds, | ||
| 820 | ds_pebs, task, cpu, base, size, th); | ||
| 821 | if (error < 0) | ||
| 822 | goto out_tracer; | ||
| 823 | |||
| 824 | /* Claim the pebs part of the tracing context we acquired above. */ | ||
| 825 | spin_lock_irq(&ds_lock); | ||
| 826 | |||
| 827 | error = -EPERM; | ||
| 828 | if (tracer->ds.context->pebs_master) | ||
| 829 | goto out_unlock; | ||
| 830 | tracer->ds.context->pebs_master = tracer; | ||
| 831 | |||
| 832 | spin_unlock_irq(&ds_lock); | ||
| 833 | |||
| 834 | /* | ||
| 835 | * Now that we own the pebs part of the context, let's complete the | ||
| 836 | * initialization for that part. | ||
| 837 | */ | ||
| 838 | ds_init_ds_trace(&tracer->trace.ds, ds_pebs, base, size, th, flags); | ||
| 839 | ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); | ||
| 840 | ds_install_ds_area(tracer->ds.context); | ||
| 841 | |||
| 842 | /* Start tracing. */ | ||
| 843 | ds_resume_pebs(tracer); | ||
| 844 | |||
| 845 | return tracer; | ||
| 846 | |||
| 847 | out_unlock: | ||
| 848 | spin_unlock_irq(&ds_lock); | ||
| 849 | ds_put_context(tracer->ds.context); | ||
| 850 | out_tracer: | ||
| 851 | kfree(tracer); | ||
| 852 | out_put_tracer: | ||
| 853 | put_tracer(task); | ||
| 854 | out: | ||
| 855 | return ERR_PTR(error); | ||
| 856 | } | ||
| 857 | |||
| 858 | struct pebs_tracer *ds_request_pebs_task(struct task_struct *task, | ||
| 859 | void *base, size_t size, | ||
| 860 | pebs_ovfl_callback_t ovfl, | ||
| 861 | size_t th, unsigned int flags) | ||
| 862 | { | ||
| 863 | return ds_request_pebs(task, 0, base, size, ovfl, th, flags); | ||
| 864 | } | ||
| 865 | |||
| 866 | struct pebs_tracer *ds_request_pebs_cpu(int cpu, void *base, size_t size, | ||
| 867 | pebs_ovfl_callback_t ovfl, | ||
| 868 | size_t th, unsigned int flags) | ||
| 869 | { | ||
| 870 | return ds_request_pebs(NULL, cpu, base, size, ovfl, th, flags); | ||
| 871 | } | ||
| 872 | |||
| 873 | static void ds_free_bts(struct bts_tracer *tracer) | ||
| 874 | { | ||
| 875 | struct task_struct *task; | ||
| 876 | |||
| 877 | task = tracer->ds.context->task; | ||
| 878 | |||
| 879 | WARN_ON_ONCE(tracer->ds.context->bts_master != tracer); | ||
| 880 | tracer->ds.context->bts_master = NULL; | ||
| 881 | |||
| 882 | /* Make sure tracing stopped and the tracer is not in use. */ | ||
| 883 | if (task && (task != current)) | ||
| 884 | wait_task_context_switch(task); | ||
| 885 | |||
| 886 | ds_put_context(tracer->ds.context); | ||
| 887 | put_tracer(task); | ||
| 888 | |||
| 889 | kfree(tracer); | ||
| 890 | } | ||
| 891 | |||
| 892 | void ds_release_bts(struct bts_tracer *tracer) | ||
| 893 | { | ||
| 894 | might_sleep(); | ||
| 895 | |||
| 896 | if (!tracer) | ||
| 897 | return; | ||
| 898 | |||
| 899 | ds_suspend_bts(tracer); | ||
| 900 | ds_free_bts(tracer); | ||
| 901 | } | ||
| 902 | |||
| 903 | int ds_release_bts_noirq(struct bts_tracer *tracer) | ||
| 904 | { | ||
| 905 | struct task_struct *task; | ||
| 906 | unsigned long irq; | ||
| 907 | int error; | ||
| 908 | |||
| 909 | if (!tracer) | ||
| 910 | return 0; | ||
| 911 | |||
| 912 | task = tracer->ds.context->task; | ||
| 913 | |||
| 914 | local_irq_save(irq); | ||
| 915 | |||
| 916 | error = -EPERM; | ||
| 917 | if (!task && | ||
| 918 | (tracer->ds.context->cpu != smp_processor_id())) | ||
| 919 | goto out; | ||
| 920 | |||
| 921 | error = -EPERM; | ||
| 922 | if (task && (task != current)) | ||
| 923 | goto out; | ||
| 924 | |||
| 925 | ds_suspend_bts_noirq(tracer); | ||
| 926 | ds_free_bts(tracer); | ||
| 927 | |||
| 928 | error = 0; | ||
| 929 | out: | ||
| 930 | local_irq_restore(irq); | ||
| 931 | return error; | ||
| 932 | } | ||
| 933 | |||
| 934 | static void update_task_debugctlmsr(struct task_struct *task, | ||
| 935 | unsigned long debugctlmsr) | ||
| 936 | { | ||
| 937 | task->thread.debugctlmsr = debugctlmsr; | ||
| 938 | |||
| 939 | get_cpu(); | ||
| 940 | if (task == current) | ||
| 941 | update_debugctlmsr(debugctlmsr); | ||
| 942 | put_cpu(); | ||
| 943 | } | ||
| 944 | |||
| 945 | void ds_suspend_bts(struct bts_tracer *tracer) | ||
| 946 | { | ||
| 947 | struct task_struct *task; | ||
| 948 | unsigned long debugctlmsr; | ||
| 949 | int cpu; | ||
| 950 | |||
| 951 | if (!tracer) | ||
| 952 | return; | ||
| 953 | |||
| 954 | tracer->flags = 0; | ||
| 955 | |||
| 956 | task = tracer->ds.context->task; | ||
| 957 | cpu = tracer->ds.context->cpu; | ||
| 958 | |||
| 959 | WARN_ON(!task && irqs_disabled()); | ||
| 960 | |||
| 961 | debugctlmsr = (task ? | ||
| 962 | task->thread.debugctlmsr : | ||
| 963 | get_debugctlmsr_on_cpu(cpu)); | ||
| 964 | debugctlmsr &= ~BTS_CONTROL; | ||
| 965 | |||
| 966 | if (task) | ||
| 967 | update_task_debugctlmsr(task, debugctlmsr); | ||
| 968 | else | ||
| 969 | update_debugctlmsr_on_cpu(cpu, debugctlmsr); | ||
| 970 | } | ||
| 971 | |||
| 972 | int ds_suspend_bts_noirq(struct bts_tracer *tracer) | ||
| 973 | { | ||
| 974 | struct task_struct *task; | ||
| 975 | unsigned long debugctlmsr, irq; | ||
| 976 | int cpu, error = 0; | ||
| 977 | |||
| 978 | if (!tracer) | ||
| 979 | return 0; | ||
| 980 | |||
| 981 | tracer->flags = 0; | ||
| 982 | |||
| 983 | task = tracer->ds.context->task; | ||
| 984 | cpu = tracer->ds.context->cpu; | ||
| 985 | |||
| 986 | local_irq_save(irq); | ||
| 987 | |||
| 988 | error = -EPERM; | ||
| 989 | if (!task && (cpu != smp_processor_id())) | ||
| 990 | goto out; | ||
| 991 | |||
| 992 | debugctlmsr = (task ? | ||
| 993 | task->thread.debugctlmsr : | ||
| 994 | get_debugctlmsr()); | ||
| 995 | debugctlmsr &= ~BTS_CONTROL; | ||
| 996 | |||
| 997 | if (task) | ||
| 998 | update_task_debugctlmsr(task, debugctlmsr); | ||
| 999 | else | ||
| 1000 | update_debugctlmsr(debugctlmsr); | ||
| 1001 | |||
| 1002 | error = 0; | ||
| 1003 | out: | ||
| 1004 | local_irq_restore(irq); | ||
| 1005 | return error; | ||
| 1006 | } | ||
| 1007 | |||
| 1008 | static unsigned long ds_bts_control(struct bts_tracer *tracer) | ||
| 1009 | { | ||
| 1010 | unsigned long control; | ||
| 1011 | |||
| 1012 | control = ds_cfg.ctl[dsf_bts]; | ||
| 1013 | if (!(tracer->trace.ds.flags & BTS_KERNEL)) | ||
| 1014 | control |= ds_cfg.ctl[dsf_bts_kernel]; | ||
| 1015 | if (!(tracer->trace.ds.flags & BTS_USER)) | ||
| 1016 | control |= ds_cfg.ctl[dsf_bts_user]; | ||
| 1017 | |||
| 1018 | return control; | ||
| 1019 | } | ||
| 1020 | |||
| 1021 | void ds_resume_bts(struct bts_tracer *tracer) | ||
| 1022 | { | ||
| 1023 | struct task_struct *task; | ||
| 1024 | unsigned long debugctlmsr; | ||
| 1025 | int cpu; | ||
| 1026 | |||
| 1027 | if (!tracer) | ||
| 1028 | return; | ||
| 1029 | |||
| 1030 | tracer->flags = tracer->trace.ds.flags; | ||
| 1031 | |||
| 1032 | task = tracer->ds.context->task; | ||
| 1033 | cpu = tracer->ds.context->cpu; | ||
| 1034 | |||
| 1035 | WARN_ON(!task && irqs_disabled()); | ||
| 1036 | |||
| 1037 | debugctlmsr = (task ? | ||
| 1038 | task->thread.debugctlmsr : | ||
| 1039 | get_debugctlmsr_on_cpu(cpu)); | ||
| 1040 | debugctlmsr |= ds_bts_control(tracer); | ||
| 1041 | |||
| 1042 | if (task) | ||
| 1043 | update_task_debugctlmsr(task, debugctlmsr); | ||
| 1044 | else | ||
| 1045 | update_debugctlmsr_on_cpu(cpu, debugctlmsr); | ||
| 1046 | } | ||
| 1047 | |||
| 1048 | int ds_resume_bts_noirq(struct bts_tracer *tracer) | ||
| 1049 | { | ||
| 1050 | struct task_struct *task; | ||
| 1051 | unsigned long debugctlmsr, irq; | ||
| 1052 | int cpu, error = 0; | ||
| 1053 | |||
| 1054 | if (!tracer) | ||
| 1055 | return 0; | ||
| 1056 | |||
| 1057 | tracer->flags = tracer->trace.ds.flags; | ||
| 1058 | |||
| 1059 | task = tracer->ds.context->task; | ||
| 1060 | cpu = tracer->ds.context->cpu; | ||
| 1061 | |||
| 1062 | local_irq_save(irq); | ||
| 1063 | |||
| 1064 | error = -EPERM; | ||
| 1065 | if (!task && (cpu != smp_processor_id())) | ||
| 1066 | goto out; | ||
| 1067 | |||
| 1068 | debugctlmsr = (task ? | ||
| 1069 | task->thread.debugctlmsr : | ||
| 1070 | get_debugctlmsr()); | ||
| 1071 | debugctlmsr |= ds_bts_control(tracer); | ||
| 1072 | |||
| 1073 | if (task) | ||
| 1074 | update_task_debugctlmsr(task, debugctlmsr); | ||
| 1075 | else | ||
| 1076 | update_debugctlmsr(debugctlmsr); | ||
| 1077 | |||
| 1078 | error = 0; | ||
| 1079 | out: | ||
| 1080 | local_irq_restore(irq); | ||
| 1081 | return error; | ||
| 1082 | } | ||
| 1083 | |||
| 1084 | static void ds_free_pebs(struct pebs_tracer *tracer) | ||
| 1085 | { | ||
| 1086 | struct task_struct *task; | ||
| 1087 | |||
| 1088 | task = tracer->ds.context->task; | ||
| 1089 | |||
| 1090 | WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer); | ||
| 1091 | tracer->ds.context->pebs_master = NULL; | ||
| 1092 | |||
| 1093 | ds_put_context(tracer->ds.context); | ||
| 1094 | put_tracer(task); | ||
| 1095 | |||
| 1096 | kfree(tracer); | ||
| 1097 | } | ||
| 1098 | |||
| 1099 | void ds_release_pebs(struct pebs_tracer *tracer) | ||
| 1100 | { | ||
| 1101 | might_sleep(); | ||
| 1102 | |||
| 1103 | if (!tracer) | ||
| 1104 | return; | ||
| 1105 | |||
| 1106 | ds_suspend_pebs(tracer); | ||
| 1107 | ds_free_pebs(tracer); | ||
| 1108 | } | ||
| 1109 | |||
| 1110 | int ds_release_pebs_noirq(struct pebs_tracer *tracer) | ||
| 1111 | { | ||
| 1112 | struct task_struct *task; | ||
| 1113 | unsigned long irq; | ||
| 1114 | int error; | ||
| 1115 | |||
| 1116 | if (!tracer) | ||
| 1117 | return 0; | ||
| 1118 | |||
| 1119 | task = tracer->ds.context->task; | ||
| 1120 | |||
| 1121 | local_irq_save(irq); | ||
| 1122 | |||
| 1123 | error = -EPERM; | ||
| 1124 | if (!task && | ||
| 1125 | (tracer->ds.context->cpu != smp_processor_id())) | ||
| 1126 | goto out; | ||
| 1127 | |||
| 1128 | error = -EPERM; | ||
| 1129 | if (task && (task != current)) | ||
| 1130 | goto out; | ||
| 1131 | |||
| 1132 | ds_suspend_pebs_noirq(tracer); | ||
| 1133 | ds_free_pebs(tracer); | ||
| 1134 | |||
| 1135 | error = 0; | ||
| 1136 | out: | ||
| 1137 | local_irq_restore(irq); | ||
| 1138 | return error; | ||
| 1139 | } | ||
| 1140 | |||
| 1141 | void ds_suspend_pebs(struct pebs_tracer *tracer) | ||
| 1142 | { | ||
| 1143 | |||
| 1144 | } | ||
| 1145 | |||
| 1146 | int ds_suspend_pebs_noirq(struct pebs_tracer *tracer) | ||
| 1147 | { | ||
| 1148 | return 0; | ||
| 1149 | } | ||
| 1150 | |||
| 1151 | void ds_resume_pebs(struct pebs_tracer *tracer) | ||
| 1152 | { | ||
| 1153 | |||
| 1154 | } | ||
| 1155 | |||
| 1156 | int ds_resume_pebs_noirq(struct pebs_tracer *tracer) | ||
| 1157 | { | ||
| 1158 | return 0; | ||
| 1159 | } | ||
| 1160 | |||
| 1161 | const struct bts_trace *ds_read_bts(struct bts_tracer *tracer) | ||
| 1162 | { | ||
| 1163 | if (!tracer) | ||
| 1164 | return NULL; | ||
| 1165 | |||
| 1166 | ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_bts); | ||
| 1167 | return &tracer->trace; | ||
| 1168 | } | ||
| 1169 | |||
| 1170 | const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer) | ||
| 1171 | { | ||
| 1172 | if (!tracer) | ||
| 1173 | return NULL; | ||
| 1174 | |||
| 1175 | ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); | ||
| 1176 | |||
| 1177 | tracer->trace.counters = ds_cfg.nr_counter_reset; | ||
| 1178 | memcpy(tracer->trace.counter_reset, | ||
| 1179 | tracer->ds.context->ds + | ||
| 1180 | (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field), | ||
| 1181 | ds_cfg.nr_counter_reset * PEBS_RESET_FIELD_SIZE); | ||
| 1182 | |||
| 1183 | return &tracer->trace; | ||
| 1184 | } | ||
| 1185 | |||
| 1186 | int ds_reset_bts(struct bts_tracer *tracer) | ||
| 1187 | { | ||
| 1188 | if (!tracer) | ||
| 1189 | return -EINVAL; | ||
| 1190 | |||
| 1191 | tracer->trace.ds.top = tracer->trace.ds.begin; | ||
| 1192 | |||
| 1193 | ds_set(tracer->ds.context->ds, ds_bts, ds_index, | ||
| 1194 | (unsigned long)tracer->trace.ds.top); | ||
| 1195 | |||
| 1196 | return 0; | ||
| 1197 | } | ||
| 1198 | |||
| 1199 | int ds_reset_pebs(struct pebs_tracer *tracer) | ||
| 1200 | { | ||
| 1201 | if (!tracer) | ||
| 1202 | return -EINVAL; | ||
| 1203 | |||
| 1204 | tracer->trace.ds.top = tracer->trace.ds.begin; | ||
| 1205 | |||
| 1206 | ds_set(tracer->ds.context->ds, ds_pebs, ds_index, | ||
| 1207 | (unsigned long)tracer->trace.ds.top); | ||
| 1208 | |||
| 1209 | return 0; | ||
| 1210 | } | ||
| 1211 | |||
| 1212 | int ds_set_pebs_reset(struct pebs_tracer *tracer, | ||
| 1213 | unsigned int counter, u64 value) | ||
| 1214 | { | ||
| 1215 | if (!tracer) | ||
| 1216 | return -EINVAL; | ||
| 1217 | |||
| 1218 | if (ds_cfg.nr_counter_reset < counter) | ||
| 1219 | return -EINVAL; | ||
| 1220 | |||
| 1221 | *(u64 *)(tracer->ds.context->ds + | ||
| 1222 | (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field) + | ||
| 1223 | (counter * PEBS_RESET_FIELD_SIZE)) = value; | ||
| 1224 | |||
| 1225 | return 0; | ||
| 1226 | } | ||
| 1227 | |||
| 1228 | static const struct ds_configuration ds_cfg_netburst = { | ||
| 1229 | .name = "Netburst", | ||
| 1230 | .ctl[dsf_bts] = (1 << 2) | (1 << 3), | ||
| 1231 | .ctl[dsf_bts_kernel] = (1 << 5), | ||
| 1232 | .ctl[dsf_bts_user] = (1 << 6), | ||
| 1233 | .nr_counter_reset = 1, | ||
| 1234 | }; | ||
| 1235 | static const struct ds_configuration ds_cfg_pentium_m = { | ||
| 1236 | .name = "Pentium M", | ||
| 1237 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), | ||
| 1238 | .nr_counter_reset = 1, | ||
| 1239 | }; | ||
| 1240 | static const struct ds_configuration ds_cfg_core2_atom = { | ||
| 1241 | .name = "Core 2/Atom", | ||
| 1242 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), | ||
| 1243 | .ctl[dsf_bts_kernel] = (1 << 9), | ||
| 1244 | .ctl[dsf_bts_user] = (1 << 10), | ||
| 1245 | .nr_counter_reset = 1, | ||
| 1246 | }; | ||
| 1247 | static const struct ds_configuration ds_cfg_core_i7 = { | ||
| 1248 | .name = "Core i7", | ||
| 1249 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), | ||
| 1250 | .ctl[dsf_bts_kernel] = (1 << 9), | ||
| 1251 | .ctl[dsf_bts_user] = (1 << 10), | ||
| 1252 | .nr_counter_reset = 4, | ||
| 1253 | }; | ||
| 1254 | |||
| 1255 | static void | ||
| 1256 | ds_configure(const struct ds_configuration *cfg, | ||
| 1257 | struct cpuinfo_x86 *cpu) | ||
| 1258 | { | ||
| 1259 | unsigned long nr_pebs_fields = 0; | ||
| 1260 | |||
| 1261 | printk(KERN_INFO "[ds] using %s configuration\n", cfg->name); | ||
| 1262 | |||
| 1263 | #ifdef __i386__ | ||
| 1264 | nr_pebs_fields = 10; | ||
| 1265 | #else | ||
| 1266 | nr_pebs_fields = 18; | ||
| 1267 | #endif | ||
| 1268 | |||
| 1269 | /* | ||
| 1270 | * Starting with version 2, architectural performance | ||
| 1271 | * monitoring supports a format specifier. | ||
| 1272 | */ | ||
| 1273 | if ((cpuid_eax(0xa) & 0xff) > 1) { | ||
| 1274 | unsigned long perf_capabilities, format; | ||
| 1275 | |||
| 1276 | rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_capabilities); | ||
| 1277 | |||
| 1278 | format = (perf_capabilities >> 8) & 0xf; | ||
| 1279 | |||
| 1280 | switch (format) { | ||
| 1281 | case 0: | ||
| 1282 | nr_pebs_fields = 18; | ||
| 1283 | break; | ||
| 1284 | case 1: | ||
| 1285 | nr_pebs_fields = 22; | ||
| 1286 | break; | ||
| 1287 | default: | ||
| 1288 | printk(KERN_INFO | ||
| 1289 | "[ds] unknown PEBS format: %lu\n", format); | ||
| 1290 | nr_pebs_fields = 0; | ||
| 1291 | break; | ||
| 1292 | } | ||
| 1293 | } | ||
| 1294 | |||
| 1295 | memset(&ds_cfg, 0, sizeof(ds_cfg)); | ||
| 1296 | ds_cfg = *cfg; | ||
| 1297 | |||
| 1298 | ds_cfg.sizeof_ptr_field = | ||
| 1299 | (cpu_has(cpu, X86_FEATURE_DTES64) ? 8 : 4); | ||
| 1300 | |||
| 1301 | ds_cfg.sizeof_rec[ds_bts] = ds_cfg.sizeof_ptr_field * 3; | ||
| 1302 | ds_cfg.sizeof_rec[ds_pebs] = ds_cfg.sizeof_ptr_field * nr_pebs_fields; | ||
| 1303 | |||
| 1304 | if (!cpu_has(cpu, X86_FEATURE_BTS)) { | ||
| 1305 | ds_cfg.sizeof_rec[ds_bts] = 0; | ||
| 1306 | printk(KERN_INFO "[ds] bts not available\n"); | ||
| 1307 | } | ||
| 1308 | if (!cpu_has(cpu, X86_FEATURE_PEBS)) { | ||
| 1309 | ds_cfg.sizeof_rec[ds_pebs] = 0; | ||
| 1310 | printk(KERN_INFO "[ds] pebs not available\n"); | ||
| 1311 | } | ||
| 1312 | |||
| 1313 | printk(KERN_INFO "[ds] sizes: address: %u bit, ", | ||
| 1314 | 8 * ds_cfg.sizeof_ptr_field); | ||
| 1315 | printk("bts/pebs record: %u/%u bytes\n", | ||
| 1316 | ds_cfg.sizeof_rec[ds_bts], ds_cfg.sizeof_rec[ds_pebs]); | ||
| 1317 | |||
| 1318 | WARN_ON_ONCE(MAX_PEBS_COUNTERS < ds_cfg.nr_counter_reset); | ||
| 1319 | } | ||
| 1320 | |||
| 1321 | void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | ||
| 1322 | { | ||
| 1323 | /* Only configure the first cpu. Others are identical. */ | ||
| 1324 | if (ds_cfg.name) | ||
| 1325 | return; | ||
| 1326 | |||
| 1327 | switch (c->x86) { | ||
| 1328 | case 0x6: | ||
| 1329 | switch (c->x86_model) { | ||
| 1330 | case 0x9: | ||
| 1331 | case 0xd: /* Pentium M */ | ||
| 1332 | ds_configure(&ds_cfg_pentium_m, c); | ||
| 1333 | break; | ||
| 1334 | case 0xf: | ||
| 1335 | case 0x17: /* Core2 */ | ||
| 1336 | case 0x1c: /* Atom */ | ||
| 1337 | ds_configure(&ds_cfg_core2_atom, c); | ||
| 1338 | break; | ||
| 1339 | case 0x1a: /* Core i7 */ | ||
| 1340 | ds_configure(&ds_cfg_core_i7, c); | ||
| 1341 | break; | ||
| 1342 | default: | ||
| 1343 | /* Sorry, don't know about them. */ | ||
| 1344 | break; | ||
| 1345 | } | ||
| 1346 | break; | ||
| 1347 | case 0xf: | ||
| 1348 | switch (c->x86_model) { | ||
| 1349 | case 0x0: | ||
| 1350 | case 0x1: | ||
| 1351 | case 0x2: /* Netburst */ | ||
| 1352 | ds_configure(&ds_cfg_netburst, c); | ||
| 1353 | break; | ||
| 1354 | default: | ||
| 1355 | /* Sorry, don't know about them. */ | ||
| 1356 | break; | ||
| 1357 | } | ||
| 1358 | break; | ||
| 1359 | default: | ||
| 1360 | /* Sorry, don't know about them. */ | ||
| 1361 | break; | ||
| 1362 | } | ||
| 1363 | } | ||
| 1364 | |||
| 1365 | static inline void ds_take_timestamp(struct ds_context *context, | ||
| 1366 | enum bts_qualifier qualifier, | ||
| 1367 | struct task_struct *task) | ||
| 1368 | { | ||
| 1369 | struct bts_tracer *tracer = context->bts_master; | ||
| 1370 | struct bts_struct ts; | ||
| 1371 | |||
| 1372 | /* Prevent compilers from reading the tracer pointer twice. */ | ||
| 1373 | barrier(); | ||
| 1374 | |||
| 1375 | if (!tracer || !(tracer->flags & BTS_TIMESTAMPS)) | ||
| 1376 | return; | ||
| 1377 | |||
| 1378 | memset(&ts, 0, sizeof(ts)); | ||
| 1379 | ts.qualifier = qualifier; | ||
| 1380 | ts.variant.event.clock = trace_clock_global(); | ||
| 1381 | ts.variant.event.pid = task->pid; | ||
| 1382 | |||
| 1383 | bts_write(tracer, &ts); | ||
| 1384 | } | ||
| 1385 | |||
| 1386 | /* | ||
| 1387 | * Change the DS configuration from tracing prev to tracing next. | ||
| 1388 | */ | ||
| 1389 | void ds_switch_to(struct task_struct *prev, struct task_struct *next) | ||
| 1390 | { | ||
| 1391 | struct ds_context *prev_ctx = prev->thread.ds_ctx; | ||
| 1392 | struct ds_context *next_ctx = next->thread.ds_ctx; | ||
| 1393 | unsigned long debugctlmsr = next->thread.debugctlmsr; | ||
| 1394 | |||
| 1395 | /* Make sure all data is read before we start. */ | ||
| 1396 | barrier(); | ||
| 1397 | |||
| 1398 | if (prev_ctx) { | ||
| 1399 | update_debugctlmsr(0); | ||
| 1400 | |||
| 1401 | ds_take_timestamp(prev_ctx, bts_task_departs, prev); | ||
| 1402 | } | ||
| 1403 | |||
| 1404 | if (next_ctx) { | ||
| 1405 | ds_take_timestamp(next_ctx, bts_task_arrives, next); | ||
| 1406 | |||
| 1407 | wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds); | ||
| 1408 | } | ||
| 1409 | |||
| 1410 | update_debugctlmsr(debugctlmsr); | ||
| 1411 | } | ||
| 1412 | |||
| 1413 | static __init int ds_selftest(void) | ||
| 1414 | { | ||
| 1415 | if (ds_cfg.sizeof_rec[ds_bts]) { | ||
| 1416 | int error; | ||
| 1417 | |||
| 1418 | error = ds_selftest_bts(); | ||
| 1419 | if (error) { | ||
| 1420 | WARN(1, "[ds] selftest failed. disabling bts.\n"); | ||
| 1421 | ds_cfg.sizeof_rec[ds_bts] = 0; | ||
| 1422 | } | ||
| 1423 | } | ||
| 1424 | |||
| 1425 | if (ds_cfg.sizeof_rec[ds_pebs]) { | ||
| 1426 | int error; | ||
| 1427 | |||
| 1428 | error = ds_selftest_pebs(); | ||
| 1429 | if (error) { | ||
| 1430 | WARN(1, "[ds] selftest failed. disabling pebs.\n"); | ||
| 1431 | ds_cfg.sizeof_rec[ds_pebs] = 0; | ||
| 1432 | } | ||
| 1433 | } | ||
| 1434 | |||
| 1435 | return 0; | ||
| 1436 | } | ||
| 1437 | device_initcall(ds_selftest); | ||
diff --git a/arch/x86/kernel/ds_selftest.c b/arch/x86/kernel/ds_selftest.c deleted file mode 100644 index 6bc7c199ab99..000000000000 --- a/arch/x86/kernel/ds_selftest.c +++ /dev/null | |||
| @@ -1,408 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Debug Store support - selftest | ||
| 3 | * | ||
| 4 | * | ||
| 5 | * Copyright (C) 2009 Intel Corporation. | ||
| 6 | * Markus Metzger <markus.t.metzger@intel.com>, 2009 | ||
| 7 | */ | ||
| 8 | |||
| 9 | #include "ds_selftest.h" | ||
| 10 | |||
| 11 | #include <linux/kernel.h> | ||
| 12 | #include <linux/string.h> | ||
| 13 | #include <linux/smp.h> | ||
| 14 | #include <linux/cpu.h> | ||
| 15 | |||
| 16 | #include <asm/ds.h> | ||
| 17 | |||
| 18 | |||
| 19 | #define BUFFER_SIZE 521 /* Intentionally chose an odd size. */ | ||
| 20 | #define SMALL_BUFFER_SIZE 24 /* A single bts entry. */ | ||
| 21 | |||
| 22 | struct ds_selftest_bts_conf { | ||
| 23 | struct bts_tracer *tracer; | ||
| 24 | int error; | ||
| 25 | int (*suspend)(struct bts_tracer *); | ||
| 26 | int (*resume)(struct bts_tracer *); | ||
| 27 | }; | ||
| 28 | |||
| 29 | static int ds_selftest_bts_consistency(const struct bts_trace *trace) | ||
| 30 | { | ||
| 31 | int error = 0; | ||
| 32 | |||
| 33 | if (!trace) { | ||
| 34 | printk(KERN_CONT "failed to access trace..."); | ||
| 35 | /* Bail out. Other tests are pointless. */ | ||
| 36 | return -1; | ||
| 37 | } | ||
| 38 | |||
| 39 | if (!trace->read) { | ||
| 40 | printk(KERN_CONT "bts read not available..."); | ||
| 41 | error = -1; | ||
| 42 | } | ||
| 43 | |||
| 44 | /* Do some sanity checks on the trace configuration. */ | ||
| 45 | if (!trace->ds.n) { | ||
| 46 | printk(KERN_CONT "empty bts buffer..."); | ||
| 47 | error = -1; | ||
| 48 | } | ||
| 49 | if (!trace->ds.size) { | ||
| 50 | printk(KERN_CONT "bad bts trace setup..."); | ||
| 51 | error = -1; | ||
| 52 | } | ||
| 53 | if (trace->ds.end != | ||
| 54 | (char *)trace->ds.begin + (trace->ds.n * trace->ds.size)) { | ||
| 55 | printk(KERN_CONT "bad bts buffer setup..."); | ||
| 56 | error = -1; | ||
| 57 | } | ||
| 58 | /* | ||
| 59 | * We allow top in [begin; end], since it's not clear when the | ||
| 60 | * overflow adjustment happens: after the increment or before the | ||
| 61 | * write. | ||
| 62 | */ | ||
| 63 | if ((trace->ds.top < trace->ds.begin) || | ||
| 64 | (trace->ds.end < trace->ds.top)) { | ||
| 65 | printk(KERN_CONT "bts top out of bounds..."); | ||
| 66 | error = -1; | ||
| 67 | } | ||
| 68 | |||
| 69 | return error; | ||
| 70 | } | ||
| 71 | |||
| 72 | static int ds_selftest_bts_read(struct bts_tracer *tracer, | ||
| 73 | const struct bts_trace *trace, | ||
| 74 | const void *from, const void *to) | ||
| 75 | { | ||
| 76 | const unsigned char *at; | ||
| 77 | |||
| 78 | /* | ||
| 79 | * Check a few things which do not belong to this test. | ||
| 80 | * They should be covered by other tests. | ||
| 81 | */ | ||
| 82 | if (!trace) | ||
| 83 | return -1; | ||
| 84 | |||
| 85 | if (!trace->read) | ||
| 86 | return -1; | ||
| 87 | |||
| 88 | if (to < from) | ||
| 89 | return -1; | ||
| 90 | |||
| 91 | if (from < trace->ds.begin) | ||
| 92 | return -1; | ||
| 93 | |||
| 94 | if (trace->ds.end < to) | ||
| 95 | return -1; | ||
| 96 | |||
| 97 | if (!trace->ds.size) | ||
| 98 | return -1; | ||
| 99 | |||
| 100 | /* Now to the test itself. */ | ||
| 101 | for (at = from; (void *)at < to; at += trace->ds.size) { | ||
| 102 | struct bts_struct bts; | ||
| 103 | unsigned long index; | ||
| 104 | int error; | ||
| 105 | |||
| 106 | if (((void *)at - trace->ds.begin) % trace->ds.size) { | ||
| 107 | printk(KERN_CONT | ||
| 108 | "read from non-integer index..."); | ||
| 109 | return -1; | ||
| 110 | } | ||
| 111 | index = ((void *)at - trace->ds.begin) / trace->ds.size; | ||
| 112 | |||
| 113 | memset(&bts, 0, sizeof(bts)); | ||
| 114 | error = trace->read(tracer, at, &bts); | ||
| 115 | if (error < 0) { | ||
| 116 | printk(KERN_CONT | ||
| 117 | "error reading bts trace at [%lu] (0x%p)...", | ||
| 118 | index, at); | ||
| 119 | return error; | ||
| 120 | } | ||
| 121 | |||
| 122 | switch (bts.qualifier) { | ||
| 123 | case BTS_BRANCH: | ||
| 124 | break; | ||
| 125 | default: | ||
| 126 | printk(KERN_CONT | ||
| 127 | "unexpected bts entry %llu at [%lu] (0x%p)...", | ||
| 128 | bts.qualifier, index, at); | ||
| 129 | return -1; | ||
| 130 | } | ||
| 131 | } | ||
| 132 | |||
| 133 | return 0; | ||
| 134 | } | ||
| 135 | |||
| 136 | static void ds_selftest_bts_cpu(void *arg) | ||
| 137 | { | ||
| 138 | struct ds_selftest_bts_conf *conf = arg; | ||
| 139 | const struct bts_trace *trace; | ||
| 140 | void *top; | ||
| 141 | |||
| 142 | if (IS_ERR(conf->tracer)) { | ||
| 143 | conf->error = PTR_ERR(conf->tracer); | ||
| 144 | conf->tracer = NULL; | ||
| 145 | |||
| 146 | printk(KERN_CONT | ||
| 147 | "initialization failed (err: %d)...", conf->error); | ||
| 148 | return; | ||
| 149 | } | ||
| 150 | |||
| 151 | /* We should meanwhile have enough trace. */ | ||
| 152 | conf->error = conf->suspend(conf->tracer); | ||
| 153 | if (conf->error < 0) | ||
| 154 | return; | ||
| 155 | |||
| 156 | /* Let's see if we can access the trace. */ | ||
| 157 | trace = ds_read_bts(conf->tracer); | ||
| 158 | |||
| 159 | conf->error = ds_selftest_bts_consistency(trace); | ||
| 160 | if (conf->error < 0) | ||
| 161 | return; | ||
| 162 | |||
| 163 | /* If everything went well, we should have a few trace entries. */ | ||
| 164 | if (trace->ds.top == trace->ds.begin) { | ||
| 165 | /* | ||
| 166 | * It is possible but highly unlikely that we got a | ||
| 167 | * buffer overflow and ended up at exactly the same | ||
| 168 | * position we started from. | ||
| 169 | * Let's issue a warning, but continue. | ||
| 170 | */ | ||
| 171 | printk(KERN_CONT "no trace/overflow..."); | ||
| 172 | } | ||
| 173 | |||
| 174 | /* Let's try to read the trace we collected. */ | ||
| 175 | conf->error = | ||
| 176 | ds_selftest_bts_read(conf->tracer, trace, | ||
| 177 | trace->ds.begin, trace->ds.top); | ||
| 178 | if (conf->error < 0) | ||
| 179 | return; | ||
| 180 | |||
| 181 | /* | ||
| 182 | * Let's read the trace again. | ||
| 183 | * Since we suspended tracing, we should get the same result. | ||
| 184 | */ | ||
| 185 | top = trace->ds.top; | ||
| 186 | |||
| 187 | trace = ds_read_bts(conf->tracer); | ||
| 188 | conf->error = ds_selftest_bts_consistency(trace); | ||
| 189 | if (conf->error < 0) | ||
| 190 | return; | ||
| 191 | |||
| 192 | if (top != trace->ds.top) { | ||
| 193 | printk(KERN_CONT "suspend not working..."); | ||
| 194 | conf->error = -1; | ||
| 195 | return; | ||
| 196 | } | ||
| 197 | |||
| 198 | /* Let's collect some more trace - see if resume is working. */ | ||
| 199 | conf->error = conf->resume(conf->tracer); | ||
| 200 | if (conf->error < 0) | ||
| 201 | return; | ||
| 202 | |||
| 203 | conf->error = conf->suspend(conf->tracer); | ||
| 204 | if (conf->error < 0) | ||
| 205 | return; | ||
| 206 | |||
| 207 | trace = ds_read_bts(conf->tracer); | ||
| 208 | |||
| 209 | conf->error = ds_selftest_bts_consistency(trace); | ||
| 210 | if (conf->error < 0) | ||
| 211 | return; | ||
| 212 | |||
| 213 | if (trace->ds.top == top) { | ||
| 214 | /* | ||
| 215 | * It is possible but highly unlikely that we got a | ||
| 216 | * buffer overflow and ended up at exactly the same | ||
| 217 | * position we started from. | ||
| 218 | * Let's issue a warning and check the full trace. | ||
| 219 | */ | ||
| 220 | printk(KERN_CONT | ||
| 221 | "no resume progress/overflow..."); | ||
| 222 | |||
| 223 | conf->error = | ||
| 224 | ds_selftest_bts_read(conf->tracer, trace, | ||
| 225 | trace->ds.begin, trace->ds.end); | ||
| 226 | } else if (trace->ds.top < top) { | ||
| 227 | /* | ||
| 228 | * We had a buffer overflow - the entire buffer should | ||
| 229 | * contain trace records. | ||
| 230 | */ | ||
| 231 | conf->error = | ||
| 232 | ds_selftest_bts_read(conf->tracer, trace, | ||
| 233 | trace->ds.begin, trace->ds.end); | ||
| 234 | } else { | ||
| 235 | /* | ||
| 236 | * It is quite likely that the buffer did not overflow. | ||
| 237 | * Let's just check the delta trace. | ||
| 238 | */ | ||
| 239 | conf->error = | ||
| 240 | ds_selftest_bts_read(conf->tracer, trace, top, | ||
| 241 | trace->ds.top); | ||
| 242 | } | ||
| 243 | if (conf->error < 0) | ||
| 244 | return; | ||
| 245 | |||
| 246 | conf->error = 0; | ||
| 247 | } | ||
| 248 | |||
| 249 | static int ds_suspend_bts_wrap(struct bts_tracer *tracer) | ||
| 250 | { | ||
| 251 | ds_suspend_bts(tracer); | ||
| 252 | return 0; | ||
| 253 | } | ||
| 254 | |||
| 255 | static int ds_resume_bts_wrap(struct bts_tracer *tracer) | ||
| 256 | { | ||
| 257 | ds_resume_bts(tracer); | ||
| 258 | return 0; | ||
| 259 | } | ||
| 260 | |||
| 261 | static void ds_release_bts_noirq_wrap(void *tracer) | ||
| 262 | { | ||
| 263 | (void)ds_release_bts_noirq(tracer); | ||
| 264 | } | ||
| 265 | |||
| 266 | static int ds_selftest_bts_bad_release_noirq(int cpu, | ||
| 267 | struct bts_tracer *tracer) | ||
| 268 | { | ||
| 269 | int error = -EPERM; | ||
| 270 | |||
| 271 | /* Try to release the tracer on the wrong cpu. */ | ||
| 272 | get_cpu(); | ||
| 273 | if (cpu != smp_processor_id()) { | ||
| 274 | error = ds_release_bts_noirq(tracer); | ||
| 275 | if (error != -EPERM) | ||
| 276 | printk(KERN_CONT "release on wrong cpu..."); | ||
| 277 | } | ||
| 278 | put_cpu(); | ||
| 279 | |||
| 280 | return error ? 0 : -1; | ||
| 281 | } | ||
| 282 | |||
| 283 | static int ds_selftest_bts_bad_request_cpu(int cpu, void *buffer) | ||
| 284 | { | ||
| 285 | struct bts_tracer *tracer; | ||
| 286 | int error; | ||
| 287 | |||
| 288 | /* Try to request cpu tracing while task tracing is active. */ | ||
| 289 | tracer = ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, NULL, | ||
| 290 | (size_t)-1, BTS_KERNEL); | ||
| 291 | error = PTR_ERR(tracer); | ||
| 292 | if (!IS_ERR(tracer)) { | ||
| 293 | ds_release_bts(tracer); | ||
| 294 | error = 0; | ||
| 295 | } | ||
| 296 | |||
| 297 | if (error != -EPERM) | ||
| 298 | printk(KERN_CONT "cpu/task tracing overlap..."); | ||
| 299 | |||
| 300 | return error ? 0 : -1; | ||
| 301 | } | ||
| 302 | |||
| 303 | static int ds_selftest_bts_bad_request_task(void *buffer) | ||
| 304 | { | ||
| 305 | struct bts_tracer *tracer; | ||
| 306 | int error; | ||
| 307 | |||
| 308 | /* Try to request task tracing while cpu tracing is active. */ | ||
| 309 | tracer = ds_request_bts_task(current, buffer, BUFFER_SIZE, NULL, | ||
| 310 | (size_t)-1, BTS_KERNEL); | ||
| 311 | error = PTR_ERR(tracer); | ||
| 312 | if (!IS_ERR(tracer)) { | ||
| 313 | error = 0; | ||
| 314 | ds_release_bts(tracer); | ||
| 315 | } | ||
| 316 | |||
| 317 | if (error != -EPERM) | ||
| 318 | printk(KERN_CONT "task/cpu tracing overlap..."); | ||
| 319 | |||
| 320 | return error ? 0 : -1; | ||
| 321 | } | ||
| 322 | |||
| 323 | int ds_selftest_bts(void) | ||
| 324 | { | ||
| 325 | struct ds_selftest_bts_conf conf; | ||
| 326 | unsigned char buffer[BUFFER_SIZE], *small_buffer; | ||
| 327 | unsigned long irq; | ||
| 328 | int cpu; | ||
| 329 | |||
| 330 | printk(KERN_INFO "[ds] bts selftest..."); | ||
| 331 | conf.error = 0; | ||
| 332 | |||
| 333 | small_buffer = (unsigned char *)ALIGN((unsigned long)buffer, 8) + 8; | ||
| 334 | |||
| 335 | get_online_cpus(); | ||
| 336 | for_each_online_cpu(cpu) { | ||
| 337 | conf.suspend = ds_suspend_bts_wrap; | ||
| 338 | conf.resume = ds_resume_bts_wrap; | ||
| 339 | conf.tracer = | ||
| 340 | ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, | ||
| 341 | NULL, (size_t)-1, BTS_KERNEL); | ||
| 342 | ds_selftest_bts_cpu(&conf); | ||
| 343 | if (conf.error >= 0) | ||
| 344 | conf.error = ds_selftest_bts_bad_request_task(buffer); | ||
| 345 | ds_release_bts(conf.tracer); | ||
| 346 | if (conf.error < 0) | ||
| 347 | goto out; | ||
| 348 | |||
| 349 | conf.suspend = ds_suspend_bts_noirq; | ||
| 350 | conf.resume = ds_resume_bts_noirq; | ||
| 351 | conf.tracer = | ||
| 352 | ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, | ||
| 353 | NULL, (size_t)-1, BTS_KERNEL); | ||
| 354 | smp_call_function_single(cpu, ds_selftest_bts_cpu, &conf, 1); | ||
| 355 | if (conf.error >= 0) { | ||
| 356 | conf.error = | ||
| 357 | ds_selftest_bts_bad_release_noirq(cpu, | ||
| 358 | conf.tracer); | ||
| 359 | /* We must not release the tracer twice. */ | ||
| 360 | if (conf.error < 0) | ||
| 361 | conf.tracer = NULL; | ||
| 362 | } | ||
| 363 | if (conf.error >= 0) | ||
| 364 | conf.error = ds_selftest_bts_bad_request_task(buffer); | ||
| 365 | smp_call_function_single(cpu, ds_release_bts_noirq_wrap, | ||
| 366 | conf.tracer, 1); | ||
| 367 | if (conf.error < 0) | ||
| 368 | goto out; | ||
| 369 | } | ||
| 370 | |||
| 371 | conf.suspend = ds_suspend_bts_wrap; | ||
| 372 | conf.resume = ds_resume_bts_wrap; | ||
| 373 | conf.tracer = | ||
| 374 | ds_request_bts_task(current, buffer, BUFFER_SIZE, | ||
| 375 | NULL, (size_t)-1, BTS_KERNEL); | ||
| 376 | ds_selftest_bts_cpu(&conf); | ||
| 377 | if (conf.error >= 0) | ||
| 378 | conf.error = ds_selftest_bts_bad_request_cpu(0, buffer); | ||
| 379 | ds_release_bts(conf.tracer); | ||
| 380 | if (conf.error < 0) | ||
| 381 | goto out; | ||
| 382 | |||
| 383 | conf.suspend = ds_suspend_bts_noirq; | ||
| 384 | conf.resume = ds_resume_bts_noirq; | ||
| 385 | conf.tracer = | ||
| 386 | ds_request_bts_task(current, small_buffer, SMALL_BUFFER_SIZE, | ||
| 387 | NULL, (size_t)-1, BTS_KERNEL); | ||
| 388 | local_irq_save(irq); | ||
| 389 | ds_selftest_bts_cpu(&conf); | ||
| 390 | if (conf.error >= 0) | ||
| 391 | conf.error = ds_selftest_bts_bad_request_cpu(0, buffer); | ||
| 392 | ds_release_bts_noirq(conf.tracer); | ||
| 393 | local_irq_restore(irq); | ||
| 394 | if (conf.error < 0) | ||
| 395 | goto out; | ||
| 396 | |||
| 397 | conf.error = 0; | ||
| 398 | out: | ||
| 399 | put_online_cpus(); | ||
| 400 | printk(KERN_CONT "%s.\n", (conf.error ? "failed" : "passed")); | ||
| 401 | |||
| 402 | return conf.error; | ||
| 403 | } | ||
| 404 | |||
| 405 | int ds_selftest_pebs(void) | ||
| 406 | { | ||
| 407 | return 0; | ||
| 408 | } | ||
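
The consistency check in the selftest above reduces to two invariants on the DS trace buffer: end == begin + n * size, and begin <= top <= end, with records read at fixed size-byte strides. A stand-alone sketch of those invariants (illustrative only, not part of the patch; the struct and names are made up):

#include <assert.h>
#include <stddef.h>

/* Hypothetical mirror of the ds trace layout checked by the selftest. */
struct ring {
	char *begin, *end, *top;	/* buffer bounds and current write position */
	size_t n, size;			/* number of records, bytes per record */
};

/* The invariants ds_selftest_bts_consistency() verifies. */
static int ring_consistent(const struct ring *r)
{
	if (!r->n || !r->size)
		return 0;
	if (r->end != r->begin + r->n * r->size)
		return 0;
	/* top may equal end, depending on when the overflow adjustment happens */
	return r->begin <= r->top && r->top <= r->end;
}

/* Record index for a pointer, as computed in ds_selftest_bts_read(). */
static size_t ring_index(const struct ring *r, const char *at)
{
	assert((size_t)(at - r->begin) % r->size == 0);
	return (size_t)(at - r->begin) / r->size;
}

int main(void)
{
	char buf[521];			/* odd total size, like BUFFER_SIZE */
	struct ring r = { buf, buf + (521 / 24) * 24, buf, 521 / 24, 24 };

	assert(ring_consistent(&r));
	assert(ring_index(&r, r.begin + 2 * r.size) == 2);
	return 0;
}
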
diff --git a/arch/x86/kernel/ds_selftest.h b/arch/x86/kernel/ds_selftest.h deleted file mode 100644 index 2ba8745c6663..000000000000 --- a/arch/x86/kernel/ds_selftest.h +++ /dev/null | |||
| @@ -1,15 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Debug Store support - selftest | ||
| 3 | * | ||
| 4 | * | ||
| 5 | * Copyright (C) 2009 Intel Corporation. | ||
| 6 | * Markus Metzger <markus.t.metzger@intel.com>, 2009 | ||
| 7 | */ | ||
| 8 | |||
| 9 | #ifdef CONFIG_X86_DS_SELFTEST | ||
| 10 | extern int ds_selftest_bts(void); | ||
| 11 | extern int ds_selftest_pebs(void); | ||
| 12 | #else | ||
| 13 | static inline int ds_selftest_bts(void) { return 0; } | ||
| 14 | static inline int ds_selftest_pebs(void) { return 0; } | ||
| 15 | #endif | ||
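
The deleted header used the usual Kconfig stub idiom: real prototypes when the option is set, empty static inlines otherwise, so callers never need an #ifdef of their own. The same pattern with hypothetical names, for reference:

/* foo_selftest.h -- illustrative only, not a real kernel header */
#ifdef CONFIG_FOO_SELFTEST
extern int foo_selftest(void);
#else
static inline int foo_selftest(void) { return 0; }
#endif

/* A caller can then write unconditionally: if (foo_selftest()) ... */
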
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 6d817554780a..c89a386930b7 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c | |||
| @@ -224,11 +224,6 @@ unsigned __kprobes long oops_begin(void) | |||
| 224 | int cpu; | 224 | int cpu; |
| 225 | unsigned long flags; | 225 | unsigned long flags; |
| 226 | 226 | ||
| 227 | /* notify the hw-branch tracer so it may disable tracing and | ||
| 228 | add the last trace to the trace buffer - | ||
| 229 | the earlier this happens, the more useful the trace. */ | ||
| 230 | trace_hw_branch_oops(); | ||
| 231 | |||
| 232 | oops_enter(); | 227 | oops_enter(); |
| 233 | 228 | ||
| 234 | /* racy, but better than risking deadlock. */ | 229 | /* racy, but better than risking deadlock. */ |
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 44a8e0dc6737..cd49141cf153 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
| @@ -53,6 +53,7 @@ | |||
| 53 | #include <asm/processor-flags.h> | 53 | #include <asm/processor-flags.h> |
| 54 | #include <asm/ftrace.h> | 54 | #include <asm/ftrace.h> |
| 55 | #include <asm/irq_vectors.h> | 55 | #include <asm/irq_vectors.h> |
| 56 | #include <asm/cpufeature.h> | ||
| 56 | 57 | ||
| 57 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ | 58 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ |
| 58 | #include <linux/elf-em.h> | 59 | #include <linux/elf-em.h> |
| @@ -905,7 +906,25 @@ ENTRY(simd_coprocessor_error) | |||
| 905 | RING0_INT_FRAME | 906 | RING0_INT_FRAME |
| 906 | pushl $0 | 907 | pushl $0 |
| 907 | CFI_ADJUST_CFA_OFFSET 4 | 908 | CFI_ADJUST_CFA_OFFSET 4 |
| 909 | #ifdef CONFIG_X86_INVD_BUG | ||
| 910 | /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */ | ||
| 911 | 661: pushl $do_general_protection | ||
| 912 | 662: | ||
| 913 | .section .altinstructions,"a" | ||
| 914 | .balign 4 | ||
| 915 | .long 661b | ||
| 916 | .long 663f | ||
| 917 | .byte X86_FEATURE_XMM | ||
| 918 | .byte 662b-661b | ||
| 919 | .byte 664f-663f | ||
| 920 | .previous | ||
| 921 | .section .altinstr_replacement,"ax" | ||
| 922 | 663: pushl $do_simd_coprocessor_error | ||
| 923 | 664: | ||
| 924 | .previous | ||
| 925 | #else | ||
| 908 | pushl $do_simd_coprocessor_error | 926 | pushl $do_simd_coprocessor_error |
| 927 | #endif | ||
| 909 | CFI_ADJUST_CFA_OFFSET 4 | 928 | CFI_ADJUST_CFA_OFFSET 4 |
| 910 | jmp error_code | 929 | jmp error_code |
| 911 | CFI_ENDPROC | 930 | CFI_ENDPROC |
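
The .altinstructions/.altinstr_replacement pair added above is the assembly form of the kernel's boot-time instruction patching: CPUs with X86_FEATURE_XMM get the do_simd_coprocessor_error push patched in, while the affected AMD 486 parts (where invd from user space raises exception 19) keep do_general_protection. From C the same mechanism is reached through the alternative() macro in <asm/alternative.h>; a well-known example is the 32-bit memory barrier of this era, which reads roughly:

/* Use the locked-add fallback everywhere, but have the alternatives
 * code patch in a real mfence on CPUs with X86_FEATURE_XMM2 (SSE2). */
#define mb()	alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
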
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index d6cc065f519f..a8f1b803d2fd 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c | |||
| @@ -189,25 +189,16 @@ static int get_hbp_len(u8 hbp_len) | |||
| 189 | } | 189 | } |
| 190 | 190 | ||
| 191 | /* | 191 | /* |
| 192 | * Check for virtual address in user space. | ||
| 193 | */ | ||
| 194 | int arch_check_va_in_userspace(unsigned long va, u8 hbp_len) | ||
| 195 | { | ||
| 196 | unsigned int len; | ||
| 197 | |||
| 198 | len = get_hbp_len(hbp_len); | ||
| 199 | |||
| 200 | return (va <= TASK_SIZE - len); | ||
| 201 | } | ||
| 202 | |||
| 203 | /* | ||
| 204 | * Check for virtual address in kernel space. | 192 | * Check for virtual address in kernel space. |
| 205 | */ | 193 | */ |
| 206 | static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len) | 194 | int arch_check_bp_in_kernelspace(struct perf_event *bp) |
| 207 | { | 195 | { |
| 208 | unsigned int len; | 196 | unsigned int len; |
| 197 | unsigned long va; | ||
| 198 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | ||
| 209 | 199 | ||
| 210 | len = get_hbp_len(hbp_len); | 200 | va = info->address; |
| 201 | len = get_hbp_len(info->len); | ||
| 211 | 202 | ||
| 212 | return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); | 203 | return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); |
| 213 | } | 204 | } |
| @@ -300,8 +291,7 @@ static int arch_build_bp_info(struct perf_event *bp) | |||
| 300 | /* | 291 | /* |
| 301 | * Validate the arch-specific HW Breakpoint register settings | 292 | * Validate the arch-specific HW Breakpoint register settings |
| 302 | */ | 293 | */ |
| 303 | int arch_validate_hwbkpt_settings(struct perf_event *bp, | 294 | int arch_validate_hwbkpt_settings(struct perf_event *bp) |
| 304 | struct task_struct *tsk) | ||
| 305 | { | 295 | { |
| 306 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | 296 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); |
| 307 | unsigned int align; | 297 | unsigned int align; |
| @@ -314,16 +304,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp, | |||
| 314 | 304 | ||
| 315 | ret = -EINVAL; | 305 | ret = -EINVAL; |
| 316 | 306 | ||
| 317 | if (info->type == X86_BREAKPOINT_EXECUTE) | ||
| 318 | /* | ||
| 319 | * Ptrace-refactoring code | ||
| 320 | * For now, we'll allow instruction breakpoint only for user-space | ||
| 321 | * addresses | ||
| 322 | */ | ||
| 323 | if ((!arch_check_va_in_userspace(info->address, info->len)) && | ||
| 324 | info->len != X86_BREAKPOINT_EXECUTE) | ||
| 325 | return ret; | ||
| 326 | |||
| 327 | switch (info->len) { | 307 | switch (info->len) { |
| 328 | case X86_BREAKPOINT_LEN_1: | 308 | case X86_BREAKPOINT_LEN_1: |
| 329 | align = 0; | 309 | align = 0; |
| @@ -350,15 +330,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp, | |||
| 350 | if (info->address & align) | 330 | if (info->address & align) |
| 351 | return -EINVAL; | 331 | return -EINVAL; |
| 352 | 332 | ||
| 353 | /* Check that the virtual address is in the proper range */ | ||
| 354 | if (tsk) { | ||
| 355 | if (!arch_check_va_in_userspace(info->address, info->len)) | ||
| 356 | return -EFAULT; | ||
| 357 | } else { | ||
| 358 | if (!arch_check_va_in_kernelspace(info->address, info->len)) | ||
| 359 | return -EFAULT; | ||
| 360 | } | ||
| 361 | |||
| 362 | return 0; | 333 | return 0; |
| 363 | } | 334 | } |
| 364 | 335 | ||
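
With arch_check_va_in_userspace() gone, arch_check_bp_in_kernelspace() now just tells the generic hw_breakpoint core whether the target address lies in kernel space, and arch_validate_hwbkpt_settings() keeps only the alignment and length checks. A user-space sketch of how such a breakpoint request reaches that path through perf_event_open() (illustrative; error handling trimmed, the watched variable is made up):

#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int watched;	/* hypothetical variable to watch */

int main(void)
{
	struct perf_event_attr attr;
	long fd;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_BREAKPOINT;
	attr.size = sizeof(attr);
	attr.bp_type = HW_BREAKPOINT_W;			/* break on data write */
	attr.bp_addr = (unsigned long)&watched;
	attr.bp_len = HW_BREAKPOINT_LEN_4;

	/* pid = 0, cpu = -1: watch the current task on any CPU; the kernel
	 * validates this request via arch_validate_hwbkpt_settings(). */
	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);

	watched = 1;		/* would generate a breakpoint event on fd */
	return fd < 0;
}
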
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 54c31c285488..86cef6b32253 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
| @@ -102,65 +102,62 @@ void __cpuinit fpu_init(void) | |||
| 102 | 102 | ||
| 103 | mxcsr_feature_mask_init(); | 103 | mxcsr_feature_mask_init(); |
| 104 | /* clean state in init */ | 104 | /* clean state in init */ |
| 105 | if (cpu_has_xsave) | 105 | current_thread_info()->status = 0; |
| 106 | current_thread_info()->status = TS_XSAVE; | ||
| 107 | else | ||
| 108 | current_thread_info()->status = 0; | ||
| 109 | clear_used_math(); | 106 | clear_used_math(); |
| 110 | } | 107 | } |
| 111 | #endif /* CONFIG_X86_64 */ | 108 | #endif /* CONFIG_X86_64 */ |
| 112 | 109 | ||
| 113 | /* | 110 | static void fpu_finit(struct fpu *fpu) |
| 114 | * The _current_ task is using the FPU for the first time | ||
| 115 | * so initialize it and set the mxcsr to its default | ||
| 116 | * value at reset if we support XMM instructions and then | ||
| 117 | * remember the current task has used the FPU. | ||
| 118 | */ | ||
| 119 | int init_fpu(struct task_struct *tsk) | ||
| 120 | { | 111 | { |
| 121 | if (tsk_used_math(tsk)) { | ||
| 122 | if (HAVE_HWFP && tsk == current) | ||
| 123 | unlazy_fpu(tsk); | ||
| 124 | return 0; | ||
| 125 | } | ||
| 126 | |||
| 127 | /* | ||
| 128 | * Memory allocation at the first usage of the FPU and other state. | ||
| 129 | */ | ||
| 130 | if (!tsk->thread.xstate) { | ||
| 131 | tsk->thread.xstate = kmem_cache_alloc(task_xstate_cachep, | ||
| 132 | GFP_KERNEL); | ||
| 133 | if (!tsk->thread.xstate) | ||
| 134 | return -ENOMEM; | ||
| 135 | } | ||
| 136 | |||
| 137 | #ifdef CONFIG_X86_32 | 112 | #ifdef CONFIG_X86_32 |
| 138 | if (!HAVE_HWFP) { | 113 | if (!HAVE_HWFP) { |
| 139 | memset(tsk->thread.xstate, 0, xstate_size); | 114 | finit_soft_fpu(&fpu->state->soft); |
| 140 | finit_task(tsk); | 115 | return; |
| 141 | set_stopped_child_used_math(tsk); | ||
| 142 | return 0; | ||
| 143 | } | 116 | } |
| 144 | #endif | 117 | #endif |
| 145 | 118 | ||
| 146 | if (cpu_has_fxsr) { | 119 | if (cpu_has_fxsr) { |
| 147 | struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; | 120 | struct i387_fxsave_struct *fx = &fpu->state->fxsave; |
| 148 | 121 | ||
| 149 | memset(fx, 0, xstate_size); | 122 | memset(fx, 0, xstate_size); |
| 150 | fx->cwd = 0x37f; | 123 | fx->cwd = 0x37f; |
| 151 | if (cpu_has_xmm) | 124 | if (cpu_has_xmm) |
| 152 | fx->mxcsr = MXCSR_DEFAULT; | 125 | fx->mxcsr = MXCSR_DEFAULT; |
| 153 | } else { | 126 | } else { |
| 154 | struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave; | 127 | struct i387_fsave_struct *fp = &fpu->state->fsave; |
| 155 | memset(fp, 0, xstate_size); | 128 | memset(fp, 0, xstate_size); |
| 156 | fp->cwd = 0xffff037fu; | 129 | fp->cwd = 0xffff037fu; |
| 157 | fp->swd = 0xffff0000u; | 130 | fp->swd = 0xffff0000u; |
| 158 | fp->twd = 0xffffffffu; | 131 | fp->twd = 0xffffffffu; |
| 159 | fp->fos = 0xffff0000u; | 132 | fp->fos = 0xffff0000u; |
| 160 | } | 133 | } |
| 134 | } | ||
| 135 | |||
| 136 | /* | ||
| 137 | * The _current_ task is using the FPU for the first time | ||
| 138 | * so initialize it and set the mxcsr to its default | ||
| 139 | * value at reset if we support XMM instructions and then | ||
| 140 | * remember the current task has used the FPU. | ||
| 141 | */ | ||
| 142 | int init_fpu(struct task_struct *tsk) | ||
| 143 | { | ||
| 144 | int ret; | ||
| 145 | |||
| 146 | if (tsk_used_math(tsk)) { | ||
| 147 | if (HAVE_HWFP && tsk == current) | ||
| 148 | unlazy_fpu(tsk); | ||
| 149 | return 0; | ||
| 150 | } | ||
| 151 | |||
| 161 | /* | 152 | /* |
| 162 | * Only the device not available exception or ptrace can call init_fpu. | 153 | * Memory allocation at the first usage of the FPU and other state. |
| 163 | */ | 154 | */ |
| 155 | ret = fpu_alloc(&tsk->thread.fpu); | ||
| 156 | if (ret) | ||
| 157 | return ret; | ||
| 158 | |||
| 159 | fpu_finit(&tsk->thread.fpu); | ||
| 160 | |||
| 164 | set_stopped_child_used_math(tsk); | 161 | set_stopped_child_used_math(tsk); |
| 165 | return 0; | 162 | return 0; |
| 166 | } | 163 | } |
| @@ -194,7 +191,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, | |||
| 194 | return ret; | 191 | return ret; |
| 195 | 192 | ||
| 196 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, | 193 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, |
| 197 | &target->thread.xstate->fxsave, 0, -1); | 194 | &target->thread.fpu.state->fxsave, 0, -1); |
| 198 | } | 195 | } |
| 199 | 196 | ||
| 200 | int xfpregs_set(struct task_struct *target, const struct user_regset *regset, | 197 | int xfpregs_set(struct task_struct *target, const struct user_regset *regset, |
| @@ -211,19 +208,19 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
| 211 | return ret; | 208 | return ret; |
| 212 | 209 | ||
| 213 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, | 210 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, |
| 214 | &target->thread.xstate->fxsave, 0, -1); | 211 | &target->thread.fpu.state->fxsave, 0, -1); |
| 215 | 212 | ||
| 216 | /* | 213 | /* |
| 217 | * mxcsr reserved bits must be masked to zero for security reasons. | 214 | * mxcsr reserved bits must be masked to zero for security reasons. |
| 218 | */ | 215 | */ |
| 219 | target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; | 216 | target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask; |
| 220 | 217 | ||
| 221 | /* | 218 | /* |
| 222 | * update the header bits in the xsave header, indicating the | 219 | * update the header bits in the xsave header, indicating the |
| 223 | * presence of FP and SSE state. | 220 | * presence of FP and SSE state. |
| 224 | */ | 221 | */ |
| 225 | if (cpu_has_xsave) | 222 | if (cpu_has_xsave) |
| 226 | target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; | 223 | target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; |
| 227 | 224 | ||
| 228 | return ret; | 225 | return ret; |
| 229 | } | 226 | } |
| @@ -246,14 +243,14 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset, | |||
| 246 | * memory layout in the thread struct, so that we can copy the entire | 243 | * memory layout in the thread struct, so that we can copy the entire |
| 247 | * xstateregs to the user using one user_regset_copyout(). | 244 | * xstateregs to the user using one user_regset_copyout(). |
| 248 | */ | 245 | */ |
| 249 | memcpy(&target->thread.xstate->fxsave.sw_reserved, | 246 | memcpy(&target->thread.fpu.state->fxsave.sw_reserved, |
| 250 | xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes)); | 247 | xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes)); |
| 251 | 248 | ||
| 252 | /* | 249 | /* |
| 253 | * Copy the xstate memory layout. | 250 | * Copy the xstate memory layout. |
| 254 | */ | 251 | */ |
| 255 | ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, | 252 | ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, |
| 256 | &target->thread.xstate->xsave, 0, -1); | 253 | &target->thread.fpu.state->xsave, 0, -1); |
| 257 | return ret; | 254 | return ret; |
| 258 | } | 255 | } |
| 259 | 256 | ||
| @@ -272,14 +269,14 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, | |||
| 272 | return ret; | 269 | return ret; |
| 273 | 270 | ||
| 274 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, | 271 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, |
| 275 | &target->thread.xstate->xsave, 0, -1); | 272 | &target->thread.fpu.state->xsave, 0, -1); |
| 276 | 273 | ||
| 277 | /* | 274 | /* |
| 278 | * mxcsr reserved bits must be masked to zero for security reasons. | 275 | * mxcsr reserved bits must be masked to zero for security reasons. |
| 279 | */ | 276 | */ |
| 280 | target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; | 277 | target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask; |
| 281 | 278 | ||
| 282 | xsave_hdr = &target->thread.xstate->xsave.xsave_hdr; | 279 | xsave_hdr = &target->thread.fpu.state->xsave.xsave_hdr; |
| 283 | 280 | ||
| 284 | xsave_hdr->xstate_bv &= pcntxt_mask; | 281 | xsave_hdr->xstate_bv &= pcntxt_mask; |
| 285 | /* | 282 | /* |
| @@ -365,7 +362,7 @@ static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave) | |||
| 365 | static void | 362 | static void |
| 366 | convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) | 363 | convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) |
| 367 | { | 364 | { |
| 368 | struct i387_fxsave_struct *fxsave = &tsk->thread.xstate->fxsave; | 365 | struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; |
| 369 | struct _fpreg *to = (struct _fpreg *) &env->st_space[0]; | 366 | struct _fpreg *to = (struct _fpreg *) &env->st_space[0]; |
| 370 | struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0]; | 367 | struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0]; |
| 371 | int i; | 368 | int i; |
| @@ -405,7 +402,7 @@ static void convert_to_fxsr(struct task_struct *tsk, | |||
| 405 | const struct user_i387_ia32_struct *env) | 402 | const struct user_i387_ia32_struct *env) |
| 406 | 403 | ||
| 407 | { | 404 | { |
| 408 | struct i387_fxsave_struct *fxsave = &tsk->thread.xstate->fxsave; | 405 | struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; |
| 409 | struct _fpreg *from = (struct _fpreg *) &env->st_space[0]; | 406 | struct _fpreg *from = (struct _fpreg *) &env->st_space[0]; |
| 410 | struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0]; | 407 | struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0]; |
| 411 | int i; | 408 | int i; |
| @@ -445,7 +442,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, | |||
| 445 | 442 | ||
| 446 | if (!cpu_has_fxsr) { | 443 | if (!cpu_has_fxsr) { |
| 447 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, | 444 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, |
| 448 | &target->thread.xstate->fsave, 0, | 445 | &target->thread.fpu.state->fsave, 0, |
| 449 | -1); | 446 | -1); |
| 450 | } | 447 | } |
| 451 | 448 | ||
| @@ -475,7 +472,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
| 475 | 472 | ||
| 476 | if (!cpu_has_fxsr) { | 473 | if (!cpu_has_fxsr) { |
| 477 | return user_regset_copyin(&pos, &count, &kbuf, &ubuf, | 474 | return user_regset_copyin(&pos, &count, &kbuf, &ubuf, |
| 478 | &target->thread.xstate->fsave, 0, -1); | 475 | &target->thread.fpu.state->fsave, 0, -1); |
| 479 | } | 476 | } |
| 480 | 477 | ||
| 481 | if (pos > 0 || count < sizeof(env)) | 478 | if (pos > 0 || count < sizeof(env)) |
| @@ -490,7 +487,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
| 490 | * presence of FP. | 487 | * presence of FP. |
| 491 | */ | 488 | */ |
| 492 | if (cpu_has_xsave) | 489 | if (cpu_has_xsave) |
| 493 | target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FP; | 490 | target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FP; |
| 494 | return ret; | 491 | return ret; |
| 495 | } | 492 | } |
| 496 | 493 | ||
| @@ -501,7 +498,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
| 501 | static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) | 498 | static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) |
| 502 | { | 499 | { |
| 503 | struct task_struct *tsk = current; | 500 | struct task_struct *tsk = current; |
| 504 | struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave; | 501 | struct i387_fsave_struct *fp = &tsk->thread.fpu.state->fsave; |
| 505 | 502 | ||
| 506 | fp->status = fp->swd; | 503 | fp->status = fp->swd; |
| 507 | if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct))) | 504 | if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct))) |
| @@ -512,7 +509,7 @@ static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) | |||
| 512 | static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) | 509 | static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) |
| 513 | { | 510 | { |
| 514 | struct task_struct *tsk = current; | 511 | struct task_struct *tsk = current; |
| 515 | struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; | 512 | struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave; |
| 516 | struct user_i387_ia32_struct env; | 513 | struct user_i387_ia32_struct env; |
| 517 | int err = 0; | 514 | int err = 0; |
| 518 | 515 | ||
| @@ -547,7 +544,7 @@ static int save_i387_xsave(void __user *buf) | |||
| 547 | * header as well as change any contents in the memory layout. | 544 | * header as well as change any contents in the memory layout. |
| 548 | * xrestore as part of sigreturn will capture all the changes. | 545 | * xrestore as part of sigreturn will capture all the changes. |
| 549 | */ | 546 | */ |
| 550 | tsk->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; | 547 | tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; |
| 551 | 548 | ||
| 552 | if (save_i387_fxsave(fx) < 0) | 549 | if (save_i387_fxsave(fx) < 0) |
| 553 | return -1; | 550 | return -1; |
| @@ -599,7 +596,7 @@ static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) | |||
| 599 | { | 596 | { |
| 600 | struct task_struct *tsk = current; | 597 | struct task_struct *tsk = current; |
| 601 | 598 | ||
| 602 | return __copy_from_user(&tsk->thread.xstate->fsave, buf, | 599 | return __copy_from_user(&tsk->thread.fpu.state->fsave, buf, |
| 603 | sizeof(struct i387_fsave_struct)); | 600 | sizeof(struct i387_fsave_struct)); |
| 604 | } | 601 | } |
| 605 | 602 | ||
| @@ -610,10 +607,10 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf, | |||
| 610 | struct user_i387_ia32_struct env; | 607 | struct user_i387_ia32_struct env; |
| 611 | int err; | 608 | int err; |
| 612 | 609 | ||
| 613 | err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0], | 610 | err = __copy_from_user(&tsk->thread.fpu.state->fxsave, &buf->_fxsr_env[0], |
| 614 | size); | 611 | size); |
| 615 | /* mxcsr reserved bits must be masked to zero for security reasons */ | 612 | /* mxcsr reserved bits must be masked to zero for security reasons */ |
| 616 | tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; | 613 | tsk->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask; |
| 617 | if (err || __copy_from_user(&env, buf, sizeof(env))) | 614 | if (err || __copy_from_user(&env, buf, sizeof(env))) |
| 618 | return 1; | 615 | return 1; |
| 619 | convert_to_fxsr(tsk, &env); | 616 | convert_to_fxsr(tsk, &env); |
| @@ -629,7 +626,7 @@ static int restore_i387_xsave(void __user *buf) | |||
| 629 | struct i387_fxsave_struct __user *fx = | 626 | struct i387_fxsave_struct __user *fx = |
| 630 | (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0]; | 627 | (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0]; |
| 631 | struct xsave_hdr_struct *xsave_hdr = | 628 | struct xsave_hdr_struct *xsave_hdr = |
| 632 | ¤t->thread.xstate->xsave.xsave_hdr; | 629 | ¤t->thread.fpu.state->xsave.xsave_hdr; |
| 633 | u64 mask; | 630 | u64 mask; |
| 634 | int err; | 631 | int err; |
| 635 | 632 | ||
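
Every accessor above changes from tsk->thread.xstate->... to tsk->thread.fpu.state->...: the per-task FPU data now sits behind a small container managed by the fpu_alloc()/fpu_finit() helpers. A sketch of the shape implied by these accessors (the real definition lives in the arch headers touched by this patch):

/* Sketch only -- inferred from the accessors and helpers used above. */
struct fpu {
	union thread_xstate *state;	/* lazily allocated FPU/xsave area */
};

/*
 * init_fpu() now follows a plain allocate-on-first-use pattern:
 *
 *	if (tsk_used_math(tsk))			// already set up
 *		return 0;
 *	ret = fpu_alloc(&tsk->thread.fpu);	// allocate state on demand
 *	fpu_finit(&tsk->thread.fpu);		// program reset defaults (cwd, mxcsr)
 *	set_stopped_child_used_math(tsk);	// remember the task used the FPU
 */
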
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index 23c167925a5c..2dfd31597443 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c | |||
| @@ -16,7 +16,7 @@ | |||
| 16 | #include <asm/hpet.h> | 16 | #include <asm/hpet.h> |
| 17 | #include <asm/smp.h> | 17 | #include <asm/smp.h> |
| 18 | 18 | ||
| 19 | DEFINE_SPINLOCK(i8253_lock); | 19 | DEFINE_RAW_SPINLOCK(i8253_lock); |
| 20 | EXPORT_SYMBOL(i8253_lock); | 20 | EXPORT_SYMBOL(i8253_lock); |
| 21 | 21 | ||
| 22 | /* | 22 | /* |
| @@ -33,7 +33,7 @@ struct clock_event_device *global_clock_event; | |||
| 33 | static void init_pit_timer(enum clock_event_mode mode, | 33 | static void init_pit_timer(enum clock_event_mode mode, |
| 34 | struct clock_event_device *evt) | 34 | struct clock_event_device *evt) |
| 35 | { | 35 | { |
| 36 | spin_lock(&i8253_lock); | 36 | raw_spin_lock(&i8253_lock); |
| 37 | 37 | ||
| 38 | switch (mode) { | 38 | switch (mode) { |
| 39 | case CLOCK_EVT_MODE_PERIODIC: | 39 | case CLOCK_EVT_MODE_PERIODIC: |
| @@ -62,7 +62,7 @@ static void init_pit_timer(enum clock_event_mode mode, | |||
| 62 | /* Nothing to do here */ | 62 | /* Nothing to do here */ |
| 63 | break; | 63 | break; |
| 64 | } | 64 | } |
| 65 | spin_unlock(&i8253_lock); | 65 | raw_spin_unlock(&i8253_lock); |
| 66 | } | 66 | } |
| 67 | 67 | ||
| 68 | /* | 68 | /* |
| @@ -72,10 +72,10 @@ static void init_pit_timer(enum clock_event_mode mode, | |||
| 72 | */ | 72 | */ |
| 73 | static int pit_next_event(unsigned long delta, struct clock_event_device *evt) | 73 | static int pit_next_event(unsigned long delta, struct clock_event_device *evt) |
| 74 | { | 74 | { |
| 75 | spin_lock(&i8253_lock); | 75 | raw_spin_lock(&i8253_lock); |
| 76 | outb_pit(delta & 0xff , PIT_CH0); /* LSB */ | 76 | outb_pit(delta & 0xff , PIT_CH0); /* LSB */ |
| 77 | outb_pit(delta >> 8 , PIT_CH0); /* MSB */ | 77 | outb_pit(delta >> 8 , PIT_CH0); /* MSB */ |
| 78 | spin_unlock(&i8253_lock); | 78 | raw_spin_unlock(&i8253_lock); |
| 79 | 79 | ||
| 80 | return 0; | 80 | return 0; |
| 81 | } | 81 | } |
| @@ -130,7 +130,7 @@ static cycle_t pit_read(struct clocksource *cs) | |||
| 130 | int count; | 130 | int count; |
| 131 | u32 jifs; | 131 | u32 jifs; |
| 132 | 132 | ||
| 133 | spin_lock_irqsave(&i8253_lock, flags); | 133 | raw_spin_lock_irqsave(&i8253_lock, flags); |
| 134 | /* | 134 | /* |
| 135 | * Although our caller may have the read side of xtime_lock, | 135 | * Although our caller may have the read side of xtime_lock, |
| 136 | * this is now a seqlock, and we are cheating in this routine | 136 | * this is now a seqlock, and we are cheating in this routine |
| @@ -176,7 +176,7 @@ static cycle_t pit_read(struct clocksource *cs) | |||
| 176 | old_count = count; | 176 | old_count = count; |
| 177 | old_jifs = jifs; | 177 | old_jifs = jifs; |
| 178 | 178 | ||
| 179 | spin_unlock_irqrestore(&i8253_lock, flags); | 179 | raw_spin_unlock_irqrestore(&i8253_lock, flags); |
| 180 | 180 | ||
| 181 | count = (LATCH - 1) - count; | 181 | count = (LATCH - 1) - count; |
| 182 | 182 | ||
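
The PIT lock becomes a raw_spinlock_t: the raw variant always stays a true spinning lock, even on configurations (such as PREEMPT_RT) where ordinary spinlocks can turn into sleeping locks, which matters here because the lock is taken from clockevent and clocksource code running with interrupts disabled. The calling pattern itself does not change; condensed:

/* Minimal raw-spinlock pattern, same calls as in the hunk above
 * (example_lock and the register access are hypothetical). */
static DEFINE_RAW_SPINLOCK(example_lock);

static void example_pit_access(void)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&example_lock, flags);
	/* ... program the timer registers ... */
	raw_spin_unlock_irqrestore(&example_lock, flags);
}
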
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 0ed2d300cd46..990ae7cfc578 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
| @@ -60,7 +60,7 @@ static irqreturn_t math_error_irq(int cpl, void *dev_id) | |||
| 60 | outb(0, 0xF0); | 60 | outb(0, 0xF0); |
| 61 | if (ignore_fpu_irq || !boot_cpu_data.hard_math) | 61 | if (ignore_fpu_irq || !boot_cpu_data.hard_math) |
| 62 | return IRQ_NONE; | 62 | return IRQ_NONE; |
| 63 | math_error((void __user *)get_irq_regs()->ip); | 63 | math_error(get_irq_regs(), 0, 16); |
| 64 | return IRQ_HANDLED; | 64 | return IRQ_HANDLED; |
| 65 | } | 65 | } |
| 66 | 66 | ||
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index b43bbaebe2c0..345a4b1fe144 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
| @@ -422,14 +422,22 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, | |||
| 422 | 422 | ||
| 423 | static void __kprobes clear_btf(void) | 423 | static void __kprobes clear_btf(void) |
| 424 | { | 424 | { |
| 425 | if (test_thread_flag(TIF_DEBUGCTLMSR)) | 425 | if (test_thread_flag(TIF_BLOCKSTEP)) { |
| 426 | update_debugctlmsr(0); | 426 | unsigned long debugctl = get_debugctlmsr(); |
| 427 | |||
| 428 | debugctl &= ~DEBUGCTLMSR_BTF; | ||
| 429 | update_debugctlmsr(debugctl); | ||
| 430 | } | ||
| 427 | } | 431 | } |
| 428 | 432 | ||
| 429 | static void __kprobes restore_btf(void) | 433 | static void __kprobes restore_btf(void) |
| 430 | { | 434 | { |
| 431 | if (test_thread_flag(TIF_DEBUGCTLMSR)) | 435 | if (test_thread_flag(TIF_BLOCKSTEP)) { |
| 432 | update_debugctlmsr(current->thread.debugctlmsr); | 436 | unsigned long debugctl = get_debugctlmsr(); |
| 437 | |||
| 438 | debugctl |= DEBUGCTLMSR_BTF; | ||
| 439 | update_debugctlmsr(debugctl); | ||
| 440 | } | ||
| 433 | } | 441 | } |
| 434 | 442 | ||
| 435 | void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, | 443 | void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, |
| @@ -534,20 +542,6 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) | |||
| 534 | struct kprobe_ctlblk *kcb; | 542 | struct kprobe_ctlblk *kcb; |
| 535 | 543 | ||
| 536 | addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t)); | 544 | addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t)); |
| 537 | if (*addr != BREAKPOINT_INSTRUCTION) { | ||
| 538 | /* | ||
| 539 | * The breakpoint instruction was removed right | ||
| 540 | * after we hit it. Another cpu has removed | ||
| 541 | * either a probepoint or a debugger breakpoint | ||
| 542 | * at this address. In either case, no further | ||
| 543 | * handling of this interrupt is appropriate. | ||
| 544 | * Back up over the (now missing) int3 and run | ||
| 545 | * the original instruction. | ||
| 546 | */ | ||
| 547 | regs->ip = (unsigned long)addr; | ||
| 548 | return 1; | ||
| 549 | } | ||
| 550 | |||
| 551 | /* | 545 | /* |
| 552 | * We don't want to be preempted for the entire | 546 | * We don't want to be preempted for the entire |
| 553 | * duration of kprobe processing. We conditionally | 547 | * duration of kprobe processing. We conditionally |
| @@ -579,6 +573,19 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) | |||
| 579 | setup_singlestep(p, regs, kcb, 0); | 573 | setup_singlestep(p, regs, kcb, 0); |
| 580 | return 1; | 574 | return 1; |
| 581 | } | 575 | } |
| 576 | } else if (*addr != BREAKPOINT_INSTRUCTION) { | ||
| 577 | /* | ||
| 578 | * The breakpoint instruction was removed right | ||
| 579 | * after we hit it. Another cpu has removed | ||
| 580 | * either a probepoint or a debugger breakpoint | ||
| 581 | * at this address. In either case, no further | ||
| 582 | * handling of this interrupt is appropriate. | ||
| 583 | * Back up over the (now missing) int3 and run | ||
| 584 | * the original instruction. | ||
| 585 | */ | ||
| 586 | regs->ip = (unsigned long)addr; | ||
| 587 | preempt_enable_no_resched(); | ||
| 588 | return 1; | ||
| 582 | } else if (kprobe_running()) { | 589 | } else if (kprobe_running()) { |
| 583 | p = __get_cpu_var(current_kprobe); | 590 | p = __get_cpu_var(current_kprobe); |
| 584 | if (p->break_handler && p->break_handler(p, regs)) { | 591 | if (p->break_handler && p->break_handler(p, regs)) { |
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index cceb5bc3c3c2..2cd8c544e41a 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c | |||
| @@ -201,9 +201,9 @@ static int do_microcode_update(const void __user *buf, size_t size) | |||
| 201 | return error; | 201 | return error; |
| 202 | } | 202 | } |
| 203 | 203 | ||
| 204 | static int microcode_open(struct inode *unused1, struct file *unused2) | 204 | static int microcode_open(struct inode *inode, struct file *file) |
| 205 | { | 205 | { |
| 206 | return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; | 206 | return capable(CAP_SYS_RAWIO) ? nonseekable_open(inode, file) : -EPERM; |
| 207 | } | 207 | } |
| 208 | 208 | ||
| 209 | static ssize_t microcode_write(struct file *file, const char __user *buf, | 209 | static ssize_t microcode_write(struct file *file, const char __user *buf, |
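
microcode_open() now goes through nonseekable_open(), which clears the FMODE_LSEEK/FMODE_PREAD/FMODE_PWRITE bits so seeking and positional I/O on the device node are rejected. This is the standard pattern for character devices whose writes are a stream rather than a file; a generic open method would look like (mydev is hypothetical):

static int mydev_open(struct inode *inode, struct file *file)
{
	if (!capable(CAP_SYS_RAWIO))
		return -EPERM;
	return nonseekable_open(inode, file);	/* always returns 0 */
}
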
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 85a343e28937..356170262a93 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c | |||
| @@ -343,10 +343,11 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, | |||
| 343 | int (*get_ucode_data)(void *, const void *, size_t)) | 343 | int (*get_ucode_data)(void *, const void *, size_t)) |
| 344 | { | 344 | { |
| 345 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 345 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
| 346 | u8 *ucode_ptr = data, *new_mc = NULL, *mc; | 346 | u8 *ucode_ptr = data, *new_mc = NULL, *mc = NULL; |
| 347 | int new_rev = uci->cpu_sig.rev; | 347 | int new_rev = uci->cpu_sig.rev; |
| 348 | unsigned int leftover = size; | 348 | unsigned int leftover = size; |
| 349 | enum ucode_state state = UCODE_OK; | 349 | enum ucode_state state = UCODE_OK; |
| 350 | unsigned int curr_mc_size = 0; | ||
| 350 | 351 | ||
| 351 | while (leftover) { | 352 | while (leftover) { |
| 352 | struct microcode_header_intel mc_header; | 353 | struct microcode_header_intel mc_header; |
| @@ -361,9 +362,15 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, | |||
| 361 | break; | 362 | break; |
| 362 | } | 363 | } |
| 363 | 364 | ||
| 364 | mc = vmalloc(mc_size); | 365 | /* For performance reasons, reuse mc area when possible */ |
| 365 | if (!mc) | 366 | if (!mc || mc_size > curr_mc_size) { |
| 366 | break; | 367 | if (mc) |
| 368 | vfree(mc); | ||
| 369 | mc = vmalloc(mc_size); | ||
| 370 | if (!mc) | ||
| 371 | break; | ||
| 372 | curr_mc_size = mc_size; | ||
| 373 | } | ||
| 367 | 374 | ||
| 368 | if (get_ucode_data(mc, ucode_ptr, mc_size) || | 375 | if (get_ucode_data(mc, ucode_ptr, mc_size) || |
| 369 | microcode_sanity_check(mc) < 0) { | 376 | microcode_sanity_check(mc) < 0) { |
| @@ -376,13 +383,16 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, | |||
| 376 | vfree(new_mc); | 383 | vfree(new_mc); |
| 377 | new_rev = mc_header.rev; | 384 | new_rev = mc_header.rev; |
| 378 | new_mc = mc; | 385 | new_mc = mc; |
| 379 | } else | 386 | mc = NULL; /* trigger new vmalloc */ |
| 380 | vfree(mc); | 387 | } |
| 381 | 388 | ||
| 382 | ucode_ptr += mc_size; | 389 | ucode_ptr += mc_size; |
| 383 | leftover -= mc_size; | 390 | leftover -= mc_size; |
| 384 | } | 391 | } |
| 385 | 392 | ||
| 393 | if (mc) | ||
| 394 | vfree(mc); | ||
| 395 | |||
| 386 | if (leftover) { | 396 | if (leftover) { |
| 387 | if (new_mc) | 397 | if (new_mc) |
| 388 | vfree(new_mc); | 398 | vfree(new_mc); |
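
The hunk above replaces one vmalloc()/vfree() per microcode blob with a grow-only scratch buffer: keep the current allocation when it is large enough, reallocate only when a bigger blob shows up, and NULL the pointer when ownership moves to new_mc. The same pattern in plain C (user-space sketch with malloc/free, names made up):

#include <stdlib.h>
#include <string.h>

/* Reuse one scratch buffer across iterations, growing it only on demand. */
static void *scratch;
static size_t scratch_size;

static void *get_scratch(size_t need)
{
	if (!scratch || need > scratch_size) {
		free(scratch);
		scratch = malloc(need);
		scratch_size = scratch ? need : 0;
	}
	return scratch;
}

int main(void)
{
	const size_t sizes[] = { 2048, 1024, 4096 };	/* 2048 allocates, 1024 reuses, 4096 grows */
	for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
		void *mc = get_scratch(sizes[i]);
		if (!mc)
			return 1;
		memset(mc, 0, sizes[i]);	/* stand-in for get_ucode_data() */
	}
	free(scratch);
	return 0;
}
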
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index e81030f71a8f..5ae5d2426edf 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
| @@ -115,21 +115,6 @@ static void __init MP_bus_info(struct mpc_bus *m) | |||
| 115 | printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); | 115 | printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); |
| 116 | } | 116 | } |
| 117 | 117 | ||
| 118 | static int bad_ioapic(unsigned long address) | ||
| 119 | { | ||
| 120 | if (nr_ioapics >= MAX_IO_APICS) { | ||
| 121 | printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " | ||
| 122 | "(found %d)\n", MAX_IO_APICS, nr_ioapics); | ||
| 123 | panic("Recompile kernel with bigger MAX_IO_APICS!\n"); | ||
| 124 | } | ||
| 125 | if (!address) { | ||
| 126 | printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address" | ||
| 127 | " found in table, skipping!\n"); | ||
| 128 | return 1; | ||
| 129 | } | ||
| 130 | return 0; | ||
| 131 | } | ||
| 132 | |||
| 133 | static void __init MP_ioapic_info(struct mpc_ioapic *m) | 118 | static void __init MP_ioapic_info(struct mpc_ioapic *m) |
| 134 | { | 119 | { |
| 135 | if (!(m->flags & MPC_APIC_USABLE)) | 120 | if (!(m->flags & MPC_APIC_USABLE)) |
| @@ -138,15 +123,7 @@ static void __init MP_ioapic_info(struct mpc_ioapic *m) | |||
| 138 | printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n", | 123 | printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n", |
| 139 | m->apicid, m->apicver, m->apicaddr); | 124 | m->apicid, m->apicver, m->apicaddr); |
| 140 | 125 | ||
| 141 | if (bad_ioapic(m->apicaddr)) | 126 | mp_register_ioapic(m->apicid, m->apicaddr, gsi_end + 1); |
| 142 | return; | ||
| 143 | |||
| 144 | mp_ioapics[nr_ioapics].apicaddr = m->apicaddr; | ||
| 145 | mp_ioapics[nr_ioapics].apicid = m->apicid; | ||
| 146 | mp_ioapics[nr_ioapics].type = m->type; | ||
| 147 | mp_ioapics[nr_ioapics].apicver = m->apicver; | ||
| 148 | mp_ioapics[nr_ioapics].flags = m->flags; | ||
| 149 | nr_ioapics++; | ||
| 150 | } | 127 | } |
| 151 | 128 | ||
| 152 | static void print_MP_intsrc_info(struct mpc_intsrc *m) | 129 | static void print_MP_intsrc_info(struct mpc_intsrc *m) |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 28ad9f4d8b94..e7e35219b32f 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
| @@ -20,7 +20,6 @@ | |||
| 20 | #include <asm/idle.h> | 20 | #include <asm/idle.h> |
| 21 | #include <asm/uaccess.h> | 21 | #include <asm/uaccess.h> |
| 22 | #include <asm/i387.h> | 22 | #include <asm/i387.h> |
| 23 | #include <asm/ds.h> | ||
| 24 | #include <asm/debugreg.h> | 23 | #include <asm/debugreg.h> |
| 25 | 24 | ||
| 26 | unsigned long idle_halt; | 25 | unsigned long idle_halt; |
| @@ -32,26 +31,22 @@ struct kmem_cache *task_xstate_cachep; | |||
| 32 | 31 | ||
| 33 | int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) | 32 | int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) |
| 34 | { | 33 | { |
| 34 | int ret; | ||
| 35 | |||
| 35 | *dst = *src; | 36 | *dst = *src; |
| 36 | if (src->thread.xstate) { | 37 | if (fpu_allocated(&src->thread.fpu)) { |
| 37 | dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep, | 38 | memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu)); |
| 38 | GFP_KERNEL); | 39 | ret = fpu_alloc(&dst->thread.fpu); |
| 39 | if (!dst->thread.xstate) | 40 | if (ret) |
| 40 | return -ENOMEM; | 41 | return ret; |
| 41 | WARN_ON((unsigned long)dst->thread.xstate & 15); | 42 | fpu_copy(&dst->thread.fpu, &src->thread.fpu); |
| 42 | memcpy(dst->thread.xstate, src->thread.xstate, xstate_size); | ||
| 43 | } | 43 | } |
| 44 | return 0; | 44 | return 0; |
| 45 | } | 45 | } |
| 46 | 46 | ||
| 47 | void free_thread_xstate(struct task_struct *tsk) | 47 | void free_thread_xstate(struct task_struct *tsk) |
| 48 | { | 48 | { |
| 49 | if (tsk->thread.xstate) { | 49 | fpu_free(&tsk->thread.fpu); |
| 50 | kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); | ||
| 51 | tsk->thread.xstate = NULL; | ||
| 52 | } | ||
| 53 | |||
| 54 | WARN(tsk->thread.ds_ctx, "leaking DS context\n"); | ||
| 55 | } | 50 | } |
| 56 | 51 | ||
| 57 | void free_thread_info(struct thread_info *ti) | 52 | void free_thread_info(struct thread_info *ti) |
| @@ -198,11 +193,16 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
| 198 | prev = &prev_p->thread; | 193 | prev = &prev_p->thread; |
| 199 | next = &next_p->thread; | 194 | next = &next_p->thread; |
| 200 | 195 | ||
| 201 | if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) || | 196 | if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^ |
| 202 | test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR)) | 197 | test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) { |
| 203 | ds_switch_to(prev_p, next_p); | 198 | unsigned long debugctl = get_debugctlmsr(); |
| 204 | else if (next->debugctlmsr != prev->debugctlmsr) | 199 | |
| 205 | update_debugctlmsr(next->debugctlmsr); | 200 | debugctl &= ~DEBUGCTLMSR_BTF; |
| 201 | if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) | ||
| 202 | debugctl |= DEBUGCTLMSR_BTF; | ||
| 203 | |||
| 204 | update_debugctlmsr(debugctl); | ||
| 205 | } | ||
| 206 | 206 | ||
| 207 | if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ | 207 | if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ |
| 208 | test_tsk_thread_flag(next_p, TIF_NOTSC)) { | 208 | test_tsk_thread_flag(next_p, TIF_NOTSC)) { |
| @@ -546,11 +546,13 @@ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c) | |||
| 546 | * check OSVW bit for CPUs that are not affected | 546 | * check OSVW bit for CPUs that are not affected |
| 547 | * by erratum #400 | 547 | * by erratum #400 |
| 548 | */ | 548 | */ |
| 549 | rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, val); | 549 | if (cpu_has(c, X86_FEATURE_OSVW)) { |
| 550 | if (val >= 2) { | 550 | rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, val); |
| 551 | rdmsrl(MSR_AMD64_OSVW_STATUS, val); | 551 | if (val >= 2) { |
| 552 | if (!(val & BIT(1))) | 552 | rdmsrl(MSR_AMD64_OSVW_STATUS, val); |
| 553 | goto no_c1e_idle; | 553 | if (!(val & BIT(1))) |
| 554 | goto no_c1e_idle; | ||
| 555 | } | ||
| 554 | } | 556 | } |
| 555 | return 1; | 557 | return 1; |
| 556 | } | 558 | } |
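
The __switch_to_xtra() hunk uses the XOR-of-flags idiom: DEBUGCTL is only rewritten when TIF_BLOCKSTEP actually differs between the outgoing and the incoming task, and the BTF bit is then set or cleared to match the incoming one, saving an MSR write on the common path. The idiom in isolation (hypothetical flag and bit values):

#include <stdio.h>

/* Update a hardware bit only when the per-task software flag changes
 * across a switch from prev to next; skip the write when they agree. */
static void sync_hw_bit(int prev_flag, int next_flag,
			unsigned long *hw_reg, unsigned long bit)
{
	if (prev_flag ^ next_flag) {
		if (next_flag)
			*hw_reg |= bit;
		else
			*hw_reg &= ~bit;
	}
}

int main(void)
{
	unsigned long debugctl = 0;

	sync_hw_bit(0, 1, &debugctl, 1UL << 1);	/* BTF is bit 1 of DEBUGCTL */
	printf("debugctl=%#lx\n", debugctl);	/* prints 0x2 */
	return 0;
}
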
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index f6c62667e30c..8d128783af47 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
| @@ -55,7 +55,6 @@ | |||
| 55 | #include <asm/cpu.h> | 55 | #include <asm/cpu.h> |
| 56 | #include <asm/idle.h> | 56 | #include <asm/idle.h> |
| 57 | #include <asm/syscalls.h> | 57 | #include <asm/syscalls.h> |
| 58 | #include <asm/ds.h> | ||
| 59 | #include <asm/debugreg.h> | 58 | #include <asm/debugreg.h> |
| 60 | 59 | ||
| 61 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | 60 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); |
| @@ -238,13 +237,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
| 238 | kfree(p->thread.io_bitmap_ptr); | 237 | kfree(p->thread.io_bitmap_ptr); |
| 239 | p->thread.io_bitmap_max = 0; | 238 | p->thread.io_bitmap_max = 0; |
| 240 | } | 239 | } |
| 241 | |||
| 242 | clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); | ||
| 243 | p->thread.ds_ctx = NULL; | ||
| 244 | |||
| 245 | clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); | ||
| 246 | p->thread.debugctlmsr = 0; | ||
| 247 | |||
| 248 | return err; | 240 | return err; |
| 249 | } | 241 | } |
| 250 | 242 | ||
| @@ -317,7 +309,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
| 317 | 309 | ||
| 318 | /* we're going to use this soon, after a few expensive things */ | 310 | /* we're going to use this soon, after a few expensive things */ |
| 319 | if (preload_fpu) | 311 | if (preload_fpu) |
| 320 | prefetch(next->xstate); | 312 | prefetch(next->fpu.state); |
| 321 | 313 | ||
| 322 | /* | 314 | /* |
| 323 | * Reload esp0. | 315 | * Reload esp0. |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 17cb3295cbf7..3c2422a99f1f 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
| @@ -49,7 +49,6 @@ | |||
| 49 | #include <asm/ia32.h> | 49 | #include <asm/ia32.h> |
| 50 | #include <asm/idle.h> | 50 | #include <asm/idle.h> |
| 51 | #include <asm/syscalls.h> | 51 | #include <asm/syscalls.h> |
| 52 | #include <asm/ds.h> | ||
| 53 | #include <asm/debugreg.h> | 52 | #include <asm/debugreg.h> |
| 54 | 53 | ||
| 55 | asmlinkage extern void ret_from_fork(void); | 54 | asmlinkage extern void ret_from_fork(void); |
| @@ -313,13 +312,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
| 313 | if (err) | 312 | if (err) |
| 314 | goto out; | 313 | goto out; |
| 315 | } | 314 | } |
| 316 | |||
| 317 | clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); | ||
| 318 | p->thread.ds_ctx = NULL; | ||
| 319 | |||
| 320 | clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); | ||
| 321 | p->thread.debugctlmsr = 0; | ||
| 322 | |||
| 323 | err = 0; | 315 | err = 0; |
| 324 | out: | 316 | out: |
| 325 | if (err && p->thread.io_bitmap_ptr) { | 317 | if (err && p->thread.io_bitmap_ptr) { |
| @@ -396,7 +388,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
| 396 | 388 | ||
| 397 | /* we're going to use this soon, after a few expensive things */ | 389 | /* we're going to use this soon, after a few expensive things */ |
| 398 | if (preload_fpu) | 390 | if (preload_fpu) |
| 399 | prefetch(next->xstate); | 391 | prefetch(next->fpu.state); |
| 400 | 392 | ||
| 401 | /* | 393 | /* |
| 402 | * Reload esp0, LDT and the page table pointer: | 394 | * Reload esp0, LDT and the page table pointer: |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 2e9b55027b7e..70c4872cd8aa 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
| @@ -2,9 +2,6 @@ | |||
| 2 | /* | 2 | /* |
| 3 | * Pentium III FXSR, SSE support | 3 | * Pentium III FXSR, SSE support |
| 4 | * Gareth Hughes <gareth@valinux.com>, May 2000 | 4 | * Gareth Hughes <gareth@valinux.com>, May 2000 |
| 5 | * | ||
| 6 | * BTS tracing | ||
| 7 | * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007 | ||
| 8 | */ | 5 | */ |
| 9 | 6 | ||
| 10 | #include <linux/kernel.h> | 7 | #include <linux/kernel.h> |
| @@ -22,7 +19,6 @@ | |||
| 22 | #include <linux/audit.h> | 19 | #include <linux/audit.h> |
| 23 | #include <linux/seccomp.h> | 20 | #include <linux/seccomp.h> |
| 24 | #include <linux/signal.h> | 21 | #include <linux/signal.h> |
| 25 | #include <linux/workqueue.h> | ||
| 26 | #include <linux/perf_event.h> | 22 | #include <linux/perf_event.h> |
| 27 | #include <linux/hw_breakpoint.h> | 23 | #include <linux/hw_breakpoint.h> |
| 28 | 24 | ||
| @@ -36,7 +32,6 @@ | |||
| 36 | #include <asm/desc.h> | 32 | #include <asm/desc.h> |
| 37 | #include <asm/prctl.h> | 33 | #include <asm/prctl.h> |
| 38 | #include <asm/proto.h> | 34 | #include <asm/proto.h> |
| 39 | #include <asm/ds.h> | ||
| 40 | #include <asm/hw_breakpoint.h> | 35 | #include <asm/hw_breakpoint.h> |
| 41 | 36 | ||
| 42 | #include "tls.h" | 37 | #include "tls.h" |
| @@ -693,7 +688,7 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, | |||
| 693 | struct perf_event_attr attr; | 688 | struct perf_event_attr attr; |
| 694 | 689 | ||
| 695 | if (!t->ptrace_bps[nr]) { | 690 | if (!t->ptrace_bps[nr]) { |
| 696 | hw_breakpoint_init(&attr); | 691 | ptrace_breakpoint_init(&attr); |
| 697 | /* | 692 | /* |
| 698 | * Put stub len and type to register (reserve) an inactive but | 693 | * Put stub len and type to register (reserve) an inactive but |
| 699 | * correct bp | 694 | * correct bp |
| @@ -789,342 +784,6 @@ static int ioperm_get(struct task_struct *target, | |||
| 789 | 0, IO_BITMAP_BYTES); | 784 | 0, IO_BITMAP_BYTES); |
| 790 | } | 785 | } |
| 791 | 786 | ||
| 792 | #ifdef CONFIG_X86_PTRACE_BTS | ||
| 793 | /* | ||
| 794 | * A branch trace store context. | ||
| 795 | * | ||
| 796 | * Contexts may only be installed by ptrace_bts_config() and only for | ||
| 797 | * ptraced tasks. | ||
| 798 | * | ||
| 799 | * Contexts are destroyed when the tracee is detached from the tracer. | ||
| 800 | * The actual destruction work requires interrupts enabled, so the | ||
| 801 | * work is deferred and will be scheduled during __ptrace_unlink(). | ||
| 802 | * | ||
| 803 | * Contexts hold an additional task_struct reference on the traced | ||
| 804 | * task, as well as a reference on the tracer's mm. | ||
| 805 | * | ||
| 806 | * Ptrace already holds a task_struct for the duration of ptrace operations, | ||
| 807 | * but since destruction is deferred, it may be executed after both | ||
| 808 | * tracer and tracee exited. | ||
| 809 | */ | ||
| 810 | struct bts_context { | ||
| 811 | /* The branch trace handle. */ | ||
| 812 | struct bts_tracer *tracer; | ||
| 813 | |||
| 814 | /* The buffer used to store the branch trace and its size. */ | ||
| 815 | void *buffer; | ||
| 816 | unsigned int size; | ||
| 817 | |||
| 818 | /* The mm that paid for the above buffer. */ | ||
| 819 | struct mm_struct *mm; | ||
| 820 | |||
| 821 | /* The task this context belongs to. */ | ||
| 822 | struct task_struct *task; | ||
| 823 | |||
| 824 | /* The signal to send on a bts buffer overflow. */ | ||
| 825 | unsigned int bts_ovfl_signal; | ||
| 826 | |||
| 827 | /* The work struct to destroy a context. */ | ||
| 828 | struct work_struct work; | ||
| 829 | }; | ||
| 830 | |||
| 831 | static int alloc_bts_buffer(struct bts_context *context, unsigned int size) | ||
| 832 | { | ||
| 833 | void *buffer = NULL; | ||
| 834 | int err = -ENOMEM; | ||
| 835 | |||
| 836 | err = account_locked_memory(current->mm, current->signal->rlim, size); | ||
| 837 | if (err < 0) | ||
| 838 | return err; | ||
| 839 | |||
| 840 | buffer = kzalloc(size, GFP_KERNEL); | ||
| 841 | if (!buffer) | ||
| 842 | goto out_refund; | ||
| 843 | |||
| 844 | context->buffer = buffer; | ||
| 845 | context->size = size; | ||
| 846 | context->mm = get_task_mm(current); | ||
| 847 | |||
| 848 | return 0; | ||
| 849 | |||
| 850 | out_refund: | ||
| 851 | refund_locked_memory(current->mm, size); | ||
| 852 | return err; | ||
| 853 | } | ||
| 854 | |||
| 855 | static inline void free_bts_buffer(struct bts_context *context) | ||
| 856 | { | ||
| 857 | if (!context->buffer) | ||
| 858 | return; | ||
| 859 | |||
| 860 | kfree(context->buffer); | ||
| 861 | context->buffer = NULL; | ||
| 862 | |||
| 863 | refund_locked_memory(context->mm, context->size); | ||
| 864 | context->size = 0; | ||
| 865 | |||
| 866 | mmput(context->mm); | ||
| 867 | context->mm = NULL; | ||
| 868 | } | ||
| 869 | |||
| 870 | static void free_bts_context_work(struct work_struct *w) | ||
| 871 | { | ||
| 872 | struct bts_context *context; | ||
| 873 | |||
| 874 | context = container_of(w, struct bts_context, work); | ||
| 875 | |||
| 876 | ds_release_bts(context->tracer); | ||
| 877 | put_task_struct(context->task); | ||
| 878 | free_bts_buffer(context); | ||
| 879 | kfree(context); | ||
| 880 | } | ||
| 881 | |||
| 882 | static inline void free_bts_context(struct bts_context *context) | ||
| 883 | { | ||
| 884 | INIT_WORK(&context->work, free_bts_context_work); | ||
| 885 | schedule_work(&context->work); | ||
| 886 | } | ||
| 887 | |||
| 888 | static inline struct bts_context *alloc_bts_context(struct task_struct *task) | ||
| 889 | { | ||
| 890 | struct bts_context *context = kzalloc(sizeof(*context), GFP_KERNEL); | ||
| 891 | if (context) { | ||
| 892 | context->task = task; | ||
| 893 | task->bts = context; | ||
| 894 | |||
| 895 | get_task_struct(task); | ||
| 896 | } | ||
| 897 | |||
| 898 | return context; | ||
| 899 | } | ||
| 900 | |||
| 901 | static int ptrace_bts_read_record(struct task_struct *child, size_t index, | ||
| 902 | struct bts_struct __user *out) | ||
| 903 | { | ||
| 904 | struct bts_context *context; | ||
| 905 | const struct bts_trace *trace; | ||
| 906 | struct bts_struct bts; | ||
| 907 | const unsigned char *at; | ||
| 908 | int error; | ||
| 909 | |||
| 910 | context = child->bts; | ||
| 911 | if (!context) | ||
| 912 | return -ESRCH; | ||
| 913 | |||
| 914 | trace = ds_read_bts(context->tracer); | ||
| 915 | if (!trace) | ||
| 916 | return -ESRCH; | ||
| 917 | |||
| 918 | at = trace->ds.top - ((index + 1) * trace->ds.size); | ||
| 919 | if ((void *)at < trace->ds.begin) | ||
| 920 | at += (trace->ds.n * trace->ds.size); | ||
| 921 | |||
| 922 | if (!trace->read) | ||
| 923 | return -EOPNOTSUPP; | ||
| 924 | |||
| 925 | error = trace->read(context->tracer, at, &bts); | ||
| 926 | if (error < 0) | ||
| 927 | return error; | ||
| 928 | |||
| 929 | if (copy_to_user(out, &bts, sizeof(bts))) | ||
| 930 | return -EFAULT; | ||
| 931 | |||
| 932 | return sizeof(bts); | ||
| 933 | } | ||
| 934 | |||
| 935 | static int ptrace_bts_drain(struct task_struct *child, | ||
| 936 | long size, | ||
| 937 | struct bts_struct __user *out) | ||
| 938 | { | ||
| 939 | struct bts_context *context; | ||
| 940 | const struct bts_trace *trace; | ||
| 941 | const unsigned char *at; | ||
| 942 | int error, drained = 0; | ||
| 943 | |||
| 944 | context = child->bts; | ||
| 945 | if (!context) | ||
| 946 | return -ESRCH; | ||
| 947 | |||
| 948 | trace = ds_read_bts(context->tracer); | ||
| 949 | if (!trace) | ||
| 950 | return -ESRCH; | ||
| 951 | |||
| 952 | if (!trace->read) | ||
| 953 | return -EOPNOTSUPP; | ||
| 954 | |||
| 955 | if (size < (trace->ds.top - trace->ds.begin)) | ||
| 956 | return -EIO; | ||
| 957 | |||
| 958 | for (at = trace->ds.begin; (void *)at < trace->ds.top; | ||
| 959 | out++, drained++, at += trace->ds.size) { | ||
| 960 | struct bts_struct bts; | ||
| 961 | |||
| 962 | error = trace->read(context->tracer, at, &bts); | ||
| 963 | if (error < 0) | ||
| 964 | return error; | ||
| 965 | |||
| 966 | if (copy_to_user(out, &bts, sizeof(bts))) | ||
| 967 | return -EFAULT; | ||
| 968 | } | ||
| 969 | |||
| 970 | memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); | ||
| 971 | |||
| 972 | error = ds_reset_bts(context->tracer); | ||
| 973 | if (error < 0) | ||
| 974 | return error; | ||
| 975 | |||
| 976 | return drained; | ||
| 977 | } | ||
| 978 | |||
| 979 | static int ptrace_bts_config(struct task_struct *child, | ||
| 980 | long cfg_size, | ||
| 981 | const struct ptrace_bts_config __user *ucfg) | ||
| 982 | { | ||
| 983 | struct bts_context *context; | ||
| 984 | struct ptrace_bts_config cfg; | ||
| 985 | unsigned int flags = 0; | ||
| 986 | |||
| 987 | if (cfg_size < sizeof(cfg)) | ||
| 988 | return -EIO; | ||
| 989 | |||
| 990 | if (copy_from_user(&cfg, ucfg, sizeof(cfg))) | ||
| 991 | return -EFAULT; | ||
| 992 | |||
| 993 | context = child->bts; | ||
| 994 | if (!context) | ||
| 995 | context = alloc_bts_context(child); | ||
| 996 | if (!context) | ||
| 997 | return -ENOMEM; | ||
| 998 | |||
| 999 | if (cfg.flags & PTRACE_BTS_O_SIGNAL) { | ||
| 1000 | if (!cfg.signal) | ||
| 1001 | return -EINVAL; | ||
| 1002 | |||
| 1003 | return -EOPNOTSUPP; | ||
| 1004 | context->bts_ovfl_signal = cfg.signal; | ||
| 1005 | } | ||
| 1006 | |||
| 1007 | ds_release_bts(context->tracer); | ||
| 1008 | context->tracer = NULL; | ||
| 1009 | |||
| 1010 | if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != context->size)) { | ||
| 1011 | int err; | ||
| 1012 | |||
| 1013 | free_bts_buffer(context); | ||
| 1014 | if (!cfg.size) | ||
| 1015 | return 0; | ||
| 1016 | |||
| 1017 | err = alloc_bts_buffer(context, cfg.size); | ||
| 1018 | if (err < 0) | ||
| 1019 | return err; | ||
| 1020 | } | ||
| 1021 | |||
| 1022 | if (cfg.flags & PTRACE_BTS_O_TRACE) | ||
| 1023 | flags |= BTS_USER; | ||
| 1024 | |||
| 1025 | if (cfg.flags & PTRACE_BTS_O_SCHED) | ||
| 1026 | flags |= BTS_TIMESTAMPS; | ||
| 1027 | |||
| 1028 | context->tracer = | ||
| 1029 | ds_request_bts_task(child, context->buffer, context->size, | ||
| 1030 | NULL, (size_t)-1, flags); | ||
| 1031 | if (unlikely(IS_ERR(context->tracer))) { | ||
| 1032 | int error = PTR_ERR(context->tracer); | ||
| 1033 | |||
| 1034 | free_bts_buffer(context); | ||
| 1035 | context->tracer = NULL; | ||
| 1036 | return error; | ||
| 1037 | } | ||
| 1038 | |||
| 1039 | return sizeof(cfg); | ||
| 1040 | } | ||
| 1041 | |||
| 1042 | static int ptrace_bts_status(struct task_struct *child, | ||
| 1043 | long cfg_size, | ||
| 1044 | struct ptrace_bts_config __user *ucfg) | ||
| 1045 | { | ||
| 1046 | struct bts_context *context; | ||
| 1047 | const struct bts_trace *trace; | ||
| 1048 | struct ptrace_bts_config cfg; | ||
| 1049 | |||
| 1050 | context = child->bts; | ||
| 1051 | if (!context) | ||
| 1052 | return -ESRCH; | ||
| 1053 | |||
| 1054 | if (cfg_size < sizeof(cfg)) | ||
| 1055 | return -EIO; | ||
| 1056 | |||
| 1057 | trace = ds_read_bts(context->tracer); | ||
| 1058 | if (!trace) | ||
| 1059 | return -ESRCH; | ||
| 1060 | |||
| 1061 | memset(&cfg, 0, sizeof(cfg)); | ||
| 1062 | cfg.size = trace->ds.end - trace->ds.begin; | ||
| 1063 | cfg.signal = context->bts_ovfl_signal; | ||
| 1064 | cfg.bts_size = sizeof(struct bts_struct); | ||
| 1065 | |||
| 1066 | if (cfg.signal) | ||
| 1067 | cfg.flags |= PTRACE_BTS_O_SIGNAL; | ||
| 1068 | |||
| 1069 | if (trace->ds.flags & BTS_USER) | ||
| 1070 | cfg.flags |= PTRACE_BTS_O_TRACE; | ||
| 1071 | |||
| 1072 | if (trace->ds.flags & BTS_TIMESTAMPS) | ||
| 1073 | cfg.flags |= PTRACE_BTS_O_SCHED; | ||
| 1074 | |||
| 1075 | if (copy_to_user(ucfg, &cfg, sizeof(cfg))) | ||
| 1076 | return -EFAULT; | ||
| 1077 | |||
| 1078 | return sizeof(cfg); | ||
| 1079 | } | ||
| 1080 | |||
| 1081 | static int ptrace_bts_clear(struct task_struct *child) | ||
| 1082 | { | ||
| 1083 | struct bts_context *context; | ||
| 1084 | const struct bts_trace *trace; | ||
| 1085 | |||
| 1086 | context = child->bts; | ||
| 1087 | if (!context) | ||
| 1088 | return -ESRCH; | ||
| 1089 | |||
| 1090 | trace = ds_read_bts(context->tracer); | ||
| 1091 | if (!trace) | ||
| 1092 | return -ESRCH; | ||
| 1093 | |||
| 1094 | memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); | ||
| 1095 | |||
| 1096 | return ds_reset_bts(context->tracer); | ||
| 1097 | } | ||
| 1098 | |||
| 1099 | static int ptrace_bts_size(struct task_struct *child) | ||
| 1100 | { | ||
| 1101 | struct bts_context *context; | ||
| 1102 | const struct bts_trace *trace; | ||
| 1103 | |||
| 1104 | context = child->bts; | ||
| 1105 | if (!context) | ||
| 1106 | return -ESRCH; | ||
| 1107 | |||
| 1108 | trace = ds_read_bts(context->tracer); | ||
| 1109 | if (!trace) | ||
| 1110 | return -ESRCH; | ||
| 1111 | |||
| 1112 | return (trace->ds.top - trace->ds.begin) / trace->ds.size; | ||
| 1113 | } | ||
| 1114 | |||
| 1115 | /* | ||
| 1116 | * Called from __ptrace_unlink() after the child has been moved back | ||
| 1117 | * to its original parent. | ||
| 1118 | */ | ||
| 1119 | void ptrace_bts_untrace(struct task_struct *child) | ||
| 1120 | { | ||
| 1121 | if (unlikely(child->bts)) { | ||
| 1122 | free_bts_context(child->bts); | ||
| 1123 | child->bts = NULL; | ||
| 1124 | } | ||
| 1125 | } | ||
| 1126 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
| 1127 | |||
| 1128 | /* | 787 | /* |
| 1129 | * Called by kernel/ptrace.c when detaching.. | 788 | * Called by kernel/ptrace.c when detaching.. |
| 1130 | * | 789 | * |
| @@ -1252,39 +911,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) | |||
| 1252 | break; | 911 | break; |
| 1253 | #endif | 912 | #endif |
| 1254 | 913 | ||
| 1255 | /* | ||
| 1256 | * These bits need more cooking - not enabled yet: | ||
| 1257 | */ | ||
| 1258 | #ifdef CONFIG_X86_PTRACE_BTS | ||
| 1259 | case PTRACE_BTS_CONFIG: | ||
| 1260 | ret = ptrace_bts_config | ||
| 1261 | (child, data, (struct ptrace_bts_config __user *)addr); | ||
| 1262 | break; | ||
| 1263 | |||
| 1264 | case PTRACE_BTS_STATUS: | ||
| 1265 | ret = ptrace_bts_status | ||
| 1266 | (child, data, (struct ptrace_bts_config __user *)addr); | ||
| 1267 | break; | ||
| 1268 | |||
| 1269 | case PTRACE_BTS_SIZE: | ||
| 1270 | ret = ptrace_bts_size(child); | ||
| 1271 | break; | ||
| 1272 | |||
| 1273 | case PTRACE_BTS_GET: | ||
| 1274 | ret = ptrace_bts_read_record | ||
| 1275 | (child, data, (struct bts_struct __user *) addr); | ||
| 1276 | break; | ||
| 1277 | |||
| 1278 | case PTRACE_BTS_CLEAR: | ||
| 1279 | ret = ptrace_bts_clear(child); | ||
| 1280 | break; | ||
| 1281 | |||
| 1282 | case PTRACE_BTS_DRAIN: | ||
| 1283 | ret = ptrace_bts_drain | ||
| 1284 | (child, data, (struct bts_struct __user *) addr); | ||
| 1285 | break; | ||
| 1286 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
| 1287 | |||
| 1288 | default: | 914 | default: |
| 1289 | ret = ptrace_request(child, request, addr, data); | 915 | ret = ptrace_request(child, request, addr, data); |
| 1290 | break; | 916 | break; |
| @@ -1544,14 +1170,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, | |||
| 1544 | 1170 | ||
| 1545 | case PTRACE_GET_THREAD_AREA: | 1171 | case PTRACE_GET_THREAD_AREA: |
| 1546 | case PTRACE_SET_THREAD_AREA: | 1172 | case PTRACE_SET_THREAD_AREA: |
| 1547 | #ifdef CONFIG_X86_PTRACE_BTS | ||
| 1548 | case PTRACE_BTS_CONFIG: | ||
| 1549 | case PTRACE_BTS_STATUS: | ||
| 1550 | case PTRACE_BTS_SIZE: | ||
| 1551 | case PTRACE_BTS_GET: | ||
| 1552 | case PTRACE_BTS_CLEAR: | ||
| 1553 | case PTRACE_BTS_DRAIN: | ||
| 1554 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
| 1555 | return arch_ptrace(child, request, addr, data); | 1173 | return arch_ptrace(child, request, addr, data); |
| 1556 | 1174 | ||
| 1557 | default: | 1175 | default: |
diff --git a/arch/x86/kernel/sfi.c b/arch/x86/kernel/sfi.c index 34e099382651..7ded57896c0a 100644 --- a/arch/x86/kernel/sfi.c +++ b/arch/x86/kernel/sfi.c | |||
| @@ -81,7 +81,6 @@ static int __init sfi_parse_cpus(struct sfi_table_header *table) | |||
| 81 | #endif /* CONFIG_X86_LOCAL_APIC */ | 81 | #endif /* CONFIG_X86_LOCAL_APIC */ |
| 82 | 82 | ||
| 83 | #ifdef CONFIG_X86_IO_APIC | 83 | #ifdef CONFIG_X86_IO_APIC |
| 84 | static u32 gsi_base; | ||
| 85 | 84 | ||
| 86 | static int __init sfi_parse_ioapic(struct sfi_table_header *table) | 85 | static int __init sfi_parse_ioapic(struct sfi_table_header *table) |
| 87 | { | 86 | { |
| @@ -94,8 +93,7 @@ static int __init sfi_parse_ioapic(struct sfi_table_header *table) | |||
| 94 | pentry = (struct sfi_apic_table_entry *)sb->pentry; | 93 | pentry = (struct sfi_apic_table_entry *)sb->pentry; |
| 95 | 94 | ||
| 96 | for (i = 0; i < num; i++) { | 95 | for (i = 0; i < num; i++) { |
| 97 | mp_register_ioapic(i, pentry->phys_addr, gsi_base); | 96 | mp_register_ioapic(i, pentry->phys_addr, gsi_end + 1); |
| 98 | gsi_base += io_apic_get_redir_entries(i); | ||
| 99 | pentry++; | 97 | pentry++; |
| 100 | } | 98 | } |
| 101 | 99 | ||
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 3149032ff107..58de45ee08b6 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c | |||
| @@ -158,22 +158,6 @@ static int enable_single_step(struct task_struct *child) | |||
| 158 | } | 158 | } |
| 159 | 159 | ||
| 160 | /* | 160 | /* |
| 161 | * Install this value in MSR_IA32_DEBUGCTLMSR whenever child is running. | ||
| 162 | */ | ||
| 163 | static void write_debugctlmsr(struct task_struct *child, unsigned long val) | ||
| 164 | { | ||
| 165 | if (child->thread.debugctlmsr == val) | ||
| 166 | return; | ||
| 167 | |||
| 168 | child->thread.debugctlmsr = val; | ||
| 169 | |||
| 170 | if (child != current) | ||
| 171 | return; | ||
| 172 | |||
| 173 | update_debugctlmsr(val); | ||
| 174 | } | ||
| 175 | |||
| 176 | /* | ||
| 177 | * Enable single or block step. | 161 | * Enable single or block step. |
| 178 | */ | 162 | */ |
| 179 | static void enable_step(struct task_struct *child, bool block) | 163 | static void enable_step(struct task_struct *child, bool block) |
| @@ -186,15 +170,17 @@ static void enable_step(struct task_struct *child, bool block) | |||
| 186 | * that uses user-mode single stepping itself. | 170 | * that uses user-mode single stepping itself. |
| 187 | */ | 171 | */ |
| 188 | if (enable_single_step(child) && block) { | 172 | if (enable_single_step(child) && block) { |
| 189 | set_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | 173 | unsigned long debugctl = get_debugctlmsr(); |
| 190 | write_debugctlmsr(child, | 174 | |
| 191 | child->thread.debugctlmsr | DEBUGCTLMSR_BTF); | 175 | debugctl |= DEBUGCTLMSR_BTF; |
| 192 | } else { | 176 | update_debugctlmsr(debugctl); |
| 193 | write_debugctlmsr(child, | 177 | set_tsk_thread_flag(child, TIF_BLOCKSTEP); |
| 194 | child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF); | 178 | } else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) { |
| 195 | 179 | unsigned long debugctl = get_debugctlmsr(); | |
| 196 | if (!child->thread.debugctlmsr) | 180 | |
| 197 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | 181 | debugctl &= ~DEBUGCTLMSR_BTF; |
| 182 | update_debugctlmsr(debugctl); | ||
| 183 | clear_tsk_thread_flag(child, TIF_BLOCKSTEP); | ||
| 198 | } | 184 | } |
| 199 | } | 185 | } |
| 200 | 186 | ||
| @@ -213,11 +199,13 @@ void user_disable_single_step(struct task_struct *child) | |||
| 213 | /* | 199 | /* |
| 214 | * Make sure block stepping (BTF) is disabled. | 200 | * Make sure block stepping (BTF) is disabled. |
| 215 | */ | 201 | */ |
| 216 | write_debugctlmsr(child, | 202 | if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) { |
| 217 | child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF); | 203 | unsigned long debugctl = get_debugctlmsr(); |
| 218 | 204 | ||
| 219 | if (!child->thread.debugctlmsr) | 205 | debugctl &= ~DEBUGCTLMSR_BTF; |
| 220 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | 206 | update_debugctlmsr(debugctl); |
| 207 | clear_tsk_thread_flag(child, TIF_BLOCKSTEP); | ||
| 208 | } | ||
| 221 | 209 | ||
| 222 | /* Always clear TIF_SINGLESTEP... */ | 210 | /* Always clear TIF_SINGLESTEP... */ |
| 223 | clear_tsk_thread_flag(child, TIF_SINGLESTEP); | 211 | clear_tsk_thread_flag(child, TIF_SINGLESTEP); |
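The step.c hunks above drop the cached per-thread debugctlmsr value and instead track block stepping with a TIF_BLOCKSTEP flag, reading and writing DEBUGCTLMSR_BTF directly via get_debugctlmsr()/update_debugctlmsr(). From user space this path is exercised with PTRACE_SINGLEBLOCK, which stops the tracee at the next taken branch. A hedged sketch (the request number 33 is the usual x86 value, assumed here in case the libc headers do not define it):

    /* Sketch: drive the block-step path reworked above with PTRACE_SINGLEBLOCK. */
    #include <stdio.h>
    #include <signal.h>
    #include <unistd.h>
    #include <sys/ptrace.h>
    #include <sys/wait.h>

    #ifndef PTRACE_SINGLEBLOCK
    #define PTRACE_SINGLEBLOCK 33       /* x86 request number (assumed) */
    #endif

    int main(void)
    {
            pid_t pid = fork();
            int stops, status;

            if (pid == 0) {
                    volatile int i;

                    ptrace(PTRACE_TRACEME, 0, NULL, NULL);
                    raise(SIGSTOP);
                    for (i = 0; i < 3; i++)
                            ;           /* a few taken branches to stop on */
                    _exit(0);
            }

            waitpid(pid, NULL, 0);      /* child stopped itself */
            for (stops = 0; stops < 8; stops++) {
                    if (ptrace(PTRACE_SINGLEBLOCK, pid, NULL, NULL) < 0)
                            break;      /* request unsupported here */
                    if (waitpid(pid, &status, 0) < 0 || WIFEXITED(status))
                            break;
                    printf("block-step stop #%d\n", stops + 1);
            }
            kill(pid, SIGKILL);
            return 0;
    }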
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 1168e4454188..02cfb9b8f5b1 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
| @@ -108,15 +108,6 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) | |||
| 108 | dec_preempt_count(); | 108 | dec_preempt_count(); |
| 109 | } | 109 | } |
| 110 | 110 | ||
| 111 | #ifdef CONFIG_X86_32 | ||
| 112 | static inline void | ||
| 113 | die_if_kernel(const char *str, struct pt_regs *regs, long err) | ||
| 114 | { | ||
| 115 | if (!user_mode_vm(regs)) | ||
| 116 | die(str, regs, err); | ||
| 117 | } | ||
| 118 | #endif | ||
| 119 | |||
| 120 | static void __kprobes | 111 | static void __kprobes |
| 121 | do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, | 112 | do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, |
| 122 | long error_code, siginfo_t *info) | 113 | long error_code, siginfo_t *info) |
| @@ -543,11 +534,11 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
| 543 | 534 | ||
| 544 | /* DR6 may or may not be cleared by the CPU */ | 535 | /* DR6 may or may not be cleared by the CPU */ |
| 545 | set_debugreg(0, 6); | 536 | set_debugreg(0, 6); |
| 537 | |||
| 546 | /* | 538 | /* |
| 547 | * The processor cleared BTF, so don't mark that we need it set. | 539 | * The processor cleared BTF, so don't mark that we need it set. |
| 548 | */ | 540 | */ |
| 549 | clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); | 541 | clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP); |
| 550 | tsk->thread.debugctlmsr = 0; | ||
| 551 | 542 | ||
| 552 | /* Store the virtualized DR6 value */ | 543 | /* Store the virtualized DR6 value */ |
| 553 | tsk->thread.debugreg6 = dr6; | 544 | tsk->thread.debugreg6 = dr6; |
| @@ -585,55 +576,67 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
| 585 | return; | 576 | return; |
| 586 | } | 577 | } |
| 587 | 578 | ||
| 588 | #ifdef CONFIG_X86_64 | ||
| 589 | static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) | ||
| 590 | { | ||
| 591 | if (fixup_exception(regs)) | ||
| 592 | return 1; | ||
| 593 | |||
| 594 | notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE); | ||
| 595 | /* Illegal floating point operation in the kernel */ | ||
| 596 | current->thread.trap_no = trapnr; | ||
| 597 | die(str, regs, 0); | ||
| 598 | return 0; | ||
| 599 | } | ||
| 600 | #endif | ||
| 601 | |||
| 602 | /* | 579 | /* |
| 603 | * Note that we play around with the 'TS' bit in an attempt to get | 580 | * Note that we play around with the 'TS' bit in an attempt to get |
| 604 | * the correct behaviour even in the presence of the asynchronous | 581 | * the correct behaviour even in the presence of the asynchronous |
| 605 | * IRQ13 behaviour | 582 | * IRQ13 behaviour |
| 606 | */ | 583 | */ |
| 607 | void math_error(void __user *ip) | 584 | void math_error(struct pt_regs *regs, int error_code, int trapnr) |
| 608 | { | 585 | { |
| 609 | struct task_struct *task; | 586 | struct task_struct *task = current; |
| 610 | siginfo_t info; | 587 | siginfo_t info; |
| 611 | unsigned short cwd, swd, err; | 588 | unsigned short err; |
| 589 | char *str = (trapnr == 16) ? "fpu exception" : "simd exception"; | ||
| 590 | |||
| 591 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, SIGFPE) == NOTIFY_STOP) | ||
| 592 | return; | ||
| 593 | conditional_sti(regs); | ||
| 594 | |||
| 595 | if (!user_mode_vm(regs)) | ||
| 596 | { | ||
| 597 | if (!fixup_exception(regs)) { | ||
| 598 | task->thread.error_code = error_code; | ||
| 599 | task->thread.trap_no = trapnr; | ||
| 600 | die(str, regs, error_code); | ||
| 601 | } | ||
| 602 | return; | ||
| 603 | } | ||
| 612 | 604 | ||
| 613 | /* | 605 | /* |
| 614 | * Save the info for the exception handler and clear the error. | 606 | * Save the info for the exception handler and clear the error. |
| 615 | */ | 607 | */ |
| 616 | task = current; | ||
| 617 | save_init_fpu(task); | 608 | save_init_fpu(task); |
| 618 | task->thread.trap_no = 16; | 609 | task->thread.trap_no = trapnr; |
| 619 | task->thread.error_code = 0; | 610 | task->thread.error_code = error_code; |
| 620 | info.si_signo = SIGFPE; | 611 | info.si_signo = SIGFPE; |
| 621 | info.si_errno = 0; | 612 | info.si_errno = 0; |
| 622 | info.si_addr = ip; | 613 | info.si_addr = (void __user *)regs->ip; |
| 623 | /* | 614 | if (trapnr == 16) { |
| 624 | * (~cwd & swd) will mask out exceptions that are not set to unmasked | 615 | unsigned short cwd, swd; |
| 625 | * status. 0x3f is the exception bits in these regs, 0x200 is the | 616 | /* |
| 626 | * C1 reg you need in case of a stack fault, 0x040 is the stack | 617 | * (~cwd & swd) will mask out exceptions that are not set to unmasked |
| 627 | * fault bit. We should only be taking one exception at a time, | 618 | * status. 0x3f is the exception bits in these regs, 0x200 is the |
| 628 | * so if this combination doesn't produce any single exception, | 619 | * C1 reg you need in case of a stack fault, 0x040 is the stack |
| 629 | * then we have a bad program that isn't synchronizing its FPU usage | 620 | * fault bit. We should only be taking one exception at a time, |
| 630 | * and it will suffer the consequences since we won't be able to | 621 | * so if this combination doesn't produce any single exception, |
| 631 | * fully reproduce the context of the exception | 622 | * then we have a bad program that isn't synchronizing its FPU usage |
| 632 | */ | 623 | * and it will suffer the consequences since we won't be able to |
| 633 | cwd = get_fpu_cwd(task); | 624 | * fully reproduce the context of the exception |
| 634 | swd = get_fpu_swd(task); | 625 | */ |
| 626 | cwd = get_fpu_cwd(task); | ||
| 627 | swd = get_fpu_swd(task); | ||
| 635 | 628 | ||
| 636 | err = swd & ~cwd; | 629 | err = swd & ~cwd; |
| 630 | } else { | ||
| 631 | /* | ||
| 632 | * The SIMD FPU exceptions are handled a little differently, as there | ||
| 633 | * is only a single status/control register. Thus, to determine which | ||
| 634 | * unmasked exception was caught we must mask the exception mask bits | ||
| 635 | * at 0x1f80, and then use these to mask the exception bits at 0x3f. | ||
| 636 | */ | ||
| 637 | unsigned short mxcsr = get_fpu_mxcsr(task); | ||
| 638 | err = ~(mxcsr >> 7) & mxcsr; | ||
| 639 | } | ||
| 637 | 640 | ||
| 638 | if (err & 0x001) { /* Invalid op */ | 641 | if (err & 0x001) { /* Invalid op */ |
| 639 | /* | 642 | /* |
| @@ -662,97 +665,17 @@ void math_error(void __user *ip) | |||
| 662 | 665 | ||
| 663 | dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) | 666 | dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) |
| 664 | { | 667 | { |
| 665 | conditional_sti(regs); | ||
| 666 | |||
| 667 | #ifdef CONFIG_X86_32 | 668 | #ifdef CONFIG_X86_32 |
| 668 | ignore_fpu_irq = 1; | 669 | ignore_fpu_irq = 1; |
| 669 | #else | ||
| 670 | if (!user_mode(regs) && | ||
| 671 | kernel_math_error(regs, "kernel x87 math error", 16)) | ||
| 672 | return; | ||
| 673 | #endif | 670 | #endif |
| 674 | 671 | ||
| 675 | math_error((void __user *)regs->ip); | 672 | math_error(regs, error_code, 16); |
| 676 | } | ||
| 677 | |||
| 678 | static void simd_math_error(void __user *ip) | ||
| 679 | { | ||
| 680 | struct task_struct *task; | ||
| 681 | siginfo_t info; | ||
| 682 | unsigned short mxcsr; | ||
| 683 | |||
| 684 | /* | ||
| 685 | * Save the info for the exception handler and clear the error. | ||
| 686 | */ | ||
| 687 | task = current; | ||
| 688 | save_init_fpu(task); | ||
| 689 | task->thread.trap_no = 19; | ||
| 690 | task->thread.error_code = 0; | ||
| 691 | info.si_signo = SIGFPE; | ||
| 692 | info.si_errno = 0; | ||
| 693 | info.si_code = __SI_FAULT; | ||
| 694 | info.si_addr = ip; | ||
| 695 | /* | ||
| 696 | * The SIMD FPU exceptions are handled a little differently, as there | ||
| 697 | * is only a single status/control register. Thus, to determine which | ||
| 698 | * unmasked exception was caught we must mask the exception mask bits | ||
| 699 | * at 0x1f80, and then use these to mask the exception bits at 0x3f. | ||
| 700 | */ | ||
| 701 | mxcsr = get_fpu_mxcsr(task); | ||
| 702 | switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) { | ||
| 703 | case 0x000: | ||
| 704 | default: | ||
| 705 | break; | ||
| 706 | case 0x001: /* Invalid Op */ | ||
| 707 | info.si_code = FPE_FLTINV; | ||
| 708 | break; | ||
| 709 | case 0x002: /* Denormalize */ | ||
| 710 | case 0x010: /* Underflow */ | ||
| 711 | info.si_code = FPE_FLTUND; | ||
| 712 | break; | ||
| 713 | case 0x004: /* Zero Divide */ | ||
| 714 | info.si_code = FPE_FLTDIV; | ||
| 715 | break; | ||
| 716 | case 0x008: /* Overflow */ | ||
| 717 | info.si_code = FPE_FLTOVF; | ||
| 718 | break; | ||
| 719 | case 0x020: /* Precision */ | ||
| 720 | info.si_code = FPE_FLTRES; | ||
| 721 | break; | ||
| 722 | } | ||
| 723 | force_sig_info(SIGFPE, &info, task); | ||
| 724 | } | 673 | } |
| 725 | 674 | ||
| 726 | dotraplinkage void | 675 | dotraplinkage void |
| 727 | do_simd_coprocessor_error(struct pt_regs *regs, long error_code) | 676 | do_simd_coprocessor_error(struct pt_regs *regs, long error_code) |
| 728 | { | 677 | { |
| 729 | conditional_sti(regs); | 678 | math_error(regs, error_code, 19); |
| 730 | |||
| 731 | #ifdef CONFIG_X86_32 | ||
| 732 | if (cpu_has_xmm) { | ||
| 733 | /* Handle SIMD FPU exceptions on PIII+ processors. */ | ||
| 734 | ignore_fpu_irq = 1; | ||
| 735 | simd_math_error((void __user *)regs->ip); | ||
| 736 | return; | ||
| 737 | } | ||
| 738 | /* | ||
| 739 | * Handle strange cache flush from user space exception | ||
| 740 | * in all other cases. This is undocumented behaviour. | ||
| 741 | */ | ||
| 742 | if (regs->flags & X86_VM_MASK) { | ||
| 743 | handle_vm86_fault((struct kernel_vm86_regs *)regs, error_code); | ||
| 744 | return; | ||
| 745 | } | ||
| 746 | current->thread.trap_no = 19; | ||
| 747 | current->thread.error_code = error_code; | ||
| 748 | die_if_kernel("cache flush denied", regs, error_code); | ||
| 749 | force_sig(SIGSEGV, current); | ||
| 750 | #else | ||
| 751 | if (!user_mode(regs) && | ||
| 752 | kernel_math_error(regs, "kernel simd math error", 19)) | ||
| 753 | return; | ||
| 754 | simd_math_error((void __user *)regs->ip); | ||
| 755 | #endif | ||
| 756 | } | 679 | } |
| 757 | 680 | ||
| 758 | dotraplinkage void | 681 | dotraplinkage void |
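With the merge above, math_error() now derives the SIGFPE si_code from the x87 control/status pair for trap 16 and from MXCSR for trap 19; in the MXCSR case, err = ~(mxcsr >> 7) & mxcsr keeps exactly the exception flags (bits 0-5) whose mask bits (bits 7-12) are clear. A small user-space program can observe the result of this decoding, for example FPE_FLTDIV for an unmasked SSE divide-by-zero. This is only an illustrative check; feenableexcept() is a glibc extension (compile with -lm):

    /* Sketch: unmask divide-by-zero and verify the si_code filled in by
     * the kernel's math_error() path above. */
    #define _GNU_SOURCE
    #include <fenv.h>
    #include <signal.h>
    #include <stdio.h>
    #include <unistd.h>

    static void fpe_handler(int sig, siginfo_t *info, void *ctx)
    {
            (void)sig; (void)ctx;
            /* Expect FPE_FLTDIV here. */
            printf("SIGFPE, si_code=%d (FPE_FLTDIV=%d)\n", info->si_code, FPE_FLTDIV);
            _exit(0);
    }

    int main(void)
    {
            struct sigaction sa = { .sa_sigaction = fpe_handler,
                                    .sa_flags = SA_SIGINFO };
            volatile double num = 1.0, den = 0.0;

            sigaction(SIGFPE, &sa, NULL);
            feenableexcept(FE_DIVBYZERO);       /* unmask #Z in MXCSR / x87 CW */
            printf("%f\n", num / den);          /* raises the FP exception */
            return 1;                           /* not reached */
    }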
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 693920b22496..1b950d151e58 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c | |||
| @@ -54,7 +54,6 @@ EXPORT_SYMBOL(memcpy); | |||
| 54 | EXPORT_SYMBOL(__memcpy); | 54 | EXPORT_SYMBOL(__memcpy); |
| 55 | 55 | ||
| 56 | EXPORT_SYMBOL(empty_zero_page); | 56 | EXPORT_SYMBOL(empty_zero_page); |
| 57 | EXPORT_SYMBOL(init_level4_pgt); | ||
| 58 | #ifndef CONFIG_PARAVIRT | 57 | #ifndef CONFIG_PARAVIRT |
| 59 | EXPORT_SYMBOL(native_load_gs_index); | 58 | EXPORT_SYMBOL(native_load_gs_index); |
| 60 | #endif | 59 | #endif |
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 782c3a362ec6..37e68fc5e24a 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c | |||
| @@ -99,7 +99,7 @@ int save_i387_xstate(void __user *buf) | |||
| 99 | if (err) | 99 | if (err) |
| 100 | return err; | 100 | return err; |
| 101 | 101 | ||
| 102 | if (task_thread_info(tsk)->status & TS_XSAVE) | 102 | if (use_xsave()) |
| 103 | err = xsave_user(buf); | 103 | err = xsave_user(buf); |
| 104 | else | 104 | else |
| 105 | err = fxsave_user(buf); | 105 | err = fxsave_user(buf); |
| @@ -109,14 +109,14 @@ int save_i387_xstate(void __user *buf) | |||
| 109 | task_thread_info(tsk)->status &= ~TS_USEDFPU; | 109 | task_thread_info(tsk)->status &= ~TS_USEDFPU; |
| 110 | stts(); | 110 | stts(); |
| 111 | } else { | 111 | } else { |
| 112 | if (__copy_to_user(buf, &tsk->thread.xstate->fxsave, | 112 | if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave, |
| 113 | xstate_size)) | 113 | xstate_size)) |
| 114 | return -1; | 114 | return -1; |
| 115 | } | 115 | } |
| 116 | 116 | ||
| 117 | clear_used_math(); /* trigger finit */ | 117 | clear_used_math(); /* trigger finit */ |
| 118 | 118 | ||
| 119 | if (task_thread_info(tsk)->status & TS_XSAVE) { | 119 | if (use_xsave()) { |
| 120 | struct _fpstate __user *fx = buf; | 120 | struct _fpstate __user *fx = buf; |
| 121 | struct _xstate __user *x = buf; | 121 | struct _xstate __user *x = buf; |
| 122 | u64 xstate_bv; | 122 | u64 xstate_bv; |
| @@ -225,7 +225,7 @@ int restore_i387_xstate(void __user *buf) | |||
| 225 | clts(); | 225 | clts(); |
| 226 | task_thread_info(current)->status |= TS_USEDFPU; | 226 | task_thread_info(current)->status |= TS_USEDFPU; |
| 227 | } | 227 | } |
| 228 | if (task_thread_info(tsk)->status & TS_XSAVE) | 228 | if (use_xsave()) |
| 229 | err = restore_user_xstate(buf); | 229 | err = restore_user_xstate(buf); |
| 230 | else | 230 | else |
| 231 | err = fxrstor_checking((__force struct i387_fxsave_struct *) | 231 | err = fxrstor_checking((__force struct i387_fxsave_struct *) |
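The xsave.c hunks replace open-coded TS_XSAVE tests with use_xsave() and move the FPU state behind tsk->thread.fpu.state. The helper itself is not shown in this section; in kernels of this era it is presumably just a thin wrapper over the XSAVE CPU feature flag, roughly:

    /* Assumed shape of the helper used above (not quoted from this diff). */
    static inline int use_xsave(void)
    {
            return cpu_has_xsave;       /* i.e. boot_cpu_has(X86_FEATURE_XSAVE) */
    }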
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 2ba58206812a..737361fcd503 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
| @@ -2067,7 +2067,7 @@ static int cpuid_interception(struct vcpu_svm *svm) | |||
| 2067 | static int iret_interception(struct vcpu_svm *svm) | 2067 | static int iret_interception(struct vcpu_svm *svm) |
| 2068 | { | 2068 | { |
| 2069 | ++svm->vcpu.stat.nmi_window_exits; | 2069 | ++svm->vcpu.stat.nmi_window_exits; |
| 2070 | svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET); | 2070 | svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_IRET); |
| 2071 | svm->vcpu.arch.hflags |= HF_IRET_MASK; | 2071 | svm->vcpu.arch.hflags |= HF_IRET_MASK; |
| 2072 | return 1; | 2072 | return 1; |
| 2073 | } | 2073 | } |
| @@ -2479,7 +2479,7 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu) | |||
| 2479 | 2479 | ||
| 2480 | svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; | 2480 | svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; |
| 2481 | vcpu->arch.hflags |= HF_NMI_MASK; | 2481 | vcpu->arch.hflags |= HF_NMI_MASK; |
| 2482 | svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET); | 2482 | svm->vmcb->control.intercept |= (1ULL << INTERCEPT_IRET); |
| 2483 | ++vcpu->stat.nmi_injections; | 2483 | ++vcpu->stat.nmi_injections; |
| 2484 | } | 2484 | } |
| 2485 | 2485 | ||
| @@ -2539,10 +2539,10 @@ static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) | |||
| 2539 | 2539 | ||
| 2540 | if (masked) { | 2540 | if (masked) { |
| 2541 | svm->vcpu.arch.hflags |= HF_NMI_MASK; | 2541 | svm->vcpu.arch.hflags |= HF_NMI_MASK; |
| 2542 | svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET); | 2542 | svm->vmcb->control.intercept |= (1ULL << INTERCEPT_IRET); |
| 2543 | } else { | 2543 | } else { |
| 2544 | svm->vcpu.arch.hflags &= ~HF_NMI_MASK; | 2544 | svm->vcpu.arch.hflags &= ~HF_NMI_MASK; |
| 2545 | svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET); | 2545 | svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_IRET); |
| 2546 | } | 2546 | } |
| 2547 | } | 2547 | } |
| 2548 | 2548 | ||
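The svm.c change above widens the INTERCEPT_IRET bit manipulation from 1UL to 1ULL. The vmcb intercept word is 64 bits wide, so on a 32-bit host ~(1UL << INTERCEPT_IRET) is only a 32-bit mask; zero-extending it for the &= wipes every intercept bit above bit 31 (the |= case is harmless and is changed only for consistency). A standalone illustration, compiled with -m32 to reproduce the 32-bit behaviour; the INTERCEPT_IRET value of 20 is assumed for illustration only:

    /* Sketch: why 1UL vs 1ULL matters when clearing a bit in a u64 field
     * on a 32-bit build. */
    #include <stdio.h>
    #include <stdint.h>

    #define INTERCEPT_IRET 20           /* bit index, assumed for illustration */

    int main(void)
    {
            uint64_t intercept = 0xffffffffffffffffULL;  /* pretend all intercepts set */

            /* With -m32, 1UL is 32 bits, so ~(1UL << 20) == 0xffefffff and the
             * zero-extension to 64 bits clears bits 32..63 as a side effect. */
            uint64_t buggy = intercept & ~(1UL  << INTERCEPT_IRET);
            uint64_t fixed = intercept & ~(1ULL << INTERCEPT_IRET);

            printf("buggy: %016llx\n", (unsigned long long)buggy);
            printf("fixed: %016llx\n", (unsigned long long)fixed);
            return 0;
    }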
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bc933cfb4e66..edca080407a5 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
| @@ -2703,8 +2703,7 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) | |||
| 2703 | return 0; | 2703 | return 0; |
| 2704 | 2704 | ||
| 2705 | return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & | 2705 | return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & |
| 2706 | (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS | | 2706 | (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_NMI)); |
| 2707 | GUEST_INTR_STATE_NMI)); | ||
| 2708 | } | 2707 | } |
| 2709 | 2708 | ||
| 2710 | static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) | 2709 | static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) |
| @@ -3660,8 +3659,11 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx) | |||
| 3660 | 3659 | ||
| 3661 | /* We need to handle NMIs before interrupts are enabled */ | 3660 | /* We need to handle NMIs before interrupts are enabled */ |
| 3662 | if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && | 3661 | if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && |
| 3663 | (exit_intr_info & INTR_INFO_VALID_MASK)) | 3662 | (exit_intr_info & INTR_INFO_VALID_MASK)) { |
| 3663 | kvm_before_handle_nmi(&vmx->vcpu); | ||
| 3664 | asm("int $2"); | 3664 | asm("int $2"); |
| 3665 | kvm_after_handle_nmi(&vmx->vcpu); | ||
| 3666 | } | ||
| 3665 | 3667 | ||
| 3666 | idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; | 3668 | idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; |
| 3667 | 3669 | ||
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3c4ca98ad27f..dd9bc8fb81ab 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
| @@ -40,6 +40,7 @@ | |||
| 40 | #include <linux/user-return-notifier.h> | 40 | #include <linux/user-return-notifier.h> |
| 41 | #include <linux/srcu.h> | 41 | #include <linux/srcu.h> |
| 42 | #include <linux/slab.h> | 42 | #include <linux/slab.h> |
| 43 | #include <linux/perf_event.h> | ||
| 43 | #include <trace/events/kvm.h> | 44 | #include <trace/events/kvm.h> |
| 44 | #undef TRACE_INCLUDE_FILE | 45 | #undef TRACE_INCLUDE_FILE |
| 45 | #define CREATE_TRACE_POINTS | 46 | #define CREATE_TRACE_POINTS |
| @@ -1712,6 +1713,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, | |||
| 1712 | if (copy_from_user(cpuid_entries, entries, | 1713 | if (copy_from_user(cpuid_entries, entries, |
| 1713 | cpuid->nent * sizeof(struct kvm_cpuid_entry))) | 1714 | cpuid->nent * sizeof(struct kvm_cpuid_entry))) |
| 1714 | goto out_free; | 1715 | goto out_free; |
| 1716 | vcpu_load(vcpu); | ||
| 1715 | for (i = 0; i < cpuid->nent; i++) { | 1717 | for (i = 0; i < cpuid->nent; i++) { |
| 1716 | vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function; | 1718 | vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function; |
| 1717 | vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax; | 1719 | vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax; |
| @@ -1729,6 +1731,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, | |||
| 1729 | r = 0; | 1731 | r = 0; |
| 1730 | kvm_apic_set_version(vcpu); | 1732 | kvm_apic_set_version(vcpu); |
| 1731 | kvm_x86_ops->cpuid_update(vcpu); | 1733 | kvm_x86_ops->cpuid_update(vcpu); |
| 1734 | vcpu_put(vcpu); | ||
| 1732 | 1735 | ||
| 1733 | out_free: | 1736 | out_free: |
| 1734 | vfree(cpuid_entries); | 1737 | vfree(cpuid_entries); |
| @@ -1749,9 +1752,11 @@ static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, | |||
| 1749 | if (copy_from_user(&vcpu->arch.cpuid_entries, entries, | 1752 | if (copy_from_user(&vcpu->arch.cpuid_entries, entries, |
| 1750 | cpuid->nent * sizeof(struct kvm_cpuid_entry2))) | 1753 | cpuid->nent * sizeof(struct kvm_cpuid_entry2))) |
| 1751 | goto out; | 1754 | goto out; |
| 1755 | vcpu_load(vcpu); | ||
| 1752 | vcpu->arch.cpuid_nent = cpuid->nent; | 1756 | vcpu->arch.cpuid_nent = cpuid->nent; |
| 1753 | kvm_apic_set_version(vcpu); | 1757 | kvm_apic_set_version(vcpu); |
| 1754 | kvm_x86_ops->cpuid_update(vcpu); | 1758 | kvm_x86_ops->cpuid_update(vcpu); |
| 1759 | vcpu_put(vcpu); | ||
| 1755 | return 0; | 1760 | return 0; |
| 1756 | 1761 | ||
| 1757 | out: | 1762 | out: |
| @@ -3743,6 +3748,51 @@ static void kvm_timer_init(void) | |||
| 3743 | } | 3748 | } |
| 3744 | } | 3749 | } |
| 3745 | 3750 | ||
| 3751 | static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu); | ||
| 3752 | |||
| 3753 | static int kvm_is_in_guest(void) | ||
| 3754 | { | ||
| 3755 | return percpu_read(current_vcpu) != NULL; | ||
| 3756 | } | ||
| 3757 | |||
| 3758 | static int kvm_is_user_mode(void) | ||
| 3759 | { | ||
| 3760 | int user_mode = 3; | ||
| 3761 | |||
| 3762 | if (percpu_read(current_vcpu)) | ||
| 3763 | user_mode = kvm_x86_ops->get_cpl(percpu_read(current_vcpu)); | ||
| 3764 | |||
| 3765 | return user_mode != 0; | ||
| 3766 | } | ||
| 3767 | |||
| 3768 | static unsigned long kvm_get_guest_ip(void) | ||
| 3769 | { | ||
| 3770 | unsigned long ip = 0; | ||
| 3771 | |||
| 3772 | if (percpu_read(current_vcpu)) | ||
| 3773 | ip = kvm_rip_read(percpu_read(current_vcpu)); | ||
| 3774 | |||
| 3775 | return ip; | ||
| 3776 | } | ||
| 3777 | |||
| 3778 | static struct perf_guest_info_callbacks kvm_guest_cbs = { | ||
| 3779 | .is_in_guest = kvm_is_in_guest, | ||
| 3780 | .is_user_mode = kvm_is_user_mode, | ||
| 3781 | .get_guest_ip = kvm_get_guest_ip, | ||
| 3782 | }; | ||
| 3783 | |||
| 3784 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu) | ||
| 3785 | { | ||
| 3786 | percpu_write(current_vcpu, vcpu); | ||
| 3787 | } | ||
| 3788 | EXPORT_SYMBOL_GPL(kvm_before_handle_nmi); | ||
| 3789 | |||
| 3790 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu) | ||
| 3791 | { | ||
| 3792 | percpu_write(current_vcpu, NULL); | ||
| 3793 | } | ||
| 3794 | EXPORT_SYMBOL_GPL(kvm_after_handle_nmi); | ||
| 3795 | |||
| 3746 | int kvm_arch_init(void *opaque) | 3796 | int kvm_arch_init(void *opaque) |
| 3747 | { | 3797 | { |
| 3748 | int r; | 3798 | int r; |
| @@ -3779,6 +3829,8 @@ int kvm_arch_init(void *opaque) | |||
| 3779 | 3829 | ||
| 3780 | kvm_timer_init(); | 3830 | kvm_timer_init(); |
| 3781 | 3831 | ||
| 3832 | perf_register_guest_info_callbacks(&kvm_guest_cbs); | ||
| 3833 | |||
| 3782 | return 0; | 3834 | return 0; |
| 3783 | 3835 | ||
| 3784 | out: | 3836 | out: |
| @@ -3787,6 +3839,8 @@ out: | |||
| 3787 | 3839 | ||
| 3788 | void kvm_arch_exit(void) | 3840 | void kvm_arch_exit(void) |
| 3789 | { | 3841 | { |
| 3842 | perf_unregister_guest_info_callbacks(&kvm_guest_cbs); | ||
| 3843 | |||
| 3790 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) | 3844 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) |
| 3791 | cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block, | 3845 | cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block, |
| 3792 | CPUFREQ_TRANSITION_NOTIFIER); | 3846 | CPUFREQ_TRANSITION_NOTIFIER); |
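The perf_guest_info_callbacks registered above let the perf NMI path attribute samples that interrupt guest execution: kvm_before_handle_nmi()/kvm_after_handle_nmi() bracket the "int $2" reinjection in the vmx.c hunk so current_vcpu is non-NULL while the NMI is being handled. On the perf side the callbacks are presumably consulted roughly as sketched below; the function name is illustrative, only the callback struct and the PERF_RECORD_MISC_GUEST_* flags are taken as given:

    /* Sketch: how a sampling path can classify a sample using the
     * registered guest-info callbacks (kernel-side pseudocode). */
    static unsigned long sample_misc_flags(struct pt_regs *regs)
    {
            if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
                    return perf_guest_cbs->is_user_mode() ?
                            PERF_RECORD_MISC_GUEST_USER :
                            PERF_RECORD_MISC_GUEST_KERNEL;

            return user_mode(regs) ? PERF_RECORD_MISC_USER
                                   : PERF_RECORD_MISC_KERNEL;
    }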
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 2d101639bd8d..b7a404722d2b 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
| @@ -65,4 +65,7 @@ static inline int is_paging(struct kvm_vcpu *vcpu) | |||
| 65 | return kvm_read_cr0_bits(vcpu, X86_CR0_PG); | 65 | return kvm_read_cr0_bits(vcpu, X86_CR0_PG); |
| 66 | } | 66 | } |
| 67 | 67 | ||
| 68 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); | ||
| 69 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); | ||
| 70 | |||
| 68 | #endif | 71 | #endif |
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 419386c24b82..f871e04b6965 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile | |||
| @@ -20,17 +20,18 @@ lib-y := delay.o | |||
| 20 | lib-y += thunk_$(BITS).o | 20 | lib-y += thunk_$(BITS).o |
| 21 | lib-y += usercopy_$(BITS).o getuser.o putuser.o | 21 | lib-y += usercopy_$(BITS).o getuser.o putuser.o |
| 22 | lib-y += memcpy_$(BITS).o | 22 | lib-y += memcpy_$(BITS).o |
| 23 | lib-$(CONFIG_KPROBES) += insn.o inat.o | 23 | lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o |
| 24 | 24 | ||
| 25 | obj-y += msr.o msr-reg.o msr-reg-export.o | 25 | obj-y += msr.o msr-reg.o msr-reg-export.o |
| 26 | 26 | ||
| 27 | ifeq ($(CONFIG_X86_32),y) | 27 | ifeq ($(CONFIG_X86_32),y) |
| 28 | obj-y += atomic64_32.o | 28 | obj-y += atomic64_32.o |
| 29 | lib-y += atomic64_cx8_32.o | ||
| 29 | lib-y += checksum_32.o | 30 | lib-y += checksum_32.o |
| 30 | lib-y += strstr_32.o | 31 | lib-y += strstr_32.o |
| 31 | lib-y += semaphore_32.o string_32.o | 32 | lib-y += semaphore_32.o string_32.o |
| 32 | ifneq ($(CONFIG_X86_CMPXCHG64),y) | 33 | ifneq ($(CONFIG_X86_CMPXCHG64),y) |
| 33 | lib-y += cmpxchg8b_emu.o | 34 | lib-y += cmpxchg8b_emu.o atomic64_386_32.o |
| 34 | endif | 35 | endif |
| 35 | lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o | 36 | lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o |
| 36 | else | 37 | else |
diff --git a/arch/x86/lib/atomic64_32.c b/arch/x86/lib/atomic64_32.c index 824fa0be55a3..540179e8e9fa 100644 --- a/arch/x86/lib/atomic64_32.c +++ b/arch/x86/lib/atomic64_32.c | |||
| @@ -6,225 +6,54 @@ | |||
| 6 | #include <asm/cmpxchg.h> | 6 | #include <asm/cmpxchg.h> |
| 7 | #include <asm/atomic.h> | 7 | #include <asm/atomic.h> |
| 8 | 8 | ||
| 9 | static noinline u64 cmpxchg8b(u64 *ptr, u64 old, u64 new) | 9 | long long atomic64_read_cx8(long long, const atomic64_t *v); |
| 10 | { | 10 | EXPORT_SYMBOL(atomic64_read_cx8); |
| 11 | u32 low = new; | 11 | long long atomic64_set_cx8(long long, const atomic64_t *v); |
| 12 | u32 high = new >> 32; | 12 | EXPORT_SYMBOL(atomic64_set_cx8); |
| 13 | 13 | long long atomic64_xchg_cx8(long long, unsigned high); | |
| 14 | asm volatile( | 14 | EXPORT_SYMBOL(atomic64_xchg_cx8); |
| 15 | LOCK_PREFIX "cmpxchg8b %1\n" | 15 | long long atomic64_add_return_cx8(long long a, atomic64_t *v); |
| 16 | : "+A" (old), "+m" (*ptr) | 16 | EXPORT_SYMBOL(atomic64_add_return_cx8); |
| 17 | : "b" (low), "c" (high) | 17 | long long atomic64_sub_return_cx8(long long a, atomic64_t *v); |
| 18 | ); | 18 | EXPORT_SYMBOL(atomic64_sub_return_cx8); |
| 19 | return old; | 19 | long long atomic64_inc_return_cx8(long long a, atomic64_t *v); |
| 20 | } | 20 | EXPORT_SYMBOL(atomic64_inc_return_cx8); |
| 21 | 21 | long long atomic64_dec_return_cx8(long long a, atomic64_t *v); | |
| 22 | u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old_val, u64 new_val) | 22 | EXPORT_SYMBOL(atomic64_dec_return_cx8); |
| 23 | { | 23 | long long atomic64_dec_if_positive_cx8(atomic64_t *v); |
| 24 | return cmpxchg8b(&ptr->counter, old_val, new_val); | 24 | EXPORT_SYMBOL(atomic64_dec_if_positive_cx8); |
| 25 | } | 25 | int atomic64_inc_not_zero_cx8(atomic64_t *v); |
| 26 | EXPORT_SYMBOL(atomic64_cmpxchg); | 26 | EXPORT_SYMBOL(atomic64_inc_not_zero_cx8); |
| 27 | 27 | int atomic64_add_unless_cx8(atomic64_t *v, long long a, long long u); | |
| 28 | /** | 28 | EXPORT_SYMBOL(atomic64_add_unless_cx8); |
| 29 | * atomic64_xchg - xchg atomic64 variable | 29 | |
| 30 | * @ptr: pointer to type atomic64_t | 30 | #ifndef CONFIG_X86_CMPXCHG64 |
| 31 | * @new_val: value to assign | 31 | long long atomic64_read_386(long long, const atomic64_t *v); |
| 32 | * | 32 | EXPORT_SYMBOL(atomic64_read_386); |
| 33 | * Atomically xchgs the value of @ptr to @new_val and returns | 33 | long long atomic64_set_386(long long, const atomic64_t *v); |
| 34 | * the old value. | 34 | EXPORT_SYMBOL(atomic64_set_386); |
| 35 | */ | 35 | long long atomic64_xchg_386(long long, unsigned high); |
| 36 | u64 atomic64_xchg(atomic64_t *ptr, u64 new_val) | 36 | EXPORT_SYMBOL(atomic64_xchg_386); |
| 37 | { | 37 | long long atomic64_add_return_386(long long a, atomic64_t *v); |
| 38 | /* | 38 | EXPORT_SYMBOL(atomic64_add_return_386); |
| 39 | * Try first with a (possibly incorrect) assumption about | 39 | long long atomic64_sub_return_386(long long a, atomic64_t *v); |
| 40 | * what we have there. We'll do two loops most likely, | 40 | EXPORT_SYMBOL(atomic64_sub_return_386); |
| 41 | * but we'll get an ownership MESI transaction straight away | 41 | long long atomic64_inc_return_386(long long a, atomic64_t *v); |
| 42 | * instead of a read transaction followed by a | 42 | EXPORT_SYMBOL(atomic64_inc_return_386); |
| 43 | * flush-for-ownership transaction: | 43 | long long atomic64_dec_return_386(long long a, atomic64_t *v); |
| 44 | */ | 44 | EXPORT_SYMBOL(atomic64_dec_return_386); |
| 45 | u64 old_val, real_val = 0; | 45 | long long atomic64_add_386(long long a, atomic64_t *v); |
| 46 | 46 | EXPORT_SYMBOL(atomic64_add_386); | |
| 47 | do { | 47 | long long atomic64_sub_386(long long a, atomic64_t *v); |
| 48 | old_val = real_val; | 48 | EXPORT_SYMBOL(atomic64_sub_386); |
| 49 | 49 | long long atomic64_inc_386(long long a, atomic64_t *v); | |
| 50 | real_val = atomic64_cmpxchg(ptr, old_val, new_val); | 50 | EXPORT_SYMBOL(atomic64_inc_386); |
| 51 | 51 | long long atomic64_dec_386(long long a, atomic64_t *v); | |
| 52 | } while (real_val != old_val); | 52 | EXPORT_SYMBOL(atomic64_dec_386); |
| 53 | 53 | long long atomic64_dec_if_positive_386(atomic64_t *v); | |
| 54 | return old_val; | 54 | EXPORT_SYMBOL(atomic64_dec_if_positive_386); |
| 55 | } | 55 | int atomic64_inc_not_zero_386(atomic64_t *v); |
| 56 | EXPORT_SYMBOL(atomic64_xchg); | 56 | EXPORT_SYMBOL(atomic64_inc_not_zero_386); |
| 57 | 57 | int atomic64_add_unless_386(atomic64_t *v, long long a, long long u); | |
| 58 | /** | 58 | EXPORT_SYMBOL(atomic64_add_unless_386); |
| 59 | * atomic64_set - set atomic64 variable | 59 | #endif |
| 60 | * @ptr: pointer to type atomic64_t | ||
| 61 | * @new_val: value to assign | ||
| 62 | * | ||
| 63 | * Atomically sets the value of @ptr to @new_val. | ||
| 64 | */ | ||
| 65 | void atomic64_set(atomic64_t *ptr, u64 new_val) | ||
| 66 | { | ||
| 67 | atomic64_xchg(ptr, new_val); | ||
| 68 | } | ||
| 69 | EXPORT_SYMBOL(atomic64_set); | ||
| 70 | |||
| 71 | EXPORT_SYMBOL(atomic64_read); | ||
| 72 | /** | ||
| 73 | * atomic64_add_return - add and return | ||
| 74 | * @delta: integer value to add | ||
| 75 | * @ptr: pointer to type atomic64_t | ||
| 76 | * | ||
| 77 | * Atomically adds @delta to @ptr and returns @delta + *@ptr | ||
| 78 | */ | ||
| 79 | noinline u64 atomic64_add_return(u64 delta, atomic64_t *ptr) | ||
| 80 | { | ||
| 81 | /* | ||
| 82 | * Try first with a (possibly incorrect) assumption about | ||
| 83 | * what we have there. We'll do two loops most likely, | ||
| 84 | * but we'll get an ownership MESI transaction straight away | ||
| 85 | * instead of a read transaction followed by a | ||
| 86 | * flush-for-ownership transaction: | ||
| 87 | */ | ||
| 88 | u64 old_val, new_val, real_val = 0; | ||
| 89 | |||
| 90 | do { | ||
| 91 | old_val = real_val; | ||
| 92 | new_val = old_val + delta; | ||
| 93 | |||
| 94 | real_val = atomic64_cmpxchg(ptr, old_val, new_val); | ||
| 95 | |||
| 96 | } while (real_val != old_val); | ||
| 97 | |||
| 98 | return new_val; | ||
| 99 | } | ||
| 100 | EXPORT_SYMBOL(atomic64_add_return); | ||
| 101 | |||
| 102 | u64 atomic64_sub_return(u64 delta, atomic64_t *ptr) | ||
| 103 | { | ||
| 104 | return atomic64_add_return(-delta, ptr); | ||
| 105 | } | ||
| 106 | EXPORT_SYMBOL(atomic64_sub_return); | ||
| 107 | |||
| 108 | u64 atomic64_inc_return(atomic64_t *ptr) | ||
| 109 | { | ||
| 110 | return atomic64_add_return(1, ptr); | ||
| 111 | } | ||
| 112 | EXPORT_SYMBOL(atomic64_inc_return); | ||
| 113 | |||
| 114 | u64 atomic64_dec_return(atomic64_t *ptr) | ||
| 115 | { | ||
| 116 | return atomic64_sub_return(1, ptr); | ||
| 117 | } | ||
| 118 | EXPORT_SYMBOL(atomic64_dec_return); | ||
| 119 | |||
| 120 | /** | ||
| 121 | * atomic64_add - add integer to atomic64 variable | ||
| 122 | * @delta: integer value to add | ||
| 123 | * @ptr: pointer to type atomic64_t | ||
| 124 | * | ||
| 125 | * Atomically adds @delta to @ptr. | ||
| 126 | */ | ||
| 127 | void atomic64_add(u64 delta, atomic64_t *ptr) | ||
| 128 | { | ||
| 129 | atomic64_add_return(delta, ptr); | ||
| 130 | } | ||
| 131 | EXPORT_SYMBOL(atomic64_add); | ||
| 132 | |||
| 133 | /** | ||
| 134 | * atomic64_sub - subtract the atomic64 variable | ||
| 135 | * @delta: integer value to subtract | ||
| 136 | * @ptr: pointer to type atomic64_t | ||
| 137 | * | ||
| 138 | * Atomically subtracts @delta from @ptr. | ||
| 139 | */ | ||
| 140 | void atomic64_sub(u64 delta, atomic64_t *ptr) | ||
| 141 | { | ||
| 142 | atomic64_add(-delta, ptr); | ||
| 143 | } | ||
| 144 | EXPORT_SYMBOL(atomic64_sub); | ||
| 145 | |||
| 146 | /** | ||
| 147 | * atomic64_sub_and_test - subtract value from variable and test result | ||
| 148 | * @delta: integer value to subtract | ||
| 149 | * @ptr: pointer to type atomic64_t | ||
| 150 | * | ||
| 151 | * Atomically subtracts @delta from @ptr and returns | ||
| 152 | * true if the result is zero, or false for all | ||
| 153 | * other cases. | ||
| 154 | */ | ||
| 155 | int atomic64_sub_and_test(u64 delta, atomic64_t *ptr) | ||
| 156 | { | ||
| 157 | u64 new_val = atomic64_sub_return(delta, ptr); | ||
| 158 | |||
| 159 | return new_val == 0; | ||
| 160 | } | ||
| 161 | EXPORT_SYMBOL(atomic64_sub_and_test); | ||
| 162 | |||
| 163 | /** | ||
| 164 | * atomic64_inc - increment atomic64 variable | ||
| 165 | * @ptr: pointer to type atomic64_t | ||
| 166 | * | ||
| 167 | * Atomically increments @ptr by 1. | ||
| 168 | */ | ||
| 169 | void atomic64_inc(atomic64_t *ptr) | ||
| 170 | { | ||
| 171 | atomic64_add(1, ptr); | ||
| 172 | } | ||
| 173 | EXPORT_SYMBOL(atomic64_inc); | ||
| 174 | |||
| 175 | /** | ||
| 176 | * atomic64_dec - decrement atomic64 variable | ||
| 177 | * @ptr: pointer to type atomic64_t | ||
| 178 | * | ||
| 179 | * Atomically decrements @ptr by 1. | ||
| 180 | */ | ||
| 181 | void atomic64_dec(atomic64_t *ptr) | ||
| 182 | { | ||
| 183 | atomic64_sub(1, ptr); | ||
| 184 | } | ||
| 185 | EXPORT_SYMBOL(atomic64_dec); | ||
| 186 | |||
| 187 | /** | ||
| 188 | * atomic64_dec_and_test - decrement and test | ||
| 189 | * @ptr: pointer to type atomic64_t | ||
| 190 | * | ||
| 191 | * Atomically decrements @ptr by 1 and | ||
| 192 | * returns true if the result is 0, or false for all other | ||
| 193 | * cases. | ||
| 194 | */ | ||
| 195 | int atomic64_dec_and_test(atomic64_t *ptr) | ||
| 196 | { | ||
| 197 | return atomic64_sub_and_test(1, ptr); | ||
| 198 | } | ||
| 199 | EXPORT_SYMBOL(atomic64_dec_and_test); | ||
| 200 | |||
| 201 | /** | ||
| 202 | * atomic64_inc_and_test - increment and test | ||
| 203 | * @ptr: pointer to type atomic64_t | ||
| 204 | * | ||
| 205 | * Atomically increments @ptr by 1 | ||
| 206 | * and returns true if the result is zero, or false for all | ||
| 207 | * other cases. | ||
| 208 | */ | ||
| 209 | int atomic64_inc_and_test(atomic64_t *ptr) | ||
| 210 | { | ||
| 211 | return atomic64_sub_and_test(-1, ptr); | ||
| 212 | } | ||
| 213 | EXPORT_SYMBOL(atomic64_inc_and_test); | ||
| 214 | |||
| 215 | /** | ||
| 216 | * atomic64_add_negative - add and test if negative | ||
| 217 | * @delta: integer value to add | ||
| 218 | * @ptr: pointer to type atomic64_t | ||
| 219 | * | ||
| 220 | * Atomically adds @delta to @ptr and returns true | ||
| 221 | * if the result is negative, or false when | ||
| 222 | * result is greater than or equal to zero. | ||
| 223 | */ | ||
| 224 | int atomic64_add_negative(u64 delta, atomic64_t *ptr) | ||
| 225 | { | ||
| 226 | s64 new_val = atomic64_add_return(delta, ptr); | ||
| 227 | |||
| 228 | return new_val < 0; | ||
| 229 | } | ||
| 230 | EXPORT_SYMBOL(atomic64_add_negative); | ||
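The C implementation deleted above, built on a cmpxchg8b retry loop, is replaced by the assembly entry points declared in the new atomic64_32.c (the _cx8 variants for 586+ CPUs, the _386 variants for CPUs without cmpxchg8b). The underlying pattern is unchanged: read the old 64-bit value, compute the new one, and retry the 8-byte compare-and-exchange until no other CPU raced us. In portable C, using GCC builtins rather than kernel APIs, the add_return loop looks roughly like this:

    /* Sketch of the compare-and-exchange retry loop that backs a 64-bit
     * add_return on 32-bit x86 (GCC emits cmpxchg8b for the 8-byte builtin). */
    #include <stdint.h>

    static int64_t atomic64_add_return_sketch(int64_t delta, int64_t *v)
    {
            int64_t oldv, newv;

            do {
                    /* CAS with a dummy comparand doubles as an atomic 64-bit
                     * read: it either stores nothing or rewrites the same 0. */
                    oldv = __sync_val_compare_and_swap(v, 0, 0);
                    newv = oldv + delta;
            } while (__sync_val_compare_and_swap(v, oldv, newv) != oldv);

            return newv;
    }

The dummy-comparand read is the same trick the read64 macro in the new atomic64_cx8_32.S uses: a locked cmpxchg8b whose only purpose is to load eight bytes atomically.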
diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S new file mode 100644 index 000000000000..4a5979aa6883 --- /dev/null +++ b/arch/x86/lib/atomic64_386_32.S | |||
| @@ -0,0 +1,174 @@ | |||
| 1 | /* | ||
| 2 | * atomic64_t for 386/486 | ||
| 3 | * | ||
| 4 | * Copyright © 2010 Luca Barbieri | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify | ||
| 7 | * it under the terms of the GNU General Public License as published by | ||
| 8 | * the Free Software Foundation; either version 2 of the License, or | ||
| 9 | * (at your option) any later version. | ||
| 10 | */ | ||
| 11 | |||
| 12 | #include <linux/linkage.h> | ||
| 13 | #include <asm/alternative-asm.h> | ||
| 14 | #include <asm/dwarf2.h> | ||
| 15 | |||
| 16 | /* if you want SMP support, implement these with real spinlocks */ | ||
| 17 | .macro LOCK reg | ||
| 18 | pushfl | ||
| 19 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 20 | cli | ||
| 21 | .endm | ||
| 22 | |||
| 23 | .macro UNLOCK reg | ||
| 24 | popfl | ||
| 25 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 26 | .endm | ||
| 27 | |||
| 28 | .macro BEGIN func reg | ||
| 29 | $v = \reg | ||
| 30 | |||
| 31 | ENTRY(atomic64_\func\()_386) | ||
| 32 | CFI_STARTPROC | ||
| 33 | LOCK $v | ||
| 34 | |||
| 35 | .macro RETURN | ||
| 36 | UNLOCK $v | ||
| 37 | ret | ||
| 38 | .endm | ||
| 39 | |||
| 40 | .macro END_ | ||
| 41 | CFI_ENDPROC | ||
| 42 | ENDPROC(atomic64_\func\()_386) | ||
| 43 | .purgem RETURN | ||
| 44 | .purgem END_ | ||
| 45 | .purgem END | ||
| 46 | .endm | ||
| 47 | |||
| 48 | .macro END | ||
| 49 | RETURN | ||
| 50 | END_ | ||
| 51 | .endm | ||
| 52 | .endm | ||
| 53 | |||
| 54 | BEGIN read %ecx | ||
| 55 | movl ($v), %eax | ||
| 56 | movl 4($v), %edx | ||
| 57 | END | ||
| 58 | |||
| 59 | BEGIN set %esi | ||
| 60 | movl %ebx, ($v) | ||
| 61 | movl %ecx, 4($v) | ||
| 62 | END | ||
| 63 | |||
| 64 | BEGIN xchg %esi | ||
| 65 | movl ($v), %eax | ||
| 66 | movl 4($v), %edx | ||
| 67 | movl %ebx, ($v) | ||
| 68 | movl %ecx, 4($v) | ||
| 69 | END | ||
| 70 | |||
| 71 | BEGIN add %ecx | ||
| 72 | addl %eax, ($v) | ||
| 73 | adcl %edx, 4($v) | ||
| 74 | END | ||
| 75 | |||
| 76 | BEGIN add_return %ecx | ||
| 77 | addl ($v), %eax | ||
| 78 | adcl 4($v), %edx | ||
| 79 | movl %eax, ($v) | ||
| 80 | movl %edx, 4($v) | ||
| 81 | END | ||
| 82 | |||
| 83 | BEGIN sub %ecx | ||
| 84 | subl %eax, ($v) | ||
| 85 | sbbl %edx, 4($v) | ||
| 86 | END | ||
| 87 | |||
| 88 | BEGIN sub_return %ecx | ||
| 89 | negl %edx | ||
| 90 | negl %eax | ||
| 91 | sbbl $0, %edx | ||
| 92 | addl ($v), %eax | ||
| 93 | adcl 4($v), %edx | ||
| 94 | movl %eax, ($v) | ||
| 95 | movl %edx, 4($v) | ||
| 96 | END | ||
| 97 | |||
| 98 | BEGIN inc %esi | ||
| 99 | addl $1, ($v) | ||
| 100 | adcl $0, 4($v) | ||
| 101 | END | ||
| 102 | |||
| 103 | BEGIN inc_return %esi | ||
| 104 | movl ($v), %eax | ||
| 105 | movl 4($v), %edx | ||
| 106 | addl $1, %eax | ||
| 107 | adcl $0, %edx | ||
| 108 | movl %eax, ($v) | ||
| 109 | movl %edx, 4($v) | ||
| 110 | END | ||
| 111 | |||
| 112 | BEGIN dec %esi | ||
| 113 | subl $1, ($v) | ||
| 114 | sbbl $0, 4($v) | ||
| 115 | END | ||
| 116 | |||
| 117 | BEGIN dec_return %esi | ||
| 118 | movl ($v), %eax | ||
| 119 | movl 4($v), %edx | ||
| 120 | subl $1, %eax | ||
| 121 | sbbl $0, %edx | ||
| 122 | movl %eax, ($v) | ||
| 123 | movl %edx, 4($v) | ||
| 124 | END | ||
| 125 | |||
| 126 | BEGIN add_unless %ecx | ||
| 127 | addl %eax, %esi | ||
| 128 | adcl %edx, %edi | ||
| 129 | addl ($v), %eax | ||
| 130 | adcl 4($v), %edx | ||
| 131 | cmpl %eax, %esi | ||
| 132 | je 3f | ||
| 133 | 1: | ||
| 134 | movl %eax, ($v) | ||
| 135 | movl %edx, 4($v) | ||
| 136 | movl $1, %eax | ||
| 137 | 2: | ||
| 138 | RETURN | ||
| 139 | 3: | ||
| 140 | cmpl %edx, %edi | ||
| 141 | jne 1b | ||
| 142 | xorl %eax, %eax | ||
| 143 | jmp 2b | ||
| 144 | END_ | ||
| 145 | |||
| 146 | BEGIN inc_not_zero %esi | ||
| 147 | movl ($v), %eax | ||
| 148 | movl 4($v), %edx | ||
| 149 | testl %eax, %eax | ||
| 150 | je 3f | ||
| 151 | 1: | ||
| 152 | addl $1, %eax | ||
| 153 | adcl $0, %edx | ||
| 154 | movl %eax, ($v) | ||
| 155 | movl %edx, 4($v) | ||
| 156 | movl $1, %eax | ||
| 157 | 2: | ||
| 158 | RETURN | ||
| 159 | 3: | ||
| 160 | testl %edx, %edx | ||
| 161 | jne 1b | ||
| 162 | jmp 2b | ||
| 163 | END_ | ||
| 164 | |||
| 165 | BEGIN dec_if_positive %esi | ||
| 166 | movl ($v), %eax | ||
| 167 | movl 4($v), %edx | ||
| 168 | subl $1, %eax | ||
| 169 | sbbl $0, %edx | ||
| 170 | js 1f | ||
| 171 | movl %eax, ($v) | ||
| 172 | movl %edx, 4($v) | ||
| 173 | 1: | ||
| 174 | END | ||
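atomic64_386_32.S is the fallback for CPUs without cmpxchg8b: each operation simply disables local interrupts around a plain 64-bit read-modify-write (the LOCK/UNLOCK macros are pushfl; cli and popfl), which is only a valid locking strategy on the uniprocessor 386/486 systems this path targets, as the comment in the file itself notes. In C terms each _386 routine behaves roughly like the kernel-flavored sketch below:

    /* C-level sketch of the _386 fallback above: protect a plain 64-bit
     * read-modify-write by disabling local interrupts (UP only). */
    static inline long long atomic64_add_return_386_sketch(long long delta,
                                                           long long *v)
    {
            unsigned long flags;
            long long ret;

            local_irq_save(flags);      /* pushfl; cli in the assembly */
            *v += delta;
            ret = *v;
            local_irq_restore(flags);   /* popfl */

            return ret;
    }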
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S new file mode 100644 index 000000000000..71e080de3352 --- /dev/null +++ b/arch/x86/lib/atomic64_cx8_32.S | |||
| @@ -0,0 +1,224 @@ | |||
| 1 | /* | ||
| 2 | * atomic64_t for 586+ | ||
| 3 | * | ||
| 4 | * Copyright © 2010 Luca Barbieri | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify | ||
| 7 | * it under the terms of the GNU General Public License as published by | ||
| 8 | * the Free Software Foundation; either version 2 of the License, or | ||
| 9 | * (at your option) any later version. | ||
| 10 | */ | ||
| 11 | |||
| 12 | #include <linux/linkage.h> | ||
| 13 | #include <asm/alternative-asm.h> | ||
| 14 | #include <asm/dwarf2.h> | ||
| 15 | |||
| 16 | .macro SAVE reg | ||
| 17 | pushl %\reg | ||
| 18 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 19 | CFI_REL_OFFSET \reg, 0 | ||
| 20 | .endm | ||
| 21 | |||
| 22 | .macro RESTORE reg | ||
| 23 | popl %\reg | ||
| 24 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 25 | CFI_RESTORE \reg | ||
| 26 | .endm | ||
| 27 | |||
| 28 | .macro read64 reg | ||
| 29 | movl %ebx, %eax | ||
| 30 | movl %ecx, %edx | ||
| 31 | /* we need LOCK_PREFIX since otherwise cmpxchg8b always does the write */ | ||
| 32 | LOCK_PREFIX | ||
| 33 | cmpxchg8b (\reg) | ||
| 34 | .endm | ||
| 35 | |||
| 36 | ENTRY(atomic64_read_cx8) | ||
| 37 | CFI_STARTPROC | ||
| 38 | |||
| 39 | read64 %ecx | ||
| 40 | ret | ||
| 41 | CFI_ENDPROC | ||
| 42 | ENDPROC(atomic64_read_cx8) | ||
| 43 | |||
| 44 | ENTRY(atomic64_set_cx8) | ||
| 45 | CFI_STARTPROC | ||
| 46 | |||
| 47 | 1: | ||
| 48 | /* we don't need LOCK_PREFIX since aligned 64-bit writes | ||
| 49 | * are atomic on 586 and newer */ | ||
| 50 | cmpxchg8b (%esi) | ||
| 51 | jne 1b | ||
| 52 | |||
| 53 | ret | ||
| 54 | CFI_ENDPROC | ||
| 55 | ENDPROC(atomic64_set_cx8) | ||
| 56 | |||
| 57 | ENTRY(atomic64_xchg_cx8) | ||
| 58 | CFI_STARTPROC | ||
| 59 | |||
| 60 | movl %ebx, %eax | ||
| 61 | movl %ecx, %edx | ||
| 62 | 1: | ||
| 63 | LOCK_PREFIX | ||
| 64 | cmpxchg8b (%esi) | ||
| 65 | jne 1b | ||
| 66 | |||
| 67 | ret | ||
| 68 | CFI_ENDPROC | ||
| 69 | ENDPROC(atomic64_xchg_cx8) | ||
| 70 | |||
| 71 | .macro addsub_return func ins insc | ||
| 72 | ENTRY(atomic64_\func\()_return_cx8) | ||
| 73 | CFI_STARTPROC | ||
| 74 | SAVE ebp | ||
| 75 | SAVE ebx | ||
| 76 | SAVE esi | ||
| 77 | SAVE edi | ||
| 78 | |||
| 79 | movl %eax, %esi | ||
| 80 | movl %edx, %edi | ||
| 81 | movl %ecx, %ebp | ||
| 82 | |||
| 83 | read64 %ebp | ||
| 84 | 1: | ||
| 85 | movl %eax, %ebx | ||
| 86 | movl %edx, %ecx | ||
| 87 | \ins\()l %esi, %ebx | ||
| 88 | \insc\()l %edi, %ecx | ||
| 89 | LOCK_PREFIX | ||
| 90 | cmpxchg8b (%ebp) | ||
| 91 | jne 1b | ||
| 92 | |||
| 93 | 10: | ||
| 94 | movl %ebx, %eax | ||
| 95 | movl %ecx, %edx | ||
| 96 | RESTORE edi | ||
| 97 | RESTORE esi | ||
| 98 | RESTORE ebx | ||
| 99 | RESTORE ebp | ||
| 100 | ret | ||
| 101 | CFI_ENDPROC | ||
| 102 | ENDPROC(atomic64_\func\()_return_cx8) | ||
| 103 | .endm | ||
| 104 | |||
| 105 | addsub_return add add adc | ||
| 106 | addsub_return sub sub sbb | ||
| 107 | |||
| 108 | .macro incdec_return func ins insc | ||
| 109 | ENTRY(atomic64_\func\()_return_cx8) | ||
| 110 | CFI_STARTPROC | ||
| 111 | SAVE ebx | ||
| 112 | |||
| 113 | read64 %esi | ||
| 114 | 1: | ||
| 115 | movl %eax, %ebx | ||
| 116 | movl %edx, %ecx | ||
| 117 | \ins\()l $1, %ebx | ||
| 118 | \insc\()l $0, %ecx | ||
| 119 | LOCK_PREFIX | ||
| 120 | cmpxchg8b (%esi) | ||
| 121 | jne 1b | ||
| 122 | |||
| 123 | 10: | ||
| 124 | movl %ebx, %eax | ||
| 125 | movl %ecx, %edx | ||
| 126 | RESTORE ebx | ||
| 127 | ret | ||
| 128 | CFI_ENDPROC | ||
| 129 | ENDPROC(atomic64_\func\()_return_cx8) | ||
| 130 | .endm | ||
| 131 | |||
| 132 | incdec_return inc add adc | ||
| 133 | incdec_return dec sub sbb | ||
| 134 | |||
| 135 | ENTRY(atomic64_dec_if_positive_cx8) | ||
| 136 | CFI_STARTPROC | ||
| 137 | SAVE ebx | ||
| 138 | |||
| 139 | read64 %esi | ||
| 140 | 1: | ||
| 141 | movl %eax, %ebx | ||
| 142 | movl %edx, %ecx | ||
| 143 | subl $1, %ebx | ||
| 144 | sbb $0, %ecx | ||
| 145 | js 2f | ||
| 146 | LOCK_PREFIX | ||
| 147 | cmpxchg8b (%esi) | ||
| 148 | jne 1b | ||
| 149 | |||
| 150 | 2: | ||
| 151 | movl %ebx, %eax | ||
| 152 | movl %ecx, %edx | ||
| 153 | RESTORE ebx | ||
| 154 | ret | ||
| 155 | CFI_ENDPROC | ||
| 156 | ENDPROC(atomic64_dec_if_positive_cx8) | ||
| 157 | |||
| 158 | ENTRY(atomic64_add_unless_cx8) | ||
| 159 | CFI_STARTPROC | ||
| 160 | SAVE ebp | ||
| 161 | SAVE ebx | ||
| 162 | /* these just push the two parameters on the stack */ | ||
| 163 | SAVE edi | ||
| 164 | SAVE esi | ||
| 165 | |||
| 166 | movl %ecx, %ebp | ||
| 167 | movl %eax, %esi | ||
| 168 | movl %edx, %edi | ||
| 169 | |||
| 170 | read64 %ebp | ||
| 171 | 1: | ||
| 172 | cmpl %eax, 0(%esp) | ||
| 173 | je 4f | ||
| 174 | 2: | ||
| 175 | movl %eax, %ebx | ||
| 176 | movl %edx, %ecx | ||
| 177 | addl %esi, %ebx | ||
| 178 | adcl %edi, %ecx | ||
| 179 | LOCK_PREFIX | ||
| 180 | cmpxchg8b (%ebp) | ||
| 181 | jne 1b | ||
| 182 | |||
| 183 | movl $1, %eax | ||
| 184 | 3: | ||
| 185 | addl $8, %esp | ||
| 186 | CFI_ADJUST_CFA_OFFSET -8 | ||
| 187 | RESTORE ebx | ||
| 188 | RESTORE ebp | ||
| 189 | ret | ||
| 190 | 4: | ||
| 191 | cmpl %edx, 4(%esp) | ||
| 192 | jne 2b | ||
| 193 | xorl %eax, %eax | ||
| 194 | jmp 3b | ||
| 195 | CFI_ENDPROC | ||
| 196 | ENDPROC(atomic64_add_unless_cx8) | ||
| 197 | |||
| 198 | ENTRY(atomic64_inc_not_zero_cx8) | ||
| 199 | CFI_STARTPROC | ||
| 200 | SAVE ebx | ||
| 201 | |||
| 202 | read64 %esi | ||
| 203 | 1: | ||
| 204 | testl %eax, %eax | ||
| 205 | je 4f | ||
| 206 | 2: | ||
| 207 | movl %eax, %ebx | ||
| 208 | movl %edx, %ecx | ||
| 209 | addl $1, %ebx | ||
| 210 | adcl $0, %ecx | ||
| 211 | LOCK_PREFIX | ||
| 212 | cmpxchg8b (%esi) | ||
| 213 | jne 1b | ||
| 214 | |||
| 215 | movl $1, %eax | ||
| 216 | 3: | ||
| 217 | RESTORE ebx | ||
| 218 | ret | ||
| 219 | 4: | ||
| 220 | testl %edx, %edx | ||
| 221 | jne 2b | ||
| 222 | jmp 3b | ||
| 223 | CFI_ENDPROC | ||
| 224 | ENDPROC(atomic64_inc_not_zero_cx8) | ||
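Every cx8 routine above is the same compare-and-exchange retry loop around cmpxchg8b; roughly the following C, sketched with GCC's __sync builtins rather than the kernel's own helpers:

	/*
	 * Sketch of the loop the addsub_return/incdec_return macros expand to.
	 * Even the initial read goes through cmpxchg8b in the assembly, because
	 * a plain 64-bit load is not atomic on 32-bit x86.
	 */
	static long long sketch_add_return(long long delta, long long *v)
	{
		long long old, new;

		do {
			old = __sync_val_compare_and_swap(v, 0, 0); /* atomic 64-bit read */
			new = old + delta;
		} while (!__sync_bool_compare_and_swap(v, old, new));

		return new;
	}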
diff --git a/arch/x86/math-emu/fpu_aux.c b/arch/x86/math-emu/fpu_aux.c index aa0987088774..dc8adad10a2f 100644 --- a/arch/x86/math-emu/fpu_aux.c +++ b/arch/x86/math-emu/fpu_aux.c | |||
| @@ -30,10 +30,10 @@ static void fclex(void) | |||
| 30 | } | 30 | } |
| 31 | 31 | ||
| 32 | /* Needs to be externally visible */ | 32 | /* Needs to be externally visible */ |
| 33 | void finit_task(struct task_struct *tsk) | 33 | void finit_soft_fpu(struct i387_soft_struct *soft) |
| 34 | { | 34 | { |
| 35 | struct i387_soft_struct *soft = &tsk->thread.xstate->soft; | ||
| 36 | struct address *oaddr, *iaddr; | 35 | struct address *oaddr, *iaddr; |
| 36 | memset(soft, 0, sizeof(*soft)); | ||
| 37 | soft->cwd = 0x037f; | 37 | soft->cwd = 0x037f; |
| 38 | soft->swd = 0; | 38 | soft->swd = 0; |
| 39 | soft->ftop = 0; /* We don't keep top in the status word internally. */ | 39 | soft->ftop = 0; /* We don't keep top in the status word internally. */ |
| @@ -52,7 +52,7 @@ void finit_task(struct task_struct *tsk) | |||
| 52 | 52 | ||
| 53 | void finit(void) | 53 | void finit(void) |
| 54 | { | 54 | { |
| 55 | finit_task(current); | 55 | finit_soft_fpu(¤t->thread.fpu.state->soft); |
| 56 | } | 56 | } |
| 57 | 57 | ||
| 58 | /* | 58 | /* |
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index 5d87f586f8d7..7718541541d4 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c | |||
| @@ -681,7 +681,7 @@ int fpregs_soft_set(struct task_struct *target, | |||
| 681 | unsigned int pos, unsigned int count, | 681 | unsigned int pos, unsigned int count, |
| 682 | const void *kbuf, const void __user *ubuf) | 682 | const void *kbuf, const void __user *ubuf) |
| 683 | { | 683 | { |
| 684 | struct i387_soft_struct *s387 = &target->thread.xstate->soft; | 684 | struct i387_soft_struct *s387 = &target->thread.fpu.state->soft; |
| 685 | void *space = s387->st_space; | 685 | void *space = s387->st_space; |
| 686 | int ret; | 686 | int ret; |
| 687 | int offset, other, i, tags, regnr, tag, newtop; | 687 | int offset, other, i, tags, regnr, tag, newtop; |
| @@ -733,7 +733,7 @@ int fpregs_soft_get(struct task_struct *target, | |||
| 733 | unsigned int pos, unsigned int count, | 733 | unsigned int pos, unsigned int count, |
| 734 | void *kbuf, void __user *ubuf) | 734 | void *kbuf, void __user *ubuf) |
| 735 | { | 735 | { |
| 736 | struct i387_soft_struct *s387 = &target->thread.xstate->soft; | 736 | struct i387_soft_struct *s387 = &target->thread.fpu.state->soft; |
| 737 | const void *space = s387->st_space; | 737 | const void *space = s387->st_space; |
| 738 | int ret; | 738 | int ret; |
| 739 | int offset = (S387->ftop & 7) * 10, other = 80 - offset; | 739 | int offset = (S387->ftop & 7) * 10, other = 80 - offset; |
diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h index 50fa0ec2c8a5..2c614410a5f3 100644 --- a/arch/x86/math-emu/fpu_system.h +++ b/arch/x86/math-emu/fpu_system.h | |||
| @@ -31,7 +31,7 @@ | |||
| 31 | #define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \ | 31 | #define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \ |
| 32 | == (1 << 10)) | 32 | == (1 << 10)) |
| 33 | 33 | ||
| 34 | #define I387 (current->thread.xstate) | 34 | #define I387 (current->thread.fpu.state) |
| 35 | #define FPU_info (I387->soft.info) | 35 | #define FPU_info (I387->soft.info) |
| 36 | 36 | ||
| 37 | #define FPU_CS (*(unsigned short *) &(FPU_info->regs->cs)) | 37 | #define FPU_CS (*(unsigned short *) &(FPU_info->regs->cs)) |
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 28c68762648f..f9897f7a9ef1 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c | |||
| @@ -363,6 +363,54 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
| 363 | for (i = 0; i < MAX_NUMNODES; i++) | 363 | for (i = 0; i < MAX_NUMNODES; i++) |
| 364 | cutoff_node(i, start, end); | 364 | cutoff_node(i, start, end); |
| 365 | 365 | ||
| 366 | /* | ||
| 367 | * Join together blocks on the same node, holes between | ||
| 368 | * which don't overlap with memory on other nodes. | ||
| 369 | */ | ||
| 370 | for (i = 0; i < num_node_memblks; ++i) { | ||
| 371 | int j, k; | ||
| 372 | |||
| 373 | for (j = i + 1; j < num_node_memblks; ++j) { | ||
| 374 | unsigned long start, end; | ||
| 375 | |||
| 376 | if (memblk_nodeid[i] != memblk_nodeid[j]) | ||
| 377 | continue; | ||
| 378 | start = min(node_memblk_range[i].end, | ||
| 379 | node_memblk_range[j].end); | ||
| 380 | end = max(node_memblk_range[i].start, | ||
| 381 | node_memblk_range[j].start); | ||
| 382 | for (k = 0; k < num_node_memblks; ++k) { | ||
| 383 | if (memblk_nodeid[i] == memblk_nodeid[k]) | ||
| 384 | continue; | ||
| 385 | if (start < node_memblk_range[k].end && | ||
| 386 | end > node_memblk_range[k].start) | ||
| 387 | break; | ||
| 388 | } | ||
| 389 | if (k < num_node_memblks) | ||
| 390 | continue; | ||
| 391 | start = min(node_memblk_range[i].start, | ||
| 392 | node_memblk_range[j].start); | ||
| 393 | end = max(node_memblk_range[i].end, | ||
| 394 | node_memblk_range[j].end); | ||
| 395 | printk(KERN_INFO "SRAT: Node %d " | ||
| 396 | "[%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n", | ||
| 397 | memblk_nodeid[i], | ||
| 398 | node_memblk_range[i].start, | ||
| 399 | node_memblk_range[i].end, | ||
| 400 | node_memblk_range[j].start, | ||
| 401 | node_memblk_range[j].end, | ||
| 402 | start, end); | ||
| 403 | node_memblk_range[i].start = start; | ||
| 404 | node_memblk_range[i].end = end; | ||
| 405 | k = --num_node_memblks - j; | ||
| 406 | memmove(memblk_nodeid + j, memblk_nodeid + j+1, | ||
| 407 | k * sizeof(*memblk_nodeid)); | ||
| 408 | memmove(node_memblk_range + j, node_memblk_range + j+1, | ||
| 409 | k * sizeof(*node_memblk_range)); | ||
| 410 | --j; | ||
| 411 | } | ||
| 412 | } | ||
| 413 | |||
| 366 | memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks, | 414 | memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks, |
| 367 | memblk_nodeid); | 415 | memblk_nodeid); |
| 368 | if (memnode_shift < 0) { | 416 | if (memnode_shift < 0) { |
| @@ -461,7 +509,8 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes) | |||
| 461 | * node, it must now point to the fake node ID. | 509 | * node, it must now point to the fake node ID. |
| 462 | */ | 510 | */ |
| 463 | for (j = 0; j < MAX_LOCAL_APIC; j++) | 511 | for (j = 0; j < MAX_LOCAL_APIC; j++) |
| 464 | if (apicid_to_node[j] == nid) | 512 | if (apicid_to_node[j] == nid && |
| 513 | fake_apicid_to_node[j] == NUMA_NO_NODE) | ||
| 465 | fake_apicid_to_node[j] = i; | 514 | fake_apicid_to_node[j] = i; |
| 466 | } | 515 | } |
| 467 | for (i = 0; i < num_nodes; i++) | 516 | for (i = 0; i < num_nodes; i++) |
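The merge loop added to acpi_scan_nodes() only joins two blocks of a node when the hole between them is not claimed by any other node; the interval test, pulled out into a stand-alone sketch (hypothetical struct blk and helper names):

	struct blk { unsigned long start, end; int nid; };	/* half-open [start, end) */

	static int overlaps(unsigned long s1, unsigned long e1,
			    unsigned long s2, unsigned long e2)
	{
		return s1 < e2 && s2 < e1;
	}

	/* May blk[i] and blk[j] (same nid) be merged across the gap between them? */
	static int can_merge(const struct blk *blk, int n, int i, int j)
	{
		unsigned long gap_start = blk[i].end < blk[j].end ? blk[i].end : blk[j].end;
		unsigned long gap_end = blk[i].start > blk[j].start ? blk[i].start : blk[j].start;
		int k;

		for (k = 0; k < n; k++) {
			if (blk[k].nid == blk[i].nid)
				continue;
			if (overlaps(gap_start, gap_end, blk[k].start, blk[k].end))
				return 0;	/* another node's memory sits in the hole */
		}
		return 1;
	}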
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 2c505ee71014..b28d2f1253bb 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c | |||
| @@ -31,8 +31,9 @@ static struct op_x86_model_spec *model; | |||
| 31 | static DEFINE_PER_CPU(struct op_msrs, cpu_msrs); | 31 | static DEFINE_PER_CPU(struct op_msrs, cpu_msrs); |
| 32 | static DEFINE_PER_CPU(unsigned long, saved_lvtpc); | 32 | static DEFINE_PER_CPU(unsigned long, saved_lvtpc); |
| 33 | 33 | ||
| 34 | /* 0 == registered but off, 1 == registered and on */ | 34 | /* must be protected with get_online_cpus()/put_online_cpus(): */ |
| 35 | static int nmi_enabled = 0; | 35 | static int nmi_enabled; |
| 36 | static int ctr_running; | ||
| 36 | 37 | ||
| 37 | struct op_counter_config counter_config[OP_MAX_COUNTER]; | 38 | struct op_counter_config counter_config[OP_MAX_COUNTER]; |
| 38 | 39 | ||
| @@ -61,12 +62,16 @@ static int profile_exceptions_notify(struct notifier_block *self, | |||
| 61 | { | 62 | { |
| 62 | struct die_args *args = (struct die_args *)data; | 63 | struct die_args *args = (struct die_args *)data; |
| 63 | int ret = NOTIFY_DONE; | 64 | int ret = NOTIFY_DONE; |
| 64 | int cpu = smp_processor_id(); | ||
| 65 | 65 | ||
| 66 | switch (val) { | 66 | switch (val) { |
| 67 | case DIE_NMI: | 67 | case DIE_NMI: |
| 68 | case DIE_NMI_IPI: | 68 | case DIE_NMI_IPI: |
| 69 | model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu)); | 69 | if (ctr_running) |
| 70 | model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs)); | ||
| 71 | else if (!nmi_enabled) | ||
| 72 | break; | ||
| 73 | else | ||
| 74 | model->stop(&__get_cpu_var(cpu_msrs)); | ||
| 70 | ret = NOTIFY_STOP; | 75 | ret = NOTIFY_STOP; |
| 71 | break; | 76 | break; |
| 72 | default: | 77 | default: |
| @@ -95,24 +100,36 @@ static void nmi_cpu_save_registers(struct op_msrs *msrs) | |||
| 95 | static void nmi_cpu_start(void *dummy) | 100 | static void nmi_cpu_start(void *dummy) |
| 96 | { | 101 | { |
| 97 | struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); | 102 | struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); |
| 98 | model->start(msrs); | 103 | if (!msrs->controls) |
| 104 | WARN_ON_ONCE(1); | ||
| 105 | else | ||
| 106 | model->start(msrs); | ||
| 99 | } | 107 | } |
| 100 | 108 | ||
| 101 | static int nmi_start(void) | 109 | static int nmi_start(void) |
| 102 | { | 110 | { |
| 111 | get_online_cpus(); | ||
| 103 | on_each_cpu(nmi_cpu_start, NULL, 1); | 112 | on_each_cpu(nmi_cpu_start, NULL, 1); |
| 113 | ctr_running = 1; | ||
| 114 | put_online_cpus(); | ||
| 104 | return 0; | 115 | return 0; |
| 105 | } | 116 | } |
| 106 | 117 | ||
| 107 | static void nmi_cpu_stop(void *dummy) | 118 | static void nmi_cpu_stop(void *dummy) |
| 108 | { | 119 | { |
| 109 | struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); | 120 | struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); |
| 110 | model->stop(msrs); | 121 | if (!msrs->controls) |
| 122 | WARN_ON_ONCE(1); | ||
| 123 | else | ||
| 124 | model->stop(msrs); | ||
| 111 | } | 125 | } |
| 112 | 126 | ||
| 113 | static void nmi_stop(void) | 127 | static void nmi_stop(void) |
| 114 | { | 128 | { |
| 129 | get_online_cpus(); | ||
| 115 | on_each_cpu(nmi_cpu_stop, NULL, 1); | 130 | on_each_cpu(nmi_cpu_stop, NULL, 1); |
| 131 | ctr_running = 0; | ||
| 132 | put_online_cpus(); | ||
| 116 | } | 133 | } |
| 117 | 134 | ||
| 118 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | 135 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX |
| @@ -252,7 +269,10 @@ static int nmi_switch_event(void) | |||
| 252 | if (nmi_multiplex_on() < 0) | 269 | if (nmi_multiplex_on() < 0) |
| 253 | return -EINVAL; /* not necessary */ | 270 | return -EINVAL; /* not necessary */ |
| 254 | 271 | ||
| 255 | on_each_cpu(nmi_cpu_switch, NULL, 1); | 272 | get_online_cpus(); |
| 273 | if (ctr_running) | ||
| 274 | on_each_cpu(nmi_cpu_switch, NULL, 1); | ||
| 275 | put_online_cpus(); | ||
| 256 | 276 | ||
| 257 | return 0; | 277 | return 0; |
| 258 | } | 278 | } |
| @@ -295,6 +315,7 @@ static void free_msrs(void) | |||
| 295 | kfree(per_cpu(cpu_msrs, i).controls); | 315 | kfree(per_cpu(cpu_msrs, i).controls); |
| 296 | per_cpu(cpu_msrs, i).controls = NULL; | 316 | per_cpu(cpu_msrs, i).controls = NULL; |
| 297 | } | 317 | } |
| 318 | nmi_shutdown_mux(); | ||
| 298 | } | 319 | } |
| 299 | 320 | ||
| 300 | static int allocate_msrs(void) | 321 | static int allocate_msrs(void) |
| @@ -307,14 +328,21 @@ static int allocate_msrs(void) | |||
| 307 | per_cpu(cpu_msrs, i).counters = kzalloc(counters_size, | 328 | per_cpu(cpu_msrs, i).counters = kzalloc(counters_size, |
| 308 | GFP_KERNEL); | 329 | GFP_KERNEL); |
| 309 | if (!per_cpu(cpu_msrs, i).counters) | 330 | if (!per_cpu(cpu_msrs, i).counters) |
| 310 | return 0; | 331 | goto fail; |
| 311 | per_cpu(cpu_msrs, i).controls = kzalloc(controls_size, | 332 | per_cpu(cpu_msrs, i).controls = kzalloc(controls_size, |
| 312 | GFP_KERNEL); | 333 | GFP_KERNEL); |
| 313 | if (!per_cpu(cpu_msrs, i).controls) | 334 | if (!per_cpu(cpu_msrs, i).controls) |
| 314 | return 0; | 335 | goto fail; |
| 315 | } | 336 | } |
| 316 | 337 | ||
| 338 | if (!nmi_setup_mux()) | ||
| 339 | goto fail; | ||
| 340 | |||
| 317 | return 1; | 341 | return 1; |
| 342 | |||
| 343 | fail: | ||
| 344 | free_msrs(); | ||
| 345 | return 0; | ||
| 318 | } | 346 | } |
| 319 | 347 | ||
| 320 | static void nmi_cpu_setup(void *dummy) | 348 | static void nmi_cpu_setup(void *dummy) |
| @@ -336,49 +364,6 @@ static struct notifier_block profile_exceptions_nb = { | |||
| 336 | .priority = 2 | 364 | .priority = 2 |
| 337 | }; | 365 | }; |
| 338 | 366 | ||
| 339 | static int nmi_setup(void) | ||
| 340 | { | ||
| 341 | int err = 0; | ||
| 342 | int cpu; | ||
| 343 | |||
| 344 | if (!allocate_msrs()) | ||
| 345 | err = -ENOMEM; | ||
| 346 | else if (!nmi_setup_mux()) | ||
| 347 | err = -ENOMEM; | ||
| 348 | else | ||
| 349 | err = register_die_notifier(&profile_exceptions_nb); | ||
| 350 | |||
| 351 | if (err) { | ||
| 352 | free_msrs(); | ||
| 353 | nmi_shutdown_mux(); | ||
| 354 | return err; | ||
| 355 | } | ||
| 356 | |||
| 357 | /* We need to serialize save and setup for HT because the subset | ||
| 358 | * of msrs are distinct for save and setup operations | ||
| 359 | */ | ||
| 360 | |||
| 361 | /* Assume saved/restored counters are the same on all CPUs */ | ||
| 362 | model->fill_in_addresses(&per_cpu(cpu_msrs, 0)); | ||
| 363 | for_each_possible_cpu(cpu) { | ||
| 364 | if (!cpu) | ||
| 365 | continue; | ||
| 366 | |||
| 367 | memcpy(per_cpu(cpu_msrs, cpu).counters, | ||
| 368 | per_cpu(cpu_msrs, 0).counters, | ||
| 369 | sizeof(struct op_msr) * model->num_counters); | ||
| 370 | |||
| 371 | memcpy(per_cpu(cpu_msrs, cpu).controls, | ||
| 372 | per_cpu(cpu_msrs, 0).controls, | ||
| 373 | sizeof(struct op_msr) * model->num_controls); | ||
| 374 | |||
| 375 | mux_clone(cpu); | ||
| 376 | } | ||
| 377 | on_each_cpu(nmi_cpu_setup, NULL, 1); | ||
| 378 | nmi_enabled = 1; | ||
| 379 | return 0; | ||
| 380 | } | ||
| 381 | |||
| 382 | static void nmi_cpu_restore_registers(struct op_msrs *msrs) | 367 | static void nmi_cpu_restore_registers(struct op_msrs *msrs) |
| 383 | { | 368 | { |
| 384 | struct op_msr *counters = msrs->counters; | 369 | struct op_msr *counters = msrs->counters; |
| @@ -412,20 +397,24 @@ static void nmi_cpu_shutdown(void *dummy) | |||
| 412 | apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu)); | 397 | apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu)); |
| 413 | apic_write(APIC_LVTERR, v); | 398 | apic_write(APIC_LVTERR, v); |
| 414 | nmi_cpu_restore_registers(msrs); | 399 | nmi_cpu_restore_registers(msrs); |
| 400 | if (model->cpu_down) | ||
| 401 | model->cpu_down(); | ||
| 415 | } | 402 | } |
| 416 | 403 | ||
| 417 | static void nmi_shutdown(void) | 404 | static void nmi_cpu_up(void *dummy) |
| 418 | { | 405 | { |
| 419 | struct op_msrs *msrs; | 406 | if (nmi_enabled) |
| 407 | nmi_cpu_setup(dummy); | ||
| 408 | if (ctr_running) | ||
| 409 | nmi_cpu_start(dummy); | ||
| 410 | } | ||
| 420 | 411 | ||
| 421 | nmi_enabled = 0; | 412 | static void nmi_cpu_down(void *dummy) |
| 422 | on_each_cpu(nmi_cpu_shutdown, NULL, 1); | 413 | { |
| 423 | unregister_die_notifier(&profile_exceptions_nb); | 414 | if (ctr_running) |
| 424 | nmi_shutdown_mux(); | 415 | nmi_cpu_stop(dummy); |
| 425 | msrs = &get_cpu_var(cpu_msrs); | 416 | if (nmi_enabled) |
| 426 | model->shutdown(msrs); | 417 | nmi_cpu_shutdown(dummy); |
| 427 | free_msrs(); | ||
| 428 | put_cpu_var(cpu_msrs); | ||
| 429 | } | 418 | } |
| 430 | 419 | ||
| 431 | static int nmi_create_files(struct super_block *sb, struct dentry *root) | 420 | static int nmi_create_files(struct super_block *sb, struct dentry *root) |
| @@ -457,7 +446,6 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root) | |||
| 457 | return 0; | 446 | return 0; |
| 458 | } | 447 | } |
| 459 | 448 | ||
| 460 | #ifdef CONFIG_SMP | ||
| 461 | static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action, | 449 | static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action, |
| 462 | void *data) | 450 | void *data) |
| 463 | { | 451 | { |
| @@ -465,10 +453,10 @@ static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action, | |||
| 465 | switch (action) { | 453 | switch (action) { |
| 466 | case CPU_DOWN_FAILED: | 454 | case CPU_DOWN_FAILED: |
| 467 | case CPU_ONLINE: | 455 | case CPU_ONLINE: |
| 468 | smp_call_function_single(cpu, nmi_cpu_start, NULL, 0); | 456 | smp_call_function_single(cpu, nmi_cpu_up, NULL, 0); |
| 469 | break; | 457 | break; |
| 470 | case CPU_DOWN_PREPARE: | 458 | case CPU_DOWN_PREPARE: |
| 471 | smp_call_function_single(cpu, nmi_cpu_stop, NULL, 1); | 459 | smp_call_function_single(cpu, nmi_cpu_down, NULL, 1); |
| 472 | break; | 460 | break; |
| 473 | } | 461 | } |
| 474 | return NOTIFY_DONE; | 462 | return NOTIFY_DONE; |
| @@ -477,7 +465,75 @@ static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action, | |||
| 477 | static struct notifier_block oprofile_cpu_nb = { | 465 | static struct notifier_block oprofile_cpu_nb = { |
| 478 | .notifier_call = oprofile_cpu_notifier | 466 | .notifier_call = oprofile_cpu_notifier |
| 479 | }; | 467 | }; |
| 480 | #endif | 468 | |
| 469 | static int nmi_setup(void) | ||
| 470 | { | ||
| 471 | int err = 0; | ||
| 472 | int cpu; | ||
| 473 | |||
| 474 | if (!allocate_msrs()) | ||
| 475 | return -ENOMEM; | ||
| 476 | |||
| 477 | /* We need to serialize save and setup for HT because the subset | ||
| 478 | * of msrs are distinct for save and setup operations | ||
| 479 | */ | ||
| 480 | |||
| 481 | /* Assume saved/restored counters are the same on all CPUs */ | ||
| 482 | err = model->fill_in_addresses(&per_cpu(cpu_msrs, 0)); | ||
| 483 | if (err) | ||
| 484 | goto fail; | ||
| 485 | |||
| 486 | for_each_possible_cpu(cpu) { | ||
| 487 | if (!cpu) | ||
| 488 | continue; | ||
| 489 | |||
| 490 | memcpy(per_cpu(cpu_msrs, cpu).counters, | ||
| 491 | per_cpu(cpu_msrs, 0).counters, | ||
| 492 | sizeof(struct op_msr) * model->num_counters); | ||
| 493 | |||
| 494 | memcpy(per_cpu(cpu_msrs, cpu).controls, | ||
| 495 | per_cpu(cpu_msrs, 0).controls, | ||
| 496 | sizeof(struct op_msr) * model->num_controls); | ||
| 497 | |||
| 498 | mux_clone(cpu); | ||
| 499 | } | ||
| 500 | |||
| 501 | nmi_enabled = 0; | ||
| 502 | ctr_running = 0; | ||
| 503 | barrier(); | ||
| 504 | err = register_die_notifier(&profile_exceptions_nb); | ||
| 505 | if (err) | ||
| 506 | goto fail; | ||
| 507 | |||
| 508 | get_online_cpus(); | ||
| 509 | register_cpu_notifier(&oprofile_cpu_nb); | ||
| 510 | on_each_cpu(nmi_cpu_setup, NULL, 1); | ||
| 511 | nmi_enabled = 1; | ||
| 512 | put_online_cpus(); | ||
| 513 | |||
| 514 | return 0; | ||
| 515 | fail: | ||
| 516 | free_msrs(); | ||
| 517 | return err; | ||
| 518 | } | ||
| 519 | |||
| 520 | static void nmi_shutdown(void) | ||
| 521 | { | ||
| 522 | struct op_msrs *msrs; | ||
| 523 | |||
| 524 | get_online_cpus(); | ||
| 525 | unregister_cpu_notifier(&oprofile_cpu_nb); | ||
| 526 | on_each_cpu(nmi_cpu_shutdown, NULL, 1); | ||
| 527 | nmi_enabled = 0; | ||
| 528 | ctr_running = 0; | ||
| 529 | put_online_cpus(); | ||
| 530 | barrier(); | ||
| 531 | unregister_die_notifier(&profile_exceptions_nb); | ||
| 532 | msrs = &get_cpu_var(cpu_msrs); | ||
| 533 | model->shutdown(msrs); | ||
| 534 | free_msrs(); | ||
| 535 | put_cpu_var(cpu_msrs); | ||
| 536 | } | ||
| 481 | 537 | ||
| 482 | #ifdef CONFIG_PM | 538 | #ifdef CONFIG_PM |
| 483 | 539 | ||
| @@ -687,9 +743,6 @@ int __init op_nmi_init(struct oprofile_operations *ops) | |||
| 687 | return -ENODEV; | 743 | return -ENODEV; |
| 688 | } | 744 | } |
| 689 | 745 | ||
| 690 | #ifdef CONFIG_SMP | ||
| 691 | register_cpu_notifier(&oprofile_cpu_nb); | ||
| 692 | #endif | ||
| 693 | /* default values, can be overwritten by model */ | 746 | /* default values, can be overwritten by model */ |
| 694 | ops->create_files = nmi_create_files; | 747 | ops->create_files = nmi_create_files; |
| 695 | ops->setup = nmi_setup; | 748 | ops->setup = nmi_setup; |
| @@ -716,12 +769,6 @@ int __init op_nmi_init(struct oprofile_operations *ops) | |||
| 716 | 769 | ||
| 717 | void op_nmi_exit(void) | 770 | void op_nmi_exit(void) |
| 718 | { | 771 | { |
| 719 | if (using_nmi) { | 772 | if (using_nmi) |
| 720 | exit_sysfs(); | 773 | exit_sysfs(); |
| 721 | #ifdef CONFIG_SMP | ||
| 722 | unregister_cpu_notifier(&oprofile_cpu_nb); | ||
| 723 | #endif | ||
| 724 | } | ||
| 725 | if (model->exit) | ||
| 726 | model->exit(); | ||
| 727 | } | 774 | } |
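The restructured nmi_int.c keeps nmi_enabled/ctr_running in step with CPU hotplug by changing them only between get_online_cpus()/put_online_cpus(), so nmi_cpu_up()/nmi_cpu_down() see a consistent view; the pattern in isolation (a sketch with a made-up per-CPU callback, not the driver itself):

	#include <linux/cpu.h>
	#include <linux/smp.h>

	static int profiling_on;		/* written only with hotplug excluded */

	static void start_one_cpu(void *unused)
	{
		/* program this CPU's counters here */
	}

	static int start_all(void)
	{
		get_online_cpus();		/* no CPU can come or go while we flip state */
		on_each_cpu(start_one_cpu, NULL, 1);
		profiling_on = 1;		/* a CPU hotplugged later checks this flag */
		put_online_cpus();
		return 0;
	}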
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 090cbbec7dbd..b67a6b5aa8d4 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c | |||
| @@ -30,13 +30,10 @@ | |||
| 30 | #include "op_counter.h" | 30 | #include "op_counter.h" |
| 31 | 31 | ||
| 32 | #define NUM_COUNTERS 4 | 32 | #define NUM_COUNTERS 4 |
| 33 | #define NUM_CONTROLS 4 | ||
| 34 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | 33 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX |
| 35 | #define NUM_VIRT_COUNTERS 32 | 34 | #define NUM_VIRT_COUNTERS 32 |
| 36 | #define NUM_VIRT_CONTROLS 32 | ||
| 37 | #else | 35 | #else |
| 38 | #define NUM_VIRT_COUNTERS NUM_COUNTERS | 36 | #define NUM_VIRT_COUNTERS NUM_COUNTERS |
| 39 | #define NUM_VIRT_CONTROLS NUM_CONTROLS | ||
| 40 | #endif | 37 | #endif |
| 41 | 38 | ||
| 42 | #define OP_EVENT_MASK 0x0FFF | 39 | #define OP_EVENT_MASK 0x0FFF |
| @@ -105,102 +102,6 @@ static u32 get_ibs_caps(void) | |||
| 105 | return ibs_caps; | 102 | return ibs_caps; |
| 106 | } | 103 | } |
| 107 | 104 | ||
| 108 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | ||
| 109 | |||
| 110 | static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, | ||
| 111 | struct op_msrs const * const msrs) | ||
| 112 | { | ||
| 113 | u64 val; | ||
| 114 | int i; | ||
| 115 | |||
| 116 | /* enable active counters */ | ||
| 117 | for (i = 0; i < NUM_COUNTERS; ++i) { | ||
| 118 | int virt = op_x86_phys_to_virt(i); | ||
| 119 | if (!reset_value[virt]) | ||
| 120 | continue; | ||
| 121 | rdmsrl(msrs->controls[i].addr, val); | ||
| 122 | val &= model->reserved; | ||
| 123 | val |= op_x86_get_ctrl(model, &counter_config[virt]); | ||
| 124 | wrmsrl(msrs->controls[i].addr, val); | ||
| 125 | } | ||
| 126 | } | ||
| 127 | |||
| 128 | #endif | ||
| 129 | |||
| 130 | /* functions for op_amd_spec */ | ||
| 131 | |||
| 132 | static void op_amd_fill_in_addresses(struct op_msrs * const msrs) | ||
| 133 | { | ||
| 134 | int i; | ||
| 135 | |||
| 136 | for (i = 0; i < NUM_COUNTERS; i++) { | ||
| 137 | if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) | ||
| 138 | msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; | ||
| 139 | } | ||
| 140 | |||
| 141 | for (i = 0; i < NUM_CONTROLS; i++) { | ||
| 142 | if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) | ||
| 143 | msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; | ||
| 144 | } | ||
| 145 | } | ||
| 146 | |||
| 147 | static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, | ||
| 148 | struct op_msrs const * const msrs) | ||
| 149 | { | ||
| 150 | u64 val; | ||
| 151 | int i; | ||
| 152 | |||
| 153 | /* setup reset_value */ | ||
| 154 | for (i = 0; i < NUM_VIRT_COUNTERS; ++i) { | ||
| 155 | if (counter_config[i].enabled | ||
| 156 | && msrs->counters[op_x86_virt_to_phys(i)].addr) | ||
| 157 | reset_value[i] = counter_config[i].count; | ||
| 158 | else | ||
| 159 | reset_value[i] = 0; | ||
| 160 | } | ||
| 161 | |||
| 162 | /* clear all counters */ | ||
| 163 | for (i = 0; i < NUM_CONTROLS; ++i) { | ||
| 164 | if (unlikely(!msrs->controls[i].addr)) { | ||
| 165 | if (counter_config[i].enabled && !smp_processor_id()) | ||
| 166 | /* | ||
| 167 | * counter is reserved, this is on all | ||
| 168 | * cpus, so report only for cpu #0 | ||
| 169 | */ | ||
| 170 | op_x86_warn_reserved(i); | ||
| 171 | continue; | ||
| 172 | } | ||
| 173 | rdmsrl(msrs->controls[i].addr, val); | ||
| 174 | if (val & ARCH_PERFMON_EVENTSEL_ENABLE) | ||
| 175 | op_x86_warn_in_use(i); | ||
| 176 | val &= model->reserved; | ||
| 177 | wrmsrl(msrs->controls[i].addr, val); | ||
| 178 | } | ||
| 179 | |||
| 180 | /* avoid a false detection of ctr overflows in NMI handler */ | ||
| 181 | for (i = 0; i < NUM_COUNTERS; ++i) { | ||
| 182 | if (unlikely(!msrs->counters[i].addr)) | ||
| 183 | continue; | ||
| 184 | wrmsrl(msrs->counters[i].addr, -1LL); | ||
| 185 | } | ||
| 186 | |||
| 187 | /* enable active counters */ | ||
| 188 | for (i = 0; i < NUM_COUNTERS; ++i) { | ||
| 189 | int virt = op_x86_phys_to_virt(i); | ||
| 190 | if (!reset_value[virt]) | ||
| 191 | continue; | ||
| 192 | |||
| 193 | /* setup counter registers */ | ||
| 194 | wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]); | ||
| 195 | |||
| 196 | /* setup control registers */ | ||
| 197 | rdmsrl(msrs->controls[i].addr, val); | ||
| 198 | val &= model->reserved; | ||
| 199 | val |= op_x86_get_ctrl(model, &counter_config[virt]); | ||
| 200 | wrmsrl(msrs->controls[i].addr, val); | ||
| 201 | } | ||
| 202 | } | ||
| 203 | |||
| 204 | /* | 105 | /* |
| 205 | * 16-bit Linear Feedback Shift Register (LFSR) | 106 | * 16-bit Linear Feedback Shift Register (LFSR) |
| 206 | * | 107 | * |
| @@ -365,6 +266,125 @@ static void op_amd_stop_ibs(void) | |||
| 365 | wrmsrl(MSR_AMD64_IBSOPCTL, 0); | 266 | wrmsrl(MSR_AMD64_IBSOPCTL, 0); |
| 366 | } | 267 | } |
| 367 | 268 | ||
| 269 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | ||
| 270 | |||
| 271 | static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, | ||
| 272 | struct op_msrs const * const msrs) | ||
| 273 | { | ||
| 274 | u64 val; | ||
| 275 | int i; | ||
| 276 | |||
| 277 | /* enable active counters */ | ||
| 278 | for (i = 0; i < NUM_COUNTERS; ++i) { | ||
| 279 | int virt = op_x86_phys_to_virt(i); | ||
| 280 | if (!reset_value[virt]) | ||
| 281 | continue; | ||
| 282 | rdmsrl(msrs->controls[i].addr, val); | ||
| 283 | val &= model->reserved; | ||
| 284 | val |= op_x86_get_ctrl(model, &counter_config[virt]); | ||
| 285 | wrmsrl(msrs->controls[i].addr, val); | ||
| 286 | } | ||
| 287 | } | ||
| 288 | |||
| 289 | #endif | ||
| 290 | |||
| 291 | /* functions for op_amd_spec */ | ||
| 292 | |||
| 293 | static void op_amd_shutdown(struct op_msrs const * const msrs) | ||
| 294 | { | ||
| 295 | int i; | ||
| 296 | |||
| 297 | for (i = 0; i < NUM_COUNTERS; ++i) { | ||
| 298 | if (!msrs->counters[i].addr) | ||
| 299 | continue; | ||
| 300 | release_perfctr_nmi(MSR_K7_PERFCTR0 + i); | ||
| 301 | release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); | ||
| 302 | } | ||
| 303 | } | ||
| 304 | |||
| 305 | static int op_amd_fill_in_addresses(struct op_msrs * const msrs) | ||
| 306 | { | ||
| 307 | int i; | ||
| 308 | |||
| 309 | for (i = 0; i < NUM_COUNTERS; i++) { | ||
| 310 | if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) | ||
| 311 | goto fail; | ||
| 312 | if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) { | ||
| 313 | release_perfctr_nmi(MSR_K7_PERFCTR0 + i); | ||
| 314 | goto fail; | ||
| 315 | } | ||
| 316 | /* both registers must be reserved */ | ||
| 317 | msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; | ||
| 318 | msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; | ||
| 319 | continue; | ||
| 320 | fail: | ||
| 321 | if (!counter_config[i].enabled) | ||
| 322 | continue; | ||
| 323 | op_x86_warn_reserved(i); | ||
| 324 | op_amd_shutdown(msrs); | ||
| 325 | return -EBUSY; | ||
| 326 | } | ||
| 327 | |||
| 328 | return 0; | ||
| 329 | } | ||
| 330 | |||
| 331 | static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, | ||
| 332 | struct op_msrs const * const msrs) | ||
| 333 | { | ||
| 334 | u64 val; | ||
| 335 | int i; | ||
| 336 | |||
| 337 | /* setup reset_value */ | ||
| 338 | for (i = 0; i < NUM_VIRT_COUNTERS; ++i) { | ||
| 339 | if (counter_config[i].enabled | ||
| 340 | && msrs->counters[op_x86_virt_to_phys(i)].addr) | ||
| 341 | reset_value[i] = counter_config[i].count; | ||
| 342 | else | ||
| 343 | reset_value[i] = 0; | ||
| 344 | } | ||
| 345 | |||
| 346 | /* clear all counters */ | ||
| 347 | for (i = 0; i < NUM_COUNTERS; ++i) { | ||
| 348 | if (!msrs->controls[i].addr) | ||
| 349 | continue; | ||
| 350 | rdmsrl(msrs->controls[i].addr, val); | ||
| 351 | if (val & ARCH_PERFMON_EVENTSEL_ENABLE) | ||
| 352 | op_x86_warn_in_use(i); | ||
| 353 | val &= model->reserved; | ||
| 354 | wrmsrl(msrs->controls[i].addr, val); | ||
| 355 | /* | ||
| 356 | * avoid a false detection of ctr overflows in NMI | ||
| 357 | * handler | ||
| 358 | */ | ||
| 359 | wrmsrl(msrs->counters[i].addr, -1LL); | ||
| 360 | } | ||
| 361 | |||
| 362 | /* enable active counters */ | ||
| 363 | for (i = 0; i < NUM_COUNTERS; ++i) { | ||
| 364 | int virt = op_x86_phys_to_virt(i); | ||
| 365 | if (!reset_value[virt]) | ||
| 366 | continue; | ||
| 367 | |||
| 368 | /* setup counter registers */ | ||
| 369 | wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]); | ||
| 370 | |||
| 371 | /* setup control registers */ | ||
| 372 | rdmsrl(msrs->controls[i].addr, val); | ||
| 373 | val &= model->reserved; | ||
| 374 | val |= op_x86_get_ctrl(model, &counter_config[virt]); | ||
| 375 | wrmsrl(msrs->controls[i].addr, val); | ||
| 376 | } | ||
| 377 | |||
| 378 | if (ibs_caps) | ||
| 379 | setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_NMI, 0); | ||
| 380 | } | ||
| 381 | |||
| 382 | static void op_amd_cpu_shutdown(void) | ||
| 383 | { | ||
| 384 | if (ibs_caps) | ||
| 385 | setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1); | ||
| 386 | } | ||
| 387 | |||
| 368 | static int op_amd_check_ctrs(struct pt_regs * const regs, | 388 | static int op_amd_check_ctrs(struct pt_regs * const regs, |
| 369 | struct op_msrs const * const msrs) | 389 | struct op_msrs const * const msrs) |
| 370 | { | 390 | { |
| @@ -425,42 +445,16 @@ static void op_amd_stop(struct op_msrs const * const msrs) | |||
| 425 | op_amd_stop_ibs(); | 445 | op_amd_stop_ibs(); |
| 426 | } | 446 | } |
| 427 | 447 | ||
| 428 | static void op_amd_shutdown(struct op_msrs const * const msrs) | 448 | static int __init_ibs_nmi(void) |
| 429 | { | ||
| 430 | int i; | ||
| 431 | |||
| 432 | for (i = 0; i < NUM_COUNTERS; ++i) { | ||
| 433 | if (msrs->counters[i].addr) | ||
| 434 | release_perfctr_nmi(MSR_K7_PERFCTR0 + i); | ||
| 435 | } | ||
| 436 | for (i = 0; i < NUM_CONTROLS; ++i) { | ||
| 437 | if (msrs->controls[i].addr) | ||
| 438 | release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); | ||
| 439 | } | ||
| 440 | } | ||
| 441 | |||
| 442 | static u8 ibs_eilvt_off; | ||
| 443 | |||
| 444 | static inline void apic_init_ibs_nmi_per_cpu(void *arg) | ||
| 445 | { | ||
| 446 | ibs_eilvt_off = setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_NMI, 0); | ||
| 447 | } | ||
| 448 | |||
| 449 | static inline void apic_clear_ibs_nmi_per_cpu(void *arg) | ||
| 450 | { | ||
| 451 | setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1); | ||
| 452 | } | ||
| 453 | |||
| 454 | static int init_ibs_nmi(void) | ||
| 455 | { | 449 | { |
| 456 | #define IBSCTL_LVTOFFSETVAL (1 << 8) | 450 | #define IBSCTL_LVTOFFSETVAL (1 << 8) |
| 457 | #define IBSCTL 0x1cc | 451 | #define IBSCTL 0x1cc |
| 458 | struct pci_dev *cpu_cfg; | 452 | struct pci_dev *cpu_cfg; |
| 459 | int nodes; | 453 | int nodes; |
| 460 | u32 value = 0; | 454 | u32 value = 0; |
| 455 | u8 ibs_eilvt_off; | ||
| 461 | 456 | ||
| 462 | /* per CPU setup */ | 457 | ibs_eilvt_off = setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1); |
| 463 | on_each_cpu(apic_init_ibs_nmi_per_cpu, NULL, 1); | ||
| 464 | 458 | ||
| 465 | nodes = 0; | 459 | nodes = 0; |
| 466 | cpu_cfg = NULL; | 460 | cpu_cfg = NULL; |
| @@ -490,22 +484,15 @@ static int init_ibs_nmi(void) | |||
| 490 | return 0; | 484 | return 0; |
| 491 | } | 485 | } |
| 492 | 486 | ||
| 493 | /* uninitialize the APIC for the IBS interrupts if needed */ | ||
| 494 | static void clear_ibs_nmi(void) | ||
| 495 | { | ||
| 496 | if (ibs_caps) | ||
| 497 | on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1); | ||
| 498 | } | ||
| 499 | |||
| 500 | /* initialize the APIC for the IBS interrupts if available */ | 487 | /* initialize the APIC for the IBS interrupts if available */ |
| 501 | static void ibs_init(void) | 488 | static void init_ibs(void) |
| 502 | { | 489 | { |
| 503 | ibs_caps = get_ibs_caps(); | 490 | ibs_caps = get_ibs_caps(); |
| 504 | 491 | ||
| 505 | if (!ibs_caps) | 492 | if (!ibs_caps) |
| 506 | return; | 493 | return; |
| 507 | 494 | ||
| 508 | if (init_ibs_nmi()) { | 495 | if (__init_ibs_nmi()) { |
| 509 | ibs_caps = 0; | 496 | ibs_caps = 0; |
| 510 | return; | 497 | return; |
| 511 | } | 498 | } |
| @@ -514,14 +501,6 @@ static void ibs_init(void) | |||
| 514 | (unsigned)ibs_caps); | 501 | (unsigned)ibs_caps); |
| 515 | } | 502 | } |
| 516 | 503 | ||
| 517 | static void ibs_exit(void) | ||
| 518 | { | ||
| 519 | if (!ibs_caps) | ||
| 520 | return; | ||
| 521 | |||
| 522 | clear_ibs_nmi(); | ||
| 523 | } | ||
| 524 | |||
| 525 | static int (*create_arch_files)(struct super_block *sb, struct dentry *root); | 504 | static int (*create_arch_files)(struct super_block *sb, struct dentry *root); |
| 526 | 505 | ||
| 527 | static int setup_ibs_files(struct super_block *sb, struct dentry *root) | 506 | static int setup_ibs_files(struct super_block *sb, struct dentry *root) |
| @@ -570,27 +549,22 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root) | |||
| 570 | 549 | ||
| 571 | static int op_amd_init(struct oprofile_operations *ops) | 550 | static int op_amd_init(struct oprofile_operations *ops) |
| 572 | { | 551 | { |
| 573 | ibs_init(); | 552 | init_ibs(); |
| 574 | create_arch_files = ops->create_files; | 553 | create_arch_files = ops->create_files; |
| 575 | ops->create_files = setup_ibs_files; | 554 | ops->create_files = setup_ibs_files; |
| 576 | return 0; | 555 | return 0; |
| 577 | } | 556 | } |
| 578 | 557 | ||
| 579 | static void op_amd_exit(void) | ||
| 580 | { | ||
| 581 | ibs_exit(); | ||
| 582 | } | ||
| 583 | |||
| 584 | struct op_x86_model_spec op_amd_spec = { | 558 | struct op_x86_model_spec op_amd_spec = { |
| 585 | .num_counters = NUM_COUNTERS, | 559 | .num_counters = NUM_COUNTERS, |
| 586 | .num_controls = NUM_CONTROLS, | 560 | .num_controls = NUM_COUNTERS, |
| 587 | .num_virt_counters = NUM_VIRT_COUNTERS, | 561 | .num_virt_counters = NUM_VIRT_COUNTERS, |
| 588 | .reserved = MSR_AMD_EVENTSEL_RESERVED, | 562 | .reserved = MSR_AMD_EVENTSEL_RESERVED, |
| 589 | .event_mask = OP_EVENT_MASK, | 563 | .event_mask = OP_EVENT_MASK, |
| 590 | .init = op_amd_init, | 564 | .init = op_amd_init, |
| 591 | .exit = op_amd_exit, | ||
| 592 | .fill_in_addresses = &op_amd_fill_in_addresses, | 565 | .fill_in_addresses = &op_amd_fill_in_addresses, |
| 593 | .setup_ctrs = &op_amd_setup_ctrs, | 566 | .setup_ctrs = &op_amd_setup_ctrs, |
| 567 | .cpu_down = &op_amd_cpu_shutdown, | ||
| 594 | .check_ctrs = &op_amd_check_ctrs, | 568 | .check_ctrs = &op_amd_check_ctrs, |
| 595 | .start = &op_amd_start, | 569 | .start = &op_amd_start, |
| 596 | .stop = &op_amd_stop, | 570 | .stop = &op_amd_stop, |
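op_amd_fill_in_addresses() now treats each counter/control MSR pair as all-or-nothing and reports -EBUSY instead of silently running with a partial set; the same reserve-or-roll-back shape in generic form (sketch only; reserve_a()/reserve_b() stand in for reserve_perfctr_nmi()/reserve_evntsel_nmi()):

	#include <errno.h>

	/* Hypothetical stand-ins for the MSR reservation helpers. */
	static int reserve_a(int i) { (void)i; return 1; }
	static int reserve_b(int i) { (void)i; return 1; }
	static void release_a(int i) { (void)i; }
	static void release_b(int i) { (void)i; }

	static int reserve_pairs(int n)
	{
		int i;

		for (i = 0; i < n; i++) {
			if (!reserve_a(i))
				goto fail;
			if (!reserve_b(i)) {
				release_a(i);	/* never keep half a pair */
				goto fail;
			}
			continue;
	fail:
			while (--i >= 0) {	/* roll back everything reserved so far */
				release_b(i);
				release_a(i);
			}
			return -EBUSY;
		}
		return 0;
	}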
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index e6a160a4684a..182558dd5515 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c | |||
| @@ -385,8 +385,26 @@ static unsigned int get_stagger(void) | |||
| 385 | 385 | ||
| 386 | static unsigned long reset_value[NUM_COUNTERS_NON_HT]; | 386 | static unsigned long reset_value[NUM_COUNTERS_NON_HT]; |
| 387 | 387 | ||
| 388 | static void p4_shutdown(struct op_msrs const * const msrs) | ||
| 389 | { | ||
| 390 | int i; | ||
| 388 | 391 | ||
| 389 | static void p4_fill_in_addresses(struct op_msrs * const msrs) | 392 | for (i = 0; i < num_counters; ++i) { |
| 393 | if (msrs->counters[i].addr) | ||
| 394 | release_perfctr_nmi(msrs->counters[i].addr); | ||
| 395 | } | ||
| 396 | /* | ||
| 397 | * some of the control registers are specially reserved in | ||
| 398 | * conjunction with the counter registers (hence the starting offset). | ||
| 399 | * This saves a few bits. | ||
| 400 | */ | ||
| 401 | for (i = num_counters; i < num_controls; ++i) { | ||
| 402 | if (msrs->controls[i].addr) | ||
| 403 | release_evntsel_nmi(msrs->controls[i].addr); | ||
| 404 | } | ||
| 405 | } | ||
| 406 | |||
| 407 | static int p4_fill_in_addresses(struct op_msrs * const msrs) | ||
| 390 | { | 408 | { |
| 391 | unsigned int i; | 409 | unsigned int i; |
| 392 | unsigned int addr, cccraddr, stag; | 410 | unsigned int addr, cccraddr, stag; |
| @@ -468,6 +486,18 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs) | |||
| 468 | msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; | 486 | msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; |
| 469 | } | 487 | } |
| 470 | } | 488 | } |
| 489 | |||
| 490 | for (i = 0; i < num_counters; ++i) { | ||
| 491 | if (!counter_config[i].enabled) | ||
| 492 | continue; | ||
| 493 | if (msrs->controls[i].addr) | ||
| 494 | continue; | ||
| 495 | op_x86_warn_reserved(i); | ||
| 496 | p4_shutdown(msrs); | ||
| 497 | return -EBUSY; | ||
| 498 | } | ||
| 499 | |||
| 500 | return 0; | ||
| 471 | } | 501 | } |
| 472 | 502 | ||
| 473 | 503 | ||
| @@ -668,26 +698,6 @@ static void p4_stop(struct op_msrs const * const msrs) | |||
| 668 | } | 698 | } |
| 669 | } | 699 | } |
| 670 | 700 | ||
| 671 | static void p4_shutdown(struct op_msrs const * const msrs) | ||
| 672 | { | ||
| 673 | int i; | ||
| 674 | |||
| 675 | for (i = 0; i < num_counters; ++i) { | ||
| 676 | if (msrs->counters[i].addr) | ||
| 677 | release_perfctr_nmi(msrs->counters[i].addr); | ||
| 678 | } | ||
| 679 | /* | ||
| 680 | * some of the control registers are specially reserved in | ||
| 681 | * conjunction with the counter registers (hence the starting offset). | ||
| 682 | * This saves a few bits. | ||
| 683 | */ | ||
| 684 | for (i = num_counters; i < num_controls; ++i) { | ||
| 685 | if (msrs->controls[i].addr) | ||
| 686 | release_evntsel_nmi(msrs->controls[i].addr); | ||
| 687 | } | ||
| 688 | } | ||
| 689 | |||
| 690 | |||
| 691 | #ifdef CONFIG_SMP | 701 | #ifdef CONFIG_SMP |
| 692 | struct op_x86_model_spec op_p4_ht2_spec = { | 702 | struct op_x86_model_spec op_p4_ht2_spec = { |
| 693 | .num_counters = NUM_COUNTERS_HT2, | 703 | .num_counters = NUM_COUNTERS_HT2, |
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 2bf90fafa7b5..d769cda54082 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c | |||
| @@ -30,19 +30,46 @@ static int counter_width = 32; | |||
| 30 | 30 | ||
| 31 | static u64 *reset_value; | 31 | static u64 *reset_value; |
| 32 | 32 | ||
| 33 | static void ppro_fill_in_addresses(struct op_msrs * const msrs) | 33 | static void ppro_shutdown(struct op_msrs const * const msrs) |
| 34 | { | 34 | { |
| 35 | int i; | 35 | int i; |
| 36 | 36 | ||
| 37 | for (i = 0; i < num_counters; i++) { | 37 | for (i = 0; i < num_counters; ++i) { |
| 38 | if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) | 38 | if (!msrs->counters[i].addr) |
| 39 | msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; | 39 | continue; |
| 40 | release_perfctr_nmi(MSR_P6_PERFCTR0 + i); | ||
| 41 | release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); | ||
| 42 | } | ||
| 43 | if (reset_value) { | ||
| 44 | kfree(reset_value); | ||
| 45 | reset_value = NULL; | ||
| 40 | } | 46 | } |
| 47 | } | ||
| 48 | |||
| 49 | static int ppro_fill_in_addresses(struct op_msrs * const msrs) | ||
| 50 | { | ||
| 51 | int i; | ||
| 41 | 52 | ||
| 42 | for (i = 0; i < num_counters; i++) { | 53 | for (i = 0; i < num_counters; i++) { |
| 43 | if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) | 54 | if (!reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) |
| 44 | msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; | 55 | goto fail; |
| 56 | if (!reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) { | ||
| 57 | release_perfctr_nmi(MSR_P6_PERFCTR0 + i); | ||
| 58 | goto fail; | ||
| 59 | } | ||
| 60 | /* both registers must be reserved */ | ||
| 61 | msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; | ||
| 62 | msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; | ||
| 63 | continue; | ||
| 64 | fail: | ||
| 65 | if (!counter_config[i].enabled) | ||
| 66 | continue; | ||
| 67 | op_x86_warn_reserved(i); | ||
| 68 | ppro_shutdown(msrs); | ||
| 69 | return -EBUSY; | ||
| 45 | } | 70 | } |
| 71 | |||
| 72 | return 0; | ||
| 46 | } | 73 | } |
| 47 | 74 | ||
| 48 | 75 | ||
| @@ -78,26 +105,17 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model, | |||
| 78 | 105 | ||
| 79 | /* clear all counters */ | 106 | /* clear all counters */ |
| 80 | for (i = 0; i < num_counters; ++i) { | 107 | for (i = 0; i < num_counters; ++i) { |
| 81 | if (unlikely(!msrs->controls[i].addr)) { | 108 | if (!msrs->controls[i].addr) |
| 82 | if (counter_config[i].enabled && !smp_processor_id()) | ||
| 83 | /* | ||
| 84 | * counter is reserved, this is on all | ||
| 85 | * cpus, so report only for cpu #0 | ||
| 86 | */ | ||
| 87 | op_x86_warn_reserved(i); | ||
| 88 | continue; | 109 | continue; |
| 89 | } | ||
| 90 | rdmsrl(msrs->controls[i].addr, val); | 110 | rdmsrl(msrs->controls[i].addr, val); |
| 91 | if (val & ARCH_PERFMON_EVENTSEL_ENABLE) | 111 | if (val & ARCH_PERFMON_EVENTSEL_ENABLE) |
| 92 | op_x86_warn_in_use(i); | 112 | op_x86_warn_in_use(i); |
| 93 | val &= model->reserved; | 113 | val &= model->reserved; |
| 94 | wrmsrl(msrs->controls[i].addr, val); | 114 | wrmsrl(msrs->controls[i].addr, val); |
| 95 | } | 115 | /* |
| 96 | 116 | * avoid a false detection of ctr overflows in NMI * | |
| 97 | /* avoid a false detection of ctr overflows in NMI handler */ | 117 | * handler |
| 98 | for (i = 0; i < num_counters; ++i) { | 118 | */ |
| 99 | if (unlikely(!msrs->counters[i].addr)) | ||
| 100 | continue; | ||
| 101 | wrmsrl(msrs->counters[i].addr, -1LL); | 119 | wrmsrl(msrs->counters[i].addr, -1LL); |
| 102 | } | 120 | } |
| 103 | 121 | ||
| @@ -189,25 +207,6 @@ static void ppro_stop(struct op_msrs const * const msrs) | |||
| 189 | } | 207 | } |
| 190 | } | 208 | } |
| 191 | 209 | ||
| 192 | static void ppro_shutdown(struct op_msrs const * const msrs) | ||
| 193 | { | ||
| 194 | int i; | ||
| 195 | |||
| 196 | for (i = 0; i < num_counters; ++i) { | ||
| 197 | if (msrs->counters[i].addr) | ||
| 198 | release_perfctr_nmi(MSR_P6_PERFCTR0 + i); | ||
| 199 | } | ||
| 200 | for (i = 0; i < num_counters; ++i) { | ||
| 201 | if (msrs->controls[i].addr) | ||
| 202 | release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); | ||
| 203 | } | ||
| 204 | if (reset_value) { | ||
| 205 | kfree(reset_value); | ||
| 206 | reset_value = NULL; | ||
| 207 | } | ||
| 208 | } | ||
| 209 | |||
| 210 | |||
| 211 | struct op_x86_model_spec op_ppro_spec = { | 210 | struct op_x86_model_spec op_ppro_spec = { |
| 212 | .num_counters = 2, | 211 | .num_counters = 2, |
| 213 | .num_controls = 2, | 212 | .num_controls = 2, |
| @@ -239,11 +238,11 @@ static void arch_perfmon_setup_counters(void) | |||
| 239 | if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 && | 238 | if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 && |
| 240 | current_cpu_data.x86_model == 15) { | 239 | current_cpu_data.x86_model == 15) { |
| 241 | eax.split.version_id = 2; | 240 | eax.split.version_id = 2; |
| 242 | eax.split.num_events = 2; | 241 | eax.split.num_counters = 2; |
| 243 | eax.split.bit_width = 40; | 242 | eax.split.bit_width = 40; |
| 244 | } | 243 | } |
| 245 | 244 | ||
| 246 | num_counters = eax.split.num_events; | 245 | num_counters = eax.split.num_counters; |
| 247 | 246 | ||
| 248 | op_arch_perfmon_spec.num_counters = num_counters; | 247 | op_arch_perfmon_spec.num_counters = num_counters; |
| 249 | op_arch_perfmon_spec.num_controls = num_counters; | 248 | op_arch_perfmon_spec.num_controls = num_counters; |
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index ff82a755edd4..89017fa1fd63 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h | |||
| @@ -40,10 +40,10 @@ struct op_x86_model_spec { | |||
| 40 | u64 reserved; | 40 | u64 reserved; |
| 41 | u16 event_mask; | 41 | u16 event_mask; |
| 42 | int (*init)(struct oprofile_operations *ops); | 42 | int (*init)(struct oprofile_operations *ops); |
| 43 | void (*exit)(void); | 43 | int (*fill_in_addresses)(struct op_msrs * const msrs); |
| 44 | void (*fill_in_addresses)(struct op_msrs * const msrs); | ||
| 45 | void (*setup_ctrs)(struct op_x86_model_spec const *model, | 44 | void (*setup_ctrs)(struct op_x86_model_spec const *model, |
| 46 | struct op_msrs const * const msrs); | 45 | struct op_msrs const * const msrs); |
| 46 | void (*cpu_down)(void); | ||
| 47 | int (*check_ctrs)(struct pt_regs * const regs, | 47 | int (*check_ctrs)(struct pt_regs * const regs, |
| 48 | struct op_msrs const * const msrs); | 48 | struct op_msrs const * const msrs); |
| 49 | void (*start)(struct op_msrs const * const msrs); | 49 | void (*start)(struct op_msrs const * const msrs); |
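With fill_in_addresses returning int and the global exit hook replaced by a per-CPU cpu_down hook, callers can abort setup on reservation failure and tear counters down per CPU; a compressed sketch of how such an ops table is driven (illustrative struct, not the kernel's op_x86_model_spec):

	struct model_ops {
		int  (*fill_in_addresses)(void *msrs);	/* may now fail, e.g. -EBUSY */
		void (*cpu_down)(void);			/* optional, called as a CPU goes away */
	};

	static int setup_with(const struct model_ops *ops, void *msrs)
	{
		int err = ops->fill_in_addresses(msrs);

		if (err)
			return err;	/* propagate instead of limping on with no MSRs */
		/* ... clone the MSR layout to the other CPUs, program counters ... */
		return 0;
	}

	static void cpu_going_down(const struct model_ops *ops)
	{
		if (ops->cpu_down)	/* hook is optional, as in the header above */
			ops->cpu_down();
	}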
diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c index d5c7aefe56ff..7ef3a2735df3 100644 --- a/arch/x86/pci/mrst.c +++ b/arch/x86/pci/mrst.c | |||
| @@ -247,6 +247,10 @@ static void __devinit pci_fixed_bar_fixup(struct pci_dev *dev) | |||
| 247 | u32 size; | 247 | u32 size; |
| 248 | int i; | 248 | int i; |
| 249 | 249 | ||
| 250 | /* Must have extended configuration space */ | ||
| 251 | if (dev->cfg_size < PCIE_CAP_OFFSET + 4) | ||
| 252 | return; | ||
| 253 | |||
| 250 | /* Fixup the BAR sizes for fixed BAR devices and make them unmoveable */ | 254 | /* Fixup the BAR sizes for fixed BAR devices and make them unmoveable */ |
| 251 | offset = fixed_bar_cap(dev->bus, dev->devfn); | 255 | offset = fixed_bar_cap(dev->bus, dev->devfn); |
| 252 | if (!offset || PCI_DEVFN(2, 0) == dev->devfn || | 256 | if (!offset || PCI_DEVFN(2, 0) == dev->devfn || |
