| author | Robert Richter <robert.richter@amd.com> | 2010-06-04 05:33:10 -0400 |
|---|---|---|
| committer | Robert Richter <robert.richter@amd.com> | 2010-06-04 05:33:10 -0400 |
| commit | d8a382d2662822248a97ce9d670b90e68aefbd3a (patch) | |
| tree | 4f5bbd5d0a5881ed42de611402ea4ac2c6d6ff48 /arch/x86 | |
| parent | 45c34e05c4e3d36e7c44e790241ea11a1d90d54e (diff) | |
| parent | c6df8d5ab87a246942d138321e1721edbb69f6e1 (diff) | |
Merge remote branch 'tip/perf/urgent' into oprofile/urgent
Diffstat (limited to 'arch/x86')
185 files changed, 11126 insertions(+), 7955 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9458685902bd..dcb0593b4a66 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
| @@ -53,11 +53,15 @@ config X86 | |||
| 53 | select HAVE_KERNEL_LZMA | 53 | select HAVE_KERNEL_LZMA |
| 54 | select HAVE_KERNEL_LZO | 54 | select HAVE_KERNEL_LZO |
| 55 | select HAVE_HW_BREAKPOINT | 55 | select HAVE_HW_BREAKPOINT |
| 56 | select HAVE_MIXED_BREAKPOINTS_REGS | ||
| 56 | select PERF_EVENTS | 57 | select PERF_EVENTS |
| 57 | select ANON_INODES | 58 | select ANON_INODES |
| 58 | select HAVE_ARCH_KMEMCHECK | 59 | select HAVE_ARCH_KMEMCHECK |
| 59 | select HAVE_USER_RETURN_NOTIFIER | 60 | select HAVE_USER_RETURN_NOTIFIER |
| 60 | 61 | ||
| 62 | config INSTRUCTION_DECODER | ||
| 63 | def_bool (KPROBES || PERF_EVENTS) | ||
| 64 | |||
| 61 | config OUTPUT_FORMAT | 65 | config OUTPUT_FORMAT |
| 62 | string | 66 | string |
| 63 | default "elf32-i386" if X86_32 | 67 | default "elf32-i386" if X86_32 |
| @@ -105,6 +109,9 @@ config SBUS | |||
| 105 | config NEED_DMA_MAP_STATE | 109 | config NEED_DMA_MAP_STATE |
| 106 | def_bool (X86_64 || DMAR || DMA_API_DEBUG) | 110 | def_bool (X86_64 || DMAR || DMA_API_DEBUG) |
| 107 | 111 | ||
| 112 | config NEED_SG_DMA_LENGTH | ||
| 113 | def_bool y | ||
| 114 | |||
| 108 | config GENERIC_ISA_DMA | 115 | config GENERIC_ISA_DMA |
| 109 | def_bool y | 116 | def_bool y |
| 110 | 117 | ||
| @@ -197,20 +204,17 @@ config HAVE_INTEL_TXT | |||
| 197 | 204 | ||
| 198 | # Use the generic interrupt handling code in kernel/irq/: | 205 | # Use the generic interrupt handling code in kernel/irq/: |
| 199 | config GENERIC_HARDIRQS | 206 | config GENERIC_HARDIRQS |
| 200 | bool | 207 | def_bool y |
| 201 | default y | ||
| 202 | 208 | ||
| 203 | config GENERIC_HARDIRQS_NO__DO_IRQ | 209 | config GENERIC_HARDIRQS_NO__DO_IRQ |
| 204 | def_bool y | 210 | def_bool y |
| 205 | 211 | ||
| 206 | config GENERIC_IRQ_PROBE | 212 | config GENERIC_IRQ_PROBE |
| 207 | bool | 213 | def_bool y |
| 208 | default y | ||
| 209 | 214 | ||
| 210 | config GENERIC_PENDING_IRQ | 215 | config GENERIC_PENDING_IRQ |
| 211 | bool | 216 | def_bool y |
| 212 | depends on GENERIC_HARDIRQS && SMP | 217 | depends on GENERIC_HARDIRQS && SMP |
| 213 | default y | ||
| 214 | 218 | ||
| 215 | config USE_GENERIC_SMP_HELPERS | 219 | config USE_GENERIC_SMP_HELPERS |
| 216 | def_bool y | 220 | def_bool y |
| @@ -225,19 +229,22 @@ config X86_64_SMP | |||
| 225 | depends on X86_64 && SMP | 229 | depends on X86_64 && SMP |
| 226 | 230 | ||
| 227 | config X86_HT | 231 | config X86_HT |
| 228 | bool | 232 | def_bool y |
| 229 | depends on SMP | 233 | depends on SMP |
| 230 | default y | ||
| 231 | 234 | ||
| 232 | config X86_TRAMPOLINE | 235 | config X86_TRAMPOLINE |
| 233 | bool | 236 | def_bool y |
| 234 | depends on SMP || (64BIT && ACPI_SLEEP) | 237 | depends on SMP || (64BIT && ACPI_SLEEP) |
| 235 | default y | ||
| 236 | 238 | ||
| 237 | config X86_32_LAZY_GS | 239 | config X86_32_LAZY_GS |
| 238 | def_bool y | 240 | def_bool y |
| 239 | depends on X86_32 && !CC_STACKPROTECTOR | 241 | depends on X86_32 && !CC_STACKPROTECTOR |
| 240 | 242 | ||
| 243 | config ARCH_HWEIGHT_CFLAGS | ||
| 244 | string | ||
| 245 | default "-fcall-saved-ecx -fcall-saved-edx" if X86_32 | ||
| 246 | default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64 | ||
| 247 | |||
| 241 | config KTIME_SCALAR | 248 | config KTIME_SCALAR |
| 242 | def_bool X86_32 | 249 | def_bool X86_32 |
| 243 | source "init/Kconfig" | 250 | source "init/Kconfig" |
| @@ -447,7 +454,7 @@ config X86_NUMAQ | |||
| 447 | firmware with - send email to <Martin.Bligh@us.ibm.com>. | 454 | firmware with - send email to <Martin.Bligh@us.ibm.com>. |
| 448 | 455 | ||
| 449 | config X86_SUPPORTS_MEMORY_FAILURE | 456 | config X86_SUPPORTS_MEMORY_FAILURE |
| 450 | bool | 457 | def_bool y |
| 451 | # MCE code calls memory_failure(): | 458 | # MCE code calls memory_failure(): |
| 452 | depends on X86_MCE | 459 | depends on X86_MCE |
| 453 | # On 32-bit this adds too big of NODES_SHIFT and we run out of page flags: | 460 | # On 32-bit this adds too big of NODES_SHIFT and we run out of page flags: |
| @@ -455,7 +462,6 @@ config X86_SUPPORTS_MEMORY_FAILURE | |||
| 455 | # On 32-bit SPARSEMEM adds too big of SECTIONS_WIDTH: | 462 | # On 32-bit SPARSEMEM adds too big of SECTIONS_WIDTH: |
| 456 | depends on X86_64 || !SPARSEMEM | 463 | depends on X86_64 || !SPARSEMEM |
| 457 | select ARCH_SUPPORTS_MEMORY_FAILURE | 464 | select ARCH_SUPPORTS_MEMORY_FAILURE |
| 458 | default y | ||
| 459 | 465 | ||
| 460 | config X86_VISWS | 466 | config X86_VISWS |
| 461 | bool "SGI 320/540 (Visual Workstation)" | 467 | bool "SGI 320/540 (Visual Workstation)" |
| @@ -570,7 +576,6 @@ config PARAVIRT_SPINLOCKS | |||
| 570 | 576 | ||
| 571 | config PARAVIRT_CLOCK | 577 | config PARAVIRT_CLOCK |
| 572 | bool | 578 | bool |
| 573 | default n | ||
| 574 | 579 | ||
| 575 | endif | 580 | endif |
| 576 | 581 | ||
| @@ -749,7 +754,6 @@ config MAXSMP | |||
| 749 | bool "Configure Maximum number of SMP Processors and NUMA Nodes" | 754 | bool "Configure Maximum number of SMP Processors and NUMA Nodes" |
| 750 | depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL | 755 | depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL |
| 751 | select CPUMASK_OFFSTACK | 756 | select CPUMASK_OFFSTACK |
| 752 | default n | ||
| 753 | ---help--- | 757 | ---help--- |
| 754 | Configure maximum number of CPUS and NUMA Nodes for this architecture. | 758 | Configure maximum number of CPUS and NUMA Nodes for this architecture. |
| 755 | If unsure, say N. | 759 | If unsure, say N. |
| @@ -829,7 +833,6 @@ config X86_VISWS_APIC | |||
| 829 | 833 | ||
| 830 | config X86_REROUTE_FOR_BROKEN_BOOT_IRQS | 834 | config X86_REROUTE_FOR_BROKEN_BOOT_IRQS |
| 831 | bool "Reroute for broken boot IRQs" | 835 | bool "Reroute for broken boot IRQs" |
| 832 | default n | ||
| 833 | depends on X86_IO_APIC | 836 | depends on X86_IO_APIC |
| 834 | ---help--- | 837 | ---help--- |
| 835 | This option enables a workaround that fixes a source of | 838 | This option enables a workaround that fixes a source of |
| @@ -876,9 +879,8 @@ config X86_MCE_AMD | |||
| 876 | the DRAM Error Threshold. | 879 | the DRAM Error Threshold. |
| 877 | 880 | ||
| 878 | config X86_ANCIENT_MCE | 881 | config X86_ANCIENT_MCE |
| 879 | def_bool n | 882 | bool "Support for old Pentium 5 / WinChip machine checks" |
| 880 | depends on X86_32 && X86_MCE | 883 | depends on X86_32 && X86_MCE |
| 881 | prompt "Support for old Pentium 5 / WinChip machine checks" | ||
| 882 | ---help--- | 884 | ---help--- |
| 883 | Include support for machine check handling on old Pentium 5 or WinChip | 885 | Include support for machine check handling on old Pentium 5 or WinChip |
| 884 | systems. These typically need to be enabled explicitely on the command | 886 | systems. These typically need to be enabled explicitely on the command |
| @@ -886,8 +888,7 @@ config X86_ANCIENT_MCE | |||
| 886 | 888 | ||
| 887 | config X86_MCE_THRESHOLD | 889 | config X86_MCE_THRESHOLD |
| 888 | depends on X86_MCE_AMD || X86_MCE_INTEL | 890 | depends on X86_MCE_AMD || X86_MCE_INTEL |
| 889 | bool | 891 | def_bool y |
| 890 | default y | ||
| 891 | 892 | ||
| 892 | config X86_MCE_INJECT | 893 | config X86_MCE_INJECT |
| 893 | depends on X86_MCE | 894 | depends on X86_MCE |
| @@ -1026,8 +1027,8 @@ config X86_CPUID | |||
| 1026 | 1027 | ||
| 1027 | choice | 1028 | choice |
| 1028 | prompt "High Memory Support" | 1029 | prompt "High Memory Support" |
| 1029 | default HIGHMEM4G if !X86_NUMAQ | ||
| 1030 | default HIGHMEM64G if X86_NUMAQ | 1030 | default HIGHMEM64G if X86_NUMAQ |
| 1031 | default HIGHMEM4G | ||
| 1031 | depends on X86_32 | 1032 | depends on X86_32 |
| 1032 | 1033 | ||
| 1033 | config NOHIGHMEM | 1034 | config NOHIGHMEM |
| @@ -1285,7 +1286,7 @@ source "mm/Kconfig" | |||
| 1285 | 1286 | ||
| 1286 | config HIGHPTE | 1287 | config HIGHPTE |
| 1287 | bool "Allocate 3rd-level pagetables from highmem" | 1288 | bool "Allocate 3rd-level pagetables from highmem" |
| 1288 | depends on X86_32 && (HIGHMEM4G || HIGHMEM64G) | 1289 | depends on HIGHMEM |
| 1289 | ---help--- | 1290 | ---help--- |
| 1290 | The VM uses one page table entry for each page of physical memory. | 1291 | The VM uses one page table entry for each page of physical memory. |
| 1291 | For systems with a lot of RAM, this can be wasteful of precious | 1292 | For systems with a lot of RAM, this can be wasteful of precious |
| @@ -1369,8 +1370,7 @@ config MATH_EMULATION | |||
| 1369 | kernel, it won't hurt. | 1370 | kernel, it won't hurt. |
| 1370 | 1371 | ||
| 1371 | config MTRR | 1372 | config MTRR |
| 1372 | bool | 1373 | def_bool y |
| 1373 | default y | ||
| 1374 | prompt "MTRR (Memory Type Range Register) support" if EMBEDDED | 1374 | prompt "MTRR (Memory Type Range Register) support" if EMBEDDED |
| 1375 | ---help--- | 1375 | ---help--- |
| 1376 | On Intel P6 family processors (Pentium Pro, Pentium II and later) | 1376 | On Intel P6 family processors (Pentium Pro, Pentium II and later) |
| @@ -1436,8 +1436,7 @@ config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT | |||
| 1436 | mtrr_spare_reg_nr=N on the kernel command line. | 1436 | mtrr_spare_reg_nr=N on the kernel command line. |
| 1437 | 1437 | ||
| 1438 | config X86_PAT | 1438 | config X86_PAT |
| 1439 | bool | 1439 | def_bool y |
| 1440 | default y | ||
| 1441 | prompt "x86 PAT support" if EMBEDDED | 1440 | prompt "x86 PAT support" if EMBEDDED |
| 1442 | depends on MTRR | 1441 | depends on MTRR |
| 1443 | ---help--- | 1442 | ---help--- |
| @@ -1605,8 +1604,7 @@ config X86_NEED_RELOCS | |||
| 1605 | depends on X86_32 && RELOCATABLE | 1604 | depends on X86_32 && RELOCATABLE |
| 1606 | 1605 | ||
| 1607 | config PHYSICAL_ALIGN | 1606 | config PHYSICAL_ALIGN |
| 1608 | hex | 1607 | hex "Alignment value to which kernel should be aligned" if X86_32 |
| 1609 | prompt "Alignment value to which kernel should be aligned" if X86_32 | ||
| 1610 | default "0x1000000" | 1608 | default "0x1000000" |
| 1611 | range 0x2000 0x1000000 | 1609 | range 0x2000 0x1000000 |
| 1612 | ---help--- | 1610 | ---help--- |
| @@ -1653,7 +1651,6 @@ config COMPAT_VDSO | |||
| 1653 | 1651 | ||
| 1654 | config CMDLINE_BOOL | 1652 | config CMDLINE_BOOL |
| 1655 | bool "Built-in kernel command line" | 1653 | bool "Built-in kernel command line" |
| 1656 | default n | ||
| 1657 | ---help--- | 1654 | ---help--- |
| 1658 | Allow for specifying boot arguments to the kernel at | 1655 | Allow for specifying boot arguments to the kernel at |
| 1659 | build time. On some systems (e.g. embedded ones), it is | 1656 | build time. On some systems (e.g. embedded ones), it is |
| @@ -1687,7 +1684,6 @@ config CMDLINE | |||
| 1687 | 1684 | ||
| 1688 | config CMDLINE_OVERRIDE | 1685 | config CMDLINE_OVERRIDE |
| 1689 | bool "Built-in command line overrides boot loader arguments" | 1686 | bool "Built-in command line overrides boot loader arguments" |
| 1690 | default n | ||
| 1691 | depends on CMDLINE_BOOL | 1687 | depends on CMDLINE_BOOL |
| 1692 | ---help--- | 1688 | ---help--- |
| 1693 | Set this option to 'Y' to have the kernel ignore the boot loader | 1689 | Set this option to 'Y' to have the kernel ignore the boot loader |
| @@ -1710,6 +1706,10 @@ config HAVE_ARCH_EARLY_PFN_TO_NID | |||
| 1710 | def_bool X86_64 | 1706 | def_bool X86_64 |
| 1711 | depends on NUMA | 1707 | depends on NUMA |
| 1712 | 1708 | ||
| 1709 | config USE_PERCPU_NUMA_NODE_ID | ||
| 1710 | def_bool X86_64 | ||
| 1711 | depends on NUMA | ||
| 1712 | |||
| 1713 | menu "Power management and ACPI options" | 1713 | menu "Power management and ACPI options" |
| 1714 | 1714 | ||
| 1715 | config ARCH_HIBERNATION_HEADER | 1715 | config ARCH_HIBERNATION_HEADER |
| @@ -1723,8 +1723,7 @@ source "drivers/acpi/Kconfig" | |||
| 1723 | source "drivers/sfi/Kconfig" | 1723 | source "drivers/sfi/Kconfig" |
| 1724 | 1724 | ||
| 1725 | config X86_APM_BOOT | 1725 | config X86_APM_BOOT |
| 1726 | bool | 1726 | def_bool y |
| 1727 | default y | ||
| 1728 | depends on APM || APM_MODULE | 1727 | depends on APM || APM_MODULE |
| 1729 | 1728 | ||
| 1730 | menuconfig APM | 1729 | menuconfig APM |
| @@ -1931,6 +1930,14 @@ config PCI_MMCONFIG | |||
| 1931 | bool "Support mmconfig PCI config space access" | 1930 | bool "Support mmconfig PCI config space access" |
| 1932 | depends on X86_64 && PCI && ACPI | 1931 | depends on X86_64 && PCI && ACPI |
| 1933 | 1932 | ||
| 1933 | config PCI_CNB20LE_QUIRK | ||
| 1934 | bool "Read CNB20LE Host Bridge Windows" | ||
| 1935 | depends on PCI | ||
| 1936 | help | ||
| 1937 | Read the PCI windows out of the CNB20LE host bridge. This allows | ||
| 1938 | PCI hotplug to work on systems with the CNB20LE chipset which do | ||
| 1939 | not have ACPI. | ||
| 1940 | |||
| 1934 | config DMAR | 1941 | config DMAR |
| 1935 | bool "Support for DMA Remapping Devices (EXPERIMENTAL)" | 1942 | bool "Support for DMA Remapping Devices (EXPERIMENTAL)" |
| 1936 | depends on PCI_MSI && ACPI && EXPERIMENTAL | 1943 | depends on PCI_MSI && ACPI && EXPERIMENTAL |
| @@ -1953,8 +1960,7 @@ config DMAR_DEFAULT_ON | |||
| 1953 | experimental. | 1960 | experimental. |
| 1954 | 1961 | ||
| 1955 | config DMAR_BROKEN_GFX_WA | 1962 | config DMAR_BROKEN_GFX_WA |
| 1956 | def_bool n | 1963 | bool "Workaround broken graphics drivers (going away soon)" |
| 1957 | prompt "Workaround broken graphics drivers (going away soon)" | ||
| 1958 | depends on DMAR && BROKEN | 1964 | depends on DMAR && BROKEN |
| 1959 | ---help--- | 1965 | ---help--- |
| 1960 | Current Graphics drivers tend to use physical address | 1966 | Current Graphics drivers tend to use physical address |
| @@ -2052,7 +2058,6 @@ config SCx200HR_TIMER | |||
| 2052 | config OLPC | 2058 | config OLPC |
| 2053 | bool "One Laptop Per Child support" | 2059 | bool "One Laptop Per Child support" |
| 2054 | select GPIOLIB | 2060 | select GPIOLIB |
| 2055 | default n | ||
| 2056 | ---help--- | 2061 | ---help--- |
| 2057 | Add support for detecting the unique features of the OLPC | 2062 | Add support for detecting the unique features of the OLPC |
| 2058 | XO hardware. | 2063 | XO hardware. |
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index a19829374e6a..2ac9069890cd 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
| @@ -338,6 +338,10 @@ config X86_F00F_BUG | |||
| 338 | def_bool y | 338 | def_bool y |
| 339 | depends on M586MMX || M586TSC || M586 || M486 || M386 | 339 | depends on M586MMX || M586TSC || M586 || M486 || M386 |
| 340 | 340 | ||
| 341 | config X86_INVD_BUG | ||
| 342 | def_bool y | ||
| 343 | depends on M486 || M386 | ||
| 344 | |||
| 341 | config X86_WP_WORKS_OK | 345 | config X86_WP_WORKS_OK |
| 342 | def_bool y | 346 | def_bool y |
| 343 | depends on !M386 | 347 | depends on !M386 |
| @@ -502,23 +506,3 @@ config CPU_SUP_UMC_32 | |||
| 502 | CPU might render the kernel unbootable. | 506 | CPU might render the kernel unbootable. |
| 503 | 507 | ||
| 504 | If unsure, say N. | 508 | If unsure, say N. |
| 505 | |||
| 506 | config X86_DS | ||
| 507 | def_bool X86_PTRACE_BTS | ||
| 508 | depends on X86_DEBUGCTLMSR | ||
| 509 | select HAVE_HW_BRANCH_TRACER | ||
| 510 | |||
| 511 | config X86_PTRACE_BTS | ||
| 512 | bool "Branch Trace Store" | ||
| 513 | default y | ||
| 514 | depends on X86_DEBUGCTLMSR | ||
| 515 | depends on BROKEN | ||
| 516 | ---help--- | ||
| 517 | This adds a ptrace interface to the hardware's branch trace store. | ||
| 518 | |||
| 519 | Debuggers may use it to collect an execution trace of the debugged | ||
| 520 | application in order to answer the question 'how did I get here?'. | ||
| 521 | Debuggers may trace user mode as well as kernel mode. | ||
| 522 | |||
| 523 | Say Y unless there is no application development on this machine | ||
| 524 | and you want to save a small amount of code size. | ||
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index bc01e3ebfeb2..75085080b63e 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
| @@ -45,7 +45,6 @@ config EARLY_PRINTK | |||
| 45 | 45 | ||
| 46 | config EARLY_PRINTK_DBGP | 46 | config EARLY_PRINTK_DBGP |
| 47 | bool "Early printk via EHCI debug port" | 47 | bool "Early printk via EHCI debug port" |
| 48 | default n | ||
| 49 | depends on EARLY_PRINTK && PCI | 48 | depends on EARLY_PRINTK && PCI |
| 50 | ---help--- | 49 | ---help--- |
| 51 | Write kernel log output directly into the EHCI debug port. | 50 | Write kernel log output directly into the EHCI debug port. |
| @@ -76,7 +75,6 @@ config DEBUG_PER_CPU_MAPS | |||
| 76 | bool "Debug access to per_cpu maps" | 75 | bool "Debug access to per_cpu maps" |
| 77 | depends on DEBUG_KERNEL | 76 | depends on DEBUG_KERNEL |
| 78 | depends on SMP | 77 | depends on SMP |
| 79 | default n | ||
| 80 | ---help--- | 78 | ---help--- |
| 81 | Say Y to verify that the per_cpu map being accessed has | 79 | Say Y to verify that the per_cpu map being accessed has |
| 82 | been setup. Adds a fair amount of code to kernel memory | 80 | been setup. Adds a fair amount of code to kernel memory |
| @@ -174,15 +172,6 @@ config IOMMU_LEAK | |||
| 174 | Add a simple leak tracer to the IOMMU code. This is useful when you | 172 | Add a simple leak tracer to the IOMMU code. This is useful when you |
| 175 | are debugging a buggy device driver that leaks IOMMU mappings. | 173 | are debugging a buggy device driver that leaks IOMMU mappings. |
| 176 | 174 | ||
| 177 | config X86_DS_SELFTEST | ||
| 178 | bool "DS selftest" | ||
| 179 | default y | ||
| 180 | depends on DEBUG_KERNEL | ||
| 181 | depends on X86_DS | ||
| 182 | ---help--- | ||
| 183 | Perform Debug Store selftests at boot time. | ||
| 184 | If in doubt, say "N". | ||
| 185 | |||
| 186 | config HAVE_MMIOTRACE_SUPPORT | 175 | config HAVE_MMIOTRACE_SUPPORT |
| 187 | def_bool y | 176 | def_bool y |
| 188 | 177 | ||
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 0a43dc515e4c..8aa1b59b9074 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
| @@ -95,8 +95,9 @@ sp-$(CONFIG_X86_64) := rsp | |||
| 95 | cfi := $(call as-instr,.cfi_startproc\n.cfi_rel_offset $(sp-y)$(comma)0\n.cfi_endproc,-DCONFIG_AS_CFI=1) | 95 | cfi := $(call as-instr,.cfi_startproc\n.cfi_rel_offset $(sp-y)$(comma)0\n.cfi_endproc,-DCONFIG_AS_CFI=1) |
| 96 | # is .cfi_signal_frame supported too? | 96 | # is .cfi_signal_frame supported too? |
| 97 | cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1) | 97 | cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1) |
| 98 | KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) | 98 | cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1) |
| 99 | KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) | 99 | KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) |
| 100 | KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) | ||
| 100 | 101 | ||
| 101 | LDFLAGS := -m elf_$(UTS_MACHINE) | 102 | LDFLAGS := -m elf_$(UTS_MACHINE) |
| 102 | 103 | ||
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 20bb0e1ac681..ff16756a51c1 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
| @@ -32,6 +32,9 @@ | |||
| 32 | #define IN IN1 | 32 | #define IN IN1 |
| 33 | #define KEY %xmm2 | 33 | #define KEY %xmm2 |
| 34 | #define IV %xmm3 | 34 | #define IV %xmm3 |
| 35 | #define BSWAP_MASK %xmm10 | ||
| 36 | #define CTR %xmm11 | ||
| 37 | #define INC %xmm12 | ||
| 35 | 38 | ||
| 36 | #define KEYP %rdi | 39 | #define KEYP %rdi |
| 37 | #define OUTP %rsi | 40 | #define OUTP %rsi |
| @@ -42,6 +45,7 @@ | |||
| 42 | #define T1 %r10 | 45 | #define T1 %r10 |
| 43 | #define TKEYP T1 | 46 | #define TKEYP T1 |
| 44 | #define T2 %r11 | 47 | #define T2 %r11 |
| 48 | #define TCTR_LOW T2 | ||
| 45 | 49 | ||
| 46 | _key_expansion_128: | 50 | _key_expansion_128: |
| 47 | _key_expansion_256a: | 51 | _key_expansion_256a: |
| @@ -724,3 +728,114 @@ ENTRY(aesni_cbc_dec) | |||
| 724 | movups IV, (IVP) | 728 | movups IV, (IVP) |
| 725 | .Lcbc_dec_just_ret: | 729 | .Lcbc_dec_just_ret: |
| 726 | ret | 730 | ret |
| 731 | |||
| 732 | .align 16 | ||
| 733 | .Lbswap_mask: | ||
| 734 | .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 | ||
| 735 | |||
| 736 | /* | ||
| 737 | * _aesni_inc_init: internal ABI | ||
| 738 | * setup registers used by _aesni_inc | ||
| 739 | * input: | ||
| 740 | * IV | ||
| 741 | * output: | ||
| 742 | * CTR: == IV, in little endian | ||
| 743 | * TCTR_LOW: == lower qword of CTR | ||
| 744 | * INC: == 1, in little endian | ||
| 745 | * BSWAP_MASK == endian swapping mask | ||
| 746 | */ | ||
| 747 | _aesni_inc_init: | ||
| 748 | movaps .Lbswap_mask, BSWAP_MASK | ||
| 749 | movaps IV, CTR | ||
| 750 | PSHUFB_XMM BSWAP_MASK CTR | ||
| 751 | mov $1, TCTR_LOW | ||
| 752 | MOVQ_R64_XMM TCTR_LOW INC | ||
| 753 | MOVQ_R64_XMM CTR TCTR_LOW | ||
| 754 | ret | ||
| 755 | |||
| 756 | /* | ||
| 757 | * _aesni_inc: internal ABI | ||
| 758 | * Increase IV by 1, IV is in big endian | ||
| 759 | * input: | ||
| 760 | * IV | ||
| 761 | * CTR: == IV, in little endian | ||
| 762 | * TCTR_LOW: == lower qword of CTR | ||
| 763 | * INC: == 1, in little endian | ||
| 764 | * BSWAP_MASK == endian swapping mask | ||
| 765 | * output: | ||
| 766 | * IV: Increase by 1 | ||
| 767 | * changed: | ||
| 768 | * CTR: == output IV, in little endian | ||
| 769 | * TCTR_LOW: == lower qword of CTR | ||
| 770 | */ | ||
| 771 | _aesni_inc: | ||
| 772 | paddq INC, CTR | ||
| 773 | add $1, TCTR_LOW | ||
| 774 | jnc .Linc_low | ||
| 775 | pslldq $8, INC | ||
| 776 | paddq INC, CTR | ||
| 777 | psrldq $8, INC | ||
| 778 | .Linc_low: | ||
| 779 | movaps CTR, IV | ||
| 780 | PSHUFB_XMM BSWAP_MASK IV | ||
| 781 | ret | ||
| 782 | |||
| 783 | /* | ||
| 784 | * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, | ||
| 785 | * size_t len, u8 *iv) | ||
| 786 | */ | ||
| 787 | ENTRY(aesni_ctr_enc) | ||
| 788 | cmp $16, LEN | ||
| 789 | jb .Lctr_enc_just_ret | ||
| 790 | mov 480(KEYP), KLEN | ||
| 791 | movups (IVP), IV | ||
| 792 | call _aesni_inc_init | ||
| 793 | cmp $64, LEN | ||
| 794 | jb .Lctr_enc_loop1 | ||
| 795 | .align 4 | ||
| 796 | .Lctr_enc_loop4: | ||
| 797 | movaps IV, STATE1 | ||
| 798 | call _aesni_inc | ||
| 799 | movups (INP), IN1 | ||
| 800 | movaps IV, STATE2 | ||
| 801 | call _aesni_inc | ||
| 802 | movups 0x10(INP), IN2 | ||
| 803 | movaps IV, STATE3 | ||
| 804 | call _aesni_inc | ||
| 805 | movups 0x20(INP), IN3 | ||
| 806 | movaps IV, STATE4 | ||
| 807 | call _aesni_inc | ||
| 808 | movups 0x30(INP), IN4 | ||
| 809 | call _aesni_enc4 | ||
| 810 | pxor IN1, STATE1 | ||
| 811 | movups STATE1, (OUTP) | ||
| 812 | pxor IN2, STATE2 | ||
| 813 | movups STATE2, 0x10(OUTP) | ||
| 814 | pxor IN3, STATE3 | ||
| 815 | movups STATE3, 0x20(OUTP) | ||
| 816 | pxor IN4, STATE4 | ||
| 817 | movups STATE4, 0x30(OUTP) | ||
| 818 | sub $64, LEN | ||
| 819 | add $64, INP | ||
| 820 | add $64, OUTP | ||
| 821 | cmp $64, LEN | ||
| 822 | jge .Lctr_enc_loop4 | ||
| 823 | cmp $16, LEN | ||
| 824 | jb .Lctr_enc_ret | ||
| 825 | .align 4 | ||
| 826 | .Lctr_enc_loop1: | ||
| 827 | movaps IV, STATE | ||
| 828 | call _aesni_inc | ||
| 829 | movups (INP), IN | ||
| 830 | call _aesni_enc1 | ||
| 831 | pxor IN, STATE | ||
| 832 | movups STATE, (OUTP) | ||
| 833 | sub $16, LEN | ||
| 834 | add $16, INP | ||
| 835 | add $16, OUTP | ||
| 836 | cmp $16, LEN | ||
| 837 | jge .Lctr_enc_loop1 | ||
| 838 | .Lctr_enc_ret: | ||
| 839 | movups IV, (IVP) | ||
| 840 | .Lctr_enc_just_ret: | ||
| 841 | ret | ||
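
The `_aesni_inc` helper above keeps the CTR-mode counter in little-endian form in XMM registers (CTR/TCTR_LOW/INC) and byte-swaps it back into the big-endian IV with the `.Lbswap_mask` shuffle. For readers who want the arithmetic without the SSE details, here is a minimal portable C sketch of the same big-endian increment (illustrative only; the kernel's generic helper for this is `crypto_inc()`):

```c
#include <stdint.h>
#include <stdio.h>

/* Increment a big-endian counter block in place, propagating the carry
 * from the last byte toward the first -- the same effect _aesni_inc
 * achieves with paddq plus the byte-swap mask. */
static void ctr_inc_be(uint8_t *ctr, size_t size)
{
	size_t i = size;

	while (i--) {
		if (++ctr[i] != 0)	/* no carry out of this byte, done */
			break;
	}
}

int main(void)
{
	uint8_t iv[16] = { 0, 0, 0, 0, 0, 0, 0, 0,
			   0, 0, 0, 0, 0, 0, 0xff, 0xff };

	ctr_inc_be(iv, sizeof(iv));	/* ...00ffff -> ...010000 */
	printf("%02x %02x %02x\n", iv[13], iv[14], iv[15]);
	return 0;
}
```

The carry out of the low 64 bits corresponds to the `jnc .Linc_low` path above, where `INC` is shifted into the high qword before being added a second time.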
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 49c552c060e9..2cb3dcc4490a 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
| @@ -18,6 +18,7 @@ | |||
| 18 | #include <crypto/algapi.h> | 18 | #include <crypto/algapi.h> |
| 19 | #include <crypto/aes.h> | 19 | #include <crypto/aes.h> |
| 20 | #include <crypto/cryptd.h> | 20 | #include <crypto/cryptd.h> |
| 21 | #include <crypto/ctr.h> | ||
| 21 | #include <asm/i387.h> | 22 | #include <asm/i387.h> |
| 22 | #include <asm/aes.h> | 23 | #include <asm/aes.h> |
| 23 | 24 | ||
| @@ -58,6 +59,8 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out, | |||
| 58 | const u8 *in, unsigned int len, u8 *iv); | 59 | const u8 *in, unsigned int len, u8 *iv); |
| 59 | asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, | 60 | asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, |
| 60 | const u8 *in, unsigned int len, u8 *iv); | 61 | const u8 *in, unsigned int len, u8 *iv); |
| 62 | asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, | ||
| 63 | const u8 *in, unsigned int len, u8 *iv); | ||
| 61 | 64 | ||
| 62 | static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx) | 65 | static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx) |
| 63 | { | 66 | { |
| @@ -321,6 +324,72 @@ static struct crypto_alg blk_cbc_alg = { | |||
| 321 | }, | 324 | }, |
| 322 | }; | 325 | }; |
| 323 | 326 | ||
| 327 | static void ctr_crypt_final(struct crypto_aes_ctx *ctx, | ||
| 328 | struct blkcipher_walk *walk) | ||
| 329 | { | ||
| 330 | u8 *ctrblk = walk->iv; | ||
| 331 | u8 keystream[AES_BLOCK_SIZE]; | ||
| 332 | u8 *src = walk->src.virt.addr; | ||
| 333 | u8 *dst = walk->dst.virt.addr; | ||
| 334 | unsigned int nbytes = walk->nbytes; | ||
| 335 | |||
| 336 | aesni_enc(ctx, keystream, ctrblk); | ||
| 337 | crypto_xor(keystream, src, nbytes); | ||
| 338 | memcpy(dst, keystream, nbytes); | ||
| 339 | crypto_inc(ctrblk, AES_BLOCK_SIZE); | ||
| 340 | } | ||
| 341 | |||
| 342 | static int ctr_crypt(struct blkcipher_desc *desc, | ||
| 343 | struct scatterlist *dst, struct scatterlist *src, | ||
| 344 | unsigned int nbytes) | ||
| 345 | { | ||
| 346 | struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm)); | ||
| 347 | struct blkcipher_walk walk; | ||
| 348 | int err; | ||
| 349 | |||
| 350 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
| 351 | err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); | ||
| 352 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
| 353 | |||
| 354 | kernel_fpu_begin(); | ||
| 355 | while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { | ||
| 356 | aesni_ctr_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, | ||
| 357 | nbytes & AES_BLOCK_MASK, walk.iv); | ||
| 358 | nbytes &= AES_BLOCK_SIZE - 1; | ||
| 359 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
| 360 | } | ||
| 361 | if (walk.nbytes) { | ||
| 362 | ctr_crypt_final(ctx, &walk); | ||
| 363 | err = blkcipher_walk_done(desc, &walk, 0); | ||
| 364 | } | ||
| 365 | kernel_fpu_end(); | ||
| 366 | |||
| 367 | return err; | ||
| 368 | } | ||
| 369 | |||
| 370 | static struct crypto_alg blk_ctr_alg = { | ||
| 371 | .cra_name = "__ctr-aes-aesni", | ||
| 372 | .cra_driver_name = "__driver-ctr-aes-aesni", | ||
| 373 | .cra_priority = 0, | ||
| 374 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
| 375 | .cra_blocksize = 1, | ||
| 376 | .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1, | ||
| 377 | .cra_alignmask = 0, | ||
| 378 | .cra_type = &crypto_blkcipher_type, | ||
| 379 | .cra_module = THIS_MODULE, | ||
| 380 | .cra_list = LIST_HEAD_INIT(blk_ctr_alg.cra_list), | ||
| 381 | .cra_u = { | ||
| 382 | .blkcipher = { | ||
| 383 | .min_keysize = AES_MIN_KEY_SIZE, | ||
| 384 | .max_keysize = AES_MAX_KEY_SIZE, | ||
| 385 | .ivsize = AES_BLOCK_SIZE, | ||
| 386 | .setkey = aes_set_key, | ||
| 387 | .encrypt = ctr_crypt, | ||
| 388 | .decrypt = ctr_crypt, | ||
| 389 | }, | ||
| 390 | }, | ||
| 391 | }; | ||
| 392 | |||
| 324 | static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, | 393 | static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, |
| 325 | unsigned int key_len) | 394 | unsigned int key_len) |
| 326 | { | 395 | { |
| @@ -467,13 +536,11 @@ static struct crypto_alg ablk_cbc_alg = { | |||
| 467 | }, | 536 | }, |
| 468 | }; | 537 | }; |
| 469 | 538 | ||
| 470 | #ifdef HAS_CTR | ||
| 471 | static int ablk_ctr_init(struct crypto_tfm *tfm) | 539 | static int ablk_ctr_init(struct crypto_tfm *tfm) |
| 472 | { | 540 | { |
| 473 | struct cryptd_ablkcipher *cryptd_tfm; | 541 | struct cryptd_ablkcipher *cryptd_tfm; |
| 474 | 542 | ||
| 475 | cryptd_tfm = cryptd_alloc_ablkcipher("fpu(ctr(__driver-aes-aesni))", | 543 | cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ctr-aes-aesni", 0, 0); |
| 476 | 0, 0); | ||
| 477 | if (IS_ERR(cryptd_tfm)) | 544 | if (IS_ERR(cryptd_tfm)) |
| 478 | return PTR_ERR(cryptd_tfm); | 545 | return PTR_ERR(cryptd_tfm); |
| 479 | ablk_init_common(tfm, cryptd_tfm); | 546 | ablk_init_common(tfm, cryptd_tfm); |
| @@ -500,11 +567,50 @@ static struct crypto_alg ablk_ctr_alg = { | |||
| 500 | .ivsize = AES_BLOCK_SIZE, | 567 | .ivsize = AES_BLOCK_SIZE, |
| 501 | .setkey = ablk_set_key, | 568 | .setkey = ablk_set_key, |
| 502 | .encrypt = ablk_encrypt, | 569 | .encrypt = ablk_encrypt, |
| 503 | .decrypt = ablk_decrypt, | 570 | .decrypt = ablk_encrypt, |
| 504 | .geniv = "chainiv", | 571 | .geniv = "chainiv", |
| 505 | }, | 572 | }, |
| 506 | }, | 573 | }, |
| 507 | }; | 574 | }; |
| 575 | |||
| 576 | #ifdef HAS_CTR | ||
| 577 | static int ablk_rfc3686_ctr_init(struct crypto_tfm *tfm) | ||
| 578 | { | ||
| 579 | struct cryptd_ablkcipher *cryptd_tfm; | ||
| 580 | |||
| 581 | cryptd_tfm = cryptd_alloc_ablkcipher( | ||
| 582 | "rfc3686(__driver-ctr-aes-aesni)", 0, 0); | ||
| 583 | if (IS_ERR(cryptd_tfm)) | ||
| 584 | return PTR_ERR(cryptd_tfm); | ||
| 585 | ablk_init_common(tfm, cryptd_tfm); | ||
| 586 | return 0; | ||
| 587 | } | ||
| 588 | |||
| 589 | static struct crypto_alg ablk_rfc3686_ctr_alg = { | ||
| 590 | .cra_name = "rfc3686(ctr(aes))", | ||
| 591 | .cra_driver_name = "rfc3686-ctr-aes-aesni", | ||
| 592 | .cra_priority = 400, | ||
| 593 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, | ||
| 594 | .cra_blocksize = 1, | ||
| 595 | .cra_ctxsize = sizeof(struct async_aes_ctx), | ||
| 596 | .cra_alignmask = 0, | ||
| 597 | .cra_type = &crypto_ablkcipher_type, | ||
| 598 | .cra_module = THIS_MODULE, | ||
| 599 | .cra_list = LIST_HEAD_INIT(ablk_rfc3686_ctr_alg.cra_list), | ||
| 600 | .cra_init = ablk_rfc3686_ctr_init, | ||
| 601 | .cra_exit = ablk_exit, | ||
| 602 | .cra_u = { | ||
| 603 | .ablkcipher = { | ||
| 604 | .min_keysize = AES_MIN_KEY_SIZE+CTR_RFC3686_NONCE_SIZE, | ||
| 605 | .max_keysize = AES_MAX_KEY_SIZE+CTR_RFC3686_NONCE_SIZE, | ||
| 606 | .ivsize = CTR_RFC3686_IV_SIZE, | ||
| 607 | .setkey = ablk_set_key, | ||
| 608 | .encrypt = ablk_encrypt, | ||
| 609 | .decrypt = ablk_decrypt, | ||
| 610 | .geniv = "seqiv", | ||
| 611 | }, | ||
| 612 | }, | ||
| 613 | }; | ||
| 508 | #endif | 614 | #endif |
| 509 | 615 | ||
| 510 | #ifdef HAS_LRW | 616 | #ifdef HAS_LRW |
| @@ -640,13 +746,17 @@ static int __init aesni_init(void) | |||
| 640 | goto blk_ecb_err; | 746 | goto blk_ecb_err; |
| 641 | if ((err = crypto_register_alg(&blk_cbc_alg))) | 747 | if ((err = crypto_register_alg(&blk_cbc_alg))) |
| 642 | goto blk_cbc_err; | 748 | goto blk_cbc_err; |
| 749 | if ((err = crypto_register_alg(&blk_ctr_alg))) | ||
| 750 | goto blk_ctr_err; | ||
| 643 | if ((err = crypto_register_alg(&ablk_ecb_alg))) | 751 | if ((err = crypto_register_alg(&ablk_ecb_alg))) |
| 644 | goto ablk_ecb_err; | 752 | goto ablk_ecb_err; |
| 645 | if ((err = crypto_register_alg(&ablk_cbc_alg))) | 753 | if ((err = crypto_register_alg(&ablk_cbc_alg))) |
| 646 | goto ablk_cbc_err; | 754 | goto ablk_cbc_err; |
| 647 | #ifdef HAS_CTR | ||
| 648 | if ((err = crypto_register_alg(&ablk_ctr_alg))) | 755 | if ((err = crypto_register_alg(&ablk_ctr_alg))) |
| 649 | goto ablk_ctr_err; | 756 | goto ablk_ctr_err; |
| 757 | #ifdef HAS_CTR | ||
| 758 | if ((err = crypto_register_alg(&ablk_rfc3686_ctr_alg))) | ||
| 759 | goto ablk_rfc3686_ctr_err; | ||
| 650 | #endif | 760 | #endif |
| 651 | #ifdef HAS_LRW | 761 | #ifdef HAS_LRW |
| 652 | if ((err = crypto_register_alg(&ablk_lrw_alg))) | 762 | if ((err = crypto_register_alg(&ablk_lrw_alg))) |
| @@ -675,13 +785,17 @@ ablk_pcbc_err: | |||
| 675 | ablk_lrw_err: | 785 | ablk_lrw_err: |
| 676 | #endif | 786 | #endif |
| 677 | #ifdef HAS_CTR | 787 | #ifdef HAS_CTR |
| 788 | crypto_unregister_alg(&ablk_rfc3686_ctr_alg); | ||
| 789 | ablk_rfc3686_ctr_err: | ||
| 790 | #endif | ||
| 678 | crypto_unregister_alg(&ablk_ctr_alg); | 791 | crypto_unregister_alg(&ablk_ctr_alg); |
| 679 | ablk_ctr_err: | 792 | ablk_ctr_err: |
| 680 | #endif | ||
| 681 | crypto_unregister_alg(&ablk_cbc_alg); | 793 | crypto_unregister_alg(&ablk_cbc_alg); |
| 682 | ablk_cbc_err: | 794 | ablk_cbc_err: |
| 683 | crypto_unregister_alg(&ablk_ecb_alg); | 795 | crypto_unregister_alg(&ablk_ecb_alg); |
| 684 | ablk_ecb_err: | 796 | ablk_ecb_err: |
| 797 | crypto_unregister_alg(&blk_ctr_alg); | ||
| 798 | blk_ctr_err: | ||
| 685 | crypto_unregister_alg(&blk_cbc_alg); | 799 | crypto_unregister_alg(&blk_cbc_alg); |
| 686 | blk_cbc_err: | 800 | blk_cbc_err: |
| 687 | crypto_unregister_alg(&blk_ecb_alg); | 801 | crypto_unregister_alg(&blk_ecb_alg); |
| @@ -705,10 +819,12 @@ static void __exit aesni_exit(void) | |||
| 705 | crypto_unregister_alg(&ablk_lrw_alg); | 819 | crypto_unregister_alg(&ablk_lrw_alg); |
| 706 | #endif | 820 | #endif |
| 707 | #ifdef HAS_CTR | 821 | #ifdef HAS_CTR |
| 708 | crypto_unregister_alg(&ablk_ctr_alg); | 822 | crypto_unregister_alg(&ablk_rfc3686_ctr_alg); |
| 709 | #endif | 823 | #endif |
| 824 | crypto_unregister_alg(&ablk_ctr_alg); | ||
| 710 | crypto_unregister_alg(&ablk_cbc_alg); | 825 | crypto_unregister_alg(&ablk_cbc_alg); |
| 711 | crypto_unregister_alg(&ablk_ecb_alg); | 826 | crypto_unregister_alg(&ablk_ecb_alg); |
| 827 | crypto_unregister_alg(&blk_ctr_alg); | ||
| 712 | crypto_unregister_alg(&blk_cbc_alg); | 828 | crypto_unregister_alg(&blk_cbc_alg); |
| 713 | crypto_unregister_alg(&blk_ecb_alg); | 829 | crypto_unregister_alg(&blk_ecb_alg); |
| 714 | crypto_unregister_alg(&__aesni_alg); | 830 | crypto_unregister_alg(&__aesni_alg); |
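
`ctr_crypt()` above hands full blocks to the new `aesni_ctr_enc()` and finishes a trailing partial block in `ctr_crypt_final()`: the counter block is encrypted to produce a keystream and only the remaining bytes are XORed. That is also why `blk_ctr_alg` sets `.cra_blocksize = 1` and uses the same `ctr_crypt` routine for both `.encrypt` and `.decrypt`. A self-contained C sketch of the final-block idea, with a toy permutation standing in for `aesni_enc()`:

```c
#include <stdint.h>
#include <stdio.h>

#define BLOCK_SIZE 16

/* Stand-in for the real block cipher: any deterministic permutation of the
 * counter block is enough to show the CTR structure. */
static void toy_encrypt_block(uint8_t out[BLOCK_SIZE],
			      const uint8_t in[BLOCK_SIZE])
{
	for (int i = 0; i < BLOCK_SIZE; i++)
		out[i] = (uint8_t)((in[i] ^ 0xa5) + i);
}

/* CTR final block: encrypt the counter to get a keystream block, then XOR
 * only as many bytes as remain.  Only the keystream goes through the
 * cipher, so the same code both encrypts and decrypts. */
static void ctr_final(uint8_t *dst, const uint8_t *src, size_t nbytes,
		      const uint8_t ctrblk[BLOCK_SIZE])
{
	uint8_t keystream[BLOCK_SIZE];

	toy_encrypt_block(keystream, ctrblk);
	for (size_t i = 0; i < nbytes; i++)
		dst[i] = src[i] ^ keystream[i];
}

int main(void)
{
	uint8_t ctr[BLOCK_SIZE] = { 0 };
	uint8_t msg[5] = "tail", out[5], back[5];

	ctr_final(out, msg, sizeof(msg), ctr);	/* encrypt */
	ctr_final(back, out, sizeof(msg), ctr);	/* decrypt = same operation */
	printf("%s\n", (char *)back);		/* prints "tail" */
	return 0;
}
```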
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 56f462cf22d2..aa2c39d968fc 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
| @@ -85,7 +85,6 @@ extern int acpi_ioapic; | |||
| 85 | extern int acpi_noirq; | 85 | extern int acpi_noirq; |
| 86 | extern int acpi_strict; | 86 | extern int acpi_strict; |
| 87 | extern int acpi_disabled; | 87 | extern int acpi_disabled; |
| 88 | extern int acpi_ht; | ||
| 89 | extern int acpi_pci_disabled; | 88 | extern int acpi_pci_disabled; |
| 90 | extern int acpi_skip_timer_override; | 89 | extern int acpi_skip_timer_override; |
| 91 | extern int acpi_use_timer_override; | 90 | extern int acpi_use_timer_override; |
| @@ -97,7 +96,6 @@ void acpi_pic_sci_set_trigger(unsigned int, u16); | |||
| 97 | static inline void disable_acpi(void) | 96 | static inline void disable_acpi(void) |
| 98 | { | 97 | { |
| 99 | acpi_disabled = 1; | 98 | acpi_disabled = 1; |
| 100 | acpi_ht = 0; | ||
| 101 | acpi_pci_disabled = 1; | 99 | acpi_pci_disabled = 1; |
| 102 | acpi_noirq = 1; | 100 | acpi_noirq = 1; |
| 103 | } | 101 | } |
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index b97f786a48d5..a63a68be1cce 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
| @@ -6,8 +6,8 @@ | |||
| 6 | .macro LOCK_PREFIX | 6 | .macro LOCK_PREFIX |
| 7 | 1: lock | 7 | 1: lock |
| 8 | .section .smp_locks,"a" | 8 | .section .smp_locks,"a" |
| 9 | _ASM_ALIGN | 9 | .balign 4 |
| 10 | _ASM_PTR 1b | 10 | .long 1b - . |
| 11 | .previous | 11 | .previous |
| 12 | .endm | 12 | .endm |
| 13 | #else | 13 | #else |
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index b09ec55650b3..03b6bb5394a0 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
| @@ -28,20 +28,20 @@ | |||
| 28 | */ | 28 | */ |
| 29 | 29 | ||
| 30 | #ifdef CONFIG_SMP | 30 | #ifdef CONFIG_SMP |
| 31 | #define LOCK_PREFIX \ | 31 | #define LOCK_PREFIX_HERE \ |
| 32 | ".section .smp_locks,\"a\"\n" \ | 32 | ".section .smp_locks,\"a\"\n" \ |
| 33 | _ASM_ALIGN "\n" \ | 33 | ".balign 4\n" \ |
| 34 | _ASM_PTR "661f\n" /* address */ \ | 34 | ".long 671f - .\n" /* offset */ \ |
| 35 | ".previous\n" \ | 35 | ".previous\n" \ |
| 36 | "661:\n\tlock; " | 36 | "671:" |
| 37 | |||
| 38 | #define LOCK_PREFIX LOCK_PREFIX_HERE "\n\tlock; " | ||
| 37 | 39 | ||
| 38 | #else /* ! CONFIG_SMP */ | 40 | #else /* ! CONFIG_SMP */ |
| 41 | #define LOCK_PREFIX_HERE "" | ||
| 39 | #define LOCK_PREFIX "" | 42 | #define LOCK_PREFIX "" |
| 40 | #endif | 43 | #endif |
| 41 | 44 | ||
| 42 | /* This must be included *after* the definition of LOCK_PREFIX */ | ||
| 43 | #include <asm/cpufeature.h> | ||
| 44 | |||
| 45 | struct alt_instr { | 45 | struct alt_instr { |
| 46 | u8 *instr; /* original instruction */ | 46 | u8 *instr; /* original instruction */ |
| 47 | u8 *replacement; | 47 | u8 *replacement; |
| @@ -96,6 +96,12 @@ static inline int alternatives_text_reserved(void *start, void *end) | |||
| 96 | ".previous" | 96 | ".previous" |
| 97 | 97 | ||
| 98 | /* | 98 | /* |
| 99 | * This must be included *after* the definition of ALTERNATIVE due to | ||
| 100 | * <asm/arch_hweight.h> | ||
| 101 | */ | ||
| 102 | #include <asm/cpufeature.h> | ||
| 103 | |||
| 104 | /* | ||
| 99 | * Alternative instructions for different CPU types or capabilities. | 105 | * Alternative instructions for different CPU types or capabilities. |
| 100 | * | 106 | * |
| 101 | * This allows to use optimized instructions even on generic binary | 107 | * This allows to use optimized instructions even on generic binary |
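
The `LOCK_PREFIX`/`LOCK_PREFIX_HERE` rework stores each `.smp_locks` entry as a 32-bit self-relative offset (`.long 671f - .`) rather than an absolute `_ASM_PTR`, which halves the section on 64-bit and keeps it position-independent. Whatever walks the section later has to turn each offset back into the address of the lock byte; the sketch below shows that conversion as an assumption about how the patching side consumes these entries, not a copy of it:

```c
#include <assert.h>
#include <stdint.h>

/* Each .smp_locks entry is a 32-bit offset from the entry itself to the
 * lock prefix byte it describes: target = address of entry + stored offset. */
static inline uint8_t *smp_lock_target(const int32_t *entry)
{
	return (uint8_t *)entry + *entry;
}

/* Illustrative walk over a table of entries, e.g. to swap lock prefixes
 * and NOPs when switching between UP and SMP behaviour. */
static void walk_smp_locks(int32_t *start, int32_t *end,
			   void (*patch)(uint8_t *lock_byte))
{
	for (int32_t *p = start; p < end; p++)
		patch(smp_lock_target(p));
}

static void mark_nop(uint8_t *lock_byte)
{
	*lock_byte = 0x90;	/* overwrite "lock" (0xf0) with a NOP */
}

int main(void)
{
	static uint8_t text[16] = { [8] = 0xf0 };	/* pretend code with a lock prefix */
	static int32_t smp_locks[1];

	/* Build one self-relative entry pointing at the lock byte. */
	smp_locks[0] = (int32_t)((intptr_t)&text[8] - (intptr_t)&smp_locks[0]);

	walk_smp_locks(smp_locks, smp_locks + 1, mark_nop);
	assert(text[8] == 0x90);
	return 0;
}
```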
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 86a0ff0aeac7..7014e88bc779 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
| @@ -174,6 +174,40 @@ | |||
| 174 | (~((1ULL << (12 + ((lvl) * 9))) - 1))) | 174 | (~((1ULL << (12 + ((lvl) * 9))) - 1))) |
| 175 | #define PM_ALIGNED(lvl, addr) ((PM_MAP_MASK(lvl) & (addr)) == (addr)) | 175 | #define PM_ALIGNED(lvl, addr) ((PM_MAP_MASK(lvl) & (addr)) == (addr)) |
| 176 | 176 | ||
| 177 | /* | ||
| 178 | * Returns the page table level to use for a given page size | ||
| 179 | * Pagesize is expected to be a power-of-two | ||
| 180 | */ | ||
| 181 | #define PAGE_SIZE_LEVEL(pagesize) \ | ||
| 182 | ((__ffs(pagesize) - 12) / 9) | ||
| 183 | /* | ||
| 184 | * Returns the number of ptes to use for a given page size | ||
| 185 | * Pagesize is expected to be a power-of-two | ||
| 186 | */ | ||
| 187 | #define PAGE_SIZE_PTE_COUNT(pagesize) \ | ||
| 188 | (1ULL << ((__ffs(pagesize) - 12) % 9)) | ||
| 189 | |||
| 190 | /* | ||
| 191 | * Aligns a given io-virtual address to a given page size | ||
| 192 | * Pagesize is expected to be a power-of-two | ||
| 193 | */ | ||
| 194 | #define PAGE_SIZE_ALIGN(address, pagesize) \ | ||
| 195 | ((address) & ~((pagesize) - 1)) | ||
| 196 | /* | ||
| 197 | * Creates an IOMMU PTE for an address an a given pagesize | ||
| 198 | * The PTE has no permission bits set | ||
| 199 | * Pagesize is expected to be a power-of-two larger than 4096 | ||
| 200 | */ | ||
| 201 | #define PAGE_SIZE_PTE(address, pagesize) \ | ||
| 202 | (((address) | ((pagesize) - 1)) & \ | ||
| 203 | (~(pagesize >> 1)) & PM_ADDR_MASK) | ||
| 204 | |||
| 205 | /* | ||
| 206 | * Takes a PTE value with mode=0x07 and returns the page size it maps | ||
| 207 | */ | ||
| 208 | #define PTE_PAGE_SIZE(pte) \ | ||
| 209 | (1ULL << (1 + ffz(((pte) | 0xfffULL)))) | ||
| 210 | |||
| 177 | #define IOMMU_PTE_P (1ULL << 0) | 211 | #define IOMMU_PTE_P (1ULL << 0) |
| 178 | #define IOMMU_PTE_TV (1ULL << 1) | 212 | #define IOMMU_PTE_TV (1ULL << 1) |
| 179 | #define IOMMU_PTE_U (1ULL << 59) | 213 | #define IOMMU_PTE_U (1ULL << 59) |
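
The new macros encode how the AMD IOMMU maps power-of-two page sizes larger than 4 KiB: `PAGE_SIZE_LEVEL()` picks the page-table level (each level covers 9 more address bits on top of the 12-bit page offset) and `PAGE_SIZE_PTE_COUNT()` gives how many identical PTEs are needed when the size falls between levels. A small worked example in plain C using the same formulas, with `__builtin_ctzll` standing in for the kernel's `__ffs()`:

```c
#include <stdint.h>
#include <stdio.h>

/* Userspace stand-in for the kernel's __ffs(): index of the lowest set bit. */
static inline unsigned int lsb_index(uint64_t x)
{
	return (unsigned int)__builtin_ctzll(x);
}

/* Same formulas as PAGE_SIZE_LEVEL() and PAGE_SIZE_PTE_COUNT() above. */
static unsigned int page_size_level(uint64_t pagesize)
{
	return (lsb_index(pagesize) - 12) / 9;
}

static uint64_t page_size_pte_count(uint64_t pagesize)
{
	return 1ULL << ((lsb_index(pagesize) - 12) % 9);
}

int main(void)
{
	uint64_t sizes[] = { 4096ULL, 32768ULL, 2ULL << 20, 1ULL << 30 };

	/* 4 KiB -> level 0, 1 pte;  32 KiB -> level 0, 8 ptes;
	 * 2 MiB -> level 1, 1 pte;  1 GiB  -> level 2, 1 pte */
	for (unsigned int i = 0; i < 4; i++)
		printf("%10llu bytes -> level %u, %llu pte(s)\n",
		       (unsigned long long)sizes[i],
		       page_size_level(sizes[i]),
		       (unsigned long long)page_size_pte_count(sizes[i]));
	return 0;
}
```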
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index b4ac2cdcb64f..1fa03e04ae44 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
| @@ -373,6 +373,7 @@ extern atomic_t init_deasserted; | |||
| 373 | extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); | 373 | extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); |
| 374 | #endif | 374 | #endif |
| 375 | 375 | ||
| 376 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 376 | static inline u32 apic_read(u32 reg) | 377 | static inline u32 apic_read(u32 reg) |
| 377 | { | 378 | { |
| 378 | return apic->read(reg); | 379 | return apic->read(reg); |
| @@ -403,10 +404,19 @@ static inline u32 safe_apic_wait_icr_idle(void) | |||
| 403 | return apic->safe_wait_icr_idle(); | 404 | return apic->safe_wait_icr_idle(); |
| 404 | } | 405 | } |
| 405 | 406 | ||
| 407 | #else /* CONFIG_X86_LOCAL_APIC */ | ||
| 408 | |||
| 409 | static inline u32 apic_read(u32 reg) { return 0; } | ||
| 410 | static inline void apic_write(u32 reg, u32 val) { } | ||
| 411 | static inline u64 apic_icr_read(void) { return 0; } | ||
| 412 | static inline void apic_icr_write(u32 low, u32 high) { } | ||
| 413 | static inline void apic_wait_icr_idle(void) { } | ||
| 414 | static inline u32 safe_apic_wait_icr_idle(void) { return 0; } | ||
| 415 | |||
| 416 | #endif /* CONFIG_X86_LOCAL_APIC */ | ||
| 406 | 417 | ||
| 407 | static inline void ack_APIC_irq(void) | 418 | static inline void ack_APIC_irq(void) |
| 408 | { | 419 | { |
| 409 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 410 | /* | 420 | /* |
| 411 | * ack_APIC_irq() actually gets compiled as a single instruction | 421 | * ack_APIC_irq() actually gets compiled as a single instruction |
| 412 | * ... yummie. | 422 | * ... yummie. |
| @@ -414,7 +424,6 @@ static inline void ack_APIC_irq(void) | |||
| 414 | 424 | ||
| 415 | /* Docs say use 0 for future compatibility */ | 425 | /* Docs say use 0 for future compatibility */ |
| 416 | apic_write(APIC_EOI, 0); | 426 | apic_write(APIC_EOI, 0); |
| 417 | #endif | ||
| 418 | } | 427 | } |
| 419 | 428 | ||
| 420 | static inline unsigned default_get_apic_id(unsigned long x) | 429 | static inline unsigned default_get_apic_id(unsigned long x) |
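
The apic.h hunk moves the `CONFIG_X86_LOCAL_APIC` guard out of `ack_APIC_irq()` and instead provides no-op inline stubs for the whole accessor family, so callers compile unchanged whether or not a local APIC is configured in. A hypothetical, generic sketch of that pattern (the `widget_*` names are made up for illustration):

```c
#include <stdio.h>

#ifdef HAS_WIDGET
extern unsigned int real_widget_read(unsigned int reg);
extern void real_widget_write(unsigned int reg, unsigned int val);
static inline unsigned int widget_read(unsigned int reg)
{ return real_widget_read(reg); }
static inline void widget_write(unsigned int reg, unsigned int val)
{ real_widget_write(reg, val); }
#else
/* No hardware support compiled in: accessors collapse to no-ops,
 * so callers need no #ifdefs of their own. */
static inline unsigned int widget_read(unsigned int reg)
{ (void)reg; return 0; }
static inline void widget_write(unsigned int reg, unsigned int val)
{ (void)reg; (void)val; }
#endif

/* Analogue of ack_APIC_irq(): written once, unconditionally. */
static inline void widget_ack_irq(void)
{
	widget_write(0, 0);	/* "use 0 for future compatibility" */
}

int main(void)
{
	widget_ack_irq();
	printf("reg0 = %u\n", widget_read(0));
	return 0;
}
```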
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
new file mode 100644
index 000000000000..9686c3d9ff73
--- /dev/null
+++ b/arch/x86/include/asm/arch_hweight.h
| @@ -0,0 +1,61 @@ | |||
| 1 | #ifndef _ASM_X86_HWEIGHT_H | ||
| 2 | #define _ASM_X86_HWEIGHT_H | ||
| 3 | |||
| 4 | #ifdef CONFIG_64BIT | ||
| 5 | /* popcnt %edi, %eax -- redundant REX prefix for alignment */ | ||
| 6 | #define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7" | ||
| 7 | /* popcnt %rdi, %rax */ | ||
| 8 | #define POPCNT64 ".byte 0xf3,0x48,0x0f,0xb8,0xc7" | ||
| 9 | #define REG_IN "D" | ||
| 10 | #define REG_OUT "a" | ||
| 11 | #else | ||
| 12 | /* popcnt %eax, %eax */ | ||
| 13 | #define POPCNT32 ".byte 0xf3,0x0f,0xb8,0xc0" | ||
| 14 | #define REG_IN "a" | ||
| 15 | #define REG_OUT "a" | ||
| 16 | #endif | ||
| 17 | |||
| 18 | /* | ||
| 19 | * __sw_hweightXX are called from within the alternatives below | ||
| 20 | * and callee-clobbered registers need to be taken care of. See | ||
| 21 | * ARCH_HWEIGHT_CFLAGS in <arch/x86/Kconfig> for the respective | ||
| 22 | * compiler switches. | ||
| 23 | */ | ||
| 24 | static inline unsigned int __arch_hweight32(unsigned int w) | ||
| 25 | { | ||
| 26 | unsigned int res = 0; | ||
| 27 | |||
| 28 | asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT) | ||
| 29 | : "="REG_OUT (res) | ||
| 30 | : REG_IN (w)); | ||
| 31 | |||
| 32 | return res; | ||
| 33 | } | ||
| 34 | |||
| 35 | static inline unsigned int __arch_hweight16(unsigned int w) | ||
| 36 | { | ||
| 37 | return __arch_hweight32(w & 0xffff); | ||
| 38 | } | ||
| 39 | |||
| 40 | static inline unsigned int __arch_hweight8(unsigned int w) | ||
| 41 | { | ||
| 42 | return __arch_hweight32(w & 0xff); | ||
| 43 | } | ||
| 44 | |||
| 45 | static inline unsigned long __arch_hweight64(__u64 w) | ||
| 46 | { | ||
| 47 | unsigned long res = 0; | ||
| 48 | |||
| 49 | #ifdef CONFIG_X86_32 | ||
| 50 | return __arch_hweight32((u32)w) + | ||
| 51 | __arch_hweight32((u32)(w >> 32)); | ||
| 52 | #else | ||
| 53 | asm (ALTERNATIVE("call __sw_hweight64", POPCNT64, X86_FEATURE_POPCNT) | ||
| 54 | : "="REG_OUT (res) | ||
| 55 | : REG_IN (w)); | ||
| 56 | #endif /* CONFIG_X86_32 */ | ||
| 57 | |||
| 58 | return res; | ||
| 59 | } | ||
| 60 | |||
| 61 | #endif | ||
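
`__arch_hweight32()` above patches in a bare `popcnt` on CPUs with `X86_FEATURE_POPCNT` and otherwise calls `__sw_hweight32`; because the call sits inside an `asm()` with no clobber list, the software fallback has to be compiled with the `ARCH_HWEIGHT_CFLAGS` added to Kconfig earlier in this patch so it preserves the usually call-clobbered registers. For reference, a plain-C sketch of a software population count in the classic divide-and-conquer style (written here for illustration, not a copy of the kernel's implementation):

```c
#include <stdint.h>
#include <stdio.h>

/* Parallel bit count: sum adjacent 1-, 2-, 4-bit fields, then let a
 * multiply fold the per-byte counts into the top byte. */
static unsigned int sw_hweight32(uint32_t w)
{
	w -= (w >> 1) & 0x55555555;
	w  = (w & 0x33333333) + ((w >> 2) & 0x33333333);
	w  = (w + (w >> 4)) & 0x0f0f0f0f;
	return (w * 0x01010101) >> 24;
}

static unsigned int sw_hweight64(uint64_t w)
{
	/* Mirrors the 32-bit path of __arch_hweight64(): two halves. */
	return sw_hweight32((uint32_t)w) + sw_hweight32((uint32_t)(w >> 32));
}

int main(void)
{
	printf("%u %u\n", sw_hweight32(0xf0f0f0f0u),		/* 16 */
	       sw_hweight64(0xffffffffffffffffull));		/* 64 */
	return 0;
}
```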
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 8f8217b9bdac..952a826ac4e5 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
| @@ -22,7 +22,7 @@ | |||
| 22 | */ | 22 | */ |
| 23 | static inline int atomic_read(const atomic_t *v) | 23 | static inline int atomic_read(const atomic_t *v) |
| 24 | { | 24 | { |
| 25 | return v->counter; | 25 | return (*(volatile int *)&(v)->counter); |
| 26 | } | 26 | } |
| 27 | 27 | ||
| 28 | /** | 28 | /** |
| @@ -246,6 +246,29 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) | |||
| 246 | 246 | ||
| 247 | #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) | 247 | #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) |
| 248 | 248 | ||
| 249 | /* | ||
| 250 | * atomic_dec_if_positive - decrement by 1 if old value positive | ||
| 251 | * @v: pointer of type atomic_t | ||
| 252 | * | ||
| 253 | * The function returns the old value of *v minus 1, even if | ||
| 254 | * the atomic variable, v, was not decremented. | ||
| 255 | */ | ||
| 256 | static inline int atomic_dec_if_positive(atomic_t *v) | ||
| 257 | { | ||
| 258 | int c, old, dec; | ||
| 259 | c = atomic_read(v); | ||
| 260 | for (;;) { | ||
| 261 | dec = c - 1; | ||
| 262 | if (unlikely(dec < 0)) | ||
| 263 | break; | ||
| 264 | old = atomic_cmpxchg((v), c, dec); | ||
| 265 | if (likely(old == c)) | ||
| 266 | break; | ||
| 267 | c = old; | ||
| 268 | } | ||
| 269 | return dec; | ||
| 270 | } | ||
| 271 | |||
| 249 | /** | 272 | /** |
| 250 | * atomic_inc_short - increment of a short integer | 273 | * atomic_inc_short - increment of a short integer |
| 251 | * @v: pointer to type int | 274 | * @v: pointer to type int |
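
The new `atomic_dec_if_positive()` is a standard compare-and-swap retry loop: read the counter, refuse if the decrement would go negative, otherwise try to commit and retry on contention, returning the old value minus one either way. A userspace analogue using C11 atomics, e.g. for taking a token from a pool only when one is available (illustrative only, not the kernel API):

```c
#include <stdatomic.h>
#include <stdio.h>

/* Same shape as the new atomic_dec_if_positive(): returns old value minus 1
 * even when the decrement was refused, so "< 0" means "nothing taken". */
static int dec_if_positive(atomic_int *v)
{
	int c = atomic_load(v);

	for (;;) {
		int dec = c - 1;

		if (dec < 0)			/* would go negative: refuse */
			return dec;
		/* Try to commit; on failure c is reloaded with the current value. */
		if (atomic_compare_exchange_weak(v, &c, dec))
			return dec;
	}
}

int main(void)
{
	atomic_int tokens = 1;

	printf("%d\n", dec_if_positive(&tokens));	/*  0: took the token */
	printf("%d\n", dec_if_positive(&tokens));	/* -1: pool is empty  */
	return 0;
}
```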
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 03027bf28de5..2a934aa19a43 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
| @@ -14,109 +14,193 @@ typedef struct { | |||
| 14 | 14 | ||
| 15 | #define ATOMIC64_INIT(val) { (val) } | 15 | #define ATOMIC64_INIT(val) { (val) } |
| 16 | 16 | ||
| 17 | extern u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old_val, u64 new_val); | 17 | #ifdef CONFIG_X86_CMPXCHG64 |
| 18 | #define ATOMIC64_ALTERNATIVE_(f, g) "call atomic64_" #g "_cx8" | ||
| 19 | #else | ||
| 20 | #define ATOMIC64_ALTERNATIVE_(f, g) ALTERNATIVE("call atomic64_" #f "_386", "call atomic64_" #g "_cx8", X86_FEATURE_CX8) | ||
| 21 | #endif | ||
| 22 | |||
| 23 | #define ATOMIC64_ALTERNATIVE(f) ATOMIC64_ALTERNATIVE_(f, f) | ||
| 24 | |||
| 25 | /** | ||
| 26 | * atomic64_cmpxchg - cmpxchg atomic64 variable | ||
| 27 | * @p: pointer to type atomic64_t | ||
| 28 | * @o: expected value | ||
| 29 | * @n: new value | ||
| 30 | * | ||
| 31 | * Atomically sets @v to @n if it was equal to @o and returns | ||
| 32 | * the old value. | ||
| 33 | */ | ||
| 34 | |||
| 35 | static inline long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n) | ||
| 36 | { | ||
| 37 | return cmpxchg64(&v->counter, o, n); | ||
| 38 | } | ||
| 18 | 39 | ||
| 19 | /** | 40 | /** |
| 20 | * atomic64_xchg - xchg atomic64 variable | 41 | * atomic64_xchg - xchg atomic64 variable |
| 21 | * @ptr: pointer to type atomic64_t | 42 | * @v: pointer to type atomic64_t |
| 22 | * @new_val: value to assign | 43 | * @n: value to assign |
| 23 | * | 44 | * |
| 24 | * Atomically xchgs the value of @ptr to @new_val and returns | 45 | * Atomically xchgs the value of @v to @n and returns |
| 25 | * the old value. | 46 | * the old value. |
| 26 | */ | 47 | */ |
| 27 | extern u64 atomic64_xchg(atomic64_t *ptr, u64 new_val); | 48 | static inline long long atomic64_xchg(atomic64_t *v, long long n) |
| 49 | { | ||
| 50 | long long o; | ||
| 51 | unsigned high = (unsigned)(n >> 32); | ||
| 52 | unsigned low = (unsigned)n; | ||
| 53 | asm volatile(ATOMIC64_ALTERNATIVE(xchg) | ||
| 54 | : "=A" (o), "+b" (low), "+c" (high) | ||
| 55 | : "S" (v) | ||
| 56 | : "memory" | ||
| 57 | ); | ||
| 58 | return o; | ||
| 59 | } | ||
| 28 | 60 | ||
| 29 | /** | 61 | /** |
| 30 | * atomic64_set - set atomic64 variable | 62 | * atomic64_set - set atomic64 variable |
| 31 | * @ptr: pointer to type atomic64_t | 63 | * @v: pointer to type atomic64_t |
| 32 | * @new_val: value to assign | 64 | * @n: value to assign |
| 33 | * | 65 | * |
| 34 | * Atomically sets the value of @ptr to @new_val. | 66 | * Atomically sets the value of @v to @n. |
| 35 | */ | 67 | */ |
| 36 | extern void atomic64_set(atomic64_t *ptr, u64 new_val); | 68 | static inline void atomic64_set(atomic64_t *v, long long i) |
| 69 | { | ||
| 70 | unsigned high = (unsigned)(i >> 32); | ||
| 71 | unsigned low = (unsigned)i; | ||
| 72 | asm volatile(ATOMIC64_ALTERNATIVE(set) | ||
| 73 | : "+b" (low), "+c" (high) | ||
| 74 | : "S" (v) | ||
| 75 | : "eax", "edx", "memory" | ||
| 76 | ); | ||
| 77 | } | ||
| 37 | 78 | ||
| 38 | /** | 79 | /** |
| 39 | * atomic64_read - read atomic64 variable | 80 | * atomic64_read - read atomic64 variable |
| 40 | * @ptr: pointer to type atomic64_t | 81 | * @v: pointer to type atomic64_t |
| 41 | * | 82 | * |
| 42 | * Atomically reads the value of @ptr and returns it. | 83 | * Atomically reads the value of @v and returns it. |
| 43 | */ | 84 | */ |
| 44 | static inline u64 atomic64_read(atomic64_t *ptr) | 85 | static inline long long atomic64_read(atomic64_t *v) |
| 45 | { | 86 | { |
| 46 | u64 res; | 87 | long long r; |
| 47 | 88 | asm volatile(ATOMIC64_ALTERNATIVE(read) | |
| 48 | /* | 89 | : "=A" (r), "+c" (v) |
| 49 | * Note, we inline this atomic64_t primitive because | 90 | : : "memory" |
| 50 | * it only clobbers EAX/EDX and leaves the others | 91 | ); |
| 51 | * untouched. We also (somewhat subtly) rely on the | 92 | return r; |
| 52 | * fact that cmpxchg8b returns the current 64-bit value | 93 | } |
| 53 | * of the memory location we are touching: | ||
| 54 | */ | ||
| 55 | asm volatile( | ||
| 56 | "mov %%ebx, %%eax\n\t" | ||
| 57 | "mov %%ecx, %%edx\n\t" | ||
| 58 | LOCK_PREFIX "cmpxchg8b %1\n" | ||
| 59 | : "=&A" (res) | ||
| 60 | : "m" (*ptr) | ||
| 61 | ); | ||
| 62 | |||
| 63 | return res; | ||
| 64 | } | ||
| 65 | |||
| 66 | extern u64 atomic64_read(atomic64_t *ptr); | ||
| 67 | 94 | ||
| 68 | /** | 95 | /** |
| 69 | * atomic64_add_return - add and return | 96 | * atomic64_add_return - add and return |
| 70 | * @delta: integer value to add | 97 | * @i: integer value to add |
| 71 | * @ptr: pointer to type atomic64_t | 98 | * @v: pointer to type atomic64_t |
| 72 | * | 99 | * |
| 73 | * Atomically adds @delta to @ptr and returns @delta + *@ptr | 100 | * Atomically adds @i to @v and returns @i + *@v |
| 74 | */ | 101 | */ |
| 75 | extern u64 atomic64_add_return(u64 delta, atomic64_t *ptr); | 102 | static inline long long atomic64_add_return(long long i, atomic64_t *v) |
| 103 | { | ||
| 104 | asm volatile(ATOMIC64_ALTERNATIVE(add_return) | ||
| 105 | : "+A" (i), "+c" (v) | ||
| 106 | : : "memory" | ||
| 107 | ); | ||
| 108 | return i; | ||
| 109 | } | ||
| 76 | 110 | ||
| 77 | /* | 111 | /* |
| 78 | * Other variants with different arithmetic operators: | 112 | * Other variants with different arithmetic operators: |
| 79 | */ | 113 | */ |
| 80 | extern u64 atomic64_sub_return(u64 delta, atomic64_t *ptr); | 114 | static inline long long atomic64_sub_return(long long i, atomic64_t *v) |
| 81 | extern u64 atomic64_inc_return(atomic64_t *ptr); | 115 | { |
| 82 | extern u64 atomic64_dec_return(atomic64_t *ptr); | 116 | asm volatile(ATOMIC64_ALTERNATIVE(sub_return) |
| 117 | : "+A" (i), "+c" (v) | ||
| 118 | : : "memory" | ||
| 119 | ); | ||
| 120 | return i; | ||
| 121 | } | ||
| 122 | |||
| 123 | static inline long long atomic64_inc_return(atomic64_t *v) | ||
| 124 | { | ||
| 125 | long long a; | ||
| 126 | asm volatile(ATOMIC64_ALTERNATIVE(inc_return) | ||
| 127 | : "=A" (a) | ||
| 128 | : "S" (v) | ||
| 129 | : "memory", "ecx" | ||
| 130 | ); | ||
| 131 | return a; | ||
| 132 | } | ||
| 133 | |||
| 134 | static inline long long atomic64_dec_return(atomic64_t *v) | ||
| 135 | { | ||
| 136 | long long a; | ||
| 137 | asm volatile(ATOMIC64_ALTERNATIVE(dec_return) | ||
| 138 | : "=A" (a) | ||
| 139 | : "S" (v) | ||
| 140 | : "memory", "ecx" | ||
| 141 | ); | ||
| 142 | return a; | ||
| 143 | } | ||
| 83 | 144 | ||
| 84 | /** | 145 | /** |
| 85 | * atomic64_add - add integer to atomic64 variable | 146 | * atomic64_add - add integer to atomic64 variable |
| 86 | * @delta: integer value to add | 147 | * @i: integer value to add |
| 87 | * @ptr: pointer to type atomic64_t | 148 | * @v: pointer to type atomic64_t |
| 88 | * | 149 | * |
| 89 | * Atomically adds @delta to @ptr. | 150 | * Atomically adds @i to @v. |
| 90 | */ | 151 | */ |
| 91 | extern void atomic64_add(u64 delta, atomic64_t *ptr); | 152 | static inline long long atomic64_add(long long i, atomic64_t *v) |
| 153 | { | ||
| 154 | asm volatile(ATOMIC64_ALTERNATIVE_(add, add_return) | ||
| 155 | : "+A" (i), "+c" (v) | ||
| 156 | : : "memory" | ||
| 157 | ); | ||
| 158 | return i; | ||
| 159 | } | ||
| 92 | 160 | ||
| 93 | /** | 161 | /** |
| 94 | * atomic64_sub - subtract the atomic64 variable | 162 | * atomic64_sub - subtract the atomic64 variable |
| 95 | * @delta: integer value to subtract | 163 | * @i: integer value to subtract |
| 96 | * @ptr: pointer to type atomic64_t | 164 | * @v: pointer to type atomic64_t |
| 97 | * | 165 | * |
| 98 | * Atomically subtracts @delta from @ptr. | 166 | * Atomically subtracts @i from @v. |
| 99 | */ | 167 | */ |
| 100 | extern void atomic64_sub(u64 delta, atomic64_t *ptr); | 168 | static inline long long atomic64_sub(long long i, atomic64_t *v) |
| 169 | { | ||
| 170 | asm volatile(ATOMIC64_ALTERNATIVE_(sub, sub_return) | ||
| 171 | : "+A" (i), "+c" (v) | ||
| 172 | : : "memory" | ||
| 173 | ); | ||
| 174 | return i; | ||
| 175 | } | ||
| 101 | 176 | ||
| 102 | /** | 177 | /** |
| 103 | * atomic64_sub_and_test - subtract value from variable and test result | 178 | * atomic64_sub_and_test - subtract value from variable and test result |
| 104 | * @delta: integer value to subtract | 179 | * @i: integer value to subtract |
| 105 | * @ptr: pointer to type atomic64_t | 180 | * @v: pointer to type atomic64_t |
| 106 | * | 181 | * |
| 107 | * Atomically subtracts @delta from @ptr and returns | 182 | * Atomically subtracts @i from @v and returns |
| 108 | * true if the result is zero, or false for all | 183 | * true if the result is zero, or false for all |
| 109 | * other cases. | 184 | * other cases. |
| 110 | */ | 185 | */ |
| 111 | extern int atomic64_sub_and_test(u64 delta, atomic64_t *ptr); | 186 | static inline int atomic64_sub_and_test(long long i, atomic64_t *v) |
| 187 | { | ||
| 188 | return atomic64_sub_return(i, v) == 0; | ||
| 189 | } | ||
| 112 | 190 | ||
| 113 | /** | 191 | /** |
| 114 | * atomic64_inc - increment atomic64 variable | 192 | * atomic64_inc - increment atomic64 variable |
| 115 | * @ptr: pointer to type atomic64_t | 193 | * @v: pointer to type atomic64_t |
| 116 | * | 194 | * |
| 117 | * Atomically increments @ptr by 1. | 195 | * Atomically increments @v by 1. |
| 118 | */ | 196 | */ |
| 119 | extern void atomic64_inc(atomic64_t *ptr); | 197 | static inline void atomic64_inc(atomic64_t *v) |
| 198 | { | ||
| 199 | asm volatile(ATOMIC64_ALTERNATIVE_(inc, inc_return) | ||
| 200 | : : "S" (v) | ||
| 201 | : "memory", "eax", "ecx", "edx" | ||
| 202 | ); | ||
| 203 | } | ||
| 120 | 204 | ||
| 121 | /** | 205 | /** |
| 122 | * atomic64_dec - decrement atomic64 variable | 206 | * atomic64_dec - decrement atomic64 variable |
| @@ -124,37 +208,97 @@ extern void atomic64_inc(atomic64_t *ptr); | |||
| 124 | * | 208 | * |
| 125 | * Atomically decrements @ptr by 1. | 209 | * Atomically decrements @ptr by 1. |
| 126 | */ | 210 | */ |
| 127 | extern void atomic64_dec(atomic64_t *ptr); | 211 | static inline void atomic64_dec(atomic64_t *v) |
| 212 | { | ||
| 213 | asm volatile(ATOMIC64_ALTERNATIVE_(dec, dec_return) | ||
| 214 | : : "S" (v) | ||
| 215 | : "memory", "eax", "ecx", "edx" | ||
| 216 | ); | ||
| 217 | } | ||
| 128 | 218 | ||
| 129 | /** | 219 | /** |
| 130 | * atomic64_dec_and_test - decrement and test | 220 | * atomic64_dec_and_test - decrement and test |
| 131 | * @ptr: pointer to type atomic64_t | 221 | * @v: pointer to type atomic64_t |
| 132 | * | 222 | * |
| 133 | * Atomically decrements @ptr by 1 and | 223 | * Atomically decrements @v by 1 and |
| 134 | * returns true if the result is 0, or false for all other | 224 | * returns true if the result is 0, or false for all other |
| 135 | * cases. | 225 | * cases. |
| 136 | */ | 226 | */ |
| 137 | extern int atomic64_dec_and_test(atomic64_t *ptr); | 227 | static inline int atomic64_dec_and_test(atomic64_t *v) |
| 228 | { | ||
| 229 | return atomic64_dec_return(v) == 0; | ||
| 230 | } | ||
| 138 | 231 | ||
| 139 | /** | 232 | /** |
| 140 | * atomic64_inc_and_test - increment and test | 233 | * atomic64_inc_and_test - increment and test |
| 141 | * @ptr: pointer to type atomic64_t | 234 | * @v: pointer to type atomic64_t |
| 142 | * | 235 | * |
| 143 | * Atomically increments @ptr by 1 | 236 | * Atomically increments @v by 1 |
| 144 | * and returns true if the result is zero, or false for all | 237 | * and returns true if the result is zero, or false for all |
| 145 | * other cases. | 238 | * other cases. |
| 146 | */ | 239 | */ |
| 147 | extern int atomic64_inc_and_test(atomic64_t *ptr); | 240 | static inline int atomic64_inc_and_test(atomic64_t *v) |
| 241 | { | ||
| 242 | return atomic64_inc_return(v) == 0; | ||
| 243 | } | ||
| 148 | 244 | ||
| 149 | /** | 245 | /** |
| 150 | * atomic64_add_negative - add and test if negative | 246 | * atomic64_add_negative - add and test if negative |
| 151 | * @delta: integer value to add | 247 | * @i: integer value to add |
| 152 | * @ptr: pointer to type atomic64_t | 248 | * @v: pointer to type atomic64_t |
| 153 | * | 249 | * |
| 154 | * Atomically adds @delta to @ptr and returns true | 250 | * Atomically adds @i to @v and returns true |
| 155 | * if the result is negative, or false when | 251 | * if the result is negative, or false when |
| 156 | * result is greater than or equal to zero. | 252 | * result is greater than or equal to zero. |
| 157 | */ | 253 | */ |
| 158 | extern int atomic64_add_negative(u64 delta, atomic64_t *ptr); | 254 | static inline int atomic64_add_negative(long long i, atomic64_t *v) |
| 255 | { | ||
| 256 | return atomic64_add_return(i, v) < 0; | ||
| 257 | } | ||
| 258 | |||
| 259 | /** | ||
| 260 | * atomic64_add_unless - add unless the number is a given value | ||
| 261 | * @v: pointer of type atomic64_t | ||
| 262 | * @a: the amount to add to v... | ||
| 263 | * @u: ...unless v is equal to u. | ||
| 264 | * | ||
| 265 | * Atomically adds @a to @v, so long as it was not @u. | ||
| 266 | * Returns non-zero if @v was not @u, and zero otherwise. | ||
| 267 | */ | ||
| 268 | static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u) | ||
| 269 | { | ||
| 270 | unsigned low = (unsigned)u; | ||
| 271 | unsigned high = (unsigned)(u >> 32); | ||
| 272 | asm volatile(ATOMIC64_ALTERNATIVE(add_unless) "\n\t" | ||
| 273 | : "+A" (a), "+c" (v), "+S" (low), "+D" (high) | ||
| 274 | : : "memory"); | ||
| 275 | return (int)a; | ||
| 276 | } | ||
| 277 | |||
| 278 | |||
| 279 | static inline int atomic64_inc_not_zero(atomic64_t *v) | ||
| 280 | { | ||
| 281 | int r; | ||
| 282 | asm volatile(ATOMIC64_ALTERNATIVE(inc_not_zero) | ||
| 283 | : "=a" (r) | ||
| 284 | : "S" (v) | ||
| 285 | : "ecx", "edx", "memory" | ||
| 286 | ); | ||
| 287 | return r; | ||
| 288 | } | ||
| 289 | |||
| 290 | static inline long long atomic64_dec_if_positive(atomic64_t *v) | ||
| 291 | { | ||
| 292 | long long r; | ||
| 293 | asm volatile(ATOMIC64_ALTERNATIVE(dec_if_positive) | ||
| 294 | : "=A" (r) | ||
| 295 | : "S" (v) | ||
| 296 | : "ecx", "memory" | ||
| 297 | ); | ||
| 298 | return r; | ||
| 299 | } | ||
| 300 | |||
| 301 | #undef ATOMIC64_ALTERNATIVE | ||
| 302 | #undef ATOMIC64_ALTERNATIVE_ | ||
| 159 | 303 | ||
| 160 | #endif /* _ASM_X86_ATOMIC64_32_H */ | 304 | #endif /* _ASM_X86_ATOMIC64_32_H */ |
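The new 32-bit implementations above all funnel into out-of-line cmpxchg8b-based routines selected by ATOMIC64_ALTERNATIVE, but callers only depend on the documented read-modify-write semantics. Below is a userspace-only model of what atomic64_add_unless() promises, using GCC's __sync builtin as a stand-in for the kernel's cmpxchg8b path; the model_* names are invented for illustration and this is not the kernel code.

```c
/* Illustrative model of atomic64_add_unless() semantics only. */
#include <stdio.h>

typedef struct { volatile long long counter; } atomic64_model_t;

static int model_add_unless(atomic64_model_t *v, long long a, long long u)
{
	long long old, seen;

	old = v->counter;
	for (;;) {
		if (old == u)
			return 0;	/* refused: value was @u */
		seen = __sync_val_compare_and_swap(&v->counter, old, old + a);
		if (seen == old)
			return 1;	/* @a added atomically */
		old = seen;		/* lost a race, retry */
	}
}

int main(void)
{
	atomic64_model_t v = { 3 };

	printf("%d %lld\n", model_add_unless(&v, 1, 0), v.counter); /* 1 4 */
	printf("%d %lld\n", model_add_unless(&v, 1, 4), v.counter); /* 0 4 */
	return 0;
}
```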
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 51c5b4056929..49fd1ea22951 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h | |||
| @@ -18,7 +18,7 @@ | |||
| 18 | */ | 18 | */ |
| 19 | static inline long atomic64_read(const atomic64_t *v) | 19 | static inline long atomic64_read(const atomic64_t *v) |
| 20 | { | 20 | { |
| 21 | return v->counter; | 21 | return (*(volatile long *)&(v)->counter); |
| 22 | } | 22 | } |
| 23 | 23 | ||
| 24 | /** | 24 | /** |
| @@ -221,4 +221,27 @@ static inline int atomic64_add_unless(atomic64_t *v, long a, long u) | |||
| 221 | 221 | ||
| 222 | #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) | 222 | #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) |
| 223 | 223 | ||
| 224 | /* | ||
| 225 | * atomic64_dec_if_positive - decrement by 1 if old value positive | ||
| 226 | * @v: pointer of type atomic_t | ||
| 227 | * | ||
| 228 | * The function returns the old value of *v minus 1, even if | ||
| 229 | * the atomic variable, v, was not decremented. | ||
| 230 | */ | ||
| 231 | static inline long atomic64_dec_if_positive(atomic64_t *v) | ||
| 232 | { | ||
| 233 | long c, old, dec; | ||
| 234 | c = atomic64_read(v); | ||
| 235 | for (;;) { | ||
| 236 | dec = c - 1; | ||
| 237 | if (unlikely(dec < 0)) | ||
| 238 | break; | ||
| 239 | old = atomic64_cmpxchg((v), c, dec); | ||
| 240 | if (likely(old == c)) | ||
| 241 | break; | ||
| 242 | c = old; | ||
| 243 | } | ||
| 244 | return dec; | ||
| 245 | } | ||
| 246 | |||
| 224 | #endif /* _ASM_X86_ATOMIC64_64_H */ | 247 | #endif /* _ASM_X86_ATOMIC64_64_H */ |
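The atomic64_dec_if_positive() added above is a plain cmpxchg retry loop; a caller treats it as "take one if any are left". A minimal sketch of such a caller follows (kernel context assumed; the demo_* names are invented, this is not code from the commit).

```c
#include <linux/types.h>
#include <asm/atomic.h>

static atomic64_t demo_tokens = ATOMIC64_INIT(8);

static bool demo_take_token(void)
{
	/*
	 * atomic64_dec_if_positive() returns the old value minus one;
	 * a negative result means the counter was already zero and
	 * nothing was taken.
	 */
	return atomic64_dec_if_positive(&demo_tokens) >= 0;
}
```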
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 02b47a603fc8..545776efeb16 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h | |||
| @@ -444,7 +444,9 @@ static inline int fls(int x) | |||
| 444 | 444 | ||
| 445 | #define ARCH_HAS_FAST_MULTIPLIER 1 | 445 | #define ARCH_HAS_FAST_MULTIPLIER 1 |
| 446 | 446 | ||
| 447 | #include <asm-generic/bitops/hweight.h> | 447 | #include <asm/arch_hweight.h> |
| 448 | |||
| 449 | #include <asm-generic/bitops/const_hweight.h> | ||
| 448 | 450 | ||
| 449 | #endif /* __KERNEL__ */ | 451 | #endif /* __KERNEL__ */ |
| 450 | 452 | ||
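The bitops.h change swaps the generic hweight implementation for the x86 arch_hweight variant plus a compile-time const_hweight; the computed value is unchanged. For reference, a standalone illustration of what hweight32() computes (population count), not the popcnt/alternatives-based kernel code:

```c
#include <stdio.h>

static unsigned int model_hweight32(unsigned int w)
{
	unsigned int count = 0;

	while (w) {
		w &= w - 1;	/* clear the lowest set bit */
		count++;
	}
	return count;
}

int main(void)
{
	printf("%u\n", model_hweight32(0xf0f0u));	/* prints 8 */
	return 0;
}
```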
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 7a1065958ba9..3b62ab56c7a0 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h | |||
| @@ -24,7 +24,7 @@ | |||
| 24 | #define MIN_KERNEL_ALIGN (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2) | 24 | #define MIN_KERNEL_ALIGN (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2) |
| 25 | 25 | ||
| 26 | #if (CONFIG_PHYSICAL_ALIGN & (CONFIG_PHYSICAL_ALIGN-1)) || \ | 26 | #if (CONFIG_PHYSICAL_ALIGN & (CONFIG_PHYSICAL_ALIGN-1)) || \ |
| 27 | (CONFIG_PHYSICAL_ALIGN < (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2)) | 27 | (CONFIG_PHYSICAL_ALIGN < MIN_KERNEL_ALIGN) |
| 28 | #error "Invalid value for CONFIG_PHYSICAL_ALIGN" | 28 | #error "Invalid value for CONFIG_PHYSICAL_ALIGN" |
| 29 | #endif | 29 | #endif |
| 30 | 30 | ||
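The simplified preprocessor test above still relies on the usual power-of-two idiom: a nonzero x is a power of two exactly when (x & (x - 1)) == 0. A standalone illustration, unrelated to the boot code itself:

```c
#include <stdio.h>

static int is_power_of_two(unsigned long x)
{
	return x && !(x & (x - 1));
}

int main(void)
{
	/* prints "1 0": 2 MiB qualifies, 1.5 MiB does not */
	printf("%d %d\n", is_power_of_two(0x200000), is_power_of_two(0x180000));
	return 0;
}
```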
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index 634c40a739a6..63e35ec9075c 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h | |||
| @@ -44,9 +44,6 @@ static inline void copy_from_user_page(struct vm_area_struct *vma, | |||
| 44 | memcpy(dst, src, len); | 44 | memcpy(dst, src, len); |
| 45 | } | 45 | } |
| 46 | 46 | ||
| 47 | #define PG_WC PG_arch_1 | ||
| 48 | PAGEFLAG(WC, WC) | ||
| 49 | |||
| 50 | #ifdef CONFIG_X86_PAT | 47 | #ifdef CONFIG_X86_PAT |
| 51 | /* | 48 | /* |
| 52 | * X86 PAT uses page flags WC and Uncached together to keep track of | 49 | * X86 PAT uses page flags WC and Uncached together to keep track of |
| @@ -55,16 +52,24 @@ PAGEFLAG(WC, WC) | |||
| 55 | * _PAGE_CACHE_UC_MINUS and fourth state where page's memory type has not | 52 | * _PAGE_CACHE_UC_MINUS and fourth state where page's memory type has not |
| 56 | * been changed from its default (value of -1 used to denote this). | 53 | * been changed from its default (value of -1 used to denote this). |
| 57 | * Note we do not support _PAGE_CACHE_UC here. | 54 | * Note we do not support _PAGE_CACHE_UC here. |
| 58 | * | ||
| 59 | * Caller must hold memtype_lock for atomicity. | ||
| 60 | */ | 55 | */ |
| 56 | |||
| 57 | #define _PGMT_DEFAULT 0 | ||
| 58 | #define _PGMT_WC (1UL << PG_arch_1) | ||
| 59 | #define _PGMT_UC_MINUS (1UL << PG_uncached) | ||
| 60 | #define _PGMT_WB (1UL << PG_uncached | 1UL << PG_arch_1) | ||
| 61 | #define _PGMT_MASK (1UL << PG_uncached | 1UL << PG_arch_1) | ||
| 62 | #define _PGMT_CLEAR_MASK (~_PGMT_MASK) | ||
| 63 | |||
| 61 | static inline unsigned long get_page_memtype(struct page *pg) | 64 | static inline unsigned long get_page_memtype(struct page *pg) |
| 62 | { | 65 | { |
| 63 | if (!PageUncached(pg) && !PageWC(pg)) | 66 | unsigned long pg_flags = pg->flags & _PGMT_MASK; |
| 67 | |||
| 68 | if (pg_flags == _PGMT_DEFAULT) | ||
| 64 | return -1; | 69 | return -1; |
| 65 | else if (!PageUncached(pg) && PageWC(pg)) | 70 | else if (pg_flags == _PGMT_WC) |
| 66 | return _PAGE_CACHE_WC; | 71 | return _PAGE_CACHE_WC; |
| 67 | else if (PageUncached(pg) && !PageWC(pg)) | 72 | else if (pg_flags == _PGMT_UC_MINUS) |
| 68 | return _PAGE_CACHE_UC_MINUS; | 73 | return _PAGE_CACHE_UC_MINUS; |
| 69 | else | 74 | else |
| 70 | return _PAGE_CACHE_WB; | 75 | return _PAGE_CACHE_WB; |
| @@ -72,25 +77,26 @@ static inline unsigned long get_page_memtype(struct page *pg) | |||
| 72 | 77 | ||
| 73 | static inline void set_page_memtype(struct page *pg, unsigned long memtype) | 78 | static inline void set_page_memtype(struct page *pg, unsigned long memtype) |
| 74 | { | 79 | { |
| 80 | unsigned long memtype_flags = _PGMT_DEFAULT; | ||
| 81 | unsigned long old_flags; | ||
| 82 | unsigned long new_flags; | ||
| 83 | |||
| 75 | switch (memtype) { | 84 | switch (memtype) { |
| 76 | case _PAGE_CACHE_WC: | 85 | case _PAGE_CACHE_WC: |
| 77 | ClearPageUncached(pg); | 86 | memtype_flags = _PGMT_WC; |
| 78 | SetPageWC(pg); | ||
| 79 | break; | 87 | break; |
| 80 | case _PAGE_CACHE_UC_MINUS: | 88 | case _PAGE_CACHE_UC_MINUS: |
| 81 | SetPageUncached(pg); | 89 | memtype_flags = _PGMT_UC_MINUS; |
| 82 | ClearPageWC(pg); | ||
| 83 | break; | 90 | break; |
| 84 | case _PAGE_CACHE_WB: | 91 | case _PAGE_CACHE_WB: |
| 85 | SetPageUncached(pg); | 92 | memtype_flags = _PGMT_WB; |
| 86 | SetPageWC(pg); | ||
| 87 | break; | ||
| 88 | default: | ||
| 89 | case -1: | ||
| 90 | ClearPageUncached(pg); | ||
| 91 | ClearPageWC(pg); | ||
| 92 | break; | 93 | break; |
| 93 | } | 94 | } |
| 95 | |||
| 96 | do { | ||
| 97 | old_flags = pg->flags; | ||
| 98 | new_flags = (old_flags & _PGMT_CLEAR_MASK) | memtype_flags; | ||
| 99 | } while (cmpxchg(&pg->flags, old_flags, new_flags) != old_flags); | ||
| 94 | } | 100 | } |
| 95 | #else | 101 | #else |
| 96 | static inline unsigned long get_page_memtype(struct page *pg) { return -1; } | 102 | static inline unsigned long get_page_memtype(struct page *pg) { return -1; } |
| @@ -139,9 +145,11 @@ int set_memory_np(unsigned long addr, int numpages); | |||
| 139 | int set_memory_4k(unsigned long addr, int numpages); | 145 | int set_memory_4k(unsigned long addr, int numpages); |
| 140 | 146 | ||
| 141 | int set_memory_array_uc(unsigned long *addr, int addrinarray); | 147 | int set_memory_array_uc(unsigned long *addr, int addrinarray); |
| 148 | int set_memory_array_wc(unsigned long *addr, int addrinarray); | ||
| 142 | int set_memory_array_wb(unsigned long *addr, int addrinarray); | 149 | int set_memory_array_wb(unsigned long *addr, int addrinarray); |
| 143 | 150 | ||
| 144 | int set_pages_array_uc(struct page **pages, int addrinarray); | 151 | int set_pages_array_uc(struct page **pages, int addrinarray); |
| 152 | int set_pages_array_wc(struct page **pages, int addrinarray); | ||
| 145 | int set_pages_array_wb(struct page **pages, int addrinarray); | 153 | int set_pages_array_wb(struct page **pages, int addrinarray); |
| 146 | 154 | ||
| 147 | /* | 155 | /* |
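The rewritten set_page_memtype() drops the memtype_lock requirement by updating page->flags with a compare-and-swap retry loop: re-read, recompute, retry until nobody else changed the word in between. A userspace model of that pattern, with GCC builtins standing in for the kernel's cmpxchg():

```c
#include <stdio.h>

static unsigned long demo_flags;

static void set_bits_lockfree(unsigned long *word, unsigned long clear_mask,
			      unsigned long set_bits)
{
	unsigned long old, new;

	do {
		old = *word;
		new = (old & ~clear_mask) | set_bits;
	} while (!__sync_bool_compare_and_swap(word, old, new));
}

int main(void)
{
	set_bits_lockfree(&demo_flags, 0x3UL, 0x2UL);
	printf("%#lx\n", demo_flags);	/* prints 0x2 */
	return 0;
}
```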
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index ffb9bb6b6c37..8859e12dd3cf 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h | |||
| @@ -271,7 +271,8 @@ extern unsigned long long cmpxchg_486_u64(volatile void *, u64, u64); | |||
| 271 | __typeof__(*(ptr)) __ret; \ | 271 | __typeof__(*(ptr)) __ret; \ |
| 272 | __typeof__(*(ptr)) __old = (o); \ | 272 | __typeof__(*(ptr)) __old = (o); \ |
| 273 | __typeof__(*(ptr)) __new = (n); \ | 273 | __typeof__(*(ptr)) __new = (n); \ |
| 274 | alternative_io("call cmpxchg8b_emu", \ | 274 | alternative_io(LOCK_PREFIX_HERE \ |
| 275 | "call cmpxchg8b_emu", \ | ||
| 275 | "lock; cmpxchg8b (%%esi)" , \ | 276 | "lock; cmpxchg8b (%%esi)" , \ |
| 276 | X86_FEATURE_CX8, \ | 277 | X86_FEATURE_CX8, \ |
| 277 | "=A" (__ret), \ | 278 | "=A" (__ret), \ |
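Whichever path the alternative above patches in (the cmpxchg8b_emu call or the native lock cmpxchg8b), a caller of cmpxchg64() just sees an atomic 64-bit compare-and-exchange. A hedged sketch of such a caller, assuming a kernel context; the demo_* names are invented:

```c
#include <linux/types.h>
#include <asm/cmpxchg.h>

static u64 demo_seq;

static u64 demo_advance(u64 expected)
{
	/* Returns the value observed; it equals @expected on success. */
	return cmpxchg64(&demo_seq, expected, expected + 1);
}
```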
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 0cd82d068613..468145914389 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h | |||
| @@ -161,6 +161,7 @@ | |||
| 161 | */ | 161 | */ |
| 162 | #define X86_FEATURE_IDA (7*32+ 0) /* Intel Dynamic Acceleration */ | 162 | #define X86_FEATURE_IDA (7*32+ 0) /* Intel Dynamic Acceleration */ |
| 163 | #define X86_FEATURE_ARAT (7*32+ 1) /* Always Running APIC Timer */ | 163 | #define X86_FEATURE_ARAT (7*32+ 1) /* Always Running APIC Timer */ |
| 164 | #define X86_FEATURE_CPB (7*32+ 2) /* AMD Core Performance Boost */ | ||
| 164 | 165 | ||
| 165 | /* Virtualization flags: Linux defined */ | 166 | /* Virtualization flags: Linux defined */ |
| 166 | #define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ | 167 | #define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ |
| @@ -175,6 +176,7 @@ | |||
| 175 | 176 | ||
| 176 | #if defined(__KERNEL__) && !defined(__ASSEMBLY__) | 177 | #if defined(__KERNEL__) && !defined(__ASSEMBLY__) |
| 177 | 178 | ||
| 179 | #include <asm/asm.h> | ||
| 178 | #include <linux/bitops.h> | 180 | #include <linux/bitops.h> |
| 179 | 181 | ||
| 180 | extern const char * const x86_cap_flags[NCAPINTS*32]; | 182 | extern const char * const x86_cap_flags[NCAPINTS*32]; |
| @@ -283,6 +285,69 @@ extern const char * const x86_power_flags[32]; | |||
| 283 | 285 | ||
| 284 | #endif /* CONFIG_X86_64 */ | 286 | #endif /* CONFIG_X86_64 */ |
| 285 | 287 | ||
| 288 | /* | ||
| 289 | * Static testing of CPU features. Used the same as boot_cpu_has(). | ||
| 290 | * These are only valid after alternatives have run, but will statically | ||
| 291 | * patch the target code for additional performance. | ||
| 292 | * | ||
| 293 | */ | ||
| 294 | static __always_inline __pure bool __static_cpu_has(u8 bit) | ||
| 295 | { | ||
| 296 | #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5) | ||
| 297 | asm goto("1: jmp %l[t_no]\n" | ||
| 298 | "2:\n" | ||
| 299 | ".section .altinstructions,\"a\"\n" | ||
| 300 | _ASM_ALIGN "\n" | ||
| 301 | _ASM_PTR "1b\n" | ||
| 302 | _ASM_PTR "0\n" /* no replacement */ | ||
| 303 | " .byte %P0\n" /* feature bit */ | ||
| 304 | " .byte 2b - 1b\n" /* source len */ | ||
| 305 | " .byte 0\n" /* replacement len */ | ||
| 306 | " .byte 0xff + 0 - (2b-1b)\n" /* padding */ | ||
| 307 | ".previous\n" | ||
| 308 | : : "i" (bit) : : t_no); | ||
| 309 | return true; | ||
| 310 | t_no: | ||
| 311 | return false; | ||
| 312 | #else | ||
| 313 | u8 flag; | ||
| 314 | /* Open-coded due to __stringify() in ALTERNATIVE() */ | ||
| 315 | asm volatile("1: movb $0,%0\n" | ||
| 316 | "2:\n" | ||
| 317 | ".section .altinstructions,\"a\"\n" | ||
| 318 | _ASM_ALIGN "\n" | ||
| 319 | _ASM_PTR "1b\n" | ||
| 320 | _ASM_PTR "3f\n" | ||
| 321 | " .byte %P1\n" /* feature bit */ | ||
| 322 | " .byte 2b - 1b\n" /* source len */ | ||
| 323 | " .byte 4f - 3f\n" /* replacement len */ | ||
| 324 | " .byte 0xff + (4f-3f) - (2b-1b)\n" /* padding */ | ||
| 325 | ".previous\n" | ||
| 326 | ".section .altinstr_replacement,\"ax\"\n" | ||
| 327 | "3: movb $1,%0\n" | ||
| 328 | "4:\n" | ||
| 329 | ".previous\n" | ||
| 330 | : "=qm" (flag) : "i" (bit)); | ||
| 331 | return flag; | ||
| 332 | #endif | ||
| 333 | } | ||
| 334 | |||
| 335 | #if __GNUC__ >= 4 | ||
| 336 | #define static_cpu_has(bit) \ | ||
| 337 | ( \ | ||
| 338 | __builtin_constant_p(boot_cpu_has(bit)) ? \ | ||
| 339 | boot_cpu_has(bit) : \ | ||
| 340 | (__builtin_constant_p(bit) && !((bit) & ~0xff)) ? \ | ||
| 341 | __static_cpu_has(bit) : \ | ||
| 342 | boot_cpu_has(bit) \ | ||
| 343 | ) | ||
| 344 | #else | ||
| 345 | /* | ||
| 346 | * gcc 3.x is too stupid to do the static test; fall back to dynamic. | ||
| 347 | */ | ||
| 348 | #define static_cpu_has(bit) boot_cpu_has(bit) | ||
| 349 | #endif | ||
| 350 | |||
| 286 | #endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */ | 351 | #endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */ |
| 287 | 352 | ||
| 288 | #endif /* _ASM_X86_CPUFEATURE_H */ | 353 | #endif /* _ASM_X86_CPUFEATURE_H */ |
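As the comment above notes, static_cpu_has() is only valid after alternatives have run, but it then patches the test down to a constant jump rather than reading the feature bitmap each time. A minimal caller sketch (kernel context assumed; the demo helper name is invented, though the i387.h change later in this diff uses the same pattern for X86_FEATURE_XSAVE):

```c
#include <linux/types.h>
#include <asm/cpufeature.h>

static inline bool demo_have_popcnt(void)
{
	/* Hot-path check, patched to a constant branch by alternatives. */
	return static_cpu_has(X86_FEATURE_POPCNT);
}
```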
diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h deleted file mode 100644 index 70dac199b093..000000000000 --- a/arch/x86/include/asm/ds.h +++ /dev/null | |||
| @@ -1,302 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Debug Store (DS) support | ||
| 3 | * | ||
| 4 | * This provides a low-level interface to the hardware's Debug Store | ||
| 5 | * feature that is used for branch trace store (BTS) and | ||
| 6 | * precise-event based sampling (PEBS). | ||
| 7 | * | ||
| 8 | * It manages: | ||
| 9 | * - DS and BTS hardware configuration | ||
| 10 | * - buffer overflow handling (to be done) | ||
| 11 | * - buffer access | ||
| 12 | * | ||
| 13 | * It does not do: | ||
| 14 | * - security checking (is the caller allowed to trace the task) | ||
| 15 | * - buffer allocation (memory accounting) | ||
| 16 | * | ||
| 17 | * | ||
| 18 | * Copyright (C) 2007-2009 Intel Corporation. | ||
| 19 | * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009 | ||
| 20 | */ | ||
| 21 | |||
| 22 | #ifndef _ASM_X86_DS_H | ||
| 23 | #define _ASM_X86_DS_H | ||
| 24 | |||
| 25 | |||
| 26 | #include <linux/types.h> | ||
| 27 | #include <linux/init.h> | ||
| 28 | #include <linux/err.h> | ||
| 29 | |||
| 30 | |||
| 31 | #ifdef CONFIG_X86_DS | ||
| 32 | |||
| 33 | struct task_struct; | ||
| 34 | struct ds_context; | ||
| 35 | struct ds_tracer; | ||
| 36 | struct bts_tracer; | ||
| 37 | struct pebs_tracer; | ||
| 38 | |||
| 39 | typedef void (*bts_ovfl_callback_t)(struct bts_tracer *); | ||
| 40 | typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *); | ||
| 41 | |||
| 42 | |||
| 43 | /* | ||
| 44 | * A list of features plus corresponding macros to talk about them in | ||
| 45 | * the ds_request function's flags parameter. | ||
| 46 | * | ||
| 47 | * We use the enum to index an array of corresponding control bits; | ||
| 48 | * we use the macro to index a flags bit-vector. | ||
| 49 | */ | ||
| 50 | enum ds_feature { | ||
| 51 | dsf_bts = 0, | ||
| 52 | dsf_bts_kernel, | ||
| 53 | #define BTS_KERNEL (1 << dsf_bts_kernel) | ||
| 54 | /* trace kernel-mode branches */ | ||
| 55 | |||
| 56 | dsf_bts_user, | ||
| 57 | #define BTS_USER (1 << dsf_bts_user) | ||
| 58 | /* trace user-mode branches */ | ||
| 59 | |||
| 60 | dsf_bts_overflow, | ||
| 61 | dsf_bts_max, | ||
| 62 | dsf_pebs = dsf_bts_max, | ||
| 63 | |||
| 64 | dsf_pebs_max, | ||
| 65 | dsf_ctl_max = dsf_pebs_max, | ||
| 66 | dsf_bts_timestamps = dsf_ctl_max, | ||
| 67 | #define BTS_TIMESTAMPS (1 << dsf_bts_timestamps) | ||
| 68 | /* add timestamps into BTS trace */ | ||
| 69 | |||
| 70 | #define BTS_USER_FLAGS (BTS_KERNEL | BTS_USER | BTS_TIMESTAMPS) | ||
| 71 | }; | ||
| 72 | |||
| 73 | |||
| 74 | /* | ||
| 75 | * Request BTS or PEBS | ||
| 76 | * | ||
| 77 | * Due to alignment constraints, the actual buffer may be slightly | ||
| 78 | * smaller than the requested or provided buffer. | ||
| 79 | * | ||
| 80 | * Returns a pointer to a tracer structure on success, or | ||
| 81 | * ERR_PTR(errcode) on failure. | ||
| 82 | * | ||
| 83 | * The interrupt threshold is independent from the overflow callback | ||
| 84 | * to allow users to use their own overflow interrupt handling mechanism. | ||
| 85 | * | ||
| 86 | * The function might sleep. | ||
| 87 | * | ||
| 88 | * task: the task to request recording for | ||
| 89 | * cpu: the cpu to request recording for | ||
| 90 | * base: the base pointer for the (non-pageable) buffer; | ||
| 91 | * size: the size of the provided buffer in bytes | ||
| 92 | * ovfl: pointer to a function to be called on buffer overflow; | ||
| 93 | * NULL if cyclic buffer requested | ||
| 94 | * th: the interrupt threshold in records from the end of the buffer; | ||
| 95 | * -1 if no interrupt threshold is requested. | ||
| 96 | * flags: a bit-mask of the above flags | ||
| 97 | */ | ||
| 98 | extern struct bts_tracer *ds_request_bts_task(struct task_struct *task, | ||
| 99 | void *base, size_t size, | ||
| 100 | bts_ovfl_callback_t ovfl, | ||
| 101 | size_t th, unsigned int flags); | ||
| 102 | extern struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size, | ||
| 103 | bts_ovfl_callback_t ovfl, | ||
| 104 | size_t th, unsigned int flags); | ||
| 105 | extern struct pebs_tracer *ds_request_pebs_task(struct task_struct *task, | ||
| 106 | void *base, size_t size, | ||
| 107 | pebs_ovfl_callback_t ovfl, | ||
| 108 | size_t th, unsigned int flags); | ||
| 109 | extern struct pebs_tracer *ds_request_pebs_cpu(int cpu, | ||
| 110 | void *base, size_t size, | ||
| 111 | pebs_ovfl_callback_t ovfl, | ||
| 112 | size_t th, unsigned int flags); | ||
| 113 | |||
| 114 | /* | ||
| 115 | * Release BTS or PEBS resources | ||
| 116 | * Suspend and resume BTS or PEBS tracing | ||
| 117 | * | ||
| 118 | * Must be called with irq's enabled. | ||
| 119 | * | ||
| 120 | * tracer: the tracer handle returned from ds_request_~() | ||
| 121 | */ | ||
| 122 | extern void ds_release_bts(struct bts_tracer *tracer); | ||
| 123 | extern void ds_suspend_bts(struct bts_tracer *tracer); | ||
| 124 | extern void ds_resume_bts(struct bts_tracer *tracer); | ||
| 125 | extern void ds_release_pebs(struct pebs_tracer *tracer); | ||
| 126 | extern void ds_suspend_pebs(struct pebs_tracer *tracer); | ||
| 127 | extern void ds_resume_pebs(struct pebs_tracer *tracer); | ||
| 128 | |||
| 129 | /* | ||
| 130 | * Release BTS or PEBS resources | ||
| 131 | * Suspend and resume BTS or PEBS tracing | ||
| 132 | * | ||
| 133 | * Cpu tracers must call this on the traced cpu. | ||
| 134 | * Task tracers must call ds_release_~_noirq() for themselves. | ||
| 135 | * | ||
| 136 | * May be called with irq's disabled. | ||
| 137 | * | ||
| 138 | * Returns 0 if successful; | ||
| 139 | * -EPERM if the cpu tracer does not trace the current cpu. | ||
| 140 | * -EPERM if the task tracer does not trace itself. | ||
| 141 | * | ||
| 142 | * tracer: the tracer handle returned from ds_request_~() | ||
| 143 | */ | ||
| 144 | extern int ds_release_bts_noirq(struct bts_tracer *tracer); | ||
| 145 | extern int ds_suspend_bts_noirq(struct bts_tracer *tracer); | ||
| 146 | extern int ds_resume_bts_noirq(struct bts_tracer *tracer); | ||
| 147 | extern int ds_release_pebs_noirq(struct pebs_tracer *tracer); | ||
| 148 | extern int ds_suspend_pebs_noirq(struct pebs_tracer *tracer); | ||
| 149 | extern int ds_resume_pebs_noirq(struct pebs_tracer *tracer); | ||
| 150 | |||
| 151 | |||
| 152 | /* | ||
| 153 | * The raw DS buffer state as it is used for BTS and PEBS recording. | ||
| 154 | * | ||
| 155 | * This is the low-level, arch-dependent interface for working | ||
| 156 | * directly on the raw trace data. | ||
| 157 | */ | ||
| 158 | struct ds_trace { | ||
| 159 | /* the number of bts/pebs records */ | ||
| 160 | size_t n; | ||
| 161 | /* the size of a bts/pebs record in bytes */ | ||
| 162 | size_t size; | ||
| 163 | /* pointers into the raw buffer: | ||
| 164 | - to the first entry */ | ||
| 165 | void *begin; | ||
| 166 | /* - one beyond the last entry */ | ||
| 167 | void *end; | ||
| 168 | /* - one beyond the newest entry */ | ||
| 169 | void *top; | ||
| 170 | /* - the interrupt threshold */ | ||
| 171 | void *ith; | ||
| 172 | /* flags given on ds_request() */ | ||
| 173 | unsigned int flags; | ||
| 174 | }; | ||
| 175 | |||
| 176 | /* | ||
| 177 | * An arch-independent view on branch trace data. | ||
| 178 | */ | ||
| 179 | enum bts_qualifier { | ||
| 180 | bts_invalid, | ||
| 181 | #define BTS_INVALID bts_invalid | ||
| 182 | |||
| 183 | bts_branch, | ||
| 184 | #define BTS_BRANCH bts_branch | ||
| 185 | |||
| 186 | bts_task_arrives, | ||
| 187 | #define BTS_TASK_ARRIVES bts_task_arrives | ||
| 188 | |||
| 189 | bts_task_departs, | ||
| 190 | #define BTS_TASK_DEPARTS bts_task_departs | ||
| 191 | |||
| 192 | bts_qual_bit_size = 4, | ||
| 193 | bts_qual_max = (1 << bts_qual_bit_size), | ||
| 194 | }; | ||
| 195 | |||
| 196 | struct bts_struct { | ||
| 197 | __u64 qualifier; | ||
| 198 | union { | ||
| 199 | /* BTS_BRANCH */ | ||
| 200 | struct { | ||
| 201 | __u64 from; | ||
| 202 | __u64 to; | ||
| 203 | } lbr; | ||
| 204 | /* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */ | ||
| 205 | struct { | ||
| 206 | __u64 clock; | ||
| 207 | pid_t pid; | ||
| 208 | } event; | ||
| 209 | } variant; | ||
| 210 | }; | ||
| 211 | |||
| 212 | |||
| 213 | /* | ||
| 214 | * The BTS state. | ||
| 215 | * | ||
| 216 | * This gives access to the raw DS state and adds functions to provide | ||
| 217 | * an arch-independent view of the BTS data. | ||
| 218 | */ | ||
| 219 | struct bts_trace { | ||
| 220 | struct ds_trace ds; | ||
| 221 | |||
| 222 | int (*read)(struct bts_tracer *tracer, const void *at, | ||
| 223 | struct bts_struct *out); | ||
| 224 | int (*write)(struct bts_tracer *tracer, const struct bts_struct *in); | ||
| 225 | }; | ||
| 226 | |||
| 227 | |||
| 228 | /* | ||
| 229 | * The PEBS state. | ||
| 230 | * | ||
| 231 | * This gives access to the raw DS state and the PEBS-specific counter | ||
| 232 | * reset value. | ||
| 233 | */ | ||
| 234 | struct pebs_trace { | ||
| 235 | struct ds_trace ds; | ||
| 236 | |||
| 237 | /* the number of valid counters in the below array */ | ||
| 238 | unsigned int counters; | ||
| 239 | |||
| 240 | #define MAX_PEBS_COUNTERS 4 | ||
| 241 | /* the counter reset value */ | ||
| 242 | unsigned long long counter_reset[MAX_PEBS_COUNTERS]; | ||
| 243 | }; | ||
| 244 | |||
| 245 | |||
| 246 | /* | ||
| 247 | * Read the BTS or PEBS trace. | ||
| 248 | * | ||
| 249 | * Returns a view on the trace collected for the parameter tracer. | ||
| 250 | * | ||
| 251 | * The view remains valid as long as the traced task is not running or | ||
| 252 | * the tracer is suspended. | ||
| 253 | * Writes into the trace buffer are not reflected. | ||
| 254 | * | ||
| 255 | * tracer: the tracer handle returned from ds_request_~() | ||
| 256 | */ | ||
| 257 | extern const struct bts_trace *ds_read_bts(struct bts_tracer *tracer); | ||
| 258 | extern const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer); | ||
| 259 | |||
| 260 | |||
| 261 | /* | ||
| 262 | * Reset the write pointer of the BTS/PEBS buffer. | ||
| 263 | * | ||
| 264 | * Returns 0 on success; -Eerrno on error | ||
| 265 | * | ||
| 266 | * tracer: the tracer handle returned from ds_request_~() | ||
| 267 | */ | ||
| 268 | extern int ds_reset_bts(struct bts_tracer *tracer); | ||
| 269 | extern int ds_reset_pebs(struct pebs_tracer *tracer); | ||
| 270 | |||
| 271 | /* | ||
| 272 | * Set the PEBS counter reset value. | ||
| 273 | * | ||
| 274 | * Returns 0 on success; -Eerrno on error | ||
| 275 | * | ||
| 276 | * tracer: the tracer handle returned from ds_request_pebs() | ||
| 277 | * counter: the index of the counter | ||
| 278 | * value: the new counter reset value | ||
| 279 | */ | ||
| 280 | extern int ds_set_pebs_reset(struct pebs_tracer *tracer, | ||
| 281 | unsigned int counter, u64 value); | ||
| 282 | |||
| 283 | /* | ||
| 284 | * Initialization | ||
| 285 | */ | ||
| 286 | struct cpuinfo_x86; | ||
| 287 | extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *); | ||
| 288 | |||
| 289 | /* | ||
| 290 | * Context switch work | ||
| 291 | */ | ||
| 292 | extern void ds_switch_to(struct task_struct *prev, struct task_struct *next); | ||
| 293 | |||
| 294 | #else /* CONFIG_X86_DS */ | ||
| 295 | |||
| 296 | struct cpuinfo_x86; | ||
| 297 | static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {} | ||
| 298 | static inline void ds_switch_to(struct task_struct *prev, | ||
| 299 | struct task_struct *next) {} | ||
| 300 | |||
| 301 | #endif /* CONFIG_X86_DS */ | ||
| 302 | #endif /* _ASM_X86_DS_H */ | ||
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h index ae6253ab9029..733f7e91e7a9 100644 --- a/arch/x86/include/asm/dwarf2.h +++ b/arch/x86/include/asm/dwarf2.h | |||
| @@ -34,6 +34,18 @@ | |||
| 34 | #define CFI_SIGNAL_FRAME | 34 | #define CFI_SIGNAL_FRAME |
| 35 | #endif | 35 | #endif |
| 36 | 36 | ||
| 37 | #if defined(CONFIG_AS_CFI_SECTIONS) && defined(__ASSEMBLY__) | ||
| 38 | /* | ||
| 39 | * Emit CFI data in .debug_frame sections, not .eh_frame sections. | ||
| 40 | * The latter we currently just discard since we don't do DWARF | ||
| 41 | * unwinding at runtime. So only the offline DWARF information is | ||
| 42 | * useful to anyone. Note we should not use this directive if this | ||
| 43 | * file is used in the vDSO assembly, or if vmlinux.lds.S gets | ||
| 44 | * changed so it doesn't discard .eh_frame. | ||
| 45 | */ | ||
| 46 | .cfi_sections .debug_frame | ||
| 47 | #endif | ||
| 48 | |||
| 37 | #else | 49 | #else |
| 38 | 50 | ||
| 39 | /* | 51 | /* |
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 0e22296790d3..ec8a52d14ab1 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h | |||
| @@ -45,7 +45,12 @@ | |||
| 45 | #define E820_NVS 4 | 45 | #define E820_NVS 4 |
| 46 | #define E820_UNUSABLE 5 | 46 | #define E820_UNUSABLE 5 |
| 47 | 47 | ||
| 48 | /* reserved RAM used by kernel itself */ | 48 | /* |
| 49 | * reserved RAM used by kernel itself | ||
| 50 | * if CONFIG_INTEL_TXT is enabled, memory of this type will be | ||
| 51 | * included in the S3 integrity calculation and so should not include | ||
| 52 | * any memory that BIOS might alter over the S3 transition | ||
| 53 | */ | ||
| 49 | #define E820_RESERVED_KERN 128 | 54 | #define E820_RESERVED_KERN 128 |
| 50 | 55 | ||
| 51 | #ifndef __ASSEMBLY__ | 56 | #ifndef __ASSEMBLY__ |
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 0f8576427cfe..aeab29aee617 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h | |||
| @@ -35,7 +35,7 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); | |||
| 35 | 35 | ||
| 36 | #define __ARCH_IRQ_STAT | 36 | #define __ARCH_IRQ_STAT |
| 37 | 37 | ||
| 38 | #define inc_irq_stat(member) percpu_add(irq_stat.member, 1) | 38 | #define inc_irq_stat(member) percpu_inc(irq_stat.member) |
| 39 | 39 | ||
| 40 | #define local_softirq_pending() percpu_read(irq_stat.__softirq_pending) | 40 | #define local_softirq_pending() percpu_read(irq_stat.__softirq_pending) |
| 41 | 41 | ||
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index 1d5c08a1bdfd..004e6e25e913 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h | |||
| @@ -68,6 +68,7 @@ extern unsigned long force_hpet_address; | |||
| 68 | extern u8 hpet_blockid; | 68 | extern u8 hpet_blockid; |
| 69 | extern int hpet_force_user; | 69 | extern int hpet_force_user; |
| 70 | extern u8 hpet_msi_disable; | 70 | extern u8 hpet_msi_disable; |
| 71 | extern u8 hpet_readback_cmp; | ||
| 71 | extern int is_hpet_enabled(void); | 72 | extern int is_hpet_enabled(void); |
| 72 | extern int hpet_enable(void); | 73 | extern int hpet_enable(void); |
| 73 | extern void hpet_disable(void); | 74 | extern void hpet_disable(void); |
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index 2a1bd8f4f23a..942255310e6a 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h | |||
| @@ -41,12 +41,16 @@ struct arch_hw_breakpoint { | |||
| 41 | /* Total number of available HW breakpoint registers */ | 41 | /* Total number of available HW breakpoint registers */ |
| 42 | #define HBP_NUM 4 | 42 | #define HBP_NUM 4 |
| 43 | 43 | ||
| 44 | static inline int hw_breakpoint_slots(int type) | ||
| 45 | { | ||
| 46 | return HBP_NUM; | ||
| 47 | } | ||
| 48 | |||
| 44 | struct perf_event; | 49 | struct perf_event; |
| 45 | struct pmu; | 50 | struct pmu; |
| 46 | 51 | ||
| 47 | extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len); | 52 | extern int arch_check_bp_in_kernelspace(struct perf_event *bp); |
| 48 | extern int arch_validate_hwbkpt_settings(struct perf_event *bp, | 53 | extern int arch_validate_hwbkpt_settings(struct perf_event *bp); |
| 49 | struct task_struct *tsk); | ||
| 50 | extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, | 54 | extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, |
| 51 | unsigned long val, void *data); | 55 | unsigned long val, void *data); |
| 52 | 56 | ||
diff --git a/arch/x86/include/asm/hyperv.h b/arch/x86/include/asm/hyperv.h index e153a2b3889a..5df477ac3af7 100644 --- a/arch/x86/include/asm/hyperv.h +++ b/arch/x86/include/asm/hyperv.h | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | #ifndef _ASM_X86_KVM_HYPERV_H | 1 | #ifndef _ASM_X86_HYPERV_H |
| 2 | #define _ASM_X86_KVM_HYPERV_H | 2 | #define _ASM_X86_HYPERV_H |
| 3 | 3 | ||
| 4 | #include <linux/types.h> | 4 | #include <linux/types.h> |
| 5 | 5 | ||
| @@ -14,6 +14,10 @@ | |||
| 14 | #define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004 | 14 | #define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004 |
| 15 | #define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005 | 15 | #define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005 |
| 16 | 16 | ||
| 17 | #define HYPERV_HYPERVISOR_PRESENT_BIT 0x80000000 | ||
| 18 | #define HYPERV_CPUID_MIN 0x40000005 | ||
| 19 | #define HYPERV_CPUID_MAX 0x4000ffff | ||
| 20 | |||
| 17 | /* | 21 | /* |
| 18 | * Feature identification. EAX indicates which features are available | 22 | * Feature identification. EAX indicates which features are available |
| 19 | * to the partition based upon the current partition privileges. | 23 | * to the partition based upon the current partition privileges. |
| @@ -129,6 +133,9 @@ | |||
| 129 | /* MSR used to provide vcpu index */ | 133 | /* MSR used to provide vcpu index */ |
| 130 | #define HV_X64_MSR_VP_INDEX 0x40000002 | 134 | #define HV_X64_MSR_VP_INDEX 0x40000002 |
| 131 | 135 | ||
| 136 | /* MSR used to read the per-partition time reference counter */ | ||
| 137 | #define HV_X64_MSR_TIME_REF_COUNT 0x40000020 | ||
| 138 | |||
| 132 | /* Define the virtual APIC registers */ | 139 | /* Define the virtual APIC registers */ |
| 133 | #define HV_X64_MSR_EOI 0x40000070 | 140 | #define HV_X64_MSR_EOI 0x40000070 |
| 134 | #define HV_X64_MSR_ICR 0x40000071 | 141 | #define HV_X64_MSR_ICR 0x40000071 |
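The new HYPERV_CPUID_MIN/MAX bounds and the hypervisor-present bit are used to recognise a Hyper-V host from its CPUID leaves. A condensed, illustrative detection sketch follows (kernel context assumed; it approximates, but is not, the accompanying mshyperv.c code):

```c
#include <linux/types.h>
#include <linux/string.h>
#include <asm/processor.h>
#include <asm/hyperv.h>

static bool demo_hyperv_present(void)
{
	u32 eax, hyp_signature[3];

	if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
		return false;

	cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS,
	      &eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]);

	return eax >= HYPERV_CPUID_MIN && eax <= HYPERV_CPUID_MAX &&
	       !memcmp("Microsoft Hv", hyp_signature, 12);
}
```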
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index b78c0941e422..70abda7058c8 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h | |||
| @@ -17,10 +17,33 @@ | |||
| 17 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | 17 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 18 | * | 18 | * |
| 19 | */ | 19 | */ |
| 20 | #ifndef ASM_X86__HYPERVISOR_H | 20 | #ifndef _ASM_X86_HYPERVISOR_H |
| 21 | #define ASM_X86__HYPERVISOR_H | 21 | #define _ASM_X86_HYPERVISOR_H |
| 22 | 22 | ||
| 23 | extern void init_hypervisor(struct cpuinfo_x86 *c); | 23 | extern void init_hypervisor(struct cpuinfo_x86 *c); |
| 24 | extern void init_hypervisor_platform(void); | 24 | extern void init_hypervisor_platform(void); |
| 25 | 25 | ||
| 26 | /* | ||
| 27 | * x86 hypervisor information | ||
| 28 | */ | ||
| 29 | struct hypervisor_x86 { | ||
| 30 | /* Hypervisor name */ | ||
| 31 | const char *name; | ||
| 32 | |||
| 33 | /* Detection routine */ | ||
| 34 | bool (*detect)(void); | ||
| 35 | |||
| 36 | /* Adjust CPU feature bits (run once per CPU) */ | ||
| 37 | void (*set_cpu_features)(struct cpuinfo_x86 *); | ||
| 38 | |||
| 39 | /* Platform setup (run once per boot) */ | ||
| 40 | void (*init_platform)(void); | ||
| 41 | }; | ||
| 42 | |||
| 43 | extern const struct hypervisor_x86 *x86_hyper; | ||
| 44 | |||
| 45 | /* Recognized hypervisors */ | ||
| 46 | extern const struct hypervisor_x86 x86_hyper_vmware; | ||
| 47 | extern const struct hypervisor_x86 x86_hyper_ms_hyperv; | ||
| 48 | |||
| 26 | #endif | 49 | #endif |
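The new struct hypervisor_x86 turns hypervisor detection into a small descriptor: a name, a detect() callback, and optional per-CPU and per-boot hooks. A sketch of how such a descriptor is filled in; the "demo" hypervisor below is entirely hypothetical, since only x86_hyper_vmware and x86_hyper_ms_hyperv are declared by the header above.

```c
#include <linux/types.h>
#include <asm/hypervisor.h>

static bool demo_platform_detect(void)
{
	return false;			/* nothing to detect in this sketch */
}

static void demo_init_platform(void)
{
}

const struct hypervisor_x86 x86_hyper_demo = {
	.name		= "Demo hypervisor",
	.detect		= demo_platform_detect,
	.init_platform	= demo_init_platform,
};
```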
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index da2930924501..c991b3a7b904 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h | |||
| @@ -16,7 +16,9 @@ | |||
| 16 | #include <linux/kernel_stat.h> | 16 | #include <linux/kernel_stat.h> |
| 17 | #include <linux/regset.h> | 17 | #include <linux/regset.h> |
| 18 | #include <linux/hardirq.h> | 18 | #include <linux/hardirq.h> |
| 19 | #include <linux/slab.h> | ||
| 19 | #include <asm/asm.h> | 20 | #include <asm/asm.h> |
| 21 | #include <asm/cpufeature.h> | ||
| 20 | #include <asm/processor.h> | 22 | #include <asm/processor.h> |
| 21 | #include <asm/sigcontext.h> | 23 | #include <asm/sigcontext.h> |
| 22 | #include <asm/user.h> | 24 | #include <asm/user.h> |
| @@ -56,6 +58,11 @@ extern int restore_i387_xstate_ia32(void __user *buf); | |||
| 56 | 58 | ||
| 57 | #define X87_FSW_ES (1 << 7) /* Exception Summary */ | 59 | #define X87_FSW_ES (1 << 7) /* Exception Summary */ |
| 58 | 60 | ||
| 61 | static __always_inline __pure bool use_xsave(void) | ||
| 62 | { | ||
| 63 | return static_cpu_has(X86_FEATURE_XSAVE); | ||
| 64 | } | ||
| 65 | |||
| 59 | #ifdef CONFIG_X86_64 | 66 | #ifdef CONFIG_X86_64 |
| 60 | 67 | ||
| 61 | /* Ignore delayed exceptions from user space */ | 68 | /* Ignore delayed exceptions from user space */ |
| @@ -91,15 +98,15 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) | |||
| 91 | values. The kernel data segment can be sometimes 0 and sometimes | 98 | values. The kernel data segment can be sometimes 0 and sometimes |
| 92 | new user value. Both should be ok. | 99 | new user value. Both should be ok. |
| 93 | Use the PDA as safe address because it should be already in L1. */ | 100 | Use the PDA as safe address because it should be already in L1. */ |
| 94 | static inline void clear_fpu_state(struct task_struct *tsk) | 101 | static inline void fpu_clear(struct fpu *fpu) |
| 95 | { | 102 | { |
| 96 | struct xsave_struct *xstate = &tsk->thread.xstate->xsave; | 103 | struct xsave_struct *xstate = &fpu->state->xsave; |
| 97 | struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; | 104 | struct i387_fxsave_struct *fx = &fpu->state->fxsave; |
| 98 | 105 | ||
| 99 | /* | 106 | /* |
| 100 | * xsave header may indicate the init state of the FP. | 107 | * xsave header may indicate the init state of the FP. |
| 101 | */ | 108 | */ |
| 102 | if ((task_thread_info(tsk)->status & TS_XSAVE) && | 109 | if (use_xsave() && |
| 103 | !(xstate->xsave_hdr.xstate_bv & XSTATE_FP)) | 110 | !(xstate->xsave_hdr.xstate_bv & XSTATE_FP)) |
| 104 | return; | 111 | return; |
| 105 | 112 | ||
| @@ -111,6 +118,11 @@ static inline void clear_fpu_state(struct task_struct *tsk) | |||
| 111 | X86_FEATURE_FXSAVE_LEAK); | 118 | X86_FEATURE_FXSAVE_LEAK); |
| 112 | } | 119 | } |
| 113 | 120 | ||
| 121 | static inline void clear_fpu_state(struct task_struct *tsk) | ||
| 122 | { | ||
| 123 | fpu_clear(&tsk->thread.fpu); | ||
| 124 | } | ||
| 125 | |||
| 114 | static inline int fxsave_user(struct i387_fxsave_struct __user *fx) | 126 | static inline int fxsave_user(struct i387_fxsave_struct __user *fx) |
| 115 | { | 127 | { |
| 116 | int err; | 128 | int err; |
| @@ -135,7 +147,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx) | |||
| 135 | return err; | 147 | return err; |
| 136 | } | 148 | } |
| 137 | 149 | ||
| 138 | static inline void fxsave(struct task_struct *tsk) | 150 | static inline void fpu_fxsave(struct fpu *fpu) |
| 139 | { | 151 | { |
| 140 | /* Using "rex64; fxsave %0" is broken because, if the memory operand | 152 | /* Using "rex64; fxsave %0" is broken because, if the memory operand |
| 141 | uses any extended registers for addressing, a second REX prefix | 153 | uses any extended registers for addressing, a second REX prefix |
| @@ -145,42 +157,45 @@ static inline void fxsave(struct task_struct *tsk) | |||
| 145 | /* Using "fxsaveq %0" would be the ideal choice, but is only supported | 157 | /* Using "fxsaveq %0" would be the ideal choice, but is only supported |
| 146 | starting with gas 2.16. */ | 158 | starting with gas 2.16. */ |
| 147 | __asm__ __volatile__("fxsaveq %0" | 159 | __asm__ __volatile__("fxsaveq %0" |
| 148 | : "=m" (tsk->thread.xstate->fxsave)); | 160 | : "=m" (fpu->state->fxsave)); |
| 149 | #elif 0 | 161 | #elif 0 |
| 150 | /* Using, as a workaround, the properly prefixed form below isn't | 162 | /* Using, as a workaround, the properly prefixed form below isn't |
| 151 | accepted by any binutils version so far released, complaining that | 163 | accepted by any binutils version so far released, complaining that |
| 152 | the same type of prefix is used twice if an extended register is | 164 | the same type of prefix is used twice if an extended register is |
| 153 | needed for addressing (fix submitted to mainline 2005-11-21). */ | 165 | needed for addressing (fix submitted to mainline 2005-11-21). */ |
| 154 | __asm__ __volatile__("rex64/fxsave %0" | 166 | __asm__ __volatile__("rex64/fxsave %0" |
| 155 | : "=m" (tsk->thread.xstate->fxsave)); | 167 | : "=m" (fpu->state->fxsave)); |
| 156 | #else | 168 | #else |
| 157 | /* This, however, we can work around by forcing the compiler to select | 169 | /* This, however, we can work around by forcing the compiler to select |
| 158 | an addressing mode that doesn't require extended registers. */ | 170 | an addressing mode that doesn't require extended registers. */ |
| 159 | __asm__ __volatile__("rex64/fxsave (%1)" | 171 | __asm__ __volatile__("rex64/fxsave (%1)" |
| 160 | : "=m" (tsk->thread.xstate->fxsave) | 172 | : "=m" (fpu->state->fxsave) |
| 161 | : "cdaSDb" (&tsk->thread.xstate->fxsave)); | 173 | : "cdaSDb" (&fpu->state->fxsave)); |
| 162 | #endif | 174 | #endif |
| 163 | } | 175 | } |
| 164 | 176 | ||
| 165 | static inline void __save_init_fpu(struct task_struct *tsk) | 177 | static inline void fpu_save_init(struct fpu *fpu) |
| 166 | { | 178 | { |
| 167 | if (task_thread_info(tsk)->status & TS_XSAVE) | 179 | if (use_xsave()) |
| 168 | xsave(tsk); | 180 | fpu_xsave(fpu); |
| 169 | else | 181 | else |
| 170 | fxsave(tsk); | 182 | fpu_fxsave(fpu); |
| 183 | |||
| 184 | fpu_clear(fpu); | ||
| 185 | } | ||
| 171 | 186 | ||
| 172 | clear_fpu_state(tsk); | 187 | static inline void __save_init_fpu(struct task_struct *tsk) |
| 188 | { | ||
| 189 | fpu_save_init(&tsk->thread.fpu); | ||
| 173 | task_thread_info(tsk)->status &= ~TS_USEDFPU; | 190 | task_thread_info(tsk)->status &= ~TS_USEDFPU; |
| 174 | } | 191 | } |
| 175 | 192 | ||
| 176 | #else /* CONFIG_X86_32 */ | 193 | #else /* CONFIG_X86_32 */ |
| 177 | 194 | ||
| 178 | #ifdef CONFIG_MATH_EMULATION | 195 | #ifdef CONFIG_MATH_EMULATION |
| 179 | extern void finit_task(struct task_struct *tsk); | 196 | extern void finit_soft_fpu(struct i387_soft_struct *soft); |
| 180 | #else | 197 | #else |
| 181 | static inline void finit_task(struct task_struct *tsk) | 198 | static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} |
| 182 | { | ||
| 183 | } | ||
| 184 | #endif | 199 | #endif |
| 185 | 200 | ||
| 186 | static inline void tolerant_fwait(void) | 201 | static inline void tolerant_fwait(void) |
| @@ -216,13 +231,13 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) | |||
| 216 | /* | 231 | /* |
| 217 | * These must be called with preempt disabled | 232 | * These must be called with preempt disabled |
| 218 | */ | 233 | */ |
| 219 | static inline void __save_init_fpu(struct task_struct *tsk) | 234 | static inline void fpu_save_init(struct fpu *fpu) |
| 220 | { | 235 | { |
| 221 | if (task_thread_info(tsk)->status & TS_XSAVE) { | 236 | if (use_xsave()) { |
| 222 | struct xsave_struct *xstate = &tsk->thread.xstate->xsave; | 237 | struct xsave_struct *xstate = &fpu->state->xsave; |
| 223 | struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; | 238 | struct i387_fxsave_struct *fx = &fpu->state->fxsave; |
| 224 | 239 | ||
| 225 | xsave(tsk); | 240 | fpu_xsave(fpu); |
| 226 | 241 | ||
| 227 | /* | 242 | /* |
| 228 | * xsave header may indicate the init state of the FP. | 243 | * xsave header may indicate the init state of the FP. |
| @@ -246,8 +261,8 @@ static inline void __save_init_fpu(struct task_struct *tsk) | |||
| 246 | "fxsave %[fx]\n" | 261 | "fxsave %[fx]\n" |
| 247 | "bt $7,%[fsw] ; jnc 1f ; fnclex\n1:", | 262 | "bt $7,%[fsw] ; jnc 1f ; fnclex\n1:", |
| 248 | X86_FEATURE_FXSR, | 263 | X86_FEATURE_FXSR, |
| 249 | [fx] "m" (tsk->thread.xstate->fxsave), | 264 | [fx] "m" (fpu->state->fxsave), |
| 250 | [fsw] "m" (tsk->thread.xstate->fxsave.swd) : "memory"); | 265 | [fsw] "m" (fpu->state->fxsave.swd) : "memory"); |
| 251 | clear_state: | 266 | clear_state: |
| 252 | /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception | 267 | /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception |
| 253 | is pending. Clear the x87 state here by setting it to fixed | 268 | is pending. Clear the x87 state here by setting it to fixed |
| @@ -259,17 +274,34 @@ clear_state: | |||
| 259 | X86_FEATURE_FXSAVE_LEAK, | 274 | X86_FEATURE_FXSAVE_LEAK, |
| 260 | [addr] "m" (safe_address)); | 275 | [addr] "m" (safe_address)); |
| 261 | end: | 276 | end: |
| 277 | ; | ||
| 278 | } | ||
| 279 | |||
| 280 | static inline void __save_init_fpu(struct task_struct *tsk) | ||
| 281 | { | ||
| 282 | fpu_save_init(&tsk->thread.fpu); | ||
| 262 | task_thread_info(tsk)->status &= ~TS_USEDFPU; | 283 | task_thread_info(tsk)->status &= ~TS_USEDFPU; |
| 263 | } | 284 | } |
| 264 | 285 | ||
| 286 | |||
| 265 | #endif /* CONFIG_X86_64 */ | 287 | #endif /* CONFIG_X86_64 */ |
| 266 | 288 | ||
| 267 | static inline int restore_fpu_checking(struct task_struct *tsk) | 289 | static inline int fpu_fxrstor_checking(struct fpu *fpu) |
| 268 | { | 290 | { |
| 269 | if (task_thread_info(tsk)->status & TS_XSAVE) | 291 | return fxrstor_checking(&fpu->state->fxsave); |
| 270 | return xrstor_checking(&tsk->thread.xstate->xsave); | 292 | } |
| 293 | |||
| 294 | static inline int fpu_restore_checking(struct fpu *fpu) | ||
| 295 | { | ||
| 296 | if (use_xsave()) | ||
| 297 | return fpu_xrstor_checking(fpu); | ||
| 271 | else | 298 | else |
| 272 | return fxrstor_checking(&tsk->thread.xstate->fxsave); | 299 | return fpu_fxrstor_checking(fpu); |
| 300 | } | ||
| 301 | |||
| 302 | static inline int restore_fpu_checking(struct task_struct *tsk) | ||
| 303 | { | ||
| 304 | return fpu_restore_checking(&tsk->thread.fpu); | ||
| 273 | } | 305 | } |
| 274 | 306 | ||
| 275 | /* | 307 | /* |
| @@ -397,30 +429,59 @@ static inline void clear_fpu(struct task_struct *tsk) | |||
| 397 | static inline unsigned short get_fpu_cwd(struct task_struct *tsk) | 429 | static inline unsigned short get_fpu_cwd(struct task_struct *tsk) |
| 398 | { | 430 | { |
| 399 | if (cpu_has_fxsr) { | 431 | if (cpu_has_fxsr) { |
| 400 | return tsk->thread.xstate->fxsave.cwd; | 432 | return tsk->thread.fpu.state->fxsave.cwd; |
| 401 | } else { | 433 | } else { |
| 402 | return (unsigned short)tsk->thread.xstate->fsave.cwd; | 434 | return (unsigned short)tsk->thread.fpu.state->fsave.cwd; |
| 403 | } | 435 | } |
| 404 | } | 436 | } |
| 405 | 437 | ||
| 406 | static inline unsigned short get_fpu_swd(struct task_struct *tsk) | 438 | static inline unsigned short get_fpu_swd(struct task_struct *tsk) |
| 407 | { | 439 | { |
| 408 | if (cpu_has_fxsr) { | 440 | if (cpu_has_fxsr) { |
| 409 | return tsk->thread.xstate->fxsave.swd; | 441 | return tsk->thread.fpu.state->fxsave.swd; |
| 410 | } else { | 442 | } else { |
| 411 | return (unsigned short)tsk->thread.xstate->fsave.swd; | 443 | return (unsigned short)tsk->thread.fpu.state->fsave.swd; |
| 412 | } | 444 | } |
| 413 | } | 445 | } |
| 414 | 446 | ||
| 415 | static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) | 447 | static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) |
| 416 | { | 448 | { |
| 417 | if (cpu_has_xmm) { | 449 | if (cpu_has_xmm) { |
| 418 | return tsk->thread.xstate->fxsave.mxcsr; | 450 | return tsk->thread.fpu.state->fxsave.mxcsr; |
| 419 | } else { | 451 | } else { |
| 420 | return MXCSR_DEFAULT; | 452 | return MXCSR_DEFAULT; |
| 421 | } | 453 | } |
| 422 | } | 454 | } |
| 423 | 455 | ||
| 456 | static bool fpu_allocated(struct fpu *fpu) | ||
| 457 | { | ||
| 458 | return fpu->state != NULL; | ||
| 459 | } | ||
| 460 | |||
| 461 | static inline int fpu_alloc(struct fpu *fpu) | ||
| 462 | { | ||
| 463 | if (fpu_allocated(fpu)) | ||
| 464 | return 0; | ||
| 465 | fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL); | ||
| 466 | if (!fpu->state) | ||
| 467 | return -ENOMEM; | ||
| 468 | WARN_ON((unsigned long)fpu->state & 15); | ||
| 469 | return 0; | ||
| 470 | } | ||
| 471 | |||
| 472 | static inline void fpu_free(struct fpu *fpu) | ||
| 473 | { | ||
| 474 | if (fpu->state) { | ||
| 475 | kmem_cache_free(task_xstate_cachep, fpu->state); | ||
| 476 | fpu->state = NULL; | ||
| 477 | } | ||
| 478 | } | ||
| 479 | |||
| 480 | static inline void fpu_copy(struct fpu *dst, struct fpu *src) | ||
| 481 | { | ||
| 482 | memcpy(dst->state, src->state, xstate_size); | ||
| 483 | } | ||
| 484 | |||
| 424 | #endif /* __ASSEMBLY__ */ | 485 | #endif /* __ASSEMBLY__ */ |
| 425 | 486 | ||
| 426 | #define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 | 487 | #define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 |
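The i387.h rework above wraps the extended state in a struct fpu and adds fpu_alloc()/fpu_free()/fpu_copy() so callers no longer touch tsk->thread.xstate directly. A hedged sketch of how the helpers compose on a task-copy path; the arch_demo_copy_fpu name and error handling are illustrative, not the actual process.c code:

```c
#include <linux/sched.h>
#include <asm/i387.h>

static int arch_demo_copy_fpu(struct task_struct *dst, struct task_struct *src)
{
	int err;

	dst->thread.fpu.state = NULL;		/* child starts with no state */
	if (!fpu_allocated(&src->thread.fpu))
		return 0;

	err = fpu_alloc(&dst->thread.fpu);	/* may sleep (GFP_KERNEL) */
	if (err)
		return err;

	fpu_copy(&dst->thread.fpu, &src->thread.fpu);
	return 0;
}
```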
diff --git a/arch/x86/include/asm/i8253.h b/arch/x86/include/asm/i8253.h index 1edbf89680fd..fc1f579fb965 100644 --- a/arch/x86/include/asm/i8253.h +++ b/arch/x86/include/asm/i8253.h | |||
| @@ -6,7 +6,7 @@ | |||
| 6 | #define PIT_CH0 0x40 | 6 | #define PIT_CH0 0x40 |
| 7 | #define PIT_CH2 0x42 | 7 | #define PIT_CH2 0x42 |
| 8 | 8 | ||
| 9 | extern spinlock_t i8253_lock; | 9 | extern raw_spinlock_t i8253_lock; |
| 10 | 10 | ||
| 11 | extern struct clock_event_device *global_clock_event; | 11 | extern struct clock_event_device *global_clock_event; |
| 12 | 12 | ||
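With i8253_lock converted to a raw_spinlock_t, its users switch to the raw_spin_lock_*() API. An illustrative reader of the PIT channel 0 count under that lock (a sketch mirroring the usual latch-and-read sequence, not the actual i8253.c code):

```c
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/io.h>
#include <asm/i8253.h>

static u32 demo_read_pit_count(void)
{
	unsigned long flags;
	u32 count;

	raw_spin_lock_irqsave(&i8253_lock, flags);
	outb_p(0x00, PIT_MODE);		/* latch the count for channel 0 */
	count = inb_p(PIT_CH0);		/* low byte */
	count |= inb_p(PIT_CH0) << 8;	/* high byte */
	raw_spin_unlock_irqrestore(&i8253_lock, flags);

	return count;
}
```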
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 96c2e0ad04ca..88c765e16410 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h | |||
| @@ -68,6 +68,8 @@ struct insn { | |||
| 68 | const insn_byte_t *next_byte; | 68 | const insn_byte_t *next_byte; |
| 69 | }; | 69 | }; |
| 70 | 70 | ||
| 71 | #define MAX_INSN_SIZE 16 | ||
| 72 | |||
| 71 | #define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) | 73 | #define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) |
| 72 | #define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) | 74 | #define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) |
| 73 | #define X86_MODRM_RM(modrm) ((modrm) & 0x07) | 75 | #define X86_MODRM_RM(modrm) ((modrm) & 0x07) |
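For reference, the X86_MODRM_* accessors above just slice the mod/reg/rm fields out of a ModRM byte. A small standalone illustration (userspace style, with the macros repeated locally so the snippet compiles on its own):

```c
#include <stdio.h>

#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6)
#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3)
#define X86_MODRM_RM(modrm)  ((modrm) & 0x07)

int main(void)
{
	unsigned char modrm = 0xd8;	/* from "add %ebx,%eax": 11 011 000 */

	printf("mod=%u reg=%u rm=%u\n", X86_MODRM_MOD(modrm),
	       X86_MODRM_REG(modrm), X86_MODRM_RM(modrm));
	/* prints mod=3 reg=3 rm=0 */
	return 0;
}
```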
diff --git a/arch/x86/include/asm/inst.h b/arch/x86/include/asm/inst.h index 14cf526091f9..280bf7fb6aba 100644 --- a/arch/x86/include/asm/inst.h +++ b/arch/x86/include/asm/inst.h | |||
| @@ -7,7 +7,66 @@ | |||
| 7 | 7 | ||
| 8 | #ifdef __ASSEMBLY__ | 8 | #ifdef __ASSEMBLY__ |
| 9 | 9 | ||
| 10 | #define REG_NUM_INVALID 100 | ||
| 11 | |||
| 12 | #define REG_TYPE_R64 0 | ||
| 13 | #define REG_TYPE_XMM 1 | ||
| 14 | #define REG_TYPE_INVALID 100 | ||
| 15 | |||
| 16 | .macro R64_NUM opd r64 | ||
| 17 | \opd = REG_NUM_INVALID | ||
| 18 | .ifc \r64,%rax | ||
| 19 | \opd = 0 | ||
| 20 | .endif | ||
| 21 | .ifc \r64,%rcx | ||
| 22 | \opd = 1 | ||
| 23 | .endif | ||
| 24 | .ifc \r64,%rdx | ||
| 25 | \opd = 2 | ||
| 26 | .endif | ||
| 27 | .ifc \r64,%rbx | ||
| 28 | \opd = 3 | ||
| 29 | .endif | ||
| 30 | .ifc \r64,%rsp | ||
| 31 | \opd = 4 | ||
| 32 | .endif | ||
| 33 | .ifc \r64,%rbp | ||
| 34 | \opd = 5 | ||
| 35 | .endif | ||
| 36 | .ifc \r64,%rsi | ||
| 37 | \opd = 6 | ||
| 38 | .endif | ||
| 39 | .ifc \r64,%rdi | ||
| 40 | \opd = 7 | ||
| 41 | .endif | ||
| 42 | .ifc \r64,%r8 | ||
| 43 | \opd = 8 | ||
| 44 | .endif | ||
| 45 | .ifc \r64,%r9 | ||
| 46 | \opd = 9 | ||
| 47 | .endif | ||
| 48 | .ifc \r64,%r10 | ||
| 49 | \opd = 10 | ||
| 50 | .endif | ||
| 51 | .ifc \r64,%r11 | ||
| 52 | \opd = 11 | ||
| 53 | .endif | ||
| 54 | .ifc \r64,%r12 | ||
| 55 | \opd = 12 | ||
| 56 | .endif | ||
| 57 | .ifc \r64,%r13 | ||
| 58 | \opd = 13 | ||
| 59 | .endif | ||
| 60 | .ifc \r64,%r14 | ||
| 61 | \opd = 14 | ||
| 62 | .endif | ||
| 63 | .ifc \r64,%r15 | ||
| 64 | \opd = 15 | ||
| 65 | .endif | ||
| 66 | .endm | ||
| 67 | |||
| 10 | .macro XMM_NUM opd xmm | 68 | .macro XMM_NUM opd xmm |
| 69 | \opd = REG_NUM_INVALID | ||
| 11 | .ifc \xmm,%xmm0 | 70 | .ifc \xmm,%xmm0 |
| 12 | \opd = 0 | 71 | \opd = 0 |
| 13 | .endif | 72 | .endif |
| @@ -58,13 +117,25 @@ | |||
| 58 | .endif | 117 | .endif |
| 59 | .endm | 118 | .endm |
| 60 | 119 | ||
| 120 | .macro REG_TYPE type reg | ||
| 121 | R64_NUM reg_type_r64 \reg | ||
| 122 | XMM_NUM reg_type_xmm \reg | ||
| 123 | .if reg_type_r64 <> REG_NUM_INVALID | ||
| 124 | \type = REG_TYPE_R64 | ||
| 125 | .elseif reg_type_xmm <> REG_NUM_INVALID | ||
| 126 | \type = REG_TYPE_XMM | ||
| 127 | .else | ||
| 128 | \type = REG_TYPE_INVALID | ||
| 129 | .endif | ||
| 130 | .endm | ||
| 131 | |||
| 61 | .macro PFX_OPD_SIZE | 132 | .macro PFX_OPD_SIZE |
| 62 | .byte 0x66 | 133 | .byte 0x66 |
| 63 | .endm | 134 | .endm |
| 64 | 135 | ||
| 65 | .macro PFX_REX opd1 opd2 | 136 | .macro PFX_REX opd1 opd2 W=0 |
| 66 | .if (\opd1 | \opd2) & 8 | 137 | .if ((\opd1 | \opd2) & 8) || \W |
| 67 | .byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1) | 138 | .byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1) | (\W << 3) |
| 68 | .endif | 139 | .endif |
| 69 | .endm | 140 | .endm |
| 70 | 141 | ||
| @@ -145,6 +216,25 @@ | |||
| 145 | .byte 0x0f, 0x38, 0xdf | 216 | .byte 0x0f, 0x38, 0xdf |
| 146 | MODRM 0xc0 aesdeclast_opd1 aesdeclast_opd2 | 217 | MODRM 0xc0 aesdeclast_opd1 aesdeclast_opd2 |
| 147 | .endm | 218 | .endm |
| 219 | |||
| 220 | .macro MOVQ_R64_XMM opd1 opd2 | ||
| 221 | REG_TYPE movq_r64_xmm_opd1_type \opd1 | ||
| 222 | .if movq_r64_xmm_opd1_type == REG_TYPE_XMM | ||
| 223 | XMM_NUM movq_r64_xmm_opd1 \opd1 | ||
| 224 | R64_NUM movq_r64_xmm_opd2 \opd2 | ||
| 225 | .else | ||
| 226 | R64_NUM movq_r64_xmm_opd1 \opd1 | ||
| 227 | XMM_NUM movq_r64_xmm_opd2 \opd2 | ||
| 228 | .endif | ||
| 229 | PFX_OPD_SIZE | ||
| 230 | PFX_REX movq_r64_xmm_opd1 movq_r64_xmm_opd2 1 | ||
| 231 | .if movq_r64_xmm_opd1_type == REG_TYPE_XMM | ||
| 232 | .byte 0x0f, 0x7e | ||
| 233 | .else | ||
| 234 | .byte 0x0f, 0x6e | ||
| 235 | .endif | ||
| 236 | MODRM 0xc0 movq_r64_xmm_opd1 movq_r64_xmm_opd2 | ||
| 237 | .endm | ||
| 148 | #endif | 238 | #endif |
| 149 | 239 | ||
| 150 | #endif | 240 | #endif |
diff --git a/arch/x86/include/asm/intel_scu_ipc.h b/arch/x86/include/asm/intel_scu_ipc.h new file mode 100644 index 000000000000..4470c9ad4a3e --- /dev/null +++ b/arch/x86/include/asm/intel_scu_ipc.h | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | #ifndef _ASM_X86_INTEL_SCU_IPC_H_ | ||
| 2 | #define _ASM_X86_INTEL_SCU_IPC_H_ | ||
| 3 | |||
| 4 | /* Read single register */ | ||
| 5 | int intel_scu_ipc_ioread8(u16 addr, u8 *data); | ||
| 6 | |||
| 7 | /* Read two sequential registers */ | ||
| 8 | int intel_scu_ipc_ioread16(u16 addr, u16 *data); | ||
| 9 | |||
| 10 | /* Read four sequential registers */ | ||
| 11 | int intel_scu_ipc_ioread32(u16 addr, u32 *data); | ||
| 12 | |||
| 13 | /* Read a vector */ | ||
| 14 | int intel_scu_ipc_readv(u16 *addr, u8 *data, int len); | ||
| 15 | |||
| 16 | /* Write single register */ | ||
| 17 | int intel_scu_ipc_iowrite8(u16 addr, u8 data); | ||
| 18 | |||
| 19 | /* Write two sequential registers */ | ||
| 20 | int intel_scu_ipc_iowrite16(u16 addr, u16 data); | ||
| 21 | |||
| 22 | /* Write four sequential registers */ | ||
| 23 | int intel_scu_ipc_iowrite32(u16 addr, u32 data); | ||
| 24 | |||
| 25 | /* Write a vector */ | ||
| 26 | int intel_scu_ipc_writev(u16 *addr, u8 *data, int len); | ||
| 27 | |||
| 28 | /* Update single register based on the mask */ | ||
| 29 | int intel_scu_ipc_update_register(u16 addr, u8 data, u8 mask); | ||
| 30 | |||
| 31 | /* | ||
| 32 | * Indirect register read | ||
| 33 | * Can be used when the SCCB (System Controller Configuration Block) register | ||
| 34 | * bit HRIM (Honor Restricted IPC Messages, bit 23) is set | ||
| 35 | */ | ||
| 36 | int intel_scu_ipc_register_read(u32 addr, u32 *data); | ||
| 37 | |||
| 38 | /* | ||
| 39 | * Indirect register write | ||
| 40 | * Can be used when the SCCB (System Controller Configuration Block) register | ||
| 41 | * bit HRIM (Honor Restricted IPC Messages, bit 23) is set | ||
| 42 | */ | ||
| 43 | int intel_scu_ipc_register_write(u32 addr, u32 data); | ||
| 44 | |||
| 45 | /* Issue commands to the SCU with or without data */ | ||
| 46 | int intel_scu_ipc_simple_command(int cmd, int sub); | ||
| 47 | int intel_scu_ipc_command(int cmd, int sub, u32 *in, int inlen, | ||
| 48 | u32 *out, int outlen); | ||
| 49 | /* I2C control api */ | ||
| 50 | int intel_scu_ipc_i2c_cntrl(u32 addr, u32 *data); | ||
| 51 | |||
| 52 | /* Update FW version */ | ||
| 53 | int intel_scu_ipc_fw_update(u8 *buffer, u32 length); | ||
| 54 | |||
| 55 | #endif | ||
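
A hedged usage sketch of the new SCU IPC interface: a Moorestown driver reads a PMIC register and then sets one bit via the masked-update helper. The register offset and bit below are hypothetical, chosen only for illustration; only functions declared in the header above are used.

#include <linux/types.h>
#include <asm/intel_scu_ipc.h>

/* Hypothetical PMIC register offset and enable bit, for illustration only. */
#define DEMO_PMIC_REG   0x3a
#define DEMO_ENABLE_BIT 0x01

static int demo_enable_block(void)
{
        u8 val;
        int err;

        /* Read the current register value over the SCU mailbox. */
        err = intel_scu_ipc_ioread8(DEMO_PMIC_REG, &val);
        if (err)
                return err;

        /*
         * Request a masked update that sets only the enable bit;
         * the read-modify-write is presumably done within the IPC
         * transaction, which is why a mask is passed with the data.
         */
        return intel_scu_ipc_update_register(DEMO_PMIC_REG, DEMO_ENABLE_BIT,
                                             DEMO_ENABLE_BIT);
}
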
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 35832a03a515..63cb4096c3dc 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h | |||
| @@ -159,7 +159,6 @@ struct io_apic_irq_attr; | |||
| 159 | extern int io_apic_set_pci_routing(struct device *dev, int irq, | 159 | extern int io_apic_set_pci_routing(struct device *dev, int irq, |
| 160 | struct io_apic_irq_attr *irq_attr); | 160 | struct io_apic_irq_attr *irq_attr); |
| 161 | void setup_IO_APIC_irq_extra(u32 gsi); | 161 | void setup_IO_APIC_irq_extra(u32 gsi); |
| 162 | extern int (*ioapic_renumber_irq)(int ioapic, int irq); | ||
| 163 | extern void ioapic_init_mappings(void); | 162 | extern void ioapic_init_mappings(void); |
| 164 | extern void ioapic_insert_resources(void); | 163 | extern void ioapic_insert_resources(void); |
| 165 | 164 | ||
| @@ -180,12 +179,13 @@ extern void ioapic_write_entry(int apic, int pin, | |||
| 180 | extern void setup_ioapic_ids_from_mpc(void); | 179 | extern void setup_ioapic_ids_from_mpc(void); |
| 181 | 180 | ||
| 182 | struct mp_ioapic_gsi{ | 181 | struct mp_ioapic_gsi{ |
| 183 | int gsi_base; | 182 | u32 gsi_base; |
| 184 | int gsi_end; | 183 | u32 gsi_end; |
| 185 | }; | 184 | }; |
| 186 | extern struct mp_ioapic_gsi mp_gsi_routing[]; | 185 | extern struct mp_ioapic_gsi mp_gsi_routing[]; |
| 187 | int mp_find_ioapic(int gsi); | 186 | extern u32 gsi_end; |
| 188 | int mp_find_ioapic_pin(int ioapic, int gsi); | 187 | int mp_find_ioapic(u32 gsi); |
| 188 | int mp_find_ioapic_pin(int ioapic, u32 gsi); | ||
| 189 | void __init mp_register_ioapic(int id, u32 address, u32 gsi_base); | 189 | void __init mp_register_ioapic(int id, u32 address, u32 gsi_base); |
| 190 | extern void __init pre_init_apic_IRQ0(void); | 190 | extern void __init pre_init_apic_IRQ0(void); |
| 191 | 191 | ||
| @@ -197,7 +197,8 @@ static const int timer_through_8259 = 0; | |||
| 197 | static inline void ioapic_init_mappings(void) { } | 197 | static inline void ioapic_init_mappings(void) { } |
| 198 | static inline void ioapic_insert_resources(void) { } | 198 | static inline void ioapic_insert_resources(void) { } |
| 199 | static inline void probe_nr_irqs_gsi(void) { } | 199 | static inline void probe_nr_irqs_gsi(void) { } |
| 200 | static inline int mp_find_ioapic(int gsi) { return 0; } | 200 | #define gsi_end (NR_IRQS_LEGACY - 1) |
| 201 | static inline int mp_find_ioapic(u32 gsi) { return 0; } | ||
| 201 | 202 | ||
| 202 | struct io_apic_irq_attr; | 203 | struct io_apic_irq_attr; |
| 203 | static inline int io_apic_set_pci_routing(struct device *dev, int irq, | 204 | static inline int io_apic_set_pci_routing(struct device *dev, int irq, |
diff --git a/arch/x86/include/asm/k8.h b/arch/x86/include/asm/k8.h index f70e60071fe8..af00bd1d2089 100644 --- a/arch/x86/include/asm/k8.h +++ b/arch/x86/include/asm/k8.h | |||
| @@ -16,11 +16,16 @@ extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn); | |||
| 16 | extern int k8_scan_nodes(void); | 16 | extern int k8_scan_nodes(void); |
| 17 | 17 | ||
| 18 | #ifdef CONFIG_K8_NB | 18 | #ifdef CONFIG_K8_NB |
| 19 | extern int num_k8_northbridges; | ||
| 20 | |||
| 19 | static inline struct pci_dev *node_to_k8_nb_misc(int node) | 21 | static inline struct pci_dev *node_to_k8_nb_misc(int node) |
| 20 | { | 22 | { |
| 21 | return (node < num_k8_northbridges) ? k8_northbridges[node] : NULL; | 23 | return (node < num_k8_northbridges) ? k8_northbridges[node] : NULL; |
| 22 | } | 24 | } |
| 25 | |||
| 23 | #else | 26 | #else |
| 27 | #define num_k8_northbridges 0 | ||
| 28 | |||
| 24 | static inline struct pci_dev *node_to_k8_nb_misc(int node) | 29 | static inline struct pci_dev *node_to_k8_nb_misc(int node) |
| 25 | { | 30 | { |
| 26 | return NULL; | 31 | return NULL; |
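
Exporting num_k8_northbridges here (and defining it to 0 when CONFIG_K8_NB is off) lets callers iterate the northbridge list without #ifdefs; a minimal sketch of such a caller, using only the symbols visible in this header:

#include <linux/pci.h>
#include <asm/k8.h>

/*
 * Sketch: walk the per-node K8 northbridge misc devices. With
 * CONFIG_K8_NB disabled, num_k8_northbridges is 0 and the loop
 * compiles away to nothing.
 */
static void demo_walk_northbridges(void)
{
        int node;

        for (node = 0; node < num_k8_northbridges; node++) {
                struct pci_dev *misc = node_to_k8_nb_misc(node);

                if (misc)
                        dev_info(&misc->dev,
                                 "K8 NB misc device for node %d\n", node);
        }
}
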
diff --git a/arch/x86/include/asm/kgdb.h b/arch/x86/include/asm/kgdb.h index e6c6c808489f..006da3687cdc 100644 --- a/arch/x86/include/asm/kgdb.h +++ b/arch/x86/include/asm/kgdb.h | |||
| @@ -76,4 +76,7 @@ static inline void arch_kgdb_breakpoint(void) | |||
| 76 | #define BREAK_INSTR_SIZE 1 | 76 | #define BREAK_INSTR_SIZE 1 |
| 77 | #define CACHE_FLUSH_IS_SAFE 1 | 77 | #define CACHE_FLUSH_IS_SAFE 1 |
| 78 | 78 | ||
| 79 | extern int kgdb_ll_trap(int cmd, const char *str, | ||
| 80 | struct pt_regs *regs, long err, int trap, int sig); | ||
| 81 | |||
| 79 | #endif /* _ASM_X86_KGDB_H */ | 82 | #endif /* _ASM_X86_KGDB_H */ |
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 4ffa345a8ccb..547882539157 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h | |||
| @@ -24,6 +24,7 @@ | |||
| 24 | #include <linux/types.h> | 24 | #include <linux/types.h> |
| 25 | #include <linux/ptrace.h> | 25 | #include <linux/ptrace.h> |
| 26 | #include <linux/percpu.h> | 26 | #include <linux/percpu.h> |
| 27 | #include <asm/insn.h> | ||
| 27 | 28 | ||
| 28 | #define __ARCH_WANT_KPROBES_INSN_SLOT | 29 | #define __ARCH_WANT_KPROBES_INSN_SLOT |
| 29 | 30 | ||
| @@ -36,7 +37,6 @@ typedef u8 kprobe_opcode_t; | |||
| 36 | #define RELATIVEJUMP_SIZE 5 | 37 | #define RELATIVEJUMP_SIZE 5 |
| 37 | #define RELATIVECALL_OPCODE 0xe8 | 38 | #define RELATIVECALL_OPCODE 0xe8 |
| 38 | #define RELATIVE_ADDR_SIZE 4 | 39 | #define RELATIVE_ADDR_SIZE 4 |
| 39 | #define MAX_INSN_SIZE 16 | ||
| 40 | #define MAX_STACK_SIZE 64 | 40 | #define MAX_STACK_SIZE 64 |
| 41 | #define MIN_STACK_SIZE(ADDR) \ | 41 | #define MIN_STACK_SIZE(ADDR) \ |
| 42 | (((MAX_STACK_SIZE) < (((unsigned long)current_thread_info()) + \ | 42 | (((MAX_STACK_SIZE) < (((unsigned long)current_thread_info()) + \ |
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index f46b79f6c16c..ff90055c7f0b 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | #define __KVM_HAVE_PIT_STATE2 | 21 | #define __KVM_HAVE_PIT_STATE2 |
| 22 | #define __KVM_HAVE_XEN_HVM | 22 | #define __KVM_HAVE_XEN_HVM |
| 23 | #define __KVM_HAVE_VCPU_EVENTS | 23 | #define __KVM_HAVE_VCPU_EVENTS |
| 24 | #define __KVM_HAVE_DEBUGREGS | ||
| 24 | 25 | ||
| 25 | /* Architectural interrupt line count. */ | 26 | /* Architectural interrupt line count. */ |
| 26 | #define KVM_NR_INTERRUPTS 256 | 27 | #define KVM_NR_INTERRUPTS 256 |
| @@ -257,6 +258,11 @@ struct kvm_reinject_control { | |||
| 257 | /* When set in flags, include corresponding fields on KVM_SET_VCPU_EVENTS */ | 258 | /* When set in flags, include corresponding fields on KVM_SET_VCPU_EVENTS */ |
| 258 | #define KVM_VCPUEVENT_VALID_NMI_PENDING 0x00000001 | 259 | #define KVM_VCPUEVENT_VALID_NMI_PENDING 0x00000001 |
| 259 | #define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 | 260 | #define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 |
| 261 | #define KVM_VCPUEVENT_VALID_SHADOW 0x00000004 | ||
| 262 | |||
| 263 | /* Interrupt shadow states */ | ||
| 264 | #define KVM_X86_SHADOW_INT_MOV_SS 0x01 | ||
| 265 | #define KVM_X86_SHADOW_INT_STI 0x02 | ||
| 260 | 266 | ||
| 261 | /* for KVM_GET/SET_VCPU_EVENTS */ | 267 | /* for KVM_GET/SET_VCPU_EVENTS */ |
| 262 | struct kvm_vcpu_events { | 268 | struct kvm_vcpu_events { |
| @@ -271,7 +277,7 @@ struct kvm_vcpu_events { | |||
| 271 | __u8 injected; | 277 | __u8 injected; |
| 272 | __u8 nr; | 278 | __u8 nr; |
| 273 | __u8 soft; | 279 | __u8 soft; |
| 274 | __u8 pad; | 280 | __u8 shadow; |
| 275 | } interrupt; | 281 | } interrupt; |
| 276 | struct { | 282 | struct { |
| 277 | __u8 injected; | 283 | __u8 injected; |
| @@ -284,4 +290,13 @@ struct kvm_vcpu_events { | |||
| 284 | __u32 reserved[10]; | 290 | __u32 reserved[10]; |
| 285 | }; | 291 | }; |
| 286 | 292 | ||
| 293 | /* for KVM_GET/SET_DEBUGREGS */ | ||
| 294 | struct kvm_debugregs { | ||
| 295 | __u64 db[4]; | ||
| 296 | __u64 dr6; | ||
| 297 | __u64 dr7; | ||
| 298 | __u64 flags; | ||
| 299 | __u64 reserved[9]; | ||
| 300 | }; | ||
| 301 | |||
| 287 | #endif /* _ASM_X86_KVM_H */ | 302 | #endif /* _ASM_X86_KVM_H */ |
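
The new kvm_debugregs structure is presumably paired with KVM_GET_DEBUGREGS/KVM_SET_DEBUGREGS vcpu ioctls added elsewhere in this series; a hedged userspace sketch of reading the guest debug registers, assuming those ioctl numbers are exported through <linux/kvm.h>:

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/*
 * Assumes vcpu_fd is an open KVM vCPU file descriptor and that the
 * kernel advertises the matching capability (KVM_CAP_DEBUGREGS).
 */
static int dump_guest_debugregs(int vcpu_fd)
{
        struct kvm_debugregs dbg;
        int i;

        if (ioctl(vcpu_fd, KVM_GET_DEBUGREGS, &dbg) < 0)
                return -1;

        for (i = 0; i < 4; i++)
                printf("db%d = %#llx\n", i, (unsigned long long)dbg.db[i]);
        printf("dr6 = %#llx, dr7 = %#llx\n",
               (unsigned long long)dbg.dr6, (unsigned long long)dbg.dr7);
        return 0;
}
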
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 7a6f54fa13ba..0b2729bf2070 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h | |||
| @@ -11,6 +11,8 @@ | |||
| 11 | #ifndef _ASM_X86_KVM_X86_EMULATE_H | 11 | #ifndef _ASM_X86_KVM_X86_EMULATE_H |
| 12 | #define _ASM_X86_KVM_X86_EMULATE_H | 12 | #define _ASM_X86_KVM_X86_EMULATE_H |
| 13 | 13 | ||
| 14 | #include <asm/desc_defs.h> | ||
| 15 | |||
| 14 | struct x86_emulate_ctxt; | 16 | struct x86_emulate_ctxt; |
| 15 | 17 | ||
| 16 | /* | 18 | /* |
| @@ -63,6 +65,15 @@ struct x86_emulate_ops { | |||
| 63 | unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error); | 65 | unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error); |
| 64 | 66 | ||
| 65 | /* | 67 | /* |
| 68 | * write_std: Write bytes of standard (non-emulated/special) memory. | ||
| 69 | * Used for descriptor writing. | ||
| 70 | * @addr: [IN ] Linear address to which to write. | ||
| 71 | * @val: [IN ] Value to write to memory, zero-extended to 'u_long'. | ||
| 72 | * @bytes: [IN ] Number of bytes to write to memory. | ||
| 73 | */ | ||
| 74 | int (*write_std)(unsigned long addr, void *val, | ||
| 75 | unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error); | ||
| 76 | /* | ||
| 66 | * fetch: Read bytes of standard (non-emulated/special) memory. | 77 | * fetch: Read bytes of standard (non-emulated/special) memory. |
| 67 | * Used for instruction fetch. | 78 | * Used for instruction fetch. |
| 68 | * @addr: [IN ] Linear address from which to read. | 79 | * @addr: [IN ] Linear address from which to read. |
| @@ -109,6 +120,23 @@ struct x86_emulate_ops { | |||
| 109 | unsigned int bytes, | 120 | unsigned int bytes, |
| 110 | struct kvm_vcpu *vcpu); | 121 | struct kvm_vcpu *vcpu); |
| 111 | 122 | ||
| 123 | int (*pio_in_emulated)(int size, unsigned short port, void *val, | ||
| 124 | unsigned int count, struct kvm_vcpu *vcpu); | ||
| 125 | |||
| 126 | int (*pio_out_emulated)(int size, unsigned short port, const void *val, | ||
| 127 | unsigned int count, struct kvm_vcpu *vcpu); | ||
| 128 | |||
| 129 | bool (*get_cached_descriptor)(struct desc_struct *desc, | ||
| 130 | int seg, struct kvm_vcpu *vcpu); | ||
| 131 | void (*set_cached_descriptor)(struct desc_struct *desc, | ||
| 132 | int seg, struct kvm_vcpu *vcpu); | ||
| 133 | u16 (*get_segment_selector)(int seg, struct kvm_vcpu *vcpu); | ||
| 134 | void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu); | ||
| 135 | void (*get_gdt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu); | ||
| 136 | ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu); | ||
| 137 | void (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu); | ||
| 138 | int (*cpl)(struct kvm_vcpu *vcpu); | ||
| 139 | void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); | ||
| 112 | }; | 140 | }; |
| 113 | 141 | ||
| 114 | /* Type, address-of, and value of an instruction's operand. */ | 142 | /* Type, address-of, and value of an instruction's operand. */ |
| @@ -124,6 +152,12 @@ struct fetch_cache { | |||
| 124 | unsigned long end; | 152 | unsigned long end; |
| 125 | }; | 153 | }; |
| 126 | 154 | ||
| 155 | struct read_cache { | ||
| 156 | u8 data[1024]; | ||
| 157 | unsigned long pos; | ||
| 158 | unsigned long end; | ||
| 159 | }; | ||
| 160 | |||
| 127 | struct decode_cache { | 161 | struct decode_cache { |
| 128 | u8 twobyte; | 162 | u8 twobyte; |
| 129 | u8 b; | 163 | u8 b; |
| @@ -139,7 +173,7 @@ struct decode_cache { | |||
| 139 | u8 seg_override; | 173 | u8 seg_override; |
| 140 | unsigned int d; | 174 | unsigned int d; |
| 141 | unsigned long regs[NR_VCPU_REGS]; | 175 | unsigned long regs[NR_VCPU_REGS]; |
| 142 | unsigned long eip, eip_orig; | 176 | unsigned long eip; |
| 143 | /* modrm */ | 177 | /* modrm */ |
| 144 | u8 modrm; | 178 | u8 modrm; |
| 145 | u8 modrm_mod; | 179 | u8 modrm_mod; |
| @@ -151,16 +185,15 @@ struct decode_cache { | |||
| 151 | void *modrm_ptr; | 185 | void *modrm_ptr; |
| 152 | unsigned long modrm_val; | 186 | unsigned long modrm_val; |
| 153 | struct fetch_cache fetch; | 187 | struct fetch_cache fetch; |
| 188 | struct read_cache io_read; | ||
| 154 | }; | 189 | }; |
| 155 | 190 | ||
| 156 | #define X86_SHADOW_INT_MOV_SS 1 | ||
| 157 | #define X86_SHADOW_INT_STI 2 | ||
| 158 | |||
| 159 | struct x86_emulate_ctxt { | 191 | struct x86_emulate_ctxt { |
| 160 | /* Register state before/after emulation. */ | 192 | /* Register state before/after emulation. */ |
| 161 | struct kvm_vcpu *vcpu; | 193 | struct kvm_vcpu *vcpu; |
| 162 | 194 | ||
| 163 | unsigned long eflags; | 195 | unsigned long eflags; |
| 196 | unsigned long eip; /* eip before instruction emulation */ | ||
| 164 | /* Emulated execution mode, represented by an X86EMUL_MODE value. */ | 197 | /* Emulated execution mode, represented by an X86EMUL_MODE value. */ |
| 165 | int mode; | 198 | int mode; |
| 166 | u32 cs_base; | 199 | u32 cs_base; |
| @@ -168,6 +201,7 @@ struct x86_emulate_ctxt { | |||
| 168 | /* interruptibility state, as a result of execution of STI or MOV SS */ | 201 | /* interruptibility state, as a result of execution of STI or MOV SS */ |
| 169 | int interruptibility; | 202 | int interruptibility; |
| 170 | 203 | ||
| 204 | bool restart; /* restart string instruction after writeback */ | ||
| 171 | /* decode cache */ | 205 | /* decode cache */ |
| 172 | struct decode_cache decode; | 206 | struct decode_cache decode; |
| 173 | }; | 207 | }; |
| @@ -194,5 +228,9 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, | |||
| 194 | struct x86_emulate_ops *ops); | 228 | struct x86_emulate_ops *ops); |
| 195 | int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, | 229 | int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, |
| 196 | struct x86_emulate_ops *ops); | 230 | struct x86_emulate_ops *ops); |
| 231 | int emulator_task_switch(struct x86_emulate_ctxt *ctxt, | ||
| 232 | struct x86_emulate_ops *ops, | ||
| 233 | u16 tss_selector, int reason, | ||
| 234 | bool has_error_code, u32 error_code); | ||
| 197 | 235 | ||
| 198 | #endif /* _ASM_X86_KVM_X86_EMULATE_H */ | 236 | #endif /* _ASM_X86_KVM_X86_EMULATE_H */ |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 06d9e79ca37d..76f5483cffec 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
| @@ -171,15 +171,15 @@ struct kvm_pte_chain { | |||
| 171 | union kvm_mmu_page_role { | 171 | union kvm_mmu_page_role { |
| 172 | unsigned word; | 172 | unsigned word; |
| 173 | struct { | 173 | struct { |
| 174 | unsigned glevels:4; | ||
| 175 | unsigned level:4; | 174 | unsigned level:4; |
| 175 | unsigned cr4_pae:1; | ||
| 176 | unsigned quadrant:2; | 176 | unsigned quadrant:2; |
| 177 | unsigned pad_for_nice_hex_output:6; | 177 | unsigned pad_for_nice_hex_output:6; |
| 178 | unsigned direct:1; | 178 | unsigned direct:1; |
| 179 | unsigned access:3; | 179 | unsigned access:3; |
| 180 | unsigned invalid:1; | 180 | unsigned invalid:1; |
| 181 | unsigned cr4_pge:1; | ||
| 182 | unsigned nxe:1; | 181 | unsigned nxe:1; |
| 182 | unsigned cr0_wp:1; | ||
| 183 | }; | 183 | }; |
| 184 | }; | 184 | }; |
| 185 | 185 | ||
| @@ -187,8 +187,6 @@ struct kvm_mmu_page { | |||
| 187 | struct list_head link; | 187 | struct list_head link; |
| 188 | struct hlist_node hash_link; | 188 | struct hlist_node hash_link; |
| 189 | 189 | ||
| 190 | struct list_head oos_link; | ||
| 191 | |||
| 192 | /* | 190 | /* |
| 193 | * The following two entries are used to key the shadow page in the | 191 | * The following two entries are used to key the shadow page in the |
| 194 | * hash table. | 192 | * hash table. |
| @@ -204,9 +202,9 @@ struct kvm_mmu_page { | |||
| 204 | * in this shadow page. | 202 | * in this shadow page. |
| 205 | */ | 203 | */ |
| 206 | DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); | 204 | DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); |
| 207 | int multimapped; /* More than one parent_pte? */ | 205 | bool multimapped; /* More than one parent_pte? */ |
| 208 | int root_count; /* Currently serving as active root */ | ||
| 209 | bool unsync; | 206 | bool unsync; |
| 207 | int root_count; /* Currently serving as active root */ | ||
| 210 | unsigned int unsync_children; | 208 | unsigned int unsync_children; |
| 211 | union { | 209 | union { |
| 212 | u64 *parent_pte; /* !multimapped */ | 210 | u64 *parent_pte; /* !multimapped */ |
| @@ -224,14 +222,9 @@ struct kvm_pv_mmu_op_buffer { | |||
| 224 | 222 | ||
| 225 | struct kvm_pio_request { | 223 | struct kvm_pio_request { |
| 226 | unsigned long count; | 224 | unsigned long count; |
| 227 | int cur_count; | ||
| 228 | gva_t guest_gva; | ||
| 229 | int in; | 225 | int in; |
| 230 | int port; | 226 | int port; |
| 231 | int size; | 227 | int size; |
| 232 | int string; | ||
| 233 | int down; | ||
| 234 | int rep; | ||
| 235 | }; | 228 | }; |
| 236 | 229 | ||
| 237 | /* | 230 | /* |
| @@ -320,6 +313,7 @@ struct kvm_vcpu_arch { | |||
| 320 | struct kvm_queued_exception { | 313 | struct kvm_queued_exception { |
| 321 | bool pending; | 314 | bool pending; |
| 322 | bool has_error_code; | 315 | bool has_error_code; |
| 316 | bool reinject; | ||
| 323 | u8 nr; | 317 | u8 nr; |
| 324 | u32 error_code; | 318 | u32 error_code; |
| 325 | } exception; | 319 | } exception; |
| @@ -362,8 +356,8 @@ struct kvm_vcpu_arch { | |||
| 362 | u64 *mce_banks; | 356 | u64 *mce_banks; |
| 363 | 357 | ||
| 364 | /* used for guest single stepping over the given code position */ | 358 | /* used for guest single stepping over the given code position */ |
| 365 | u16 singlestep_cs; | ||
| 366 | unsigned long singlestep_rip; | 359 | unsigned long singlestep_rip; |
| 360 | |||
| 367 | /* fields used by HYPER-V emulation */ | 361 | /* fields used by HYPER-V emulation */ |
| 368 | u64 hv_vapic; | 362 | u64 hv_vapic; |
| 369 | }; | 363 | }; |
| @@ -389,6 +383,7 @@ struct kvm_arch { | |||
| 389 | unsigned int n_free_mmu_pages; | 383 | unsigned int n_free_mmu_pages; |
| 390 | unsigned int n_requested_mmu_pages; | 384 | unsigned int n_requested_mmu_pages; |
| 391 | unsigned int n_alloc_mmu_pages; | 385 | unsigned int n_alloc_mmu_pages; |
| 386 | atomic_t invlpg_counter; | ||
| 392 | struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; | 387 | struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; |
| 393 | /* | 388 | /* |
| 394 | * Hash table of struct kvm_mmu_page. | 389 | * Hash table of struct kvm_mmu_page. |
| @@ -461,11 +456,6 @@ struct kvm_vcpu_stat { | |||
| 461 | u32 nmi_injections; | 456 | u32 nmi_injections; |
| 462 | }; | 457 | }; |
| 463 | 458 | ||
| 464 | struct descriptor_table { | ||
| 465 | u16 limit; | ||
| 466 | unsigned long base; | ||
| 467 | } __attribute__((packed)); | ||
| 468 | |||
| 469 | struct kvm_x86_ops { | 459 | struct kvm_x86_ops { |
| 470 | int (*cpu_has_kvm_support)(void); /* __init */ | 460 | int (*cpu_has_kvm_support)(void); /* __init */ |
| 471 | int (*disabled_by_bios)(void); /* __init */ | 461 | int (*disabled_by_bios)(void); /* __init */ |
| @@ -503,12 +493,11 @@ struct kvm_x86_ops { | |||
| 503 | void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); | 493 | void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); |
| 504 | void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); | 494 | void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); |
| 505 | void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); | 495 | void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); |
| 506 | void (*get_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); | 496 | void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); |
| 507 | void (*set_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); | 497 | void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); |
| 508 | void (*get_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); | 498 | void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); |
| 509 | void (*set_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); | 499 | void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); |
| 510 | int (*get_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long *dest); | 500 | void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value); |
| 511 | int (*set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long value); | ||
| 512 | void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); | 501 | void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); |
| 513 | unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); | 502 | unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); |
| 514 | void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); | 503 | void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); |
| @@ -527,7 +516,8 @@ struct kvm_x86_ops { | |||
| 527 | void (*set_irq)(struct kvm_vcpu *vcpu); | 516 | void (*set_irq)(struct kvm_vcpu *vcpu); |
| 528 | void (*set_nmi)(struct kvm_vcpu *vcpu); | 517 | void (*set_nmi)(struct kvm_vcpu *vcpu); |
| 529 | void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr, | 518 | void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr, |
| 530 | bool has_error_code, u32 error_code); | 519 | bool has_error_code, u32 error_code, |
| 520 | bool reinject); | ||
| 531 | int (*interrupt_allowed)(struct kvm_vcpu *vcpu); | 521 | int (*interrupt_allowed)(struct kvm_vcpu *vcpu); |
| 532 | int (*nmi_allowed)(struct kvm_vcpu *vcpu); | 522 | int (*nmi_allowed)(struct kvm_vcpu *vcpu); |
| 533 | bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); | 523 | bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); |
| @@ -541,6 +531,8 @@ struct kvm_x86_ops { | |||
| 541 | int (*get_lpage_level)(void); | 531 | int (*get_lpage_level)(void); |
| 542 | bool (*rdtscp_supported)(void); | 532 | bool (*rdtscp_supported)(void); |
| 543 | 533 | ||
| 534 | void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry); | ||
| 535 | |||
| 544 | const struct trace_print_flags *exit_reasons_str; | 536 | const struct trace_print_flags *exit_reasons_str; |
| 545 | }; | 537 | }; |
| 546 | 538 | ||
| @@ -587,23 +579,14 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
| 587 | void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); | 579 | void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); |
| 588 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); | 580 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); |
| 589 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); | 581 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); |
| 590 | void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, | ||
| 591 | unsigned long *rflags); | ||
| 592 | 582 | ||
| 593 | unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr); | ||
| 594 | void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long value, | ||
| 595 | unsigned long *rflags); | ||
| 596 | void kvm_enable_efer_bits(u64); | 583 | void kvm_enable_efer_bits(u64); |
| 597 | int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); | 584 | int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); |
| 598 | int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); | 585 | int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); |
| 599 | 586 | ||
| 600 | struct x86_emulate_ctxt; | 587 | struct x86_emulate_ctxt; |
| 601 | 588 | ||
| 602 | int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, | 589 | int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port); |
| 603 | int size, unsigned port); | ||
| 604 | int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, | ||
| 605 | int size, unsigned long count, int down, | ||
| 606 | gva_t address, int rep, unsigned port); | ||
| 607 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); | 590 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); |
| 608 | int kvm_emulate_halt(struct kvm_vcpu *vcpu); | 591 | int kvm_emulate_halt(struct kvm_vcpu *vcpu); |
| 609 | int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); | 592 | int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); |
| @@ -616,12 +599,15 @@ int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, | |||
| 616 | void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); | 599 | void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); |
| 617 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); | 600 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); |
| 618 | 601 | ||
| 619 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason); | 602 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, |
| 603 | bool has_error_code, u32 error_code); | ||
| 620 | 604 | ||
| 621 | void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); | 605 | void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); |
| 622 | void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); | 606 | void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); |
| 623 | void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); | 607 | void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); |
| 624 | void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); | 608 | void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); |
| 609 | int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val); | ||
| 610 | int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val); | ||
| 625 | unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); | 611 | unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); |
| 626 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw); | 612 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw); |
| 627 | void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); | 613 | void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); |
| @@ -634,6 +620,8 @@ void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); | |||
| 634 | 620 | ||
| 635 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); | 621 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); |
| 636 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); | 622 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); |
| 623 | void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr); | ||
| 624 | void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); | ||
| 637 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2, | 625 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2, |
| 638 | u32 error_code); | 626 | u32 error_code); |
| 639 | bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); | 627 | bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); |
| @@ -649,8 +637,6 @@ int emulator_write_emulated(unsigned long addr, | |||
| 649 | unsigned int bytes, | 637 | unsigned int bytes, |
| 650 | struct kvm_vcpu *vcpu); | 638 | struct kvm_vcpu *vcpu); |
| 651 | 639 | ||
| 652 | unsigned long segment_base(u16 selector); | ||
| 653 | |||
| 654 | void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); | 640 | void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); |
| 655 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | 641 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
| 656 | const u8 *new, int bytes, | 642 | const u8 *new, int bytes, |
| @@ -675,7 +661,6 @@ void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); | |||
| 675 | void kvm_enable_tdp(void); | 661 | void kvm_enable_tdp(void); |
| 676 | void kvm_disable_tdp(void); | 662 | void kvm_disable_tdp(void); |
| 677 | 663 | ||
| 678 | int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); | ||
| 679 | int complete_pio(struct kvm_vcpu *vcpu); | 664 | int complete_pio(struct kvm_vcpu *vcpu); |
| 680 | bool kvm_check_iopl(struct kvm_vcpu *vcpu); | 665 | bool kvm_check_iopl(struct kvm_vcpu *vcpu); |
| 681 | 666 | ||
| @@ -724,23 +709,6 @@ static inline void kvm_load_ldt(u16 sel) | |||
| 724 | asm("lldt %0" : : "rm"(sel)); | 709 | asm("lldt %0" : : "rm"(sel)); |
| 725 | } | 710 | } |
| 726 | 711 | ||
| 727 | static inline void kvm_get_idt(struct descriptor_table *table) | ||
| 728 | { | ||
| 729 | asm("sidt %0" : "=m"(*table)); | ||
| 730 | } | ||
| 731 | |||
| 732 | static inline void kvm_get_gdt(struct descriptor_table *table) | ||
| 733 | { | ||
| 734 | asm("sgdt %0" : "=m"(*table)); | ||
| 735 | } | ||
| 736 | |||
| 737 | static inline unsigned long kvm_read_tr_base(void) | ||
| 738 | { | ||
| 739 | u16 tr; | ||
| 740 | asm("str %0" : "=g"(tr)); | ||
| 741 | return segment_base(tr); | ||
| 742 | } | ||
| 743 | |||
| 744 | #ifdef CONFIG_X86_64 | 712 | #ifdef CONFIG_X86_64 |
| 745 | static inline unsigned long read_msr(unsigned long msr) | 713 | static inline unsigned long read_msr(unsigned long msr) |
| 746 | { | 714 | { |
| @@ -826,4 +794,6 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v); | |||
| 826 | void kvm_define_shared_msr(unsigned index, u32 msr); | 794 | void kvm_define_shared_msr(unsigned index, u32 msr); |
| 827 | void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); | 795 | void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); |
| 828 | 796 | ||
| 797 | bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); | ||
| 798 | |||
| 829 | #endif /* _ASM_X86_KVM_HOST_H */ | 799 | #endif /* _ASM_X86_KVM_HOST_H */ |
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index ffae1420e7d7..05eba5e9a8e8 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h | |||
| @@ -16,10 +16,23 @@ | |||
| 16 | #define KVM_FEATURE_CLOCKSOURCE 0 | 16 | #define KVM_FEATURE_CLOCKSOURCE 0 |
| 17 | #define KVM_FEATURE_NOP_IO_DELAY 1 | 17 | #define KVM_FEATURE_NOP_IO_DELAY 1 |
| 18 | #define KVM_FEATURE_MMU_OP 2 | 18 | #define KVM_FEATURE_MMU_OP 2 |
| 19 | /* This indicates that the new set of kvmclock MSRs | ||
| 20 | * is available. The use of MSRs 0x11 and 0x12 is deprecated. | ||
| 21 | */ | ||
| 22 | #define KVM_FEATURE_CLOCKSOURCE2 3 | ||
| 23 | |||
| 24 | /* The last 8 bits are used to indicate how to interpret the flags field | ||
| 25 | * in the pvclock structure. If no bits are set, all flags are ignored. | ||
| 26 | */ | ||
| 27 | #define KVM_FEATURE_CLOCKSOURCE_STABLE_BIT 24 | ||
| 19 | 28 | ||
| 20 | #define MSR_KVM_WALL_CLOCK 0x11 | 29 | #define MSR_KVM_WALL_CLOCK 0x11 |
| 21 | #define MSR_KVM_SYSTEM_TIME 0x12 | 30 | #define MSR_KVM_SYSTEM_TIME 0x12 |
| 22 | 31 | ||
| 32 | /* Custom MSRs fall in the range 0x4b564d00-0x4b564dff */ | ||
| 33 | #define MSR_KVM_WALL_CLOCK_NEW 0x4b564d00 | ||
| 34 | #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 | ||
| 35 | |||
| 23 | #define KVM_MAX_MMU_OP_BATCH 32 | 36 | #define KVM_MAX_MMU_OP_BATCH 32 |
| 24 | 37 | ||
| 25 | /* Operations for KVM_HC_MMU_OP */ | 38 | /* Operations for KVM_HC_MMU_OP */ |
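
The new MSRs sit in KVM's custom 0x4b564d00 range; a guest that sees KVM_FEATURE_CLOCKSOURCE2 registers its pvclock area by writing the area's physical address, with the low bit as the enable flag, to MSR_KVM_SYSTEM_TIME_NEW. A minimal guest-side sketch (the pvclock structure itself comes from asm/pvclock-abi.h, not this header, and the real registration code works on per-cpu data):

#include <linux/kvm_para.h>
#include <asm/msr.h>
#include <asm/page.h>
#include <asm/pvclock-abi.h>

static struct pvclock_vcpu_time_info hv_clock __attribute__((aligned(32)));

/*
 * Sketch: prefer the new MSR when the host advertises the new
 * clocksource feature, otherwise fall back to the legacy 0x12 MSR.
 */
static void register_kvmclock(void)
{
        u64 pa = __pa(&hv_clock) | 1;   /* low bit = enable */

        if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE2))
                wrmsrl(MSR_KVM_SYSTEM_TIME_NEW, pa);
        else if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE))
                wrmsrl(MSR_KVM_SYSTEM_TIME, pa);
}
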
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 6c3fdd631ed3..f32a4301c4d4 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
| @@ -225,5 +225,13 @@ extern void mcheck_intel_therm_init(void); | |||
| 225 | static inline void mcheck_intel_therm_init(void) { } | 225 | static inline void mcheck_intel_therm_init(void) { } |
| 226 | #endif | 226 | #endif |
| 227 | 227 | ||
| 228 | /* | ||
| 229 | * Used by APEI to report memory error via /dev/mcelog | ||
| 230 | */ | ||
| 231 | |||
| 232 | struct cper_sec_mem_err; | ||
| 233 | extern void apei_mce_report_mem_error(int corrected, | ||
| 234 | struct cper_sec_mem_err *mem_err); | ||
| 235 | |||
| 228 | #endif /* __KERNEL__ */ | 236 | #endif /* __KERNEL__ */ |
| 229 | #endif /* _ASM_X86_MCE_H */ | 237 | #endif /* _ASM_X86_MCE_H */ |
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index d8bf23a88d05..c82868e9f905 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h | |||
| @@ -105,16 +105,6 @@ extern void mp_config_acpi_legacy_irqs(void); | |||
| 105 | struct device; | 105 | struct device; |
| 106 | extern int mp_register_gsi(struct device *dev, u32 gsi, int edge_level, | 106 | extern int mp_register_gsi(struct device *dev, u32 gsi, int edge_level, |
| 107 | int active_high_low); | 107 | int active_high_low); |
| 108 | extern int acpi_probe_gsi(void); | ||
| 109 | #ifdef CONFIG_X86_IO_APIC | ||
| 110 | extern int mp_find_ioapic(int gsi); | ||
| 111 | extern int mp_find_ioapic_pin(int ioapic, int gsi); | ||
| 112 | #endif | ||
| 113 | #else /* !CONFIG_ACPI: */ | ||
| 114 | static inline int acpi_probe_gsi(void) | ||
| 115 | { | ||
| 116 | return 0; | ||
| 117 | } | ||
| 118 | #endif /* CONFIG_ACPI */ | 108 | #endif /* CONFIG_ACPI */ |
| 119 | 109 | ||
| 120 | #define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_APICS) | 110 | #define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_APICS) |
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h new file mode 100644 index 000000000000..79ce5685ab64 --- /dev/null +++ b/arch/x86/include/asm/mshyperv.h | |||
| @@ -0,0 +1,14 @@ | |||
| 1 | #ifndef _ASM_X86_MSHYPER_H | ||
| 2 | #define _ASM_X86_MSHYPER_H | ||
| 3 | |||
| 4 | #include <linux/types.h> | ||
| 5 | #include <asm/hyperv.h> | ||
| 6 | |||
| 7 | struct ms_hyperv_info { | ||
| 8 | u32 features; | ||
| 9 | u32 hints; | ||
| 10 | }; | ||
| 11 | |||
| 12 | extern struct ms_hyperv_info ms_hyperv; | ||
| 13 | |||
| 14 | #endif | ||
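
A hedged sketch of how the detection code elsewhere in this merge might fill ms_hyperv from the Hyper-V CPUID leaves; HYPERV_CPUID_FEATURES and HYPERV_CPUID_ENLIGHTMENT_INFO are assumed to come from the asm/hyperv.h header included above, and the real logic lives in the CPU vendor hooks, not in this header:

#include <linux/init.h>
#include <asm/processor.h>
#include <asm/hyperv.h>
#include <asm/mshyperv.h>

struct ms_hyperv_info ms_hyperv;

/* Sketch: populate the feature and hint words from the hypervisor CPUID leaves. */
static void __init ms_hyperv_init_platform(void)
{
        ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES);
        ms_hyperv.hints    = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO);
}
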
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 4604e6a54d36..b49d8ca228f6 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
| @@ -71,11 +71,14 @@ | |||
| 71 | #define MSR_IA32_LASTINTTOIP 0x000001de | 71 | #define MSR_IA32_LASTINTTOIP 0x000001de |
| 72 | 72 | ||
| 73 | /* DEBUGCTLMSR bits (others vary by model): */ | 73 | /* DEBUGCTLMSR bits (others vary by model): */ |
| 74 | #define _DEBUGCTLMSR_LBR 0 /* last branch recording */ | 74 | #define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ |
| 75 | #define _DEBUGCTLMSR_BTF 1 /* single-step on branches */ | 75 | #define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ |
| 76 | 76 | #define DEBUGCTLMSR_TR (1UL << 6) | |
| 77 | #define DEBUGCTLMSR_LBR (1UL << _DEBUGCTLMSR_LBR) | 77 | #define DEBUGCTLMSR_BTS (1UL << 7) |
| 78 | #define DEBUGCTLMSR_BTF (1UL << _DEBUGCTLMSR_BTF) | 78 | #define DEBUGCTLMSR_BTINT (1UL << 8) |
| 79 | #define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9) | ||
| 80 | #define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10) | ||
| 81 | #define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11) | ||
| 79 | 82 | ||
| 80 | #define MSR_IA32_MC0_CTL 0x00000400 | 83 | #define MSR_IA32_MC0_CTL 0x00000400 |
| 81 | #define MSR_IA32_MC0_STATUS 0x00000401 | 84 | #define MSR_IA32_MC0_STATUS 0x00000401 |
| @@ -199,8 +202,9 @@ | |||
| 199 | #define MSR_IA32_EBL_CR_POWERON 0x0000002a | 202 | #define MSR_IA32_EBL_CR_POWERON 0x0000002a |
| 200 | #define MSR_IA32_FEATURE_CONTROL 0x0000003a | 203 | #define MSR_IA32_FEATURE_CONTROL 0x0000003a |
| 201 | 204 | ||
| 202 | #define FEATURE_CONTROL_LOCKED (1<<0) | 205 | #define FEATURE_CONTROL_LOCKED (1<<0) |
| 203 | #define FEATURE_CONTROL_VMXON_ENABLED (1<<2) | 206 | #define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1) |
| 207 | #define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2) | ||
| 204 | 208 | ||
| 205 | #define MSR_IA32_APICBASE 0x0000001b | 209 | #define MSR_IA32_APICBASE 0x0000001b |
| 206 | #define MSR_IA32_APICBASE_BSP (1<<8) | 210 | #define MSR_IA32_APICBASE_BSP (1<<8) |
| @@ -232,6 +236,8 @@ | |||
| 232 | 236 | ||
| 233 | #define MSR_IA32_MISC_ENABLE 0x000001a0 | 237 | #define MSR_IA32_MISC_ENABLE 0x000001a0 |
| 234 | 238 | ||
| 239 | #define MSR_IA32_TEMPERATURE_TARGET 0x000001a2 | ||
| 240 | |||
| 235 | /* MISC_ENABLE bits: architectural */ | 241 | /* MISC_ENABLE bits: architectural */ |
| 236 | #define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << 0) | 242 | #define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << 0) |
| 237 | #define MSR_IA32_MISC_ENABLE_TCC (1ULL << 1) | 243 | #define MSR_IA32_MISC_ENABLE_TCC (1ULL << 1) |
| @@ -359,6 +365,8 @@ | |||
| 359 | #define MSR_P4_U2L_ESCR0 0x000003b0 | 365 | #define MSR_P4_U2L_ESCR0 0x000003b0 |
| 360 | #define MSR_P4_U2L_ESCR1 0x000003b1 | 366 | #define MSR_P4_U2L_ESCR1 0x000003b1 |
| 361 | 367 | ||
| 368 | #define MSR_P4_PEBS_MATRIX_VERT 0x000003f2 | ||
| 369 | |||
| 362 | /* Intel Core-based CPU performance counters */ | 370 | /* Intel Core-based CPU performance counters */ |
| 363 | #define MSR_CORE_PERF_FIXED_CTR0 0x00000309 | 371 | #define MSR_CORE_PERF_FIXED_CTR0 0x00000309 |
| 364 | #define MSR_CORE_PERF_FIXED_CTR1 0x0000030a | 372 | #define MSR_CORE_PERF_FIXED_CTR1 0x0000030a |
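
With the DEBUGCTL bits spelled out directly, code can set them without the old _DEBUGCTLMSR_* shift constants. A small sketch of arming branch-granular single stepping (BTF) on the current CPU, assuming the usual MSR_IA32_DEBUGCTLMSR accessors from asm/msr.h:

#include <asm/msr.h>
#include <asm/msr-index.h>

/*
 * Sketch: set DEBUGCTLMSR_BTF so that, once EFLAGS.TF is also set, the
 * single-step trap fires on the next branch rather than on every
 * instruction.
 */
static void enable_branch_single_step(void)
{
        unsigned long debugctl;

        rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
        debugctl |= DEBUGCTLMSR_BTF;
        wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}
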
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index 1a0422348d6d..8d8797eae5d7 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h | |||
| @@ -83,7 +83,7 @@ struct irq_routing_table { | |||
| 83 | 83 | ||
| 84 | extern unsigned int pcibios_irq_mask; | 84 | extern unsigned int pcibios_irq_mask; |
| 85 | 85 | ||
| 86 | extern spinlock_t pci_config_lock; | 86 | extern raw_spinlock_t pci_config_lock; |
| 87 | 87 | ||
| 88 | extern int (*pcibios_enable_irq)(struct pci_dev *dev); | 88 | extern int (*pcibios_enable_irq)(struct pci_dev *dev); |
| 89 | extern void (*pcibios_disable_irq)(struct pci_dev *dev); | 89 | extern void (*pcibios_disable_irq)(struct pci_dev *dev); |
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 66a272dfd8b8..0797e748d280 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h | |||
| @@ -105,7 +105,7 @@ do { \ | |||
| 105 | 105 | ||
| 106 | /* | 106 | /* |
| 107 | * Generate a percpu add to memory instruction and optimize code | 107 | * Generate a percpu add to memory instruction and optimize code |
| 108 | * if a one is added or subtracted. | 108 | * if one is added or subtracted. |
| 109 | */ | 109 | */ |
| 110 | #define percpu_add_op(var, val) \ | 110 | #define percpu_add_op(var, val) \ |
| 111 | do { \ | 111 | do { \ |
| @@ -190,6 +190,29 @@ do { \ | |||
| 190 | pfo_ret__; \ | 190 | pfo_ret__; \ |
| 191 | }) | 191 | }) |
| 192 | 192 | ||
| 193 | #define percpu_unary_op(op, var) \ | ||
| 194 | ({ \ | ||
| 195 | switch (sizeof(var)) { \ | ||
| 196 | case 1: \ | ||
| 197 | asm(op "b "__percpu_arg(0) \ | ||
| 198 | : "+m" (var)); \ | ||
| 199 | break; \ | ||
| 200 | case 2: \ | ||
| 201 | asm(op "w "__percpu_arg(0) \ | ||
| 202 | : "+m" (var)); \ | ||
| 203 | break; \ | ||
| 204 | case 4: \ | ||
| 205 | asm(op "l "__percpu_arg(0) \ | ||
| 206 | : "+m" (var)); \ | ||
| 207 | break; \ | ||
| 208 | case 8: \ | ||
| 209 | asm(op "q "__percpu_arg(0) \ | ||
| 210 | : "+m" (var)); \ | ||
| 211 | break; \ | ||
| 212 | default: __bad_percpu_size(); \ | ||
| 213 | } \ | ||
| 214 | }) | ||
| 215 | |||
| 193 | /* | 216 | /* |
| 194 | * percpu_read() makes gcc load the percpu variable every time it is | 217 | * percpu_read() makes gcc load the percpu variable every time it is |
| 195 | * accessed while percpu_read_stable() allows the value to be cached. | 218 | * accessed while percpu_read_stable() allows the value to be cached. |
| @@ -207,6 +230,7 @@ do { \ | |||
| 207 | #define percpu_and(var, val) percpu_to_op("and", var, val) | 230 | #define percpu_and(var, val) percpu_to_op("and", var, val) |
| 208 | #define percpu_or(var, val) percpu_to_op("or", var, val) | 231 | #define percpu_or(var, val) percpu_to_op("or", var, val) |
| 209 | #define percpu_xor(var, val) percpu_to_op("xor", var, val) | 232 | #define percpu_xor(var, val) percpu_to_op("xor", var, val) |
| 233 | #define percpu_inc(var) percpu_unary_op("inc", var) | ||
| 210 | 234 | ||
| 211 | #define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) | 235 | #define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) |
| 212 | #define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) | 236 | #define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) |
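
percpu_inc() uses the new percpu_unary_op() to emit a single incb/incw/incl/incq of the right width on this CPU's copy of the variable; a short usage sketch with a hypothetical counter:

#include <linux/percpu.h>

/* Hypothetical statistics counter, one instance per CPU. */
static DEFINE_PER_CPU(unsigned int, demo_irq_count);

static void demo_count_irq(void)
{
        /*
         * A single inc instruction on this CPU's copy, so no explicit
         * preempt_disable()/read-modify-write sequence is needed at
         * the call site.
         */
        percpu_inc(demo_irq_count);
}
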
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index db6109a885a7..254883d0c7e0 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h | |||
| @@ -5,7 +5,7 @@ | |||
| 5 | * Performance event hw details: | 5 | * Performance event hw details: |
| 6 | */ | 6 | */ |
| 7 | 7 | ||
| 8 | #define X86_PMC_MAX_GENERIC 8 | 8 | #define X86_PMC_MAX_GENERIC 32 |
| 9 | #define X86_PMC_MAX_FIXED 3 | 9 | #define X86_PMC_MAX_FIXED 3 |
| 10 | 10 | ||
| 11 | #define X86_PMC_IDX_GENERIC 0 | 11 | #define X86_PMC_IDX_GENERIC 0 |
| @@ -18,39 +18,31 @@ | |||
| 18 | #define MSR_ARCH_PERFMON_EVENTSEL0 0x186 | 18 | #define MSR_ARCH_PERFMON_EVENTSEL0 0x186 |
| 19 | #define MSR_ARCH_PERFMON_EVENTSEL1 0x187 | 19 | #define MSR_ARCH_PERFMON_EVENTSEL1 0x187 |
| 20 | 20 | ||
| 21 | #define ARCH_PERFMON_EVENTSEL_ENABLE (1 << 22) | 21 | #define ARCH_PERFMON_EVENTSEL_EVENT 0x000000FFULL |
| 22 | #define ARCH_PERFMON_EVENTSEL_ANY (1 << 21) | 22 | #define ARCH_PERFMON_EVENTSEL_UMASK 0x0000FF00ULL |
| 23 | #define ARCH_PERFMON_EVENTSEL_INT (1 << 20) | 23 | #define ARCH_PERFMON_EVENTSEL_USR (1ULL << 16) |
| 24 | #define ARCH_PERFMON_EVENTSEL_OS (1 << 17) | 24 | #define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17) |
| 25 | #define ARCH_PERFMON_EVENTSEL_USR (1 << 16) | 25 | #define ARCH_PERFMON_EVENTSEL_EDGE (1ULL << 18) |
| 26 | 26 | #define ARCH_PERFMON_EVENTSEL_INT (1ULL << 20) | |
| 27 | /* | 27 | #define ARCH_PERFMON_EVENTSEL_ANY (1ULL << 21) |
| 28 | * Includes eventsel and unit mask as well: | 28 | #define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22) |
| 29 | */ | 29 | #define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23) |
| 30 | 30 | #define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL | |
| 31 | 31 | ||
| 32 | #define INTEL_ARCH_EVTSEL_MASK 0x000000FFULL | 32 | #define AMD64_EVENTSEL_EVENT \ |
| 33 | #define INTEL_ARCH_UNIT_MASK 0x0000FF00ULL | 33 | (ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32)) |
| 34 | #define INTEL_ARCH_EDGE_MASK 0x00040000ULL | 34 | #define INTEL_ARCH_EVENT_MASK \ |
| 35 | #define INTEL_ARCH_INV_MASK 0x00800000ULL | 35 | (ARCH_PERFMON_EVENTSEL_UMASK | ARCH_PERFMON_EVENTSEL_EVENT) |
| 36 | #define INTEL_ARCH_CNT_MASK 0xFF000000ULL | 36 | |
| 37 | #define INTEL_ARCH_EVENT_MASK (INTEL_ARCH_UNIT_MASK|INTEL_ARCH_EVTSEL_MASK) | 37 | #define X86_RAW_EVENT_MASK \ |
| 38 | 38 | (ARCH_PERFMON_EVENTSEL_EVENT | \ | |
| 39 | /* | 39 | ARCH_PERFMON_EVENTSEL_UMASK | \ |
| 40 | * filter mask to validate fixed counter events. | 40 | ARCH_PERFMON_EVENTSEL_EDGE | \ |
| 41 | * the following filters disqualify for fixed counters: | 41 | ARCH_PERFMON_EVENTSEL_INV | \ |
| 42 | * - inv | 42 | ARCH_PERFMON_EVENTSEL_CMASK) |
| 43 | * - edge | 43 | #define AMD64_RAW_EVENT_MASK \ |
| 44 | * - cnt-mask | 44 | (X86_RAW_EVENT_MASK | \ |
| 45 | * The other filters are supported by fixed counters. | 45 | AMD64_EVENTSEL_EVENT) |
| 46 | * The any-thread option is supported starting with v3. | ||
| 47 | */ | ||
| 48 | #define INTEL_ARCH_FIXED_MASK \ | ||
| 49 | (INTEL_ARCH_CNT_MASK| \ | ||
| 50 | INTEL_ARCH_INV_MASK| \ | ||
| 51 | INTEL_ARCH_EDGE_MASK|\ | ||
| 52 | INTEL_ARCH_UNIT_MASK|\ | ||
| 53 | INTEL_ARCH_EVENT_MASK) | ||
| 54 | 46 | ||
| 55 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c | 47 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c |
| 56 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) | 48 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) |
| @@ -67,7 +59,7 @@ | |||
| 67 | union cpuid10_eax { | 59 | union cpuid10_eax { |
| 68 | struct { | 60 | struct { |
| 69 | unsigned int version_id:8; | 61 | unsigned int version_id:8; |
| 70 | unsigned int num_events:8; | 62 | unsigned int num_counters:8; |
| 71 | unsigned int bit_width:8; | 63 | unsigned int bit_width:8; |
| 72 | unsigned int mask_length:8; | 64 | unsigned int mask_length:8; |
| 73 | } split; | 65 | } split; |
| @@ -76,7 +68,7 @@ union cpuid10_eax { | |||
| 76 | 68 | ||
| 77 | union cpuid10_edx { | 69 | union cpuid10_edx { |
| 78 | struct { | 70 | struct { |
| 79 | unsigned int num_events_fixed:4; | 71 | unsigned int num_counters_fixed:4; |
| 80 | unsigned int reserved:28; | 72 | unsigned int reserved:28; |
| 81 | } split; | 73 | } split; |
| 82 | unsigned int full; | 74 | unsigned int full; |
| @@ -136,6 +128,18 @@ extern void perf_events_lapic_init(void); | |||
| 136 | 128 | ||
| 137 | #define PERF_EVENT_INDEX_OFFSET 0 | 129 | #define PERF_EVENT_INDEX_OFFSET 0 |
| 138 | 130 | ||
| 131 | /* | ||
| 132 | * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups. | ||
| 133 | * This flag is otherwise unused and ABI specified to be 0, so nobody should | ||
| 134 | * care what we do with it. | ||
| 135 | */ | ||
| 136 | #define PERF_EFLAGS_EXACT (1UL << 3) | ||
| 137 | |||
| 138 | struct pt_regs; | ||
| 139 | extern unsigned long perf_instruction_pointer(struct pt_regs *regs); | ||
| 140 | extern unsigned long perf_misc_flags(struct pt_regs *regs); | ||
| 141 | #define perf_misc_flags(regs) perf_misc_flags(regs) | ||
| 142 | |||
| 139 | #else | 143 | #else |
| 140 | static inline void init_hw_perf_events(void) { } | 144 | static inline void init_hw_perf_events(void) { } |
| 141 | static inline void perf_events_lapic_init(void) { } | 145 | static inline void perf_events_lapic_init(void) { } |
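
With the event-select fields now expressed as 64-bit masks, a raw config can be assembled and sanity-masked directly from them. An illustrative sketch (not from the patch) using the architectural unhalted-core-cycles encoding defined in this header:

#include <linux/types.h>
#include <asm/perf_event.h>

/*
 * Sketch: build a raw event-select value for "unhalted core cycles"
 * (event 0x3c, umask 0x00), counting in both user and kernel mode,
 * and keep only the bits the generic code accepts from a raw config.
 */
static u64 demo_build_raw_config(void)
{
        u64 config = ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL |
                     ARCH_PERFMON_EVENTSEL_USR |
                     ARCH_PERFMON_EVENTSEL_OS;

        /*
         * Bits outside X86_RAW_EVENT_MASK (e.g. ENABLE, INT) are
         * managed by the kernel, not supplied via the raw config.
         */
        return config & X86_RAW_EVENT_MASK;
}
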
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h new file mode 100644 index 000000000000..64a8ebff06fc --- /dev/null +++ b/arch/x86/include/asm/perf_event_p4.h | |||
| @@ -0,0 +1,795 @@ | |||
| 1 | /* | ||
| 2 | * Netburst Performance Events (P4, old Xeon) | ||
| 3 | */ | ||
| 4 | |||
| 5 | #ifndef PERF_EVENT_P4_H | ||
| 6 | #define PERF_EVENT_P4_H | ||
| 7 | |||
| 8 | #include <linux/cpu.h> | ||
| 9 | #include <linux/bitops.h> | ||
| 10 | |||
| 11 | /* | ||
| 12 | * NetBurst has performance MSRs shared between | ||
| 13 | * threads if HT is turned on, i.e. for both logical | ||
| 14 | * processors (note: on Atom with HT support, in contrast, | ||
| 15 | * perf-MSRs are not shared and every thread has its | ||
| 16 | * own set of perf-MSRs) | ||
| 17 | */ | ||
| 18 | #define ARCH_P4_TOTAL_ESCR (46) | ||
| 19 | #define ARCH_P4_RESERVED_ESCR (2) /* IQ_ESCR(0,1) not always present */ | ||
| 20 | #define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR) | ||
| 21 | #define ARCH_P4_MAX_CCCR (18) | ||
| 22 | #define ARCH_P4_MAX_COUNTER (ARCH_P4_MAX_CCCR / 2) | ||
| 23 | |||
| 24 | #define P4_ESCR_EVENT_MASK 0x7e000000U | ||
| 25 | #define P4_ESCR_EVENT_SHIFT 25 | ||
| 26 | #define P4_ESCR_EVENTMASK_MASK 0x01fffe00U | ||
| 27 | #define P4_ESCR_EVENTMASK_SHIFT 9 | ||
| 28 | #define P4_ESCR_TAG_MASK 0x000001e0U | ||
| 29 | #define P4_ESCR_TAG_SHIFT 5 | ||
| 30 | #define P4_ESCR_TAG_ENABLE 0x00000010U | ||
| 31 | #define P4_ESCR_T0_OS 0x00000008U | ||
| 32 | #define P4_ESCR_T0_USR 0x00000004U | ||
| 33 | #define P4_ESCR_T1_OS 0x00000002U | ||
| 34 | #define P4_ESCR_T1_USR 0x00000001U | ||
| 35 | |||
| 36 | #define P4_ESCR_EVENT(v) ((v) << P4_ESCR_EVENT_SHIFT) | ||
| 37 | #define P4_ESCR_EMASK(v) ((v) << P4_ESCR_EVENTMASK_SHIFT) | ||
| 38 | #define P4_ESCR_TAG(v) ((v) << P4_ESCR_TAG_SHIFT) | ||
| 39 | |||
| 40 | /* Non HT mask */ | ||
| 41 | #define P4_ESCR_MASK \ | ||
| 42 | (P4_ESCR_EVENT_MASK | \ | ||
| 43 | P4_ESCR_EVENTMASK_MASK | \ | ||
| 44 | P4_ESCR_TAG_MASK | \ | ||
| 45 | P4_ESCR_TAG_ENABLE | \ | ||
| 46 | P4_ESCR_T0_OS | \ | ||
| 47 | P4_ESCR_T0_USR) | ||
| 48 | |||
| 49 | /* HT mask */ | ||
| 50 | #define P4_ESCR_MASK_HT \ | ||
| 51 | (P4_ESCR_MASK | P4_ESCR_T1_OS | P4_ESCR_T1_USR) | ||
| 52 | |||
| 53 | #define P4_CCCR_OVF 0x80000000U | ||
| 54 | #define P4_CCCR_CASCADE 0x40000000U | ||
| 55 | #define P4_CCCR_OVF_PMI_T0 0x04000000U | ||
| 56 | #define P4_CCCR_OVF_PMI_T1 0x08000000U | ||
| 57 | #define P4_CCCR_FORCE_OVF 0x02000000U | ||
| 58 | #define P4_CCCR_EDGE 0x01000000U | ||
| 59 | #define P4_CCCR_THRESHOLD_MASK 0x00f00000U | ||
| 60 | #define P4_CCCR_THRESHOLD_SHIFT 20 | ||
| 61 | #define P4_CCCR_COMPLEMENT 0x00080000U | ||
| 62 | #define P4_CCCR_COMPARE 0x00040000U | ||
| 63 | #define P4_CCCR_ESCR_SELECT_MASK 0x0000e000U | ||
| 64 | #define P4_CCCR_ESCR_SELECT_SHIFT 13 | ||
| 65 | #define P4_CCCR_ENABLE 0x00001000U | ||
| 66 | #define P4_CCCR_THREAD_SINGLE 0x00010000U | ||
| 67 | #define P4_CCCR_THREAD_BOTH 0x00020000U | ||
| 68 | #define P4_CCCR_THREAD_ANY 0x00030000U | ||
| 69 | #define P4_CCCR_RESERVED 0x00000fffU | ||
| 70 | |||
| 71 | #define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) | ||
| 72 | #define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT) | ||
| 73 | |||
| 74 | /* Custom bits in reserved CCCR area */ | ||
| 75 | #define P4_CCCR_CACHE_OPS_MASK 0x0000003fU | ||
| 76 | |||
| 77 | |||
| 78 | /* Non HT mask */ | ||
| 79 | #define P4_CCCR_MASK \ | ||
| 80 | (P4_CCCR_OVF | \ | ||
| 81 | P4_CCCR_CASCADE | \ | ||
| 82 | P4_CCCR_OVF_PMI_T0 | \ | ||
| 83 | P4_CCCR_FORCE_OVF | \ | ||
| 84 | P4_CCCR_EDGE | \ | ||
| 85 | P4_CCCR_THRESHOLD_MASK | \ | ||
| 86 | P4_CCCR_COMPLEMENT | \ | ||
| 87 | P4_CCCR_COMPARE | \ | ||
| 88 | P4_CCCR_ESCR_SELECT_MASK | \ | ||
| 89 | P4_CCCR_ENABLE) | ||
| 90 | |||
| 91 | /* HT mask */ | ||
| 92 | #define P4_CCCR_MASK_HT \ | ||
| 93 | (P4_CCCR_MASK | P4_CCCR_OVF_PMI_T1 | P4_CCCR_THREAD_ANY) | ||
| 94 | |||
| 95 | #define P4_GEN_ESCR_EMASK(class, name, bit) \ | ||
| 96 | class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT) | ||
| 97 | #define P4_ESCR_EMASK_BIT(class, name) class##__##name | ||
| 98 | |||
| 99 | /* | ||
| 100 | * the config field is 64 bits wide and consists of | ||
| 101 | * HT << 63 | ESCR << 32 | CCCR | ||
| 102 | * where HT is the HyperThreading bit (ESCR has this bit | ||
| 103 | * reserved, so we may use it for our own purpose) | ||
| 104 | * | ||
| 105 | * note that these are NOT the addresses of the respective | ||
| 106 | * ESCR and CCCR registers but a packed value that must | ||
| 107 | * be unpacked and written to the proper addresses | ||
| 108 | * | ||
| 109 | * the basic idea is to pack as much info as | ||
| 110 | * possible | ||
| 111 | */ | ||
| 112 | #define p4_config_pack_escr(v) (((u64)(v)) << 32) | ||
| 113 | #define p4_config_pack_cccr(v) (((u64)(v)) & 0xffffffffULL) | ||
| 114 | #define p4_config_unpack_escr(v) (((u64)(v)) >> 32) | ||
| 115 | #define p4_config_unpack_cccr(v) (((u64)(v)) & 0xffffffffULL) | ||
| 116 | |||
| 117 | #define p4_config_unpack_emask(v) \ | ||
| 118 | ({ \ | ||
| 119 | u32 t = p4_config_unpack_escr((v)); \ | ||
| 120 | t = t & P4_ESCR_EVENTMASK_MASK; \ | ||
| 121 | t = t >> P4_ESCR_EVENTMASK_SHIFT; \ | ||
| 122 | t; \ | ||
| 123 | }) | ||
| 124 | |||
| 125 | #define p4_config_unpack_event(v) \ | ||
| 126 | ({ \ | ||
| 127 | u32 t = p4_config_unpack_escr((v)); \ | ||
| 128 | t = t & P4_ESCR_EVENT_MASK; \ | ||
| 129 | t = t >> P4_ESCR_EVENT_SHIFT; \ | ||
| 130 | t; \ | ||
| 131 | }) | ||
| 132 | |||
| 133 | #define p4_config_unpack_cache_event(v) (((u64)(v)) & P4_CCCR_CACHE_OPS_MASK) | ||
| 134 | |||
| 135 | #define P4_CONFIG_HT_SHIFT 63 | ||
| 136 | #define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) | ||
| 137 | |||
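
A hedged sketch of round-tripping a value through the pack/unpack helpers above, showing how an ESCR/CCCR pair becomes a single 64-bit config and how the event and event-mask fields are recovered; the event/mask values are arbitrary examples:

#include <linux/types.h>
#include <asm/perf_event_p4.h>

/*
 * Sketch: pack an ESCR/CCCR pair into one u64 config and pull the
 * event and event-mask fields back out, as the P4 PMU code would.
 */
static void demo_p4_config(void)
{
        u32 escr = P4_ESCR_EVENT(0x3) | P4_ESCR_EMASK(0x1) | P4_ESCR_T0_OS;
        u32 cccr = P4_CCCR_THRESHOLD(0) | P4_CCCR_ENABLE;
        u64 config = p4_config_pack_escr(escr) | p4_config_pack_cccr(cccr);

        /* These recover 0x3 and 0x1 respectively. */
        u32 event = p4_config_unpack_event(config);
        u32 emask = p4_config_unpack_emask(config);

        (void)event;
        (void)emask;
}
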
| 138 | static inline bool p4_is_event_cascaded(u64 config) | ||
| 139 | { | ||
| 140 | u32 cccr = p4_config_unpack_cccr(config); | ||
| 141 | return !!(cccr & P4_CCCR_CASCADE); | ||
| 142 | } | ||
| 143 | |||
| 144 | static inline int p4_ht_config_thread(u64 config) | ||
| 145 | { | ||
| 146 | return !!(config & P4_CONFIG_HT); | ||
| 147 | } | ||
| 148 | |||
| 149 | static inline u64 p4_set_ht_bit(u64 config) | ||
| 150 | { | ||
| 151 | return config | P4_CONFIG_HT; | ||
| 152 | } | ||
| 153 | |||
| 154 | static inline u64 p4_clear_ht_bit(u64 config) | ||
| 155 | { | ||
| 156 | return config & ~P4_CONFIG_HT; | ||
| 157 | } | ||
| 158 | |||
| 159 | static inline int p4_ht_active(void) | ||
| 160 | { | ||
| 161 | #ifdef CONFIG_SMP | ||
| 162 | return smp_num_siblings > 1; | ||
| 163 | #endif | ||
| 164 | return 0; | ||
| 165 | } | ||
| 166 | |||
| 167 | static inline int p4_ht_thread(int cpu) | ||
| 168 | { | ||
| 169 | #ifdef CONFIG_SMP | ||
| 170 | if (smp_num_siblings == 2) | ||
| 171 | return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map)); | ||
| 172 | #endif | ||
| 173 | return 0; | ||
| 174 | } | ||
| 175 | |||
| 176 | static inline int p4_should_swap_ts(u64 config, int cpu) | ||
| 177 | { | ||
| 178 | return p4_ht_config_thread(config) ^ p4_ht_thread(cpu); | ||
| 179 | } | ||
| 180 | |||
| 181 | static inline u32 p4_default_cccr_conf(int cpu) | ||
| 182 | { | ||
| 183 | /* | ||
| 184 | * Note that P4_CCCR_THREAD_ANY is "required" on | ||
| 185 | * non-HT machines (on HT machines we count TS events | ||
| 186 | * regardless of the state of the second logical processor) | ||
| 187 | */ | ||
| 188 | u32 cccr = P4_CCCR_THREAD_ANY; | ||
| 189 | |||
| 190 | if (!p4_ht_thread(cpu)) | ||
| 191 | cccr |= P4_CCCR_OVF_PMI_T0; | ||
| 192 | else | ||
| 193 | cccr |= P4_CCCR_OVF_PMI_T1; | ||
| 194 | |||
| 195 | return cccr; | ||
| 196 | } | ||
| 197 | |||
| 198 | static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr) | ||
| 199 | { | ||
| 200 | u32 escr = 0; | ||
| 201 | |||
| 202 | if (!p4_ht_thread(cpu)) { | ||
| 203 | if (!exclude_os) | ||
| 204 | escr |= P4_ESCR_T0_OS; | ||
| 205 | if (!exclude_usr) | ||
| 206 | escr |= P4_ESCR_T0_USR; | ||
| 207 | } else { | ||
| 208 | if (!exclude_os) | ||
| 209 | escr |= P4_ESCR_T1_OS; | ||
| 210 | if (!exclude_usr) | ||
| 211 | escr |= P4_ESCR_T1_USR; | ||
| 212 | } | ||
| 213 | |||
| 214 | return escr; | ||
| 215 | } | ||
| 216 | |||
| 217 | enum P4_EVENTS { | ||
| 218 | P4_EVENT_TC_DELIVER_MODE, | ||
| 219 | P4_EVENT_BPU_FETCH_REQUEST, | ||
| 220 | P4_EVENT_ITLB_REFERENCE, | ||
| 221 | P4_EVENT_MEMORY_CANCEL, | ||
| 222 | P4_EVENT_MEMORY_COMPLETE, | ||
| 223 | P4_EVENT_LOAD_PORT_REPLAY, | ||
| 224 | P4_EVENT_STORE_PORT_REPLAY, | ||
| 225 | P4_EVENT_MOB_LOAD_REPLAY, | ||
| 226 | P4_EVENT_PAGE_WALK_TYPE, | ||
| 227 | P4_EVENT_BSQ_CACHE_REFERENCE, | ||
| 228 | P4_EVENT_IOQ_ALLOCATION, | ||
| 229 | P4_EVENT_IOQ_ACTIVE_ENTRIES, | ||
| 230 | P4_EVENT_FSB_DATA_ACTIVITY, | ||
| 231 | P4_EVENT_BSQ_ALLOCATION, | ||
| 232 | P4_EVENT_BSQ_ACTIVE_ENTRIES, | ||
| 233 | P4_EVENT_SSE_INPUT_ASSIST, | ||
| 234 | P4_EVENT_PACKED_SP_UOP, | ||
| 235 | P4_EVENT_PACKED_DP_UOP, | ||
| 236 | P4_EVENT_SCALAR_SP_UOP, | ||
| 237 | P4_EVENT_SCALAR_DP_UOP, | ||
| 238 | P4_EVENT_64BIT_MMX_UOP, | ||
| 239 | P4_EVENT_128BIT_MMX_UOP, | ||
| 240 | P4_EVENT_X87_FP_UOP, | ||
| 241 | P4_EVENT_TC_MISC, | ||
| 242 | P4_EVENT_GLOBAL_POWER_EVENTS, | ||
| 243 | P4_EVENT_TC_MS_XFER, | ||
| 244 | P4_EVENT_UOP_QUEUE_WRITES, | ||
| 245 | P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, | ||
| 246 | P4_EVENT_RETIRED_BRANCH_TYPE, | ||
| 247 | P4_EVENT_RESOURCE_STALL, | ||
| 248 | P4_EVENT_WC_BUFFER, | ||
| 249 | P4_EVENT_B2B_CYCLES, | ||
| 250 | P4_EVENT_BNR, | ||
| 251 | P4_EVENT_SNOOP, | ||
| 252 | P4_EVENT_RESPONSE, | ||
| 253 | P4_EVENT_FRONT_END_EVENT, | ||
| 254 | P4_EVENT_EXECUTION_EVENT, | ||
| 255 | P4_EVENT_REPLAY_EVENT, | ||
| 256 | P4_EVENT_INSTR_RETIRED, | ||
| 257 | P4_EVENT_UOPS_RETIRED, | ||
| 258 | P4_EVENT_UOP_TYPE, | ||
| 259 | P4_EVENT_BRANCH_RETIRED, | ||
| 260 | P4_EVENT_MISPRED_BRANCH_RETIRED, | ||
| 261 | P4_EVENT_X87_ASSIST, | ||
| 262 | P4_EVENT_MACHINE_CLEAR, | ||
| 263 | P4_EVENT_INSTR_COMPLETED, | ||
| 264 | }; | ||
| 265 | |||
| 266 | #define P4_OPCODE(event) event##_OPCODE | ||
| 267 | #define P4_OPCODE_ESEL(opcode) ((opcode & 0x00ff) >> 0) | ||
| 268 | #define P4_OPCODE_EVNT(opcode) ((opcode & 0xff00) >> 8) | ||
| 269 | #define P4_OPCODE_PACK(event, sel) (((event) << 8) | sel) | ||
| 270 | |||
| 271 | /* | ||
| 272 | * The comments below each event list the ESCR restrictions | ||
| 273 | * for that event and the counter indices per ESCR | ||
| 274 | * | ||
| 275 | * MSR_P4_IQ_ESCR0 and MSR_P4_IQ_ESCR1 are available only on early | ||
| 276 | * processor builds (family 0FH, models 01H-02H). These MSRs | ||
| 277 | * are not available on later versions, so we don't use | ||
| 278 | * them at all | ||
| 279 | * | ||
| 280 | * Also note that CCCR1 does not have the P4_CCCR_ENABLE bit | ||
| 281 | * working properly, so we should not use this CCCR or its | ||
| 282 | * counter as a result | ||
| 283 | */ | ||
| 284 | enum P4_EVENT_OPCODES { | ||
| 285 | P4_OPCODE(P4_EVENT_TC_DELIVER_MODE) = P4_OPCODE_PACK(0x01, 0x01), | ||
| 286 | /* | ||
| 287 | * MSR_P4_TC_ESCR0: 4, 5 | ||
| 288 | * MSR_P4_TC_ESCR1: 6, 7 | ||
| 289 | */ | ||
| 290 | |||
| 291 | P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST) = P4_OPCODE_PACK(0x03, 0x00), | ||
| 292 | /* | ||
| 293 | * MSR_P4_BPU_ESCR0: 0, 1 | ||
| 294 | * MSR_P4_BPU_ESCR1: 2, 3 | ||
| 295 | */ | ||
| 296 | |||
| 297 | P4_OPCODE(P4_EVENT_ITLB_REFERENCE) = P4_OPCODE_PACK(0x18, 0x03), | ||
| 298 | /* | ||
| 299 | * MSR_P4_ITLB_ESCR0: 0, 1 | ||
| 300 | * MSR_P4_ITLB_ESCR1: 2, 3 | ||
| 301 | */ | ||
| 302 | |||
| 303 | P4_OPCODE(P4_EVENT_MEMORY_CANCEL) = P4_OPCODE_PACK(0x02, 0x05), | ||
| 304 | /* | ||
| 305 | * MSR_P4_DAC_ESCR0: 8, 9 | ||
| 306 | * MSR_P4_DAC_ESCR1: 10, 11 | ||
| 307 | */ | ||
| 308 | |||
| 309 | P4_OPCODE(P4_EVENT_MEMORY_COMPLETE) = P4_OPCODE_PACK(0x08, 0x02), | ||
| 310 | /* | ||
| 311 | * MSR_P4_SAAT_ESCR0: 8, 9 | ||
| 312 | * MSR_P4_SAAT_ESCR1: 10, 11 | ||
| 313 | */ | ||
| 314 | |||
| 315 | P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY) = P4_OPCODE_PACK(0x04, 0x02), | ||
| 316 | /* | ||
| 317 | * MSR_P4_SAAT_ESCR0: 8, 9 | ||
| 318 | * MSR_P4_SAAT_ESCR1: 10, 11 | ||
| 319 | */ | ||
| 320 | |||
| 321 | P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY) = P4_OPCODE_PACK(0x05, 0x02), | ||
| 322 | /* | ||
| 323 | * MSR_P4_SAAT_ESCR0: 8, 9 | ||
| 324 | * MSR_P4_SAAT_ESCR1: 10, 11 | ||
| 325 | */ | ||
| 326 | |||
| 327 | P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY) = P4_OPCODE_PACK(0x03, 0x02), | ||
| 328 | /* | ||
| 329 | * MSR_P4_MOB_ESCR0: 0, 1 | ||
| 330 | * MSR_P4_MOB_ESCR1: 2, 3 | ||
| 331 | */ | ||
| 332 | |||
| 333 | P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE) = P4_OPCODE_PACK(0x01, 0x04), | ||
| 334 | /* | ||
| 335 | * MSR_P4_PMH_ESCR0: 0, 1 | ||
| 336 | * MSR_P4_PMH_ESCR1: 2, 3 | ||
| 337 | */ | ||
| 338 | |||
| 339 | P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE) = P4_OPCODE_PACK(0x0c, 0x07), | ||
| 340 | /* | ||
| 341 | * MSR_P4_BSU_ESCR0: 0, 1 | ||
| 342 | * MSR_P4_BSU_ESCR1: 2, 3 | ||
| 343 | */ | ||
| 344 | |||
| 345 | P4_OPCODE(P4_EVENT_IOQ_ALLOCATION) = P4_OPCODE_PACK(0x03, 0x06), | ||
| 346 | /* | ||
| 347 | * MSR_P4_FSB_ESCR0: 0, 1 | ||
| 348 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
| 349 | */ | ||
| 350 | |||
| 351 | P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES) = P4_OPCODE_PACK(0x1a, 0x06), | ||
| 352 | /* | ||
| 353 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
| 354 | */ | ||
| 355 | |||
| 356 | P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY) = P4_OPCODE_PACK(0x17, 0x06), | ||
| 357 | /* | ||
| 358 | * MSR_P4_FSB_ESCR0: 0, 1 | ||
| 359 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
| 360 | */ | ||
| 361 | |||
| 362 | P4_OPCODE(P4_EVENT_BSQ_ALLOCATION) = P4_OPCODE_PACK(0x05, 0x07), | ||
| 363 | /* | ||
| 364 | * MSR_P4_BSU_ESCR0: 0, 1 | ||
| 365 | */ | ||
| 366 | |||
| 367 | P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES) = P4_OPCODE_PACK(0x06, 0x07), | ||
| 368 | /* | ||
| 369 | * NOTE: no ESCR name in docs, it's guessed | ||
| 370 | * MSR_P4_BSU_ESCR1: 2, 3 | ||
| 371 | */ | ||
| 372 | |||
| 373 | P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST) = P4_OPCODE_PACK(0x34, 0x01), | ||
| 374 | /* | ||
| 375 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
| 376 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
| 377 | */ | ||
| 378 | |||
| 379 | P4_OPCODE(P4_EVENT_PACKED_SP_UOP) = P4_OPCODE_PACK(0x08, 0x01), | ||
| 380 | /* | ||
| 381 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
| 382 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
| 383 | */ | ||
| 384 | |||
| 385 | P4_OPCODE(P4_EVENT_PACKED_DP_UOP) = P4_OPCODE_PACK(0x0c, 0x01), | ||
| 386 | /* | ||
| 387 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
| 388 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
| 389 | */ | ||
| 390 | |||
| 391 | P4_OPCODE(P4_EVENT_SCALAR_SP_UOP) = P4_OPCODE_PACK(0x0a, 0x01), | ||
| 392 | /* | ||
| 393 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
| 394 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
| 395 | */ | ||
| 396 | |||
| 397 | P4_OPCODE(P4_EVENT_SCALAR_DP_UOP) = P4_OPCODE_PACK(0x0e, 0x01), | ||
| 398 | /* | ||
| 399 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
| 400 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
| 401 | */ | ||
| 402 | |||
| 403 | P4_OPCODE(P4_EVENT_64BIT_MMX_UOP) = P4_OPCODE_PACK(0x02, 0x01), | ||
| 404 | /* | ||
| 405 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
| 406 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
| 407 | */ | ||
| 408 | |||
| 409 | P4_OPCODE(P4_EVENT_128BIT_MMX_UOP) = P4_OPCODE_PACK(0x1a, 0x01), | ||
| 410 | /* | ||
| 411 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
| 412 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
| 413 | */ | ||
| 414 | |||
| 415 | P4_OPCODE(P4_EVENT_X87_FP_UOP) = P4_OPCODE_PACK(0x04, 0x01), | ||
| 416 | /* | ||
| 417 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
| 418 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
| 419 | */ | ||
| 420 | |||
| 421 | P4_OPCODE(P4_EVENT_TC_MISC) = P4_OPCODE_PACK(0x06, 0x01), | ||
| 422 | /* | ||
| 423 | * MSR_P4_TC_ESCR0: 4, 5 | ||
| 424 | * MSR_P4_TC_ESCR1: 6, 7 | ||
| 425 | */ | ||
| 426 | |||
| 427 | P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS) = P4_OPCODE_PACK(0x13, 0x06), | ||
| 428 | /* | ||
| 429 | * MSR_P4_FSB_ESCR0: 0, 1 | ||
| 430 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
| 431 | */ | ||
| 432 | |||
| 433 | P4_OPCODE(P4_EVENT_TC_MS_XFER) = P4_OPCODE_PACK(0x05, 0x00), | ||
| 434 | /* | ||
| 435 | * MSR_P4_MS_ESCR0: 4, 5 | ||
| 436 | * MSR_P4_MS_ESCR1: 6, 7 | ||
| 437 | */ | ||
| 438 | |||
| 439 | P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES) = P4_OPCODE_PACK(0x09, 0x00), | ||
| 440 | /* | ||
| 441 | * MSR_P4_MS_ESCR0: 4, 5 | ||
| 442 | * MSR_P4_MS_ESCR1: 6, 7 | ||
| 443 | */ | ||
| 444 | |||
| 445 | P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE) = P4_OPCODE_PACK(0x05, 0x02), | ||
| 446 | /* | ||
| 447 | * MSR_P4_TBPU_ESCR0: 4, 5 | ||
| 448 | * MSR_P4_TBPU_ESCR1: 6, 7 | ||
| 449 | */ | ||
| 450 | |||
| 451 | P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE) = P4_OPCODE_PACK(0x04, 0x02), | ||
| 452 | /* | ||
| 453 | * MSR_P4_TBPU_ESCR0: 4, 5 | ||
| 454 | * MSR_P4_TBPU_ESCR1: 6, 7 | ||
| 455 | */ | ||
| 456 | |||
| 457 | P4_OPCODE(P4_EVENT_RESOURCE_STALL) = P4_OPCODE_PACK(0x01, 0x01), | ||
| 458 | /* | ||
| 459 | * MSR_P4_ALF_ESCR0: 12, 13, 16 | ||
| 460 | * MSR_P4_ALF_ESCR1: 14, 15, 17 | ||
| 461 | */ | ||
| 462 | |||
| 463 | P4_OPCODE(P4_EVENT_WC_BUFFER) = P4_OPCODE_PACK(0x05, 0x05), | ||
| 464 | /* | ||
| 465 | * MSR_P4_DAC_ESCR0: 8, 9 | ||
| 466 | * MSR_P4_DAC_ESCR1: 10, 11 | ||
| 467 | */ | ||
| 468 | |||
| 469 | P4_OPCODE(P4_EVENT_B2B_CYCLES) = P4_OPCODE_PACK(0x16, 0x03), | ||
| 470 | /* | ||
| 471 | * MSR_P4_FSB_ESCR0: 0, 1 | ||
| 472 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
| 473 | */ | ||
| 474 | |||
| 475 | P4_OPCODE(P4_EVENT_BNR) = P4_OPCODE_PACK(0x08, 0x03), | ||
| 476 | /* | ||
| 477 | * MSR_P4_FSB_ESCR0: 0, 1 | ||
| 478 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
| 479 | */ | ||
| 480 | |||
| 481 | P4_OPCODE(P4_EVENT_SNOOP) = P4_OPCODE_PACK(0x06, 0x03), | ||
| 482 | /* | ||
| 483 | * MSR_P4_FSB_ESCR0: 0, 1 | ||
| 484 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
| 485 | */ | ||
| 486 | |||
| 487 | P4_OPCODE(P4_EVENT_RESPONSE) = P4_OPCODE_PACK(0x04, 0x03), | ||
| 488 | /* | ||
| 489 | * MSR_P4_FSB_ESCR0: 0, 1 | ||
| 490 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
| 491 | */ | ||
| 492 | |||
| 493 | P4_OPCODE(P4_EVENT_FRONT_END_EVENT) = P4_OPCODE_PACK(0x08, 0x05), | ||
| 494 | /* | ||
| 495 | * MSR_P4_CRU_ESCR2: 12, 13, 16 | ||
| 496 | * MSR_P4_CRU_ESCR3: 14, 15, 17 | ||
| 497 | */ | ||
| 498 | |||
| 499 | P4_OPCODE(P4_EVENT_EXECUTION_EVENT) = P4_OPCODE_PACK(0x0c, 0x05), | ||
| 500 | /* | ||
| 501 | * MSR_P4_CRU_ESCR2: 12, 13, 16 | ||
| 502 | * MSR_P4_CRU_ESCR3: 14, 15, 17 | ||
| 503 | */ | ||
| 504 | |||
| 505 | P4_OPCODE(P4_EVENT_REPLAY_EVENT) = P4_OPCODE_PACK(0x09, 0x05), | ||
| 506 | /* | ||
| 507 | * MSR_P4_CRU_ESCR2: 12, 13, 16 | ||
| 508 | * MSR_P4_CRU_ESCR3: 14, 15, 17 | ||
| 509 | */ | ||
| 510 | |||
| 511 | P4_OPCODE(P4_EVENT_INSTR_RETIRED) = P4_OPCODE_PACK(0x02, 0x04), | ||
| 512 | /* | ||
| 513 | * MSR_P4_CRU_ESCR0: 12, 13, 16 | ||
| 514 | * MSR_P4_CRU_ESCR1: 14, 15, 17 | ||
| 515 | */ | ||
| 516 | |||
| 517 | P4_OPCODE(P4_EVENT_UOPS_RETIRED) = P4_OPCODE_PACK(0x01, 0x04), | ||
| 518 | /* | ||
| 519 | * MSR_P4_CRU_ESCR0: 12, 13, 16 | ||
| 520 | * MSR_P4_CRU_ESCR1: 14, 15, 17 | ||
| 521 | */ | ||
| 522 | |||
| 523 | P4_OPCODE(P4_EVENT_UOP_TYPE) = P4_OPCODE_PACK(0x02, 0x02), | ||
| 524 | /* | ||
| 525 | * MSR_P4_RAT_ESCR0: 12, 13, 16 | ||
| 526 | * MSR_P4_RAT_ESCR1: 14, 15, 17 | ||
| 527 | */ | ||
| 528 | |||
| 529 | P4_OPCODE(P4_EVENT_BRANCH_RETIRED) = P4_OPCODE_PACK(0x06, 0x05), | ||
| 530 | /* | ||
| 531 | * MSR_P4_CRU_ESCR2: 12, 13, 16 | ||
| 532 | * MSR_P4_CRU_ESCR3: 14, 15, 17 | ||
| 533 | */ | ||
| 534 | |||
| 535 | P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED) = P4_OPCODE_PACK(0x03, 0x04), | ||
| 536 | /* | ||
| 537 | * MSR_P4_CRU_ESCR0: 12, 13, 16 | ||
| 538 | * MSR_P4_CRU_ESCR1: 14, 15, 17 | ||
| 539 | */ | ||
| 540 | |||
| 541 | P4_OPCODE(P4_EVENT_X87_ASSIST) = P4_OPCODE_PACK(0x03, 0x05), | ||
| 542 | /* | ||
| 543 | * MSR_P4_CRU_ESCR2: 12, 13, 16 | ||
| 544 | * MSR_P4_CRU_ESCR3: 14, 15, 17 | ||
| 545 | */ | ||
| 546 | |||
| 547 | P4_OPCODE(P4_EVENT_MACHINE_CLEAR) = P4_OPCODE_PACK(0x02, 0x05), | ||
| 548 | /* | ||
| 549 | * MSR_P4_CRU_ESCR2: 12, 13, 16 | ||
| 550 | * MSR_P4_CRU_ESCR3: 14, 15, 17 | ||
| 551 | */ | ||
| 552 | |||
| 553 | P4_OPCODE(P4_EVENT_INSTR_COMPLETED) = P4_OPCODE_PACK(0x07, 0x04), | ||
| 554 | /* | ||
| 555 | * MSR_P4_CRU_ESCR0: 12, 13, 16 | ||
| 556 | * MSR_P4_CRU_ESCR1: 14, 15, 17 | ||
| 557 | */ | ||
| 558 | }; | ||
| 559 | |||
| 560 | /* | ||
| 561 | * A caller should use the P4_ESCR_EMASK_NAME helper to | ||
| 562 | * pick the EventMask needed, for example | ||
| 563 | * | ||
| 564 | * P4_ESCR_EMASK_NAME(P4_EVENT_TC_DELIVER_MODE, DD) | ||
| 565 | */ | ||
| 566 | enum P4_ESCR_EMASKS { | ||
| 567 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DD, 0), | ||
| 568 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DB, 1), | ||
| 569 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DI, 2), | ||
| 570 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, BD, 3), | ||
| 571 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, BB, 4), | ||
| 572 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, BI, 5), | ||
| 573 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, ID, 6), | ||
| 574 | |||
| 575 | P4_GEN_ESCR_EMASK(P4_EVENT_BPU_FETCH_REQUEST, TCMISS, 0), | ||
| 576 | |||
| 577 | P4_GEN_ESCR_EMASK(P4_EVENT_ITLB_REFERENCE, HIT, 0), | ||
| 578 | P4_GEN_ESCR_EMASK(P4_EVENT_ITLB_REFERENCE, MISS, 1), | ||
| 579 | P4_GEN_ESCR_EMASK(P4_EVENT_ITLB_REFERENCE, HIT_UK, 2), | ||
| 580 | |||
| 581 | P4_GEN_ESCR_EMASK(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL, 2), | ||
| 582 | P4_GEN_ESCR_EMASK(P4_EVENT_MEMORY_CANCEL, 64K_CONF, 3), | ||
| 583 | |||
| 584 | P4_GEN_ESCR_EMASK(P4_EVENT_MEMORY_COMPLETE, LSC, 0), | ||
| 585 | P4_GEN_ESCR_EMASK(P4_EVENT_MEMORY_COMPLETE, SSC, 1), | ||
| 586 | |||
| 587 | P4_GEN_ESCR_EMASK(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD, 1), | ||
| 588 | |||
| 589 | P4_GEN_ESCR_EMASK(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST, 1), | ||
| 590 | |||
| 591 | P4_GEN_ESCR_EMASK(P4_EVENT_MOB_LOAD_REPLAY, NO_STA, 1), | ||
| 592 | P4_GEN_ESCR_EMASK(P4_EVENT_MOB_LOAD_REPLAY, NO_STD, 3), | ||
| 593 | P4_GEN_ESCR_EMASK(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA, 4), | ||
| 594 | P4_GEN_ESCR_EMASK(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR, 5), | ||
| 595 | |||
| 596 | P4_GEN_ESCR_EMASK(P4_EVENT_PAGE_WALK_TYPE, DTMISS, 0), | ||
| 597 | P4_GEN_ESCR_EMASK(P4_EVENT_PAGE_WALK_TYPE, ITMISS, 1), | ||
| 598 | |||
| 599 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS, 0), | ||
| 600 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE, 1), | ||
| 601 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM, 2), | ||
| 602 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS, 3), | ||
| 603 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE, 4), | ||
| 604 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM, 5), | ||
| 605 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS, 8), | ||
| 606 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS, 9), | ||
| 607 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS, 10), | ||
| 608 | |||
| 609 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, DEFAULT, 0), | ||
| 610 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, ALL_READ, 5), | ||
| 611 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE, 6), | ||
| 612 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_UC, 7), | ||
| 613 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_WC, 8), | ||
| 614 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_WT, 9), | ||
| 615 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_WP, 10), | ||
| 616 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_WB, 11), | ||
| 617 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, OWN, 13), | ||
| 618 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, OTHER, 14), | ||
| 619 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, PREFETCH, 15), | ||
| 620 | |||
| 621 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT, 0), | ||
| 622 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ, 5), | ||
| 623 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE, 6), | ||
| 624 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC, 7), | ||
| 625 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC, 8), | ||
| 626 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT, 9), | ||
| 627 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP, 10), | ||
| 628 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB, 11), | ||
| 629 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN, 13), | ||
| 630 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER, 14), | ||
| 631 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH, 15), | ||
| 632 | |||
| 633 | P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV, 0), | ||
| 634 | P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN, 1), | ||
| 635 | P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER, 2), | ||
| 636 | P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV, 3), | ||
| 637 | P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN, 4), | ||
| 638 | P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER, 5), | ||
| 639 | |||
| 640 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0, 0), | ||
| 641 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1, 1), | ||
| 642 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0, 2), | ||
| 643 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1, 3), | ||
| 644 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE, 5), | ||
| 645 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE, 6), | ||
| 646 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE, 7), | ||
| 647 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE, 8), | ||
| 648 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE, 9), | ||
| 649 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE, 10), | ||
| 650 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0, 11), | ||
| 651 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1, 12), | ||
| 652 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2, 13), | ||
| 653 | |||
| 654 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0, 0), | ||
| 655 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1, 1), | ||
| 656 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0, 2), | ||
| 657 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1, 3), | ||
| 658 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE, 5), | ||
| 659 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE, 6), | ||
| 660 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE, 7), | ||
| 661 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE, 8), | ||
| 662 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE, 9), | ||
| 663 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE, 10), | ||
| 664 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0, 11), | ||
| 665 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1, 12), | ||
| 666 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2, 13), | ||
| 667 | |||
| 668 | P4_GEN_ESCR_EMASK(P4_EVENT_SSE_INPUT_ASSIST, ALL, 15), | ||
| 669 | |||
| 670 | P4_GEN_ESCR_EMASK(P4_EVENT_PACKED_SP_UOP, ALL, 15), | ||
| 671 | |||
| 672 | P4_GEN_ESCR_EMASK(P4_EVENT_PACKED_DP_UOP, ALL, 15), | ||
| 673 | |||
| 674 | P4_GEN_ESCR_EMASK(P4_EVENT_SCALAR_SP_UOP, ALL, 15), | ||
| 675 | |||
| 676 | P4_GEN_ESCR_EMASK(P4_EVENT_SCALAR_DP_UOP, ALL, 15), | ||
| 677 | |||
| 678 | P4_GEN_ESCR_EMASK(P4_EVENT_64BIT_MMX_UOP, ALL, 15), | ||
| 679 | |||
| 680 | P4_GEN_ESCR_EMASK(P4_EVENT_128BIT_MMX_UOP, ALL, 15), | ||
| 681 | |||
| 682 | P4_GEN_ESCR_EMASK(P4_EVENT_X87_FP_UOP, ALL, 15), | ||
| 683 | |||
| 684 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_MISC, FLUSH, 4), | ||
| 685 | |||
| 686 | P4_GEN_ESCR_EMASK(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING, 0), | ||
| 687 | |||
| 688 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_MS_XFER, CISC, 0), | ||
| 689 | |||
| 690 | P4_GEN_ESCR_EMASK(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD, 0), | ||
| 691 | P4_GEN_ESCR_EMASK(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER, 1), | ||
| 692 | P4_GEN_ESCR_EMASK(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM, 2), | ||
| 693 | |||
| 694 | P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL, 1), | ||
| 695 | P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL, 2), | ||
| 696 | P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN, 3), | ||
| 697 | P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT, 4), | ||
| 698 | |||
| 699 | P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL, 1), | ||
| 700 | P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_BRANCH_TYPE, CALL, 2), | ||
| 701 | P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN, 3), | ||
| 702 | P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT, 4), | ||
| 703 | |||
| 704 | P4_GEN_ESCR_EMASK(P4_EVENT_RESOURCE_STALL, SBFULL, 5), | ||
| 705 | |||
| 706 | P4_GEN_ESCR_EMASK(P4_EVENT_WC_BUFFER, WCB_EVICTS, 0), | ||
| 707 | P4_GEN_ESCR_EMASK(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS, 1), | ||
| 708 | |||
| 709 | P4_GEN_ESCR_EMASK(P4_EVENT_FRONT_END_EVENT, NBOGUS, 0), | ||
| 710 | P4_GEN_ESCR_EMASK(P4_EVENT_FRONT_END_EVENT, BOGUS, 1), | ||
| 711 | |||
| 712 | P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, NBOGUS0, 0), | ||
| 713 | P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, NBOGUS1, 1), | ||
| 714 | P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, NBOGUS2, 2), | ||
| 715 | P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, NBOGUS3, 3), | ||
| 716 | P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, BOGUS0, 4), | ||
| 717 | P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, BOGUS1, 5), | ||
| 718 | P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, BOGUS2, 6), | ||
| 719 | P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, BOGUS3, 7), | ||
| 720 | |||
| 721 | P4_GEN_ESCR_EMASK(P4_EVENT_REPLAY_EVENT, NBOGUS, 0), | ||
| 722 | P4_GEN_ESCR_EMASK(P4_EVENT_REPLAY_EVENT, BOGUS, 1), | ||
| 723 | |||
| 724 | P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG, 0), | ||
| 725 | P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_RETIRED, NBOGUSTAG, 1), | ||
| 726 | P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_RETIRED, BOGUSNTAG, 2), | ||
| 727 | P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_RETIRED, BOGUSTAG, 3), | ||
| 728 | |||
| 729 | P4_GEN_ESCR_EMASK(P4_EVENT_UOPS_RETIRED, NBOGUS, 0), | ||
| 730 | P4_GEN_ESCR_EMASK(P4_EVENT_UOPS_RETIRED, BOGUS, 1), | ||
| 731 | |||
| 732 | P4_GEN_ESCR_EMASK(P4_EVENT_UOP_TYPE, TAGLOADS, 1), | ||
| 733 | P4_GEN_ESCR_EMASK(P4_EVENT_UOP_TYPE, TAGSTORES, 2), | ||
| 734 | |||
| 735 | P4_GEN_ESCR_EMASK(P4_EVENT_BRANCH_RETIRED, MMNP, 0), | ||
| 736 | P4_GEN_ESCR_EMASK(P4_EVENT_BRANCH_RETIRED, MMNM, 1), | ||
| 737 | P4_GEN_ESCR_EMASK(P4_EVENT_BRANCH_RETIRED, MMTP, 2), | ||
| 738 | P4_GEN_ESCR_EMASK(P4_EVENT_BRANCH_RETIRED, MMTM, 3), | ||
| 739 | |||
| 740 | P4_GEN_ESCR_EMASK(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS, 0), | ||
| 741 | |||
| 742 | P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, FPSU, 0), | ||
| 743 | P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, FPSO, 1), | ||
| 744 | P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, POAO, 2), | ||
| 745 | P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, POAU, 3), | ||
| 746 | P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, PREA, 4), | ||
| 747 | |||
| 748 | P4_GEN_ESCR_EMASK(P4_EVENT_MACHINE_CLEAR, CLEAR, 0), | ||
| 749 | P4_GEN_ESCR_EMASK(P4_EVENT_MACHINE_CLEAR, MOCLEAR, 1), | ||
| 750 | P4_GEN_ESCR_EMASK(P4_EVENT_MACHINE_CLEAR, SMCLEAR, 2), | ||
| 751 | |||
| 752 | P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, NBOGUS, 0), | ||
| 753 | P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, BOGUS, 1), | ||
| 754 | }; | ||
| 755 | |||
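The entries above are produced by a token-pasting generator macro. The following is a purely illustrative reimplementation of that idea (the real P4_GEN_ESCR_EMASK / P4_ESCR_EMASK_NAME macros are defined earlier in this header and differ in detail; all EXAMPLE_* names here are made up):

```c
/* Paste the event and mask names into one enumerator, encode the bit. */
#define EXAMPLE_GEN_EMASK(event, name, bit)	event##__##name = (1 << (bit))
#define EXAMPLE_EMASK_NAME(event, name)		event##__##name

enum example_emasks {
	EXAMPLE_GEN_EMASK(EXAMPLE_EVENT_FOO, BAR, 3),	/* EXAMPLE_EVENT_FOO__BAR = 1 << 3 */
};
/* EXAMPLE_EMASK_NAME(EXAMPLE_EVENT_FOO, BAR) then names that enumerator. */
```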
| 756 | /* P4 PEBS: stale for a while */ | ||
| 757 | #define P4_PEBS_METRIC_MASK 0x00001fffU | ||
| 758 | #define P4_PEBS_UOB_TAG 0x01000000U | ||
| 759 | #define P4_PEBS_ENABLE 0x02000000U | ||
| 760 | |||
| 761 | /* Replay metrics for MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT */ | ||
| 762 | #define P4_PEBS__1stl_cache_load_miss_retired 0x3000001 | ||
| 763 | #define P4_PEBS__2ndl_cache_load_miss_retired 0x3000002 | ||
| 764 | #define P4_PEBS__dtlb_load_miss_retired 0x3000004 | ||
| 765 | #define P4_PEBS__dtlb_store_miss_retired 0x3000004 | ||
| 766 | #define P4_PEBS__dtlb_all_miss_retired 0x3000004 | ||
| 767 | #define P4_PEBS__tagged_mispred_branch 0x3018000 | ||
| 768 | #define P4_PEBS__mob_load_replay_retired 0x3000200 | ||
| 769 | #define P4_PEBS__split_load_retired 0x3000400 | ||
| 770 | #define P4_PEBS__split_store_retired 0x3000400 | ||
| 771 | |||
| 772 | #define P4_VERT__1stl_cache_load_miss_retired 0x0000001 | ||
| 773 | #define P4_VERT__2ndl_cache_load_miss_retired 0x0000001 | ||
| 774 | #define P4_VERT__dtlb_load_miss_retired 0x0000001 | ||
| 775 | #define P4_VERT__dtlb_store_miss_retired 0x0000002 | ||
| 776 | #define P4_VERT__dtlb_all_miss_retired 0x0000003 | ||
| 777 | #define P4_VERT__tagged_mispred_branch 0x0000010 | ||
| 778 | #define P4_VERT__mob_load_replay_retired 0x0000001 | ||
| 779 | #define P4_VERT__split_load_retired 0x0000001 | ||
| 780 | #define P4_VERT__split_store_retired 0x0000002 | ||
| 781 | |||
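Each replay metric is a pair: a value for MSR_IA32_PEBS_ENABLE and a matching value for MSR_P4_PEBS_MATRIX_VERT, as named in the comment above. A hedged sketch of selecting one metric (the helper name is hypothetical; a real driver would derive the values from the event configuration rather than hard-coding them):

```c
/* Illustrative only: select the "1st-level cache load miss retired"
 * replay metric by writing its paired values to the two MSRs. */
static inline void p4_pebs_select_l1_miss_sketch(void)
{
	wrmsrl(MSR_IA32_PEBS_ENABLE, P4_PEBS__1stl_cache_load_miss_retired);
	wrmsrl(MSR_P4_PEBS_MATRIX_VERT, P4_VERT__1stl_cache_load_miss_retired);
}
```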
| 782 | enum P4_CACHE_EVENTS { | ||
| 783 | P4_CACHE__NONE, | ||
| 784 | |||
| 785 | P4_CACHE__1stl_cache_load_miss_retired, | ||
| 786 | P4_CACHE__2ndl_cache_load_miss_retired, | ||
| 787 | P4_CACHE__dtlb_load_miss_retired, | ||
| 788 | P4_CACHE__dtlb_store_miss_retired, | ||
| 789 | P4_CACHE__itlb_reference_hit, | ||
| 790 | P4_CACHE__itlb_reference_miss, | ||
| 791 | |||
| 792 | P4_CACHE__MAX | ||
| 793 | }; | ||
| 794 | |||
| 795 | #endif /* PERF_EVENT_P4_H */ | ||
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index b753ea59703a..7e5c6a60b8ee 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
| @@ -21,7 +21,6 @@ struct mm_struct; | |||
| 21 | #include <asm/msr.h> | 21 | #include <asm/msr.h> |
| 22 | #include <asm/desc_defs.h> | 22 | #include <asm/desc_defs.h> |
| 23 | #include <asm/nops.h> | 23 | #include <asm/nops.h> |
| 24 | #include <asm/ds.h> | ||
| 25 | 24 | ||
| 26 | #include <linux/personality.h> | 25 | #include <linux/personality.h> |
| 27 | #include <linux/cpumask.h> | 26 | #include <linux/cpumask.h> |
| @@ -29,6 +28,7 @@ struct mm_struct; | |||
| 29 | #include <linux/threads.h> | 28 | #include <linux/threads.h> |
| 30 | #include <linux/math64.h> | 29 | #include <linux/math64.h> |
| 31 | #include <linux/init.h> | 30 | #include <linux/init.h> |
| 31 | #include <linux/err.h> | ||
| 32 | 32 | ||
| 33 | #define HBP_NUM 4 | 33 | #define HBP_NUM 4 |
| 34 | /* | 34 | /* |
| @@ -113,7 +113,6 @@ struct cpuinfo_x86 { | |||
| 113 | /* Index into per_cpu list: */ | 113 | /* Index into per_cpu list: */ |
| 114 | u16 cpu_index; | 114 | u16 cpu_index; |
| 115 | #endif | 115 | #endif |
| 116 | unsigned int x86_hyper_vendor; | ||
| 117 | } __attribute__((__aligned__(SMP_CACHE_BYTES))); | 116 | } __attribute__((__aligned__(SMP_CACHE_BYTES))); |
| 118 | 117 | ||
| 119 | #define X86_VENDOR_INTEL 0 | 118 | #define X86_VENDOR_INTEL 0 |
| @@ -127,9 +126,6 @@ struct cpuinfo_x86 { | |||
| 127 | 126 | ||
| 128 | #define X86_VENDOR_UNKNOWN 0xff | 127 | #define X86_VENDOR_UNKNOWN 0xff |
| 129 | 128 | ||
| 130 | #define X86_HYPER_VENDOR_NONE 0 | ||
| 131 | #define X86_HYPER_VENDOR_VMWARE 1 | ||
| 132 | |||
| 133 | /* | 129 | /* |
| 134 | * capabilities of CPUs | 130 | * capabilities of CPUs |
| 135 | */ | 131 | */ |
| @@ -380,6 +376,10 @@ union thread_xstate { | |||
| 380 | struct xsave_struct xsave; | 376 | struct xsave_struct xsave; |
| 381 | }; | 377 | }; |
| 382 | 378 | ||
| 379 | struct fpu { | ||
| 380 | union thread_xstate *state; | ||
| 381 | }; | ||
| 382 | |||
| 383 | #ifdef CONFIG_X86_64 | 383 | #ifdef CONFIG_X86_64 |
| 384 | DECLARE_PER_CPU(struct orig_ist, orig_ist); | 384 | DECLARE_PER_CPU(struct orig_ist, orig_ist); |
| 385 | 385 | ||
| @@ -457,7 +457,7 @@ struct thread_struct { | |||
| 457 | unsigned long trap_no; | 457 | unsigned long trap_no; |
| 458 | unsigned long error_code; | 458 | unsigned long error_code; |
| 459 | /* floating point and extended processor state */ | 459 | /* floating point and extended processor state */ |
| 460 | union thread_xstate *xstate; | 460 | struct fpu fpu; |
| 461 | #ifdef CONFIG_X86_32 | 461 | #ifdef CONFIG_X86_32 |
| 462 | /* Virtual 86 mode info */ | 462 | /* Virtual 86 mode info */ |
| 463 | struct vm86_struct __user *vm86_info; | 463 | struct vm86_struct __user *vm86_info; |
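With this hunk the extended state is reached through the embedded struct fpu instead of the old thread.xstate pointer. A hypothetical accessor illustrating the before/after (the helper name is made up; real users are updated throughout the FPU code):

```c
/* Sketch: the xsave area is now one member deeper than before. */
static inline struct xsave_struct *task_xsave_sketch(struct task_struct *tsk)
{
	return &tsk->thread.fpu.state->xsave;	/* was: &tsk->thread.xstate->xsave */
}
```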
| @@ -473,10 +473,6 @@ struct thread_struct { | |||
| 473 | unsigned long iopl; | 473 | unsigned long iopl; |
| 474 | /* Max allowed port in the bitmap, in bytes: */ | 474 | /* Max allowed port in the bitmap, in bytes: */ |
| 475 | unsigned io_bitmap_max; | 475 | unsigned io_bitmap_max; |
| 476 | /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */ | ||
| 477 | unsigned long debugctlmsr; | ||
| 478 | /* Debug Store context; see asm/ds.h */ | ||
| 479 | struct ds_context *ds_ctx; | ||
| 480 | }; | 476 | }; |
| 481 | 477 | ||
| 482 | static inline unsigned long native_get_debugreg(int regno) | 478 | static inline unsigned long native_get_debugreg(int regno) |
| @@ -793,6 +789,8 @@ static inline void wbinvd_halt(void) | |||
| 793 | extern void enable_sep_cpu(void); | 789 | extern void enable_sep_cpu(void); |
| 794 | extern int sysenter_setup(void); | 790 | extern int sysenter_setup(void); |
| 795 | 791 | ||
| 792 | extern void early_trap_init(void); | ||
| 793 | |||
| 796 | /* Defined in head.S */ | 794 | /* Defined in head.S */ |
| 797 | extern struct desc_ptr early_gdt_descr; | 795 | extern struct desc_ptr early_gdt_descr; |
| 798 | 796 | ||
| @@ -803,7 +801,7 @@ extern void cpu_init(void); | |||
| 803 | 801 | ||
| 804 | static inline unsigned long get_debugctlmsr(void) | 802 | static inline unsigned long get_debugctlmsr(void) |
| 805 | { | 803 | { |
| 806 | unsigned long debugctlmsr = 0; | 804 | unsigned long debugctlmsr = 0; |
| 807 | 805 | ||
| 808 | #ifndef CONFIG_X86_DEBUGCTLMSR | 806 | #ifndef CONFIG_X86_DEBUGCTLMSR |
| 809 | if (boot_cpu_data.x86 < 6) | 807 | if (boot_cpu_data.x86 < 6) |
| @@ -811,21 +809,6 @@ static inline unsigned long get_debugctlmsr(void) | |||
| 811 | #endif | 809 | #endif |
| 812 | rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); | 810 | rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); |
| 813 | 811 | ||
| 814 | return debugctlmsr; | ||
| 815 | } | ||
| 816 | |||
| 817 | static inline unsigned long get_debugctlmsr_on_cpu(int cpu) | ||
| 818 | { | ||
| 819 | u64 debugctlmsr = 0; | ||
| 820 | u32 val1, val2; | ||
| 821 | |||
| 822 | #ifndef CONFIG_X86_DEBUGCTLMSR | ||
| 823 | if (boot_cpu_data.x86 < 6) | ||
| 824 | return 0; | ||
| 825 | #endif | ||
| 826 | rdmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, &val1, &val2); | ||
| 827 | debugctlmsr = val1 | ((u64)val2 << 32); | ||
| 828 | |||
| 829 | return debugctlmsr; | 812 | return debugctlmsr; |
| 830 | } | 813 | } |
| 831 | 814 | ||
| @@ -838,18 +821,6 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr) | |||
| 838 | wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); | 821 | wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); |
| 839 | } | 822 | } |
| 840 | 823 | ||
| 841 | static inline void update_debugctlmsr_on_cpu(int cpu, | ||
| 842 | unsigned long debugctlmsr) | ||
| 843 | { | ||
| 844 | #ifndef CONFIG_X86_DEBUGCTLMSR | ||
| 845 | if (boot_cpu_data.x86 < 6) | ||
| 846 | return; | ||
| 847 | #endif | ||
| 848 | wrmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, | ||
| 849 | (u32)((u64)debugctlmsr), | ||
| 850 | (u32)((u64)debugctlmsr >> 32)); | ||
| 851 | } | ||
| 852 | |||
| 853 | /* | 824 | /* |
| 854 | * from system description table in BIOS. Mostly for MCA use, but | 825 | * from system description table in BIOS. Mostly for MCA use, but |
| 855 | * others may find it useful: | 826 | * others may find it useful: |
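With the *_on_cpu() variants removed above, DEBUGCTL manipulation is local-CPU only. A small sketch of the remaining usage pattern (hypothetical helper name; DEBUGCTLMSR_BTF is the branch-trap flag from asm/msr-index.h):

```c
/* Sketch: enable "single-step on branches" on the current CPU using the
 * surviving local-only accessors. */
static inline void enable_block_step_sketch(void)
{
	update_debugctlmsr(get_debugctlmsr() | DEBUGCTLMSR_BTF);
}
```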
diff --git a/arch/x86/include/asm/ptrace-abi.h b/arch/x86/include/asm/ptrace-abi.h index 86723035a515..52b098a6eebb 100644 --- a/arch/x86/include/asm/ptrace-abi.h +++ b/arch/x86/include/asm/ptrace-abi.h | |||
| @@ -82,61 +82,6 @@ | |||
| 82 | 82 | ||
| 83 | #ifndef __ASSEMBLY__ | 83 | #ifndef __ASSEMBLY__ |
| 84 | #include <linux/types.h> | 84 | #include <linux/types.h> |
| 85 | 85 | #endif | |
| 86 | /* configuration/status structure used in PTRACE_BTS_CONFIG and | ||
| 87 | PTRACE_BTS_STATUS commands. | ||
| 88 | */ | ||
| 89 | struct ptrace_bts_config { | ||
| 90 | /* requested or actual size of BTS buffer in bytes */ | ||
| 91 | __u32 size; | ||
| 92 | /* bitmask of below flags */ | ||
| 93 | __u32 flags; | ||
| 94 | /* buffer overflow signal */ | ||
| 95 | __u32 signal; | ||
| 96 | /* actual size of bts_struct in bytes */ | ||
| 97 | __u32 bts_size; | ||
| 98 | }; | ||
| 99 | #endif /* __ASSEMBLY__ */ | ||
| 100 | |||
| 101 | #define PTRACE_BTS_O_TRACE 0x1 /* branch trace */ | ||
| 102 | #define PTRACE_BTS_O_SCHED 0x2 /* scheduling events w/ jiffies */ | ||
| 103 | #define PTRACE_BTS_O_SIGNAL 0x4 /* send SIG<signal> on buffer overflow | ||
| 104 | instead of wrapping around */ | ||
| 105 | #define PTRACE_BTS_O_ALLOC 0x8 /* (re)allocate buffer */ | ||
| 106 | |||
| 107 | #define PTRACE_BTS_CONFIG 40 | ||
| 108 | /* Configure branch trace recording. | ||
| 109 | ADDR points to a struct ptrace_bts_config. | ||
| 110 | DATA gives the size of that buffer. | ||
| 111 | A new buffer is allocated, if requested in the flags. | ||
| 112 | An overflow signal may only be requested for new buffers. | ||
| 113 | Returns the number of bytes read. | ||
| 114 | */ | ||
| 115 | #define PTRACE_BTS_STATUS 41 | ||
| 116 | /* Return the current configuration in a struct ptrace_bts_config | ||
| 117 | pointed to by ADDR; DATA gives the size of that buffer. | ||
| 118 | Returns the number of bytes written. | ||
| 119 | */ | ||
| 120 | #define PTRACE_BTS_SIZE 42 | ||
| 121 | /* Return the number of available BTS records for draining. | ||
| 122 | DATA and ADDR are ignored. | ||
| 123 | */ | ||
| 124 | #define PTRACE_BTS_GET 43 | ||
| 125 | /* Get a single BTS record. | ||
| 126 | DATA defines the index into the BTS array, where 0 is the newest | ||
| 127 | entry, and higher indices refer to older entries. | ||
| 128 | ADDR is pointing to struct bts_struct (see asm/ds.h). | ||
| 129 | */ | ||
| 130 | #define PTRACE_BTS_CLEAR 44 | ||
| 131 | /* Clear the BTS buffer. | ||
| 132 | DATA and ADDR are ignored. | ||
| 133 | */ | ||
| 134 | #define PTRACE_BTS_DRAIN 45 | ||
| 135 | /* Read all available BTS records and clear the buffer. | ||
| 136 | ADDR points to an array of struct bts_struct. | ||
| 137 | DATA gives the size of that buffer. | ||
| 138 | BTS records are read from oldest to newest. | ||
| 139 | Returns number of BTS records drained. | ||
| 140 | */ | ||
| 141 | 86 | ||
| 142 | #endif /* _ASM_X86_PTRACE_ABI_H */ | 87 | #endif /* _ASM_X86_PTRACE_ABI_H */ |
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 69a686a7dff0..78cd1ea94500 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h | |||
| @@ -289,12 +289,6 @@ extern int do_get_thread_area(struct task_struct *p, int idx, | |||
| 289 | extern int do_set_thread_area(struct task_struct *p, int idx, | 289 | extern int do_set_thread_area(struct task_struct *p, int idx, |
| 290 | struct user_desc __user *info, int can_allocate); | 290 | struct user_desc __user *info, int can_allocate); |
| 291 | 291 | ||
| 292 | #ifdef CONFIG_X86_PTRACE_BTS | ||
| 293 | extern void ptrace_bts_untrace(struct task_struct *tsk); | ||
| 294 | |||
| 295 | #define arch_ptrace_untrace(tsk) ptrace_bts_untrace(tsk) | ||
| 296 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
| 297 | |||
| 298 | #endif /* __KERNEL__ */ | 292 | #endif /* __KERNEL__ */ |
| 299 | 293 | ||
| 300 | #endif /* !__ASSEMBLY__ */ | 294 | #endif /* !__ASSEMBLY__ */ |
diff --git a/arch/x86/include/asm/pvclock-abi.h b/arch/x86/include/asm/pvclock-abi.h index 6d93508f2626..35f2d1948ada 100644 --- a/arch/x86/include/asm/pvclock-abi.h +++ b/arch/x86/include/asm/pvclock-abi.h | |||
| @@ -29,7 +29,8 @@ struct pvclock_vcpu_time_info { | |||
| 29 | u64 system_time; | 29 | u64 system_time; |
| 30 | u32 tsc_to_system_mul; | 30 | u32 tsc_to_system_mul; |
| 31 | s8 tsc_shift; | 31 | s8 tsc_shift; |
| 32 | u8 pad[3]; | 32 | u8 flags; |
| 33 | u8 pad[2]; | ||
| 33 | } __attribute__((__packed__)); /* 32 bytes */ | 34 | } __attribute__((__packed__)); /* 32 bytes */ |
| 34 | 35 | ||
| 35 | struct pvclock_wall_clock { | 36 | struct pvclock_wall_clock { |
| @@ -38,5 +39,6 @@ struct pvclock_wall_clock { | |||
| 38 | u32 nsec; | 39 | u32 nsec; |
| 39 | } __attribute__((__packed__)); | 40 | } __attribute__((__packed__)); |
| 40 | 41 | ||
| 42 | #define PVCLOCK_TSC_STABLE_BIT (1 << 0) | ||
| 41 | #endif /* __ASSEMBLY__ */ | 43 | #endif /* __ASSEMBLY__ */ |
| 42 | #endif /* _ASM_X86_PVCLOCK_ABI_H */ | 44 | #endif /* _ASM_X86_PVCLOCK_ABI_H */ |
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 53235fd5f8ce..cd02f324aa6b 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | /* some helper functions for xen and kvm pv clock sources */ | 7 | /* some helper functions for xen and kvm pv clock sources */ |
| 8 | cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src); | 8 | cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src); |
| 9 | void pvclock_set_flags(u8 flags); | ||
| 9 | unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src); | 10 | unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src); |
| 10 | void pvclock_read_wallclock(struct pvclock_wall_clock *wall, | 11 | void pvclock_read_wallclock(struct pvclock_wall_clock *wall, |
| 11 | struct pvclock_vcpu_time_info *vcpu, | 12 | struct pvclock_vcpu_time_info *vcpu, |
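The new flags byte in pvclock_vcpu_time_info plus pvclock_set_flags() let the guest-side clocksource declare which pvclock guarantees it relies on; so far only the stable-TSC bit is defined. A hypothetical guest init sketch (function name is illustrative only):

```c
/* Sketch: tell the pvclock code the host advertises a stable,
 * migration-safe TSC-based clock before registering the clocksource. */
static void guest_pvclock_init_sketch(void)
{
	pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
}
```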
diff --git a/arch/x86/include/asm/rdc321x_defs.h b/arch/x86/include/asm/rdc321x_defs.h deleted file mode 100644 index c8e9c8bed3d0..000000000000 --- a/arch/x86/include/asm/rdc321x_defs.h +++ /dev/null | |||
| @@ -1,12 +0,0 @@ | |||
| 1 | #define PFX "rdc321x: " | ||
| 2 | |||
| 3 | /* General purpose configuration and data registers */ | ||
| 4 | #define RDC3210_CFGREG_ADDR 0x0CF8 | ||
| 5 | #define RDC3210_CFGREG_DATA 0x0CFC | ||
| 6 | |||
| 7 | #define RDC321X_GPIO_CTRL_REG1 0x48 | ||
| 8 | #define RDC321X_GPIO_CTRL_REG2 0x84 | ||
| 9 | #define RDC321X_GPIO_DATA_REG1 0x4c | ||
| 10 | #define RDC321X_GPIO_DATA_REG2 0x88 | ||
| 11 | |||
| 12 | #define RDC321X_MAX_GPIO 58 | ||
diff --git a/arch/x86/include/asm/scatterlist.h b/arch/x86/include/asm/scatterlist.h index 75af592677ec..fb0b1874396f 100644 --- a/arch/x86/include/asm/scatterlist.h +++ b/arch/x86/include/asm/scatterlist.h | |||
| @@ -1,8 +1,9 @@ | |||
| 1 | #ifndef _ASM_X86_SCATTERLIST_H | 1 | #ifndef _ASM_X86_SCATTERLIST_H |
| 2 | #define _ASM_X86_SCATTERLIST_H | 2 | #define _ASM_X86_SCATTERLIST_H |
| 3 | 3 | ||
| 4 | #define ISA_DMA_THRESHOLD (0x00ffffff) | ||
| 5 | |||
| 6 | #include <asm-generic/scatterlist.h> | 4 | #include <asm-generic/scatterlist.h> |
| 7 | 5 | ||
| 6 | #define ISA_DMA_THRESHOLD (0x00ffffff) | ||
| 7 | #define ARCH_HAS_SG_CHAIN | ||
| 8 | |||
| 8 | #endif /* _ASM_X86_SCATTERLIST_H */ | 9 | #endif /* _ASM_X86_SCATTERLIST_H */ |
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 38638cd2fa4c..0e831059ac5a 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h | |||
| @@ -81,7 +81,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area { | |||
| 81 | u32 event_inj_err; | 81 | u32 event_inj_err; |
| 82 | u64 nested_cr3; | 82 | u64 nested_cr3; |
| 83 | u64 lbr_ctl; | 83 | u64 lbr_ctl; |
| 84 | u8 reserved_5[832]; | 84 | u64 reserved_5; |
| 85 | u64 next_rip; | ||
| 86 | u8 reserved_6[816]; | ||
| 85 | }; | 87 | }; |
| 86 | 88 | ||
| 87 | 89 | ||
| @@ -115,6 +117,10 @@ struct __attribute__ ((__packed__)) vmcb_control_area { | |||
| 115 | #define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT) | 117 | #define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT) |
| 116 | #define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT) | 118 | #define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT) |
| 117 | 119 | ||
| 120 | #define SVM_VM_CR_VALID_MASK 0x001fULL | ||
| 121 | #define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL | ||
| 122 | #define SVM_VM_CR_SVM_DIS_MASK 0x0010ULL | ||
| 123 | |||
| 118 | struct __attribute__ ((__packed__)) vmcb_seg { | 124 | struct __attribute__ ((__packed__)) vmcb_seg { |
| 119 | u16 selector; | 125 | u16 selector; |
| 120 | u16 attrib; | 126 | u16 attrib; |
| @@ -238,6 +244,7 @@ struct __attribute__ ((__packed__)) vmcb { | |||
| 238 | 244 | ||
| 239 | #define SVM_EXITINFOSHIFT_TS_REASON_IRET 36 | 245 | #define SVM_EXITINFOSHIFT_TS_REASON_IRET 36 |
| 240 | #define SVM_EXITINFOSHIFT_TS_REASON_JMP 38 | 246 | #define SVM_EXITINFOSHIFT_TS_REASON_JMP 38 |
| 247 | #define SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE 44 | ||
| 241 | 248 | ||
| 242 | #define SVM_EXIT_READ_CR0 0x000 | 249 | #define SVM_EXIT_READ_CR0 0x000 |
| 243 | #define SVM_EXIT_READ_CR3 0x003 | 250 | #define SVM_EXIT_READ_CR3 0x003 |
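The new SVM_VM_CR_* masks describe bits of the VM_CR MSR. A hedged sketch of how they might be used to detect SVM being disabled and locked by firmware (the helper name is hypothetical, and MSR_VM_CR is assumed to be the usual VM_CR MSR index from asm/msr-index.h):

```c
/* Sketch: has the BIOS disabled SVM and locked the setting? */
static bool svm_locked_off_by_bios_sketch(void)
{
	u64 vm_cr;

	rdmsrl(MSR_VM_CR, vm_cr);
	return (vm_cr & SVM_VM_CR_SVM_DIS_MASK) &&
	       (vm_cr & SVM_VM_CR_SVM_LOCK_MASK);
}
```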
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index e0d28901e969..f0b6e5dbc5a0 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h | |||
| @@ -87,13 +87,12 @@ struct thread_info { | |||
| 87 | #define TIF_NOTSC 16 /* TSC is not accessible in userland */ | 87 | #define TIF_NOTSC 16 /* TSC is not accessible in userland */ |
| 88 | #define TIF_IA32 17 /* 32bit process */ | 88 | #define TIF_IA32 17 /* 32bit process */ |
| 89 | #define TIF_FORK 18 /* ret_from_fork */ | 89 | #define TIF_FORK 18 /* ret_from_fork */ |
| 90 | #define TIF_MEMDIE 20 | 90 | #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ |
| 91 | #define TIF_DEBUG 21 /* uses debug registers */ | 91 | #define TIF_DEBUG 21 /* uses debug registers */ |
| 92 | #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ | 92 | #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ |
| 93 | #define TIF_FREEZE 23 /* is freezing for suspend */ | 93 | #define TIF_FREEZE 23 /* is freezing for suspend */ |
| 94 | #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ | 94 | #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ |
| 95 | #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ | 95 | #define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */ |
| 96 | #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ | ||
| 97 | #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ | 96 | #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ |
| 98 | #define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */ | 97 | #define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */ |
| 99 | 98 | ||
| @@ -115,8 +114,7 @@ struct thread_info { | |||
| 115 | #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) | 114 | #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) |
| 116 | #define _TIF_FREEZE (1 << TIF_FREEZE) | 115 | #define _TIF_FREEZE (1 << TIF_FREEZE) |
| 117 | #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) | 116 | #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) |
| 118 | #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) | 117 | #define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP) |
| 119 | #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) | ||
| 120 | #define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) | 118 | #define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) |
| 121 | #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) | 119 | #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) |
| 122 | 120 | ||
| @@ -147,7 +145,7 @@ struct thread_info { | |||
| 147 | 145 | ||
| 148 | /* flags to check in __switch_to() */ | 146 | /* flags to check in __switch_to() */ |
| 149 | #define _TIF_WORK_CTXSW \ | 147 | #define _TIF_WORK_CTXSW \ |
| 150 | (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC) | 148 | (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP) |
| 151 | 149 | ||
| 152 | #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) | 150 | #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) |
| 153 | #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) | 151 | #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) |
| @@ -241,10 +239,9 @@ static inline struct thread_info *current_thread_info(void) | |||
| 241 | #define TS_USEDFPU 0x0001 /* FPU was used by this task | 239 | #define TS_USEDFPU 0x0001 /* FPU was used by this task |
| 242 | this quantum (SMP) */ | 240 | this quantum (SMP) */ |
| 243 | #define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ | 241 | #define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ |
| 244 | #define TS_POLLING 0x0004 /* true if in idle loop | 242 | #define TS_POLLING 0x0004 /* idle task polling need_resched, |
| 245 | and not sleeping */ | 243 | skip sending interrupt */ |
| 246 | #define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */ | 244 | #define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */ |
| 247 | #define TS_XSAVE 0x0010 /* Use xsave/xrstor */ | ||
| 248 | 245 | ||
| 249 | #define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) | 246 | #define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) |
| 250 | 247 | ||
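TIF_BLOCKSTEP replaces the old TIF_DEBUGCTLMSR/TIF_DS_AREA_MSR pair, and because it is part of _TIF_WORK_CTXSW above, the context-switch path is what actually propagates it into DEBUGCTLMSR_BTF. A sketch of the requesting side (hypothetical helper name; the real logic lives in the step/process code):

```c
/* Sketch: a ptrace-style "single-step on branches" request now just
 * sets the per-task flag; __switch_to() does the MSR work later. */
static void user_enable_block_step_sketch(struct task_struct *child)
{
	set_tsk_thread_flag(child, TIF_BLOCKSTEP);
}
```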
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index c5087d796587..21899cc31e52 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h | |||
| @@ -53,33 +53,29 @@ | |||
| 53 | extern int cpu_to_node_map[]; | 53 | extern int cpu_to_node_map[]; |
| 54 | 54 | ||
| 55 | /* Returns the number of the node containing CPU 'cpu' */ | 55 | /* Returns the number of the node containing CPU 'cpu' */ |
| 56 | static inline int cpu_to_node(int cpu) | 56 | static inline int __cpu_to_node(int cpu) |
| 57 | { | 57 | { |
| 58 | return cpu_to_node_map[cpu]; | 58 | return cpu_to_node_map[cpu]; |
| 59 | } | 59 | } |
| 60 | #define early_cpu_to_node(cpu) cpu_to_node(cpu) | 60 | #define early_cpu_to_node __cpu_to_node |
| 61 | #define cpu_to_node __cpu_to_node | ||
| 61 | 62 | ||
| 62 | #else /* CONFIG_X86_64 */ | 63 | #else /* CONFIG_X86_64 */ |
| 63 | 64 | ||
| 64 | /* Mappings between logical cpu number and node number */ | 65 | /* Mappings between logical cpu number and node number */ |
| 65 | DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map); | 66 | DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map); |
| 66 | 67 | ||
| 67 | /* Returns the number of the current Node. */ | ||
| 68 | DECLARE_PER_CPU(int, node_number); | ||
| 69 | #define numa_node_id() percpu_read(node_number) | ||
| 70 | |||
| 71 | #ifdef CONFIG_DEBUG_PER_CPU_MAPS | 68 | #ifdef CONFIG_DEBUG_PER_CPU_MAPS |
| 72 | extern int cpu_to_node(int cpu); | 69 | /* |
| 70 | * override generic percpu implementation of cpu_to_node | ||
| 71 | */ | ||
| 72 | extern int __cpu_to_node(int cpu); | ||
| 73 | #define cpu_to_node __cpu_to_node | ||
| 74 | |||
| 73 | extern int early_cpu_to_node(int cpu); | 75 | extern int early_cpu_to_node(int cpu); |
| 74 | 76 | ||
| 75 | #else /* !CONFIG_DEBUG_PER_CPU_MAPS */ | 77 | #else /* !CONFIG_DEBUG_PER_CPU_MAPS */ |
| 76 | 78 | ||
| 77 | /* Returns the number of the node containing CPU 'cpu' */ | ||
| 78 | static inline int cpu_to_node(int cpu) | ||
| 79 | { | ||
| 80 | return per_cpu(x86_cpu_to_node_map, cpu); | ||
| 81 | } | ||
| 82 | |||
| 83 | /* Same function but used if called before per_cpu areas are setup */ | 79 | /* Same function but used if called before per_cpu areas are setup */ |
| 84 | static inline int early_cpu_to_node(int cpu) | 80 | static inline int early_cpu_to_node(int cpu) |
| 85 | { | 81 | { |
| @@ -170,6 +166,10 @@ static inline int numa_node_id(void) | |||
| 170 | { | 166 | { |
| 171 | return 0; | 167 | return 0; |
| 172 | } | 168 | } |
| 169 | /* | ||
| 170 | * indicate override: | ||
| 171 | */ | ||
| 172 | #define numa_node_id numa_node_id | ||
| 173 | 173 | ||
| 174 | static inline int early_cpu_to_node(int cpu) | 174 | static inline int early_cpu_to_node(int cpu) |
| 175 | { | 175 | { |
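The "#define cpu_to_node __cpu_to_node" and "#define numa_node_id numa_node_id" lines above use the self-referencing-macro idiom so that generic headers can detect the architecture override. A generic fallback typically looks roughly like the following (illustrative only, not the exact generic-header text):

```c
/* Generic side: only provide the flat fallback if the arch did not
 * announce its own implementation via the macro above. */
#ifndef numa_node_id
static inline int numa_node_id(void)
{
	return 0;	/* single-node fallback */
}
#endif
```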
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 4da91ad69e0d..f66cda56781d 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h | |||
| @@ -79,7 +79,7 @@ static inline int get_si_code(unsigned long condition) | |||
| 79 | 79 | ||
| 80 | extern int panic_on_unrecovered_nmi; | 80 | extern int panic_on_unrecovered_nmi; |
| 81 | 81 | ||
| 82 | void math_error(void __user *); | 82 | void math_error(struct pt_regs *, int, int); |
| 83 | void math_emulate(struct math_emu_info *); | 83 | void math_emulate(struct math_emu_info *); |
| 84 | #ifndef CONFIG_X86_32 | 84 | #ifndef CONFIG_X86_32 |
| 85 | asmlinkage void smp_thermal_interrupt(void); | 85 | asmlinkage void smp_thermal_interrupt(void); |
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index b414d2b401f6..aa558ac0306e 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h | |||
| @@ -27,13 +27,14 @@ | |||
| 27 | * set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on. | 27 | * set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on. |
| 28 | * | 28 | * |
| 29 | * We will use 31 sets, one for sending BAU messages from each of the 32 | 29 | * We will use 31 sets, one for sending BAU messages from each of the 32 |
| 30 | * cpu's on the node. | 30 | * cpu's on the uvhub. |
| 31 | * | 31 | * |
| 32 | * TLB shootdown will use the first of the 8 descriptors of each set. | 32 | * TLB shootdown will use the first of the 8 descriptors of each set. |
| 33 | * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set). | 33 | * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set). |
| 34 | */ | 34 | */ |
| 35 | 35 | ||
| 36 | #define UV_ITEMS_PER_DESCRIPTOR 8 | 36 | #define UV_ITEMS_PER_DESCRIPTOR 8 |
| 37 | #define MAX_BAU_CONCURRENT 3 | ||
| 37 | #define UV_CPUS_PER_ACT_STATUS 32 | 38 | #define UV_CPUS_PER_ACT_STATUS 32 |
| 38 | #define UV_ACT_STATUS_MASK 0x3 | 39 | #define UV_ACT_STATUS_MASK 0x3 |
| 39 | #define UV_ACT_STATUS_SIZE 2 | 40 | #define UV_ACT_STATUS_SIZE 2 |
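A worked example of the layout described in the comment above: with 8 descriptors of 64 bytes each, every activation set is 512 bytes, so set `cpu` starts at BASE + cpu*512 and item 0 of that set is the TLB-shootdown descriptor. The helper below is a sketch only (hypothetical name; the real code builds the per-cpu descriptor pointers at init time):

```c
/* Address of descriptor 'item' in activation set 'cpu'. */
static inline unsigned long bau_desc_addr_sketch(unsigned long base,
						 int cpu, int item)
{
	return base + ((unsigned long)cpu * UV_ITEMS_PER_DESCRIPTOR + item) * 64;
}
```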
| @@ -45,6 +46,9 @@ | |||
| 45 | #define UV_PAYLOADQ_PNODE_SHIFT 49 | 46 | #define UV_PAYLOADQ_PNODE_SHIFT 49 |
| 46 | #define UV_PTC_BASENAME "sgi_uv/ptc_statistics" | 47 | #define UV_PTC_BASENAME "sgi_uv/ptc_statistics" |
| 47 | #define uv_physnodeaddr(x) ((__pa((unsigned long)(x)) & uv_mmask)) | 48 | #define uv_physnodeaddr(x) ((__pa((unsigned long)(x)) & uv_mmask)) |
| 49 | #define UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT 15 | ||
| 50 | #define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT 16 | ||
| 51 | #define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD 0x000000000bUL | ||
| 48 | 52 | ||
| 49 | /* | 53 | /* |
| 50 | * bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1 | 54 | * bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1 |
| @@ -55,15 +59,29 @@ | |||
| 55 | #define DESC_STATUS_SOURCE_TIMEOUT 3 | 59 | #define DESC_STATUS_SOURCE_TIMEOUT 3 |
| 56 | 60 | ||
| 57 | /* | 61 | /* |
| 58 | * source side thresholds at which message retries print a warning | 62 | * source side thresholds at which message retries print a warning |
| 59 | */ | 63 | */ |
| 60 | #define SOURCE_TIMEOUT_LIMIT 20 | 64 | #define SOURCE_TIMEOUT_LIMIT 20 |
| 61 | #define DESTINATION_TIMEOUT_LIMIT 20 | 65 | #define DESTINATION_TIMEOUT_LIMIT 20 |
| 62 | 66 | ||
| 63 | /* | 67 | /* |
| 68 | * misc. delays, in microseconds | ||
| 69 | */ | ||
| 70 | #define THROTTLE_DELAY 10 | ||
| 71 | #define TIMEOUT_DELAY 10 | ||
| 72 | #define BIOS_TO 1000 | ||
| 73 | /* BIOS is assumed to set the destination timeout to 1003520 nanoseconds */ | ||
| 74 | |||
| 75 | /* | ||
| 76 | * thresholds at which to use IPI to free resources | ||
| 77 | */ | ||
| 78 | #define PLUGSB4RESET 100 | ||
| 79 | #define TIMEOUTSB4RESET 100 | ||
| 80 | |||
| 81 | /* | ||
| 64 | * number of entries in the destination side payload queue | 82 | * number of entries in the destination side payload queue |
| 65 | */ | 83 | */ |
| 66 | #define DEST_Q_SIZE 17 | 84 | #define DEST_Q_SIZE 20 |
| 67 | /* | 85 | /* |
| 68 | * number of destination side software ack resources | 86 | * number of destination side software ack resources |
| 69 | */ | 87 | */ |
| @@ -72,9 +90,10 @@ | |||
| 72 | /* | 90 | /* |
| 73 | * completion statuses for sending a TLB flush message | 91 | * completion statuses for sending a TLB flush message |
| 74 | */ | 92 | */ |
| 75 | #define FLUSH_RETRY 1 | 93 | #define FLUSH_RETRY_PLUGGED 1 |
| 76 | #define FLUSH_GIVEUP 2 | 94 | #define FLUSH_RETRY_TIMEOUT 2 |
| 77 | #define FLUSH_COMPLETE 3 | 95 | #define FLUSH_GIVEUP 3 |
| 96 | #define FLUSH_COMPLETE 4 | ||
| 78 | 97 | ||
| 79 | /* | 98 | /* |
| 80 | * Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor) | 99 | * Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor) |
| @@ -86,14 +105,14 @@ | |||
| 86 | * 'base_dest_nodeid' field of the header corresponds to the | 105 | * 'base_dest_nodeid' field of the header corresponds to the |
| 87 | * destination nodeID associated with that specified bit. | 106 | * destination nodeID associated with that specified bit. |
| 88 | */ | 107 | */ |
| 89 | struct bau_target_nodemask { | 108 | struct bau_target_uvhubmask { |
| 90 | unsigned long bits[BITS_TO_LONGS(256)]; | 109 | unsigned long bits[BITS_TO_LONGS(UV_DISTRIBUTION_SIZE)]; |
| 91 | }; | 110 | }; |
| 92 | 111 | ||
| 93 | /* | 112 | /* |
| 94 | * mask of cpu's on a node | 113 | * mask of cpu's on a uvhub |
| 95 | * (during initialization we need to check that unsigned long has | 114 | * (during initialization we need to check that unsigned long has |
| 96 | * enough bits for max. cpu's per node) | 115 | * enough bits for max. cpu's per uvhub) |
| 97 | */ | 116 | */ |
| 98 | struct bau_local_cpumask { | 117 | struct bau_local_cpumask { |
| 99 | unsigned long bits; | 118 | unsigned long bits; |
| @@ -135,8 +154,8 @@ struct bau_msg_payload { | |||
| 135 | struct bau_msg_header { | 154 | struct bau_msg_header { |
| 136 | unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ | 155 | unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ |
| 137 | /* bits 5:0 */ | 156 | /* bits 5:0 */ |
| 138 | unsigned int base_dest_nodeid:15; /* nasid>>1 (pnode) of */ | 157 | unsigned int base_dest_nodeid:15; /* nasid (pnode<<1) of */ |
| 139 | /* bits 20:6 */ /* first bit in node_map */ | 158 | /* bits 20:6 */ /* first bit in uvhub map */ |
| 140 | unsigned int command:8; /* message type */ | 159 | unsigned int command:8; /* message type */ |
| 141 | /* bits 28:21 */ | 160 | /* bits 28:21 */ |
| 142 | /* 0x38: SN3net EndPoint Message */ | 161 | /* 0x38: SN3net EndPoint Message */ |
| @@ -146,26 +165,38 @@ struct bau_msg_header { | |||
| 146 | unsigned int rsvd_2:9; /* must be zero */ | 165 | unsigned int rsvd_2:9; /* must be zero */ |
| 147 | /* bits 40:32 */ | 166 | /* bits 40:32 */ |
| 148 | /* Suppl_A is 56-41 */ | 167 | /* Suppl_A is 56-41 */ |
| 149 | unsigned int payload_2a:8;/* becomes byte 16 of msg */ | 168 | unsigned int sequence:16;/* message sequence number */ |
| 150 | /* bits 48:41 */ /* not currently using */ | 169 | /* bits 56:41 */ /* becomes bytes 16-17 of msg */ |
| 151 | unsigned int payload_2b:8;/* becomes byte 17 of msg */ | ||
| 152 | /* bits 56:49 */ /* not currently using */ | ||
| 153 | /* Address field (96:57) is never used as an | 170 | /* Address field (96:57) is never used as an |
| 154 | address (these are address bits 42:3) */ | 171 | address (these are address bits 42:3) */ |
| 172 | |||
| 155 | unsigned int rsvd_3:1; /* must be zero */ | 173 | unsigned int rsvd_3:1; /* must be zero */ |
| 156 | /* bit 57 */ | 174 | /* bit 57 */ |
| 157 | /* address bits 27:4 are payload */ | 175 | /* address bits 27:4 are payload */ |
| 158 | /* these 24 bits become bytes 12-14 of msg */ | 176 | /* these next 24 (58-81) bits become bytes 12-14 of msg */ |
| 177 | |||
| 178 | /* bits 65:58 land in byte 12 */ | ||
| 159 | unsigned int replied_to:1;/* sent as 0 by the source to byte 12 */ | 179 | unsigned int replied_to:1;/* sent as 0 by the source to byte 12 */ |
| 160 | /* bit 58 */ | 180 | /* bit 58 */ |
| 161 | 181 | unsigned int msg_type:3; /* software type of the message*/ | |
| 162 | unsigned int payload_1a:5;/* not currently used */ | 182 | /* bits 61:59 */ |
| 163 | /* bits 63:59 */ | 183 | unsigned int canceled:1; /* message canceled, resource to be freed*/ |
| 164 | unsigned int payload_1b:8;/* not currently used */ | 184 | /* bit 62 */ |
| 165 | /* bits 71:64 */ | 185 | unsigned int payload_1a:1;/* not currently used */ |
| 166 | unsigned int payload_1c:8;/* not currently used */ | 186 | /* bit 63 */ |
| 167 | /* bits 79:72 */ | 187 | unsigned int payload_1b:2;/* not currently used */ |
| 168 | unsigned int payload_1d:2;/* not currently used */ | 188 | /* bits 65:64 */ |
| 189 | |||
| 190 | /* bits 73:66 land in byte 13 */ | ||
| 191 | unsigned int payload_1ca:6;/* not currently used */ | ||
| 192 | /* bits 71:66 */ | ||
| 193 | unsigned int payload_1c:2;/* not currently used */ | ||
| 194 | /* bits 73:72 */ | ||
| 195 | |||
| 196 | /* bits 81:74 land in byte 14 */ | ||
| 197 | unsigned int payload_1d:6;/* not currently used */ | ||
| 198 | /* bits 79:74 */ | ||
| 199 | unsigned int payload_1e:2;/* not currently used */ | ||
| 169 | /* bits 81:80 */ | 200 | /* bits 81:80 */ |
| 170 | 201 | ||
| 171 | unsigned int rsvd_4:7; /* must be zero */ | 202 | unsigned int rsvd_4:7; /* must be zero */ |
| @@ -178,7 +209,7 @@ struct bau_msg_header { | |||
| 178 | /* bits 95:90 */ | 209 | /* bits 95:90 */ |
| 179 | unsigned int rsvd_6:5; /* must be zero */ | 210 | unsigned int rsvd_6:5; /* must be zero */ |
| 180 | /* bits 100:96 */ | 211 | /* bits 100:96 */ |
| 181 | unsigned int int_both:1;/* if 1, interrupt both sockets on the blade */ | 212 | unsigned int int_both:1;/* if 1, interrupt both sockets on the uvhub */ |
| 182 | /* bit 101*/ | 213 | /* bit 101*/ |
| 183 | unsigned int fairness:3;/* usually zero */ | 214 | unsigned int fairness:3;/* usually zero */ |
| 184 | /* bits 104:102 */ | 215 | /* bits 104:102 */ |
| @@ -191,13 +222,18 @@ struct bau_msg_header { | |||
| 191 | /* bits 127:107 */ | 222 | /* bits 127:107 */ |
| 192 | }; | 223 | }; |
| 193 | 224 | ||
| 225 | /* see msg_type: */ | ||
| 226 | #define MSG_NOOP 0 | ||
| 227 | #define MSG_REGULAR 1 | ||
| 228 | #define MSG_RETRY 2 | ||
| 229 | |||
| 194 | /* | 230 | /* |
| 195 | * The activation descriptor: | 231 | * The activation descriptor: |
| 196 | * The format of the message to send, plus all accompanying control | 232 | * The format of the message to send, plus all accompanying control |
| 197 | * Should be 64 bytes | 233 | * Should be 64 bytes |
| 198 | */ | 234 | */ |
| 199 | struct bau_desc { | 235 | struct bau_desc { |
| 200 | struct bau_target_nodemask distribution; | 236 | struct bau_target_uvhubmask distribution; |
| 201 | /* | 237 | /* |
| 202 | * message template, consisting of header and payload: | 238 | * message template, consisting of header and payload: |
| 203 | */ | 239 | */ |
| @@ -237,19 +273,25 @@ struct bau_payload_queue_entry { | |||
| 237 | unsigned short acknowledge_count; /* filled in by destination */ | 273 | unsigned short acknowledge_count; /* filled in by destination */ |
| 238 | /* 16 bits, bytes 10-11 */ | 274 | /* 16 bits, bytes 10-11 */ |
| 239 | 275 | ||
| 240 | unsigned short replied_to:1; /* sent as 0 by the source */ | 276 | /* these next 3 bytes come from bits 58-81 of the message header */ |
| 241 | /* 1 bit */ | 277 | unsigned short replied_to:1; /* sent as 0 by the source */ |
| 242 | unsigned short unused1:7; /* not currently using */ | 278 | unsigned short msg_type:3; /* software message type */ |
| 243 | /* 7 bits: byte 12) */ | 279 | unsigned short canceled:1; /* sent as 0 by the source */ |
| 280 | unsigned short unused1:3; /* not currently using */ | ||
| 281 | /* byte 12 */ | ||
| 244 | 282 | ||
| 245 | unsigned char unused2[2]; /* not currently using */ | 283 | unsigned char unused2a; /* not currently using */ |
| 246 | /* bytes 13-14 */ | 284 | /* byte 13 */ |
| 285 | unsigned char unused2; /* not currently using */ | ||
| 286 | /* byte 14 */ | ||
| 247 | 287 | ||
| 248 | unsigned char sw_ack_vector; /* filled in by the hardware */ | 288 | unsigned char sw_ack_vector; /* filled in by the hardware */ |
| 249 | /* byte 15 (bits 127:120) */ | 289 | /* byte 15 (bits 127:120) */ |
| 250 | 290 | ||
| 251 | unsigned char unused4[3]; /* not currently using bytes 17-19 */ | 291 | unsigned short sequence; /* message sequence number */ |
| 252 | /* bytes 17-19 */ | 292 | /* bytes 16-17 */ |
| 293 | unsigned char unused4[2]; /* not currently using bytes 18-19 */ | ||
| 294 | /* bytes 18-19 */ | ||
| 253 | 295 | ||
| 254 | int number_of_cpus; /* filled in at destination */ | 296 | int number_of_cpus; /* filled in at destination */ |
| 255 | /* 32 bits, bytes 20-23 (aligned) */ | 297 | /* 32 bits, bytes 20-23 (aligned) */ |
| @@ -259,63 +301,93 @@ struct bau_payload_queue_entry { | |||
| 259 | }; | 301 | }; |
| 260 | 302 | ||
| 261 | /* | 303 | /* |
| 262 | * one for every slot in the destination payload queue | 304 | * one per-cpu; to locate the software tables |
| 263 | */ | ||
| 264 | struct bau_msg_status { | ||
| 265 | struct bau_local_cpumask seen_by; /* map of cpu's */ | ||
| 266 | }; | ||
| 267 | |||
| 268 | /* | ||
| 269 | * one for every slot in the destination software ack resources | ||
| 270 | */ | ||
| 271 | struct bau_sw_ack_status { | ||
| 272 | struct bau_payload_queue_entry *msg; /* associated message */ | ||
| 273 | int watcher; /* cpu monitoring, or -1 */ | ||
| 274 | }; | ||
| 275 | |||
| 276 | /* | ||
| 277 | * one on every node and per-cpu; to locate the software tables | ||
| 278 | */ | 305 | */ |
| 279 | struct bau_control { | 306 | struct bau_control { |
| 280 | struct bau_desc *descriptor_base; | 307 | struct bau_desc *descriptor_base; |
| 281 | struct bau_payload_queue_entry *bau_msg_head; | ||
| 282 | struct bau_payload_queue_entry *va_queue_first; | 308 | struct bau_payload_queue_entry *va_queue_first; |
| 283 | struct bau_payload_queue_entry *va_queue_last; | 309 | struct bau_payload_queue_entry *va_queue_last; |
| 284 | struct bau_msg_status *msg_statuses; | 310 | struct bau_payload_queue_entry *bau_msg_head; |
| 285 | int *watching; /* pointer to array */ | 311 | struct bau_control *uvhub_master; |
| 312 | struct bau_control *socket_master; | ||
| 313 | unsigned long timeout_interval; | ||
| 314 | atomic_t active_descriptor_count; | ||
| 315 | int max_concurrent; | ||
| 316 | int max_concurrent_constant; | ||
| 317 | int retry_message_scans; | ||
| 318 | int plugged_tries; | ||
| 319 | int timeout_tries; | ||
| 320 | int ipi_attempts; | ||
| 321 | int conseccompletes; | ||
| 322 | short cpu; | ||
| 323 | short uvhub_cpu; | ||
| 324 | short uvhub; | ||
| 325 | short cpus_in_socket; | ||
| 326 | short cpus_in_uvhub; | ||
| 327 | unsigned short message_number; | ||
| 328 | unsigned short uvhub_quiesce; | ||
| 329 | short socket_acknowledge_count[DEST_Q_SIZE]; | ||
| 330 | cycles_t send_message; | ||
| 331 | spinlock_t masks_lock; | ||
| 332 | spinlock_t uvhub_lock; | ||
| 333 | spinlock_t queue_lock; | ||
| 286 | }; | 334 | }; |
| 287 | 335 | ||
| 288 | /* | 336 | /* |
| 289 | * This structure is allocated per_cpu for UV TLB shootdown statistics. | 337 | * This structure is allocated per_cpu for UV TLB shootdown statistics. |
| 290 | */ | 338 | */ |
| 291 | struct ptc_stats { | 339 | struct ptc_stats { |
| 292 | unsigned long ptc_i; /* number of IPI-style flushes */ | 340 | /* sender statistics */ |
| 293 | unsigned long requestor; /* number of nodes this cpu sent to */ | 341 | unsigned long s_giveup; /* number of fall backs to IPI-style flushes */ |
| 294 | unsigned long requestee; /* times cpu was remotely requested */ | 342 | unsigned long s_requestor; /* number of shootdown requests */ |
| 295 | unsigned long alltlb; /* times all tlb's on this cpu were flushed */ | 343 | unsigned long s_stimeout; /* source side timeouts */ |
| 296 | unsigned long onetlb; /* times just one tlb on this cpu was flushed */ | 344 | unsigned long s_dtimeout; /* destination side timeouts */ |
| 297 | unsigned long s_retry; /* retries on source side timeouts */ | 345 | unsigned long s_time; /* time spent in sending side */ |
| 298 | unsigned long d_retry; /* retries on destination side timeouts */ | 346 | unsigned long s_retriesok; /* successful retries */ |
| 299 | unsigned long sflush; /* cycles spent in uv_flush_tlb_others */ | 347 | unsigned long s_ntargcpu; /* number of cpus targeted */ |
| 300 | unsigned long dflush; /* cycles spent on destination side */ | 348 | unsigned long s_ntarguvhub; /* number of uvhubs targeted */ |
| 301 | unsigned long retriesok; /* successes on retries */ | 349 | unsigned long s_ntarguvhub16; /* number of times >= 16 target hubs */ |
| 302 | unsigned long nomsg; /* interrupts with no message */ | 350 | unsigned long s_ntarguvhub8; /* number of times >= 8 target hubs */ |
| 303 | unsigned long multmsg; /* interrupts with multiple messages */ | 351 | unsigned long s_ntarguvhub4; /* number of times >= 4 target hubs */ |
| 304 | unsigned long ntargeted;/* nodes targeted */ | 352 | unsigned long s_ntarguvhub2; /* number of times >= 2 target hubs */ |
| 353 | unsigned long s_ntarguvhub1; /* number of times == 1 target hub */ | ||
| 354 | unsigned long s_resets_plug; /* ipi-style resets from plug state */ | ||
| 355 | unsigned long s_resets_timeout; /* ipi-style resets from timeouts */ | ||
| 356 | unsigned long s_busy; /* status stayed busy past s/w timer */ | ||
| 357 | unsigned long s_throttles; /* waits in throttle */ | ||
| 358 | unsigned long s_retry_messages; /* retry broadcasts */ | ||
| 359 | /* destination statistics */ | ||
| 360 | unsigned long d_alltlb; /* times all tlb's on this cpu were flushed */ | ||
| 361 | unsigned long d_onetlb; /* times just one tlb on this cpu was flushed */ | ||
| 362 | unsigned long d_multmsg; /* interrupts with multiple messages */ | ||
| 363 | unsigned long d_nomsg; /* interrupts with no message */ | ||
| 364 | unsigned long d_time; /* time spent on destination side */ | ||
| 365 | unsigned long d_requestee; /* number of messages processed */ | ||
| 366 | unsigned long d_retries; /* number of retry messages processed */ | ||
| 367 | unsigned long d_canceled; /* number of messages canceled by retries */ | ||
| 368 | unsigned long d_nocanceled; /* retries that found nothing to cancel */ | ||
| 369 | unsigned long d_resets; /* number of ipi-style requests processed */ | ||
| 370 | unsigned long d_rcanceled; /* number of messages canceled by resets */ | ||
| 305 | }; | 371 | }; |
| 306 | 372 | ||
| 307 | static inline int bau_node_isset(int node, struct bau_target_nodemask *dstp) | 373 | static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp) |
| 308 | { | 374 | { |
| 309 | return constant_test_bit(node, &dstp->bits[0]); | 375 | return constant_test_bit(uvhub, &dstp->bits[0]); |
| 310 | } | 376 | } |
| 311 | static inline void bau_node_set(int node, struct bau_target_nodemask *dstp) | 377 | static inline void bau_uvhub_set(int uvhub, struct bau_target_uvhubmask *dstp) |
| 312 | { | 378 | { |
| 313 | __set_bit(node, &dstp->bits[0]); | 379 | __set_bit(uvhub, &dstp->bits[0]); |
| 314 | } | 380 | } |
| 315 | static inline void bau_nodes_clear(struct bau_target_nodemask *dstp, int nbits) | 381 | static inline void bau_uvhubs_clear(struct bau_target_uvhubmask *dstp, |
| 382 | int nbits) | ||
| 316 | { | 383 | { |
| 317 | bitmap_zero(&dstp->bits[0], nbits); | 384 | bitmap_zero(&dstp->bits[0], nbits); |
| 318 | } | 385 | } |
| 386 | static inline int bau_uvhub_weight(struct bau_target_uvhubmask *dstp) | ||
| 387 | { | ||
| 388 | return bitmap_weight((unsigned long *)&dstp->bits[0], | ||
| 389 | UV_DISTRIBUTION_SIZE); | ||
| 390 | } | ||
| 319 | 391 | ||
| 320 | static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits) | 392 | static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits) |
| 321 | { | 393 | { |
| @@ -328,4 +400,35 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits) | |||
| 328 | extern void uv_bau_message_intr1(void); | 400 | extern void uv_bau_message_intr1(void); |
| 329 | extern void uv_bau_timeout_intr1(void); | 401 | extern void uv_bau_timeout_intr1(void); |
| 330 | 402 | ||
| 403 | struct atomic_short { | ||
| 404 | short counter; | ||
| 405 | }; | ||
| 406 | |||
| 407 | /** | ||
| 408 | * atomic_read_short - read a short atomic variable | ||
| 409 | * @v: pointer of type atomic_short | ||
| 410 | * | ||
| 411 | * Atomically reads the value of @v. | ||
| 412 | */ | ||
| 413 | static inline int atomic_read_short(const struct atomic_short *v) | ||
| 414 | { | ||
| 415 | return v->counter; | ||
| 416 | } | ||
| 417 | |||
| 418 | /** | ||
| 419 | * atomic_add_short_return - add and return a short int | ||
| 420 | * @i: short value to add | ||
| 421 | * @v: pointer of type atomic_short | ||
| 422 | * | ||
| 423 | * Atomically adds @i to @v and returns @i + @v | ||
| 424 | */ | ||
| 425 | static inline int atomic_add_short_return(short i, struct atomic_short *v) | ||
| 426 | { | ||
| 427 | short __i = i; | ||
| 428 | asm volatile(LOCK_PREFIX "xaddw %0, %1" | ||
| 429 | : "+r" (i), "+m" (v->counter) | ||
| 430 | : : "memory"); | ||
| 431 | return i + __i; | ||
| 432 | } | ||
| 433 | |||
| 331 | #endif /* _ASM_X86_UV_UV_BAU_H */ | 434 | #endif /* _ASM_X86_UV_UV_BAU_H */ |
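The atomic_add_short_return() helper added above leans on an x86 idiom that is easy to misread: `lock xadd` writes the *old* value of the memory operand back into the register, so returning that plus the saved addend yields the post-add count, the same contract as atomic_add_return(). A minimal user-space sketch of the same semantics, assuming a GCC/Clang toolchain on x86 (the literal `lock;` stands in for the kernel's LOCK_PREFIX; names mirror the header only for illustration):

```c
#include <stdio.h>

struct atomic_short { short counter; };

/* add i to *v atomically and return the new value */
static inline int atomic_add_short_return(short i, struct atomic_short *v)
{
        short addend = i;
        /* xaddw: counter += i, and i receives the counter's old value */
        __asm__ __volatile__("lock; xaddw %0, %1"
                             : "+r" (i), "+m" (v->counter)
                             : : "memory");
        return i + addend;      /* old value + addend == updated value */
}

int main(void)
{
        struct atomic_short count = { 0 };

        printf("%d\n", atomic_add_short_return(3, &count));    /* prints 3 */
        printf("%d\n", atomic_add_short_return(2, &count));    /* prints 5 */
        return 0;
}
```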
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 14cc74ba5d23..bf6b88ef8eeb 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h | |||
| @@ -307,7 +307,7 @@ static inline unsigned long uv_read_global_mmr32(int pnode, unsigned long offset | |||
| 307 | * Access Global MMR space using the MMR space located at the top of physical | 307 | * Access Global MMR space using the MMR space located at the top of physical |
| 308 | * memory. | 308 | * memory. |
| 309 | */ | 309 | */ |
| 310 | static inline unsigned long *uv_global_mmr64_address(int pnode, unsigned long offset) | 310 | static inline volatile void __iomem *uv_global_mmr64_address(int pnode, unsigned long offset) |
| 311 | { | 311 | { |
| 312 | return __va(UV_GLOBAL_MMR64_BASE | | 312 | return __va(UV_GLOBAL_MMR64_BASE | |
| 313 | UV_GLOBAL_MMR64_PNODE_BITS(pnode) | offset); | 313 | UV_GLOBAL_MMR64_PNODE_BITS(pnode) | offset); |
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index 2cae46c7c8a2..b2f2d2e05cec 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h | |||
| @@ -1,4 +1,3 @@ | |||
| 1 | |||
| 2 | /* | 1 | /* |
| 3 | * This file is subject to the terms and conditions of the GNU General Public | 2 | * This file is subject to the terms and conditions of the GNU General Public |
| 4 | * License. See the file "COPYING" in the main directory of this archive | 3 | * License. See the file "COPYING" in the main directory of this archive |
| @@ -15,13 +14,25 @@ | |||
| 15 | #define UV_MMR_ENABLE (1UL << 63) | 14 | #define UV_MMR_ENABLE (1UL << 63) |
| 16 | 15 | ||
| 17 | /* ========================================================================= */ | 16 | /* ========================================================================= */ |
| 17 | /* UVH_BAU_DATA_BROADCAST */ | ||
| 18 | /* ========================================================================= */ | ||
| 19 | #define UVH_BAU_DATA_BROADCAST 0x61688UL | ||
| 20 | #define UVH_BAU_DATA_BROADCAST_32 0x0440 | ||
| 21 | |||
| 22 | #define UVH_BAU_DATA_BROADCAST_ENABLE_SHFT 0 | ||
| 23 | #define UVH_BAU_DATA_BROADCAST_ENABLE_MASK 0x0000000000000001UL | ||
| 24 | |||
| 25 | union uvh_bau_data_broadcast_u { | ||
| 26 | unsigned long v; | ||
| 27 | struct uvh_bau_data_broadcast_s { | ||
| 28 | unsigned long enable : 1; /* RW */ | ||
| 29 | unsigned long rsvd_1_63: 63; /* */ | ||
| 30 | } s; | ||
| 31 | }; | ||
| 32 | |||
| 33 | /* ========================================================================= */ | ||
| 18 | /* UVH_BAU_DATA_CONFIG */ | 34 | /* UVH_BAU_DATA_CONFIG */ |
| 19 | /* ========================================================================= */ | 35 | /* ========================================================================= */ |
| 20 | #define UVH_LB_BAU_MISC_CONTROL 0x320170UL | ||
| 21 | #define UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT 15 | ||
| 22 | #define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT 16 | ||
| 23 | #define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD 0x000000000bUL | ||
| 24 | /* 1011 timebase 7 (168millisec) * 3 ticks -> 500ms */ | ||
| 25 | #define UVH_BAU_DATA_CONFIG 0x61680UL | 36 | #define UVH_BAU_DATA_CONFIG 0x61680UL |
| 26 | #define UVH_BAU_DATA_CONFIG_32 0x0438 | 37 | #define UVH_BAU_DATA_CONFIG_32 0x0438 |
| 27 | 38 | ||
| @@ -604,6 +615,68 @@ union uvh_lb_bau_intd_software_acknowledge_u { | |||
| 604 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0x0a70 | 615 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0x0a70 |
| 605 | 616 | ||
| 606 | /* ========================================================================= */ | 617 | /* ========================================================================= */ |
| 618 | /* UVH_LB_BAU_MISC_CONTROL */ | ||
| 619 | /* ========================================================================= */ | ||
| 620 | #define UVH_LB_BAU_MISC_CONTROL 0x320170UL | ||
| 621 | #define UVH_LB_BAU_MISC_CONTROL_32 0x00a10 | ||
| 622 | |||
| 623 | #define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 | ||
| 624 | #define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL | ||
| 625 | #define UVH_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8 | ||
| 626 | #define UVH_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL | ||
| 627 | #define UVH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9 | ||
| 628 | #define UVH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL | ||
| 629 | #define UVH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10 | ||
| 630 | #define UVH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL | ||
| 631 | #define UVH_LB_BAU_MISC_CONTROL_CSI_AGENT_PRESENCE_VECTOR_SHFT 11 | ||
| 632 | #define UVH_LB_BAU_MISC_CONTROL_CSI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL | ||
| 633 | #define UVH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14 | ||
| 634 | #define UVH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL | ||
| 635 | #define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15 | ||
| 636 | #define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL | ||
| 637 | #define UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16 | ||
| 638 | #define UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL | ||
| 639 | #define UVH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20 | ||
| 640 | #define UVH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL | ||
| 641 | #define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21 | ||
| 642 | #define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL | ||
| 643 | #define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22 | ||
| 644 | #define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL | ||
| 645 | #define UVH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23 | ||
| 646 | #define UVH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL | ||
| 647 | #define UVH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24 | ||
| 648 | #define UVH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL | ||
| 649 | #define UVH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27 | ||
| 650 | #define UVH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL | ||
| 651 | #define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28 | ||
| 652 | #define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL | ||
| 653 | #define UVH_LB_BAU_MISC_CONTROL_FUN_SHFT 48 | ||
| 654 | #define UVH_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL | ||
| 655 | |||
| 656 | union uvh_lb_bau_misc_control_u { | ||
| 657 | unsigned long v; | ||
| 658 | struct uvh_lb_bau_misc_control_s { | ||
| 659 | unsigned long rejection_delay : 8; /* RW */ | ||
| 660 | unsigned long apic_mode : 1; /* RW */ | ||
| 661 | unsigned long force_broadcast : 1; /* RW */ | ||
| 662 | unsigned long force_lock_nop : 1; /* RW */ | ||
| 663 | unsigned long csi_agent_presence_vector : 3; /* RW */ | ||
| 664 | unsigned long descriptor_fetch_mode : 1; /* RW */ | ||
| 665 | unsigned long enable_intd_soft_ack_mode : 1; /* RW */ | ||
| 666 | unsigned long intd_soft_ack_timeout_period : 4; /* RW */ | ||
| 667 | unsigned long enable_dual_mapping_mode : 1; /* RW */ | ||
| 668 | unsigned long vga_io_port_decode_enable : 1; /* RW */ | ||
| 669 | unsigned long vga_io_port_16_bit_decode : 1; /* RW */ | ||
| 670 | unsigned long suppress_dest_registration : 1; /* RW */ | ||
| 671 | unsigned long programmed_initial_priority : 3; /* RW */ | ||
| 672 | unsigned long use_incoming_priority : 1; /* RW */ | ||
| 673 | unsigned long enable_programmed_initial_priority : 1; /* RW */ | ||
| 674 | unsigned long rsvd_29_47 : 19; /* */ | ||
| 675 | unsigned long fun : 16; /* RW */ | ||
| 676 | } s; | ||
| 677 | }; | ||
| 678 | |||
| 679 | /* ========================================================================= */ | ||
| 607 | /* UVH_LB_BAU_SB_ACTIVATION_CONTROL */ | 680 | /* UVH_LB_BAU_SB_ACTIVATION_CONTROL */ |
| 608 | /* ========================================================================= */ | 681 | /* ========================================================================= */ |
| 609 | #define UVH_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL | 682 | #define UVH_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL |
| @@ -681,334 +754,6 @@ union uvh_lb_bau_sb_descriptor_base_u { | |||
| 681 | }; | 754 | }; |
| 682 | 755 | ||
| 683 | /* ========================================================================= */ | 756 | /* ========================================================================= */ |
| 684 | /* UVH_LB_MCAST_AOERR0_RPT_ENABLE */ | ||
| 685 | /* ========================================================================= */ | ||
| 686 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE 0x50b20UL | ||
| 687 | |||
| 688 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_OBESE_MSG_SHFT 0 | ||
| 689 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_OBESE_MSG_MASK 0x0000000000000001UL | ||
| 690 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_DATA_SB_ERR_SHFT 1 | ||
| 691 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_DATA_SB_ERR_MASK 0x0000000000000002UL | ||
| 692 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_NACK_BUFF_PARITY_SHFT 2 | ||
| 693 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_NACK_BUFF_PARITY_MASK 0x0000000000000004UL | ||
| 694 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_TIMEOUT_SHFT 3 | ||
| 695 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_TIMEOUT_MASK 0x0000000000000008UL | ||
| 696 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_INACTIVE_REPLY_SHFT 4 | ||
| 697 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_INACTIVE_REPLY_MASK 0x0000000000000010UL | ||
| 698 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_UPGRADE_ERROR_SHFT 5 | ||
| 699 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_UPGRADE_ERROR_MASK 0x0000000000000020UL | ||
| 700 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REG_COUNT_UNDERFLOW_SHFT 6 | ||
| 701 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REG_COUNT_UNDERFLOW_MASK 0x0000000000000040UL | ||
| 702 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REP_OBESE_MSG_SHFT 7 | ||
| 703 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REP_OBESE_MSG_MASK 0x0000000000000080UL | ||
| 704 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_RUNT_MSG_SHFT 8 | ||
| 705 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_RUNT_MSG_MASK 0x0000000000000100UL | ||
| 706 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_OBESE_MSG_SHFT 9 | ||
| 707 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_OBESE_MSG_MASK 0x0000000000000200UL | ||
| 708 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_DATA_SB_ERR_SHFT 10 | ||
| 709 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_DATA_SB_ERR_MASK 0x0000000000000400UL | ||
| 710 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_RUNT_MSG_SHFT 11 | ||
| 711 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_RUNT_MSG_MASK 0x0000000000000800UL | ||
| 712 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_OBESE_MSG_SHFT 12 | ||
| 713 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_OBESE_MSG_MASK 0x0000000000001000UL | ||
| 714 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_DATA_SB_ERR_SHFT 13 | ||
| 715 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_DATA_SB_ERR_MASK 0x0000000000002000UL | ||
| 716 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_COMMAND_ERR_SHFT 14 | ||
| 717 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_COMMAND_ERR_MASK 0x0000000000004000UL | ||
| 718 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_PEND_TIMEOUT_SHFT 15 | ||
| 719 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_PEND_TIMEOUT_MASK 0x0000000000008000UL | ||
| 720 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_RUNT_MSG_SHFT 16 | ||
| 721 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_RUNT_MSG_MASK 0x0000000000010000UL | ||
| 722 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_OBESE_MSG_SHFT 17 | ||
| 723 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_OBESE_MSG_MASK 0x0000000000020000UL | ||
| 724 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_DATA_SB_ERR_SHFT 18 | ||
| 725 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_DATA_SB_ERR_MASK 0x0000000000040000UL | ||
| 726 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_RUNT_MSG_SHFT 19 | ||
| 727 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_RUNT_MSG_MASK 0x0000000000080000UL | ||
| 728 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_OBESE_MSG_SHFT 20 | ||
| 729 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_OBESE_MSG_MASK 0x0000000000100000UL | ||
| 730 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_DATA_SB_ERR_SHFT 21 | ||
| 731 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_DATA_SB_ERR_MASK 0x0000000000200000UL | ||
| 732 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_AMO_TIMEOUT_SHFT 22 | ||
| 733 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_AMO_TIMEOUT_MASK 0x0000000000400000UL | ||
| 734 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_PUT_TIMEOUT_SHFT 23 | ||
| 735 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_PUT_TIMEOUT_MASK 0x0000000000800000UL | ||
| 736 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_SPURIOUS_EVENT_SHFT 24 | ||
| 737 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_SPURIOUS_EVENT_MASK 0x0000000001000000UL | ||
| 738 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IOH_DESTINATION_TABLE_PARITY_SHFT 25 | ||
| 739 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IOH_DESTINATION_TABLE_PARITY_MASK 0x0000000002000000UL | ||
| 740 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_HAD_ERROR_REPLY_SHFT 26 | ||
| 741 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_HAD_ERROR_REPLY_MASK 0x0000000004000000UL | ||
| 742 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_TIMEOUT_SHFT 27 | ||
| 743 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_TIMEOUT_MASK 0x0000000008000000UL | ||
| 744 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_LOCK_MANAGER_HAD_ERROR_REPLY_SHFT 28 | ||
| 745 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_LOCK_MANAGER_HAD_ERROR_REPLY_MASK 0x0000000010000000UL | ||
| 746 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_HAD_ERROR_REPLY_SHFT 29 | ||
| 747 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_HAD_ERROR_REPLY_MASK 0x0000000020000000UL | ||
| 748 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_TIMEOUT_SHFT 30 | ||
| 749 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_TIMEOUT_MASK 0x0000000040000000UL | ||
| 750 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SB_ACTIVATION_OVERRUN_SHFT 31 | ||
| 751 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SB_ACTIVATION_OVERRUN_MASK 0x0000000080000000UL | ||
| 752 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_HAD_ERROR_REPLY_SHFT 32 | ||
| 753 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_HAD_ERROR_REPLY_MASK 0x0000000100000000UL | ||
| 754 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_TIMEOUT_SHFT 33 | ||
| 755 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_TIMEOUT_MASK 0x0000000200000000UL | ||
| 756 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_0_PARITY_SHFT 34 | ||
| 757 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_0_PARITY_MASK 0x0000000400000000UL | ||
| 758 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_1_PARITY_SHFT 35 | ||
| 759 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_1_PARITY_MASK 0x0000000800000000UL | ||
| 760 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SOCKET_DESTINATION_TABLE_PARITY_SHFT 36 | ||
| 761 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SOCKET_DESTINATION_TABLE_PARITY_MASK 0x0000001000000000UL | ||
| 762 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_BAU_REPLY_PAYLOAD_CORRUPTION_SHFT 37 | ||
| 763 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_BAU_REPLY_PAYLOAD_CORRUPTION_MASK 0x0000002000000000UL | ||
| 764 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IO_PORT_DESTINATION_TABLE_PARITY_SHFT 38 | ||
| 765 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IO_PORT_DESTINATION_TABLE_PARITY_MASK 0x0000004000000000UL | ||
| 766 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INTD_SOFT_ACK_TIMEOUT_SHFT 39 | ||
| 767 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INTD_SOFT_ACK_TIMEOUT_MASK 0x0000008000000000UL | ||
| 768 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_OBESE_MSG_SHFT 40 | ||
| 769 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_OBESE_MSG_MASK 0x0000010000000000UL | ||
| 770 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_COMMAND_ERR_SHFT 41 | ||
| 771 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_COMMAND_ERR_MASK 0x0000020000000000UL | ||
| 772 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_TIMEOUT_SHFT 42 | ||
| 773 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_TIMEOUT_MASK 0x0000040000000000UL | ||
| 774 | |||
| 775 | union uvh_lb_mcast_aoerr0_rpt_enable_u { | ||
| 776 | unsigned long v; | ||
| 777 | struct uvh_lb_mcast_aoerr0_rpt_enable_s { | ||
| 778 | unsigned long mcast_obese_msg : 1; /* RW */ | ||
| 779 | unsigned long mcast_data_sb_err : 1; /* RW */ | ||
| 780 | unsigned long mcast_nack_buff_parity : 1; /* RW */ | ||
| 781 | unsigned long mcast_timeout : 1; /* RW */ | ||
| 782 | unsigned long mcast_inactive_reply : 1; /* RW */ | ||
| 783 | unsigned long mcast_upgrade_error : 1; /* RW */ | ||
| 784 | unsigned long mcast_reg_count_underflow : 1; /* RW */ | ||
| 785 | unsigned long mcast_rep_obese_msg : 1; /* RW */ | ||
| 786 | unsigned long ucache_req_runt_msg : 1; /* RW */ | ||
| 787 | unsigned long ucache_req_obese_msg : 1; /* RW */ | ||
| 788 | unsigned long ucache_req_data_sb_err : 1; /* RW */ | ||
| 789 | unsigned long ucache_rep_runt_msg : 1; /* RW */ | ||
| 790 | unsigned long ucache_rep_obese_msg : 1; /* RW */ | ||
| 791 | unsigned long ucache_rep_data_sb_err : 1; /* RW */ | ||
| 792 | unsigned long ucache_rep_command_err : 1; /* RW */ | ||
| 793 | unsigned long ucache_pend_timeout : 1; /* RW */ | ||
| 794 | unsigned long macc_req_runt_msg : 1; /* RW */ | ||
| 795 | unsigned long macc_req_obese_msg : 1; /* RW */ | ||
| 796 | unsigned long macc_req_data_sb_err : 1; /* RW */ | ||
| 797 | unsigned long macc_rep_runt_msg : 1; /* RW */ | ||
| 798 | unsigned long macc_rep_obese_msg : 1; /* RW */ | ||
| 799 | unsigned long macc_rep_data_sb_err : 1; /* RW */ | ||
| 800 | unsigned long macc_amo_timeout : 1; /* RW */ | ||
| 801 | unsigned long macc_put_timeout : 1; /* RW */ | ||
| 802 | unsigned long macc_spurious_event : 1; /* RW */ | ||
| 803 | unsigned long ioh_destination_table_parity : 1; /* RW */ | ||
| 804 | unsigned long get_had_error_reply : 1; /* RW */ | ||
| 805 | unsigned long get_timeout : 1; /* RW */ | ||
| 806 | unsigned long lock_manager_had_error_reply : 1; /* RW */ | ||
| 807 | unsigned long put_had_error_reply : 1; /* RW */ | ||
| 808 | unsigned long put_timeout : 1; /* RW */ | ||
| 809 | unsigned long sb_activation_overrun : 1; /* RW */ | ||
| 810 | unsigned long completed_gb_activation_had_error_reply : 1; /* RW */ | ||
| 811 | unsigned long completed_gb_activation_timeout : 1; /* RW */ | ||
| 812 | unsigned long descriptor_buffer_0_parity : 1; /* RW */ | ||
| 813 | unsigned long descriptor_buffer_1_parity : 1; /* RW */ | ||
| 814 | unsigned long socket_destination_table_parity : 1; /* RW */ | ||
| 815 | unsigned long bau_reply_payload_corruption : 1; /* RW */ | ||
| 816 | unsigned long io_port_destination_table_parity : 1; /* RW */ | ||
| 817 | unsigned long intd_soft_ack_timeout : 1; /* RW */ | ||
| 818 | unsigned long int_rep_obese_msg : 1; /* RW */ | ||
| 819 | unsigned long int_rep_command_err : 1; /* RW */ | ||
| 820 | unsigned long int_timeout : 1; /* RW */ | ||
| 821 | unsigned long rsvd_43_63 : 21; /* */ | ||
| 822 | } s; | ||
| 823 | }; | ||
| 824 | |||
| 825 | /* ========================================================================= */ | ||
| 826 | /* UVH_LOCAL_INT0_CONFIG */ | ||
| 827 | /* ========================================================================= */ | ||
| 828 | #define UVH_LOCAL_INT0_CONFIG 0x61000UL | ||
| 829 | |||
| 830 | #define UVH_LOCAL_INT0_CONFIG_VECTOR_SHFT 0 | ||
| 831 | #define UVH_LOCAL_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL | ||
| 832 | #define UVH_LOCAL_INT0_CONFIG_DM_SHFT 8 | ||
| 833 | #define UVH_LOCAL_INT0_CONFIG_DM_MASK 0x0000000000000700UL | ||
| 834 | #define UVH_LOCAL_INT0_CONFIG_DESTMODE_SHFT 11 | ||
| 835 | #define UVH_LOCAL_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL | ||
| 836 | #define UVH_LOCAL_INT0_CONFIG_STATUS_SHFT 12 | ||
| 837 | #define UVH_LOCAL_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL | ||
| 838 | #define UVH_LOCAL_INT0_CONFIG_P_SHFT 13 | ||
| 839 | #define UVH_LOCAL_INT0_CONFIG_P_MASK 0x0000000000002000UL | ||
| 840 | #define UVH_LOCAL_INT0_CONFIG_T_SHFT 15 | ||
| 841 | #define UVH_LOCAL_INT0_CONFIG_T_MASK 0x0000000000008000UL | ||
| 842 | #define UVH_LOCAL_INT0_CONFIG_M_SHFT 16 | ||
| 843 | #define UVH_LOCAL_INT0_CONFIG_M_MASK 0x0000000000010000UL | ||
| 844 | #define UVH_LOCAL_INT0_CONFIG_APIC_ID_SHFT 32 | ||
| 845 | #define UVH_LOCAL_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL | ||
| 846 | |||
| 847 | union uvh_local_int0_config_u { | ||
| 848 | unsigned long v; | ||
| 849 | struct uvh_local_int0_config_s { | ||
| 850 | unsigned long vector_ : 8; /* RW */ | ||
| 851 | unsigned long dm : 3; /* RW */ | ||
| 852 | unsigned long destmode : 1; /* RW */ | ||
| 853 | unsigned long status : 1; /* RO */ | ||
| 854 | unsigned long p : 1; /* RO */ | ||
| 855 | unsigned long rsvd_14 : 1; /* */ | ||
| 856 | unsigned long t : 1; /* RO */ | ||
| 857 | unsigned long m : 1; /* RW */ | ||
| 858 | unsigned long rsvd_17_31: 15; /* */ | ||
| 859 | unsigned long apic_id : 32; /* RW */ | ||
| 860 | } s; | ||
| 861 | }; | ||
| 862 | |||
| 863 | /* ========================================================================= */ | ||
| 864 | /* UVH_LOCAL_INT0_ENABLE */ | ||
| 865 | /* ========================================================================= */ | ||
| 866 | #define UVH_LOCAL_INT0_ENABLE 0x65000UL | ||
| 867 | |||
| 868 | #define UVH_LOCAL_INT0_ENABLE_LB_HCERR_SHFT 0 | ||
| 869 | #define UVH_LOCAL_INT0_ENABLE_LB_HCERR_MASK 0x0000000000000001UL | ||
| 870 | #define UVH_LOCAL_INT0_ENABLE_GR0_HCERR_SHFT 1 | ||
| 871 | #define UVH_LOCAL_INT0_ENABLE_GR0_HCERR_MASK 0x0000000000000002UL | ||
| 872 | #define UVH_LOCAL_INT0_ENABLE_GR1_HCERR_SHFT 2 | ||
| 873 | #define UVH_LOCAL_INT0_ENABLE_GR1_HCERR_MASK 0x0000000000000004UL | ||
| 874 | #define UVH_LOCAL_INT0_ENABLE_LH_HCERR_SHFT 3 | ||
| 875 | #define UVH_LOCAL_INT0_ENABLE_LH_HCERR_MASK 0x0000000000000008UL | ||
| 876 | #define UVH_LOCAL_INT0_ENABLE_RH_HCERR_SHFT 4 | ||
| 877 | #define UVH_LOCAL_INT0_ENABLE_RH_HCERR_MASK 0x0000000000000010UL | ||
| 878 | #define UVH_LOCAL_INT0_ENABLE_XN_HCERR_SHFT 5 | ||
| 879 | #define UVH_LOCAL_INT0_ENABLE_XN_HCERR_MASK 0x0000000000000020UL | ||
| 880 | #define UVH_LOCAL_INT0_ENABLE_SI_HCERR_SHFT 6 | ||
| 881 | #define UVH_LOCAL_INT0_ENABLE_SI_HCERR_MASK 0x0000000000000040UL | ||
| 882 | #define UVH_LOCAL_INT0_ENABLE_LB_AOERR0_SHFT 7 | ||
| 883 | #define UVH_LOCAL_INT0_ENABLE_LB_AOERR0_MASK 0x0000000000000080UL | ||
| 884 | #define UVH_LOCAL_INT0_ENABLE_GR0_AOERR0_SHFT 8 | ||
| 885 | #define UVH_LOCAL_INT0_ENABLE_GR0_AOERR0_MASK 0x0000000000000100UL | ||
| 886 | #define UVH_LOCAL_INT0_ENABLE_GR1_AOERR0_SHFT 9 | ||
| 887 | #define UVH_LOCAL_INT0_ENABLE_GR1_AOERR0_MASK 0x0000000000000200UL | ||
| 888 | #define UVH_LOCAL_INT0_ENABLE_LH_AOERR0_SHFT 10 | ||
| 889 | #define UVH_LOCAL_INT0_ENABLE_LH_AOERR0_MASK 0x0000000000000400UL | ||
| 890 | #define UVH_LOCAL_INT0_ENABLE_RH_AOERR0_SHFT 11 | ||
| 891 | #define UVH_LOCAL_INT0_ENABLE_RH_AOERR0_MASK 0x0000000000000800UL | ||
| 892 | #define UVH_LOCAL_INT0_ENABLE_XN_AOERR0_SHFT 12 | ||
| 893 | #define UVH_LOCAL_INT0_ENABLE_XN_AOERR0_MASK 0x0000000000001000UL | ||
| 894 | #define UVH_LOCAL_INT0_ENABLE_SI_AOERR0_SHFT 13 | ||
| 895 | #define UVH_LOCAL_INT0_ENABLE_SI_AOERR0_MASK 0x0000000000002000UL | ||
| 896 | #define UVH_LOCAL_INT0_ENABLE_LB_AOERR1_SHFT 14 | ||
| 897 | #define UVH_LOCAL_INT0_ENABLE_LB_AOERR1_MASK 0x0000000000004000UL | ||
| 898 | #define UVH_LOCAL_INT0_ENABLE_GR0_AOERR1_SHFT 15 | ||
| 899 | #define UVH_LOCAL_INT0_ENABLE_GR0_AOERR1_MASK 0x0000000000008000UL | ||
| 900 | #define UVH_LOCAL_INT0_ENABLE_GR1_AOERR1_SHFT 16 | ||
| 901 | #define UVH_LOCAL_INT0_ENABLE_GR1_AOERR1_MASK 0x0000000000010000UL | ||
| 902 | #define UVH_LOCAL_INT0_ENABLE_LH_AOERR1_SHFT 17 | ||
| 903 | #define UVH_LOCAL_INT0_ENABLE_LH_AOERR1_MASK 0x0000000000020000UL | ||
| 904 | #define UVH_LOCAL_INT0_ENABLE_RH_AOERR1_SHFT 18 | ||
| 905 | #define UVH_LOCAL_INT0_ENABLE_RH_AOERR1_MASK 0x0000000000040000UL | ||
| 906 | #define UVH_LOCAL_INT0_ENABLE_XN_AOERR1_SHFT 19 | ||
| 907 | #define UVH_LOCAL_INT0_ENABLE_XN_AOERR1_MASK 0x0000000000080000UL | ||
| 908 | #define UVH_LOCAL_INT0_ENABLE_SI_AOERR1_SHFT 20 | ||
| 909 | #define UVH_LOCAL_INT0_ENABLE_SI_AOERR1_MASK 0x0000000000100000UL | ||
| 910 | #define UVH_LOCAL_INT0_ENABLE_RH_VPI_INT_SHFT 21 | ||
| 911 | #define UVH_LOCAL_INT0_ENABLE_RH_VPI_INT_MASK 0x0000000000200000UL | ||
| 912 | #define UVH_LOCAL_INT0_ENABLE_SYSTEM_SHUTDOWN_INT_SHFT 22 | ||
| 913 | #define UVH_LOCAL_INT0_ENABLE_SYSTEM_SHUTDOWN_INT_MASK 0x0000000000400000UL | ||
| 914 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_0_SHFT 23 | ||
| 915 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_0_MASK 0x0000000000800000UL | ||
| 916 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_1_SHFT 24 | ||
| 917 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_1_MASK 0x0000000001000000UL | ||
| 918 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_2_SHFT 25 | ||
| 919 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_2_MASK 0x0000000002000000UL | ||
| 920 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_3_SHFT 26 | ||
| 921 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_3_MASK 0x0000000004000000UL | ||
| 922 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_4_SHFT 27 | ||
| 923 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_4_MASK 0x0000000008000000UL | ||
| 924 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_5_SHFT 28 | ||
| 925 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_5_MASK 0x0000000010000000UL | ||
| 926 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_6_SHFT 29 | ||
| 927 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_6_MASK 0x0000000020000000UL | ||
| 928 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_7_SHFT 30 | ||
| 929 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_7_MASK 0x0000000040000000UL | ||
| 930 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_8_SHFT 31 | ||
| 931 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_8_MASK 0x0000000080000000UL | ||
| 932 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_9_SHFT 32 | ||
| 933 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_9_MASK 0x0000000100000000UL | ||
| 934 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_10_SHFT 33 | ||
| 935 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_10_MASK 0x0000000200000000UL | ||
| 936 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_11_SHFT 34 | ||
| 937 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_11_MASK 0x0000000400000000UL | ||
| 938 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_12_SHFT 35 | ||
| 939 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_12_MASK 0x0000000800000000UL | ||
| 940 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_13_SHFT 36 | ||
| 941 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_13_MASK 0x0000001000000000UL | ||
| 942 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_14_SHFT 37 | ||
| 943 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_14_MASK 0x0000002000000000UL | ||
| 944 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_15_SHFT 38 | ||
| 945 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_15_MASK 0x0000004000000000UL | ||
| 946 | #define UVH_LOCAL_INT0_ENABLE_L1_NMI_INT_SHFT 39 | ||
| 947 | #define UVH_LOCAL_INT0_ENABLE_L1_NMI_INT_MASK 0x0000008000000000UL | ||
| 948 | #define UVH_LOCAL_INT0_ENABLE_STOP_CLOCK_SHFT 40 | ||
| 949 | #define UVH_LOCAL_INT0_ENABLE_STOP_CLOCK_MASK 0x0000010000000000UL | ||
| 950 | #define UVH_LOCAL_INT0_ENABLE_ASIC_TO_L1_SHFT 41 | ||
| 951 | #define UVH_LOCAL_INT0_ENABLE_ASIC_TO_L1_MASK 0x0000020000000000UL | ||
| 952 | #define UVH_LOCAL_INT0_ENABLE_L1_TO_ASIC_SHFT 42 | ||
| 953 | #define UVH_LOCAL_INT0_ENABLE_L1_TO_ASIC_MASK 0x0000040000000000UL | ||
| 954 | #define UVH_LOCAL_INT0_ENABLE_LTC_INT_SHFT 43 | ||
| 955 | #define UVH_LOCAL_INT0_ENABLE_LTC_INT_MASK 0x0000080000000000UL | ||
| 956 | #define UVH_LOCAL_INT0_ENABLE_LA_SEQ_TRIGGER_SHFT 44 | ||
| 957 | #define UVH_LOCAL_INT0_ENABLE_LA_SEQ_TRIGGER_MASK 0x0000100000000000UL | ||
| 958 | |||
| 959 | union uvh_local_int0_enable_u { | ||
| 960 | unsigned long v; | ||
| 961 | struct uvh_local_int0_enable_s { | ||
| 962 | unsigned long lb_hcerr : 1; /* RW */ | ||
| 963 | unsigned long gr0_hcerr : 1; /* RW */ | ||
| 964 | unsigned long gr1_hcerr : 1; /* RW */ | ||
| 965 | unsigned long lh_hcerr : 1; /* RW */ | ||
| 966 | unsigned long rh_hcerr : 1; /* RW */ | ||
| 967 | unsigned long xn_hcerr : 1; /* RW */ | ||
| 968 | unsigned long si_hcerr : 1; /* RW */ | ||
| 969 | unsigned long lb_aoerr0 : 1; /* RW */ | ||
| 970 | unsigned long gr0_aoerr0 : 1; /* RW */ | ||
| 971 | unsigned long gr1_aoerr0 : 1; /* RW */ | ||
| 972 | unsigned long lh_aoerr0 : 1; /* RW */ | ||
| 973 | unsigned long rh_aoerr0 : 1; /* RW */ | ||
| 974 | unsigned long xn_aoerr0 : 1; /* RW */ | ||
| 975 | unsigned long si_aoerr0 : 1; /* RW */ | ||
| 976 | unsigned long lb_aoerr1 : 1; /* RW */ | ||
| 977 | unsigned long gr0_aoerr1 : 1; /* RW */ | ||
| 978 | unsigned long gr1_aoerr1 : 1; /* RW */ | ||
| 979 | unsigned long lh_aoerr1 : 1; /* RW */ | ||
| 980 | unsigned long rh_aoerr1 : 1; /* RW */ | ||
| 981 | unsigned long xn_aoerr1 : 1; /* RW */ | ||
| 982 | unsigned long si_aoerr1 : 1; /* RW */ | ||
| 983 | unsigned long rh_vpi_int : 1; /* RW */ | ||
| 984 | unsigned long system_shutdown_int : 1; /* RW */ | ||
| 985 | unsigned long lb_irq_int_0 : 1; /* RW */ | ||
| 986 | unsigned long lb_irq_int_1 : 1; /* RW */ | ||
| 987 | unsigned long lb_irq_int_2 : 1; /* RW */ | ||
| 988 | unsigned long lb_irq_int_3 : 1; /* RW */ | ||
| 989 | unsigned long lb_irq_int_4 : 1; /* RW */ | ||
| 990 | unsigned long lb_irq_int_5 : 1; /* RW */ | ||
| 991 | unsigned long lb_irq_int_6 : 1; /* RW */ | ||
| 992 | unsigned long lb_irq_int_7 : 1; /* RW */ | ||
| 993 | unsigned long lb_irq_int_8 : 1; /* RW */ | ||
| 994 | unsigned long lb_irq_int_9 : 1; /* RW */ | ||
| 995 | unsigned long lb_irq_int_10 : 1; /* RW */ | ||
| 996 | unsigned long lb_irq_int_11 : 1; /* RW */ | ||
| 997 | unsigned long lb_irq_int_12 : 1; /* RW */ | ||
| 998 | unsigned long lb_irq_int_13 : 1; /* RW */ | ||
| 999 | unsigned long lb_irq_int_14 : 1; /* RW */ | ||
| 1000 | unsigned long lb_irq_int_15 : 1; /* RW */ | ||
| 1001 | unsigned long l1_nmi_int : 1; /* RW */ | ||
| 1002 | unsigned long stop_clock : 1; /* RW */ | ||
| 1003 | unsigned long asic_to_l1 : 1; /* RW */ | ||
| 1004 | unsigned long l1_to_asic : 1; /* RW */ | ||
| 1005 | unsigned long ltc_int : 1; /* RW */ | ||
| 1006 | unsigned long la_seq_trigger : 1; /* RW */ | ||
| 1007 | unsigned long rsvd_45_63 : 19; /* */ | ||
| 1008 | } s; | ||
| 1009 | }; | ||
| 1010 | |||
| 1011 | /* ========================================================================= */ | ||
| 1012 | /* UVH_NODE_ID */ | 757 | /* UVH_NODE_ID */ |
| 1013 | /* ========================================================================= */ | 758 | /* ========================================================================= */ |
| 1014 | #define UVH_NODE_ID 0x0UL | 759 | #define UVH_NODE_ID 0x0UL |
| @@ -1112,26 +857,6 @@ union uvh_rh_gam_alias210_redirect_config_2_mmr_u { | |||
| 1112 | }; | 857 | }; |
| 1113 | 858 | ||
| 1114 | /* ========================================================================= */ | 859 | /* ========================================================================= */ |
| 1115 | /* UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR */ | ||
| 1116 | /* ========================================================================= */ | ||
| 1117 | #define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR 0x1600020UL | ||
| 1118 | |||
| 1119 | #define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_SHFT 26 | ||
| 1120 | #define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL | ||
| 1121 | #define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 | ||
| 1122 | #define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL | ||
| 1123 | |||
| 1124 | union uvh_rh_gam_cfg_overlay_config_mmr_u { | ||
| 1125 | unsigned long v; | ||
| 1126 | struct uvh_rh_gam_cfg_overlay_config_mmr_s { | ||
| 1127 | unsigned long rsvd_0_25: 26; /* */ | ||
| 1128 | unsigned long base : 20; /* RW */ | ||
| 1129 | unsigned long rsvd_46_62: 17; /* */ | ||
| 1130 | unsigned long enable : 1; /* RW */ | ||
| 1131 | } s; | ||
| 1132 | }; | ||
| 1133 | |||
| 1134 | /* ========================================================================= */ | ||
| 1135 | /* UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR */ | 860 | /* UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR */ |
| 1136 | /* ========================================================================= */ | 861 | /* ========================================================================= */ |
| 1137 | #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL | 862 | #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL |
| @@ -1263,101 +988,6 @@ union uvh_rtc1_int_config_u { | |||
| 1263 | }; | 988 | }; |
| 1264 | 989 | ||
| 1265 | /* ========================================================================= */ | 990 | /* ========================================================================= */ |
| 1266 | /* UVH_RTC2_INT_CONFIG */ | ||
| 1267 | /* ========================================================================= */ | ||
| 1268 | #define UVH_RTC2_INT_CONFIG 0x61600UL | ||
| 1269 | |||
| 1270 | #define UVH_RTC2_INT_CONFIG_VECTOR_SHFT 0 | ||
| 1271 | #define UVH_RTC2_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL | ||
| 1272 | #define UVH_RTC2_INT_CONFIG_DM_SHFT 8 | ||
| 1273 | #define UVH_RTC2_INT_CONFIG_DM_MASK 0x0000000000000700UL | ||
| 1274 | #define UVH_RTC2_INT_CONFIG_DESTMODE_SHFT 11 | ||
| 1275 | #define UVH_RTC2_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL | ||
| 1276 | #define UVH_RTC2_INT_CONFIG_STATUS_SHFT 12 | ||
| 1277 | #define UVH_RTC2_INT_CONFIG_STATUS_MASK 0x0000000000001000UL | ||
| 1278 | #define UVH_RTC2_INT_CONFIG_P_SHFT 13 | ||
| 1279 | #define UVH_RTC2_INT_CONFIG_P_MASK 0x0000000000002000UL | ||
| 1280 | #define UVH_RTC2_INT_CONFIG_T_SHFT 15 | ||
| 1281 | #define UVH_RTC2_INT_CONFIG_T_MASK 0x0000000000008000UL | ||
| 1282 | #define UVH_RTC2_INT_CONFIG_M_SHFT 16 | ||
| 1283 | #define UVH_RTC2_INT_CONFIG_M_MASK 0x0000000000010000UL | ||
| 1284 | #define UVH_RTC2_INT_CONFIG_APIC_ID_SHFT 32 | ||
| 1285 | #define UVH_RTC2_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL | ||
| 1286 | |||
| 1287 | union uvh_rtc2_int_config_u { | ||
| 1288 | unsigned long v; | ||
| 1289 | struct uvh_rtc2_int_config_s { | ||
| 1290 | unsigned long vector_ : 8; /* RW */ | ||
| 1291 | unsigned long dm : 3; /* RW */ | ||
| 1292 | unsigned long destmode : 1; /* RW */ | ||
| 1293 | unsigned long status : 1; /* RO */ | ||
| 1294 | unsigned long p : 1; /* RO */ | ||
| 1295 | unsigned long rsvd_14 : 1; /* */ | ||
| 1296 | unsigned long t : 1; /* RO */ | ||
| 1297 | unsigned long m : 1; /* RW */ | ||
| 1298 | unsigned long rsvd_17_31: 15; /* */ | ||
| 1299 | unsigned long apic_id : 32; /* RW */ | ||
| 1300 | } s; | ||
| 1301 | }; | ||
| 1302 | |||
| 1303 | /* ========================================================================= */ | ||
| 1304 | /* UVH_RTC3_INT_CONFIG */ | ||
| 1305 | /* ========================================================================= */ | ||
| 1306 | #define UVH_RTC3_INT_CONFIG 0x61640UL | ||
| 1307 | |||
| 1308 | #define UVH_RTC3_INT_CONFIG_VECTOR_SHFT 0 | ||
| 1309 | #define UVH_RTC3_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL | ||
| 1310 | #define UVH_RTC3_INT_CONFIG_DM_SHFT 8 | ||
| 1311 | #define UVH_RTC3_INT_CONFIG_DM_MASK 0x0000000000000700UL | ||
| 1312 | #define UVH_RTC3_INT_CONFIG_DESTMODE_SHFT 11 | ||
| 1313 | #define UVH_RTC3_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL | ||
| 1314 | #define UVH_RTC3_INT_CONFIG_STATUS_SHFT 12 | ||
| 1315 | #define UVH_RTC3_INT_CONFIG_STATUS_MASK 0x0000000000001000UL | ||
| 1316 | #define UVH_RTC3_INT_CONFIG_P_SHFT 13 | ||
| 1317 | #define UVH_RTC3_INT_CONFIG_P_MASK 0x0000000000002000UL | ||
| 1318 | #define UVH_RTC3_INT_CONFIG_T_SHFT 15 | ||
| 1319 | #define UVH_RTC3_INT_CONFIG_T_MASK 0x0000000000008000UL | ||
| 1320 | #define UVH_RTC3_INT_CONFIG_M_SHFT 16 | ||
| 1321 | #define UVH_RTC3_INT_CONFIG_M_MASK 0x0000000000010000UL | ||
| 1322 | #define UVH_RTC3_INT_CONFIG_APIC_ID_SHFT 32 | ||
| 1323 | #define UVH_RTC3_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL | ||
| 1324 | |||
| 1325 | union uvh_rtc3_int_config_u { | ||
| 1326 | unsigned long v; | ||
| 1327 | struct uvh_rtc3_int_config_s { | ||
| 1328 | unsigned long vector_ : 8; /* RW */ | ||
| 1329 | unsigned long dm : 3; /* RW */ | ||
| 1330 | unsigned long destmode : 1; /* RW */ | ||
| 1331 | unsigned long status : 1; /* RO */ | ||
| 1332 | unsigned long p : 1; /* RO */ | ||
| 1333 | unsigned long rsvd_14 : 1; /* */ | ||
| 1334 | unsigned long t : 1; /* RO */ | ||
| 1335 | unsigned long m : 1; /* RW */ | ||
| 1336 | unsigned long rsvd_17_31: 15; /* */ | ||
| 1337 | unsigned long apic_id : 32; /* RW */ | ||
| 1338 | } s; | ||
| 1339 | }; | ||
| 1340 | |||
| 1341 | /* ========================================================================= */ | ||
| 1342 | /* UVH_RTC_INC_RATIO */ | ||
| 1343 | /* ========================================================================= */ | ||
| 1344 | #define UVH_RTC_INC_RATIO 0x350000UL | ||
| 1345 | |||
| 1346 | #define UVH_RTC_INC_RATIO_FRACTION_SHFT 0 | ||
| 1347 | #define UVH_RTC_INC_RATIO_FRACTION_MASK 0x00000000000fffffUL | ||
| 1348 | #define UVH_RTC_INC_RATIO_RATIO_SHFT 20 | ||
| 1349 | #define UVH_RTC_INC_RATIO_RATIO_MASK 0x0000000000700000UL | ||
| 1350 | |||
| 1351 | union uvh_rtc_inc_ratio_u { | ||
| 1352 | unsigned long v; | ||
| 1353 | struct uvh_rtc_inc_ratio_s { | ||
| 1354 | unsigned long fraction : 20; /* RW */ | ||
| 1355 | unsigned long ratio : 3; /* RW */ | ||
| 1356 | unsigned long rsvd_23_63: 41; /* */ | ||
| 1357 | } s; | ||
| 1358 | }; | ||
| 1359 | |||
| 1360 | /* ========================================================================= */ | ||
| 1361 | /* UVH_SI_ADDR_MAP_CONFIG */ | 991 | /* UVH_SI_ADDR_MAP_CONFIG */ |
| 1362 | /* ========================================================================= */ | 992 | /* ========================================================================= */ |
| 1363 | #define UVH_SI_ADDR_MAP_CONFIG 0xc80000UL | 993 | #define UVH_SI_ADDR_MAP_CONFIG 0xc80000UL |
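The UVH_LB_BAU_MISC_CONTROL block added above supersedes the hand-maintained UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT / UV_INTD_SOFT_ACK_TIMEOUT_PERIOD constants deleted from the top of this header. As with the other generated MMR definitions, the union offers two equivalent views of the same 64-bit register: named bitfields or raw shift/mask arithmetic. A reduced user-space sketch of that equivalence, keeping only the two soft-ack fields (widths copied from the union above; 0xb is the ~500 ms timeout the removed comment documented; LSB-first bitfield layout is assumed, as on x86-64):

```c
#include <assert.h>

#define ENABLE_INTD_SOFT_ACK_MODE_SHFT     15
#define INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT  16

union misc_control {
        unsigned long v;
        struct {
                unsigned long rejection_delay              : 8;
                unsigned long apic_mode                    : 1;
                unsigned long force_broadcast              : 1;
                unsigned long force_lock_nop               : 1;
                unsigned long csi_agent_presence_vector    : 3;
                unsigned long descriptor_fetch_mode        : 1;
                unsigned long enable_intd_soft_ack_mode    : 1;   /* bit 15 */
                unsigned long intd_soft_ack_timeout_period : 4;   /* bits 19:16 */
        } s;
};

int main(void)
{
        union misc_control a = { .v = 0 }, b = { .v = 0 };

        a.s.enable_intd_soft_ack_mode = 1;                   /* bitfield view */
        a.s.intd_soft_ack_timeout_period = 0xb;

        b.v |= 1UL  << ENABLE_INTD_SOFT_ACK_MODE_SHFT;       /* shift/mask view */
        b.v |= 0xbUL << INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT;

        assert(a.v == b.v);
        return 0;
}
```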
diff --git a/arch/x86/include/asm/vmware.h b/arch/x86/include/asm/vmware.h deleted file mode 100644 index e49ed6d2fd4e..000000000000 --- a/arch/x86/include/asm/vmware.h +++ /dev/null | |||
| @@ -1,27 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2008, VMware, Inc. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify | ||
| 5 | * it under the terms of the GNU General Public License as published by | ||
| 6 | * the Free Software Foundation; either version 2 of the License, or | ||
| 7 | * (at your option) any later version. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope that it will be useful, but | ||
| 10 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 11 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
| 12 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
| 13 | * details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License | ||
| 16 | * along with this program; if not, write to the Free Software | ||
| 17 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | ||
| 18 | * | ||
| 19 | */ | ||
| 20 | #ifndef ASM_X86__VMWARE_H | ||
| 21 | #define ASM_X86__VMWARE_H | ||
| 22 | |||
| 23 | extern void vmware_platform_setup(void); | ||
| 24 | extern int vmware_platform(void); | ||
| 25 | extern void vmware_set_feature_bits(struct cpuinfo_x86 *c); | ||
| 26 | |||
| 27 | #endif | ||
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index fb9a080740ec..9e6779f7cf2d 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
| @@ -25,6 +25,8 @@ | |||
| 25 | * | 25 | * |
| 26 | */ | 26 | */ |
| 27 | 27 | ||
| 28 | #include <linux/types.h> | ||
| 29 | |||
| 28 | /* | 30 | /* |
| 29 | * Definitions of Primary Processor-Based VM-Execution Controls. | 31 | * Definitions of Primary Processor-Based VM-Execution Controls. |
| 30 | */ | 32 | */ |
| @@ -120,6 +122,8 @@ enum vmcs_field { | |||
| 120 | GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, | 122 | GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, |
| 121 | GUEST_IA32_PAT = 0x00002804, | 123 | GUEST_IA32_PAT = 0x00002804, |
| 122 | GUEST_IA32_PAT_HIGH = 0x00002805, | 124 | GUEST_IA32_PAT_HIGH = 0x00002805, |
| 125 | GUEST_IA32_EFER = 0x00002806, | ||
| 126 | GUEST_IA32_EFER_HIGH = 0x00002807, | ||
| 123 | GUEST_PDPTR0 = 0x0000280a, | 127 | GUEST_PDPTR0 = 0x0000280a, |
| 124 | GUEST_PDPTR0_HIGH = 0x0000280b, | 128 | GUEST_PDPTR0_HIGH = 0x0000280b, |
| 125 | GUEST_PDPTR1 = 0x0000280c, | 129 | GUEST_PDPTR1 = 0x0000280c, |
| @@ -130,6 +134,8 @@ enum vmcs_field { | |||
| 130 | GUEST_PDPTR3_HIGH = 0x00002811, | 134 | GUEST_PDPTR3_HIGH = 0x00002811, |
| 131 | HOST_IA32_PAT = 0x00002c00, | 135 | HOST_IA32_PAT = 0x00002c00, |
| 132 | HOST_IA32_PAT_HIGH = 0x00002c01, | 136 | HOST_IA32_PAT_HIGH = 0x00002c01, |
| 137 | HOST_IA32_EFER = 0x00002c02, | ||
| 138 | HOST_IA32_EFER_HIGH = 0x00002c03, | ||
| 133 | PIN_BASED_VM_EXEC_CONTROL = 0x00004000, | 139 | PIN_BASED_VM_EXEC_CONTROL = 0x00004000, |
| 134 | CPU_BASED_VM_EXEC_CONTROL = 0x00004002, | 140 | CPU_BASED_VM_EXEC_CONTROL = 0x00004002, |
| 135 | EXCEPTION_BITMAP = 0x00004004, | 141 | EXCEPTION_BITMAP = 0x00004004, |
| @@ -394,6 +400,10 @@ enum vmcs_field { | |||
| 394 | #define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08" | 400 | #define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08" |
| 395 | #define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" | 401 | #define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" |
| 396 | 402 | ||
| 397 | 403 | struct vmx_msr_entry { | |
| 404 | u32 index; | ||
| 405 | u32 reserved; | ||
| 406 | u64 value; | ||
| 407 | } __aligned(16); | ||
| 398 | 408 | ||
| 399 | #endif | 409 | #endif |
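The new vmx_msr_entry matches the record format of the VMX MSR load/store areas: an array of 16-byte slots holding a 32-bit MSR index, 32 reserved bits, and the 64-bit MSR value, which is why the struct carries __aligned(16). A quick stand-alone layout check, mirroring the struct with standard C types:

```c
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

struct vmx_msr_entry {
        uint32_t index;         /* MSR number */
        uint32_t reserved;      /* must be zero */
        uint64_t value;         /* MSR contents to load or store */
} __attribute__((aligned(16)));

_Static_assert(sizeof(struct vmx_msr_entry) == 16, "one 16-byte slot per MSR");
_Static_assert(offsetof(struct vmx_msr_entry, value) == 8, "value in the upper qword");

int main(void)
{
        printf("entry size %zu, value offset %zu\n",
               sizeof(struct vmx_msr_entry),
               offsetof(struct vmx_msr_entry, value));
        return 0;
}
```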
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index ddc04ccad03b..2c4390cae228 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h | |||
| @@ -37,8 +37,9 @@ extern int check_for_xstate(struct i387_fxsave_struct __user *buf, | |||
| 37 | void __user *fpstate, | 37 | void __user *fpstate, |
| 38 | struct _fpx_sw_bytes *sw); | 38 | struct _fpx_sw_bytes *sw); |
| 39 | 39 | ||
| 40 | static inline int xrstor_checking(struct xsave_struct *fx) | 40 | static inline int fpu_xrstor_checking(struct fpu *fpu) |
| 41 | { | 41 | { |
| 42 | struct xsave_struct *fx = &fpu->state->xsave; | ||
| 42 | int err; | 43 | int err; |
| 43 | 44 | ||
| 44 | asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" | 45 | asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" |
| @@ -110,12 +111,12 @@ static inline void xrstor_state(struct xsave_struct *fx, u64 mask) | |||
| 110 | : "memory"); | 111 | : "memory"); |
| 111 | } | 112 | } |
| 112 | 113 | ||
| 113 | static inline void xsave(struct task_struct *tsk) | 114 | static inline void fpu_xsave(struct fpu *fpu) |
| 114 | { | 115 | { |
| 115 | /* This, however, we can work around by forcing the compiler to select | 116 | /* This, however, we can work around by forcing the compiler to select |
| 116 | an addressing mode that doesn't require extended registers. */ | 117 | an addressing mode that doesn't require extended registers. */ |
| 117 | __asm__ __volatile__(".byte " REX_PREFIX "0x0f,0xae,0x27" | 118 | __asm__ __volatile__(".byte " REX_PREFIX "0x0f,0xae,0x27" |
| 118 | : : "D" (&(tsk->thread.xstate->xsave)), | 119 | : : "D" (&(fpu->state->xsave)), |
| 119 | "a" (-1), "d"(-1) : "memory"); | 120 | "a" (-1), "d"(-1) : "memory"); |
| 120 | } | 121 | } |
| 121 | #endif | 122 | #endif |
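Both fpu_xsave() and the xrstor paths hand the processor a component bitmap in EDX:EAX; the "a" (-1), "d" (-1) operands above simply request every component the CPU supports. For a specific mask the split is a plain 64-bit decomposition, sketched here (the 0x7 mask is only an illustrative value, not taken from the patch):

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t mask = 0x7;                     /* example: request the low components */
        uint32_t lo = (uint32_t)mask;            /* what would go in EAX */
        uint32_t hi = (uint32_t)(mask >> 32);    /* what would go in EDX */

        printf("eax=%#x edx=%#x\n", lo, hi);
        return 0;
}
```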
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 4c58352209e0..e77b22083721 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
| @@ -47,8 +47,6 @@ obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o | |||
| 47 | obj-y += process.o | 47 | obj-y += process.o |
| 48 | obj-y += i387.o xsave.o | 48 | obj-y += i387.o xsave.o |
| 49 | obj-y += ptrace.o | 49 | obj-y += ptrace.o |
| 50 | obj-$(CONFIG_X86_DS) += ds.o | ||
| 51 | obj-$(CONFIG_X86_DS_SELFTEST) += ds_selftest.o | ||
| 52 | obj-$(CONFIG_X86_32) += tls.o | 50 | obj-$(CONFIG_X86_32) += tls.o |
| 53 | obj-$(CONFIG_IA32_EMULATION) += tls.o | 51 | obj-$(CONFIG_IA32_EMULATION) += tls.o |
| 54 | obj-y += step.o | 52 | obj-y += step.o |
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index cd40aba6aa95..60cc4058ed5f 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
| @@ -63,7 +63,6 @@ EXPORT_SYMBOL(acpi_disabled); | |||
| 63 | int acpi_noirq; /* skip ACPI IRQ initialization */ | 63 | int acpi_noirq; /* skip ACPI IRQ initialization */ |
| 64 | int acpi_pci_disabled; /* skip ACPI PCI scan and IRQ initialization */ | 64 | int acpi_pci_disabled; /* skip ACPI PCI scan and IRQ initialization */ |
| 65 | EXPORT_SYMBOL(acpi_pci_disabled); | 65 | EXPORT_SYMBOL(acpi_pci_disabled); |
| 66 | int acpi_ht __initdata = 1; /* enable HT */ | ||
| 67 | 66 | ||
| 68 | int acpi_lapic; | 67 | int acpi_lapic; |
| 69 | int acpi_ioapic; | 68 | int acpi_ioapic; |
| @@ -94,6 +93,53 @@ enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC; | |||
| 94 | 93 | ||
| 95 | 94 | ||
| 96 | /* | 95 | /* |
| 96 | * ISA irqs by default are the first 16 gsis but can be | ||
| 97 | * any gsi as specified by an interrupt source override. | ||
| 98 | */ | ||
| 99 | static u32 isa_irq_to_gsi[NR_IRQS_LEGACY] __read_mostly = { | ||
| 100 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 | ||
| 101 | }; | ||
| 102 | |||
| 103 | static unsigned int gsi_to_irq(unsigned int gsi) | ||
| 104 | { | ||
| 105 | unsigned int irq = gsi + NR_IRQS_LEGACY; | ||
| 106 | unsigned int i; | ||
| 107 | |||
| 108 | for (i = 0; i < NR_IRQS_LEGACY; i++) { | ||
| 109 | if (isa_irq_to_gsi[i] == gsi) { | ||
| 110 | return i; | ||
| 111 | } | ||
| 112 | } | ||
| 113 | |||
| 114 | /* Provide an identity mapping of gsi == irq | ||
| 115 | * except on truly weird platforms that have | ||
| 116 | * non isa irqs in the first 16 gsis. | ||
| 117 | */ | ||
| 118 | if (gsi >= NR_IRQS_LEGACY) | ||
| 119 | irq = gsi; | ||
| 120 | else | ||
| 121 | irq = gsi_end + 1 + gsi; | ||
| 122 | |||
| 123 | return irq; | ||
| 124 | } | ||
| 125 | |||
| 126 | static u32 irq_to_gsi(int irq) | ||
| 127 | { | ||
| 128 | unsigned int gsi; | ||
| 129 | |||
| 130 | if (irq < NR_IRQS_LEGACY) | ||
| 131 | gsi = isa_irq_to_gsi[irq]; | ||
| 132 | else if (irq <= gsi_end) | ||
| 133 | gsi = irq; | ||
| 134 | else if (irq <= (gsi_end + NR_IRQS_LEGACY)) | ||
| 135 | gsi = irq - gsi_end; | ||
| 136 | else | ||
| 137 | gsi = 0xffffffff; | ||
| 138 | |||
| 139 | return gsi; | ||
| 140 | } | ||
| 141 | |||
| 142 | /* | ||
| 97 | * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END, | 143 | * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END, |
| 98 | * to map the target physical address. The problem is that set_fixmap() | 144 | * to map the target physical address. The problem is that set_fixmap() |
| 99 | * provides a single page, and it is possible that the page is not | 145 | * provides a single page, and it is possible that the page is not |
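The two helpers added above carry the core idea of this series: ISA IRQs 0-15 keep their numbers even when an interrupt source override points one of them at a different GSI, and any GSI below 16 that is not claimed by an ISA IRQ is pushed above gsi_end so the two ranges cannot collide. A stand-alone sketch of that mapping follows; it is user-space illustration only, with an invented gsi_end and override, not the kernel code.

#include <stdio.h>

#define NR_IRQS_LEGACY 16

static unsigned int gsi_end = 23;       /* last GSI of the last IO-APIC (invented) */
static unsigned int isa_irq_to_gsi[NR_IRQS_LEGACY] = {
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 20, 11, 12, 13, 14, 15   /* IRQ 10 overridden to GSI 20 */
};

static unsigned int gsi_to_irq(unsigned int gsi)
{
        unsigned int i;

        for (i = 0; i < NR_IRQS_LEGACY; i++)
                if (isa_irq_to_gsi[i] == gsi)
                        return i;

        /* identity map, except low GSIs not claimed by an ISA IRQ are
         * pushed above gsi_end so they cannot collide with IRQ 0-15 */
        return gsi >= NR_IRQS_LEGACY ? gsi : gsi_end + 1 + gsi;
}

int main(void)
{
        printf("GSI 20 -> IRQ %u\n", gsi_to_irq(20));   /* 10, via the override */
        printf("GSI 10 -> IRQ %u\n", gsi_to_irq(10));   /* 34, above gsi_end    */
        printf("GSI 40 -> IRQ %u\n", gsi_to_irq(40));   /* 40, identity         */
        return 0;
}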
| @@ -313,7 +359,7 @@ acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) | |||
| 313 | /* | 359 | /* |
| 314 | * Parse Interrupt Source Override for the ACPI SCI | 360 | * Parse Interrupt Source Override for the ACPI SCI |
| 315 | */ | 361 | */ |
| 316 | static void __init acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger) | 362 | static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger, u32 gsi) |
| 317 | { | 363 | { |
| 318 | if (trigger == 0) /* compatible SCI trigger is level */ | 364 | if (trigger == 0) /* compatible SCI trigger is level */ |
| 319 | trigger = 3; | 365 | trigger = 3; |
| @@ -333,7 +379,7 @@ static void __init acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger) | |||
| 333 | * If GSI is < 16, this will update its flags, | 379 | * If GSI is < 16, this will update its flags, |
| 334 | * else it will create a new mp_irqs[] entry. | 380 | * else it will create a new mp_irqs[] entry. |
| 335 | */ | 381 | */ |
| 336 | mp_override_legacy_irq(gsi, polarity, trigger, gsi); | 382 | mp_override_legacy_irq(bus_irq, polarity, trigger, gsi); |
| 337 | 383 | ||
| 338 | /* | 384 | /* |
| 339 | * stash over-ride to indicate we've been here | 385 | * stash over-ride to indicate we've been here |
| @@ -357,9 +403,10 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header, | |||
| 357 | acpi_table_print_madt_entry(header); | 403 | acpi_table_print_madt_entry(header); |
| 358 | 404 | ||
| 359 | if (intsrc->source_irq == acpi_gbl_FADT.sci_interrupt) { | 405 | if (intsrc->source_irq == acpi_gbl_FADT.sci_interrupt) { |
| 360 | acpi_sci_ioapic_setup(intsrc->global_irq, | 406 | acpi_sci_ioapic_setup(intsrc->source_irq, |
| 361 | intsrc->inti_flags & ACPI_MADT_POLARITY_MASK, | 407 | intsrc->inti_flags & ACPI_MADT_POLARITY_MASK, |
| 362 | (intsrc->inti_flags & ACPI_MADT_TRIGGER_MASK) >> 2); | 408 | (intsrc->inti_flags & ACPI_MADT_TRIGGER_MASK) >> 2, |
| 409 | intsrc->global_irq); | ||
| 363 | return 0; | 410 | return 0; |
| 364 | } | 411 | } |
| 365 | 412 | ||
| @@ -448,7 +495,7 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) | |||
| 448 | 495 | ||
| 449 | int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) | 496 | int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) |
| 450 | { | 497 | { |
| 451 | *irq = gsi; | 498 | *irq = gsi_to_irq(gsi); |
| 452 | 499 | ||
| 453 | #ifdef CONFIG_X86_IO_APIC | 500 | #ifdef CONFIG_X86_IO_APIC |
| 454 | if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) | 501 | if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) |
| @@ -458,6 +505,14 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) | |||
| 458 | return 0; | 505 | return 0; |
| 459 | } | 506 | } |
| 460 | 507 | ||
| 508 | int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi) | ||
| 509 | { | ||
| 510 | if (isa_irq >= 16) | ||
| 511 | return -1; | ||
| 512 | *gsi = irq_to_gsi(isa_irq); | ||
| 513 | return 0; | ||
| 514 | } | ||
| 515 | |||
| 461 | /* | 516 | /* |
| 462 | * success: return IRQ number (>=0) | 517 | * success: return IRQ number (>=0) |
| 463 | * failure: return < 0 | 518 | * failure: return < 0 |
| @@ -482,7 +537,7 @@ int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) | |||
| 482 | plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity); | 537 | plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity); |
| 483 | } | 538 | } |
| 484 | #endif | 539 | #endif |
| 485 | irq = plat_gsi; | 540 | irq = gsi_to_irq(plat_gsi); |
| 486 | 541 | ||
| 487 | return irq; | 542 | return irq; |
| 488 | } | 543 | } |
| @@ -867,29 +922,6 @@ static int __init acpi_parse_madt_lapic_entries(void) | |||
| 867 | extern int es7000_plat; | 922 | extern int es7000_plat; |
| 868 | #endif | 923 | #endif |
| 869 | 924 | ||
| 870 | int __init acpi_probe_gsi(void) | ||
| 871 | { | ||
| 872 | int idx; | ||
| 873 | int gsi; | ||
| 874 | int max_gsi = 0; | ||
| 875 | |||
| 876 | if (acpi_disabled) | ||
| 877 | return 0; | ||
| 878 | |||
| 879 | if (!acpi_ioapic) | ||
| 880 | return 0; | ||
| 881 | |||
| 882 | max_gsi = 0; | ||
| 883 | for (idx = 0; idx < nr_ioapics; idx++) { | ||
| 884 | gsi = mp_gsi_routing[idx].gsi_end; | ||
| 885 | |||
| 886 | if (gsi > max_gsi) | ||
| 887 | max_gsi = gsi; | ||
| 888 | } | ||
| 889 | |||
| 890 | return max_gsi + 1; | ||
| 891 | } | ||
| 892 | |||
| 893 | static void assign_to_mp_irq(struct mpc_intsrc *m, | 925 | static void assign_to_mp_irq(struct mpc_intsrc *m, |
| 894 | struct mpc_intsrc *mp_irq) | 926 | struct mpc_intsrc *mp_irq) |
| 895 | { | 927 | { |
| @@ -947,13 +979,13 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) | |||
| 947 | mp_irq.dstirq = pin; /* INTIN# */ | 979 | mp_irq.dstirq = pin; /* INTIN# */ |
| 948 | 980 | ||
| 949 | save_mp_irq(&mp_irq); | 981 | save_mp_irq(&mp_irq); |
| 982 | |||
| 983 | isa_irq_to_gsi[bus_irq] = gsi; | ||
| 950 | } | 984 | } |
| 951 | 985 | ||
| 952 | void __init mp_config_acpi_legacy_irqs(void) | 986 | void __init mp_config_acpi_legacy_irqs(void) |
| 953 | { | 987 | { |
| 954 | int i; | 988 | int i; |
| 955 | int ioapic; | ||
| 956 | unsigned int dstapic; | ||
| 957 | struct mpc_intsrc mp_irq; | 989 | struct mpc_intsrc mp_irq; |
| 958 | 990 | ||
| 959 | #if defined (CONFIG_MCA) || defined (CONFIG_EISA) | 991 | #if defined (CONFIG_MCA) || defined (CONFIG_EISA) |
| @@ -974,19 +1006,27 @@ void __init mp_config_acpi_legacy_irqs(void) | |||
| 974 | #endif | 1006 | #endif |
| 975 | 1007 | ||
| 976 | /* | 1008 | /* |
| 977 | * Locate the IOAPIC that manages the ISA IRQs (0-15). | ||
| 978 | */ | ||
| 979 | ioapic = mp_find_ioapic(0); | ||
| 980 | if (ioapic < 0) | ||
| 981 | return; | ||
| 982 | dstapic = mp_ioapics[ioapic].apicid; | ||
| 983 | |||
| 984 | /* | ||
| 985 | * Use the default configuration for the IRQs 0-15. Unless | 1009 | * Use the default configuration for the IRQs 0-15. Unless |
| 986 | * overridden by (MADT) interrupt source override entries. | 1010 | * overridden by (MADT) interrupt source override entries. |
| 987 | */ | 1011 | */ |
| 988 | for (i = 0; i < 16; i++) { | 1012 | for (i = 0; i < 16; i++) { |
| 1013 | int ioapic, pin; | ||
| 1014 | unsigned int dstapic; | ||
| 989 | int idx; | 1015 | int idx; |
| 1016 | u32 gsi; | ||
| 1017 | |||
| 1018 | /* Locate the gsi that irq i maps to. */ | ||
| 1019 | if (acpi_isa_irq_to_gsi(i, &gsi)) | ||
| 1020 | continue; | ||
| 1021 | |||
| 1022 | /* | ||
| 1023 | * Locate the IOAPIC that manages the ISA IRQ. | ||
| 1024 | */ | ||
| 1025 | ioapic = mp_find_ioapic(gsi); | ||
| 1026 | if (ioapic < 0) | ||
| 1027 | continue; | ||
| 1028 | pin = mp_find_ioapic_pin(ioapic, gsi); | ||
| 1029 | dstapic = mp_ioapics[ioapic].apicid; | ||
| 990 | 1030 | ||
| 991 | for (idx = 0; idx < mp_irq_entries; idx++) { | 1031 | for (idx = 0; idx < mp_irq_entries; idx++) { |
| 992 | struct mpc_intsrc *irq = mp_irqs + idx; | 1032 | struct mpc_intsrc *irq = mp_irqs + idx; |
| @@ -996,7 +1036,7 @@ void __init mp_config_acpi_legacy_irqs(void) | |||
| 996 | break; | 1036 | break; |
| 997 | 1037 | ||
| 998 | /* Do we already have a mapping for this IOAPIC pin */ | 1038 | /* Do we already have a mapping for this IOAPIC pin */ |
| 999 | if (irq->dstapic == dstapic && irq->dstirq == i) | 1039 | if (irq->dstapic == dstapic && irq->dstirq == pin) |
| 1000 | break; | 1040 | break; |
| 1001 | } | 1041 | } |
| 1002 | 1042 | ||
| @@ -1011,7 +1051,7 @@ void __init mp_config_acpi_legacy_irqs(void) | |||
| 1011 | mp_irq.dstapic = dstapic; | 1051 | mp_irq.dstapic = dstapic; |
| 1012 | mp_irq.irqtype = mp_INT; | 1052 | mp_irq.irqtype = mp_INT; |
| 1013 | mp_irq.srcbusirq = i; /* Identity mapped */ | 1053 | mp_irq.srcbusirq = i; /* Identity mapped */ |
| 1014 | mp_irq.dstirq = i; | 1054 | mp_irq.dstirq = pin; |
| 1015 | 1055 | ||
| 1016 | save_mp_irq(&mp_irq); | 1056 | save_mp_irq(&mp_irq); |
| 1017 | } | 1057 | } |
| @@ -1076,11 +1116,6 @@ int mp_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) | |||
| 1076 | 1116 | ||
| 1077 | ioapic_pin = mp_find_ioapic_pin(ioapic, gsi); | 1117 | ioapic_pin = mp_find_ioapic_pin(ioapic, gsi); |
| 1078 | 1118 | ||
| 1079 | #ifdef CONFIG_X86_32 | ||
| 1080 | if (ioapic_renumber_irq) | ||
| 1081 | gsi = ioapic_renumber_irq(ioapic, gsi); | ||
| 1082 | #endif | ||
| 1083 | |||
| 1084 | if (ioapic_pin > MP_MAX_IOAPIC_PIN) { | 1119 | if (ioapic_pin > MP_MAX_IOAPIC_PIN) { |
| 1085 | printk(KERN_ERR "Invalid reference to IOAPIC pin " | 1120 | printk(KERN_ERR "Invalid reference to IOAPIC pin " |
| 1086 | "%d-%d\n", mp_ioapics[ioapic].apicid, | 1121 | "%d-%d\n", mp_ioapics[ioapic].apicid, |
| @@ -1094,7 +1129,7 @@ int mp_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) | |||
| 1094 | set_io_apic_irq_attr(&irq_attr, ioapic, ioapic_pin, | 1129 | set_io_apic_irq_attr(&irq_attr, ioapic, ioapic_pin, |
| 1095 | trigger == ACPI_EDGE_SENSITIVE ? 0 : 1, | 1130 | trigger == ACPI_EDGE_SENSITIVE ? 0 : 1, |
| 1096 | polarity == ACPI_ACTIVE_HIGH ? 0 : 1); | 1131 | polarity == ACPI_ACTIVE_HIGH ? 0 : 1); |
| 1097 | io_apic_set_pci_routing(dev, gsi, &irq_attr); | 1132 | io_apic_set_pci_routing(dev, gsi_to_irq(gsi), &irq_attr); |
| 1098 | 1133 | ||
| 1099 | return gsi; | 1134 | return gsi; |
| 1100 | } | 1135 | } |
| @@ -1154,7 +1189,8 @@ static int __init acpi_parse_madt_ioapic_entries(void) | |||
| 1154 | * pretend we got one so we can set the SCI flags. | 1189 | * pretend we got one so we can set the SCI flags. |
| 1155 | */ | 1190 | */ |
| 1156 | if (!acpi_sci_override_gsi) | 1191 | if (!acpi_sci_override_gsi) |
| 1157 | acpi_sci_ioapic_setup(acpi_gbl_FADT.sci_interrupt, 0, 0); | 1192 | acpi_sci_ioapic_setup(acpi_gbl_FADT.sci_interrupt, 0, 0, |
| 1193 | acpi_gbl_FADT.sci_interrupt); | ||
| 1158 | 1194 | ||
| 1159 | /* Fill in identity legacy mappings where no override */ | 1195 | /* Fill in identity legacy mappings where no override */ |
| 1160 | mp_config_acpi_legacy_irqs(); | 1196 | mp_config_acpi_legacy_irqs(); |
| @@ -1464,9 +1500,8 @@ void __init acpi_boot_table_init(void) | |||
| 1464 | 1500 | ||
| 1465 | /* | 1501 | /* |
| 1466 | * If acpi_disabled, bail out | 1502 | * If acpi_disabled, bail out |
| 1467 | * One exception: acpi=ht continues far enough to enumerate LAPICs | ||
| 1468 | */ | 1503 | */ |
| 1469 | if (acpi_disabled && !acpi_ht) | 1504 | if (acpi_disabled) |
| 1470 | return; | 1505 | return; |
| 1471 | 1506 | ||
| 1472 | /* | 1507 | /* |
| @@ -1497,9 +1532,8 @@ int __init early_acpi_boot_init(void) | |||
| 1497 | { | 1532 | { |
| 1498 | /* | 1533 | /* |
| 1499 | * If acpi_disabled, bail out | 1534 | * If acpi_disabled, bail out |
| 1500 | * One exception: acpi=ht continues far enough to enumerate LAPICs | ||
| 1501 | */ | 1535 | */ |
| 1502 | if (acpi_disabled && !acpi_ht) | 1536 | if (acpi_disabled) |
| 1503 | return 1; | 1537 | return 1; |
| 1504 | 1538 | ||
| 1505 | /* | 1539 | /* |
| @@ -1517,9 +1551,8 @@ int __init acpi_boot_init(void) | |||
| 1517 | 1551 | ||
| 1518 | /* | 1552 | /* |
| 1519 | * If acpi_disabled, bail out | 1553 | * If acpi_disabled, bail out |
| 1520 | * One exception: acpi=ht continues far enough to enumerate LAPICs | ||
| 1521 | */ | 1554 | */ |
| 1522 | if (acpi_disabled && !acpi_ht) | 1555 | if (acpi_disabled) |
| 1523 | return 1; | 1556 | return 1; |
| 1524 | 1557 | ||
| 1525 | acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf); | 1558 | acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf); |
| @@ -1554,21 +1587,12 @@ static int __init parse_acpi(char *arg) | |||
| 1554 | /* acpi=force to over-ride black-list */ | 1587 | /* acpi=force to over-ride black-list */ |
| 1555 | else if (strcmp(arg, "force") == 0) { | 1588 | else if (strcmp(arg, "force") == 0) { |
| 1556 | acpi_force = 1; | 1589 | acpi_force = 1; |
| 1557 | acpi_ht = 1; | ||
| 1558 | acpi_disabled = 0; | 1590 | acpi_disabled = 0; |
| 1559 | } | 1591 | } |
| 1560 | /* acpi=strict disables out-of-spec workarounds */ | 1592 | /* acpi=strict disables out-of-spec workarounds */ |
| 1561 | else if (strcmp(arg, "strict") == 0) { | 1593 | else if (strcmp(arg, "strict") == 0) { |
| 1562 | acpi_strict = 1; | 1594 | acpi_strict = 1; |
| 1563 | } | 1595 | } |
| 1564 | /* Limit ACPI just to boot-time to enable HT */ | ||
| 1565 | else if (strcmp(arg, "ht") == 0) { | ||
| 1566 | if (!acpi_force) { | ||
| 1567 | printk(KERN_WARNING "acpi=ht will be removed in Linux-2.6.35\n"); | ||
| 1568 | disable_acpi(); | ||
| 1569 | } | ||
| 1570 | acpi_ht = 1; | ||
| 1571 | } | ||
| 1572 | /* acpi=rsdt use RSDT instead of XSDT */ | 1596 | /* acpi=rsdt use RSDT instead of XSDT */ |
| 1573 | else if (strcmp(arg, "rsdt") == 0) { | 1597 | else if (strcmp(arg, "rsdt") == 0) { |
| 1574 | acpi_rsdt_forced = 1; | 1598 | acpi_rsdt_forced = 1; |
| @@ -1576,6 +1600,10 @@ static int __init parse_acpi(char *arg) | |||
| 1576 | /* "acpi=noirq" disables ACPI interrupt routing */ | 1600 | /* "acpi=noirq" disables ACPI interrupt routing */ |
| 1577 | else if (strcmp(arg, "noirq") == 0) { | 1601 | else if (strcmp(arg, "noirq") == 0) { |
| 1578 | acpi_noirq_set(); | 1602 | acpi_noirq_set(); |
| 1603 | } | ||
| 1604 | /* "acpi=copy_dsdt" copys DSDT */ | ||
| 1605 | else if (strcmp(arg, "copy_dsdt") == 0) { | ||
| 1606 | acpi_gbl_copy_dsdt_locally = 1; | ||
| 1579 | } else { | 1607 | } else { |
| 1580 | /* Core will printk when we return error. */ | 1608 | /* Core will printk when we return error. */ |
| 1581 | return -EINVAL; | 1609 | return -EINVAL; |
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index f9961034e557..82e508677b91 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c | |||
| @@ -162,8 +162,6 @@ static int __init acpi_sleep_setup(char *str) | |||
| 162 | #endif | 162 | #endif |
| 163 | if (strncmp(str, "old_ordering", 12) == 0) | 163 | if (strncmp(str, "old_ordering", 12) == 0) |
| 164 | acpi_old_suspend_ordering(); | 164 | acpi_old_suspend_ordering(); |
| 165 | if (strncmp(str, "sci_force_enable", 16) == 0) | ||
| 166 | acpi_set_sci_en_on_resume(); | ||
| 167 | str = strchr(str, ','); | 165 | str = strchr(str, ','); |
| 168 | if (str != NULL) | 166 | if (str != NULL) |
| 169 | str += strspn(str, ", \t"); | 167 | str += strspn(str, ", \t"); |
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 1a160d5d44d0..70237732a6c7 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c | |||
| @@ -194,7 +194,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len) | |||
| 194 | } | 194 | } |
| 195 | 195 | ||
| 196 | extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; | 196 | extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; |
| 197 | extern u8 *__smp_locks[], *__smp_locks_end[]; | 197 | extern s32 __smp_locks[], __smp_locks_end[]; |
| 198 | static void *text_poke_early(void *addr, const void *opcode, size_t len); | 198 | static void *text_poke_early(void *addr, const void *opcode, size_t len); |
| 199 | 199 | ||
| 200 | /* Replace instructions with better alternatives for this CPU type. | 200 | /* Replace instructions with better alternatives for this CPU type. |
| @@ -235,37 +235,41 @@ void __init_or_module apply_alternatives(struct alt_instr *start, | |||
| 235 | 235 | ||
| 236 | #ifdef CONFIG_SMP | 236 | #ifdef CONFIG_SMP |
| 237 | 237 | ||
| 238 | static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) | 238 | static void alternatives_smp_lock(const s32 *start, const s32 *end, |
| 239 | u8 *text, u8 *text_end) | ||
| 239 | { | 240 | { |
| 240 | u8 **ptr; | 241 | const s32 *poff; |
| 241 | 242 | ||
| 242 | mutex_lock(&text_mutex); | 243 | mutex_lock(&text_mutex); |
| 243 | for (ptr = start; ptr < end; ptr++) { | 244 | for (poff = start; poff < end; poff++) { |
| 244 | if (*ptr < text) | 245 | u8 *ptr = (u8 *)poff + *poff; |
| 245 | continue; | 246 | |
| 246 | if (*ptr > text_end) | 247 | if (!*poff || ptr < text || ptr >= text_end) |
| 247 | continue; | 248 | continue; |
| 248 | /* turn DS segment override prefix into lock prefix */ | 249 | /* turn DS segment override prefix into lock prefix */ |
| 249 | text_poke(*ptr, ((unsigned char []){0xf0}), 1); | 250 | if (*ptr == 0x3e) |
| 251 | text_poke(ptr, ((unsigned char []){0xf0}), 1); | ||
| 250 | }; | 252 | }; |
| 251 | mutex_unlock(&text_mutex); | 253 | mutex_unlock(&text_mutex); |
| 252 | } | 254 | } |
| 253 | 255 | ||
| 254 | static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end) | 256 | static void alternatives_smp_unlock(const s32 *start, const s32 *end, |
| 257 | u8 *text, u8 *text_end) | ||
| 255 | { | 258 | { |
| 256 | u8 **ptr; | 259 | const s32 *poff; |
| 257 | 260 | ||
| 258 | if (noreplace_smp) | 261 | if (noreplace_smp) |
| 259 | return; | 262 | return; |
| 260 | 263 | ||
| 261 | mutex_lock(&text_mutex); | 264 | mutex_lock(&text_mutex); |
| 262 | for (ptr = start; ptr < end; ptr++) { | 265 | for (poff = start; poff < end; poff++) { |
| 263 | if (*ptr < text) | 266 | u8 *ptr = (u8 *)poff + *poff; |
| 264 | continue; | 267 | |
| 265 | if (*ptr > text_end) | 268 | if (!*poff || ptr < text || ptr >= text_end) |
| 266 | continue; | 269 | continue; |
| 267 | /* turn lock prefix into DS segment override prefix */ | 270 | /* turn lock prefix into DS segment override prefix */ |
| 268 | text_poke(*ptr, ((unsigned char []){0x3E}), 1); | 271 | if (*ptr == 0xf0) |
| 272 | text_poke(ptr, ((unsigned char []){0x3E}), 1); | ||
| 269 | }; | 273 | }; |
| 270 | mutex_unlock(&text_mutex); | 274 | mutex_unlock(&text_mutex); |
| 271 | } | 275 | } |
| @@ -276,8 +280,8 @@ struct smp_alt_module { | |||
| 276 | char *name; | 280 | char *name; |
| 277 | 281 | ||
| 278 | /* ptrs to lock prefixes */ | 282 | /* ptrs to lock prefixes */ |
| 279 | u8 **locks; | 283 | const s32 *locks; |
| 280 | u8 **locks_end; | 284 | const s32 *locks_end; |
| 281 | 285 | ||
| 282 | /* .text segment, needed to avoid patching init code ;) */ | 286 | /* .text segment, needed to avoid patching init code ;) */ |
| 283 | u8 *text; | 287 | u8 *text; |
| @@ -398,16 +402,19 @@ void alternatives_smp_switch(int smp) | |||
| 398 | int alternatives_text_reserved(void *start, void *end) | 402 | int alternatives_text_reserved(void *start, void *end) |
| 399 | { | 403 | { |
| 400 | struct smp_alt_module *mod; | 404 | struct smp_alt_module *mod; |
| 401 | u8 **ptr; | 405 | const s32 *poff; |
| 402 | u8 *text_start = start; | 406 | u8 *text_start = start; |
| 403 | u8 *text_end = end; | 407 | u8 *text_end = end; |
| 404 | 408 | ||
| 405 | list_for_each_entry(mod, &smp_alt_modules, next) { | 409 | list_for_each_entry(mod, &smp_alt_modules, next) { |
| 406 | if (mod->text > text_end || mod->text_end < text_start) | 410 | if (mod->text > text_end || mod->text_end < text_start) |
| 407 | continue; | 411 | continue; |
| 408 | for (ptr = mod->locks; ptr < mod->locks_end; ptr++) | 412 | for (poff = mod->locks; poff < mod->locks_end; poff++) { |
| 409 | if (text_start <= *ptr && text_end >= *ptr) | 413 | const u8 *ptr = (const u8 *)poff + *poff; |
| 414 | |||
| 415 | if (text_start <= ptr && text_end > ptr) | ||
| 410 | return 1; | 416 | return 1; |
| 417 | } | ||
| 411 | } | 418 | } |
| 412 | 419 | ||
| 413 | return 0; | 420 | return 0; |
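The alternative.c changes switch the __smp_locks section from absolute pointers (u8 *) to 32-bit self-relative offsets (s32), halving the table on 64-bit; the patching code recovers each target with ptr = (u8 *)poff + *poff and now also checks that the byte really is a LOCK (0xf0) or DS (0x3e) prefix before rewriting it. A user-space toy of the relative-offset scheme is below; the buffer, offsets and prefix positions are made up for the demo.

#include <stdint.h>
#include <stdio.h>

static unsigned char text[64] = { [10] = 0xf0, [20] = 0x3e };   /* lock, ds */
static int32_t smp_locks[2];                                    /* self-relative offsets */

int main(void)
{
        smp_locks[0] = (int32_t)((uintptr_t)&text[10] - (uintptr_t)&smp_locks[0]);
        smp_locks[1] = (int32_t)((uintptr_t)&text[20] - (uintptr_t)&smp_locks[1]);

        for (int i = 0; i < 2; i++) {
                const int32_t *poff = &smp_locks[i];
                unsigned char *ptr = (unsigned char *)poff + *poff;

                /* turn a LOCK prefix into a DS override, leave anything else alone */
                if (*ptr == 0xf0)
                        *ptr = 0x3e;
        }

        printf("%#x %#x\n", text[10], text[20]);        /* 0x3e 0x3e */
        return 0;
}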
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index f854d89b7edf..fa5a1474cd18 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c | |||
| @@ -731,18 +731,22 @@ static bool increase_address_space(struct protection_domain *domain, | |||
| 731 | 731 | ||
| 732 | static u64 *alloc_pte(struct protection_domain *domain, | 732 | static u64 *alloc_pte(struct protection_domain *domain, |
| 733 | unsigned long address, | 733 | unsigned long address, |
| 734 | int end_lvl, | 734 | unsigned long page_size, |
| 735 | u64 **pte_page, | 735 | u64 **pte_page, |
| 736 | gfp_t gfp) | 736 | gfp_t gfp) |
| 737 | { | 737 | { |
| 738 | int level, end_lvl; | ||
| 738 | u64 *pte, *page; | 739 | u64 *pte, *page; |
| 739 | int level; | 740 | |
| 741 | BUG_ON(!is_power_of_2(page_size)); | ||
| 740 | 742 | ||
| 741 | while (address > PM_LEVEL_SIZE(domain->mode)) | 743 | while (address > PM_LEVEL_SIZE(domain->mode)) |
| 742 | increase_address_space(domain, gfp); | 744 | increase_address_space(domain, gfp); |
| 743 | 745 | ||
| 744 | level = domain->mode - 1; | 746 | level = domain->mode - 1; |
| 745 | pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; | 747 | pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; |
| 748 | address = PAGE_SIZE_ALIGN(address, page_size); | ||
| 749 | end_lvl = PAGE_SIZE_LEVEL(page_size); | ||
| 746 | 750 | ||
| 747 | while (level > end_lvl) { | 751 | while (level > end_lvl) { |
| 748 | if (!IOMMU_PTE_PRESENT(*pte)) { | 752 | if (!IOMMU_PTE_PRESENT(*pte)) { |
| @@ -752,6 +756,10 @@ static u64 *alloc_pte(struct protection_domain *domain, | |||
| 752 | *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); | 756 | *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); |
| 753 | } | 757 | } |
| 754 | 758 | ||
| 759 | /* No level skipping support yet */ | ||
| 760 | if (PM_PTE_LEVEL(*pte) != level) | ||
| 761 | return NULL; | ||
| 762 | |||
| 755 | level -= 1; | 763 | level -= 1; |
| 756 | 764 | ||
| 757 | pte = IOMMU_PTE_PAGE(*pte); | 765 | pte = IOMMU_PTE_PAGE(*pte); |
| @@ -769,28 +777,47 @@ static u64 *alloc_pte(struct protection_domain *domain, | |||
| 769 | * This function checks if there is a PTE for a given dma address. If | 777 | * This function checks if there is a PTE for a given dma address. If |
| 770 | * there is one, it returns the pointer to it. | 778 | * there is one, it returns the pointer to it. |
| 771 | */ | 779 | */ |
| 772 | static u64 *fetch_pte(struct protection_domain *domain, | 780 | static u64 *fetch_pte(struct protection_domain *domain, unsigned long address) |
| 773 | unsigned long address, int map_size) | ||
| 774 | { | 781 | { |
| 775 | int level; | 782 | int level; |
| 776 | u64 *pte; | 783 | u64 *pte; |
| 777 | 784 | ||
| 778 | level = domain->mode - 1; | 785 | if (address > PM_LEVEL_SIZE(domain->mode)) |
| 779 | pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; | 786 | return NULL; |
| 787 | |||
| 788 | level = domain->mode - 1; | ||
| 789 | pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; | ||
| 780 | 790 | ||
| 781 | while (level > map_size) { | 791 | while (level > 0) { |
| 792 | |||
| 793 | /* Not Present */ | ||
| 782 | if (!IOMMU_PTE_PRESENT(*pte)) | 794 | if (!IOMMU_PTE_PRESENT(*pte)) |
| 783 | return NULL; | 795 | return NULL; |
| 784 | 796 | ||
| 797 | /* Large PTE */ | ||
| 798 | if (PM_PTE_LEVEL(*pte) == 0x07) { | ||
| 799 | unsigned long pte_mask, __pte; | ||
| 800 | |||
| 801 | /* | ||
| 802 | * If we have a series of large PTEs, make | ||
| 803 | * sure to return a pointer to the first one. | ||
| 804 | */ | ||
| 805 | pte_mask = PTE_PAGE_SIZE(*pte); | ||
| 806 | pte_mask = ~((PAGE_SIZE_PTE_COUNT(pte_mask) << 3) - 1); | ||
| 807 | __pte = ((unsigned long)pte) & pte_mask; | ||
| 808 | |||
| 809 | return (u64 *)__pte; | ||
| 810 | } | ||
| 811 | |||
| 812 | /* No level skipping support yet */ | ||
| 813 | if (PM_PTE_LEVEL(*pte) != level) | ||
| 814 | return NULL; | ||
| 815 | |||
| 785 | level -= 1; | 816 | level -= 1; |
| 786 | 817 | ||
| 818 | /* Walk to the next level */ | ||
| 787 | pte = IOMMU_PTE_PAGE(*pte); | 819 | pte = IOMMU_PTE_PAGE(*pte); |
| 788 | pte = &pte[PM_LEVEL_INDEX(level, address)]; | 820 | pte = &pte[PM_LEVEL_INDEX(level, address)]; |
| 789 | |||
| 790 | if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) { | ||
| 791 | pte = NULL; | ||
| 792 | break; | ||
| 793 | } | ||
| 794 | } | 821 | } |
| 795 | 822 | ||
| 796 | return pte; | 823 | return pte; |
| @@ -807,44 +834,84 @@ static int iommu_map_page(struct protection_domain *dom, | |||
| 807 | unsigned long bus_addr, | 834 | unsigned long bus_addr, |
| 808 | unsigned long phys_addr, | 835 | unsigned long phys_addr, |
| 809 | int prot, | 836 | int prot, |
| 810 | int map_size) | 837 | unsigned long page_size) |
| 811 | { | 838 | { |
| 812 | u64 __pte, *pte; | 839 | u64 __pte, *pte; |
| 813 | 840 | int i, count; | |
| 814 | bus_addr = PAGE_ALIGN(bus_addr); | ||
| 815 | phys_addr = PAGE_ALIGN(phys_addr); | ||
| 816 | |||
| 817 | BUG_ON(!PM_ALIGNED(map_size, bus_addr)); | ||
| 818 | BUG_ON(!PM_ALIGNED(map_size, phys_addr)); | ||
| 819 | 841 | ||
| 820 | if (!(prot & IOMMU_PROT_MASK)) | 842 | if (!(prot & IOMMU_PROT_MASK)) |
| 821 | return -EINVAL; | 843 | return -EINVAL; |
| 822 | 844 | ||
| 823 | pte = alloc_pte(dom, bus_addr, map_size, NULL, GFP_KERNEL); | 845 | bus_addr = PAGE_ALIGN(bus_addr); |
| 846 | phys_addr = PAGE_ALIGN(phys_addr); | ||
| 847 | count = PAGE_SIZE_PTE_COUNT(page_size); | ||
| 848 | pte = alloc_pte(dom, bus_addr, page_size, NULL, GFP_KERNEL); | ||
| 849 | |||
| 850 | for (i = 0; i < count; ++i) | ||
| 851 | if (IOMMU_PTE_PRESENT(pte[i])) | ||
| 852 | return -EBUSY; | ||
| 824 | 853 | ||
| 825 | if (IOMMU_PTE_PRESENT(*pte)) | 854 | if (page_size > PAGE_SIZE) { |
| 826 | return -EBUSY; | 855 | __pte = PAGE_SIZE_PTE(phys_addr, page_size); |
| 856 | __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_P | IOMMU_PTE_FC; | ||
| 857 | } else | ||
| 858 | __pte = phys_addr | IOMMU_PTE_P | IOMMU_PTE_FC; | ||
| 827 | 859 | ||
| 828 | __pte = phys_addr | IOMMU_PTE_P; | ||
| 829 | if (prot & IOMMU_PROT_IR) | 860 | if (prot & IOMMU_PROT_IR) |
| 830 | __pte |= IOMMU_PTE_IR; | 861 | __pte |= IOMMU_PTE_IR; |
| 831 | if (prot & IOMMU_PROT_IW) | 862 | if (prot & IOMMU_PROT_IW) |
| 832 | __pte |= IOMMU_PTE_IW; | 863 | __pte |= IOMMU_PTE_IW; |
| 833 | 864 | ||
| 834 | *pte = __pte; | 865 | for (i = 0; i < count; ++i) |
| 866 | pte[i] = __pte; | ||
| 835 | 867 | ||
| 836 | update_domain(dom); | 868 | update_domain(dom); |
| 837 | 869 | ||
| 838 | return 0; | 870 | return 0; |
| 839 | } | 871 | } |
| 840 | 872 | ||
| 841 | static void iommu_unmap_page(struct protection_domain *dom, | 873 | static unsigned long iommu_unmap_page(struct protection_domain *dom, |
| 842 | unsigned long bus_addr, int map_size) | 874 | unsigned long bus_addr, |
| 875 | unsigned long page_size) | ||
| 843 | { | 876 | { |
| 844 | u64 *pte = fetch_pte(dom, bus_addr, map_size); | 877 | unsigned long long unmap_size, unmapped; |
| 878 | u64 *pte; | ||
| 879 | |||
| 880 | BUG_ON(!is_power_of_2(page_size)); | ||
| 881 | |||
| 882 | unmapped = 0; | ||
| 845 | 883 | ||
| 846 | if (pte) | 884 | while (unmapped < page_size) { |
| 847 | *pte = 0; | 885 | |
| 886 | pte = fetch_pte(dom, bus_addr); | ||
| 887 | |||
| 888 | if (!pte) { | ||
| 889 | /* | ||
| 890 | * No PTE for this address | ||
| 891 | * move forward in 4kb steps | ||
| 892 | */ | ||
| 893 | unmap_size = PAGE_SIZE; | ||
| 894 | } else if (PM_PTE_LEVEL(*pte) == 0) { | ||
| 895 | /* 4kb PTE found for this address */ | ||
| 896 | unmap_size = PAGE_SIZE; | ||
| 897 | *pte = 0ULL; | ||
| 898 | } else { | ||
| 899 | int count, i; | ||
| 900 | |||
| 901 | /* Large PTE found which maps this address */ | ||
| 902 | unmap_size = PTE_PAGE_SIZE(*pte); | ||
| 903 | count = PAGE_SIZE_PTE_COUNT(unmap_size); | ||
| 904 | for (i = 0; i < count; i++) | ||
| 905 | pte[i] = 0ULL; | ||
| 906 | } | ||
| 907 | |||
| 908 | bus_addr = (bus_addr & ~(unmap_size - 1)) + unmap_size; | ||
| 909 | unmapped += unmap_size; | ||
| 910 | } | ||
| 911 | |||
| 912 | BUG_ON(!is_power_of_2(unmapped)); | ||
| 913 | |||
| 914 | return unmapped; | ||
| 848 | } | 915 | } |
| 849 | 916 | ||
| 850 | /* | 917 | /* |
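iommu_map_page() and iommu_unmap_page() now take an arbitrary power-of-two page size instead of a fixed map level: mapping fills however many page-table slots the chosen size requires with the same encoded PTE, and unmapping walks the region, clearing whatever size of mapping it finds at each address and advancing by that amount. A toy model of the unmap walk follows; lookup_size() stands in for fetch_pte() plus PTE_PAGE_SIZE(), and the layout is invented.

#include <stdio.h>

#define SZ_4K 0x1000UL
#define SZ_2M 0x200000UL

/* Pretend the first 2 MiB is one large mapping and the rest is 4 KiB pages. */
static unsigned long lookup_size(unsigned long bus_addr)
{
        return bus_addr < SZ_2M ? SZ_2M : SZ_4K;
}

static unsigned long unmap(unsigned long bus_addr, unsigned long size)
{
        unsigned long unmapped = 0;

        while (unmapped < size) {
                unsigned long unmap_size = lookup_size(bus_addr);

                /* clear the mapping here (omitted), then step past it,
                 * aligned down to the size we actually found */
                bus_addr = (bus_addr & ~(unmap_size - 1)) + unmap_size;
                unmapped += unmap_size;
        }
        return unmapped;
}

int main(void)
{
        /* 4 MiB starting at 0: one 2 MiB step, then 512 separate 4 KiB steps */
        printf("unmapped %#lx bytes\n", unmap(0, 2 * SZ_2M));
        return 0;
}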
| @@ -878,7 +945,7 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, | |||
| 878 | for (addr = e->address_start; addr < e->address_end; | 945 | for (addr = e->address_start; addr < e->address_end; |
| 879 | addr += PAGE_SIZE) { | 946 | addr += PAGE_SIZE) { |
| 880 | ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot, | 947 | ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot, |
| 881 | PM_MAP_4k); | 948 | PAGE_SIZE); |
| 882 | if (ret) | 949 | if (ret) |
| 883 | return ret; | 950 | return ret; |
| 884 | /* | 951 | /* |
| @@ -1006,7 +1073,7 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom, | |||
| 1006 | u64 *pte, *pte_page; | 1073 | u64 *pte, *pte_page; |
| 1007 | 1074 | ||
| 1008 | for (i = 0; i < num_ptes; ++i) { | 1075 | for (i = 0; i < num_ptes; ++i) { |
| 1009 | pte = alloc_pte(&dma_dom->domain, address, PM_MAP_4k, | 1076 | pte = alloc_pte(&dma_dom->domain, address, PAGE_SIZE, |
| 1010 | &pte_page, gfp); | 1077 | &pte_page, gfp); |
| 1011 | if (!pte) | 1078 | if (!pte) |
| 1012 | goto out_free; | 1079 | goto out_free; |
| @@ -1042,7 +1109,7 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom, | |||
| 1042 | for (i = dma_dom->aperture[index]->offset; | 1109 | for (i = dma_dom->aperture[index]->offset; |
| 1043 | i < dma_dom->aperture_size; | 1110 | i < dma_dom->aperture_size; |
| 1044 | i += PAGE_SIZE) { | 1111 | i += PAGE_SIZE) { |
| 1045 | u64 *pte = fetch_pte(&dma_dom->domain, i, PM_MAP_4k); | 1112 | u64 *pte = fetch_pte(&dma_dom->domain, i); |
| 1046 | if (!pte || !IOMMU_PTE_PRESENT(*pte)) | 1113 | if (!pte || !IOMMU_PTE_PRESENT(*pte)) |
| 1047 | continue; | 1114 | continue; |
| 1048 | 1115 | ||
| @@ -1712,7 +1779,7 @@ static u64* dma_ops_get_pte(struct dma_ops_domain *dom, | |||
| 1712 | 1779 | ||
| 1713 | pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; | 1780 | pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; |
| 1714 | if (!pte) { | 1781 | if (!pte) { |
| 1715 | pte = alloc_pte(&dom->domain, address, PM_MAP_4k, &pte_page, | 1782 | pte = alloc_pte(&dom->domain, address, PAGE_SIZE, &pte_page, |
| 1716 | GFP_ATOMIC); | 1783 | GFP_ATOMIC); |
| 1717 | aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page; | 1784 | aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page; |
| 1718 | } else | 1785 | } else |
| @@ -2439,12 +2506,11 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, | |||
| 2439 | return ret; | 2506 | return ret; |
| 2440 | } | 2507 | } |
| 2441 | 2508 | ||
| 2442 | static int amd_iommu_map_range(struct iommu_domain *dom, | 2509 | static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, |
| 2443 | unsigned long iova, phys_addr_t paddr, | 2510 | phys_addr_t paddr, int gfp_order, int iommu_prot) |
| 2444 | size_t size, int iommu_prot) | ||
| 2445 | { | 2511 | { |
| 2512 | unsigned long page_size = 0x1000UL << gfp_order; | ||
| 2446 | struct protection_domain *domain = dom->priv; | 2513 | struct protection_domain *domain = dom->priv; |
| 2447 | unsigned long i, npages = iommu_num_pages(paddr, size, PAGE_SIZE); | ||
| 2448 | int prot = 0; | 2514 | int prot = 0; |
| 2449 | int ret; | 2515 | int ret; |
| 2450 | 2516 | ||
| @@ -2453,61 +2519,50 @@ static int amd_iommu_map_range(struct iommu_domain *dom, | |||
| 2453 | if (iommu_prot & IOMMU_WRITE) | 2519 | if (iommu_prot & IOMMU_WRITE) |
| 2454 | prot |= IOMMU_PROT_IW; | 2520 | prot |= IOMMU_PROT_IW; |
| 2455 | 2521 | ||
| 2456 | iova &= PAGE_MASK; | ||
| 2457 | paddr &= PAGE_MASK; | ||
| 2458 | |||
| 2459 | mutex_lock(&domain->api_lock); | 2522 | mutex_lock(&domain->api_lock); |
| 2460 | 2523 | ret = iommu_map_page(domain, iova, paddr, prot, page_size); | |
| 2461 | for (i = 0; i < npages; ++i) { | ||
| 2462 | ret = iommu_map_page(domain, iova, paddr, prot, PM_MAP_4k); | ||
| 2463 | if (ret) | ||
| 2464 | return ret; | ||
| 2465 | |||
| 2466 | iova += PAGE_SIZE; | ||
| 2467 | paddr += PAGE_SIZE; | ||
| 2468 | } | ||
| 2469 | |||
| 2470 | mutex_unlock(&domain->api_lock); | 2524 | mutex_unlock(&domain->api_lock); |
| 2471 | 2525 | ||
| 2472 | return 0; | 2526 | return ret; |
| 2473 | } | 2527 | } |
| 2474 | 2528 | ||
| 2475 | static void amd_iommu_unmap_range(struct iommu_domain *dom, | 2529 | static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, |
| 2476 | unsigned long iova, size_t size) | 2530 | int gfp_order) |
| 2477 | { | 2531 | { |
| 2478 | |||
| 2479 | struct protection_domain *domain = dom->priv; | 2532 | struct protection_domain *domain = dom->priv; |
| 2480 | unsigned long i, npages = iommu_num_pages(iova, size, PAGE_SIZE); | 2533 | unsigned long page_size, unmap_size; |
| 2481 | 2534 | ||
| 2482 | iova &= PAGE_MASK; | 2535 | page_size = 0x1000UL << gfp_order; |
| 2483 | 2536 | ||
| 2484 | mutex_lock(&domain->api_lock); | 2537 | mutex_lock(&domain->api_lock); |
| 2485 | 2538 | unmap_size = iommu_unmap_page(domain, iova, page_size); | |
| 2486 | for (i = 0; i < npages; ++i) { | 2539 | mutex_unlock(&domain->api_lock); |
| 2487 | iommu_unmap_page(domain, iova, PM_MAP_4k); | ||
| 2488 | iova += PAGE_SIZE; | ||
| 2489 | } | ||
| 2490 | 2540 | ||
| 2491 | iommu_flush_tlb_pde(domain); | 2541 | iommu_flush_tlb_pde(domain); |
| 2492 | 2542 | ||
| 2493 | mutex_unlock(&domain->api_lock); | 2543 | return get_order(unmap_size); |
| 2494 | } | 2544 | } |
| 2495 | 2545 | ||
| 2496 | static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, | 2546 | static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, |
| 2497 | unsigned long iova) | 2547 | unsigned long iova) |
| 2498 | { | 2548 | { |
| 2499 | struct protection_domain *domain = dom->priv; | 2549 | struct protection_domain *domain = dom->priv; |
| 2500 | unsigned long offset = iova & ~PAGE_MASK; | 2550 | unsigned long offset_mask; |
| 2501 | phys_addr_t paddr; | 2551 | phys_addr_t paddr; |
| 2502 | u64 *pte; | 2552 | u64 *pte, __pte; |
| 2503 | 2553 | ||
| 2504 | pte = fetch_pte(domain, iova, PM_MAP_4k); | 2554 | pte = fetch_pte(domain, iova); |
| 2505 | 2555 | ||
| 2506 | if (!pte || !IOMMU_PTE_PRESENT(*pte)) | 2556 | if (!pte || !IOMMU_PTE_PRESENT(*pte)) |
| 2507 | return 0; | 2557 | return 0; |
| 2508 | 2558 | ||
| 2509 | paddr = *pte & IOMMU_PAGE_MASK; | 2559 | if (PM_PTE_LEVEL(*pte) == 0) |
| 2510 | paddr |= offset; | 2560 | offset_mask = PAGE_SIZE - 1; |
| 2561 | else | ||
| 2562 | offset_mask = PTE_PAGE_SIZE(*pte) - 1; | ||
| 2563 | |||
| 2564 | __pte = *pte & PM_ADDR_MASK; | ||
| 2565 | paddr = (__pte & ~offset_mask) | (iova & offset_mask); | ||
| 2511 | 2566 | ||
| 2512 | return paddr; | 2567 | return paddr; |
| 2513 | } | 2568 | } |
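With the generic IOMMU API moving to (iova, order) pairs, the driver converts the order into a byte size with 0x1000UL << gfp_order and hands the whole request to iommu_map_page()/iommu_unmap_page() in one call; unmap reports back the order of what was actually torn down. The conversion in both directions, as a quick sketch (size_to_order() is a simplified stand-in for the kernel's get_order()):

#include <stdio.h>

static unsigned long order_to_size(int order)
{
        return 0x1000UL << order;       /* order 0 = 4 KiB, order 9 = 2 MiB */
}

static int size_to_order(unsigned long size)    /* simplified get_order() */
{
        int order = 0;

        while (order_to_size(order) < size)
                order++;
        return order;
}

int main(void)
{
        printf("order 9 -> %#lx bytes\n", order_to_size(9));            /* 0x200000 */
        printf("2 MiB   -> order %d\n", size_to_order(0x200000));       /* 9 */
        return 0;
}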
| @@ -2523,8 +2578,8 @@ static struct iommu_ops amd_iommu_ops = { | |||
| 2523 | .domain_destroy = amd_iommu_domain_destroy, | 2578 | .domain_destroy = amd_iommu_domain_destroy, |
| 2524 | .attach_dev = amd_iommu_attach_device, | 2579 | .attach_dev = amd_iommu_attach_device, |
| 2525 | .detach_dev = amd_iommu_detach_device, | 2580 | .detach_dev = amd_iommu_detach_device, |
| 2526 | .map = amd_iommu_map_range, | 2581 | .map = amd_iommu_map, |
| 2527 | .unmap = amd_iommu_unmap_range, | 2582 | .unmap = amd_iommu_unmap, |
| 2528 | .iova_to_phys = amd_iommu_iova_to_phys, | 2583 | .iova_to_phys = amd_iommu_iova_to_phys, |
| 2529 | .domain_has_cap = amd_iommu_domain_has_cap, | 2584 | .domain_has_cap = amd_iommu_domain_has_cap, |
| 2530 | }; | 2585 | }; |
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 6360abf993d4..3bacb4d0844c 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c | |||
| @@ -120,6 +120,7 @@ struct ivmd_header { | |||
| 120 | bool amd_iommu_dump; | 120 | bool amd_iommu_dump; |
| 121 | 121 | ||
| 122 | static int __initdata amd_iommu_detected; | 122 | static int __initdata amd_iommu_detected; |
| 123 | static bool __initdata amd_iommu_disabled; | ||
| 123 | 124 | ||
| 124 | u16 amd_iommu_last_bdf; /* largest PCI device id we have | 125 | u16 amd_iommu_last_bdf; /* largest PCI device id we have |
| 125 | to handle */ | 126 | to handle */ |
| @@ -1372,6 +1373,9 @@ void __init amd_iommu_detect(void) | |||
| 1372 | if (no_iommu || (iommu_detected && !gart_iommu_aperture)) | 1373 | if (no_iommu || (iommu_detected && !gart_iommu_aperture)) |
| 1373 | return; | 1374 | return; |
| 1374 | 1375 | ||
| 1376 | if (amd_iommu_disabled) | ||
| 1377 | return; | ||
| 1378 | |||
| 1375 | if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { | 1379 | if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { |
| 1376 | iommu_detected = 1; | 1380 | iommu_detected = 1; |
| 1377 | amd_iommu_detected = 1; | 1381 | amd_iommu_detected = 1; |
| @@ -1401,6 +1405,8 @@ static int __init parse_amd_iommu_options(char *str) | |||
| 1401 | for (; *str; ++str) { | 1405 | for (; *str; ++str) { |
| 1402 | if (strncmp(str, "fullflush", 9) == 0) | 1406 | if (strncmp(str, "fullflush", 9) == 0) |
| 1403 | amd_iommu_unmap_flush = true; | 1407 | amd_iommu_unmap_flush = true; |
| 1408 | if (strncmp(str, "off", 3) == 0) | ||
| 1409 | amd_iommu_disabled = true; | ||
| 1404 | } | 1410 | } |
| 1405 | 1411 | ||
| 1406 | return 1; | 1412 | return 1; |
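The new "off" keyword is picked up by the same character-by-character strncmp() scan as "fullflush", so it can appear anywhere in the amd_iommu= string. A stand-alone copy of that scan, for illustration only; the two booleans are local stand-ins for the kernel flags.

#include <stdio.h>
#include <string.h>
#include <stdbool.h>

static bool unmap_flush, disabled;

static void parse(const char *str)
{
        for (; *str; ++str) {
                if (strncmp(str, "fullflush", 9) == 0)
                        unmap_flush = true;
                if (strncmp(str, "off", 3) == 0)
                        disabled = true;
        }
}

int main(void)
{
        parse("fullflush,off");
        printf("fullflush=%d off=%d\n", unmap_flush, disabled);  /* 1 1 */
        return 0;
}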
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index e5a4a1e01618..c02cc692985c 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
| @@ -51,6 +51,7 @@ | |||
| 51 | #include <asm/smp.h> | 51 | #include <asm/smp.h> |
| 52 | #include <asm/mce.h> | 52 | #include <asm/mce.h> |
| 53 | #include <asm/kvm_para.h> | 53 | #include <asm/kvm_para.h> |
| 54 | #include <asm/tsc.h> | ||
| 54 | 55 | ||
| 55 | unsigned int num_processors; | 56 | unsigned int num_processors; |
| 56 | 57 | ||
| @@ -1151,8 +1152,13 @@ static void __cpuinit lapic_setup_esr(void) | |||
| 1151 | */ | 1152 | */ |
| 1152 | void __cpuinit setup_local_APIC(void) | 1153 | void __cpuinit setup_local_APIC(void) |
| 1153 | { | 1154 | { |
| 1154 | unsigned int value; | 1155 | unsigned int value, queued; |
| 1155 | int i, j; | 1156 | int i, j, acked = 0; |
| 1157 | unsigned long long tsc = 0, ntsc; | ||
| 1158 | long long max_loops = cpu_khz; | ||
| 1159 | |||
| 1160 | if (cpu_has_tsc) | ||
| 1161 | rdtscll(tsc); | ||
| 1156 | 1162 | ||
| 1157 | if (disable_apic) { | 1163 | if (disable_apic) { |
| 1158 | arch_disable_smp_support(); | 1164 | arch_disable_smp_support(); |
| @@ -1204,13 +1210,32 @@ void __cpuinit setup_local_APIC(void) | |||
| 1204 | * the interrupt. Hence a vector might get locked. It was noticed | 1210 | * the interrupt. Hence a vector might get locked. It was noticed |
| 1205 | * for timer irq (vector 0x31). Issue an extra EOI to clear ISR. | 1211 | * for timer irq (vector 0x31). Issue an extra EOI to clear ISR. |
| 1206 | */ | 1212 | */ |
| 1207 | for (i = APIC_ISR_NR - 1; i >= 0; i--) { | 1213 | do { |
| 1208 | value = apic_read(APIC_ISR + i*0x10); | 1214 | queued = 0; |
| 1209 | for (j = 31; j >= 0; j--) { | 1215 | for (i = APIC_ISR_NR - 1; i >= 0; i--) |
| 1210 | if (value & (1<<j)) | 1216 | queued |= apic_read(APIC_IRR + i*0x10); |
| 1211 | ack_APIC_irq(); | 1217 | |
| 1218 | for (i = APIC_ISR_NR - 1; i >= 0; i--) { | ||
| 1219 | value = apic_read(APIC_ISR + i*0x10); | ||
| 1220 | for (j = 31; j >= 0; j--) { | ||
| 1221 | if (value & (1<<j)) { | ||
| 1222 | ack_APIC_irq(); | ||
| 1223 | acked++; | ||
| 1224 | } | ||
| 1225 | } | ||
| 1212 | } | 1226 | } |
| 1213 | } | 1227 | if (acked > 256) { |
| 1228 | printk(KERN_ERR "LAPIC pending interrupts after %d EOI\n", | ||
| 1229 | acked); | ||
| 1230 | break; | ||
| 1231 | } | ||
| 1232 | if (cpu_has_tsc) { | ||
| 1233 | rdtscll(ntsc); | ||
| 1234 | max_loops = (cpu_khz << 10) - (ntsc - tsc); | ||
| 1235 | } else | ||
| 1236 | max_loops--; | ||
| 1237 | } while (queued && max_loops > 0); | ||
| 1238 | WARN_ON(max_loops <= 0); | ||
| 1214 | 1239 | ||
| 1215 | /* | 1240 | /* |
| 1216 | * Now that we are all set up, enable the APIC | 1241 | * Now that we are all set up, enable the APIC |
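The rewritten block keeps issuing EOIs while any IRR bit is still set, instead of making a single fixed pass over the ISR registers, and it bounds the retries either by a TSC budget of roughly cpu_khz << 10 cycles (about a second) or, without a TSC, by a fixed iteration count, warning if it gives up. The scan order itself is unchanged; a stand-alone rendering of the drain loop is below, with apic_read()/ack_APIC_irq() replaced by fakes and the register contents invented.

#include <stdio.h>

#define APIC_ISR_NR 8

/* Fake 256-bit ISR/IRR register files (32 bits per apic_read slot). */
static unsigned int isr[APIC_ISR_NR] = { [0] = 1u << 3, [1] = 1u << 17 };
static unsigned int irr[APIC_ISR_NR];

int main(void)
{
        unsigned int queued, value;
        int i, j, acked = 0;

        do {
                queued = 0;
                for (i = APIC_ISR_NR - 1; i >= 0; i--)
                        queued |= irr[i];

                for (i = APIC_ISR_NR - 1; i >= 0; i--) {
                        value = isr[i];
                        for (j = 31; j >= 0; j--) {
                                if (value & (1u << j)) {
                                        isr[i] &= ~(1u << j);   /* fake ack_APIC_irq() */
                                        acked++;
                                }
                        }
                }
        } while (queued && acked <= 256);       /* bound stands in for the TSC budget */

        printf("acked %d stale vectors\n", acked);      /* 2 */
        return 0;
}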
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 03ba1b895f5e..425e53a87feb 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c | |||
| @@ -131,24 +131,6 @@ int es7000_plat; | |||
| 131 | 131 | ||
| 132 | static unsigned int base; | 132 | static unsigned int base; |
| 133 | 133 | ||
| 134 | static int | ||
| 135 | es7000_rename_gsi(int ioapic, int gsi) | ||
| 136 | { | ||
| 137 | if (es7000_plat == ES7000_ZORRO) | ||
| 138 | return gsi; | ||
| 139 | |||
| 140 | if (!base) { | ||
| 141 | int i; | ||
| 142 | for (i = 0; i < nr_ioapics; i++) | ||
| 143 | base += nr_ioapic_registers[i]; | ||
| 144 | } | ||
| 145 | |||
| 146 | if (!ioapic && (gsi < 16)) | ||
| 147 | gsi += base; | ||
| 148 | |||
| 149 | return gsi; | ||
| 150 | } | ||
| 151 | |||
| 152 | static int __cpuinit wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) | 134 | static int __cpuinit wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) |
| 153 | { | 135 | { |
| 154 | unsigned long vect = 0, psaival = 0; | 136 | unsigned long vect = 0, psaival = 0; |
| @@ -190,7 +172,6 @@ static void setup_unisys(void) | |||
| 190 | es7000_plat = ES7000_ZORRO; | 172 | es7000_plat = ES7000_ZORRO; |
| 191 | else | 173 | else |
| 192 | es7000_plat = ES7000_CLASSIC; | 174 | es7000_plat = ES7000_CLASSIC; |
| 193 | ioapic_renumber_irq = es7000_rename_gsi; | ||
| 194 | } | 175 | } |
| 195 | 176 | ||
| 196 | /* | 177 | /* |
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index eb2789c3f721..33f3563a2a52 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
| @@ -89,6 +89,9 @@ int nr_ioapics; | |||
| 89 | /* IO APIC gsi routing info */ | 89 | /* IO APIC gsi routing info */ |
| 90 | struct mp_ioapic_gsi mp_gsi_routing[MAX_IO_APICS]; | 90 | struct mp_ioapic_gsi mp_gsi_routing[MAX_IO_APICS]; |
| 91 | 91 | ||
| 92 | /* The last gsi number used */ | ||
| 93 | u32 gsi_end; | ||
| 94 | |||
| 92 | /* MP IRQ source entries */ | 95 | /* MP IRQ source entries */ |
| 93 | struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; | 96 | struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; |
| 94 | 97 | ||
| @@ -1013,10 +1016,9 @@ static inline int irq_trigger(int idx) | |||
| 1013 | return MPBIOS_trigger(idx); | 1016 | return MPBIOS_trigger(idx); |
| 1014 | } | 1017 | } |
| 1015 | 1018 | ||
| 1016 | int (*ioapic_renumber_irq)(int ioapic, int irq); | ||
| 1017 | static int pin_2_irq(int idx, int apic, int pin) | 1019 | static int pin_2_irq(int idx, int apic, int pin) |
| 1018 | { | 1020 | { |
| 1019 | int irq, i; | 1021 | int irq; |
| 1020 | int bus = mp_irqs[idx].srcbus; | 1022 | int bus = mp_irqs[idx].srcbus; |
| 1021 | 1023 | ||
| 1022 | /* | 1024 | /* |
| @@ -1028,18 +1030,12 @@ static int pin_2_irq(int idx, int apic, int pin) | |||
| 1028 | if (test_bit(bus, mp_bus_not_pci)) { | 1030 | if (test_bit(bus, mp_bus_not_pci)) { |
| 1029 | irq = mp_irqs[idx].srcbusirq; | 1031 | irq = mp_irqs[idx].srcbusirq; |
| 1030 | } else { | 1032 | } else { |
| 1031 | /* | 1033 | u32 gsi = mp_gsi_routing[apic].gsi_base + pin; |
| 1032 | * PCI IRQs are mapped in order | 1034 | |
| 1033 | */ | 1035 | if (gsi >= NR_IRQS_LEGACY) |
| 1034 | i = irq = 0; | 1036 | irq = gsi; |
| 1035 | while (i < apic) | 1037 | else |
| 1036 | irq += nr_ioapic_registers[i++]; | 1038 | irq = gsi_end + 1 + gsi; |
| 1037 | irq += pin; | ||
| 1038 | /* | ||
| 1039 | * For MPS mode, so far only needed by ES7000 platform | ||
| 1040 | */ | ||
| 1041 | if (ioapic_renumber_irq) | ||
| 1042 | irq = ioapic_renumber_irq(apic, irq); | ||
| 1043 | } | 1039 | } |
| 1044 | 1040 | ||
| 1045 | #ifdef CONFIG_X86_32 | 1041 | #ifdef CONFIG_X86_32 |
| @@ -1950,20 +1946,8 @@ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; | |||
| 1950 | 1946 | ||
| 1951 | void __init enable_IO_APIC(void) | 1947 | void __init enable_IO_APIC(void) |
| 1952 | { | 1948 | { |
| 1953 | union IO_APIC_reg_01 reg_01; | ||
| 1954 | int i8259_apic, i8259_pin; | 1949 | int i8259_apic, i8259_pin; |
| 1955 | int apic; | 1950 | int apic; |
| 1956 | unsigned long flags; | ||
| 1957 | |||
| 1958 | /* | ||
| 1959 | * The number of IO-APIC IRQ registers (== #pins): | ||
| 1960 | */ | ||
| 1961 | for (apic = 0; apic < nr_ioapics; apic++) { | ||
| 1962 | raw_spin_lock_irqsave(&ioapic_lock, flags); | ||
| 1963 | reg_01.raw = io_apic_read(apic, 1); | ||
| 1964 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | ||
| 1965 | nr_ioapic_registers[apic] = reg_01.bits.entries+1; | ||
| 1966 | } | ||
| 1967 | 1951 | ||
| 1968 | if (!legacy_pic->nr_legacy_irqs) | 1952 | if (!legacy_pic->nr_legacy_irqs) |
| 1969 | return; | 1953 | return; |
| @@ -3858,27 +3842,20 @@ int __init io_apic_get_redir_entries (int ioapic) | |||
| 3858 | reg_01.raw = io_apic_read(ioapic, 1); | 3842 | reg_01.raw = io_apic_read(ioapic, 1); |
| 3859 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | 3843 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
| 3860 | 3844 | ||
| 3861 | return reg_01.bits.entries; | 3845 | /* The register returns the maximum index redir index |
| 3846 | * supported, which is one less than the total number of redir | ||
| 3847 | * entries. | ||
| 3848 | */ | ||
| 3849 | return reg_01.bits.entries + 1; | ||
| 3862 | } | 3850 | } |
| 3863 | 3851 | ||
| 3864 | void __init probe_nr_irqs_gsi(void) | 3852 | void __init probe_nr_irqs_gsi(void) |
| 3865 | { | 3853 | { |
| 3866 | int nr = 0; | 3854 | int nr; |
| 3867 | 3855 | ||
| 3868 | nr = acpi_probe_gsi(); | 3856 | nr = gsi_end + 1 + NR_IRQS_LEGACY; |
| 3869 | if (nr > nr_irqs_gsi) { | 3857 | if (nr > nr_irqs_gsi) |
| 3870 | nr_irqs_gsi = nr; | 3858 | nr_irqs_gsi = nr; |
| 3871 | } else { | ||
| 3872 | /* for acpi=off or acpi is not compiled in */ | ||
| 3873 | int idx; | ||
| 3874 | |||
| 3875 | nr = 0; | ||
| 3876 | for (idx = 0; idx < nr_ioapics; idx++) | ||
| 3877 | nr += io_apic_get_redir_entries(idx) + 1; | ||
| 3878 | |||
| 3879 | if (nr > nr_irqs_gsi) | ||
| 3880 | nr_irqs_gsi = nr; | ||
| 3881 | } | ||
| 3882 | 3859 | ||
| 3883 | printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); | 3860 | printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); |
| 3884 | } | 3861 | } |
| @@ -4085,22 +4062,27 @@ int __init io_apic_get_version(int ioapic) | |||
| 4085 | return reg_01.bits.version; | 4062 | return reg_01.bits.version; |
| 4086 | } | 4063 | } |
| 4087 | 4064 | ||
| 4088 | int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) | 4065 | int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity) |
| 4089 | { | 4066 | { |
| 4090 | int i; | 4067 | int ioapic, pin, idx; |
| 4091 | 4068 | ||
| 4092 | if (skip_ioapic_setup) | 4069 | if (skip_ioapic_setup) |
| 4093 | return -1; | 4070 | return -1; |
| 4094 | 4071 | ||
| 4095 | for (i = 0; i < mp_irq_entries; i++) | 4072 | ioapic = mp_find_ioapic(gsi); |
| 4096 | if (mp_irqs[i].irqtype == mp_INT && | 4073 | if (ioapic < 0) |
| 4097 | mp_irqs[i].srcbusirq == bus_irq) | ||
| 4098 | break; | ||
| 4099 | if (i >= mp_irq_entries) | ||
| 4100 | return -1; | 4074 | return -1; |
| 4101 | 4075 | ||
| 4102 | *trigger = irq_trigger(i); | 4076 | pin = mp_find_ioapic_pin(ioapic, gsi); |
| 4103 | *polarity = irq_polarity(i); | 4077 | if (pin < 0) |
| 4078 | return -1; | ||
| 4079 | |||
| 4080 | idx = find_irq_entry(ioapic, pin, mp_INT); | ||
| 4081 | if (idx < 0) | ||
| 4082 | return -1; | ||
| 4083 | |||
| 4084 | *trigger = irq_trigger(idx); | ||
| 4085 | *polarity = irq_polarity(idx); | ||
| 4104 | return 0; | 4086 | return 0; |
| 4105 | } | 4087 | } |
| 4106 | 4088 | ||
| @@ -4241,7 +4223,7 @@ void __init ioapic_insert_resources(void) | |||
| 4241 | } | 4223 | } |
| 4242 | } | 4224 | } |
| 4243 | 4225 | ||
| 4244 | int mp_find_ioapic(int gsi) | 4226 | int mp_find_ioapic(u32 gsi) |
| 4245 | { | 4227 | { |
| 4246 | int i = 0; | 4228 | int i = 0; |
| 4247 | 4229 | ||
| @@ -4256,7 +4238,7 @@ int mp_find_ioapic(int gsi) | |||
| 4256 | return -1; | 4238 | return -1; |
| 4257 | } | 4239 | } |
| 4258 | 4240 | ||
| 4259 | int mp_find_ioapic_pin(int ioapic, int gsi) | 4241 | int mp_find_ioapic_pin(int ioapic, u32 gsi) |
| 4260 | { | 4242 | { |
| 4261 | if (WARN_ON(ioapic == -1)) | 4243 | if (WARN_ON(ioapic == -1)) |
| 4262 | return -1; | 4244 | return -1; |
| @@ -4284,6 +4266,7 @@ static int bad_ioapic(unsigned long address) | |||
| 4284 | void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) | 4266 | void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) |
| 4285 | { | 4267 | { |
| 4286 | int idx = 0; | 4268 | int idx = 0; |
| 4269 | int entries; | ||
| 4287 | 4270 | ||
| 4288 | if (bad_ioapic(address)) | 4271 | if (bad_ioapic(address)) |
| 4289 | return; | 4272 | return; |
| @@ -4302,9 +4285,17 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) | |||
| 4302 | * Build basic GSI lookup table to facilitate gsi->io_apic lookups | 4285 | * Build basic GSI lookup table to facilitate gsi->io_apic lookups |
| 4303 | * and to prevent reprogramming of IOAPIC pins (PCI GSIs). | 4286 | * and to prevent reprogramming of IOAPIC pins (PCI GSIs). |
| 4304 | */ | 4287 | */ |
| 4288 | entries = io_apic_get_redir_entries(idx); | ||
| 4305 | mp_gsi_routing[idx].gsi_base = gsi_base; | 4289 | mp_gsi_routing[idx].gsi_base = gsi_base; |
| 4306 | mp_gsi_routing[idx].gsi_end = gsi_base + | 4290 | mp_gsi_routing[idx].gsi_end = gsi_base + entries - 1; |
| 4307 | io_apic_get_redir_entries(idx); | 4291 | |
| 4292 | /* | ||
| 4293 | * The number of IO-APIC IRQ registers (== #pins): | ||
| 4294 | */ | ||
| 4295 | nr_ioapic_registers[idx] = entries; | ||
| 4296 | |||
| 4297 | if (mp_gsi_routing[idx].gsi_end > gsi_end) | ||
| 4298 | gsi_end = mp_gsi_routing[idx].gsi_end; | ||
| 4308 | 4299 | ||
| 4309 | printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " | 4300 | printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " |
| 4310 | "GSI %d-%d\n", idx, mp_ioapics[idx].apicid, | 4301 | "GSI %d-%d\n", idx, mp_ioapics[idx].apicid, |
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index c085d52dbaf2..e46f98f36e31 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c | |||
| @@ -735,9 +735,6 @@ void __init uv_system_init(void) | |||
| 735 | uv_node_to_blade[nid] = blade; | 735 | uv_node_to_blade[nid] = blade; |
| 736 | uv_cpu_to_blade[cpu] = blade; | 736 | uv_cpu_to_blade[cpu] = blade; |
| 737 | max_pnode = max(pnode, max_pnode); | 737 | max_pnode = max(pnode, max_pnode); |
| 738 | |||
| 739 | printk(KERN_DEBUG "UV: cpu %d, apicid 0x%x, pnode %d, nid %d, lcpu %d, blade %d\n", | ||
| 740 | cpu, apicid, pnode, nid, lcpu, blade); | ||
| 741 | } | 738 | } |
| 742 | 739 | ||
| 743 | /* Add blade/pnode info for nodes without cpus */ | 740 | /* Add blade/pnode info for nodes without cpus */ |
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 031aa887b0eb..c4f9182ca3ac 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
| @@ -1224,7 +1224,7 @@ static void reinit_timer(void) | |||
| 1224 | #ifdef INIT_TIMER_AFTER_SUSPEND | 1224 | #ifdef INIT_TIMER_AFTER_SUSPEND |
| 1225 | unsigned long flags; | 1225 | unsigned long flags; |
| 1226 | 1226 | ||
| 1227 | spin_lock_irqsave(&i8253_lock, flags); | 1227 | raw_spin_lock_irqsave(&i8253_lock, flags); |
| 1228 | /* set the clock to HZ */ | 1228 | /* set the clock to HZ */ |
| 1229 | outb_pit(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */ | 1229 | outb_pit(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */ |
| 1230 | udelay(10); | 1230 | udelay(10); |
| @@ -1232,7 +1232,7 @@ static void reinit_timer(void) | |||
| 1232 | udelay(10); | 1232 | udelay(10); |
| 1233 | outb_pit(LATCH >> 8, PIT_CH0); /* MSB */ | 1233 | outb_pit(LATCH >> 8, PIT_CH0); /* MSB */ |
| 1234 | udelay(10); | 1234 | udelay(10); |
| 1235 | spin_unlock_irqrestore(&i8253_lock, flags); | 1235 | raw_spin_unlock_irqrestore(&i8253_lock, flags); |
| 1236 | #endif | 1236 | #endif |
| 1237 | } | 1237 | } |
| 1238 | 1238 | ||
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index c202b62f3671..3a785da34b6f 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
| @@ -14,7 +14,7 @@ CFLAGS_common.o := $(nostackp) | |||
| 14 | 14 | ||
| 15 | obj-y := intel_cacheinfo.o addon_cpuid_features.o | 15 | obj-y := intel_cacheinfo.o addon_cpuid_features.o |
| 16 | obj-y += proc.o capflags.o powerflags.o common.o | 16 | obj-y += proc.o capflags.o powerflags.o common.o |
| 17 | obj-y += vmware.o hypervisor.o sched.o | 17 | obj-y += vmware.o hypervisor.o sched.o mshyperv.o |
| 18 | 18 | ||
| 19 | obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o | 19 | obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o |
| 20 | obj-$(CONFIG_X86_64) += bugs_64.o | 20 | obj-$(CONFIG_X86_64) += bugs_64.o |
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 97ad79cdf688..10fa5684a662 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c | |||
| @@ -30,12 +30,14 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) | |||
| 30 | const struct cpuid_bit *cb; | 30 | const struct cpuid_bit *cb; |
| 31 | 31 | ||
| 32 | static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { | 32 | static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { |
| 33 | { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 }, | 33 | { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 }, |
| 34 | { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 }, | 34 | { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 }, |
| 35 | { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a }, | 35 | { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006 }, |
| 36 | { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a }, | 36 | { X86_FEATURE_CPB, CR_EDX, 9, 0x80000007 }, |
| 37 | { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a }, | 37 | { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a }, |
| 38 | { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a }, | 38 | { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a }, |
| 39 | { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a }, | ||
| 40 | { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a }, | ||
| 39 | { 0, 0, 0, 0 } | 41 | { 0, 0, 0, 0 } |
| 40 | }; | 42 | }; |
| 41 | 43 | ||
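Each row of cpuid_bits[] names a CPUID leaf, an output register, and a bit to test; the rows added here pick up APERF/MPERF (leaf 6, ECX bit 0) and AMD core performance boost (leaf 0x80000007, EDX bit 9). The same bits can be probed from user space with GCC's <cpuid.h>, as in this x86-only illustration:

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        if (__get_cpuid(0x6, &eax, &ebx, &ecx, &edx))
                printf("APERF/MPERF: %s\n", (ecx & 1) ? "yes" : "no");

        if (__get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx))
                printf("CPB:         %s\n", (edx & (1u << 9)) ? "yes" : "no");

        return 0;
}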
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 01a265212395..c39576cb3018 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c | |||
| @@ -86,7 +86,7 @@ static void __init check_fpu(void) | |||
| 86 | 86 | ||
| 87 | static void __init check_hlt(void) | 87 | static void __init check_hlt(void) |
| 88 | { | 88 | { |
| 89 | if (paravirt_enabled()) | 89 | if (boot_cpu_data.x86 >= 5 || paravirt_enabled()) |
| 90 | return; | 90 | return; |
| 91 | 91 | ||
| 92 | printk(KERN_INFO "Checking 'hlt' instruction... "); | 92 | printk(KERN_INFO "Checking 'hlt' instruction... "); |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4868e4a951ee..68e4a6f2211e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
| @@ -1084,6 +1084,20 @@ static void clear_all_debug_regs(void) | |||
| 1084 | } | 1084 | } |
| 1085 | } | 1085 | } |
| 1086 | 1086 | ||
| 1087 | #ifdef CONFIG_KGDB | ||
| 1088 | /* | ||
| 1089 | * Restore debug regs if using kgdbwait and you have a kernel debugger | ||
| 1090 | * connection established. | ||
| 1091 | */ | ||
| 1092 | static void dbg_restore_debug_regs(void) | ||
| 1093 | { | ||
| 1094 | if (unlikely(kgdb_connected && arch_kgdb_ops.correct_hw_break)) | ||
| 1095 | arch_kgdb_ops.correct_hw_break(); | ||
| 1096 | } | ||
| 1097 | #else /* ! CONFIG_KGDB */ | ||
| 1098 | #define dbg_restore_debug_regs() | ||
| 1099 | #endif /* ! CONFIG_KGDB */ | ||
| 1100 | |||
| 1087 | /* | 1101 | /* |
| 1088 | * cpu_init() initializes state that is per-CPU. Some data is already | 1102 | * cpu_init() initializes state that is per-CPU. Some data is already |
| 1089 | * initialized (naturally) in the bootstrap process, such as the GDT | 1103 | * initialized (naturally) in the bootstrap process, such as the GDT |
| @@ -1107,9 +1121,9 @@ void __cpuinit cpu_init(void) | |||
| 1107 | oist = &per_cpu(orig_ist, cpu); | 1121 | oist = &per_cpu(orig_ist, cpu); |
| 1108 | 1122 | ||
| 1109 | #ifdef CONFIG_NUMA | 1123 | #ifdef CONFIG_NUMA |
| 1110 | if (cpu != 0 && percpu_read(node_number) == 0 && | 1124 | if (cpu != 0 && percpu_read(numa_node) == 0 && |
| 1111 | cpu_to_node(cpu) != NUMA_NO_NODE) | 1125 | early_cpu_to_node(cpu) != NUMA_NO_NODE) |
| 1112 | percpu_write(node_number, cpu_to_node(cpu)); | 1126 | set_numa_node(early_cpu_to_node(cpu)); |
| 1113 | #endif | 1127 | #endif |
| 1114 | 1128 | ||
| 1115 | me = current; | 1129 | me = current; |
| @@ -1174,18 +1188,8 @@ void __cpuinit cpu_init(void) | |||
| 1174 | load_TR_desc(); | 1188 | load_TR_desc(); |
| 1175 | load_LDT(&init_mm.context); | 1189 | load_LDT(&init_mm.context); |
| 1176 | 1190 | ||
| 1177 | #ifdef CONFIG_KGDB | 1191 | clear_all_debug_regs(); |
| 1178 | /* | 1192 | dbg_restore_debug_regs(); |
| 1179 | * If the kgdb is connected no debug regs should be altered. This | ||
| 1180 | * is only applicable when KGDB and a KGDB I/O module are built | ||
| 1181 | * into the kernel and you are using early debugging with | ||
| 1182 | * kgdbwait. KGDB will control the kernel HW breakpoint registers. | ||
| 1183 | */ | ||
| 1184 | if (kgdb_connected && arch_kgdb_ops.correct_hw_break) | ||
| 1185 | arch_kgdb_ops.correct_hw_break(); | ||
| 1186 | else | ||
| 1187 | #endif | ||
| 1188 | clear_all_debug_regs(); | ||
| 1189 | 1193 | ||
| 1190 | fpu_init(); | 1194 | fpu_init(); |
| 1191 | 1195 | ||
| @@ -1239,14 +1243,12 @@ void __cpuinit cpu_init(void) | |||
| 1239 | #endif | 1243 | #endif |
| 1240 | 1244 | ||
| 1241 | clear_all_debug_regs(); | 1245 | clear_all_debug_regs(); |
| 1246 | dbg_restore_debug_regs(); | ||
| 1242 | 1247 | ||
| 1243 | /* | 1248 | /* |
| 1244 | * Force FPU initialization: | 1249 | * Force FPU initialization: |
| 1245 | */ | 1250 | */ |
| 1246 | if (cpu_has_xsave) | 1251 | current_thread_info()->status = 0; |
| 1247 | current_thread_info()->status = TS_XSAVE; | ||
| 1248 | else | ||
| 1249 | current_thread_info()->status = 0; | ||
| 1250 | clear_used_math(); | 1252 | clear_used_math(); |
| 1251 | mxcsr_feature_mask_init(); | 1253 | mxcsr_feature_mask_init(); |
| 1252 | 1254 | ||
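The cpu_init() cleanup above pulls the KGDB-specific branch into dbg_restore_debug_regs(), which compiles to an empty statement when CONFIG_KGDB is off, so both the 64-bit and 32-bit paths can call it unconditionally right after clear_all_debug_regs(). A minimal, generic sketch of that stub idiom follows; CONFIG_FOO and both function names are hypothetical.

/* "#ifdef'd helper with an empty stub" idiom; this is a fragment, not the
 * kernel's code. */
#ifdef CONFIG_FOO
static void foo_fixup(void)
{
	/* real work, compiled only when the feature is configured in */
}
#else
#define foo_fixup()	do { } while (0)
#endif

static void common_path(void)
{
	foo_fixup();	/* callers stay free of #ifdef clutter */
}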
diff --git a/arch/x86/kernel/cpu/cpufreq/Makefile b/arch/x86/kernel/cpu/cpufreq/Makefile index 1840c0a5170b..bd54bf67e6fb 100644 --- a/arch/x86/kernel/cpu/cpufreq/Makefile +++ b/arch/x86/kernel/cpu/cpufreq/Makefile | |||
| @@ -2,8 +2,8 @@ | |||
| 2 | # K8 systems. ACPI is preferred to all other hardware-specific drivers. | 2 | # K8 systems. ACPI is preferred to all other hardware-specific drivers. |
| 3 | # speedstep-* is preferred over p4-clockmod. | 3 | # speedstep-* is preferred over p4-clockmod. |
| 4 | 4 | ||
| 5 | obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o | 5 | obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o mperf.o |
| 6 | obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o | 6 | obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o mperf.o |
| 7 | obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o | 7 | obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o |
| 8 | obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o | 8 | obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o |
| 9 | obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o | 9 | obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o |
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 459168083b77..1d3cddaa40ee 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | |||
| @@ -46,6 +46,7 @@ | |||
| 46 | #include <asm/msr.h> | 46 | #include <asm/msr.h> |
| 47 | #include <asm/processor.h> | 47 | #include <asm/processor.h> |
| 48 | #include <asm/cpufeature.h> | 48 | #include <asm/cpufeature.h> |
| 49 | #include "mperf.h" | ||
| 49 | 50 | ||
| 50 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ | 51 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ |
| 51 | "acpi-cpufreq", msg) | 52 | "acpi-cpufreq", msg) |
| @@ -71,8 +72,6 @@ struct acpi_cpufreq_data { | |||
| 71 | 72 | ||
| 72 | static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data); | 73 | static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data); |
| 73 | 74 | ||
| 74 | static DEFINE_PER_CPU(struct aperfmperf, acfreq_old_perf); | ||
| 75 | |||
| 76 | /* acpi_perf_data is a pointer to percpu data. */ | 75 | /* acpi_perf_data is a pointer to percpu data. */ |
| 77 | static struct acpi_processor_performance *acpi_perf_data; | 76 | static struct acpi_processor_performance *acpi_perf_data; |
| 78 | 77 | ||
| @@ -240,45 +239,6 @@ static u32 get_cur_val(const struct cpumask *mask) | |||
| 240 | return cmd.val; | 239 | return cmd.val; |
| 241 | } | 240 | } |
| 242 | 241 | ||
| 243 | /* Called via smp_call_function_single(), on the target CPU */ | ||
| 244 | static void read_measured_perf_ctrs(void *_cur) | ||
| 245 | { | ||
| 246 | struct aperfmperf *am = _cur; | ||
| 247 | |||
| 248 | get_aperfmperf(am); | ||
| 249 | } | ||
| 250 | |||
| 251 | /* | ||
| 252 | * Return the measured active (C0) frequency on this CPU since last call | ||
| 253 | * to this function. | ||
| 254 | * Input: cpu number | ||
| 255 | * Return: Average CPU frequency in terms of max frequency (zero on error) | ||
| 256 | * | ||
| 257 | * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance | ||
| 258 | * over a period of time, while CPU is in C0 state. | ||
| 259 | * IA32_MPERF counts at the rate of max advertised frequency | ||
| 260 | * IA32_APERF counts at the rate of actual CPU frequency | ||
| 261 | * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and | ||
| 262 | * no meaning should be associated with absolute values of these MSRs. | ||
| 263 | */ | ||
| 264 | static unsigned int get_measured_perf(struct cpufreq_policy *policy, | ||
| 265 | unsigned int cpu) | ||
| 266 | { | ||
| 267 | struct aperfmperf perf; | ||
| 268 | unsigned long ratio; | ||
| 269 | unsigned int retval; | ||
| 270 | |||
| 271 | if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1)) | ||
| 272 | return 0; | ||
| 273 | |||
| 274 | ratio = calc_aperfmperf_ratio(&per_cpu(acfreq_old_perf, cpu), &perf); | ||
| 275 | per_cpu(acfreq_old_perf, cpu) = perf; | ||
| 276 | |||
| 277 | retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT; | ||
| 278 | |||
| 279 | return retval; | ||
| 280 | } | ||
| 281 | |||
| 282 | static unsigned int get_cur_freq_on_cpu(unsigned int cpu) | 242 | static unsigned int get_cur_freq_on_cpu(unsigned int cpu) |
| 283 | { | 243 | { |
| 284 | struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu); | 244 | struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu); |
| @@ -702,7 +662,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) | |||
| 702 | 662 | ||
| 703 | /* Check for APERF/MPERF support in hardware */ | 663 | /* Check for APERF/MPERF support in hardware */ |
| 704 | if (cpu_has(c, X86_FEATURE_APERFMPERF)) | 664 | if (cpu_has(c, X86_FEATURE_APERFMPERF)) |
| 705 | acpi_cpufreq_driver.getavg = get_measured_perf; | 665 | acpi_cpufreq_driver.getavg = cpufreq_get_measured_perf; |
| 706 | 666 | ||
| 707 | dprintk("CPU%u - ACPI performance management activated.\n", cpu); | 667 | dprintk("CPU%u - ACPI performance management activated.\n", cpu); |
| 708 | for (i = 0; i < perf->state_count; i++) | 668 | for (i = 0; i < perf->state_count; i++) |
diff --git a/arch/x86/kernel/cpu/cpufreq/mperf.c b/arch/x86/kernel/cpu/cpufreq/mperf.c new file mode 100644 index 000000000000..911e193018ae --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/mperf.c | |||
| @@ -0,0 +1,51 @@ | |||
| 1 | #include <linux/kernel.h> | ||
| 2 | #include <linux/smp.h> | ||
| 3 | #include <linux/module.h> | ||
| 4 | #include <linux/init.h> | ||
| 5 | #include <linux/cpufreq.h> | ||
| 6 | #include <linux/slab.h> | ||
| 7 | |||
| 8 | #include "mperf.h" | ||
| 9 | |||
| 10 | static DEFINE_PER_CPU(struct aperfmperf, acfreq_old_perf); | ||
| 11 | |||
| 12 | /* Called via smp_call_function_single(), on the target CPU */ | ||
| 13 | static void read_measured_perf_ctrs(void *_cur) | ||
| 14 | { | ||
| 15 | struct aperfmperf *am = _cur; | ||
| 16 | |||
| 17 | get_aperfmperf(am); | ||
| 18 | } | ||
| 19 | |||
| 20 | /* | ||
| 21 | * Return the measured active (C0) frequency on this CPU since last call | ||
| 22 | * to this function. | ||
| 23 | * Input: cpu number | ||
| 24 | * Return: Average CPU frequency in terms of max frequency (zero on error) | ||
| 25 | * | ||
| 26 | * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance | ||
| 27 | * over a period of time, while the CPU is in the C0 state. | ||
| 28 | * IA32_MPERF counts at the rate of the maximum advertised frequency, | ||
| 29 | * IA32_APERF counts at the rate of the actual CPU frequency. | ||
| 30 | * Only the IA32_APERF/IA32_MPERF ratio is architecturally defined; | ||
| 31 | * no meaning should be associated with the absolute values of these MSRs. | ||
| 32 | */ | ||
| 33 | unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy, | ||
| 34 | unsigned int cpu) | ||
| 35 | { | ||
| 36 | struct aperfmperf perf; | ||
| 37 | unsigned long ratio; | ||
| 38 | unsigned int retval; | ||
| 39 | |||
| 40 | if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1)) | ||
| 41 | return 0; | ||
| 42 | |||
| 43 | ratio = calc_aperfmperf_ratio(&per_cpu(acfreq_old_perf, cpu), &perf); | ||
| 44 | per_cpu(acfreq_old_perf, cpu) = perf; | ||
| 45 | |||
| 46 | retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT; | ||
| 47 | |||
| 48 | return retval; | ||
| 49 | } | ||
| 50 | EXPORT_SYMBOL_GPL(cpufreq_get_measured_perf); | ||
| 51 | MODULE_LICENSE("GPL"); | ||
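cpufreq_get_measured_perf() reports the average C0 frequency as max_freq scaled by the APERF/MPERF delta ratio in fixed point. The user-space sketch below mirrors that arithmetic; the value 10 for APERFMPERF_SHIFT is an assumption about this kernel's definition, and the counter-wrap and overflow handling done in the kernel is omitted for clarity.

#include <stdio.h>
#include <stdint.h>

#define APERFMPERF_SHIFT 10	/* assumed fixed-point shift */

static unsigned int measured_freq_khz(unsigned int max_freq_khz,
				      uint64_t aperf_delta,
				      uint64_t mperf_delta)
{
	uint64_t ratio;

	if (!mperf_delta)
		return 0;		/* counters did not move; report error */

	ratio = (aperf_delta << APERFMPERF_SHIFT) / mperf_delta;
	return (unsigned int)(((uint64_t)max_freq_khz * ratio) >> APERFMPERF_SHIFT);
}

int main(void)
{
	/* A 2.6 GHz part whose clock ran at half speed during the interval
	 * reports roughly 1.3 GHz of measured C0 frequency. */
	printf("%u kHz\n", measured_freq_khz(2600000, 1300000, 2600000));
	return 0;
}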
diff --git a/arch/x86/kernel/cpu/cpufreq/mperf.h b/arch/x86/kernel/cpu/cpufreq/mperf.h new file mode 100644 index 000000000000..5dbf2950dc22 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/mperf.h | |||
| @@ -0,0 +1,9 @@ | |||
| 1 | /* | ||
| 2 | * (c) 2010 Advanced Micro Devices, Inc. | ||
| 3 | * Your use of this code is subject to the terms and conditions of the | ||
| 4 | * GNU general public license version 2. See "COPYING" or | ||
| 5 | * http://www.gnu.org/licenses/gpl.html | ||
| 6 | */ | ||
| 7 | |||
| 8 | unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy, | ||
| 9 | unsigned int cpu); | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index b6215b9798e2..7ec2123838e6 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c | |||
| @@ -1,6 +1,5 @@ | |||
| 1 | |||
| 2 | /* | 1 | /* |
| 3 | * (c) 2003-2006 Advanced Micro Devices, Inc. | 2 | * (c) 2003-2010 Advanced Micro Devices, Inc. |
| 4 | * Your use of this code is subject to the terms and conditions of the | 3 | * Your use of this code is subject to the terms and conditions of the |
| 5 | * GNU general public license version 2. See "COPYING" or | 4 | * GNU general public license version 2. See "COPYING" or |
| 6 | * http://www.gnu.org/licenses/gpl.html | 5 | * http://www.gnu.org/licenses/gpl.html |
| @@ -46,6 +45,7 @@ | |||
| 46 | #define PFX "powernow-k8: " | 45 | #define PFX "powernow-k8: " |
| 47 | #define VERSION "version 2.20.00" | 46 | #define VERSION "version 2.20.00" |
| 48 | #include "powernow-k8.h" | 47 | #include "powernow-k8.h" |
| 48 | #include "mperf.h" | ||
| 49 | 49 | ||
| 50 | /* serialize freq changes */ | 50 | /* serialize freq changes */ |
| 51 | static DEFINE_MUTEX(fidvid_mutex); | 51 | static DEFINE_MUTEX(fidvid_mutex); |
| @@ -54,6 +54,12 @@ static DEFINE_PER_CPU(struct powernow_k8_data *, powernow_data); | |||
| 54 | 54 | ||
| 55 | static int cpu_family = CPU_OPTERON; | 55 | static int cpu_family = CPU_OPTERON; |
| 56 | 56 | ||
| 57 | /* core performance boost */ | ||
| 58 | static bool cpb_capable, cpb_enabled; | ||
| 59 | static struct msr __percpu *msrs; | ||
| 60 | |||
| 61 | static struct cpufreq_driver cpufreq_amd64_driver; | ||
| 62 | |||
| 57 | #ifndef CONFIG_SMP | 63 | #ifndef CONFIG_SMP |
| 58 | static inline const struct cpumask *cpu_core_mask(int cpu) | 64 | static inline const struct cpumask *cpu_core_mask(int cpu) |
| 59 | { | 65 | { |
| @@ -1249,6 +1255,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | |||
| 1249 | struct powernow_k8_data *data; | 1255 | struct powernow_k8_data *data; |
| 1250 | struct init_on_cpu init_on_cpu; | 1256 | struct init_on_cpu init_on_cpu; |
| 1251 | int rc; | 1257 | int rc; |
| 1258 | struct cpuinfo_x86 *c = &cpu_data(pol->cpu); | ||
| 1252 | 1259 | ||
| 1253 | if (!cpu_online(pol->cpu)) | 1260 | if (!cpu_online(pol->cpu)) |
| 1254 | return -ENODEV; | 1261 | return -ENODEV; |
| @@ -1323,6 +1330,10 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | |||
| 1323 | return -EINVAL; | 1330 | return -EINVAL; |
| 1324 | } | 1331 | } |
| 1325 | 1332 | ||
| 1333 | /* Check for APERF/MPERF support in hardware */ | ||
| 1334 | if (cpu_has(c, X86_FEATURE_APERFMPERF)) | ||
| 1335 | cpufreq_amd64_driver.getavg = cpufreq_get_measured_perf; | ||
| 1336 | |||
| 1326 | cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); | 1337 | cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); |
| 1327 | 1338 | ||
| 1328 | if (cpu_family == CPU_HW_PSTATE) | 1339 | if (cpu_family == CPU_HW_PSTATE) |
| @@ -1394,8 +1405,77 @@ out: | |||
| 1394 | return khz; | 1405 | return khz; |
| 1395 | } | 1406 | } |
| 1396 | 1407 | ||
| 1408 | static void _cpb_toggle_msrs(bool t) | ||
| 1409 | { | ||
| 1410 | int cpu; | ||
| 1411 | |||
| 1412 | get_online_cpus(); | ||
| 1413 | |||
| 1414 | rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs); | ||
| 1415 | |||
| 1416 | for_each_cpu(cpu, cpu_online_mask) { | ||
| 1417 | struct msr *reg = per_cpu_ptr(msrs, cpu); | ||
| 1418 | if (t) | ||
| 1419 | reg->l &= ~BIT(25); | ||
| 1420 | else | ||
| 1421 | reg->l |= BIT(25); | ||
| 1422 | } | ||
| 1423 | wrmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs); | ||
| 1424 | |||
| 1425 | put_online_cpus(); | ||
| 1426 | } | ||
| 1427 | |||
| 1428 | /* | ||
| 1429 | * Switch on/off core performance boosting. | ||
| 1430 | * | ||
| 1431 | * 0=disable | ||
| 1432 | * 1=enable. | ||
| 1433 | */ | ||
| 1434 | static void cpb_toggle(bool t) | ||
| 1435 | { | ||
| 1436 | if (!cpb_capable) | ||
| 1437 | return; | ||
| 1438 | |||
| 1439 | if (t && !cpb_enabled) { | ||
| 1440 | cpb_enabled = true; | ||
| 1441 | _cpb_toggle_msrs(t); | ||
| 1442 | printk(KERN_INFO PFX "Core Boosting enabled.\n"); | ||
| 1443 | } else if (!t && cpb_enabled) { | ||
| 1444 | cpb_enabled = false; | ||
| 1445 | _cpb_toggle_msrs(t); | ||
| 1446 | printk(KERN_INFO PFX "Core Boosting disabled.\n"); | ||
| 1447 | } | ||
| 1448 | } | ||
| 1449 | |||
| 1450 | static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf, | ||
| 1451 | size_t count) | ||
| 1452 | { | ||
| 1453 | int ret = -EINVAL; | ||
| 1454 | unsigned long val = 0; | ||
| 1455 | |||
| 1456 | ret = strict_strtoul(buf, 10, &val); | ||
| 1457 | if (!ret && (val == 0 || val == 1) && cpb_capable) | ||
| 1458 | cpb_toggle(val); | ||
| 1459 | else | ||
| 1460 | return -EINVAL; | ||
| 1461 | |||
| 1462 | return count; | ||
| 1463 | } | ||
| 1464 | |||
| 1465 | static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf) | ||
| 1466 | { | ||
| 1467 | return sprintf(buf, "%u\n", cpb_enabled); | ||
| 1468 | } | ||
| 1469 | |||
| 1470 | #define define_one_rw(_name) \ | ||
| 1471 | static struct freq_attr _name = \ | ||
| 1472 | __ATTR(_name, 0644, show_##_name, store_##_name) | ||
| 1473 | |||
| 1474 | define_one_rw(cpb); | ||
| 1475 | |||
| 1397 | static struct freq_attr *powernow_k8_attr[] = { | 1476 | static struct freq_attr *powernow_k8_attr[] = { |
| 1398 | &cpufreq_freq_attr_scaling_available_freqs, | 1477 | &cpufreq_freq_attr_scaling_available_freqs, |
| 1478 | &cpb, | ||
| 1399 | NULL, | 1479 | NULL, |
| 1400 | }; | 1480 | }; |
| 1401 | 1481 | ||
| @@ -1411,10 +1491,51 @@ static struct cpufreq_driver cpufreq_amd64_driver = { | |||
| 1411 | .attr = powernow_k8_attr, | 1491 | .attr = powernow_k8_attr, |
| 1412 | }; | 1492 | }; |
| 1413 | 1493 | ||
| 1494 | /* | ||
| 1495 | * Clear the boost-disable flag on the CPU_DOWN path so that this cpu | ||
| 1496 | * cannot block the remaining ones from boosting. On the CPU_UP path we | ||
| 1497 | * simply keep the boost-disable flag in sync with the current global | ||
| 1498 | * state. | ||
| 1499 | */ | ||
| 1500 | static int cpb_notify(struct notifier_block *nb, unsigned long action, | ||
| 1501 | void *hcpu) | ||
| 1502 | { | ||
| 1503 | unsigned cpu = (long)hcpu; | ||
| 1504 | u32 lo, hi; | ||
| 1505 | |||
| 1506 | switch (action) { | ||
| 1507 | case CPU_UP_PREPARE: | ||
| 1508 | case CPU_UP_PREPARE_FROZEN: | ||
| 1509 | |||
| 1510 | if (!cpb_enabled) { | ||
| 1511 | rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi); | ||
| 1512 | lo |= BIT(25); | ||
| 1513 | wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi); | ||
| 1514 | } | ||
| 1515 | break; | ||
| 1516 | |||
| 1517 | case CPU_DOWN_PREPARE: | ||
| 1518 | case CPU_DOWN_PREPARE_FROZEN: | ||
| 1519 | rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi); | ||
| 1520 | lo &= ~BIT(25); | ||
| 1521 | wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi); | ||
| 1522 | break; | ||
| 1523 | |||
| 1524 | default: | ||
| 1525 | break; | ||
| 1526 | } | ||
| 1527 | |||
| 1528 | return NOTIFY_OK; | ||
| 1529 | } | ||
| 1530 | |||
| 1531 | static struct notifier_block cpb_nb = { | ||
| 1532 | .notifier_call = cpb_notify, | ||
| 1533 | }; | ||
| 1534 | |||
| 1414 | /* driver entry point for init */ | 1535 | /* driver entry point for init */ |
| 1415 | static int __cpuinit powernowk8_init(void) | 1536 | static int __cpuinit powernowk8_init(void) |
| 1416 | { | 1537 | { |
| 1417 | unsigned int i, supported_cpus = 0; | 1538 | unsigned int i, supported_cpus = 0, cpu; |
| 1418 | 1539 | ||
| 1419 | for_each_online_cpu(i) { | 1540 | for_each_online_cpu(i) { |
| 1420 | int rc; | 1541 | int rc; |
| @@ -1423,15 +1544,36 @@ static int __cpuinit powernowk8_init(void) | |||
| 1423 | supported_cpus++; | 1544 | supported_cpus++; |
| 1424 | } | 1545 | } |
| 1425 | 1546 | ||
| 1426 | if (supported_cpus == num_online_cpus()) { | 1547 | if (supported_cpus != num_online_cpus()) |
| 1427 | printk(KERN_INFO PFX "Found %d %s " | 1548 | return -ENODEV; |
| 1428 | "processors (%d cpu cores) (" VERSION ")\n", | 1549 | |
| 1429 | num_online_nodes(), | 1550 | printk(KERN_INFO PFX "Found %d %s (%d cpu cores) (" VERSION ")\n", |
| 1430 | boot_cpu_data.x86_model_id, supported_cpus); | 1551 | num_online_nodes(), boot_cpu_data.x86_model_id, supported_cpus); |
| 1431 | return cpufreq_register_driver(&cpufreq_amd64_driver); | 1552 | |
| 1553 | if (boot_cpu_has(X86_FEATURE_CPB)) { | ||
| 1554 | |||
| 1555 | cpb_capable = true; | ||
| 1556 | |||
| 1557 | register_cpu_notifier(&cpb_nb); | ||
| 1558 | |||
| 1559 | msrs = msrs_alloc(); | ||
| 1560 | if (!msrs) { | ||
| 1561 | printk(KERN_ERR "%s: Error allocating msrs!\n", __func__); | ||
| 1562 | return -ENOMEM; | ||
| 1563 | } | ||
| 1564 | |||
| 1565 | rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs); | ||
| 1566 | |||
| 1567 | for_each_cpu(cpu, cpu_online_mask) { | ||
| 1568 | struct msr *reg = per_cpu_ptr(msrs, cpu); | ||
| 1569 | cpb_enabled |= !(!!(reg->l & BIT(25))); | ||
| 1570 | } | ||
| 1571 | |||
| 1572 | printk(KERN_INFO PFX "Core Performance Boosting: %s.\n", | ||
| 1573 | (cpb_enabled ? "on" : "off")); | ||
| 1432 | } | 1574 | } |
| 1433 | 1575 | ||
| 1434 | return -ENODEV; | 1576 | return cpufreq_register_driver(&cpufreq_amd64_driver); |
| 1435 | } | 1577 | } |
| 1436 | 1578 | ||
| 1437 | /* driver entry point for term */ | 1579 | /* driver entry point for term */ |
| @@ -1439,6 +1581,13 @@ static void __exit powernowk8_exit(void) | |||
| 1439 | { | 1581 | { |
| 1440 | dprintk("exit\n"); | 1582 | dprintk("exit\n"); |
| 1441 | 1583 | ||
| 1584 | if (boot_cpu_has(X86_FEATURE_CPB)) { | ||
| 1585 | msrs_free(msrs); | ||
| 1586 | msrs = NULL; | ||
| 1587 | |||
| 1588 | unregister_cpu_notifier(&cpb_nb); | ||
| 1589 | } | ||
| 1590 | |||
| 1442 | cpufreq_unregister_driver(&cpufreq_amd64_driver); | 1591 | cpufreq_unregister_driver(&cpufreq_amd64_driver); |
| 1443 | } | 1592 | } |
| 1444 | 1593 | ||
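With these changes powernow-k8 exposes a writable "cpb" attribute per cpufreq policy and keeps the per-core boost-disable bit (bit 25 of MSR_K7_HWCR) in sync across hotplug. Below is a hedged user-space sketch of flipping boosting through that attribute; the sysfs path is assumed from the freq_attr name and the conventional cpufreq layout, and the write requires root.

#include <stdio.h>

/* Assumed location of the per-policy "cpb" attribute added above. */
#define CPB_PATH "/sys/devices/system/cpu/cpu0/cpufreq/cpb"

static int set_cpb(int enable)
{
	FILE *f = fopen(CPB_PATH, "w");

	if (!f)
		return -1;
	fprintf(f, "%d\n", enable ? 1 : 0);
	return fclose(f);
}

int main(void)
{
	if (set_cpb(0))			/* mirrors "echo 0 > .../cpb" */
		perror("disabling core performance boost");
	return 0;
}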
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h index 02ce824073cb..df3529b1c02d 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h | |||
| @@ -5,7 +5,6 @@ | |||
| 5 | * http://www.gnu.org/licenses/gpl.html | 5 | * http://www.gnu.org/licenses/gpl.html |
| 6 | */ | 6 | */ |
| 7 | 7 | ||
| 8 | |||
| 9 | enum pstate { | 8 | enum pstate { |
| 10 | HW_PSTATE_INVALID = 0xff, | 9 | HW_PSTATE_INVALID = 0xff, |
| 11 | HW_PSTATE_0 = 0, | 10 | HW_PSTATE_0 = 0, |
| @@ -55,7 +54,6 @@ struct powernow_k8_data { | |||
| 55 | struct cpumask *available_cores; | 54 | struct cpumask *available_cores; |
| 56 | }; | 55 | }; |
| 57 | 56 | ||
| 58 | |||
| 59 | /* processor's cpuid instruction support */ | 57 | /* processor's cpuid instruction support */ |
| 60 | #define CPUID_PROCESSOR_SIGNATURE 1 /* function 1 */ | 58 | #define CPUID_PROCESSOR_SIGNATURE 1 /* function 1 */ |
| 61 | #define CPUID_XFAM 0x0ff00000 /* extended family */ | 59 | #define CPUID_XFAM 0x0ff00000 /* extended family */ |
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index 08be922de33a..dd531cc56a8f 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c | |||
| @@ -21,37 +21,55 @@ | |||
| 21 | * | 21 | * |
| 22 | */ | 22 | */ |
| 23 | 23 | ||
| 24 | #include <linux/module.h> | ||
| 24 | #include <asm/processor.h> | 25 | #include <asm/processor.h> |
| 25 | #include <asm/vmware.h> | ||
| 26 | #include <asm/hypervisor.h> | 26 | #include <asm/hypervisor.h> |
| 27 | 27 | ||
| 28 | static inline void __cpuinit | 28 | /* |
| 29 | detect_hypervisor_vendor(struct cpuinfo_x86 *c) | 29 | * Hypervisor detect order. This is specified explicitly here because |
| 30 | * some hypervisors might implement compatibility modes for other | ||
| 31 | * hypervisors and therefore need to be detected in a specific sequence. | ||
| 32 | */ | ||
| 33 | static const __initconst struct hypervisor_x86 * const hypervisors[] = | ||
| 30 | { | 34 | { |
| 31 | if (vmware_platform()) | 35 | &x86_hyper_vmware, |
| 32 | c->x86_hyper_vendor = X86_HYPER_VENDOR_VMWARE; | 36 | &x86_hyper_ms_hyperv, |
| 33 | else | 37 | }; |
| 34 | c->x86_hyper_vendor = X86_HYPER_VENDOR_NONE; | ||
| 35 | } | ||
| 36 | 38 | ||
| 37 | static inline void __cpuinit | 39 | const struct hypervisor_x86 *x86_hyper; |
| 38 | hypervisor_set_feature_bits(struct cpuinfo_x86 *c) | 40 | EXPORT_SYMBOL(x86_hyper); |
| 41 | |||
| 42 | static inline void __init | ||
| 43 | detect_hypervisor_vendor(void) | ||
| 39 | { | 44 | { |
| 40 | if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE) { | 45 | const struct hypervisor_x86 *h, * const *p; |
| 41 | vmware_set_feature_bits(c); | 46 | |
| 42 | return; | 47 | for (p = hypervisors; p < hypervisors + ARRAY_SIZE(hypervisors); p++) { |
| 48 | h = *p; | ||
| 49 | if (h->detect()) { | ||
| 50 | x86_hyper = h; | ||
| 51 | printk(KERN_INFO "Hypervisor detected: %s\n", h->name); | ||
| 52 | break; | ||
| 53 | } | ||
| 43 | } | 54 | } |
| 44 | } | 55 | } |
| 45 | 56 | ||
| 46 | void __cpuinit init_hypervisor(struct cpuinfo_x86 *c) | 57 | void __cpuinit init_hypervisor(struct cpuinfo_x86 *c) |
| 47 | { | 58 | { |
| 48 | detect_hypervisor_vendor(c); | 59 | if (x86_hyper && x86_hyper->set_cpu_features) |
| 49 | hypervisor_set_feature_bits(c); | 60 | x86_hyper->set_cpu_features(c); |
| 50 | } | 61 | } |
| 51 | 62 | ||
| 52 | void __init init_hypervisor_platform(void) | 63 | void __init init_hypervisor_platform(void) |
| 53 | { | 64 | { |
| 65 | |||
| 66 | detect_hypervisor_vendor(); | ||
| 67 | |||
| 68 | if (!x86_hyper) | ||
| 69 | return; | ||
| 70 | |||
| 54 | init_hypervisor(&boot_cpu_data); | 71 | init_hypervisor(&boot_cpu_data); |
| 55 | if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE) | 72 | |
| 56 | vmware_platform_setup(); | 73 | if (x86_hyper->init_platform) |
| 74 | x86_hyper->init_platform(); | ||
| 57 | } | 75 | } |
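The rework replaces hard-coded VMware checks with a priority-ordered table of hypervisor descriptors whose detect() callbacks are tried in turn; ordering matters because one hypervisor may emulate another's interface. Below is a minimal, generic sketch of that first-match table pattern, with hypothetical probe functions standing in for the real detect() hooks.

#include <stdio.h>
#include <stddef.h>

struct detector {
	const char *name;
	int (*detect)(void);
};

static int probe_vmware(void) { return 0; }	/* stand-in probes */
static int probe_hyperv(void) { return 1; }

/* Array order encodes priority: the more specific platform goes first. */
static const struct detector detectors[] = {
	{ "VMware",            probe_vmware },
	{ "Microsoft Hyper-V", probe_hyperv },
};

int main(void)
{
	const struct detector *found = NULL;
	size_t i;

	for (i = 0; i < sizeof(detectors) / sizeof(detectors[0]); i++) {
		if (detectors[i].detect()) {
			found = &detectors[i];
			break;		/* first successful probe wins */
		}
	}
	printf("detected: %s\n", found ? found->name : "none");
	return 0;
}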
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 1366c7cfd483..85f69cdeae10 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
| @@ -12,7 +12,6 @@ | |||
| 12 | #include <asm/processor.h> | 12 | #include <asm/processor.h> |
| 13 | #include <asm/pgtable.h> | 13 | #include <asm/pgtable.h> |
| 14 | #include <asm/msr.h> | 14 | #include <asm/msr.h> |
| 15 | #include <asm/ds.h> | ||
| 16 | #include <asm/bugs.h> | 15 | #include <asm/bugs.h> |
| 17 | #include <asm/cpu.h> | 16 | #include <asm/cpu.h> |
| 18 | 17 | ||
| @@ -373,12 +372,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
| 373 | set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); | 372 | set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); |
| 374 | } | 373 | } |
| 375 | 374 | ||
| 376 | if (c->cpuid_level > 6) { | ||
| 377 | unsigned ecx = cpuid_ecx(6); | ||
| 378 | if (ecx & 0x01) | ||
| 379 | set_cpu_cap(c, X86_FEATURE_APERFMPERF); | ||
| 380 | } | ||
| 381 | |||
| 382 | if (cpu_has_xmm2) | 375 | if (cpu_has_xmm2) |
| 383 | set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); | 376 | set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); |
| 384 | if (cpu_has_ds) { | 377 | if (cpu_has_ds) { |
| @@ -388,7 +381,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
| 388 | set_cpu_cap(c, X86_FEATURE_BTS); | 381 | set_cpu_cap(c, X86_FEATURE_BTS); |
| 389 | if (!(l1 & (1<<12))) | 382 | if (!(l1 & (1<<12))) |
| 390 | set_cpu_cap(c, X86_FEATURE_PEBS); | 383 | set_cpu_cap(c, X86_FEATURE_PEBS); |
| 391 | ds_init_intel(c); | ||
| 392 | } | 384 | } |
| 393 | 385 | ||
| 394 | if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush) | 386 | if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush) |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index b3eeb66c0a51..33eae2062cf5 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
| @@ -148,13 +148,19 @@ union _cpuid4_leaf_ecx { | |||
| 148 | u32 full; | 148 | u32 full; |
| 149 | }; | 149 | }; |
| 150 | 150 | ||
| 151 | struct amd_l3_cache { | ||
| 152 | struct pci_dev *dev; | ||
| 153 | bool can_disable; | ||
| 154 | unsigned indices; | ||
| 155 | u8 subcaches[4]; | ||
| 156 | }; | ||
| 157 | |||
| 151 | struct _cpuid4_info { | 158 | struct _cpuid4_info { |
| 152 | union _cpuid4_leaf_eax eax; | 159 | union _cpuid4_leaf_eax eax; |
| 153 | union _cpuid4_leaf_ebx ebx; | 160 | union _cpuid4_leaf_ebx ebx; |
| 154 | union _cpuid4_leaf_ecx ecx; | 161 | union _cpuid4_leaf_ecx ecx; |
| 155 | unsigned long size; | 162 | unsigned long size; |
| 156 | bool can_disable; | 163 | struct amd_l3_cache *l3; |
| 157 | unsigned int l3_indices; | ||
| 158 | DECLARE_BITMAP(shared_cpu_map, NR_CPUS); | 164 | DECLARE_BITMAP(shared_cpu_map, NR_CPUS); |
| 159 | }; | 165 | }; |
| 160 | 166 | ||
| @@ -164,8 +170,7 @@ struct _cpuid4_info_regs { | |||
| 164 | union _cpuid4_leaf_ebx ebx; | 170 | union _cpuid4_leaf_ebx ebx; |
| 165 | union _cpuid4_leaf_ecx ecx; | 171 | union _cpuid4_leaf_ecx ecx; |
| 166 | unsigned long size; | 172 | unsigned long size; |
| 167 | bool can_disable; | 173 | struct amd_l3_cache *l3; |
| 168 | unsigned int l3_indices; | ||
| 169 | }; | 174 | }; |
| 170 | 175 | ||
| 171 | unsigned short num_cache_leaves; | 176 | unsigned short num_cache_leaves; |
| @@ -302,87 +307,163 @@ struct _cache_attr { | |||
| 302 | }; | 307 | }; |
| 303 | 308 | ||
| 304 | #ifdef CONFIG_CPU_SUP_AMD | 309 | #ifdef CONFIG_CPU_SUP_AMD |
| 305 | static unsigned int __cpuinit amd_calc_l3_indices(void) | 310 | |
| 311 | /* | ||
| 312 | * L3 cache descriptors | ||
| 313 | */ | ||
| 314 | static struct amd_l3_cache **__cpuinitdata l3_caches; | ||
| 315 | |||
| 316 | static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3) | ||
| 306 | { | 317 | { |
| 307 | /* | ||
| 308 | * We're called over smp_call_function_single() and therefore | ||
| 309 | * are on the correct cpu. | ||
| 310 | */ | ||
| 311 | int cpu = smp_processor_id(); | ||
| 312 | int node = cpu_to_node(cpu); | ||
| 313 | struct pci_dev *dev = node_to_k8_nb_misc(node); | ||
| 314 | unsigned int sc0, sc1, sc2, sc3; | 318 | unsigned int sc0, sc1, sc2, sc3; |
| 315 | u32 val = 0; | 319 | u32 val = 0; |
| 316 | 320 | ||
| 317 | pci_read_config_dword(dev, 0x1C4, &val); | 321 | pci_read_config_dword(l3->dev, 0x1C4, &val); |
| 318 | 322 | ||
| 319 | /* calculate subcache sizes */ | 323 | /* calculate subcache sizes */ |
| 320 | sc0 = !(val & BIT(0)); | 324 | l3->subcaches[0] = sc0 = !(val & BIT(0)); |
| 321 | sc1 = !(val & BIT(4)); | 325 | l3->subcaches[1] = sc1 = !(val & BIT(4)); |
| 322 | sc2 = !(val & BIT(8)) + !(val & BIT(9)); | 326 | l3->subcaches[2] = sc2 = !(val & BIT(8)) + !(val & BIT(9)); |
| 323 | sc3 = !(val & BIT(12)) + !(val & BIT(13)); | 327 | l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13)); |
| 324 | 328 | ||
| 325 | return (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1; | 329 | l3->indices = (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1; |
| 330 | } | ||
| 331 | |||
| 332 | static struct amd_l3_cache * __cpuinit amd_init_l3_cache(int node) | ||
| 333 | { | ||
| 334 | struct amd_l3_cache *l3; | ||
| 335 | struct pci_dev *dev = node_to_k8_nb_misc(node); | ||
| 336 | |||
| 337 | l3 = kzalloc(sizeof(struct amd_l3_cache), GFP_ATOMIC); | ||
| 338 | if (!l3) { | ||
| 339 | printk(KERN_WARNING "Error allocating L3 struct\n"); | ||
| 340 | return NULL; | ||
| 341 | } | ||
| 342 | |||
| 343 | l3->dev = dev; | ||
| 344 | |||
| 345 | amd_calc_l3_indices(l3); | ||
| 346 | |||
| 347 | return l3; | ||
| 326 | } | 348 | } |
| 327 | 349 | ||
| 328 | static void __cpuinit | 350 | static void __cpuinit |
| 329 | amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) | 351 | amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) |
| 330 | { | 352 | { |
| 331 | if (index < 3) | 353 | int node; |
| 354 | |||
| 355 | if (boot_cpu_data.x86 != 0x10) | ||
| 332 | return; | 356 | return; |
| 333 | 357 | ||
| 334 | if (boot_cpu_data.x86 == 0x11) | 358 | if (index < 3) |
| 335 | return; | 359 | return; |
| 336 | 360 | ||
| 337 | /* see errata #382 and #388 */ | 361 | /* see errata #382 and #388 */ |
| 338 | if ((boot_cpu_data.x86 == 0x10) && | 362 | if (boot_cpu_data.x86_model < 0x8) |
| 339 | ((boot_cpu_data.x86_model < 0x8) || | 363 | return; |
| 340 | (boot_cpu_data.x86_mask < 0x1))) | 364 | |
| 365 | if ((boot_cpu_data.x86_model == 0x8 || | ||
| 366 | boot_cpu_data.x86_model == 0x9) | ||
| 367 | && | ||
| 368 | boot_cpu_data.x86_mask < 0x1) | ||
| 369 | return; | ||
| 370 | |||
| 371 | /* not in virtualized environments */ | ||
| 372 | if (num_k8_northbridges == 0) | ||
| 341 | return; | 373 | return; |
| 342 | 374 | ||
| 343 | this_leaf->can_disable = true; | 375 | /* |
| 344 | this_leaf->l3_indices = amd_calc_l3_indices(); | 376 | * Strictly speaking, the amount in @size below is leaked since it is |
| 377 | * never freed but this is done only on shutdown so it doesn't matter. | ||
| 378 | */ | ||
| 379 | if (!l3_caches) { | ||
| 380 | int size = num_k8_northbridges * sizeof(struct amd_l3_cache *); | ||
| 381 | |||
| 382 | l3_caches = kzalloc(size, GFP_ATOMIC); | ||
| 383 | if (!l3_caches) | ||
| 384 | return; | ||
| 385 | } | ||
| 386 | |||
| 387 | node = amd_get_nb_id(smp_processor_id()); | ||
| 388 | |||
| 389 | if (!l3_caches[node]) { | ||
| 390 | l3_caches[node] = amd_init_l3_cache(node); | ||
| 391 | l3_caches[node]->can_disable = true; | ||
| 392 | } | ||
| 393 | |||
| 394 | WARN_ON(!l3_caches[node]); | ||
| 395 | |||
| 396 | this_leaf->l3 = l3_caches[node]; | ||
| 345 | } | 397 | } |
| 346 | 398 | ||
| 347 | static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf, | 399 | static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf, |
| 348 | unsigned int index) | 400 | unsigned int slot) |
| 349 | { | 401 | { |
| 350 | int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); | 402 | struct pci_dev *dev = this_leaf->l3->dev; |
| 351 | int node = amd_get_nb_id(cpu); | ||
| 352 | struct pci_dev *dev = node_to_k8_nb_misc(node); | ||
| 353 | unsigned int reg = 0; | 403 | unsigned int reg = 0; |
| 354 | 404 | ||
| 355 | if (!this_leaf->can_disable) | 405 | if (!this_leaf->l3 || !this_leaf->l3->can_disable) |
| 356 | return -EINVAL; | 406 | return -EINVAL; |
| 357 | 407 | ||
| 358 | if (!dev) | 408 | if (!dev) |
| 359 | return -EINVAL; | 409 | return -EINVAL; |
| 360 | 410 | ||
| 361 | pci_read_config_dword(dev, 0x1BC + index * 4, ®); | 411 | pci_read_config_dword(dev, 0x1BC + slot * 4, ®); |
| 362 | return sprintf(buf, "0x%08x\n", reg); | 412 | return sprintf(buf, "0x%08x\n", reg); |
| 363 | } | 413 | } |
| 364 | 414 | ||
| 365 | #define SHOW_CACHE_DISABLE(index) \ | 415 | #define SHOW_CACHE_DISABLE(slot) \ |
| 366 | static ssize_t \ | 416 | static ssize_t \ |
| 367 | show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf) \ | 417 | show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf) \ |
| 368 | { \ | 418 | { \ |
| 369 | return show_cache_disable(this_leaf, buf, index); \ | 419 | return show_cache_disable(this_leaf, buf, slot); \ |
| 370 | } | 420 | } |
| 371 | SHOW_CACHE_DISABLE(0) | 421 | SHOW_CACHE_DISABLE(0) |
| 372 | SHOW_CACHE_DISABLE(1) | 422 | SHOW_CACHE_DISABLE(1) |
| 373 | 423 | ||
| 424 | static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu, | ||
| 425 | unsigned slot, unsigned long idx) | ||
| 426 | { | ||
| 427 | int i; | ||
| 428 | |||
| 429 | idx |= BIT(30); | ||
| 430 | |||
| 431 | /* | ||
| 432 | * disable index in all 4 subcaches | ||
| 433 | */ | ||
| 434 | for (i = 0; i < 4; i++) { | ||
| 435 | u32 reg = idx | (i << 20); | ||
| 436 | |||
| 437 | if (!l3->subcaches[i]) | ||
| 438 | continue; | ||
| 439 | |||
| 440 | pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg); | ||
| 441 | |||
| 442 | /* | ||
| 443 | * We need to WBINVD on a core on the node containing the L3 | ||
| 444 | * cache whose indices we disable; therefore a simple wbinvd() | ||
| 445 | * is not sufficient. | ||
| 446 | */ | ||
| 447 | wbinvd_on_cpu(cpu); | ||
| 448 | |||
| 449 | reg |= BIT(31); | ||
| 450 | pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg); | ||
| 451 | } | ||
| 452 | } | ||
| 453 | |||
| 454 | |||
| 374 | static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, | 455 | static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, |
| 375 | const char *buf, size_t count, unsigned int index) | 456 | const char *buf, size_t count, |
| 457 | unsigned int slot) | ||
| 376 | { | 458 | { |
| 459 | struct pci_dev *dev = this_leaf->l3->dev; | ||
| 377 | int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); | 460 | int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); |
| 378 | int node = amd_get_nb_id(cpu); | ||
| 379 | struct pci_dev *dev = node_to_k8_nb_misc(node); | ||
| 380 | unsigned long val = 0; | 461 | unsigned long val = 0; |
| 381 | 462 | ||
| 382 | #define SUBCACHE_MASK (3UL << 20) | 463 | #define SUBCACHE_MASK (3UL << 20) |
| 383 | #define SUBCACHE_INDEX 0xfff | 464 | #define SUBCACHE_INDEX 0xfff |
| 384 | 465 | ||
| 385 | if (!this_leaf->can_disable) | 466 | if (!this_leaf->l3 || !this_leaf->l3->can_disable) |
| 386 | return -EINVAL; | 467 | return -EINVAL; |
| 387 | 468 | ||
| 388 | if (!capable(CAP_SYS_ADMIN)) | 469 | if (!capable(CAP_SYS_ADMIN)) |
| @@ -396,26 +477,20 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, | |||
| 396 | 477 | ||
| 397 | /* do not allow writes outside of allowed bits */ | 478 | /* do not allow writes outside of allowed bits */ |
| 398 | if ((val & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) || | 479 | if ((val & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) || |
| 399 | ((val & SUBCACHE_INDEX) > this_leaf->l3_indices)) | 480 | ((val & SUBCACHE_INDEX) > this_leaf->l3->indices)) |
| 400 | return -EINVAL; | 481 | return -EINVAL; |
| 401 | 482 | ||
| 402 | val |= BIT(30); | 483 | amd_l3_disable_index(this_leaf->l3, cpu, slot, val); |
| 403 | pci_write_config_dword(dev, 0x1BC + index * 4, val); | 484 | |
| 404 | /* | ||
| 405 | * We need to WBINVD on a core on the node containing the L3 cache which | ||
| 406 | * indices we disable therefore a simple wbinvd() is not sufficient. | ||
| 407 | */ | ||
| 408 | wbinvd_on_cpu(cpu); | ||
| 409 | pci_write_config_dword(dev, 0x1BC + index * 4, val | BIT(31)); | ||
| 410 | return count; | 485 | return count; |
| 411 | } | 486 | } |
| 412 | 487 | ||
| 413 | #define STORE_CACHE_DISABLE(index) \ | 488 | #define STORE_CACHE_DISABLE(slot) \ |
| 414 | static ssize_t \ | 489 | static ssize_t \ |
| 415 | store_cache_disable_##index(struct _cpuid4_info *this_leaf, \ | 490 | store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \ |
| 416 | const char *buf, size_t count) \ | 491 | const char *buf, size_t count) \ |
| 417 | { \ | 492 | { \ |
| 418 | return store_cache_disable(this_leaf, buf, count, index); \ | 493 | return store_cache_disable(this_leaf, buf, count, slot); \ |
| 419 | } | 494 | } |
| 420 | STORE_CACHE_DISABLE(0) | 495 | STORE_CACHE_DISABLE(0) |
| 421 | STORE_CACHE_DISABLE(1) | 496 | STORE_CACHE_DISABLE(1) |
| @@ -443,8 +518,7 @@ __cpuinit cpuid4_cache_lookup_regs(int index, | |||
| 443 | 518 | ||
| 444 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { | 519 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { |
| 445 | amd_cpuid4(index, &eax, &ebx, &ecx); | 520 | amd_cpuid4(index, &eax, &ebx, &ecx); |
| 446 | if (boot_cpu_data.x86 >= 0x10) | 521 | amd_check_l3_disable(index, this_leaf); |
| 447 | amd_check_l3_disable(index, this_leaf); | ||
| 448 | } else { | 522 | } else { |
| 449 | cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); | 523 | cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); |
| 450 | } | 524 | } |
| @@ -701,6 +775,7 @@ static void __cpuinit free_cache_attributes(unsigned int cpu) | |||
| 701 | for (i = 0; i < num_cache_leaves; i++) | 775 | for (i = 0; i < num_cache_leaves; i++) |
| 702 | cache_remove_shared_cpu_map(cpu, i); | 776 | cache_remove_shared_cpu_map(cpu, i); |
| 703 | 777 | ||
| 778 | kfree(per_cpu(ici_cpuid4_info, cpu)->l3); | ||
| 704 | kfree(per_cpu(ici_cpuid4_info, cpu)); | 779 | kfree(per_cpu(ici_cpuid4_info, cpu)); |
| 705 | per_cpu(ici_cpuid4_info, cpu) = NULL; | 780 | per_cpu(ici_cpuid4_info, cpu) = NULL; |
| 706 | } | 781 | } |
| @@ -985,7 +1060,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) | |||
| 985 | 1060 | ||
| 986 | this_leaf = CPUID4_INFO_IDX(cpu, i); | 1061 | this_leaf = CPUID4_INFO_IDX(cpu, i); |
| 987 | 1062 | ||
| 988 | if (this_leaf->can_disable) | 1063 | if (this_leaf->l3 && this_leaf->l3->can_disable) |
| 989 | ktype_cache.default_attrs = default_l3_attrs; | 1064 | ktype_cache.default_attrs = default_l3_attrs; |
| 990 | else | 1065 | else |
| 991 | ktype_cache.default_attrs = default_attrs; | 1066 | ktype_cache.default_attrs = default_attrs; |
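amd_calc_l3_indices() above derives the four subcache sizes from bits 0, 4, 8/9 and 12/13 of PCI config register 0x1C4 and turns the largest into the maximum disable index, (max << 10) - 1. The stand-alone sketch below reproduces that calculation; the register value passed in is made up for illustration.

#include <stdio.h>
#include <stdint.h>

#define BIT(n) (1u << (n))

static unsigned int l3_max_index(uint32_t val)	/* val ~ PCI config reg 0x1C4 */
{
	unsigned int sc0, sc1, sc2, sc3, max;

	sc0 = !(val & BIT(0));
	sc1 = !(val & BIT(4));
	sc2 = !(val & BIT(8))  + !(val & BIT(9));
	sc3 = !(val & BIT(12)) + !(val & BIT(13));

	max = sc0;
	if (sc1 > max) max = sc1;
	if (sc2 > max) max = sc2;
	if (sc3 > max) max = sc3;

	return (max << 10) - 1;
}

int main(void)
{
	/* All disable bits clear: every subcache present, so max = 2 and the
	 * largest valid disable index is (2 << 10) - 1 = 0x7ff. */
	printf("max disable index: 0x%x\n", l3_max_index(0x0));
	return 0;
}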
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile index 4ac6d48fe11b..bb34b03af252 100644 --- a/arch/x86/kernel/cpu/mcheck/Makefile +++ b/arch/x86/kernel/cpu/mcheck/Makefile | |||
| @@ -7,3 +7,5 @@ obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o | |||
| 7 | obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o | 7 | obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o |
| 8 | 8 | ||
| 9 | obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o | 9 | obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o |
| 10 | |||
| 11 | obj-$(CONFIG_ACPI_APEI) += mce-apei.o | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c new file mode 100644 index 000000000000..745b54f9be89 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c | |||
| @@ -0,0 +1,138 @@ | |||
| 1 | /* | ||
| 2 | * Bridge between MCE and APEI | ||
| 3 | * | ||
| 4 | * On some machines, corrected memory errors are reported via the APEI | ||
| 5 | * generic hardware error source (GHES) instead of a corrected Machine | ||
| 6 | * Check. These corrected memory errors can be reported to user space | ||
| 7 | * through /dev/mcelog by faking a corrected Machine Check, so that | ||
| 8 | * the erroneous memory page can be offlined by /sbin/mcelog if the error | ||
| 9 | * count for one page is beyond the threshold. | ||
| 10 | * | ||
| 11 | * For fatal MCE, save MCE record into persistent storage via ERST, so | ||
| 12 | * that the MCE record can be logged after reboot via ERST. | ||
| 13 | * | ||
| 14 | * Copyright 2010 Intel Corp. | ||
| 15 | * Author: Huang Ying <ying.huang@intel.com> | ||
| 16 | * | ||
| 17 | * This program is free software; you can redistribute it and/or | ||
| 18 | * modify it under the terms of the GNU General Public License version | ||
| 19 | * 2 as published by the Free Software Foundation. | ||
| 20 | * | ||
| 21 | * This program is distributed in the hope that it will be useful, | ||
| 22 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 23 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 24 | * GNU General Public License for more details. | ||
| 25 | * | ||
| 26 | * You should have received a copy of the GNU General Public License | ||
| 27 | * along with this program; if not, write to the Free Software | ||
| 28 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
| 29 | */ | ||
| 30 | |||
| 31 | #include <linux/kernel.h> | ||
| 32 | #include <linux/acpi.h> | ||
| 33 | #include <linux/cper.h> | ||
| 34 | #include <acpi/apei.h> | ||
| 35 | #include <asm/mce.h> | ||
| 36 | |||
| 37 | #include "mce-internal.h" | ||
| 38 | |||
| 39 | void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err) | ||
| 40 | { | ||
| 41 | struct mce m; | ||
| 42 | |||
| 43 | /* Only corrected MC is reported */ | ||
| 44 | if (!corrected) | ||
| 45 | return; | ||
| 46 | |||
| 47 | mce_setup(&m); | ||
| 48 | m.bank = 1; | ||
| 49 | /* Fake a memory read corrected error with unknown channel */ | ||
| 50 | m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f; | ||
| 51 | m.addr = mem_err->physical_addr; | ||
| 52 | mce_log(&m); | ||
| 53 | mce_notify_irq(); | ||
| 54 | } | ||
| 55 | EXPORT_SYMBOL_GPL(apei_mce_report_mem_error); | ||
| 56 | |||
| 57 | #define CPER_CREATOR_MCE \ | ||
| 58 | UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \ | ||
| 59 | 0x64, 0x90, 0xb8, 0x9d) | ||
| 60 | #define CPER_SECTION_TYPE_MCE \ | ||
| 61 | UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \ | ||
| 62 | 0x04, 0x4a, 0x38, 0xfc) | ||
| 63 | |||
| 64 | /* | ||
| 65 | * CPER specification (in UEFI specification 2.3 appendix N) requires | ||
| 66 | * byte-packed. | ||
| 67 | */ | ||
| 68 | struct cper_mce_record { | ||
| 69 | struct cper_record_header hdr; | ||
| 70 | struct cper_section_descriptor sec_hdr; | ||
| 71 | struct mce mce; | ||
| 72 | } __packed; | ||
| 73 | |||
| 74 | int apei_write_mce(struct mce *m) | ||
| 75 | { | ||
| 76 | struct cper_mce_record rcd; | ||
| 77 | |||
| 78 | memset(&rcd, 0, sizeof(rcd)); | ||
| 79 | memcpy(rcd.hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE); | ||
| 80 | rcd.hdr.revision = CPER_RECORD_REV; | ||
| 81 | rcd.hdr.signature_end = CPER_SIG_END; | ||
| 82 | rcd.hdr.section_count = 1; | ||
| 83 | rcd.hdr.error_severity = CPER_SER_FATAL; | ||
| 84 | /* timestamp, platform_id, partition_id are all invalid */ | ||
| 85 | rcd.hdr.validation_bits = 0; | ||
| 86 | rcd.hdr.record_length = sizeof(rcd); | ||
| 87 | rcd.hdr.creator_id = CPER_CREATOR_MCE; | ||
| 88 | rcd.hdr.notification_type = CPER_NOTIFY_MCE; | ||
| 89 | rcd.hdr.record_id = cper_next_record_id(); | ||
| 90 | rcd.hdr.flags = CPER_HW_ERROR_FLAGS_PREVERR; | ||
| 91 | |||
| 92 | rcd.sec_hdr.section_offset = (void *)&rcd.mce - (void *)&rcd; | ||
| 93 | rcd.sec_hdr.section_length = sizeof(rcd.mce); | ||
| 94 | rcd.sec_hdr.revision = CPER_SEC_REV; | ||
| 95 | /* fru_id and fru_text are invalid */ | ||
| 96 | rcd.sec_hdr.validation_bits = 0; | ||
| 97 | rcd.sec_hdr.flags = CPER_SEC_PRIMARY; | ||
| 98 | rcd.sec_hdr.section_type = CPER_SECTION_TYPE_MCE; | ||
| 99 | rcd.sec_hdr.section_severity = CPER_SER_FATAL; | ||
| 100 | |||
| 101 | memcpy(&rcd.mce, m, sizeof(*m)); | ||
| 102 | |||
| 103 | return erst_write(&rcd.hdr); | ||
| 104 | } | ||
| 105 | |||
| 106 | ssize_t apei_read_mce(struct mce *m, u64 *record_id) | ||
| 107 | { | ||
| 108 | struct cper_mce_record rcd; | ||
| 109 | ssize_t len; | ||
| 110 | |||
| 111 | len = erst_read_next(&rcd.hdr, sizeof(rcd)); | ||
| 112 | if (len <= 0) | ||
| 113 | return len; | ||
| 114 | /* Cannot skip other records in ERST storage unless we clear them */ | ||
| 115 | else if (len != sizeof(rcd) || | ||
| 116 | uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) { | ||
| 117 | if (printk_ratelimit()) | ||
| 118 | pr_warning( | ||
| 119 | "MCE-APEI: Can not skip the unknown record in ERST"); | ||
| 120 | return -EIO; | ||
| 121 | } | ||
| 122 | |||
| 123 | memcpy(m, &rcd.mce, sizeof(*m)); | ||
| 124 | *record_id = rcd.hdr.record_id; | ||
| 125 | |||
| 126 | return sizeof(*m); | ||
| 127 | } | ||
| 128 | |||
| 129 | /* Check whether there is record in ERST */ | ||
| 130 | int apei_check_mce(void) | ||
| 131 | { | ||
| 132 | return erst_get_record_count(); | ||
| 133 | } | ||
| 134 | |||
| 135 | int apei_clear_mce(u64 record_id) | ||
| 136 | { | ||
| 137 | return erst_clear(record_id); | ||
| 138 | } | ||
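apei_mce_report_mem_error() fakes a corrected machine check whose status word combines the architectural MCi_STATUS flags with the MCA error code 0x9f, a memory-controller read error on an unknown channel. The sketch below composes the same value stand-alone; the bit positions follow the standard MCi_STATUS layout (VAL=63, EN=60, ADDRV=58) and are stated here as an assumption, since the patch only uses the symbolic names.

#include <stdio.h>
#include <stdint.h>

#define MCI_STATUS_VAL   (1ULL << 63)	/* register contents are valid    */
#define MCI_STATUS_EN    (1ULL << 60)	/* error reporting enabled        */
#define MCI_STATUS_ADDRV (1ULL << 58)	/* MCi_ADDR holds a valid address */

int main(void)
{
	/* Low 16 bits carry the MCA error code; 0x9f reads as a memory
	 * controller read error with the channel unknown. */
	uint64_t status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f;

	printf("fake corrected-MC status: 0x%016llx\n",
	       (unsigned long long)status);
	return 0;
}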
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index 32996f9fab67..fefcc69ee8b5 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h | |||
| @@ -28,3 +28,26 @@ extern int mce_ser; | |||
| 28 | 28 | ||
| 29 | extern struct mce_bank *mce_banks; | 29 | extern struct mce_bank *mce_banks; |
| 30 | 30 | ||
| 31 | #ifdef CONFIG_ACPI_APEI | ||
| 32 | int apei_write_mce(struct mce *m); | ||
| 33 | ssize_t apei_read_mce(struct mce *m, u64 *record_id); | ||
| 34 | int apei_check_mce(void); | ||
| 35 | int apei_clear_mce(u64 record_id); | ||
| 36 | #else | ||
| 37 | static inline int apei_write_mce(struct mce *m) | ||
| 38 | { | ||
| 39 | return -EINVAL; | ||
| 40 | } | ||
| 41 | static inline ssize_t apei_read_mce(struct mce *m, u64 *record_id) | ||
| 42 | { | ||
| 43 | return 0; | ||
| 44 | } | ||
| 45 | static inline int apei_check_mce(void) | ||
| 46 | { | ||
| 47 | return 0; | ||
| 48 | } | ||
| 49 | static inline int apei_clear_mce(u64 record_id) | ||
| 50 | { | ||
| 51 | return -EINVAL; | ||
| 52 | } | ||
| 53 | #endif | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 8a6f0afa767e..707165dbc203 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
| @@ -264,7 +264,7 @@ static void wait_for_panic(void) | |||
| 264 | 264 | ||
| 265 | static void mce_panic(char *msg, struct mce *final, char *exp) | 265 | static void mce_panic(char *msg, struct mce *final, char *exp) |
| 266 | { | 266 | { |
| 267 | int i; | 267 | int i, apei_err = 0; |
| 268 | 268 | ||
| 269 | if (!fake_panic) { | 269 | if (!fake_panic) { |
| 270 | /* | 270 | /* |
| @@ -287,8 +287,11 @@ static void mce_panic(char *msg, struct mce *final, char *exp) | |||
| 287 | struct mce *m = &mcelog.entry[i]; | 287 | struct mce *m = &mcelog.entry[i]; |
| 288 | if (!(m->status & MCI_STATUS_VAL)) | 288 | if (!(m->status & MCI_STATUS_VAL)) |
| 289 | continue; | 289 | continue; |
| 290 | if (!(m->status & MCI_STATUS_UC)) | 290 | if (!(m->status & MCI_STATUS_UC)) { |
| 291 | print_mce(m); | 291 | print_mce(m); |
| 292 | if (!apei_err) | ||
| 293 | apei_err = apei_write_mce(m); | ||
| 294 | } | ||
| 292 | } | 295 | } |
| 293 | /* Now print uncorrected but with the final one last */ | 296 | /* Now print uncorrected but with the final one last */ |
| 294 | for (i = 0; i < MCE_LOG_LEN; i++) { | 297 | for (i = 0; i < MCE_LOG_LEN; i++) { |
| @@ -297,11 +300,17 @@ static void mce_panic(char *msg, struct mce *final, char *exp) | |||
| 297 | continue; | 300 | continue; |
| 298 | if (!(m->status & MCI_STATUS_UC)) | 301 | if (!(m->status & MCI_STATUS_UC)) |
| 299 | continue; | 302 | continue; |
| 300 | if (!final || memcmp(m, final, sizeof(struct mce))) | 303 | if (!final || memcmp(m, final, sizeof(struct mce))) { |
| 301 | print_mce(m); | 304 | print_mce(m); |
| 305 | if (!apei_err) | ||
| 306 | apei_err = apei_write_mce(m); | ||
| 307 | } | ||
| 302 | } | 308 | } |
| 303 | if (final) | 309 | if (final) { |
| 304 | print_mce(final); | 310 | print_mce(final); |
| 311 | if (!apei_err) | ||
| 312 | apei_err = apei_write_mce(final); | ||
| 313 | } | ||
| 305 | if (cpu_missing) | 314 | if (cpu_missing) |
| 306 | printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n"); | 315 | printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n"); |
| 307 | print_mce_tail(); | 316 | print_mce_tail(); |
| @@ -539,7 +548,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
| 539 | struct mce m; | 548 | struct mce m; |
| 540 | int i; | 549 | int i; |
| 541 | 550 | ||
| 542 | __get_cpu_var(mce_poll_count)++; | 551 | percpu_inc(mce_poll_count); |
| 543 | 552 | ||
| 544 | mce_setup(&m); | 553 | mce_setup(&m); |
| 545 | 554 | ||
| @@ -934,7 +943,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
| 934 | 943 | ||
| 935 | atomic_inc(&mce_entry); | 944 | atomic_inc(&mce_entry); |
| 936 | 945 | ||
| 937 | __get_cpu_var(mce_exception_count)++; | 946 | percpu_inc(mce_exception_count); |
| 938 | 947 | ||
| 939 | if (notify_die(DIE_NMI, "machine check", regs, error_code, | 948 | if (notify_die(DIE_NMI, "machine check", regs, error_code, |
| 940 | 18, SIGKILL) == NOTIFY_STOP) | 949 | 18, SIGKILL) == NOTIFY_STOP) |
| @@ -1493,6 +1502,43 @@ static void collect_tscs(void *data) | |||
| 1493 | rdtscll(cpu_tsc[smp_processor_id()]); | 1502 | rdtscll(cpu_tsc[smp_processor_id()]); |
| 1494 | } | 1503 | } |
| 1495 | 1504 | ||
| 1505 | static int mce_apei_read_done; | ||
| 1506 | |||
| 1507 | /* Collect MCE record of previous boot in persistent storage via APEI ERST. */ | ||
| 1508 | static int __mce_read_apei(char __user **ubuf, size_t usize) | ||
| 1509 | { | ||
| 1510 | int rc; | ||
| 1511 | u64 record_id; | ||
| 1512 | struct mce m; | ||
| 1513 | |||
| 1514 | if (usize < sizeof(struct mce)) | ||
| 1515 | return -EINVAL; | ||
| 1516 | |||
| 1517 | rc = apei_read_mce(&m, &record_id); | ||
| 1518 | /* Error or no more MCE record */ | ||
| 1519 | if (rc <= 0) { | ||
| 1520 | mce_apei_read_done = 1; | ||
| 1521 | return rc; | ||
| 1522 | } | ||
| 1523 | rc = -EFAULT; | ||
| 1524 | if (copy_to_user(*ubuf, &m, sizeof(struct mce))) | ||
| 1525 | return rc; | ||
| 1526 | /* | ||
| 1527 | * Ideally the record should only be cleared after it has been | ||
| 1528 | * flushed to disk or sent over the network by /sbin/mcelog, but | ||
| 1529 | * there is no interface to support that yet, so just clear it | ||
| 1530 | * here to avoid returning it twice. | ||
| 1531 | */ | ||
| 1532 | rc = apei_clear_mce(record_id); | ||
| 1533 | if (rc) { | ||
| 1534 | mce_apei_read_done = 1; | ||
| 1535 | return rc; | ||
| 1536 | } | ||
| 1537 | *ubuf += sizeof(struct mce); | ||
| 1538 | |||
| 1539 | return 0; | ||
| 1540 | } | ||
| 1541 | |||
| 1496 | static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | 1542 | static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, |
| 1497 | loff_t *off) | 1543 | loff_t *off) |
| 1498 | { | 1544 | { |
| @@ -1506,15 +1552,19 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | |||
| 1506 | return -ENOMEM; | 1552 | return -ENOMEM; |
| 1507 | 1553 | ||
| 1508 | mutex_lock(&mce_read_mutex); | 1554 | mutex_lock(&mce_read_mutex); |
| 1555 | |||
| 1556 | if (!mce_apei_read_done) { | ||
| 1557 | err = __mce_read_apei(&buf, usize); | ||
| 1558 | if (err || buf != ubuf) | ||
| 1559 | goto out; | ||
| 1560 | } | ||
| 1561 | |||
| 1509 | next = rcu_dereference_check_mce(mcelog.next); | 1562 | next = rcu_dereference_check_mce(mcelog.next); |
| 1510 | 1563 | ||
| 1511 | /* Only supports full reads right now */ | 1564 | /* Only supports full reads right now */ |
| 1512 | if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { | 1565 | err = -EINVAL; |
| 1513 | mutex_unlock(&mce_read_mutex); | 1566 | if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) |
| 1514 | kfree(cpu_tsc); | 1567 | goto out; |
| 1515 | |||
| 1516 | return -EINVAL; | ||
| 1517 | } | ||
| 1518 | 1568 | ||
| 1519 | err = 0; | 1569 | err = 0; |
| 1520 | prev = 0; | 1570 | prev = 0; |
| @@ -1562,10 +1612,15 @@ timeout: | |||
| 1562 | memset(&mcelog.entry[i], 0, sizeof(struct mce)); | 1612 | memset(&mcelog.entry[i], 0, sizeof(struct mce)); |
| 1563 | } | 1613 | } |
| 1564 | } | 1614 | } |
| 1615 | |||
| 1616 | if (err) | ||
| 1617 | err = -EFAULT; | ||
| 1618 | |||
| 1619 | out: | ||
| 1565 | mutex_unlock(&mce_read_mutex); | 1620 | mutex_unlock(&mce_read_mutex); |
| 1566 | kfree(cpu_tsc); | 1621 | kfree(cpu_tsc); |
| 1567 | 1622 | ||
| 1568 | return err ? -EFAULT : buf - ubuf; | 1623 | return err ? err : buf - ubuf; |
| 1569 | } | 1624 | } |
| 1570 | 1625 | ||
| 1571 | static unsigned int mce_poll(struct file *file, poll_table *wait) | 1626 | static unsigned int mce_poll(struct file *file, poll_table *wait) |
| @@ -1573,6 +1628,8 @@ static unsigned int mce_poll(struct file *file, poll_table *wait) | |||
| 1573 | poll_wait(file, &mce_wait, wait); | 1628 | poll_wait(file, &mce_wait, wait); |
| 1574 | if (rcu_dereference_check_mce(mcelog.next)) | 1629 | if (rcu_dereference_check_mce(mcelog.next)) |
| 1575 | return POLLIN | POLLRDNORM; | 1630 | return POLLIN | POLLRDNORM; |
| 1631 | if (!mce_apei_read_done && apei_check_mce()) | ||
| 1632 | return POLLIN | POLLRDNORM; | ||
| 1576 | return 0; | 1633 | return 0; |
| 1577 | } | 1634 | } |
| 1578 | 1635 | ||
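The mce_read() changes make a read of /dev/mcelog drain any records saved in persistent storage via APEI/ERST from a previous boot before copying the normal in-memory mcelog buffer, clearing each ERST record once it has been handed to user space so it is not returned twice. A condensed, compilable sketch of that ordering follows; every helper here is a stand-in, none of these names exist in the kernel.

#include <stdio.h>

static int  apei_left = 2;		/* pretend two saved records exist */
static int  apei_records_left(void)   { return apei_left; }
static int  copy_one_apei_record(void){ apei_left--; return 0; }
static long copy_mcelog_buffer(void)  { return 5; /* in-memory records */ }

static long mcelog_read(void)
{
	long copied = 0;

	/* 1) previous-boot records stashed via APEI/ERST come out first */
	while (apei_records_left()) {
		if (copy_one_apei_record())
			return copied ? copied : -1;
		copied++;		/* one struct mce handed to user space */
	}

	/* 2) then the normal in-memory mcelog ring */
	return copied + copy_mcelog_buffer();
}

int main(void)
{
	printf("records returned: %ld\n", mcelog_read());
	return 0;
}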
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 81c499eceb21..e1a0a3bf9716 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
| @@ -190,7 +190,7 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb, | |||
| 190 | mutex_unlock(&therm_cpu_lock); | 190 | mutex_unlock(&therm_cpu_lock); |
| 191 | break; | 191 | break; |
| 192 | } | 192 | } |
| 193 | return err ? NOTIFY_BAD : NOTIFY_OK; | 193 | return notifier_from_errno(err); |
| 194 | } | 194 | } |
| 195 | 195 | ||
| 196 | static struct notifier_block thermal_throttle_cpu_notifier __cpuinitdata = | 196 | static struct notifier_block thermal_throttle_cpu_notifier __cpuinitdata = |
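Returning notifier_from_errno(err) instead of the NOTIFY_BAD/NOTIFY_OK ternary lets the hotplug core recover the actual errno from the notifier chain rather than a generic failure. The sketch below is only an approximation of the encode/decode idea; the constants and exact formula are assumptions, not copied from the kernel headers.

#include <stdio.h>

#define NOTIFY_OK        0x0001
#define NOTIFY_STOP_MASK 0x8000

static int notifier_from_errno(int err)
{
	return err ? (NOTIFY_STOP_MASK | (NOTIFY_OK - err)) : NOTIFY_OK;
}

static int notifier_to_errno(int ret)
{
	ret &= ~NOTIFY_STOP_MASK;
	return ret > NOTIFY_OK ? NOTIFY_OK - ret : 0;
}

int main(void)
{
	int ret = notifier_from_errno(-12);	/* e.g. -ENOMEM from a callback */

	printf("encoded 0x%x -> errno %d\n", ret, notifier_to_errno(ret));
	return 0;
}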
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c new file mode 100644 index 000000000000..16f41bbe46b6 --- /dev/null +++ b/arch/x86/kernel/cpu/mshyperv.c | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | /* | ||
| 2 | * HyperV Detection code. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2010, Novell, Inc. | ||
| 5 | * Author : K. Y. Srinivasan <ksrinivasan@novell.com> | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or modify | ||
| 8 | * it under the terms of the GNU General Public License as published by | ||
| 9 | * the Free Software Foundation; version 2 of the License. | ||
| 10 | * | ||
| 11 | */ | ||
| 12 | |||
| 13 | #include <linux/types.h> | ||
| 14 | #include <linux/module.h> | ||
| 15 | #include <asm/processor.h> | ||
| 16 | #include <asm/hypervisor.h> | ||
| 17 | #include <asm/hyperv.h> | ||
| 18 | #include <asm/mshyperv.h> | ||
| 19 | |||
| 20 | struct ms_hyperv_info ms_hyperv; | ||
| 21 | |||
| 22 | static bool __init ms_hyperv_platform(void) | ||
| 23 | { | ||
| 24 | u32 eax; | ||
| 25 | u32 hyp_signature[3]; | ||
| 26 | |||
| 27 | if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) | ||
| 28 | return false; | ||
| 29 | |||
| 30 | cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS, | ||
| 31 | &eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]); | ||
| 32 | |||
| 33 | return eax >= HYPERV_CPUID_MIN && | ||
| 34 | eax <= HYPERV_CPUID_MAX && | ||
| 35 | !memcmp("Microsoft Hv", hyp_signature, 12); | ||
| 36 | } | ||
| 37 | |||
| 38 | static void __init ms_hyperv_init_platform(void) | ||
| 39 | { | ||
| 40 | /* | ||
| 41 | * Extract the features and hints | ||
| 42 | */ | ||
| 43 | ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES); | ||
| 44 | ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO); | ||
| 45 | |||
| 46 | printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n", | ||
| 47 | ms_hyperv.features, ms_hyperv.hints); | ||
| 48 | } | ||
| 49 | |||
| 50 | const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { | ||
| 51 | .name = "Microsoft HyperV", | ||
| 52 | .detect = ms_hyperv_platform, | ||
| 53 | .init_platform = ms_hyperv_init_platform, | ||
| 54 | }; | ||
| 55 | EXPORT_SYMBOL(x86_hyper_ms_hyperv); | ||
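ms_hyperv_platform() requires the hypervisor-present CPUID bit and then matches the 12-byte "Microsoft Hv" signature returned by leaf 0x40000000 in EBX/ECX/EDX, with EAX bounded by HYPERV_CPUID_MIN/MAX. Below is a hedged user-space re-implementation of the same probe using GCC's <cpuid.h>; the MIN/MAX range check is omitted.

#include <cpuid.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;
	char sig[13];

	/* CPUID.1:ECX bit 31 is the "running under a hypervisor" bit. */
	__cpuid(1, eax, ebx, ecx, edx);
	if (!(ecx & (1u << 31))) {
		puts("bare metal");
		return 0;
	}

	/* Vendor leaf: EAX = max hypervisor leaf, EBX/ECX/EDX = signature. */
	__cpuid(0x40000000, eax, ebx, ecx, edx);
	memcpy(sig + 0, &ebx, 4);
	memcpy(sig + 4, &ecx, 4);
	memcpy(sig + 8, &edx, 4);
	sig[12] = '\0';

	puts(strcmp(sig, "Microsoft Hv") ? "other hypervisor" : "Hyper-V");
	return 0;
}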
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index db5bdc8addf8..5db5b7d65a18 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
| @@ -31,46 +31,51 @@ | |||
| 31 | #include <asm/nmi.h> | 31 | #include <asm/nmi.h> |
| 32 | #include <asm/compat.h> | 32 | #include <asm/compat.h> |
| 33 | 33 | ||
| 34 | static u64 perf_event_mask __read_mostly; | 34 | #if 0 |
| 35 | #undef wrmsrl | ||
| 36 | #define wrmsrl(msr, val) \ | ||
| 37 | do { \ | ||
| 38 | trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\ | ||
| 39 | (unsigned long)(val)); \ | ||
| 40 | native_write_msr((msr), (u32)((u64)(val)), \ | ||
| 41 | (u32)((u64)(val) >> 32)); \ | ||
| 42 | } while (0) | ||
| 43 | #endif | ||
| 35 | 44 | ||
| 36 | /* The maximal number of PEBS events: */ | 45 | /* |
| 37 | #define MAX_PEBS_EVENTS 4 | 46 | * best effort, GUP based copy_from_user() that assumes IRQ or NMI context |
| 47 | */ | ||
| 48 | static unsigned long | ||
| 49 | copy_from_user_nmi(void *to, const void __user *from, unsigned long n) | ||
| 50 | { | ||
| 51 | unsigned long offset, addr = (unsigned long)from; | ||
| 52 | int type = in_nmi() ? KM_NMI : KM_IRQ0; | ||
| 53 | unsigned long size, len = 0; | ||
| 54 | struct page *page; | ||
| 55 | void *map; | ||
| 56 | int ret; | ||
| 38 | 57 | ||
| 39 | /* The size of a BTS record in bytes: */ | 58 | do { |
| 40 | #define BTS_RECORD_SIZE 24 | 59 | ret = __get_user_pages_fast(addr, 1, 0, &page); |
| 60 | if (!ret) | ||
| 61 | break; | ||
| 41 | 62 | ||
| 42 | /* The size of a per-cpu BTS buffer in bytes: */ | 63 | offset = addr & (PAGE_SIZE - 1); |
| 43 | #define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 2048) | 64 | size = min(PAGE_SIZE - offset, n - len); |
| 44 | 65 | ||
| 45 | /* The BTS overflow threshold in bytes from the end of the buffer: */ | 66 | map = kmap_atomic(page, type); |
| 46 | #define BTS_OVFL_TH (BTS_RECORD_SIZE * 128) | 67 | memcpy(to, map+offset, size); |
| 68 | kunmap_atomic(map, type); | ||
| 69 | put_page(page); | ||
| 47 | 70 | ||
| 71 | len += size; | ||
| 72 | to += size; | ||
| 73 | addr += size; | ||
| 48 | 74 | ||
| 49 | /* | 75 | } while (len < n); |
| 50 | * Bits in the debugctlmsr controlling branch tracing. | ||
| 51 | */ | ||
| 52 | #define X86_DEBUGCTL_TR (1 << 6) | ||
| 53 | #define X86_DEBUGCTL_BTS (1 << 7) | ||
| 54 | #define X86_DEBUGCTL_BTINT (1 << 8) | ||
| 55 | #define X86_DEBUGCTL_BTS_OFF_OS (1 << 9) | ||
| 56 | #define X86_DEBUGCTL_BTS_OFF_USR (1 << 10) | ||
| 57 | 76 | ||
| 58 | /* | 77 | return len; |
| 59 | * A debug store configuration. | 78 | } |
| 60 | * | ||
| 61 | * We only support architectures that use 64bit fields. | ||
| 62 | */ | ||
| 63 | struct debug_store { | ||
| 64 | u64 bts_buffer_base; | ||
| 65 | u64 bts_index; | ||
| 66 | u64 bts_absolute_maximum; | ||
| 67 | u64 bts_interrupt_threshold; | ||
| 68 | u64 pebs_buffer_base; | ||
| 69 | u64 pebs_index; | ||
| 70 | u64 pebs_absolute_maximum; | ||
| 71 | u64 pebs_interrupt_threshold; | ||
| 72 | u64 pebs_event_reset[MAX_PEBS_EVENTS]; | ||
| 73 | }; | ||
| 74 | 79 | ||
| 75 | struct event_constraint { | 80 | struct event_constraint { |
| 76 | union { | 81 | union { |
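copy_from_user_nmi() is moved to the top of the file (a later hunk removes it from its old location) because more than the callchain code now needs it: the PEBS/LBR fixup code included further down also pulls user memory into the kernel from NMI context. A hedged sketch of the typical caller shape, modeled on the user-space frame-pointer walker; the two-word frame layout is an assumption, not something this hunk defines:

```c
/*
 * Hedged sketch of a typical copy_from_user_nmi() caller: copying one
 * user stack frame at a time while unwinding from NMI context.
 */
struct stack_frame_sketch {
	const void __user	*next_frame;
	unsigned long		return_address;
};

static int copy_user_frame(const void __user *fp, struct stack_frame_sketch *frame)
{
	unsigned long bytes;

	if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
		return 0;

	bytes = copy_from_user_nmi(frame, fp, sizeof(*frame));

	return bytes == sizeof(*frame);
}
```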
| @@ -89,18 +94,42 @@ struct amd_nb { | |||
| 89 | struct event_constraint event_constraints[X86_PMC_IDX_MAX]; | 94 | struct event_constraint event_constraints[X86_PMC_IDX_MAX]; |
| 90 | }; | 95 | }; |
| 91 | 96 | ||
| 97 | #define MAX_LBR_ENTRIES 16 | ||
| 98 | |||
| 92 | struct cpu_hw_events { | 99 | struct cpu_hw_events { |
| 100 | /* | ||
| 101 | * Generic x86 PMC bits | ||
| 102 | */ | ||
| 93 | struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ | 103 | struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ |
| 94 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 104 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
| 95 | unsigned long interrupts; | ||
| 96 | int enabled; | 105 | int enabled; |
| 97 | struct debug_store *ds; | ||
| 98 | 106 | ||
| 99 | int n_events; | 107 | int n_events; |
| 100 | int n_added; | 108 | int n_added; |
| 109 | int n_txn; | ||
| 101 | int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ | 110 | int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ |
| 102 | u64 tags[X86_PMC_IDX_MAX]; | 111 | u64 tags[X86_PMC_IDX_MAX]; |
| 103 | struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ | 112 | struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ |
| 113 | |||
| 114 | unsigned int group_flag; | ||
| 115 | |||
| 116 | /* | ||
| 117 | * Intel DebugStore bits | ||
| 118 | */ | ||
| 119 | struct debug_store *ds; | ||
| 120 | u64 pebs_enabled; | ||
| 121 | |||
| 122 | /* | ||
| 123 | * Intel LBR bits | ||
| 124 | */ | ||
| 125 | int lbr_users; | ||
| 126 | void *lbr_context; | ||
| 127 | struct perf_branch_stack lbr_stack; | ||
| 128 | struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; | ||
| 129 | |||
| 130 | /* | ||
| 131 | * AMD specific bits | ||
| 132 | */ | ||
| 104 | struct amd_nb *amd_nb; | 133 | struct amd_nb *amd_nb; |
| 105 | }; | 134 | }; |
| 106 | 135 | ||
| @@ -114,44 +143,75 @@ struct cpu_hw_events { | |||
| 114 | #define EVENT_CONSTRAINT(c, n, m) \ | 143 | #define EVENT_CONSTRAINT(c, n, m) \ |
| 115 | __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) | 144 | __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) |
| 116 | 145 | ||
| 146 | /* | ||
| 147 | * Constraint on the Event code. | ||
| 148 | */ | ||
| 117 | #define INTEL_EVENT_CONSTRAINT(c, n) \ | 149 | #define INTEL_EVENT_CONSTRAINT(c, n) \ |
| 118 | EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK) | 150 | EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT) |
| 119 | 151 | ||
| 152 | /* | ||
| 153 | * Constraint on the Event code + UMask + fixed-mask | ||
| 154 | * | ||
| 155 | * filter mask to validate fixed counter events. | ||
| 156 | * the following filters disqualify for fixed counters: | ||
| 157 | * - inv | ||
| 158 | * - edge | ||
| 159 | * - cnt-mask | ||
| 160 | * The other filters are supported by fixed counters. | ||
| 161 | * The any-thread option is supported starting with v3. | ||
| 162 | */ | ||
| 120 | #define FIXED_EVENT_CONSTRAINT(c, n) \ | 163 | #define FIXED_EVENT_CONSTRAINT(c, n) \ |
| 121 | EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK) | 164 | EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK) |
| 165 | |||
| 166 | /* | ||
| 167 | * Constraint on the Event code + UMask | ||
| 168 | */ | ||
| 169 | #define PEBS_EVENT_CONSTRAINT(c, n) \ | ||
| 170 | EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) | ||
| 122 | 171 | ||
| 123 | #define EVENT_CONSTRAINT_END \ | 172 | #define EVENT_CONSTRAINT_END \ |
| 124 | EVENT_CONSTRAINT(0, 0, 0) | 173 | EVENT_CONSTRAINT(0, 0, 0) |
| 125 | 174 | ||
| 126 | #define for_each_event_constraint(e, c) \ | 175 | #define for_each_event_constraint(e, c) \ |
| 127 | for ((e) = (c); (e)->cmask; (e)++) | 176 | for ((e) = (c); (e)->weight; (e)++) |
| 177 | |||
| 178 | union perf_capabilities { | ||
| 179 | struct { | ||
| 180 | u64 lbr_format : 6; | ||
| 181 | u64 pebs_trap : 1; | ||
| 182 | u64 pebs_arch_reg : 1; | ||
| 183 | u64 pebs_format : 4; | ||
| 184 | u64 smm_freeze : 1; | ||
| 185 | }; | ||
| 186 | u64 capabilities; | ||
| 187 | }; | ||
| 128 | 188 | ||
| 129 | /* | 189 | /* |
| 130 | * struct x86_pmu - generic x86 pmu | 190 | * struct x86_pmu - generic x86 pmu |
| 131 | */ | 191 | */ |
| 132 | struct x86_pmu { | 192 | struct x86_pmu { |
| 193 | /* | ||
| 194 | * Generic x86 PMC bits | ||
| 195 | */ | ||
| 133 | const char *name; | 196 | const char *name; |
| 134 | int version; | 197 | int version; |
| 135 | int (*handle_irq)(struct pt_regs *); | 198 | int (*handle_irq)(struct pt_regs *); |
| 136 | void (*disable_all)(void); | 199 | void (*disable_all)(void); |
| 137 | void (*enable_all)(void); | 200 | void (*enable_all)(int added); |
| 138 | void (*enable)(struct perf_event *); | 201 | void (*enable)(struct perf_event *); |
| 139 | void (*disable)(struct perf_event *); | 202 | void (*disable)(struct perf_event *); |
| 203 | int (*hw_config)(struct perf_event *event); | ||
| 204 | int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); | ||
| 140 | unsigned eventsel; | 205 | unsigned eventsel; |
| 141 | unsigned perfctr; | 206 | unsigned perfctr; |
| 142 | u64 (*event_map)(int); | 207 | u64 (*event_map)(int); |
| 143 | u64 (*raw_event)(u64); | ||
| 144 | int max_events; | 208 | int max_events; |
| 145 | int num_events; | 209 | int num_counters; |
| 146 | int num_events_fixed; | 210 | int num_counters_fixed; |
| 147 | int event_bits; | 211 | int cntval_bits; |
| 148 | u64 event_mask; | 212 | u64 cntval_mask; |
| 149 | int apic; | 213 | int apic; |
| 150 | u64 max_period; | 214 | u64 max_period; |
| 151 | u64 intel_ctrl; | ||
| 152 | void (*enable_bts)(u64 config); | ||
| 153 | void (*disable_bts)(void); | ||
| 154 | |||
| 155 | struct event_constraint * | 215 | struct event_constraint * |
| 156 | (*get_event_constraints)(struct cpu_hw_events *cpuc, | 216 | (*get_event_constraints)(struct cpu_hw_events *cpuc, |
| 157 | struct perf_event *event); | 217 | struct perf_event *event); |
| @@ -159,11 +219,32 @@ struct x86_pmu { | |||
| 159 | void (*put_event_constraints)(struct cpu_hw_events *cpuc, | 219 | void (*put_event_constraints)(struct cpu_hw_events *cpuc, |
| 160 | struct perf_event *event); | 220 | struct perf_event *event); |
| 161 | struct event_constraint *event_constraints; | 221 | struct event_constraint *event_constraints; |
| 222 | void (*quirks)(void); | ||
| 162 | 223 | ||
| 163 | int (*cpu_prepare)(int cpu); | 224 | int (*cpu_prepare)(int cpu); |
| 164 | void (*cpu_starting)(int cpu); | 225 | void (*cpu_starting)(int cpu); |
| 165 | void (*cpu_dying)(int cpu); | 226 | void (*cpu_dying)(int cpu); |
| 166 | void (*cpu_dead)(int cpu); | 227 | void (*cpu_dead)(int cpu); |
| 228 | |||
| 229 | /* | ||
| 230 | * Intel Arch Perfmon v2+ | ||
| 231 | */ | ||
| 232 | u64 intel_ctrl; | ||
| 233 | union perf_capabilities intel_cap; | ||
| 234 | |||
| 235 | /* | ||
| 236 | * Intel DebugStore bits | ||
| 237 | */ | ||
| 238 | int bts, pebs; | ||
| 239 | int pebs_record_size; | ||
| 240 | void (*drain_pebs)(struct pt_regs *regs); | ||
| 241 | struct event_constraint *pebs_constraints; | ||
| 242 | |||
| 243 | /* | ||
| 244 | * Intel LBR | ||
| 245 | */ | ||
| 246 | unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ | ||
| 247 | int lbr_nr; /* hardware stack size */ | ||
| 167 | }; | 248 | }; |
| 168 | 249 | ||
| 169 | static struct x86_pmu x86_pmu __read_mostly; | 250 | static struct x86_pmu x86_pmu __read_mostly; |
| @@ -198,7 +279,7 @@ static u64 | |||
| 198 | x86_perf_event_update(struct perf_event *event) | 279 | x86_perf_event_update(struct perf_event *event) |
| 199 | { | 280 | { |
| 200 | struct hw_perf_event *hwc = &event->hw; | 281 | struct hw_perf_event *hwc = &event->hw; |
| 201 | int shift = 64 - x86_pmu.event_bits; | 282 | int shift = 64 - x86_pmu.cntval_bits; |
| 202 | u64 prev_raw_count, new_raw_count; | 283 | u64 prev_raw_count, new_raw_count; |
| 203 | int idx = hwc->idx; | 284 | int idx = hwc->idx; |
| 204 | s64 delta; | 285 | s64 delta; |
| @@ -241,33 +322,32 @@ again: | |||
| 241 | static atomic_t active_events; | 322 | static atomic_t active_events; |
| 242 | static DEFINE_MUTEX(pmc_reserve_mutex); | 323 | static DEFINE_MUTEX(pmc_reserve_mutex); |
| 243 | 324 | ||
| 325 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 326 | |||
| 244 | static bool reserve_pmc_hardware(void) | 327 | static bool reserve_pmc_hardware(void) |
| 245 | { | 328 | { |
| 246 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 247 | int i; | 329 | int i; |
| 248 | 330 | ||
| 249 | if (nmi_watchdog == NMI_LOCAL_APIC) | 331 | if (nmi_watchdog == NMI_LOCAL_APIC) |
| 250 | disable_lapic_nmi_watchdog(); | 332 | disable_lapic_nmi_watchdog(); |
| 251 | 333 | ||
| 252 | for (i = 0; i < x86_pmu.num_events; i++) { | 334 | for (i = 0; i < x86_pmu.num_counters; i++) { |
| 253 | if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) | 335 | if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) |
| 254 | goto perfctr_fail; | 336 | goto perfctr_fail; |
| 255 | } | 337 | } |
| 256 | 338 | ||
| 257 | for (i = 0; i < x86_pmu.num_events; i++) { | 339 | for (i = 0; i < x86_pmu.num_counters; i++) { |
| 258 | if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) | 340 | if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) |
| 259 | goto eventsel_fail; | 341 | goto eventsel_fail; |
| 260 | } | 342 | } |
| 261 | #endif | ||
| 262 | 343 | ||
| 263 | return true; | 344 | return true; |
| 264 | 345 | ||
| 265 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 266 | eventsel_fail: | 346 | eventsel_fail: |
| 267 | for (i--; i >= 0; i--) | 347 | for (i--; i >= 0; i--) |
| 268 | release_evntsel_nmi(x86_pmu.eventsel + i); | 348 | release_evntsel_nmi(x86_pmu.eventsel + i); |
| 269 | 349 | ||
| 270 | i = x86_pmu.num_events; | 350 | i = x86_pmu.num_counters; |
| 271 | 351 | ||
| 272 | perfctr_fail: | 352 | perfctr_fail: |
| 273 | for (i--; i >= 0; i--) | 353 | for (i--; i >= 0; i--) |
| @@ -277,128 +357,36 @@ perfctr_fail: | |||
| 277 | enable_lapic_nmi_watchdog(); | 357 | enable_lapic_nmi_watchdog(); |
| 278 | 358 | ||
| 279 | return false; | 359 | return false; |
| 280 | #endif | ||
| 281 | } | 360 | } |
| 282 | 361 | ||
| 283 | static void release_pmc_hardware(void) | 362 | static void release_pmc_hardware(void) |
| 284 | { | 363 | { |
| 285 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 286 | int i; | 364 | int i; |
| 287 | 365 | ||
| 288 | for (i = 0; i < x86_pmu.num_events; i++) { | 366 | for (i = 0; i < x86_pmu.num_counters; i++) { |
| 289 | release_perfctr_nmi(x86_pmu.perfctr + i); | 367 | release_perfctr_nmi(x86_pmu.perfctr + i); |
| 290 | release_evntsel_nmi(x86_pmu.eventsel + i); | 368 | release_evntsel_nmi(x86_pmu.eventsel + i); |
| 291 | } | 369 | } |
| 292 | 370 | ||
| 293 | if (nmi_watchdog == NMI_LOCAL_APIC) | 371 | if (nmi_watchdog == NMI_LOCAL_APIC) |
| 294 | enable_lapic_nmi_watchdog(); | 372 | enable_lapic_nmi_watchdog(); |
| 295 | #endif | ||
| 296 | } | ||
| 297 | |||
| 298 | static inline bool bts_available(void) | ||
| 299 | { | ||
| 300 | return x86_pmu.enable_bts != NULL; | ||
| 301 | } | ||
| 302 | |||
| 303 | static void init_debug_store_on_cpu(int cpu) | ||
| 304 | { | ||
| 305 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
| 306 | |||
| 307 | if (!ds) | ||
| 308 | return; | ||
| 309 | |||
| 310 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, | ||
| 311 | (u32)((u64)(unsigned long)ds), | ||
| 312 | (u32)((u64)(unsigned long)ds >> 32)); | ||
| 313 | } | ||
| 314 | |||
| 315 | static void fini_debug_store_on_cpu(int cpu) | ||
| 316 | { | ||
| 317 | if (!per_cpu(cpu_hw_events, cpu).ds) | ||
| 318 | return; | ||
| 319 | |||
| 320 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); | ||
| 321 | } | ||
| 322 | |||
| 323 | static void release_bts_hardware(void) | ||
| 324 | { | ||
| 325 | int cpu; | ||
| 326 | |||
| 327 | if (!bts_available()) | ||
| 328 | return; | ||
| 329 | |||
| 330 | get_online_cpus(); | ||
| 331 | |||
| 332 | for_each_online_cpu(cpu) | ||
| 333 | fini_debug_store_on_cpu(cpu); | ||
| 334 | |||
| 335 | for_each_possible_cpu(cpu) { | ||
| 336 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
| 337 | |||
| 338 | if (!ds) | ||
| 339 | continue; | ||
| 340 | |||
| 341 | per_cpu(cpu_hw_events, cpu).ds = NULL; | ||
| 342 | |||
| 343 | kfree((void *)(unsigned long)ds->bts_buffer_base); | ||
| 344 | kfree(ds); | ||
| 345 | } | ||
| 346 | |||
| 347 | put_online_cpus(); | ||
| 348 | } | 373 | } |
| 349 | 374 | ||
| 350 | static int reserve_bts_hardware(void) | 375 | #else |
| 351 | { | ||
| 352 | int cpu, err = 0; | ||
| 353 | |||
| 354 | if (!bts_available()) | ||
| 355 | return 0; | ||
| 356 | |||
| 357 | get_online_cpus(); | ||
| 358 | |||
| 359 | for_each_possible_cpu(cpu) { | ||
| 360 | struct debug_store *ds; | ||
| 361 | void *buffer; | ||
| 362 | |||
| 363 | err = -ENOMEM; | ||
| 364 | buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); | ||
| 365 | if (unlikely(!buffer)) | ||
| 366 | break; | ||
| 367 | |||
| 368 | ds = kzalloc(sizeof(*ds), GFP_KERNEL); | ||
| 369 | if (unlikely(!ds)) { | ||
| 370 | kfree(buffer); | ||
| 371 | break; | ||
| 372 | } | ||
| 373 | |||
| 374 | ds->bts_buffer_base = (u64)(unsigned long)buffer; | ||
| 375 | ds->bts_index = ds->bts_buffer_base; | ||
| 376 | ds->bts_absolute_maximum = | ||
| 377 | ds->bts_buffer_base + BTS_BUFFER_SIZE; | ||
| 378 | ds->bts_interrupt_threshold = | ||
| 379 | ds->bts_absolute_maximum - BTS_OVFL_TH; | ||
| 380 | 376 | ||
| 381 | per_cpu(cpu_hw_events, cpu).ds = ds; | 377 | static bool reserve_pmc_hardware(void) { return true; } |
| 382 | err = 0; | 378 | static void release_pmc_hardware(void) {} |
| 383 | } | ||
| 384 | 379 | ||
| 385 | if (err) | 380 | #endif |
| 386 | release_bts_hardware(); | ||
| 387 | else { | ||
| 388 | for_each_online_cpu(cpu) | ||
| 389 | init_debug_store_on_cpu(cpu); | ||
| 390 | } | ||
| 391 | |||
| 392 | put_online_cpus(); | ||
| 393 | 381 | ||
| 394 | return err; | 382 | static int reserve_ds_buffers(void); |
| 395 | } | 383 | static void release_ds_buffers(void); |
| 396 | 384 | ||
| 397 | static void hw_perf_event_destroy(struct perf_event *event) | 385 | static void hw_perf_event_destroy(struct perf_event *event) |
| 398 | { | 386 | { |
| 399 | if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { | 387 | if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { |
| 400 | release_pmc_hardware(); | 388 | release_pmc_hardware(); |
| 401 | release_bts_hardware(); | 389 | release_ds_buffers(); |
| 402 | mutex_unlock(&pmc_reserve_mutex); | 390 | mutex_unlock(&pmc_reserve_mutex); |
| 403 | } | 391 | } |
| 404 | } | 392 | } |
| @@ -441,54 +429,11 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) | |||
| 441 | return 0; | 429 | return 0; |
| 442 | } | 430 | } |
| 443 | 431 | ||
| 444 | /* | 432 | static int x86_setup_perfctr(struct perf_event *event) |
| 445 | * Setup the hardware configuration for a given attr_type | ||
| 446 | */ | ||
| 447 | static int __hw_perf_event_init(struct perf_event *event) | ||
| 448 | { | 433 | { |
| 449 | struct perf_event_attr *attr = &event->attr; | 434 | struct perf_event_attr *attr = &event->attr; |
| 450 | struct hw_perf_event *hwc = &event->hw; | 435 | struct hw_perf_event *hwc = &event->hw; |
| 451 | u64 config; | 436 | u64 config; |
| 452 | int err; | ||
| 453 | |||
| 454 | if (!x86_pmu_initialized()) | ||
| 455 | return -ENODEV; | ||
| 456 | |||
| 457 | err = 0; | ||
| 458 | if (!atomic_inc_not_zero(&active_events)) { | ||
| 459 | mutex_lock(&pmc_reserve_mutex); | ||
| 460 | if (atomic_read(&active_events) == 0) { | ||
| 461 | if (!reserve_pmc_hardware()) | ||
| 462 | err = -EBUSY; | ||
| 463 | else | ||
| 464 | err = reserve_bts_hardware(); | ||
| 465 | } | ||
| 466 | if (!err) | ||
| 467 | atomic_inc(&active_events); | ||
| 468 | mutex_unlock(&pmc_reserve_mutex); | ||
| 469 | } | ||
| 470 | if (err) | ||
| 471 | return err; | ||
| 472 | |||
| 473 | event->destroy = hw_perf_event_destroy; | ||
| 474 | |||
| 475 | /* | ||
| 476 | * Generate PMC IRQs: | ||
| 477 | * (keep 'enabled' bit clear for now) | ||
| 478 | */ | ||
| 479 | hwc->config = ARCH_PERFMON_EVENTSEL_INT; | ||
| 480 | |||
| 481 | hwc->idx = -1; | ||
| 482 | hwc->last_cpu = -1; | ||
| 483 | hwc->last_tag = ~0ULL; | ||
| 484 | |||
| 485 | /* | ||
| 486 | * Count user and OS events unless requested not to. | ||
| 487 | */ | ||
| 488 | if (!attr->exclude_user) | ||
| 489 | hwc->config |= ARCH_PERFMON_EVENTSEL_USR; | ||
| 490 | if (!attr->exclude_kernel) | ||
| 491 | hwc->config |= ARCH_PERFMON_EVENTSEL_OS; | ||
| 492 | 437 | ||
| 493 | if (!hwc->sample_period) { | 438 | if (!hwc->sample_period) { |
| 494 | hwc->sample_period = x86_pmu.max_period; | 439 | hwc->sample_period = x86_pmu.max_period; |
| @@ -505,16 +450,8 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
| 505 | return -EOPNOTSUPP; | 450 | return -EOPNOTSUPP; |
| 506 | } | 451 | } |
| 507 | 452 | ||
| 508 | /* | 453 | if (attr->type == PERF_TYPE_RAW) |
| 509 | * Raw hw_event type provide the config in the hw_event structure | ||
| 510 | */ | ||
| 511 | if (attr->type == PERF_TYPE_RAW) { | ||
| 512 | hwc->config |= x86_pmu.raw_event(attr->config); | ||
| 513 | if ((hwc->config & ARCH_PERFMON_EVENTSEL_ANY) && | ||
| 514 | perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) | ||
| 515 | return -EACCES; | ||
| 516 | return 0; | 454 | return 0; |
| 517 | } | ||
| 518 | 455 | ||
| 519 | if (attr->type == PERF_TYPE_HW_CACHE) | 456 | if (attr->type == PERF_TYPE_HW_CACHE) |
| 520 | return set_ext_hw_attr(hwc, attr); | 457 | return set_ext_hw_attr(hwc, attr); |
| @@ -539,11 +476,11 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
| 539 | if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && | 476 | if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && |
| 540 | (hwc->sample_period == 1)) { | 477 | (hwc->sample_period == 1)) { |
| 541 | /* BTS is not supported by this architecture. */ | 478 | /* BTS is not supported by this architecture. */ |
| 542 | if (!bts_available()) | 479 | if (!x86_pmu.bts) |
| 543 | return -EOPNOTSUPP; | 480 | return -EOPNOTSUPP; |
| 544 | 481 | ||
| 545 | /* BTS is currently only allowed for user-mode. */ | 482 | /* BTS is currently only allowed for user-mode. */ |
| 546 | if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) | 483 | if (!attr->exclude_kernel) |
| 547 | return -EOPNOTSUPP; | 484 | return -EOPNOTSUPP; |
| 548 | } | 485 | } |
| 549 | 486 | ||
| @@ -552,12 +489,87 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
| 552 | return 0; | 489 | return 0; |
| 553 | } | 490 | } |
| 554 | 491 | ||
| 492 | static int x86_pmu_hw_config(struct perf_event *event) | ||
| 493 | { | ||
| 494 | if (event->attr.precise_ip) { | ||
| 495 | int precise = 0; | ||
| 496 | |||
| 497 | /* Support for constant skid */ | ||
| 498 | if (x86_pmu.pebs) | ||
| 499 | precise++; | ||
| 500 | |||
| 501 | /* Support for IP fixup */ | ||
| 502 | if (x86_pmu.lbr_nr) | ||
| 503 | precise++; | ||
| 504 | |||
| 505 | if (event->attr.precise_ip > precise) | ||
| 506 | return -EOPNOTSUPP; | ||
| 507 | } | ||
| 508 | |||
| 509 | /* | ||
| 510 | * Generate PMC IRQs: | ||
| 511 | * (keep 'enabled' bit clear for now) | ||
| 512 | */ | ||
| 513 | event->hw.config = ARCH_PERFMON_EVENTSEL_INT; | ||
| 514 | |||
| 515 | /* | ||
| 516 | * Count user and OS events unless requested not to | ||
| 517 | */ | ||
| 518 | if (!event->attr.exclude_user) | ||
| 519 | event->hw.config |= ARCH_PERFMON_EVENTSEL_USR; | ||
| 520 | if (!event->attr.exclude_kernel) | ||
| 521 | event->hw.config |= ARCH_PERFMON_EVENTSEL_OS; | ||
| 522 | |||
| 523 | if (event->attr.type == PERF_TYPE_RAW) | ||
| 524 | event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK; | ||
| 525 | |||
| 526 | return x86_setup_perfctr(event); | ||
| 527 | } | ||
| 528 | |||
| 529 | /* | ||
| 530 | * Setup the hardware configuration for a given attr_type | ||
| 531 | */ | ||
| 532 | static int __hw_perf_event_init(struct perf_event *event) | ||
| 533 | { | ||
| 534 | int err; | ||
| 535 | |||
| 536 | if (!x86_pmu_initialized()) | ||
| 537 | return -ENODEV; | ||
| 538 | |||
| 539 | err = 0; | ||
| 540 | if (!atomic_inc_not_zero(&active_events)) { | ||
| 541 | mutex_lock(&pmc_reserve_mutex); | ||
| 542 | if (atomic_read(&active_events) == 0) { | ||
| 543 | if (!reserve_pmc_hardware()) | ||
| 544 | err = -EBUSY; | ||
| 545 | else { | ||
| 546 | err = reserve_ds_buffers(); | ||
| 547 | if (err) | ||
| 548 | release_pmc_hardware(); | ||
| 549 | } | ||
| 550 | } | ||
| 551 | if (!err) | ||
| 552 | atomic_inc(&active_events); | ||
| 553 | mutex_unlock(&pmc_reserve_mutex); | ||
| 554 | } | ||
| 555 | if (err) | ||
| 556 | return err; | ||
| 557 | |||
| 558 | event->destroy = hw_perf_event_destroy; | ||
| 559 | |||
| 560 | event->hw.idx = -1; | ||
| 561 | event->hw.last_cpu = -1; | ||
| 562 | event->hw.last_tag = ~0ULL; | ||
| 563 | |||
| 564 | return x86_pmu.hw_config(event); | ||
| 565 | } | ||
| 566 | |||
| 555 | static void x86_pmu_disable_all(void) | 567 | static void x86_pmu_disable_all(void) |
| 556 | { | 568 | { |
| 557 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 569 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 558 | int idx; | 570 | int idx; |
| 559 | 571 | ||
| 560 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 572 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
| 561 | u64 val; | 573 | u64 val; |
| 562 | 574 | ||
| 563 | if (!test_bit(idx, cpuc->active_mask)) | 575 | if (!test_bit(idx, cpuc->active_mask)) |
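The new x86_pmu_hw_config() above maps attr.precise_ip onto hardware capability: PEBS buys one level of precision (constant skid), LBR-based IP fixup buys a second, and anything beyond what the PMU offers is rejected with -EOPNOTSUPP. A hedged user-space sketch of what a profiler would request to exercise this path (raw perf_event_open syscall, field names as in linux/perf_event.h of this era):

```c
/* Hedged sketch: opening a cycles event with precise_ip = 2, which the
 * hunk above only accepts when both x86_pmu.pebs and x86_pmu.lbr_nr are
 * available; otherwise the open fails with EOPNOTSUPP. */
#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int open_precise_cycles(pid_t pid)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.type		= PERF_TYPE_HARDWARE;
	attr.size		= sizeof(attr);
	attr.config		= PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period	= 100000;
	attr.precise_ip		= 2;	/* PEBS + LBR fixup */

	return syscall(__NR_perf_event_open, &attr, pid, -1, -1, 0);
}
```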
| @@ -587,12 +599,12 @@ void hw_perf_disable(void) | |||
| 587 | x86_pmu.disable_all(); | 599 | x86_pmu.disable_all(); |
| 588 | } | 600 | } |
| 589 | 601 | ||
| 590 | static void x86_pmu_enable_all(void) | 602 | static void x86_pmu_enable_all(int added) |
| 591 | { | 603 | { |
| 592 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 604 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 593 | int idx; | 605 | int idx; |
| 594 | 606 | ||
| 595 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 607 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
| 596 | struct perf_event *event = cpuc->events[idx]; | 608 | struct perf_event *event = cpuc->events[idx]; |
| 597 | u64 val; | 609 | u64 val; |
| 598 | 610 | ||
| @@ -667,14 +679,14 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | |||
| 667 | * assign events to counters starting with most | 679 | * assign events to counters starting with most |
| 668 | * constrained events. | 680 | * constrained events. |
| 669 | */ | 681 | */ |
| 670 | wmax = x86_pmu.num_events; | 682 | wmax = x86_pmu.num_counters; |
| 671 | 683 | ||
| 672 | /* | 684 | /* |
| 673 | * when fixed event counters are present, | 685 | * when fixed event counters are present, |
| 674 | * wmax is incremented by 1 to account | 686 | * wmax is incremented by 1 to account |
| 675 | * for one more choice | 687 | * for one more choice |
| 676 | */ | 688 | */ |
| 677 | if (x86_pmu.num_events_fixed) | 689 | if (x86_pmu.num_counters_fixed) |
| 678 | wmax++; | 690 | wmax++; |
| 679 | 691 | ||
| 680 | for (w = 1, num = n; num && w <= wmax; w++) { | 692 | for (w = 1, num = n; num && w <= wmax; w++) { |
| @@ -724,7 +736,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, | |||
| 724 | struct perf_event *event; | 736 | struct perf_event *event; |
| 725 | int n, max_count; | 737 | int n, max_count; |
| 726 | 738 | ||
| 727 | max_count = x86_pmu.num_events + x86_pmu.num_events_fixed; | 739 | max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed; |
| 728 | 740 | ||
| 729 | /* current number of events already accepted */ | 741 | /* current number of events already accepted */ |
| 730 | n = cpuc->n_events; | 742 | n = cpuc->n_events; |
| @@ -795,7 +807,7 @@ void hw_perf_enable(void) | |||
| 795 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 807 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 796 | struct perf_event *event; | 808 | struct perf_event *event; |
| 797 | struct hw_perf_event *hwc; | 809 | struct hw_perf_event *hwc; |
| 798 | int i; | 810 | int i, added = cpuc->n_added; |
| 799 | 811 | ||
| 800 | if (!x86_pmu_initialized()) | 812 | if (!x86_pmu_initialized()) |
| 801 | return; | 813 | return; |
| @@ -847,19 +859,20 @@ void hw_perf_enable(void) | |||
| 847 | cpuc->enabled = 1; | 859 | cpuc->enabled = 1; |
| 848 | barrier(); | 860 | barrier(); |
| 849 | 861 | ||
| 850 | x86_pmu.enable_all(); | 862 | x86_pmu.enable_all(added); |
| 851 | } | 863 | } |
| 852 | 864 | ||
| 853 | static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc) | 865 | static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, |
| 866 | u64 enable_mask) | ||
| 854 | { | 867 | { |
| 855 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | 868 | wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask); |
| 856 | hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE); | ||
| 857 | } | 869 | } |
| 858 | 870 | ||
| 859 | static inline void x86_pmu_disable_event(struct perf_event *event) | 871 | static inline void x86_pmu_disable_event(struct perf_event *event) |
| 860 | { | 872 | { |
| 861 | struct hw_perf_event *hwc = &event->hw; | 873 | struct hw_perf_event *hwc = &event->hw; |
| 862 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, hwc->config); | 874 | |
| 875 | wrmsrl(hwc->config_base + hwc->idx, hwc->config); | ||
| 863 | } | 876 | } |
| 864 | 877 | ||
| 865 | static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); | 878 | static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); |
| @@ -874,7 +887,7 @@ x86_perf_event_set_period(struct perf_event *event) | |||
| 874 | struct hw_perf_event *hwc = &event->hw; | 887 | struct hw_perf_event *hwc = &event->hw; |
| 875 | s64 left = atomic64_read(&hwc->period_left); | 888 | s64 left = atomic64_read(&hwc->period_left); |
| 876 | s64 period = hwc->sample_period; | 889 | s64 period = hwc->sample_period; |
| 877 | int err, ret = 0, idx = hwc->idx; | 890 | int ret = 0, idx = hwc->idx; |
| 878 | 891 | ||
| 879 | if (idx == X86_PMC_IDX_FIXED_BTS) | 892 | if (idx == X86_PMC_IDX_FIXED_BTS) |
| 880 | return 0; | 893 | return 0; |
| @@ -912,8 +925,8 @@ x86_perf_event_set_period(struct perf_event *event) | |||
| 912 | */ | 925 | */ |
| 913 | atomic64_set(&hwc->prev_count, (u64)-left); | 926 | atomic64_set(&hwc->prev_count, (u64)-left); |
| 914 | 927 | ||
| 915 | err = checking_wrmsrl(hwc->event_base + idx, | 928 | wrmsrl(hwc->event_base + idx, |
| 916 | (u64)(-left) & x86_pmu.event_mask); | 929 | (u64)(-left) & x86_pmu.cntval_mask); |
| 917 | 930 | ||
| 918 | perf_event_update_userpage(event); | 931 | perf_event_update_userpage(event); |
| 919 | 932 | ||
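The set_period hunk above drops the checking_wrmsrl() error plumbing but keeps the core trick: the counter is programmed with the two's complement of the remaining period, truncated to cntval_bits, so it overflows (and raises the PMI) after exactly `left` increments. A small hedged arithmetic check of that encoding, assuming 48-bit counters:

```c
/* Hedged arithmetic sketch of the (u64)(-left) & cntval_mask encoding:
 * with 48-bit counters and left = 100000, the PMC is programmed to
 * 0xFFFFFFFE7960 and wraps to zero after exactly 100000 events. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	int      cntval_bits = 48;
	uint64_t cntval_mask = (1ULL << cntval_bits) - 1;
	int64_t  left        = 100000;

	uint64_t programmed  = (uint64_t)(-left) & cntval_mask;
	uint64_t to_overflow = (cntval_mask + 1) - programmed;

	printf("programmed = %#llx, increments until overflow = %llu\n",
	       (unsigned long long)programmed,
	       (unsigned long long)to_overflow);
	return 0;
}
```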
| @@ -924,7 +937,8 @@ static void x86_pmu_enable_event(struct perf_event *event) | |||
| 924 | { | 937 | { |
| 925 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 938 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 926 | if (cpuc->enabled) | 939 | if (cpuc->enabled) |
| 927 | __x86_pmu_enable_event(&event->hw); | 940 | __x86_pmu_enable_event(&event->hw, |
| 941 | ARCH_PERFMON_EVENTSEL_ENABLE); | ||
| 928 | } | 942 | } |
| 929 | 943 | ||
| 930 | /* | 944 | /* |
| @@ -950,7 +964,15 @@ static int x86_pmu_enable(struct perf_event *event) | |||
| 950 | if (n < 0) | 964 | if (n < 0) |
| 951 | return n; | 965 | return n; |
| 952 | 966 | ||
| 953 | ret = x86_schedule_events(cpuc, n, assign); | 967 | /* |
| 968 | * If group events scheduling transaction was started, | ||
| 969 | * skip the schedulability test here, it will be peformed | ||
| 969 | * skip the schedulability test here; it will be performed | ||
| 970 | * at commit time (->commit_txn) as a whole | ||
| 971 | */ | ||
| 972 | if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) | ||
| 973 | goto out; | ||
| 974 | |||
| 975 | ret = x86_pmu.schedule_events(cpuc, n, assign); | ||
| 954 | if (ret) | 976 | if (ret) |
| 955 | return ret; | 977 | return ret; |
| 956 | /* | 978 | /* |
| @@ -959,8 +981,10 @@ static int x86_pmu_enable(struct perf_event *event) | |||
| 959 | */ | 981 | */ |
| 960 | memcpy(cpuc->assign, assign, n*sizeof(int)); | 982 | memcpy(cpuc->assign, assign, n*sizeof(int)); |
| 961 | 983 | ||
| 984 | out: | ||
| 962 | cpuc->n_events = n; | 985 | cpuc->n_events = n; |
| 963 | cpuc->n_added += n - n0; | 986 | cpuc->n_added += n - n0; |
| 987 | cpuc->n_txn += n - n0; | ||
| 964 | 988 | ||
| 965 | return 0; | 989 | return 0; |
| 966 | } | 990 | } |
| @@ -991,11 +1015,12 @@ static void x86_pmu_unthrottle(struct perf_event *event) | |||
| 991 | void perf_event_print_debug(void) | 1015 | void perf_event_print_debug(void) |
| 992 | { | 1016 | { |
| 993 | u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; | 1017 | u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; |
| 1018 | u64 pebs; | ||
| 994 | struct cpu_hw_events *cpuc; | 1019 | struct cpu_hw_events *cpuc; |
| 995 | unsigned long flags; | 1020 | unsigned long flags; |
| 996 | int cpu, idx; | 1021 | int cpu, idx; |
| 997 | 1022 | ||
| 998 | if (!x86_pmu.num_events) | 1023 | if (!x86_pmu.num_counters) |
| 999 | return; | 1024 | return; |
| 1000 | 1025 | ||
| 1001 | local_irq_save(flags); | 1026 | local_irq_save(flags); |
| @@ -1008,16 +1033,18 @@ void perf_event_print_debug(void) | |||
| 1008 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); | 1033 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); |
| 1009 | rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); | 1034 | rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); |
| 1010 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); | 1035 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); |
| 1036 | rdmsrl(MSR_IA32_PEBS_ENABLE, pebs); | ||
| 1011 | 1037 | ||
| 1012 | pr_info("\n"); | 1038 | pr_info("\n"); |
| 1013 | pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); | 1039 | pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); |
| 1014 | pr_info("CPU#%d: status: %016llx\n", cpu, status); | 1040 | pr_info("CPU#%d: status: %016llx\n", cpu, status); |
| 1015 | pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); | 1041 | pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); |
| 1016 | pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); | 1042 | pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); |
| 1043 | pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs); | ||
| 1017 | } | 1044 | } |
| 1018 | pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); | 1045 | pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); |
| 1019 | 1046 | ||
| 1020 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 1047 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
| 1021 | rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); | 1048 | rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); |
| 1022 | rdmsrl(x86_pmu.perfctr + idx, pmc_count); | 1049 | rdmsrl(x86_pmu.perfctr + idx, pmc_count); |
| 1023 | 1050 | ||
| @@ -1030,7 +1057,7 @@ void perf_event_print_debug(void) | |||
| 1030 | pr_info("CPU#%d: gen-PMC%d left: %016llx\n", | 1057 | pr_info("CPU#%d: gen-PMC%d left: %016llx\n", |
| 1031 | cpu, idx, prev_left); | 1058 | cpu, idx, prev_left); |
| 1032 | } | 1059 | } |
| 1033 | for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { | 1060 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { |
| 1034 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); | 1061 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); |
| 1035 | 1062 | ||
| 1036 | pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", | 1063 | pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", |
| @@ -1064,6 +1091,14 @@ static void x86_pmu_disable(struct perf_event *event) | |||
| 1064 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1091 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 1065 | int i; | 1092 | int i; |
| 1066 | 1093 | ||
| 1094 | /* | ||
| 1095 | * If we're called during a txn, we don't need to do anything. | ||
| 1096 | * The events never got scheduled and ->cancel_txn will truncate | ||
| 1097 | * the event_list. | ||
| 1098 | */ | ||
| 1099 | if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) | ||
| 1100 | return; | ||
| 1101 | |||
| 1067 | x86_pmu_stop(event); | 1102 | x86_pmu_stop(event); |
| 1068 | 1103 | ||
| 1069 | for (i = 0; i < cpuc->n_events; i++) { | 1104 | for (i = 0; i < cpuc->n_events; i++) { |
| @@ -1095,7 +1130,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) | |||
| 1095 | 1130 | ||
| 1096 | cpuc = &__get_cpu_var(cpu_hw_events); | 1131 | cpuc = &__get_cpu_var(cpu_hw_events); |
| 1097 | 1132 | ||
| 1098 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 1133 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
| 1099 | if (!test_bit(idx, cpuc->active_mask)) | 1134 | if (!test_bit(idx, cpuc->active_mask)) |
| 1100 | continue; | 1135 | continue; |
| 1101 | 1136 | ||
| @@ -1103,7 +1138,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) | |||
| 1103 | hwc = &event->hw; | 1138 | hwc = &event->hw; |
| 1104 | 1139 | ||
| 1105 | val = x86_perf_event_update(event); | 1140 | val = x86_perf_event_update(event); |
| 1106 | if (val & (1ULL << (x86_pmu.event_bits - 1))) | 1141 | if (val & (1ULL << (x86_pmu.cntval_bits - 1))) |
| 1107 | continue; | 1142 | continue; |
| 1108 | 1143 | ||
| 1109 | /* | 1144 | /* |
| @@ -1146,7 +1181,6 @@ void set_perf_event_pending(void) | |||
| 1146 | 1181 | ||
| 1147 | void perf_events_lapic_init(void) | 1182 | void perf_events_lapic_init(void) |
| 1148 | { | 1183 | { |
| 1149 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 1150 | if (!x86_pmu.apic || !x86_pmu_initialized()) | 1184 | if (!x86_pmu.apic || !x86_pmu_initialized()) |
| 1151 | return; | 1185 | return; |
| 1152 | 1186 | ||
| @@ -1154,7 +1188,6 @@ void perf_events_lapic_init(void) | |||
| 1154 | * Always use NMI for PMU | 1188 | * Always use NMI for PMU |
| 1155 | */ | 1189 | */ |
| 1156 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 1190 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
| 1157 | #endif | ||
| 1158 | } | 1191 | } |
| 1159 | 1192 | ||
| 1160 | static int __kprobes | 1193 | static int __kprobes |
| @@ -1178,9 +1211,7 @@ perf_event_nmi_handler(struct notifier_block *self, | |||
| 1178 | 1211 | ||
| 1179 | regs = args->regs; | 1212 | regs = args->regs; |
| 1180 | 1213 | ||
| 1181 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 1182 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 1214 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
| 1183 | #endif | ||
| 1184 | /* | 1215 | /* |
| 1185 | * Can't rely on the handled return value to say it was our NMI, two | 1216 | * Can't rely on the handled return value to say it was our NMI, two |
| 1186 | * events could trigger 'simultaneously' raising two back-to-back NMIs. | 1217 | * events could trigger 'simultaneously' raising two back-to-back NMIs. |
| @@ -1217,118 +1248,11 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | |||
| 1217 | return &unconstrained; | 1248 | return &unconstrained; |
| 1218 | } | 1249 | } |
| 1219 | 1250 | ||
| 1220 | static int x86_event_sched_in(struct perf_event *event, | ||
| 1221 | struct perf_cpu_context *cpuctx) | ||
| 1222 | { | ||
| 1223 | int ret = 0; | ||
| 1224 | |||
| 1225 | event->state = PERF_EVENT_STATE_ACTIVE; | ||
| 1226 | event->oncpu = smp_processor_id(); | ||
| 1227 | event->tstamp_running += event->ctx->time - event->tstamp_stopped; | ||
| 1228 | |||
| 1229 | if (!is_x86_event(event)) | ||
| 1230 | ret = event->pmu->enable(event); | ||
| 1231 | |||
| 1232 | if (!ret && !is_software_event(event)) | ||
| 1233 | cpuctx->active_oncpu++; | ||
| 1234 | |||
| 1235 | if (!ret && event->attr.exclusive) | ||
| 1236 | cpuctx->exclusive = 1; | ||
| 1237 | |||
| 1238 | return ret; | ||
| 1239 | } | ||
| 1240 | |||
| 1241 | static void x86_event_sched_out(struct perf_event *event, | ||
| 1242 | struct perf_cpu_context *cpuctx) | ||
| 1243 | { | ||
| 1244 | event->state = PERF_EVENT_STATE_INACTIVE; | ||
| 1245 | event->oncpu = -1; | ||
| 1246 | |||
| 1247 | if (!is_x86_event(event)) | ||
| 1248 | event->pmu->disable(event); | ||
| 1249 | |||
| 1250 | event->tstamp_running -= event->ctx->time - event->tstamp_stopped; | ||
| 1251 | |||
| 1252 | if (!is_software_event(event)) | ||
| 1253 | cpuctx->active_oncpu--; | ||
| 1254 | |||
| 1255 | if (event->attr.exclusive || !cpuctx->active_oncpu) | ||
| 1256 | cpuctx->exclusive = 0; | ||
| 1257 | } | ||
| 1258 | |||
| 1259 | /* | ||
| 1260 | * Called to enable a whole group of events. | ||
| 1261 | * Returns 1 if the group was enabled, or -EAGAIN if it could not be. | ||
| 1262 | * Assumes the caller has disabled interrupts and has | ||
| 1263 | * frozen the PMU with hw_perf_save_disable. | ||
| 1264 | * | ||
| 1265 | * called with PMU disabled. If successful and return value 1, | ||
| 1266 | * then guaranteed to call perf_enable() and hw_perf_enable() | ||
| 1267 | */ | ||
| 1268 | int hw_perf_group_sched_in(struct perf_event *leader, | ||
| 1269 | struct perf_cpu_context *cpuctx, | ||
| 1270 | struct perf_event_context *ctx) | ||
| 1271 | { | ||
| 1272 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 1273 | struct perf_event *sub; | ||
| 1274 | int assign[X86_PMC_IDX_MAX]; | ||
| 1275 | int n0, n1, ret; | ||
| 1276 | |||
| 1277 | /* n0 = total number of events */ | ||
| 1278 | n0 = collect_events(cpuc, leader, true); | ||
| 1279 | if (n0 < 0) | ||
| 1280 | return n0; | ||
| 1281 | |||
| 1282 | ret = x86_schedule_events(cpuc, n0, assign); | ||
| 1283 | if (ret) | ||
| 1284 | return ret; | ||
| 1285 | |||
| 1286 | ret = x86_event_sched_in(leader, cpuctx); | ||
| 1287 | if (ret) | ||
| 1288 | return ret; | ||
| 1289 | |||
| 1290 | n1 = 1; | ||
| 1291 | list_for_each_entry(sub, &leader->sibling_list, group_entry) { | ||
| 1292 | if (sub->state > PERF_EVENT_STATE_OFF) { | ||
| 1293 | ret = x86_event_sched_in(sub, cpuctx); | ||
| 1294 | if (ret) | ||
| 1295 | goto undo; | ||
| 1296 | ++n1; | ||
| 1297 | } | ||
| 1298 | } | ||
| 1299 | /* | ||
| 1300 | * copy new assignment, now we know it is possible | ||
| 1301 | * will be used by hw_perf_enable() | ||
| 1302 | */ | ||
| 1303 | memcpy(cpuc->assign, assign, n0*sizeof(int)); | ||
| 1304 | |||
| 1305 | cpuc->n_events = n0; | ||
| 1306 | cpuc->n_added += n1; | ||
| 1307 | ctx->nr_active += n1; | ||
| 1308 | |||
| 1309 | /* | ||
| 1310 | * 1 means successful and events are active | ||
| 1311 | * This is not quite true because we defer | ||
| 1312 | * actual activation until hw_perf_enable() but | ||
| 1313 | * this way we* ensure caller won't try to enable | ||
| 1314 | * individual events | ||
| 1315 | */ | ||
| 1316 | return 1; | ||
| 1317 | undo: | ||
| 1318 | x86_event_sched_out(leader, cpuctx); | ||
| 1319 | n0 = 1; | ||
| 1320 | list_for_each_entry(sub, &leader->sibling_list, group_entry) { | ||
| 1321 | if (sub->state == PERF_EVENT_STATE_ACTIVE) { | ||
| 1322 | x86_event_sched_out(sub, cpuctx); | ||
| 1323 | if (++n0 == n1) | ||
| 1324 | break; | ||
| 1325 | } | ||
| 1326 | } | ||
| 1327 | return ret; | ||
| 1328 | } | ||
| 1329 | |||
| 1330 | #include "perf_event_amd.c" | 1251 | #include "perf_event_amd.c" |
| 1331 | #include "perf_event_p6.c" | 1252 | #include "perf_event_p6.c" |
| 1253 | #include "perf_event_p4.c" | ||
| 1254 | #include "perf_event_intel_lbr.c" | ||
| 1255 | #include "perf_event_intel_ds.c" | ||
| 1332 | #include "perf_event_intel.c" | 1256 | #include "perf_event_intel.c" |
| 1333 | 1257 | ||
| 1334 | static int __cpuinit | 1258 | static int __cpuinit |
| @@ -1402,48 +1326,50 @@ void __init init_hw_perf_events(void) | |||
| 1402 | 1326 | ||
| 1403 | pr_cont("%s PMU driver.\n", x86_pmu.name); | 1327 | pr_cont("%s PMU driver.\n", x86_pmu.name); |
| 1404 | 1328 | ||
| 1405 | if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) { | 1329 | if (x86_pmu.quirks) |
| 1330 | x86_pmu.quirks(); | ||
| 1331 | |||
| 1332 | if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { | ||
| 1406 | WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", | 1333 | WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", |
| 1407 | x86_pmu.num_events, X86_PMC_MAX_GENERIC); | 1334 | x86_pmu.num_counters, X86_PMC_MAX_GENERIC); |
| 1408 | x86_pmu.num_events = X86_PMC_MAX_GENERIC; | 1335 | x86_pmu.num_counters = X86_PMC_MAX_GENERIC; |
| 1409 | } | 1336 | } |
| 1410 | perf_event_mask = (1 << x86_pmu.num_events) - 1; | 1337 | x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; |
| 1411 | perf_max_events = x86_pmu.num_events; | 1338 | perf_max_events = x86_pmu.num_counters; |
| 1412 | 1339 | ||
| 1413 | if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) { | 1340 | if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { |
| 1414 | WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", | 1341 | WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", |
| 1415 | x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED); | 1342 | x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED); |
| 1416 | x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED; | 1343 | x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; |
| 1417 | } | 1344 | } |
| 1418 | 1345 | ||
| 1419 | perf_event_mask |= | 1346 | x86_pmu.intel_ctrl |= |
| 1420 | ((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED; | 1347 | ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; |
| 1421 | x86_pmu.intel_ctrl = perf_event_mask; | ||
| 1422 | 1348 | ||
| 1423 | perf_events_lapic_init(); | 1349 | perf_events_lapic_init(); |
| 1424 | register_die_notifier(&perf_event_nmi_notifier); | 1350 | register_die_notifier(&perf_event_nmi_notifier); |
| 1425 | 1351 | ||
| 1426 | unconstrained = (struct event_constraint) | 1352 | unconstrained = (struct event_constraint) |
| 1427 | __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1, | 1353 | __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, |
| 1428 | 0, x86_pmu.num_events); | 1354 | 0, x86_pmu.num_counters); |
| 1429 | 1355 | ||
| 1430 | if (x86_pmu.event_constraints) { | 1356 | if (x86_pmu.event_constraints) { |
| 1431 | for_each_event_constraint(c, x86_pmu.event_constraints) { | 1357 | for_each_event_constraint(c, x86_pmu.event_constraints) { |
| 1432 | if (c->cmask != INTEL_ARCH_FIXED_MASK) | 1358 | if (c->cmask != X86_RAW_EVENT_MASK) |
| 1433 | continue; | 1359 | continue; |
| 1434 | 1360 | ||
| 1435 | c->idxmsk64 |= (1ULL << x86_pmu.num_events) - 1; | 1361 | c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; |
| 1436 | c->weight += x86_pmu.num_events; | 1362 | c->weight += x86_pmu.num_counters; |
| 1437 | } | 1363 | } |
| 1438 | } | 1364 | } |
| 1439 | 1365 | ||
| 1440 | pr_info("... version: %d\n", x86_pmu.version); | 1366 | pr_info("... version: %d\n", x86_pmu.version); |
| 1441 | pr_info("... bit width: %d\n", x86_pmu.event_bits); | 1367 | pr_info("... bit width: %d\n", x86_pmu.cntval_bits); |
| 1442 | pr_info("... generic registers: %d\n", x86_pmu.num_events); | 1368 | pr_info("... generic registers: %d\n", x86_pmu.num_counters); |
| 1443 | pr_info("... value mask: %016Lx\n", x86_pmu.event_mask); | 1369 | pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask); |
| 1444 | pr_info("... max period: %016Lx\n", x86_pmu.max_period); | 1370 | pr_info("... max period: %016Lx\n", x86_pmu.max_period); |
| 1445 | pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed); | 1371 | pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); |
| 1446 | pr_info("... event mask: %016Lx\n", perf_event_mask); | 1372 | pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); |
| 1447 | 1373 | ||
| 1448 | perf_cpu_notifier(x86_pmu_notifier); | 1374 | perf_cpu_notifier(x86_pmu_notifier); |
| 1449 | } | 1375 | } |
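init_hw_perf_events() now builds x86_pmu.intel_ctrl directly instead of going through the removed perf_event_mask global: the low bits cover the generic counters and the fixed-counter bits are shifted up to X86_PMC_IDX_FIXED. A hedged worked example for a typical Core-era PMU; the 4 generic / 3 fixed split and X86_PMC_IDX_FIXED == 32 are assumptions for illustration:

```c
/* Hedged sketch of the global-control mask constructed in the hunk above. */
#include <stdint.h>
#include <stdio.h>

#define X86_PMC_IDX_FIXED 32

int main(void)
{
	int num_counters = 4, num_counters_fixed = 3;

	uint64_t ctrl = (1ULL << num_counters) - 1;			/* 0xf, generic PMCs  */
	ctrl |= ((1ULL << num_counters_fixed) - 1) << X86_PMC_IDX_FIXED; /* bits 32-34, fixed */

	printf("intel_ctrl = %#llx\n", (unsigned long long)ctrl);	/* 0x70000000f */
	return 0;
}
```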
| @@ -1453,6 +1379,71 @@ static inline void x86_pmu_read(struct perf_event *event) | |||
| 1453 | x86_perf_event_update(event); | 1379 | x86_perf_event_update(event); |
| 1454 | } | 1380 | } |
| 1455 | 1381 | ||
| 1382 | /* | ||
| 1383 | * Start group events scheduling transaction | ||
| 1384 | * Set the flag to make pmu::enable() not perform the | ||
| 1385 | * schedulability test, it will be performed at commit time | ||
| 1386 | */ | ||
| 1387 | static void x86_pmu_start_txn(const struct pmu *pmu) | ||
| 1388 | { | ||
| 1389 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 1390 | |||
| 1391 | cpuc->group_flag |= PERF_EVENT_TXN_STARTED; | ||
| 1392 | cpuc->n_txn = 0; | ||
| 1393 | } | ||
| 1394 | |||
| 1395 | /* | ||
| 1396 | * Stop group events scheduling transaction | ||
| 1397 | * Clear the flag and pmu::enable() will perform the | ||
| 1398 | * schedulability test. | ||
| 1399 | */ | ||
| 1400 | static void x86_pmu_cancel_txn(const struct pmu *pmu) | ||
| 1401 | { | ||
| 1402 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 1403 | |||
| 1404 | cpuc->group_flag &= ~PERF_EVENT_TXN_STARTED; | ||
| 1405 | /* | ||
| 1406 | * Truncate the collected events. | ||
| 1407 | */ | ||
| 1408 | cpuc->n_added -= cpuc->n_txn; | ||
| 1409 | cpuc->n_events -= cpuc->n_txn; | ||
| 1410 | } | ||
| 1411 | |||
| 1412 | /* | ||
| 1413 | * Commit group events scheduling transaction | ||
| 1414 | * Perform the group schedulability test as a whole | ||
| 1415 | * Return 0 on success | ||
| 1416 | */ | ||
| 1417 | static int x86_pmu_commit_txn(const struct pmu *pmu) | ||
| 1418 | { | ||
| 1419 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 1420 | int assign[X86_PMC_IDX_MAX]; | ||
| 1421 | int n, ret; | ||
| 1422 | |||
| 1423 | n = cpuc->n_events; | ||
| 1424 | |||
| 1425 | if (!x86_pmu_initialized()) | ||
| 1426 | return -EAGAIN; | ||
| 1427 | |||
| 1428 | ret = x86_pmu.schedule_events(cpuc, n, assign); | ||
| 1429 | if (ret) | ||
| 1430 | return ret; | ||
| 1431 | |||
| 1432 | /* | ||
| 1433 | * copy new assignment, now we know it is possible | ||
| 1434 | * will be used by hw_perf_enable() | ||
| 1435 | */ | ||
| 1436 | memcpy(cpuc->assign, assign, n*sizeof(int)); | ||
| 1437 | |||
| 1438 | /* | ||
| 1439 | * Clear out the txn count so that ->cancel_txn() which gets | ||
| 1440 | * run after ->commit_txn() doesn't undo things. | ||
| 1441 | */ | ||
| 1442 | cpuc->n_txn = 0; | ||
| 1443 | |||
| 1444 | return 0; | ||
| 1445 | } | ||
| 1446 | |||
| 1456 | static const struct pmu pmu = { | 1447 | static const struct pmu pmu = { |
| 1457 | .enable = x86_pmu_enable, | 1448 | .enable = x86_pmu_enable, |
| 1458 | .disable = x86_pmu_disable, | 1449 | .disable = x86_pmu_disable, |
| @@ -1460,9 +1451,38 @@ static const struct pmu pmu = { | |||
| 1460 | .stop = x86_pmu_stop, | 1451 | .stop = x86_pmu_stop, |
| 1461 | .read = x86_pmu_read, | 1452 | .read = x86_pmu_read, |
| 1462 | .unthrottle = x86_pmu_unthrottle, | 1453 | .unthrottle = x86_pmu_unthrottle, |
| 1454 | .start_txn = x86_pmu_start_txn, | ||
| 1455 | .cancel_txn = x86_pmu_cancel_txn, | ||
| 1456 | .commit_txn = x86_pmu_commit_txn, | ||
| 1463 | }; | 1457 | }; |
| 1464 | 1458 | ||
| 1465 | /* | 1459 | /* |
| 1460 | * validate that we can schedule this event | ||
| 1461 | */ | ||
| 1462 | static int validate_event(struct perf_event *event) | ||
| 1463 | { | ||
| 1464 | struct cpu_hw_events *fake_cpuc; | ||
| 1465 | struct event_constraint *c; | ||
| 1466 | int ret = 0; | ||
| 1467 | |||
| 1468 | fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO); | ||
| 1469 | if (!fake_cpuc) | ||
| 1470 | return -ENOMEM; | ||
| 1471 | |||
| 1472 | c = x86_pmu.get_event_constraints(fake_cpuc, event); | ||
| 1473 | |||
| 1474 | if (!c || !c->weight) | ||
| 1475 | ret = -ENOSPC; | ||
| 1476 | |||
| 1477 | if (x86_pmu.put_event_constraints) | ||
| 1478 | x86_pmu.put_event_constraints(fake_cpuc, event); | ||
| 1479 | |||
| 1480 | kfree(fake_cpuc); | ||
| 1481 | |||
| 1482 | return ret; | ||
| 1483 | } | ||
| 1484 | |||
| 1485 | /* | ||
| 1466 | * validate a single event group | 1486 | * validate a single event group |
| 1467 | * | 1487 | * |
| 1468 | * validation include: | 1488 | * validation include: |
| @@ -1502,7 +1522,7 @@ static int validate_group(struct perf_event *event) | |||
| 1502 | 1522 | ||
| 1503 | fake_cpuc->n_events = n; | 1523 | fake_cpuc->n_events = n; |
| 1504 | 1524 | ||
| 1505 | ret = x86_schedule_events(fake_cpuc, n, NULL); | 1525 | ret = x86_pmu.schedule_events(fake_cpuc, n, NULL); |
| 1506 | 1526 | ||
| 1507 | out_free: | 1527 | out_free: |
| 1508 | kfree(fake_cpuc); | 1528 | kfree(fake_cpuc); |
| @@ -1527,6 +1547,8 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) | |||
| 1527 | 1547 | ||
| 1528 | if (event->group_leader != event) | 1548 | if (event->group_leader != event) |
| 1529 | err = validate_group(event); | 1549 | err = validate_group(event); |
| 1550 | else | ||
| 1551 | err = validate_event(event); | ||
| 1530 | 1552 | ||
| 1531 | event->pmu = tmp; | 1553 | event->pmu = tmp; |
| 1532 | } | 1554 | } |
| @@ -1574,8 +1596,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) | |||
| 1574 | { | 1596 | { |
| 1575 | struct perf_callchain_entry *entry = data; | 1597 | struct perf_callchain_entry *entry = data; |
| 1576 | 1598 | ||
| 1577 | if (reliable) | 1599 | callchain_store(entry, addr); |
| 1578 | callchain_store(entry, addr); | ||
| 1579 | } | 1600 | } |
| 1580 | 1601 | ||
| 1581 | static const struct stacktrace_ops backtrace_ops = { | 1602 | static const struct stacktrace_ops backtrace_ops = { |
| @@ -1597,41 +1618,6 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) | |||
| 1597 | dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); | 1618 | dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); |
| 1598 | } | 1619 | } |
| 1599 | 1620 | ||
| 1600 | /* | ||
| 1601 | * best effort, GUP based copy_from_user() that assumes IRQ or NMI context | ||
| 1602 | */ | ||
| 1603 | static unsigned long | ||
| 1604 | copy_from_user_nmi(void *to, const void __user *from, unsigned long n) | ||
| 1605 | { | ||
| 1606 | unsigned long offset, addr = (unsigned long)from; | ||
| 1607 | int type = in_nmi() ? KM_NMI : KM_IRQ0; | ||
| 1608 | unsigned long size, len = 0; | ||
| 1609 | struct page *page; | ||
| 1610 | void *map; | ||
| 1611 | int ret; | ||
| 1612 | |||
| 1613 | do { | ||
| 1614 | ret = __get_user_pages_fast(addr, 1, 0, &page); | ||
| 1615 | if (!ret) | ||
| 1616 | break; | ||
| 1617 | |||
| 1618 | offset = addr & (PAGE_SIZE - 1); | ||
| 1619 | size = min(PAGE_SIZE - offset, n - len); | ||
| 1620 | |||
| 1621 | map = kmap_atomic(page, type); | ||
| 1622 | memcpy(to, map+offset, size); | ||
| 1623 | kunmap_atomic(map, type); | ||
| 1624 | put_page(page); | ||
| 1625 | |||
| 1626 | len += size; | ||
| 1627 | to += size; | ||
| 1628 | addr += size; | ||
| 1629 | |||
| 1630 | } while (len < n); | ||
| 1631 | |||
| 1632 | return len; | ||
| 1633 | } | ||
| 1634 | |||
| 1635 | #ifdef CONFIG_COMPAT | 1621 | #ifdef CONFIG_COMPAT |
| 1636 | static inline int | 1622 | static inline int |
| 1637 | perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) | 1623 | perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) |
| @@ -1727,6 +1713,11 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | |||
| 1727 | { | 1713 | { |
| 1728 | struct perf_callchain_entry *entry; | 1714 | struct perf_callchain_entry *entry; |
| 1729 | 1715 | ||
| 1716 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { | ||
| 1717 | /* TODO: We don't support guest os callchain now */ | ||
| 1718 | return NULL; | ||
| 1719 | } | ||
| 1720 | |||
| 1730 | if (in_nmi()) | 1721 | if (in_nmi()) |
| 1731 | entry = &__get_cpu_var(pmc_nmi_entry); | 1722 | entry = &__get_cpu_var(pmc_nmi_entry); |
| 1732 | else | 1723 | else |
| @@ -1748,5 +1739,43 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski | |||
| 1748 | */ | 1739 | */ |
| 1749 | regs->bp = rewind_frame_pointer(skip + 1); | 1740 | regs->bp = rewind_frame_pointer(skip + 1); |
| 1750 | regs->cs = __KERNEL_CS; | 1741 | regs->cs = __KERNEL_CS; |
| 1751 | local_save_flags(regs->flags); | 1742 | /* |
| 1743 | * We abuse bit 3 to pass exact information, see perf_misc_flags | ||
| 1744 | * and the comment with PERF_EFLAGS_EXACT. | ||
| 1745 | */ | ||
| 1746 | regs->flags = 0; | ||
| 1747 | } | ||
| 1748 | |||
| 1749 | unsigned long perf_instruction_pointer(struct pt_regs *regs) | ||
| 1750 | { | ||
| 1751 | unsigned long ip; | ||
| 1752 | |||
| 1753 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) | ||
| 1754 | ip = perf_guest_cbs->get_guest_ip(); | ||
| 1755 | else | ||
| 1756 | ip = instruction_pointer(regs); | ||
| 1757 | |||
| 1758 | return ip; | ||
| 1759 | } | ||
| 1760 | |||
| 1761 | unsigned long perf_misc_flags(struct pt_regs *regs) | ||
| 1762 | { | ||
| 1763 | int misc = 0; | ||
| 1764 | |||
| 1765 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { | ||
| 1766 | if (perf_guest_cbs->is_user_mode()) | ||
| 1767 | misc |= PERF_RECORD_MISC_GUEST_USER; | ||
| 1768 | else | ||
| 1769 | misc |= PERF_RECORD_MISC_GUEST_KERNEL; | ||
| 1770 | } else { | ||
| 1771 | if (user_mode(regs)) | ||
| 1772 | misc |= PERF_RECORD_MISC_USER; | ||
| 1773 | else | ||
| 1774 | misc |= PERF_RECORD_MISC_KERNEL; | ||
| 1775 | } | ||
| 1776 | |||
| 1777 | if (regs->flags & PERF_EFLAGS_EXACT) | ||
| 1778 | misc |= PERF_RECORD_MISC_EXACT_IP; | ||
| 1779 | |||
| 1780 | return misc; | ||
| 1752 | } | 1781 | } |
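perf_misc_flags() and perf_instruction_pointer() above consult an optional set of guest callbacks, so PMI samples taken while a vCPU is running can be tagged PERF_RECORD_MISC_GUEST_KERNEL/GUEST_USER and report the guest RIP rather than the host one. A hedged sketch of the provider side, roughly what a hypervisor module such as KVM registers; the callback bodies are placeholders, only the registration shape is the point:

```c
/* Hedged sketch of a perf_guest_cbs provider. */
static int sketch_is_in_guest(void)
{
	return 0;	/* e.g. "is a vCPU currently loaded on this CPU?" */
}

static int sketch_is_user_mode(void)
{
	return 0;	/* guest CPL == 3? */
}

static unsigned long sketch_get_guest_ip(void)
{
	return 0;	/* guest RIP captured at VM-exit */
}

static struct perf_guest_info_callbacks sketch_guest_cbs = {
	.is_in_guest	= sketch_is_in_guest,
	.is_user_mode	= sketch_is_user_mode,
	.get_guest_ip	= sketch_get_guest_ip,
};

/* Hooked up at module init/exit with:
 *	perf_register_guest_info_callbacks(&sketch_guest_cbs);
 *	perf_unregister_guest_info_callbacks(&sketch_guest_cbs);
 */
```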
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index db6f7d4056e1..611df11ba15e 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c | |||
| @@ -2,7 +2,7 @@ | |||
| 2 | 2 | ||
| 3 | static DEFINE_RAW_SPINLOCK(amd_nb_lock); | 3 | static DEFINE_RAW_SPINLOCK(amd_nb_lock); |
| 4 | 4 | ||
| 5 | static __initconst u64 amd_hw_cache_event_ids | 5 | static __initconst const u64 amd_hw_cache_event_ids |
| 6 | [PERF_COUNT_HW_CACHE_MAX] | 6 | [PERF_COUNT_HW_CACHE_MAX] |
| 7 | [PERF_COUNT_HW_CACHE_OP_MAX] | 7 | [PERF_COUNT_HW_CACHE_OP_MAX] |
| 8 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 8 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
| @@ -111,22 +111,19 @@ static u64 amd_pmu_event_map(int hw_event) | |||
| 111 | return amd_perfmon_event_map[hw_event]; | 111 | return amd_perfmon_event_map[hw_event]; |
| 112 | } | 112 | } |
| 113 | 113 | ||
| 114 | static u64 amd_pmu_raw_event(u64 hw_event) | 114 | static int amd_pmu_hw_config(struct perf_event *event) |
| 115 | { | 115 | { |
| 116 | #define K7_EVNTSEL_EVENT_MASK 0xF000000FFULL | 116 | int ret = x86_pmu_hw_config(event); |
| 117 | #define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL | 117 | |
| 118 | #define K7_EVNTSEL_EDGE_MASK 0x000040000ULL | 118 | if (ret) |
| 119 | #define K7_EVNTSEL_INV_MASK 0x000800000ULL | 119 | return ret; |
| 120 | #define K7_EVNTSEL_REG_MASK 0x0FF000000ULL | 120 | |
| 121 | 121 | if (event->attr.type != PERF_TYPE_RAW) | |
| 122 | #define K7_EVNTSEL_MASK \ | 122 | return 0; |
| 123 | (K7_EVNTSEL_EVENT_MASK | \ | 123 | |
| 124 | K7_EVNTSEL_UNIT_MASK | \ | 124 | event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK; |
| 125 | K7_EVNTSEL_EDGE_MASK | \ | 125 | |
| 126 | K7_EVNTSEL_INV_MASK | \ | 126 | return 0; |
| 127 | K7_EVNTSEL_REG_MASK) | ||
| 128 | |||
| 129 | return hw_event & K7_EVNTSEL_MASK; | ||
| 130 | } | 127 | } |
| 131 | 128 | ||
| 132 | /* | 129 | /* |
| @@ -165,7 +162,7 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc, | |||
| 165 | * be removed on one CPU at a time AND PMU is disabled | 162 | * be removed on one CPU at a time AND PMU is disabled |
| 166 | * when we come here | 163 | * when we come here |
| 167 | */ | 164 | */ |
| 168 | for (i = 0; i < x86_pmu.num_events; i++) { | 165 | for (i = 0; i < x86_pmu.num_counters; i++) { |
| 169 | if (nb->owners[i] == event) { | 166 | if (nb->owners[i] == event) { |
| 170 | cmpxchg(nb->owners+i, event, NULL); | 167 | cmpxchg(nb->owners+i, event, NULL); |
| 171 | break; | 168 | break; |
| @@ -215,7 +212,7 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | |||
| 215 | struct hw_perf_event *hwc = &event->hw; | 212 | struct hw_perf_event *hwc = &event->hw; |
| 216 | struct amd_nb *nb = cpuc->amd_nb; | 213 | struct amd_nb *nb = cpuc->amd_nb; |
| 217 | struct perf_event *old = NULL; | 214 | struct perf_event *old = NULL; |
| 218 | int max = x86_pmu.num_events; | 215 | int max = x86_pmu.num_counters; |
| 219 | int i, j, k = -1; | 216 | int i, j, k = -1; |
| 220 | 217 | ||
| 221 | /* | 218 | /* |
| @@ -293,7 +290,7 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) | |||
| 293 | /* | 290 | /* |
| 294 | * initialize all possible NB constraints | 291 | * initialize all possible NB constraints |
| 295 | */ | 292 | */ |
| 296 | for (i = 0; i < x86_pmu.num_events; i++) { | 293 | for (i = 0; i < x86_pmu.num_counters; i++) { |
| 297 | __set_bit(i, nb->event_constraints[i].idxmsk); | 294 | __set_bit(i, nb->event_constraints[i].idxmsk); |
| 298 | nb->event_constraints[i].weight = 1; | 295 | nb->event_constraints[i].weight = 1; |
| 299 | } | 296 | } |
| @@ -371,21 +368,22 @@ static void amd_pmu_cpu_dead(int cpu) | |||
| 371 | raw_spin_unlock(&amd_nb_lock); | 368 | raw_spin_unlock(&amd_nb_lock); |
| 372 | } | 369 | } |
| 373 | 370 | ||
| 374 | static __initconst struct x86_pmu amd_pmu = { | 371 | static __initconst const struct x86_pmu amd_pmu = { |
| 375 | .name = "AMD", | 372 | .name = "AMD", |
| 376 | .handle_irq = x86_pmu_handle_irq, | 373 | .handle_irq = x86_pmu_handle_irq, |
| 377 | .disable_all = x86_pmu_disable_all, | 374 | .disable_all = x86_pmu_disable_all, |
| 378 | .enable_all = x86_pmu_enable_all, | 375 | .enable_all = x86_pmu_enable_all, |
| 379 | .enable = x86_pmu_enable_event, | 376 | .enable = x86_pmu_enable_event, |
| 380 | .disable = x86_pmu_disable_event, | 377 | .disable = x86_pmu_disable_event, |
| 378 | .hw_config = amd_pmu_hw_config, | ||
| 379 | .schedule_events = x86_schedule_events, | ||
| 381 | .eventsel = MSR_K7_EVNTSEL0, | 380 | .eventsel = MSR_K7_EVNTSEL0, |
| 382 | .perfctr = MSR_K7_PERFCTR0, | 381 | .perfctr = MSR_K7_PERFCTR0, |
| 383 | .event_map = amd_pmu_event_map, | 382 | .event_map = amd_pmu_event_map, |
| 384 | .raw_event = amd_pmu_raw_event, | ||
| 385 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), | 383 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), |
| 386 | .num_events = 4, | 384 | .num_counters = 4, |
| 387 | .event_bits = 48, | 385 | .cntval_bits = 48, |
| 388 | .event_mask = (1ULL << 48) - 1, | 386 | .cntval_mask = (1ULL << 48) - 1, |
| 389 | .apic = 1, | 387 | .apic = 1, |
| 390 | /* use highest bit to detect overflow */ | 388 | /* use highest bit to detect overflow */ |
| 391 | .max_period = (1ULL << 47) - 1, | 389 | .max_period = (1ULL << 47) - 1, |
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 9c794ac87837..fdbc652d3feb 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
| @@ -88,7 +88,7 @@ static u64 intel_pmu_event_map(int hw_event) | |||
| 88 | return intel_perfmon_event_map[hw_event]; | 88 | return intel_perfmon_event_map[hw_event]; |
| 89 | } | 89 | } |
| 90 | 90 | ||
| 91 | static __initconst u64 westmere_hw_cache_event_ids | 91 | static __initconst const u64 westmere_hw_cache_event_ids |
| 92 | [PERF_COUNT_HW_CACHE_MAX] | 92 | [PERF_COUNT_HW_CACHE_MAX] |
| 93 | [PERF_COUNT_HW_CACHE_OP_MAX] | 93 | [PERF_COUNT_HW_CACHE_OP_MAX] |
| 94 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 94 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
| @@ -179,7 +179,7 @@ static __initconst u64 westmere_hw_cache_event_ids | |||
| 179 | }, | 179 | }, |
| 180 | }; | 180 | }; |
| 181 | 181 | ||
| 182 | static __initconst u64 nehalem_hw_cache_event_ids | 182 | static __initconst const u64 nehalem_hw_cache_event_ids |
| 183 | [PERF_COUNT_HW_CACHE_MAX] | 183 | [PERF_COUNT_HW_CACHE_MAX] |
| 184 | [PERF_COUNT_HW_CACHE_OP_MAX] | 184 | [PERF_COUNT_HW_CACHE_OP_MAX] |
| 185 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 185 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
| @@ -270,7 +270,7 @@ static __initconst u64 nehalem_hw_cache_event_ids | |||
| 270 | }, | 270 | }, |
| 271 | }; | 271 | }; |
| 272 | 272 | ||
| 273 | static __initconst u64 core2_hw_cache_event_ids | 273 | static __initconst const u64 core2_hw_cache_event_ids |
| 274 | [PERF_COUNT_HW_CACHE_MAX] | 274 | [PERF_COUNT_HW_CACHE_MAX] |
| 275 | [PERF_COUNT_HW_CACHE_OP_MAX] | 275 | [PERF_COUNT_HW_CACHE_OP_MAX] |
| 276 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 276 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
| @@ -361,7 +361,7 @@ static __initconst u64 core2_hw_cache_event_ids | |||
| 361 | }, | 361 | }, |
| 362 | }; | 362 | }; |
| 363 | 363 | ||
| 364 | static __initconst u64 atom_hw_cache_event_ids | 364 | static __initconst const u64 atom_hw_cache_event_ids |
| 365 | [PERF_COUNT_HW_CACHE_MAX] | 365 | [PERF_COUNT_HW_CACHE_MAX] |
| 366 | [PERF_COUNT_HW_CACHE_OP_MAX] | 366 | [PERF_COUNT_HW_CACHE_OP_MAX] |
| 367 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 367 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
| @@ -452,60 +452,6 @@ static __initconst u64 atom_hw_cache_event_ids | |||
| 452 | }, | 452 | }, |
| 453 | }; | 453 | }; |
| 454 | 454 | ||
| 455 | static u64 intel_pmu_raw_event(u64 hw_event) | ||
| 456 | { | ||
| 457 | #define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
| 458 | #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
| 459 | #define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL | ||
| 460 | #define CORE_EVNTSEL_INV_MASK 0x00800000ULL | ||
| 461 | #define CORE_EVNTSEL_REG_MASK 0xFF000000ULL | ||
| 462 | |||
| 463 | #define CORE_EVNTSEL_MASK \ | ||
| 464 | (INTEL_ARCH_EVTSEL_MASK | \ | ||
| 465 | INTEL_ARCH_UNIT_MASK | \ | ||
| 466 | INTEL_ARCH_EDGE_MASK | \ | ||
| 467 | INTEL_ARCH_INV_MASK | \ | ||
| 468 | INTEL_ARCH_CNT_MASK) | ||
| 469 | |||
| 470 | return hw_event & CORE_EVNTSEL_MASK; | ||
| 471 | } | ||
| 472 | |||
| 473 | static void intel_pmu_enable_bts(u64 config) | ||
| 474 | { | ||
| 475 | unsigned long debugctlmsr; | ||
| 476 | |||
| 477 | debugctlmsr = get_debugctlmsr(); | ||
| 478 | |||
| 479 | debugctlmsr |= X86_DEBUGCTL_TR; | ||
| 480 | debugctlmsr |= X86_DEBUGCTL_BTS; | ||
| 481 | debugctlmsr |= X86_DEBUGCTL_BTINT; | ||
| 482 | |||
| 483 | if (!(config & ARCH_PERFMON_EVENTSEL_OS)) | ||
| 484 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; | ||
| 485 | |||
| 486 | if (!(config & ARCH_PERFMON_EVENTSEL_USR)) | ||
| 487 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; | ||
| 488 | |||
| 489 | update_debugctlmsr(debugctlmsr); | ||
| 490 | } | ||
| 491 | |||
| 492 | static void intel_pmu_disable_bts(void) | ||
| 493 | { | ||
| 494 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 495 | unsigned long debugctlmsr; | ||
| 496 | |||
| 497 | if (!cpuc->ds) | ||
| 498 | return; | ||
| 499 | |||
| 500 | debugctlmsr = get_debugctlmsr(); | ||
| 501 | |||
| 502 | debugctlmsr &= | ||
| 503 | ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | | ||
| 504 | X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); | ||
| 505 | |||
| 506 | update_debugctlmsr(debugctlmsr); | ||
| 507 | } | ||
| 508 | |||
| 509 | static void intel_pmu_disable_all(void) | 455 | static void intel_pmu_disable_all(void) |
| 510 | { | 456 | { |
| 511 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 457 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| @@ -514,12 +460,17 @@ static void intel_pmu_disable_all(void) | |||
| 514 | 460 | ||
| 515 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) | 461 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) |
| 516 | intel_pmu_disable_bts(); | 462 | intel_pmu_disable_bts(); |
| 463 | |||
| 464 | intel_pmu_pebs_disable_all(); | ||
| 465 | intel_pmu_lbr_disable_all(); | ||
| 517 | } | 466 | } |
| 518 | 467 | ||
| 519 | static void intel_pmu_enable_all(void) | 468 | static void intel_pmu_enable_all(int added) |
| 520 | { | 469 | { |
| 521 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 470 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 522 | 471 | ||
| 472 | intel_pmu_pebs_enable_all(); | ||
| 473 | intel_pmu_lbr_enable_all(); | ||
| 523 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); | 474 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); |
| 524 | 475 | ||
| 525 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { | 476 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { |
| @@ -533,6 +484,42 @@ static void intel_pmu_enable_all(void) | |||
| 533 | } | 484 | } |
| 534 | } | 485 | } |
| 535 | 486 | ||
| 487 | /* | ||
| 488 | * Workaround for: | ||
| 489 | * Intel Errata AAK100 (model 26) | ||
| 490 | * Intel Errata AAP53 (model 30) | ||
| 491 | * Intel Errata BD53 (model 44) | ||
| 492 | * | ||
| 493 | * These chips need to be 'reset' when adding counters by programming | ||
| 494 | * the magic three (non counting) events 0x4300D2, 0x4300B1 and 0x4300B5 | ||
| 495 | * either in sequence on the same PMC or on different PMCs. | ||
| 496 | */ | ||
| 497 | static void intel_pmu_nhm_enable_all(int added) | ||
| 498 | { | ||
| 499 | if (added) { | ||
| 500 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 501 | int i; | ||
| 502 | |||
| 503 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 0, 0x4300D2); | ||
| 504 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x4300B1); | ||
| 505 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x4300B5); | ||
| 506 | |||
| 507 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x3); | ||
| 508 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0); | ||
| 509 | |||
| 510 | for (i = 0; i < 3; i++) { | ||
| 511 | struct perf_event *event = cpuc->events[i]; | ||
| 512 | |||
| 513 | if (!event) | ||
| 514 | continue; | ||
| 515 | |||
| 516 | __x86_pmu_enable_event(&event->hw, | ||
| 517 | ARCH_PERFMON_EVENTSEL_ENABLE); | ||
| 518 | } | ||
| 519 | } | ||
| 520 | intel_pmu_enable_all(added); | ||
| 521 | } | ||
| 522 | |||
| 536 | static inline u64 intel_pmu_get_status(void) | 523 | static inline u64 intel_pmu_get_status(void) |
| 537 | { | 524 | { |
| 538 | u64 status; | 525 | u64 status; |
| @@ -547,8 +534,7 @@ static inline void intel_pmu_ack_status(u64 ack) | |||
| 547 | wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); | 534 | wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); |
| 548 | } | 535 | } |
| 549 | 536 | ||
| 550 | static inline void | 537 | static void intel_pmu_disable_fixed(struct hw_perf_event *hwc) |
| 551 | intel_pmu_disable_fixed(struct hw_perf_event *hwc) | ||
| 552 | { | 538 | { |
| 553 | int idx = hwc->idx - X86_PMC_IDX_FIXED; | 539 | int idx = hwc->idx - X86_PMC_IDX_FIXED; |
| 554 | u64 ctrl_val, mask; | 540 | u64 ctrl_val, mask; |
| @@ -557,71 +543,10 @@ intel_pmu_disable_fixed(struct hw_perf_event *hwc) | |||
| 557 | 543 | ||
| 558 | rdmsrl(hwc->config_base, ctrl_val); | 544 | rdmsrl(hwc->config_base, ctrl_val); |
| 559 | ctrl_val &= ~mask; | 545 | ctrl_val &= ~mask; |
| 560 | (void)checking_wrmsrl(hwc->config_base, ctrl_val); | 546 | wrmsrl(hwc->config_base, ctrl_val); |
| 561 | } | ||
| 562 | |||
| 563 | static void intel_pmu_drain_bts_buffer(void) | ||
| 564 | { | ||
| 565 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 566 | struct debug_store *ds = cpuc->ds; | ||
| 567 | struct bts_record { | ||
| 568 | u64 from; | ||
| 569 | u64 to; | ||
| 570 | u64 flags; | ||
| 571 | }; | ||
| 572 | struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; | ||
| 573 | struct bts_record *at, *top; | ||
| 574 | struct perf_output_handle handle; | ||
| 575 | struct perf_event_header header; | ||
| 576 | struct perf_sample_data data; | ||
| 577 | struct pt_regs regs; | ||
| 578 | |||
| 579 | if (!event) | ||
| 580 | return; | ||
| 581 | |||
| 582 | if (!ds) | ||
| 583 | return; | ||
| 584 | |||
| 585 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; | ||
| 586 | top = (struct bts_record *)(unsigned long)ds->bts_index; | ||
| 587 | |||
| 588 | if (top <= at) | ||
| 589 | return; | ||
| 590 | |||
| 591 | ds->bts_index = ds->bts_buffer_base; | ||
| 592 | |||
| 593 | perf_sample_data_init(&data, 0); | ||
| 594 | |||
| 595 | data.period = event->hw.last_period; | ||
| 596 | regs.ip = 0; | ||
| 597 | |||
| 598 | /* | ||
| 599 | * Prepare a generic sample, i.e. fill in the invariant fields. | ||
| 600 | * We will overwrite the from and to address before we output | ||
| 601 | * the sample. | ||
| 602 | */ | ||
| 603 | perf_prepare_sample(&header, &data, event, ®s); | ||
| 604 | |||
| 605 | if (perf_output_begin(&handle, event, | ||
| 606 | header.size * (top - at), 1, 1)) | ||
| 607 | return; | ||
| 608 | |||
| 609 | for (; at < top; at++) { | ||
| 610 | data.ip = at->from; | ||
| 611 | data.addr = at->to; | ||
| 612 | |||
| 613 | perf_output_sample(&handle, &header, &data, event); | ||
| 614 | } | ||
| 615 | |||
| 616 | perf_output_end(&handle); | ||
| 617 | |||
| 618 | /* There's new data available. */ | ||
| 619 | event->hw.interrupts++; | ||
| 620 | event->pending_kill = POLL_IN; | ||
| 621 | } | 547 | } |
| 622 | 548 | ||
| 623 | static inline void | 549 | static void intel_pmu_disable_event(struct perf_event *event) |
| 624 | intel_pmu_disable_event(struct perf_event *event) | ||
| 625 | { | 550 | { |
| 626 | struct hw_perf_event *hwc = &event->hw; | 551 | struct hw_perf_event *hwc = &event->hw; |
| 627 | 552 | ||
| @@ -637,14 +562,15 @@ intel_pmu_disable_event(struct perf_event *event) | |||
| 637 | } | 562 | } |
| 638 | 563 | ||
| 639 | x86_pmu_disable_event(event); | 564 | x86_pmu_disable_event(event); |
| 565 | |||
| 566 | if (unlikely(event->attr.precise_ip)) | ||
| 567 | intel_pmu_pebs_disable(event); | ||
| 640 | } | 568 | } |
| 641 | 569 | ||
| 642 | static inline void | 570 | static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) |
| 643 | intel_pmu_enable_fixed(struct hw_perf_event *hwc) | ||
| 644 | { | 571 | { |
| 645 | int idx = hwc->idx - X86_PMC_IDX_FIXED; | 572 | int idx = hwc->idx - X86_PMC_IDX_FIXED; |
| 646 | u64 ctrl_val, bits, mask; | 573 | u64 ctrl_val, bits, mask; |
| 647 | int err; | ||
| 648 | 574 | ||
| 649 | /* | 575 | /* |
| 650 | * Enable IRQ generation (0x8), | 576 | * Enable IRQ generation (0x8), |
| @@ -669,7 +595,7 @@ intel_pmu_enable_fixed(struct hw_perf_event *hwc) | |||
| 669 | rdmsrl(hwc->config_base, ctrl_val); | 595 | rdmsrl(hwc->config_base, ctrl_val); |
| 670 | ctrl_val &= ~mask; | 596 | ctrl_val &= ~mask; |
| 671 | ctrl_val |= bits; | 597 | ctrl_val |= bits; |
| 672 | err = checking_wrmsrl(hwc->config_base, ctrl_val); | 598 | wrmsrl(hwc->config_base, ctrl_val); |
| 673 | } | 599 | } |
| 674 | 600 | ||
| 675 | static void intel_pmu_enable_event(struct perf_event *event) | 601 | static void intel_pmu_enable_event(struct perf_event *event) |
| @@ -689,7 +615,10 @@ static void intel_pmu_enable_event(struct perf_event *event) | |||
| 689 | return; | 615 | return; |
| 690 | } | 616 | } |
| 691 | 617 | ||
| 692 | __x86_pmu_enable_event(hwc); | 618 | if (unlikely(event->attr.precise_ip)) |
| 619 | intel_pmu_pebs_enable(event); | ||
| 620 | |||
| 621 | __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); | ||
| 693 | } | 622 | } |
| 694 | 623 | ||
| 695 | /* | 624 | /* |
| @@ -708,20 +637,20 @@ static void intel_pmu_reset(void) | |||
| 708 | unsigned long flags; | 637 | unsigned long flags; |
| 709 | int idx; | 638 | int idx; |
| 710 | 639 | ||
| 711 | if (!x86_pmu.num_events) | 640 | if (!x86_pmu.num_counters) |
| 712 | return; | 641 | return; |
| 713 | 642 | ||
| 714 | local_irq_save(flags); | 643 | local_irq_save(flags); |
| 715 | 644 | ||
| 716 | printk("clearing PMU state on CPU#%d\n", smp_processor_id()); | 645 | printk("clearing PMU state on CPU#%d\n", smp_processor_id()); |
| 717 | 646 | ||
| 718 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 647 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
| 719 | checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); | 648 | checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); |
| 720 | checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); | 649 | checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); |
| 721 | } | 650 | } |
| 722 | for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { | 651 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) |
| 723 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); | 652 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); |
| 724 | } | 653 | |
| 725 | if (ds) | 654 | if (ds) |
| 726 | ds->bts_index = ds->bts_buffer_base; | 655 | ds->bts_index = ds->bts_buffer_base; |
| 727 | 656 | ||
| @@ -747,7 +676,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) | |||
| 747 | intel_pmu_drain_bts_buffer(); | 676 | intel_pmu_drain_bts_buffer(); |
| 748 | status = intel_pmu_get_status(); | 677 | status = intel_pmu_get_status(); |
| 749 | if (!status) { | 678 | if (!status) { |
| 750 | intel_pmu_enable_all(); | 679 | intel_pmu_enable_all(0); |
| 751 | return 0; | 680 | return 0; |
| 752 | } | 681 | } |
| 753 | 682 | ||
| @@ -762,6 +691,15 @@ again: | |||
| 762 | 691 | ||
| 763 | inc_irq_stat(apic_perf_irqs); | 692 | inc_irq_stat(apic_perf_irqs); |
| 764 | ack = status; | 693 | ack = status; |
| 694 | |||
| 695 | intel_pmu_lbr_read(); | ||
| 696 | |||
| 697 | /* | ||
| 698 | * PEBS overflow sets bit 62 in the global status register | ||
| 699 | */ | ||
| 700 | if (__test_and_clear_bit(62, (unsigned long *)&status)) | ||
| 701 | x86_pmu.drain_pebs(regs); | ||
| 702 | |||
| 765 | for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { | 703 | for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { |
| 766 | struct perf_event *event = cpuc->events[bit]; | 704 | struct perf_event *event = cpuc->events[bit]; |
| 767 | 705 | ||
| @@ -787,26 +725,22 @@ again: | |||
| 787 | goto again; | 725 | goto again; |
| 788 | 726 | ||
| 789 | done: | 727 | done: |
| 790 | intel_pmu_enable_all(); | 728 | intel_pmu_enable_all(0); |
| 791 | return 1; | 729 | return 1; |
| 792 | } | 730 | } |
| 793 | 731 | ||
| 794 | static struct event_constraint bts_constraint = | ||
| 795 | EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); | ||
| 796 | |||
| 797 | static struct event_constraint * | 732 | static struct event_constraint * |
| 798 | intel_special_constraints(struct perf_event *event) | 733 | intel_bts_constraints(struct perf_event *event) |
| 799 | { | 734 | { |
| 800 | unsigned int hw_event; | 735 | struct hw_perf_event *hwc = &event->hw; |
| 801 | 736 | unsigned int hw_event, bts_event; | |
| 802 | hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK; | ||
| 803 | 737 | ||
| 804 | if (unlikely((hw_event == | 738 | hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; |
| 805 | x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && | 739 | bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); |
| 806 | (event->hw.sample_period == 1))) { | ||
| 807 | 740 | ||
| 741 | if (unlikely(hw_event == bts_event && hwc->sample_period == 1)) | ||
| 808 | return &bts_constraint; | 742 | return &bts_constraint; |
| 809 | } | 743 | |
| 810 | return NULL; | 744 | return NULL; |
| 811 | } | 745 | } |
| 812 | 746 | ||
| @@ -815,24 +749,53 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event | |||
| 815 | { | 749 | { |
| 816 | struct event_constraint *c; | 750 | struct event_constraint *c; |
| 817 | 751 | ||
| 818 | c = intel_special_constraints(event); | 752 | c = intel_bts_constraints(event); |
| 753 | if (c) | ||
| 754 | return c; | ||
| 755 | |||
| 756 | c = intel_pebs_constraints(event); | ||
| 819 | if (c) | 757 | if (c) |
| 820 | return c; | 758 | return c; |
| 821 | 759 | ||
| 822 | return x86_get_event_constraints(cpuc, event); | 760 | return x86_get_event_constraints(cpuc, event); |
| 823 | } | 761 | } |
| 824 | 762 | ||
| 825 | static __initconst struct x86_pmu core_pmu = { | 763 | static int intel_pmu_hw_config(struct perf_event *event) |
| 764 | { | ||
| 765 | int ret = x86_pmu_hw_config(event); | ||
| 766 | |||
| 767 | if (ret) | ||
| 768 | return ret; | ||
| 769 | |||
| 770 | if (event->attr.type != PERF_TYPE_RAW) | ||
| 771 | return 0; | ||
| 772 | |||
| 773 | if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY)) | ||
| 774 | return 0; | ||
| 775 | |||
| 776 | if (x86_pmu.version < 3) | ||
| 777 | return -EINVAL; | ||
| 778 | |||
| 779 | if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) | ||
| 780 | return -EACCES; | ||
| 781 | |||
| 782 | event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY; | ||
| 783 | |||
| 784 | return 0; | ||
| 785 | } | ||
| 786 | |||
| 787 | static __initconst const struct x86_pmu core_pmu = { | ||
| 826 | .name = "core", | 788 | .name = "core", |
| 827 | .handle_irq = x86_pmu_handle_irq, | 789 | .handle_irq = x86_pmu_handle_irq, |
| 828 | .disable_all = x86_pmu_disable_all, | 790 | .disable_all = x86_pmu_disable_all, |
| 829 | .enable_all = x86_pmu_enable_all, | 791 | .enable_all = x86_pmu_enable_all, |
| 830 | .enable = x86_pmu_enable_event, | 792 | .enable = x86_pmu_enable_event, |
| 831 | .disable = x86_pmu_disable_event, | 793 | .disable = x86_pmu_disable_event, |
| 794 | .hw_config = x86_pmu_hw_config, | ||
| 795 | .schedule_events = x86_schedule_events, | ||
| 832 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, | 796 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, |
| 833 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | 797 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, |
| 834 | .event_map = intel_pmu_event_map, | 798 | .event_map = intel_pmu_event_map, |
| 835 | .raw_event = intel_pmu_raw_event, | ||
| 836 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), | 799 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), |
| 837 | .apic = 1, | 800 | .apic = 1, |
| 838 | /* | 801 | /* |
| @@ -845,17 +808,32 @@ static __initconst struct x86_pmu core_pmu = { | |||
| 845 | .event_constraints = intel_core_event_constraints, | 808 | .event_constraints = intel_core_event_constraints, |
| 846 | }; | 809 | }; |
| 847 | 810 | ||
| 848 | static __initconst struct x86_pmu intel_pmu = { | 811 | static void intel_pmu_cpu_starting(int cpu) |
| 812 | { | ||
| 813 | init_debug_store_on_cpu(cpu); | ||
| 814 | /* | ||
| 815 | * Deal with CPUs that don't clear their LBRs on power-up. | ||
| 816 | */ | ||
| 817 | intel_pmu_lbr_reset(); | ||
| 818 | } | ||
| 819 | |||
| 820 | static void intel_pmu_cpu_dying(int cpu) | ||
| 821 | { | ||
| 822 | fini_debug_store_on_cpu(cpu); | ||
| 823 | } | ||
| 824 | |||
| 825 | static __initconst const struct x86_pmu intel_pmu = { | ||
| 849 | .name = "Intel", | 826 | .name = "Intel", |
| 850 | .handle_irq = intel_pmu_handle_irq, | 827 | .handle_irq = intel_pmu_handle_irq, |
| 851 | .disable_all = intel_pmu_disable_all, | 828 | .disable_all = intel_pmu_disable_all, |
| 852 | .enable_all = intel_pmu_enable_all, | 829 | .enable_all = intel_pmu_enable_all, |
| 853 | .enable = intel_pmu_enable_event, | 830 | .enable = intel_pmu_enable_event, |
| 854 | .disable = intel_pmu_disable_event, | 831 | .disable = intel_pmu_disable_event, |
| 832 | .hw_config = intel_pmu_hw_config, | ||
| 833 | .schedule_events = x86_schedule_events, | ||
| 855 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, | 834 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, |
| 856 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | 835 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, |
| 857 | .event_map = intel_pmu_event_map, | 836 | .event_map = intel_pmu_event_map, |
| 858 | .raw_event = intel_pmu_raw_event, | ||
| 859 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), | 837 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), |
| 860 | .apic = 1, | 838 | .apic = 1, |
| 861 | /* | 839 | /* |
| @@ -864,14 +842,38 @@ static __initconst struct x86_pmu intel_pmu = { | |||
| 864 | * the generic event period: | 842 | * the generic event period: |
| 865 | */ | 843 | */ |
| 866 | .max_period = (1ULL << 31) - 1, | 844 | .max_period = (1ULL << 31) - 1, |
| 867 | .enable_bts = intel_pmu_enable_bts, | ||
| 868 | .disable_bts = intel_pmu_disable_bts, | ||
| 869 | .get_event_constraints = intel_get_event_constraints, | 845 | .get_event_constraints = intel_get_event_constraints, |
| 870 | 846 | ||
| 871 | .cpu_starting = init_debug_store_on_cpu, | 847 | .cpu_starting = intel_pmu_cpu_starting, |
| 872 | .cpu_dying = fini_debug_store_on_cpu, | 848 | .cpu_dying = intel_pmu_cpu_dying, |
| 873 | }; | 849 | }; |
| 874 | 850 | ||
| 851 | static void intel_clovertown_quirks(void) | ||
| 852 | { | ||
| 853 | /* | ||
| 854 | * PEBS is unreliable due to: | ||
| 855 | * | ||
| 856 | * AJ67 - PEBS may experience CPL leaks | ||
| 857 | * AJ68 - PEBS PMI may be delayed by one event | ||
| 858 | * AJ69 - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12] | ||
| 859 | * AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS | ||
| 860 | * | ||
| 861 | * AJ67 could be worked around by restricting the OS/USR flags. | ||
| 862 | * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI. | ||
| 863 | * | ||
| 864 | * AJ106 could possibly be worked around by not allowing LBR | ||
| 865 | * usage from PEBS, including the fixup. | ||
| 866 | * AJ68 could possibly be worked around by always programming | ||
| 867 | * a pebs_event_reset[0] value and coping with the lost events. | ||
| 868 | * | ||
| 869 | * But taken together it might just make sense to not enable PEBS on | ||
| 870 | * these chips. | ||
| 871 | */ | ||
| 872 | printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); | ||
| 873 | x86_pmu.pebs = 0; | ||
| 874 | x86_pmu.pebs_constraints = NULL; | ||
| 875 | } | ||
| 876 | |||
| 875 | static __init int intel_pmu_init(void) | 877 | static __init int intel_pmu_init(void) |
| 876 | { | 878 | { |
| 877 | union cpuid10_edx edx; | 879 | union cpuid10_edx edx; |
| @@ -881,12 +883,13 @@ static __init int intel_pmu_init(void) | |||
| 881 | int version; | 883 | int version; |
| 882 | 884 | ||
| 883 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | 885 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { |
| 884 | /* check for P6 processor family */ | 886 | switch (boot_cpu_data.x86) { |
| 885 | if (boot_cpu_data.x86 == 6) { | 887 | case 0x6: |
| 886 | return p6_pmu_init(); | 888 | return p6_pmu_init(); |
| 887 | } else { | 889 | case 0xf: |
| 890 | return p4_pmu_init(); | ||
| 891 | } | ||
| 888 | return -ENODEV; | 892 | return -ENODEV; |
| 889 | } | ||
| 890 | } | 893 | } |
| 891 | 894 | ||
| 892 | /* | 895 | /* |
| @@ -904,16 +907,28 @@ static __init int intel_pmu_init(void) | |||
| 904 | x86_pmu = intel_pmu; | 907 | x86_pmu = intel_pmu; |
| 905 | 908 | ||
| 906 | x86_pmu.version = version; | 909 | x86_pmu.version = version; |
| 907 | x86_pmu.num_events = eax.split.num_events; | 910 | x86_pmu.num_counters = eax.split.num_counters; |
| 908 | x86_pmu.event_bits = eax.split.bit_width; | 911 | x86_pmu.cntval_bits = eax.split.bit_width; |
| 909 | x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1; | 912 | x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; |
| 910 | 913 | ||
| 911 | /* | 914 | /* |
| 912 | * Quirk: v2 perfmon does not report fixed-purpose events, so | 915 | * Quirk: v2 perfmon does not report fixed-purpose events, so |
| 913 | * assume at least 3 events: | 916 | * assume at least 3 events: |
| 914 | */ | 917 | */ |
| 915 | if (version > 1) | 918 | if (version > 1) |
| 916 | x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); | 919 | x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); |
| 920 | |||
| 921 | /* | ||
| 922 | * v2 and above have a perf capabilities MSR | ||
| 923 | */ | ||
| 924 | if (version > 1) { | ||
| 925 | u64 capabilities; | ||
| 926 | |||
| 927 | rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities); | ||
| 928 | x86_pmu.intel_cap.capabilities = capabilities; | ||
| 929 | } | ||
| 930 | |||
| 931 | intel_ds_init(); | ||
| 917 | 932 | ||
| 918 | /* | 933 | /* |
| 919 | * Install the hw-cache-events table: | 934 | * Install the hw-cache-events table: |
| @@ -924,12 +939,15 @@ static __init int intel_pmu_init(void) | |||
| 924 | break; | 939 | break; |
| 925 | 940 | ||
| 926 | case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ | 941 | case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ |
| 942 | x86_pmu.quirks = intel_clovertown_quirks; | ||
| 927 | case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ | 943 | case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ |
| 928 | case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ | 944 | case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ |
| 929 | case 29: /* six-core 45 nm xeon "Dunnington" */ | 945 | case 29: /* six-core 45 nm xeon "Dunnington" */ |
| 930 | memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, | 946 | memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, |
| 931 | sizeof(hw_cache_event_ids)); | 947 | sizeof(hw_cache_event_ids)); |
| 932 | 948 | ||
| 949 | intel_pmu_lbr_init_core(); | ||
| 950 | |||
| 933 | x86_pmu.event_constraints = intel_core2_event_constraints; | 951 | x86_pmu.event_constraints = intel_core2_event_constraints; |
| 934 | pr_cont("Core2 events, "); | 952 | pr_cont("Core2 events, "); |
| 935 | break; | 953 | break; |
| @@ -940,13 +958,19 @@ static __init int intel_pmu_init(void) | |||
| 940 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, | 958 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, |
| 941 | sizeof(hw_cache_event_ids)); | 959 | sizeof(hw_cache_event_ids)); |
| 942 | 960 | ||
| 961 | intel_pmu_lbr_init_nhm(); | ||
| 962 | |||
| 943 | x86_pmu.event_constraints = intel_nehalem_event_constraints; | 963 | x86_pmu.event_constraints = intel_nehalem_event_constraints; |
| 944 | pr_cont("Nehalem/Corei7 events, "); | 964 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; |
| 965 | pr_cont("Nehalem events, "); | ||
| 945 | break; | 966 | break; |
| 967 | |||
| 946 | case 28: /* Atom */ | 968 | case 28: /* Atom */ |
| 947 | memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, | 969 | memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, |
| 948 | sizeof(hw_cache_event_ids)); | 970 | sizeof(hw_cache_event_ids)); |
| 949 | 971 | ||
| 972 | intel_pmu_lbr_init_atom(); | ||
| 973 | |||
| 950 | x86_pmu.event_constraints = intel_gen_event_constraints; | 974 | x86_pmu.event_constraints = intel_gen_event_constraints; |
| 951 | pr_cont("Atom events, "); | 975 | pr_cont("Atom events, "); |
| 952 | break; | 976 | break; |
| @@ -956,7 +980,10 @@ static __init int intel_pmu_init(void) | |||
| 956 | memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, | 980 | memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, |
| 957 | sizeof(hw_cache_event_ids)); | 981 | sizeof(hw_cache_event_ids)); |
| 958 | 982 | ||
| 983 | intel_pmu_lbr_init_nhm(); | ||
| 984 | |||
| 959 | x86_pmu.event_constraints = intel_westmere_event_constraints; | 985 | x86_pmu.event_constraints = intel_westmere_event_constraints; |
| 986 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; | ||
| 960 | pr_cont("Westmere events, "); | 987 | pr_cont("Westmere events, "); |
| 961 | break; | 988 | break; |
| 962 | 989 | ||
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c new file mode 100644 index 000000000000..18018d1311cd --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c | |||
| @@ -0,0 +1,641 @@ | |||
| 1 | #ifdef CONFIG_CPU_SUP_INTEL | ||
| 2 | |||
| 3 | /* The maximal number of PEBS events: */ | ||
| 4 | #define MAX_PEBS_EVENTS 4 | ||
| 5 | |||
| 6 | /* The size of a BTS record in bytes: */ | ||
| 7 | #define BTS_RECORD_SIZE 24 | ||
| 8 | |||
| 9 | #define BTS_BUFFER_SIZE (PAGE_SIZE << 4) | ||
| 10 | #define PEBS_BUFFER_SIZE PAGE_SIZE | ||
| 11 | |||
| 12 | /* | ||
| 13 | * pebs_record_32 for p4 and core not supported | ||
| 14 | |||
| 15 | struct pebs_record_32 { | ||
| 16 | u32 flags, ip; | ||
| 17 | u32 ax, bx, cx, dx; | ||
| 18 | u32 si, di, bp, sp; | ||
| 19 | }; | ||
| 20 | |||
| 21 | */ | ||
| 22 | |||
| 23 | struct pebs_record_core { | ||
| 24 | u64 flags, ip; | ||
| 25 | u64 ax, bx, cx, dx; | ||
| 26 | u64 si, di, bp, sp; | ||
| 27 | u64 r8, r9, r10, r11; | ||
| 28 | u64 r12, r13, r14, r15; | ||
| 29 | }; | ||
| 30 | |||
| 31 | struct pebs_record_nhm { | ||
| 32 | u64 flags, ip; | ||
| 33 | u64 ax, bx, cx, dx; | ||
| 34 | u64 si, di, bp, sp; | ||
| 35 | u64 r8, r9, r10, r11; | ||
| 36 | u64 r12, r13, r14, r15; | ||
| 37 | u64 status, dla, dse, lat; | ||
| 38 | }; | ||
| 39 | |||
| 40 | /* | ||
| 41 | * A debug store configuration. | ||
| 42 | * | ||
| 43 | * We only support architectures that use 64bit fields. | ||
| 44 | */ | ||
| 45 | struct debug_store { | ||
| 46 | u64 bts_buffer_base; | ||
| 47 | u64 bts_index; | ||
| 48 | u64 bts_absolute_maximum; | ||
| 49 | u64 bts_interrupt_threshold; | ||
| 50 | u64 pebs_buffer_base; | ||
| 51 | u64 pebs_index; | ||
| 52 | u64 pebs_absolute_maximum; | ||
| 53 | u64 pebs_interrupt_threshold; | ||
| 54 | u64 pebs_event_reset[MAX_PEBS_EVENTS]; | ||
| 55 | }; | ||
| 56 | |||
| 57 | static void init_debug_store_on_cpu(int cpu) | ||
| 58 | { | ||
| 59 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
| 60 | |||
| 61 | if (!ds) | ||
| 62 | return; | ||
| 63 | |||
| 64 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, | ||
| 65 | (u32)((u64)(unsigned long)ds), | ||
| 66 | (u32)((u64)(unsigned long)ds >> 32)); | ||
| 67 | } | ||
| 68 | |||
| 69 | static void fini_debug_store_on_cpu(int cpu) | ||
| 70 | { | ||
| 71 | if (!per_cpu(cpu_hw_events, cpu).ds) | ||
| 72 | return; | ||
| 73 | |||
| 74 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); | ||
| 75 | } | ||
| 76 | |||
| 77 | static void release_ds_buffers(void) | ||
| 78 | { | ||
| 79 | int cpu; | ||
| 80 | |||
| 81 | if (!x86_pmu.bts && !x86_pmu.pebs) | ||
| 82 | return; | ||
| 83 | |||
| 84 | get_online_cpus(); | ||
| 85 | |||
| 86 | for_each_online_cpu(cpu) | ||
| 87 | fini_debug_store_on_cpu(cpu); | ||
| 88 | |||
| 89 | for_each_possible_cpu(cpu) { | ||
| 90 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
| 91 | |||
| 92 | if (!ds) | ||
| 93 | continue; | ||
| 94 | |||
| 95 | per_cpu(cpu_hw_events, cpu).ds = NULL; | ||
| 96 | |||
| 97 | kfree((void *)(unsigned long)ds->pebs_buffer_base); | ||
| 98 | kfree((void *)(unsigned long)ds->bts_buffer_base); | ||
| 99 | kfree(ds); | ||
| 100 | } | ||
| 101 | |||
| 102 | put_online_cpus(); | ||
| 103 | } | ||
| 104 | |||
| 105 | static int reserve_ds_buffers(void) | ||
| 106 | { | ||
| 107 | int cpu, err = 0; | ||
| 108 | |||
| 109 | if (!x86_pmu.bts && !x86_pmu.pebs) | ||
| 110 | return 0; | ||
| 111 | |||
| 112 | get_online_cpus(); | ||
| 113 | |||
| 114 | for_each_possible_cpu(cpu) { | ||
| 115 | struct debug_store *ds; | ||
| 116 | void *buffer; | ||
| 117 | int max, thresh; | ||
| 118 | |||
| 119 | err = -ENOMEM; | ||
| 120 | ds = kzalloc(sizeof(*ds), GFP_KERNEL); | ||
| 121 | if (unlikely(!ds)) | ||
| 122 | break; | ||
| 123 | per_cpu(cpu_hw_events, cpu).ds = ds; | ||
| 124 | |||
| 125 | if (x86_pmu.bts) { | ||
| 126 | buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); | ||
| 127 | if (unlikely(!buffer)) | ||
| 128 | break; | ||
| 129 | |||
| 130 | max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; | ||
| 131 | thresh = max / 16; | ||
| 132 | |||
| 133 | ds->bts_buffer_base = (u64)(unsigned long)buffer; | ||
| 134 | ds->bts_index = ds->bts_buffer_base; | ||
| 135 | ds->bts_absolute_maximum = ds->bts_buffer_base + | ||
| 136 | max * BTS_RECORD_SIZE; | ||
| 137 | ds->bts_interrupt_threshold = ds->bts_absolute_maximum - | ||
| 138 | thresh * BTS_RECORD_SIZE; | ||
| 139 | } | ||
| 140 | |||
| 141 | if (x86_pmu.pebs) { | ||
| 142 | buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL); | ||
| 143 | if (unlikely(!buffer)) | ||
| 144 | break; | ||
| 145 | |||
| 146 | max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size; | ||
| 147 | |||
| 148 | ds->pebs_buffer_base = (u64)(unsigned long)buffer; | ||
| 149 | ds->pebs_index = ds->pebs_buffer_base; | ||
| 150 | ds->pebs_absolute_maximum = ds->pebs_buffer_base + | ||
| 151 | max * x86_pmu.pebs_record_size; | ||
| 152 | /* | ||
| 153 | * Always use single record PEBS | ||
| 154 | */ | ||
| 155 | ds->pebs_interrupt_threshold = ds->pebs_buffer_base + | ||
| 156 | x86_pmu.pebs_record_size; | ||
| 157 | } | ||
| 158 | |||
| 159 | err = 0; | ||
| 160 | } | ||
| 161 | |||
| 162 | if (err) | ||
| 163 | release_ds_buffers(); | ||
| 164 | else { | ||
| 165 | for_each_online_cpu(cpu) | ||
| 166 | init_debug_store_on_cpu(cpu); | ||
| 167 | } | ||
| 168 | |||
| 169 | put_online_cpus(); | ||
| 170 | |||
| 171 | return err; | ||
| 172 | } | ||
| 173 | |||
| 174 | /* | ||
| 175 | * BTS | ||
| 176 | */ | ||
| 177 | |||
| 178 | static struct event_constraint bts_constraint = | ||
| 179 | EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); | ||
| 180 | |||
| 181 | static void intel_pmu_enable_bts(u64 config) | ||
| 182 | { | ||
| 183 | unsigned long debugctlmsr; | ||
| 184 | |||
| 185 | debugctlmsr = get_debugctlmsr(); | ||
| 186 | |||
| 187 | debugctlmsr |= DEBUGCTLMSR_TR; | ||
| 188 | debugctlmsr |= DEBUGCTLMSR_BTS; | ||
| 189 | debugctlmsr |= DEBUGCTLMSR_BTINT; | ||
| 190 | |||
| 191 | if (!(config & ARCH_PERFMON_EVENTSEL_OS)) | ||
| 192 | debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS; | ||
| 193 | |||
| 194 | if (!(config & ARCH_PERFMON_EVENTSEL_USR)) | ||
| 195 | debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR; | ||
| 196 | |||
| 197 | update_debugctlmsr(debugctlmsr); | ||
| 198 | } | ||
| 199 | |||
| 200 | static void intel_pmu_disable_bts(void) | ||
| 201 | { | ||
| 202 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 203 | unsigned long debugctlmsr; | ||
| 204 | |||
| 205 | if (!cpuc->ds) | ||
| 206 | return; | ||
| 207 | |||
| 208 | debugctlmsr = get_debugctlmsr(); | ||
| 209 | |||
| 210 | debugctlmsr &= | ||
| 211 | ~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT | | ||
| 212 | DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR); | ||
| 213 | |||
| 214 | update_debugctlmsr(debugctlmsr); | ||
| 215 | } | ||
| 216 | |||
| 217 | static void intel_pmu_drain_bts_buffer(void) | ||
| 218 | { | ||
| 219 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 220 | struct debug_store *ds = cpuc->ds; | ||
| 221 | struct bts_record { | ||
| 222 | u64 from; | ||
| 223 | u64 to; | ||
| 224 | u64 flags; | ||
| 225 | }; | ||
| 226 | struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; | ||
| 227 | struct bts_record *at, *top; | ||
| 228 | struct perf_output_handle handle; | ||
| 229 | struct perf_event_header header; | ||
| 230 | struct perf_sample_data data; | ||
| 231 | struct pt_regs regs; | ||
| 232 | |||
| 233 | if (!event) | ||
| 234 | return; | ||
| 235 | |||
| 236 | if (!ds) | ||
| 237 | return; | ||
| 238 | |||
| 239 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; | ||
| 240 | top = (struct bts_record *)(unsigned long)ds->bts_index; | ||
| 241 | |||
| 242 | if (top <= at) | ||
| 243 | return; | ||
| 244 | |||
| 245 | ds->bts_index = ds->bts_buffer_base; | ||
| 246 | |||
| 247 | perf_sample_data_init(&data, 0); | ||
| 248 | data.period = event->hw.last_period; | ||
| 249 | regs.ip = 0; | ||
| 250 | |||
| 251 | /* | ||
| 252 | * Prepare a generic sample, i.e. fill in the invariant fields. | ||
| 253 | * We will overwrite the from and to address before we output | ||
| 254 | * the sample. | ||
| 255 | */ | ||
| 256 | perf_prepare_sample(&header, &data, event, ®s); | ||
| 257 | |||
| 258 | if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1)) | ||
| 259 | return; | ||
| 260 | |||
| 261 | for (; at < top; at++) { | ||
| 262 | data.ip = at->from; | ||
| 263 | data.addr = at->to; | ||
| 264 | |||
| 265 | perf_output_sample(&handle, &header, &data, event); | ||
| 266 | } | ||
| 267 | |||
| 268 | perf_output_end(&handle); | ||
| 269 | |||
| 270 | /* There's new data available. */ | ||
| 271 | event->hw.interrupts++; | ||
| 272 | event->pending_kill = POLL_IN; | ||
| 273 | } | ||
| 274 | |||
| 275 | /* | ||
| 276 | * PEBS | ||
| 277 | */ | ||
| 278 | |||
| 279 | static struct event_constraint intel_core_pebs_events[] = { | ||
| 280 | PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INSTR_RETIRED.ANY */ | ||
| 281 | PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */ | ||
| 282 | PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */ | ||
| 283 | PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */ | ||
| 284 | PEBS_EVENT_CONSTRAINT(0x01cb, 0x1), /* MEM_LOAD_RETIRED.L1D_MISS */ | ||
| 285 | PEBS_EVENT_CONSTRAINT(0x02cb, 0x1), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */ | ||
| 286 | PEBS_EVENT_CONSTRAINT(0x04cb, 0x1), /* MEM_LOAD_RETIRED.L2_MISS */ | ||
| 287 | PEBS_EVENT_CONSTRAINT(0x08cb, 0x1), /* MEM_LOAD_RETIRED.L2_LINE_MISS */ | ||
| 288 | PEBS_EVENT_CONSTRAINT(0x10cb, 0x1), /* MEM_LOAD_RETIRED.DTLB_MISS */ | ||
| 289 | EVENT_CONSTRAINT_END | ||
| 290 | }; | ||
| 291 | |||
| 292 | static struct event_constraint intel_nehalem_pebs_events[] = { | ||
| 293 | PEBS_EVENT_CONSTRAINT(0x00c0, 0xf), /* INSTR_RETIRED.ANY */ | ||
| 294 | PEBS_EVENT_CONSTRAINT(0xfec1, 0xf), /* X87_OPS_RETIRED.ANY */ | ||
| 295 | PEBS_EVENT_CONSTRAINT(0x00c5, 0xf), /* BR_INST_RETIRED.MISPRED */ | ||
| 296 | PEBS_EVENT_CONSTRAINT(0x1fc7, 0xf), /* SIMD_INST_RETIRED.ANY */ | ||
| 297 | PEBS_EVENT_CONSTRAINT(0x01cb, 0xf), /* MEM_LOAD_RETIRED.L1D_MISS */ | ||
| 298 | PEBS_EVENT_CONSTRAINT(0x02cb, 0xf), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */ | ||
| 299 | PEBS_EVENT_CONSTRAINT(0x04cb, 0xf), /* MEM_LOAD_RETIRED.L2_MISS */ | ||
| 300 | PEBS_EVENT_CONSTRAINT(0x08cb, 0xf), /* MEM_LOAD_RETIRED.L2_LINE_MISS */ | ||
| 301 | PEBS_EVENT_CONSTRAINT(0x10cb, 0xf), /* MEM_LOAD_RETIRED.DTLB_MISS */ | ||
| 302 | EVENT_CONSTRAINT_END | ||
| 303 | }; | ||
| 304 | |||
| 305 | static struct event_constraint * | ||
| 306 | intel_pebs_constraints(struct perf_event *event) | ||
| 307 | { | ||
| 308 | struct event_constraint *c; | ||
| 309 | |||
| 310 | if (!event->attr.precise_ip) | ||
| 311 | return NULL; | ||
| 312 | |||
| 313 | if (x86_pmu.pebs_constraints) { | ||
| 314 | for_each_event_constraint(c, x86_pmu.pebs_constraints) { | ||
| 315 | if ((event->hw.config & c->cmask) == c->code) | ||
| 316 | return c; | ||
| 317 | } | ||
| 318 | } | ||
| 319 | |||
| 320 | return &emptyconstraint; | ||
| 321 | } | ||
| 322 | |||
| 323 | static void intel_pmu_pebs_enable(struct perf_event *event) | ||
| 324 | { | ||
| 325 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 326 | struct hw_perf_event *hwc = &event->hw; | ||
| 327 | |||
| 328 | hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; | ||
| 329 | |||
| 330 | cpuc->pebs_enabled |= 1ULL << hwc->idx; | ||
| 331 | WARN_ON_ONCE(cpuc->enabled); | ||
| 332 | |||
| 333 | if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) | ||
| 334 | intel_pmu_lbr_enable(event); | ||
| 335 | } | ||
| 336 | |||
| 337 | static void intel_pmu_pebs_disable(struct perf_event *event) | ||
| 338 | { | ||
| 339 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 340 | struct hw_perf_event *hwc = &event->hw; | ||
| 341 | |||
| 342 | cpuc->pebs_enabled &= ~(1ULL << hwc->idx); | ||
| 343 | if (cpuc->enabled) | ||
| 344 | wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); | ||
| 345 | |||
| 346 | hwc->config |= ARCH_PERFMON_EVENTSEL_INT; | ||
| 347 | |||
| 348 | if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) | ||
| 349 | intel_pmu_lbr_disable(event); | ||
| 350 | } | ||
| 351 | |||
| 352 | static void intel_pmu_pebs_enable_all(void) | ||
| 353 | { | ||
| 354 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 355 | |||
| 356 | if (cpuc->pebs_enabled) | ||
| 357 | wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); | ||
| 358 | } | ||
| 359 | |||
| 360 | static void intel_pmu_pebs_disable_all(void) | ||
| 361 | { | ||
| 362 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 363 | |||
| 364 | if (cpuc->pebs_enabled) | ||
| 365 | wrmsrl(MSR_IA32_PEBS_ENABLE, 0); | ||
| 366 | } | ||
| 367 | |||
| 368 | #include <asm/insn.h> | ||
| 369 | |||
| 370 | static inline bool kernel_ip(unsigned long ip) | ||
| 371 | { | ||
| 372 | #ifdef CONFIG_X86_32 | ||
| 373 | return ip > PAGE_OFFSET; | ||
| 374 | #else | ||
| 375 | return (long)ip < 0; | ||
| 376 | #endif | ||
| 377 | } | ||
| 378 | |||
| 379 | static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) | ||
| 380 | { | ||
| 381 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 382 | unsigned long from = cpuc->lbr_entries[0].from; | ||
| 383 | unsigned long old_to, to = cpuc->lbr_entries[0].to; | ||
| 384 | unsigned long ip = regs->ip; | ||
| 385 | |||
| 386 | /* | ||
| 387 | * We don't need to fix up if the PEBS assist is fault-like | ||
| 388 | */ | ||
| 389 | if (!x86_pmu.intel_cap.pebs_trap) | ||
| 390 | return 1; | ||
| 391 | |||
| 392 | /* | ||
| 393 | * No LBR entry, no basic block, no rewinding | ||
| 394 | */ | ||
| 395 | if (!cpuc->lbr_stack.nr || !from || !to) | ||
| 396 | return 0; | ||
| 397 | |||
| 398 | /* | ||
| 399 | * Basic blocks should never cross user/kernel boundaries | ||
| 400 | */ | ||
| 401 | if (kernel_ip(ip) != kernel_ip(to)) | ||
| 402 | return 0; | ||
| 403 | |||
| 404 | /* | ||
| 405 | * unsigned math, either ip is before the start (impossible) or | ||
| 406 | * the basic block is larger than 1 page (sanity) | ||
| 407 | */ | ||
| 408 | if ((ip - to) > PAGE_SIZE) | ||
| 409 | return 0; | ||
| 410 | |||
| 411 | /* | ||
| 412 | * We sampled a branch insn, rewind using the LBR stack | ||
| 413 | */ | ||
| 414 | if (ip == to) { | ||
| 415 | regs->ip = from; | ||
| 416 | return 1; | ||
| 417 | } | ||
| 418 | |||
| 419 | do { | ||
| 420 | struct insn insn; | ||
| 421 | u8 buf[MAX_INSN_SIZE]; | ||
| 422 | void *kaddr; | ||
| 423 | |||
| 424 | old_to = to; | ||
| 425 | if (!kernel_ip(ip)) { | ||
| 426 | int bytes, size = MAX_INSN_SIZE; | ||
| 427 | |||
| 428 | bytes = copy_from_user_nmi(buf, (void __user *)to, size); | ||
| 429 | if (bytes != size) | ||
| 430 | return 0; | ||
| 431 | |||
| 432 | kaddr = buf; | ||
| 433 | } else | ||
| 434 | kaddr = (void *)to; | ||
| 435 | |||
| 436 | kernel_insn_init(&insn, kaddr); | ||
| 437 | insn_get_length(&insn); | ||
| 438 | to += insn.length; | ||
| 439 | } while (to < ip); | ||
| 440 | |||
| 441 | if (to == ip) { | ||
| 442 | regs->ip = old_to; | ||
| 443 | return 1; | ||
| 444 | } | ||
| 445 | |||
| 446 | /* | ||
| 447 | * Even though we decoded the basic block, the instruction stream | ||
| 448 | * never matched the given IP, either the TO or the IP got corrupted. | ||
| 449 | */ | ||
| 450 | return 0; | ||
| 451 | } | ||
| 452 | |||
| 453 | static int intel_pmu_save_and_restart(struct perf_event *event); | ||
| 454 | |||
| 455 | static void __intel_pmu_pebs_event(struct perf_event *event, | ||
| 456 | struct pt_regs *iregs, void *__pebs) | ||
| 457 | { | ||
| 458 | /* | ||
| 459 | * We cast to pebs_record_core since that is a subset of | ||
| 460 | * both formats and we don't use the other fields in this | ||
| 461 | * routine. | ||
| 462 | */ | ||
| 463 | struct pebs_record_core *pebs = __pebs; | ||
| 464 | struct perf_sample_data data; | ||
| 465 | struct pt_regs regs; | ||
| 466 | |||
| 467 | if (!intel_pmu_save_and_restart(event)) | ||
| 468 | return; | ||
| 469 | |||
| 470 | perf_sample_data_init(&data, 0); | ||
| 471 | data.period = event->hw.last_period; | ||
| 472 | |||
| 473 | /* | ||
| 474 | * We use the interrupt regs as a base because the PEBS record | ||
| 475 | * does not contain a full regs set, specifically it seems to | ||
| 476 | * lack segment descriptors, which get used by things like | ||
| 477 | * user_mode(). | ||
| 478 | * | ||
| 479 | * In the simple case fix up only the IP and BP,SP regs, for | ||
| 480 | * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly. | ||
| 481 | * A possible PERF_SAMPLE_REGS will have to transfer all regs. | ||
| 482 | */ | ||
| 483 | regs = *iregs; | ||
| 484 | regs.ip = pebs->ip; | ||
| 485 | regs.bp = pebs->bp; | ||
| 486 | regs.sp = pebs->sp; | ||
| 487 | |||
| 488 | if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(®s)) | ||
| 489 | regs.flags |= PERF_EFLAGS_EXACT; | ||
| 490 | else | ||
| 491 | regs.flags &= ~PERF_EFLAGS_EXACT; | ||
| 492 | |||
| 493 | if (perf_event_overflow(event, 1, &data, ®s)) | ||
| 494 | x86_pmu_stop(event); | ||
| 495 | } | ||
| 496 | |||
| 497 | static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) | ||
| 498 | { | ||
| 499 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 500 | struct debug_store *ds = cpuc->ds; | ||
| 501 | struct perf_event *event = cpuc->events[0]; /* PMC0 only */ | ||
| 502 | struct pebs_record_core *at, *top; | ||
| 503 | int n; | ||
| 504 | |||
| 505 | if (!ds || !x86_pmu.pebs) | ||
| 506 | return; | ||
| 507 | |||
| 508 | at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base; | ||
| 509 | top = (struct pebs_record_core *)(unsigned long)ds->pebs_index; | ||
| 510 | |||
| 511 | /* | ||
| 512 | * Whatever else happens, drain the thing | ||
| 513 | */ | ||
| 514 | ds->pebs_index = ds->pebs_buffer_base; | ||
| 515 | |||
| 516 | if (!test_bit(0, cpuc->active_mask)) | ||
| 517 | return; | ||
| 518 | |||
| 519 | WARN_ON_ONCE(!event); | ||
| 520 | |||
| 521 | if (!event->attr.precise_ip) | ||
| 522 | return; | ||
| 523 | |||
| 524 | n = top - at; | ||
| 525 | if (n <= 0) | ||
| 526 | return; | ||
| 527 | |||
| 528 | /* | ||
| 529 | * Should not happen, we program the threshold at 1 and do not | ||
| 530 | * set a reset value. | ||
| 531 | */ | ||
| 532 | WARN_ON_ONCE(n > 1); | ||
| 533 | at += n - 1; | ||
| 534 | |||
| 535 | __intel_pmu_pebs_event(event, iregs, at); | ||
| 536 | } | ||
| 537 | |||
| 538 | static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) | ||
| 539 | { | ||
| 540 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 541 | struct debug_store *ds = cpuc->ds; | ||
| 542 | struct pebs_record_nhm *at, *top; | ||
| 543 | struct perf_event *event = NULL; | ||
| 544 | u64 status = 0; | ||
| 545 | int bit, n; | ||
| 546 | |||
| 547 | if (!ds || !x86_pmu.pebs) | ||
| 548 | return; | ||
| 549 | |||
| 550 | at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; | ||
| 551 | top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index; | ||
| 552 | |||
| 553 | ds->pebs_index = ds->pebs_buffer_base; | ||
| 554 | |||
| 555 | n = top - at; | ||
| 556 | if (n <= 0) | ||
| 557 | return; | ||
| 558 | |||
| 559 | /* | ||
| 560 | * Should not happen, we program the threshold at 1 and do not | ||
| 561 | * set a reset value. | ||
| 562 | */ | ||
| 563 | WARN_ON_ONCE(n > MAX_PEBS_EVENTS); | ||
| 564 | |||
| 565 | for ( ; at < top; at++) { | ||
| 566 | for_each_set_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) { | ||
| 567 | event = cpuc->events[bit]; | ||
| 568 | if (!test_bit(bit, cpuc->active_mask)) | ||
| 569 | continue; | ||
| 570 | |||
| 571 | WARN_ON_ONCE(!event); | ||
| 572 | |||
| 573 | if (!event->attr.precise_ip) | ||
| 574 | continue; | ||
| 575 | |||
| 576 | if (__test_and_set_bit(bit, (unsigned long *)&status)) | ||
| 577 | continue; | ||
| 578 | |||
| 579 | break; | ||
| 580 | } | ||
| 581 | |||
| 582 | if (!event || bit >= MAX_PEBS_EVENTS) | ||
| 583 | continue; | ||
| 584 | |||
| 585 | __intel_pmu_pebs_event(event, iregs, at); | ||
| 586 | } | ||
| 587 | } | ||
| 588 | |||
| 589 | /* | ||
| 590 | * BTS, PEBS probe and setup | ||
| 591 | */ | ||
| 592 | |||
| 593 | static void intel_ds_init(void) | ||
| 594 | { | ||
| 595 | /* | ||
| 596 | * No support for 32bit formats | ||
| 597 | */ | ||
| 598 | if (!boot_cpu_has(X86_FEATURE_DTES64)) | ||
| 599 | return; | ||
| 600 | |||
| 601 | x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); | ||
| 602 | x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); | ||
| 603 | if (x86_pmu.pebs) { | ||
| 604 | char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-'; | ||
| 605 | int format = x86_pmu.intel_cap.pebs_format; | ||
| 606 | |||
| 607 | switch (format) { | ||
| 608 | case 0: | ||
| 609 | printk(KERN_CONT "PEBS fmt0%c, ", pebs_type); | ||
| 610 | x86_pmu.pebs_record_size = sizeof(struct pebs_record_core); | ||
| 611 | x86_pmu.drain_pebs = intel_pmu_drain_pebs_core; | ||
| 612 | x86_pmu.pebs_constraints = intel_core_pebs_events; | ||
| 613 | break; | ||
| 614 | |||
| 615 | case 1: | ||
| 616 | printk(KERN_CONT "PEBS fmt1%c, ", pebs_type); | ||
| 617 | x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm); | ||
| 618 | x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; | ||
| 619 | x86_pmu.pebs_constraints = intel_nehalem_pebs_events; | ||
| 620 | break; | ||
| 621 | |||
| 622 | default: | ||
| 623 | printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type); | ||
| 624 | x86_pmu.pebs = 0; | ||
| 625 | break; | ||
| 626 | } | ||
| 627 | } | ||
| 628 | } | ||
| 629 | |||
| 630 | #else /* CONFIG_CPU_SUP_INTEL */ | ||
| 631 | |||
| 632 | static int reserve_ds_buffers(void) | ||
| 633 | { | ||
| 634 | return 0; | ||
| 635 | } | ||
| 636 | |||
| 637 | static void release_ds_buffers(void) | ||
| 638 | { | ||
| 639 | } | ||
| 640 | |||
| 641 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
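The precise_ip handling added in this file is driven by the new perf_event_attr::precise_ip field from the matching perf core changes: a value of 1 requests a plain PEBS sample (the reported IP may trail the eventing instruction), while a value greater than 1 additionally enables the LBR so intel_pmu_pebs_fixup_ip() can rewind to the exact instruction. A hypothetical userspace sketch (not part of this patch; field and event names assumed from include/linux/perf_event.h of this series) of requesting such samples:

	#include <linux/perf_event.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <sys/types.h>
	#include <unistd.h>

	/* Open a counter whose samples use the PEBS + LBR fixup path above. */
	static int open_precise_instructions(pid_t pid)
	{
		struct perf_event_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.size          = sizeof(attr);
		attr.type          = PERF_TYPE_HARDWARE;
		/* INSTR_RETIRED.ANY (0x00c0) is in the PEBS constraint tables above. */
		attr.config        = PERF_COUNT_HW_INSTRUCTIONS;
		attr.sample_period = 100000;
		attr.precise_ip    = 2;	/* >1 asks for an exact IP */

		return syscall(__NR_perf_event_open, &attr, pid, -1, -1, 0);
	}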
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c new file mode 100644 index 000000000000..d202c1bece1a --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c | |||
| @@ -0,0 +1,218 @@ | |||
| 1 | #ifdef CONFIG_CPU_SUP_INTEL | ||
| 2 | |||
| 3 | enum { | ||
| 4 | LBR_FORMAT_32 = 0x00, | ||
| 5 | LBR_FORMAT_LIP = 0x01, | ||
| 6 | LBR_FORMAT_EIP = 0x02, | ||
| 7 | LBR_FORMAT_EIP_FLAGS = 0x03, | ||
| 8 | }; | ||
| 9 | |||
| 10 | /* | ||
| 11 | * We only support LBR implementations that have FREEZE_LBRS_ON_PMI | ||
| 12 | * otherwise it becomes near impossible to get a reliable stack. | ||
| 13 | */ | ||
| 14 | |||
| 15 | static void __intel_pmu_lbr_enable(void) | ||
| 16 | { | ||
| 17 | u64 debugctl; | ||
| 18 | |||
| 19 | rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | ||
| 20 | debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); | ||
| 21 | wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | ||
| 22 | } | ||
| 23 | |||
| 24 | static void __intel_pmu_lbr_disable(void) | ||
| 25 | { | ||
| 26 | u64 debugctl; | ||
| 27 | |||
| 28 | rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | ||
| 29 | debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); | ||
| 30 | wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | ||
| 31 | } | ||
| 32 | |||
| 33 | static void intel_pmu_lbr_reset_32(void) | ||
| 34 | { | ||
| 35 | int i; | ||
| 36 | |||
| 37 | for (i = 0; i < x86_pmu.lbr_nr; i++) | ||
| 38 | wrmsrl(x86_pmu.lbr_from + i, 0); | ||
| 39 | } | ||
| 40 | |||
| 41 | static void intel_pmu_lbr_reset_64(void) | ||
| 42 | { | ||
| 43 | int i; | ||
| 44 | |||
| 45 | for (i = 0; i < x86_pmu.lbr_nr; i++) { | ||
| 46 | wrmsrl(x86_pmu.lbr_from + i, 0); | ||
| 47 | wrmsrl(x86_pmu.lbr_to + i, 0); | ||
| 48 | } | ||
| 49 | } | ||
| 50 | |||
| 51 | static void intel_pmu_lbr_reset(void) | ||
| 52 | { | ||
| 53 | if (!x86_pmu.lbr_nr) | ||
| 54 | return; | ||
| 55 | |||
| 56 | if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) | ||
| 57 | intel_pmu_lbr_reset_32(); | ||
| 58 | else | ||
| 59 | intel_pmu_lbr_reset_64(); | ||
| 60 | } | ||
| 61 | |||
| 62 | static void intel_pmu_lbr_enable(struct perf_event *event) | ||
| 63 | { | ||
| 64 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 65 | |||
| 66 | if (!x86_pmu.lbr_nr) | ||
| 67 | return; | ||
| 68 | |||
| 69 | WARN_ON_ONCE(cpuc->enabled); | ||
| 70 | |||
| 71 | /* | ||
| 72 | * Reset the LBR stack if we changed task context to | ||
| 73 | * avoid data leaks. | ||
| 74 | */ | ||
| 75 | |||
| 76 | if (event->ctx->task && cpuc->lbr_context != event->ctx) { | ||
| 77 | intel_pmu_lbr_reset(); | ||
| 78 | cpuc->lbr_context = event->ctx; | ||
| 79 | } | ||
| 80 | |||
| 81 | cpuc->lbr_users++; | ||
| 82 | } | ||
| 83 | |||
| 84 | static void intel_pmu_lbr_disable(struct perf_event *event) | ||
| 85 | { | ||
| 86 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 87 | |||
| 88 | if (!x86_pmu.lbr_nr) | ||
| 89 | return; | ||
| 90 | |||
| 91 | cpuc->lbr_users--; | ||
| 92 | WARN_ON_ONCE(cpuc->lbr_users < 0); | ||
| 93 | |||
| 94 | if (cpuc->enabled && !cpuc->lbr_users) | ||
| 95 | __intel_pmu_lbr_disable(); | ||
| 96 | } | ||
| 97 | |||
| 98 | static void intel_pmu_lbr_enable_all(void) | ||
| 99 | { | ||
| 100 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 101 | |||
| 102 | if (cpuc->lbr_users) | ||
| 103 | __intel_pmu_lbr_enable(); | ||
| 104 | } | ||
| 105 | |||
| 106 | static void intel_pmu_lbr_disable_all(void) | ||
| 107 | { | ||
| 108 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 109 | |||
| 110 | if (cpuc->lbr_users) | ||
| 111 | __intel_pmu_lbr_disable(); | ||
| 112 | } | ||
| 113 | |||
| 114 | static inline u64 intel_pmu_lbr_tos(void) | ||
| 115 | { | ||
| 116 | u64 tos; | ||
| 117 | |||
| 118 | rdmsrl(x86_pmu.lbr_tos, tos); | ||
| 119 | |||
| 120 | return tos; | ||
| 121 | } | ||
| 122 | |||
| 123 | static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) | ||
| 124 | { | ||
| 125 | unsigned long mask = x86_pmu.lbr_nr - 1; | ||
| 126 | u64 tos = intel_pmu_lbr_tos(); | ||
| 127 | int i; | ||
| 128 | |||
| 129 | for (i = 0; i < x86_pmu.lbr_nr; i++) { | ||
| 130 | unsigned long lbr_idx = (tos - i) & mask; | ||
| 131 | union { | ||
| 132 | struct { | ||
| 133 | u32 from; | ||
| 134 | u32 to; | ||
| 135 | }; | ||
| 136 | u64 lbr; | ||
| 137 | } msr_lastbranch; | ||
| 138 | |||
| 139 | rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr); | ||
| 140 | |||
| 141 | cpuc->lbr_entries[i].from = msr_lastbranch.from; | ||
| 142 | cpuc->lbr_entries[i].to = msr_lastbranch.to; | ||
| 143 | cpuc->lbr_entries[i].flags = 0; | ||
| 144 | } | ||
| 145 | cpuc->lbr_stack.nr = i; | ||
| 146 | } | ||
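The read loop walks the LBR stack newest-first: it starts at the top-of-stack index and wraps with a power-of-two mask. A tiny standalone sketch of that indexing, using a hypothetical 4-entry stack and a made-up top-of-stack value:

    /* Sketch of the (tos - i) & mask walk used above: newest entry first,
     * wrapping around a power-of-two sized LBR ring. Values are made up. */
    #include <stdio.h>

    int main(void)
    {
        const unsigned int lbr_nr = 4;          /* hypothetical stack depth */
        const unsigned int mask = lbr_nr - 1;
        unsigned int tos = 2;                   /* hypothetical top of stack */

        for (unsigned int i = 0; i < lbr_nr; i++)
            printf("entry %u reads slot %u\n", i, (tos - i) & mask);
        return 0;
    }

With tos = 2 this visits slots 2, 1, 0, 3 -- the most recent branch first.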
| 147 | |||
| 148 | #define LBR_FROM_FLAG_MISPRED (1ULL << 63) | ||
| 149 | |||
| 150 | /* | ||
| 151 | * Due to lack of segmentation in Linux the effective address (offset) | ||
| 152 | * is the same as the linear address, allowing us to merge the LIP and EIP | ||
| 153 | * LBR formats. | ||
| 154 | */ | ||
| 155 | static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) | ||
| 156 | { | ||
| 157 | unsigned long mask = x86_pmu.lbr_nr - 1; | ||
| 158 | int lbr_format = x86_pmu.intel_cap.lbr_format; | ||
| 159 | u64 tos = intel_pmu_lbr_tos(); | ||
| 160 | int i; | ||
| 161 | |||
| 162 | for (i = 0; i < x86_pmu.lbr_nr; i++) { | ||
| 163 | unsigned long lbr_idx = (tos - i) & mask; | ||
| 164 | u64 from, to, flags = 0; | ||
| 165 | |||
| 166 | rdmsrl(x86_pmu.lbr_from + lbr_idx, from); | ||
| 167 | rdmsrl(x86_pmu.lbr_to + lbr_idx, to); | ||
| 168 | |||
| 169 | if (lbr_format == LBR_FORMAT_EIP_FLAGS) { | ||
| 170 | flags = !!(from & LBR_FROM_FLAG_MISPRED); | ||
| 171 | from = (u64)((((s64)from) << 1) >> 1); | ||
| 172 | } | ||
| 173 | |||
| 174 | cpuc->lbr_entries[i].from = from; | ||
| 175 | cpuc->lbr_entries[i].to = to; | ||
| 176 | cpuc->lbr_entries[i].flags = flags; | ||
| 177 | } | ||
| 178 | cpuc->lbr_stack.nr = i; | ||
| 179 | } | ||
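In the EIP_FLAGS format the mispredict flag rides in bit 63 of the FROM value; the shift-left/shift-right pair drops that bit while sign-extending bit 62, so canonical kernel addresses come back intact. A small demonstration of the same arithmetic with a made-up address:

    /* Sketch of the bit-63 flag extraction used above. The sample address
     * is invented; the point is that the arithmetic shift re-extends bit 62,
     * restoring a canonical kernel address after the flag bit is consumed. */
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Raw FROM value: bit 63 carries MISPRED, bits 62:0 the address. */
        uint64_t raw  = (1ULL << 63) | (0xffffffff81000000ULL & ~(1ULL << 63));
        int mispred   = !!(raw & (1ULL << 63));
        uint64_t from = (uint64_t)((((int64_t)raw) << 1) >> 1);

        printf("mispred=%d from=%#" PRIx64 "\n", mispred, from);
        return 0;
    }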
| 180 | |||
| 181 | static void intel_pmu_lbr_read(void) | ||
| 182 | { | ||
| 183 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 184 | |||
| 185 | if (!cpuc->lbr_users) | ||
| 186 | return; | ||
| 187 | |||
| 188 | if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) | ||
| 189 | intel_pmu_lbr_read_32(cpuc); | ||
| 190 | else | ||
| 191 | intel_pmu_lbr_read_64(cpuc); | ||
| 192 | } | ||
| 193 | |||
| 194 | static void intel_pmu_lbr_init_core(void) | ||
| 195 | { | ||
| 196 | x86_pmu.lbr_nr = 4; | ||
| 197 | x86_pmu.lbr_tos = 0x01c9; | ||
| 198 | x86_pmu.lbr_from = 0x40; | ||
| 199 | x86_pmu.lbr_to = 0x60; | ||
| 200 | } | ||
| 201 | |||
| 202 | static void intel_pmu_lbr_init_nhm(void) | ||
| 203 | { | ||
| 204 | x86_pmu.lbr_nr = 16; | ||
| 205 | x86_pmu.lbr_tos = 0x01c9; | ||
| 206 | x86_pmu.lbr_from = 0x680; | ||
| 207 | x86_pmu.lbr_to = 0x6c0; | ||
| 208 | } | ||
| 209 | |||
| 210 | static void intel_pmu_lbr_init_atom(void) | ||
| 211 | { | ||
| 212 | x86_pmu.lbr_nr = 8; | ||
| 213 | x86_pmu.lbr_tos = 0x01c9; | ||
| 214 | x86_pmu.lbr_from = 0x40; | ||
| 215 | x86_pmu.lbr_to = 0x60; | ||
| 216 | } | ||
| 217 | |||
| 218 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c new file mode 100644 index 000000000000..ae85d69644d1 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_p4.c | |||
| @@ -0,0 +1,858 @@ | |||
| 1 | /* | ||
| 2 | * Netburst Performance Events (P4, old Xeon) | ||
| 3 | * | ||
| 4 | * Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org> | ||
| 5 | * Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com> | ||
| 6 | * | ||
| 7 | * For licensing details see kernel-base/COPYING | ||
| 8 | */ | ||
| 9 | |||
| 10 | #ifdef CONFIG_CPU_SUP_INTEL | ||
| 11 | |||
| 12 | #include <asm/perf_event_p4.h> | ||
| 13 | |||
| 14 | #define P4_CNTR_LIMIT 3 | ||
| 15 | /* | ||
| 16 | * array indices: 0, 1 - HT threads, used on a cpu with HT enabled | ||
| 17 | */ | ||
| 18 | struct p4_event_bind { | ||
| 19 | unsigned int opcode; /* Event code and ESCR selector */ | ||
| 20 | unsigned int escr_msr[2]; /* ESCR MSR for this event */ | ||
| 21 | char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on absence */ | ||
| 22 | }; | ||
| 23 | |||
| 24 | struct p4_cache_event_bind { | ||
| 25 | unsigned int metric_pebs; | ||
| 26 | unsigned int metric_vert; | ||
| 27 | }; | ||
| 28 | |||
| 29 | #define P4_GEN_CACHE_EVENT_BIND(name) \ | ||
| 30 | [P4_CACHE__##name] = { \ | ||
| 31 | .metric_pebs = P4_PEBS__##name, \ | ||
| 32 | .metric_vert = P4_VERT__##name, \ | ||
| 33 | } | ||
| 34 | |||
| 35 | static struct p4_cache_event_bind p4_cache_event_bind_map[] = { | ||
| 36 | P4_GEN_CACHE_EVENT_BIND(1stl_cache_load_miss_retired), | ||
| 37 | P4_GEN_CACHE_EVENT_BIND(2ndl_cache_load_miss_retired), | ||
| 38 | P4_GEN_CACHE_EVENT_BIND(dtlb_load_miss_retired), | ||
| 39 | P4_GEN_CACHE_EVENT_BIND(dtlb_store_miss_retired), | ||
| 40 | }; | ||
| 41 | |||
| 42 | /* | ||
| 43 | * Note that we don't use CCCR1 here; there is an | ||
| 44 | * exception for P4_BSQ_ALLOCATION, but we simply have | ||
| 45 | * no workaround for it | ||
| 46 | * | ||
| 47 | * Consider this binding the set of resources a particular | ||
| 48 | * event may borrow; it doesn't contain the EventMask, | ||
| 49 | * Tags and friends -- those are left to the caller | ||
| 50 | */ | ||
| 51 | static struct p4_event_bind p4_event_bind_map[] = { | ||
| 52 | [P4_EVENT_TC_DELIVER_MODE] = { | ||
| 53 | .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE), | ||
| 54 | .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, | ||
| 55 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
| 56 | }, | ||
| 57 | [P4_EVENT_BPU_FETCH_REQUEST] = { | ||
| 58 | .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST), | ||
| 59 | .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 }, | ||
| 60 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
| 61 | }, | ||
| 62 | [P4_EVENT_ITLB_REFERENCE] = { | ||
| 63 | .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE), | ||
| 64 | .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, | ||
| 65 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
| 66 | }, | ||
| 67 | [P4_EVENT_MEMORY_CANCEL] = { | ||
| 68 | .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL), | ||
| 69 | .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, | ||
| 70 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
| 71 | }, | ||
| 72 | [P4_EVENT_MEMORY_COMPLETE] = { | ||
| 73 | .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE), | ||
| 74 | .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 }, | ||
| 75 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
| 76 | }, | ||
| 77 | [P4_EVENT_LOAD_PORT_REPLAY] = { | ||
| 78 | .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY), | ||
| 79 | .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 }, | ||
| 80 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
| 81 | }, | ||
| 82 | [P4_EVENT_STORE_PORT_REPLAY] = { | ||
| 83 | .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY), | ||
| 84 | .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 }, | ||
| 85 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
| 86 | }, | ||
| 87 | [P4_EVENT_MOB_LOAD_REPLAY] = { | ||
| 88 | .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY), | ||
| 89 | .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 }, | ||
| 90 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
| 91 | }, | ||
| 92 | [P4_EVENT_PAGE_WALK_TYPE] = { | ||
| 93 | .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE), | ||
| 94 | .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 }, | ||
| 95 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
| 96 | }, | ||
| 97 | [P4_EVENT_BSQ_CACHE_REFERENCE] = { | ||
| 98 | .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE), | ||
| 99 | .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, | ||
| 100 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
| 101 | }, | ||
| 102 | [P4_EVENT_IOQ_ALLOCATION] = { | ||
| 103 | .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION), | ||
| 104 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
| 105 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
| 106 | }, | ||
| 107 | [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */ | ||
| 108 | .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES), | ||
| 109 | .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 }, | ||
| 110 | .cntr = { {2, -1, -1}, {3, -1, -1} }, | ||
| 111 | }, | ||
| 112 | [P4_EVENT_FSB_DATA_ACTIVITY] = { | ||
| 113 | .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY), | ||
| 114 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
| 115 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
| 116 | }, | ||
| 117 | [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */ | ||
| 118 | .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION), | ||
| 119 | .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 }, | ||
| 120 | .cntr = { {0, -1, -1}, {1, -1, -1} }, | ||
| 121 | }, | ||
| 122 | [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */ | ||
| 123 | .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES), | ||
| 124 | .escr_msr = { MSR_P4_BSU_ESCR1, MSR_P4_BSU_ESCR1 }, | ||
| 125 | .cntr = { {2, -1, -1}, {3, -1, -1} }, | ||
| 126 | }, | ||
| 127 | [P4_EVENT_SSE_INPUT_ASSIST] = { | ||
| 128 | .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST), | ||
| 129 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
| 130 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
| 131 | }, | ||
| 132 | [P4_EVENT_PACKED_SP_UOP] = { | ||
| 133 | .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP), | ||
| 134 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
| 135 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
| 136 | }, | ||
| 137 | [P4_EVENT_PACKED_DP_UOP] = { | ||
| 138 | .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP), | ||
| 139 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
| 140 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
| 141 | }, | ||
| 142 | [P4_EVENT_SCALAR_SP_UOP] = { | ||
| 143 | .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP), | ||
| 144 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
| 145 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
| 146 | }, | ||
| 147 | [P4_EVENT_SCALAR_DP_UOP] = { | ||
| 148 | .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP), | ||
| 149 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
| 150 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
| 151 | }, | ||
| 152 | [P4_EVENT_64BIT_MMX_UOP] = { | ||
| 153 | .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP), | ||
| 154 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
| 155 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
| 156 | }, | ||
| 157 | [P4_EVENT_128BIT_MMX_UOP] = { | ||
| 158 | .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP), | ||
| 159 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
| 160 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
| 161 | }, | ||
| 162 | [P4_EVENT_X87_FP_UOP] = { | ||
| 163 | .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP), | ||
| 164 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
| 165 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
| 166 | }, | ||
| 167 | [P4_EVENT_TC_MISC] = { | ||
| 168 | .opcode = P4_OPCODE(P4_EVENT_TC_MISC), | ||
| 169 | .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, | ||
| 170 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
| 171 | }, | ||
| 172 | [P4_EVENT_GLOBAL_POWER_EVENTS] = { | ||
| 173 | .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS), | ||
| 174 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
| 175 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
| 176 | }, | ||
| 177 | [P4_EVENT_TC_MS_XFER] = { | ||
| 178 | .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER), | ||
| 179 | .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, | ||
| 180 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
| 181 | }, | ||
| 182 | [P4_EVENT_UOP_QUEUE_WRITES] = { | ||
| 183 | .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES), | ||
| 184 | .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, | ||
| 185 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
| 186 | }, | ||
| 187 | [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = { | ||
| 188 | .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE), | ||
| 189 | .escr_msr = { MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR0 }, | ||
| 190 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
| 191 | }, | ||
| 192 | [P4_EVENT_RETIRED_BRANCH_TYPE] = { | ||
| 193 | .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE), | ||
| 194 | .escr_msr = { MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR1 }, | ||
| 195 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
| 196 | }, | ||
| 197 | [P4_EVENT_RESOURCE_STALL] = { | ||
| 198 | .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL), | ||
| 199 | .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 }, | ||
| 200 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
| 201 | }, | ||
| 202 | [P4_EVENT_WC_BUFFER] = { | ||
| 203 | .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER), | ||
| 204 | .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, | ||
| 205 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
| 206 | }, | ||
| 207 | [P4_EVENT_B2B_CYCLES] = { | ||
| 208 | .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES), | ||
| 209 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
| 210 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
| 211 | }, | ||
| 212 | [P4_EVENT_BNR] = { | ||
| 213 | .opcode = P4_OPCODE(P4_EVENT_BNR), | ||
| 214 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
| 215 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
| 216 | }, | ||
| 217 | [P4_EVENT_SNOOP] = { | ||
| 218 | .opcode = P4_OPCODE(P4_EVENT_SNOOP), | ||
| 219 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
| 220 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
| 221 | }, | ||
| 222 | [P4_EVENT_RESPONSE] = { | ||
| 223 | .opcode = P4_OPCODE(P4_EVENT_RESPONSE), | ||
| 224 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
| 225 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
| 226 | }, | ||
| 227 | [P4_EVENT_FRONT_END_EVENT] = { | ||
| 228 | .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT), | ||
| 229 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
| 230 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
| 231 | }, | ||
| 232 | [P4_EVENT_EXECUTION_EVENT] = { | ||
| 233 | .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT), | ||
| 234 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
| 235 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
| 236 | }, | ||
| 237 | [P4_EVENT_REPLAY_EVENT] = { | ||
| 238 | .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT), | ||
| 239 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
| 240 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
| 241 | }, | ||
| 242 | [P4_EVENT_INSTR_RETIRED] = { | ||
| 243 | .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED), | ||
| 244 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | ||
| 245 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
| 246 | }, | ||
| 247 | [P4_EVENT_UOPS_RETIRED] = { | ||
| 248 | .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED), | ||
| 249 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | ||
| 250 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
| 251 | }, | ||
| 252 | [P4_EVENT_UOP_TYPE] = { | ||
| 253 | .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE), | ||
| 254 | .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 }, | ||
| 255 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
| 256 | }, | ||
| 257 | [P4_EVENT_BRANCH_RETIRED] = { | ||
| 258 | .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED), | ||
| 259 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
| 260 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
| 261 | }, | ||
| 262 | [P4_EVENT_MISPRED_BRANCH_RETIRED] = { | ||
| 263 | .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED), | ||
| 264 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | ||
| 265 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
| 266 | }, | ||
| 267 | [P4_EVENT_X87_ASSIST] = { | ||
| 268 | .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST), | ||
| 269 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
| 270 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
| 271 | }, | ||
| 272 | [P4_EVENT_MACHINE_CLEAR] = { | ||
| 273 | .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR), | ||
| 274 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
| 275 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
| 276 | }, | ||
| 277 | [P4_EVENT_INSTR_COMPLETED] = { | ||
| 278 | .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED), | ||
| 279 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | ||
| 280 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
| 281 | }, | ||
| 282 | }; | ||
| 283 | |||
| 284 | #define P4_GEN_CACHE_EVENT(event, bit, cache_event) \ | ||
| 285 | p4_config_pack_escr(P4_ESCR_EVENT(event) | \ | ||
| 286 | P4_ESCR_EMASK_BIT(event, bit)) | \ | ||
| 287 | p4_config_pack_cccr(cache_event | \ | ||
| 288 | P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event)))) | ||
| 289 | |||
| 290 | static __initconst const u64 p4_hw_cache_event_ids | ||
| 291 | [PERF_COUNT_HW_CACHE_MAX] | ||
| 292 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
| 293 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
| 294 | { | ||
| 295 | [ C(L1D ) ] = { | ||
| 296 | [ C(OP_READ) ] = { | ||
| 297 | [ C(RESULT_ACCESS) ] = 0x0, | ||
| 298 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | ||
| 299 | P4_CACHE__1stl_cache_load_miss_retired), | ||
| 300 | }, | ||
| 301 | }, | ||
| 302 | [ C(LL ) ] = { | ||
| 303 | [ C(OP_READ) ] = { | ||
| 304 | [ C(RESULT_ACCESS) ] = 0x0, | ||
| 305 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | ||
| 306 | P4_CACHE__2ndl_cache_load_miss_retired), | ||
| 307 | }, | ||
| 308 | }, | ||
| 309 | [ C(DTLB) ] = { | ||
| 310 | [ C(OP_READ) ] = { | ||
| 311 | [ C(RESULT_ACCESS) ] = 0x0, | ||
| 312 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | ||
| 313 | P4_CACHE__dtlb_load_miss_retired), | ||
| 314 | }, | ||
| 315 | [ C(OP_WRITE) ] = { | ||
| 316 | [ C(RESULT_ACCESS) ] = 0x0, | ||
| 317 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | ||
| 318 | P4_CACHE__dtlb_store_miss_retired), | ||
| 319 | }, | ||
| 320 | }, | ||
| 321 | [ C(ITLB) ] = { | ||
| 322 | [ C(OP_READ) ] = { | ||
| 323 | [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT, | ||
| 324 | P4_CACHE__itlb_reference_hit), | ||
| 325 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS, | ||
| 326 | P4_CACHE__itlb_reference_miss), | ||
| 327 | }, | ||
| 328 | [ C(OP_WRITE) ] = { | ||
| 329 | [ C(RESULT_ACCESS) ] = -1, | ||
| 330 | [ C(RESULT_MISS) ] = -1, | ||
| 331 | }, | ||
| 332 | [ C(OP_PREFETCH) ] = { | ||
| 333 | [ C(RESULT_ACCESS) ] = -1, | ||
| 334 | [ C(RESULT_MISS) ] = -1, | ||
| 335 | }, | ||
| 336 | }, | ||
| 337 | }; | ||
| 338 | |||
| 339 | static u64 p4_general_events[PERF_COUNT_HW_MAX] = { | ||
| 340 | /* non-halted CPU clocks */ | ||
| 341 | [PERF_COUNT_HW_CPU_CYCLES] = | ||
| 342 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) | | ||
| 343 | P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)), | ||
| 344 | |||
| 345 | /* | ||
| 346 | * retired instructions | ||
| 347 | * for the sake of simplicity we don't use FSB tagging | ||
| 348 | */ | ||
| 349 | [PERF_COUNT_HW_INSTRUCTIONS] = | ||
| 350 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_INSTR_RETIRED) | | ||
| 351 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) | | ||
| 352 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)), | ||
| 353 | |||
| 354 | /* cache hits */ | ||
| 355 | [PERF_COUNT_HW_CACHE_REFERENCES] = | ||
| 356 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) | | ||
| 357 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) | | ||
| 358 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) | | ||
| 359 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) | | ||
| 360 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) | | ||
| 361 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) | | ||
| 362 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)), | ||
| 363 | |||
| 364 | /* cache misses */ | ||
| 365 | [PERF_COUNT_HW_CACHE_MISSES] = | ||
| 366 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) | | ||
| 367 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) | | ||
| 368 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) | | ||
| 369 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS)), | ||
| 370 | |||
| 371 | /* branch instructions retired */ | ||
| 372 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = | ||
| 373 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_RETIRED_BRANCH_TYPE) | | ||
| 374 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) | | ||
| 375 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) | | ||
| 376 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) | | ||
| 377 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT)), | ||
| 378 | |||
| 379 | /* mispredicted branches retired */ | ||
| 380 | [PERF_COUNT_HW_BRANCH_MISSES] = | ||
| 381 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_MISPRED_BRANCH_RETIRED) | | ||
| 382 | P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS)), | ||
| 383 | |||
| 384 | /* bus ready clocks (cpu is driving #DRDY_DRV\#DRDY_OWN): */ | ||
| 385 | [PERF_COUNT_HW_BUS_CYCLES] = | ||
| 386 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_FSB_DATA_ACTIVITY) | | ||
| 387 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) | | ||
| 388 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN)) | | ||
| 389 | p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE), | ||
| 390 | }; | ||
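The table above pre-packs an ESCR half and, for the bus-cycles event, a CCCR half into one 64-bit config word. Below is a self-contained sketch of such a 32/32 split; the real layout is defined by the pack/unpack macros in <asm/perf_event_p4.h>, so the "ESCR in the high word, CCCR in the low word" arrangement here is only an assumption for illustration:

    /* Sketch of a 32/32 split config word. The layout is assumed for
     * illustration; the authoritative macros live in <asm/perf_event_p4.h>. */
    #include <stdint.h>
    #include <stdio.h>

    static uint64_t pack_escr(uint32_t escr)  { return (uint64_t)escr << 32; }
    static uint64_t pack_cccr(uint32_t cccr)  { return (uint64_t)cccr; }
    static uint32_t unpack_escr(uint64_t cfg) { return (uint32_t)(cfg >> 32); }
    static uint32_t unpack_cccr(uint64_t cfg) { return (uint32_t)cfg; }

    int main(void)
    {
        uint64_t cfg = pack_escr(0x0003b000) | pack_cccr(0x00039000); /* made up */

        printf("escr=%#x cccr=%#x\n", unpack_escr(cfg), unpack_cccr(cfg));
        return 0;
    }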
| 391 | |||
| 392 | static struct p4_event_bind *p4_config_get_bind(u64 config) | ||
| 393 | { | ||
| 394 | unsigned int evnt = p4_config_unpack_event(config); | ||
| 395 | struct p4_event_bind *bind = NULL; | ||
| 396 | |||
| 397 | if (evnt < ARRAY_SIZE(p4_event_bind_map)) | ||
| 398 | bind = &p4_event_bind_map[evnt]; | ||
| 399 | |||
| 400 | return bind; | ||
| 401 | } | ||
| 402 | |||
| 403 | static u64 p4_pmu_event_map(int hw_event) | ||
| 404 | { | ||
| 405 | struct p4_event_bind *bind; | ||
| 406 | unsigned int esel; | ||
| 407 | u64 config; | ||
| 408 | |||
| 409 | config = p4_general_events[hw_event]; | ||
| 410 | bind = p4_config_get_bind(config); | ||
| 411 | esel = P4_OPCODE_ESEL(bind->opcode); | ||
| 412 | config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel)); | ||
| 413 | |||
| 414 | return config; | ||
| 415 | } | ||
| 416 | |||
| 417 | static int p4_hw_config(struct perf_event *event) | ||
| 418 | { | ||
| 419 | int cpu = get_cpu(); | ||
| 420 | int rc = 0; | ||
| 421 | unsigned int evnt; | ||
| 422 | u32 escr, cccr; | ||
| 423 | |||
| 424 | /* | ||
| 425 | * the reason we grab the cpu this early is that if we get scheduled | ||
| 426 | * on the same cpu the first time around, we will not need to swap the | ||
| 427 | * thread-specific flags in the config (and will save some cpu cycles) | ||
| 428 | */ | ||
| 429 | |||
| 430 | cccr = p4_default_cccr_conf(cpu); | ||
| 431 | escr = p4_default_escr_conf(cpu, event->attr.exclude_kernel, | ||
| 432 | event->attr.exclude_user); | ||
| 433 | event->hw.config = p4_config_pack_escr(escr) | | ||
| 434 | p4_config_pack_cccr(cccr); | ||
| 435 | |||
| 436 | if (p4_ht_active() && p4_ht_thread(cpu)) | ||
| 437 | event->hw.config = p4_set_ht_bit(event->hw.config); | ||
| 438 | |||
| 439 | if (event->attr.type == PERF_TYPE_RAW) { | ||
| 440 | |||
| 441 | /* user data may have out-of-bound event index */ | ||
| 442 | evnt = p4_config_unpack_event(event->attr.config); | ||
| 443 | if (evnt >= ARRAY_SIZE(p4_event_bind_map)) { | ||
| 444 | rc = -EINVAL; | ||
| 445 | goto out; | ||
| 446 | } | ||
| 447 | |||
| 448 | /* | ||
| 449 | * We don't control raw events so it's up to the caller | ||
| 450 | * to pass sane values (and we don't count the thread number | ||
| 451 | * on an HT machine, but allow HT-compatible specifics to be | ||
| 452 | * passed on) | ||
| 453 | * | ||
| 454 | * XXX: HT wide things should check perf_paranoid_cpu() && | ||
| 455 | * CAP_SYS_ADMIN | ||
| 456 | */ | ||
| 457 | event->hw.config |= event->attr.config & | ||
| 458 | (p4_config_pack_escr(P4_ESCR_MASK_HT) | | ||
| 459 | p4_config_pack_cccr(P4_CCCR_MASK_HT)); | ||
| 460 | } | ||
| 461 | |||
| 462 | rc = x86_setup_perfctr(event); | ||
| 463 | out: | ||
| 464 | put_cpu(); | ||
| 465 | return rc; | ||
| 466 | } | ||
| 467 | |||
| 468 | static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) | ||
| 469 | { | ||
| 470 | int overflow = 0; | ||
| 471 | u32 low, high; | ||
| 472 | |||
| 473 | rdmsr(hwc->config_base + hwc->idx, low, high); | ||
| 474 | |||
| 475 | /* we need to check high bit for unflagged overflows */ | ||
| 476 | if ((low & P4_CCCR_OVF) || !(high & (1 << 31))) { | ||
| 477 | overflow = 1; | ||
| 478 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | ||
| 479 | ((u64)low) & ~P4_CCCR_OVF); | ||
| 480 | } | ||
| 481 | |||
| 482 | return overflow; | ||
| 483 | } | ||
| 484 | |||
| 485 | static inline void p4_pmu_disable_event(struct perf_event *event) | ||
| 486 | { | ||
| 487 | struct hw_perf_event *hwc = &event->hw; | ||
| 488 | |||
| 489 | /* | ||
| 490 | * If event gets disabled while counter is in overflowed | ||
| 491 | * state we need to clear P4_CCCR_OVF, otherwise interrupt get | ||
| 492 | * asserted again and again | ||
| 493 | */ | ||
| 494 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | ||
| 495 | (u64)(p4_config_unpack_cccr(hwc->config)) & | ||
| 496 | ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED); | ||
| 497 | } | ||
| 498 | |||
| 499 | static void p4_pmu_disable_all(void) | ||
| 500 | { | ||
| 501 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 502 | int idx; | ||
| 503 | |||
| 504 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
| 505 | struct perf_event *event = cpuc->events[idx]; | ||
| 506 | if (!test_bit(idx, cpuc->active_mask)) | ||
| 507 | continue; | ||
| 508 | p4_pmu_disable_event(event); | ||
| 509 | } | ||
| 510 | } | ||
| 511 | |||
| 512 | static void p4_pmu_enable_event(struct perf_event *event) | ||
| 513 | { | ||
| 514 | struct hw_perf_event *hwc = &event->hw; | ||
| 515 | int thread = p4_ht_config_thread(hwc->config); | ||
| 516 | u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config)); | ||
| 517 | unsigned int idx = p4_config_unpack_event(hwc->config); | ||
| 518 | unsigned int idx_cache = p4_config_unpack_cache_event(hwc->config); | ||
| 519 | struct p4_event_bind *bind; | ||
| 520 | struct p4_cache_event_bind *bind_cache; | ||
| 521 | u64 escr_addr, cccr; | ||
| 522 | |||
| 523 | bind = &p4_event_bind_map[idx]; | ||
| 524 | escr_addr = (u64)bind->escr_msr[thread]; | ||
| 525 | |||
| 526 | /* | ||
| 527 | * - we don't support cascaded counters yet | ||
| 528 | * - and counter 1 is broken (erratum) | ||
| 529 | */ | ||
| 530 | WARN_ON_ONCE(p4_is_event_cascaded(hwc->config)); | ||
| 531 | WARN_ON_ONCE(hwc->idx == 1); | ||
| 532 | |||
| 533 | /* we need a real Event value */ | ||
| 534 | escr_conf &= ~P4_ESCR_EVENT_MASK; | ||
| 535 | escr_conf |= P4_ESCR_EVENT(P4_OPCODE_EVNT(bind->opcode)); | ||
| 536 | |||
| 537 | cccr = p4_config_unpack_cccr(hwc->config); | ||
| 538 | |||
| 539 | /* | ||
| 540 | * it could be a cache event, in which case we need to | ||
| 541 | * set metrics into additional MSRs | ||
| 542 | */ | ||
| 543 | BUILD_BUG_ON(P4_CACHE__MAX > P4_CCCR_CACHE_OPS_MASK); | ||
| 544 | if (idx_cache > P4_CACHE__NONE && | ||
| 545 | idx_cache < ARRAY_SIZE(p4_cache_event_bind_map)) { | ||
| 546 | bind_cache = &p4_cache_event_bind_map[idx_cache]; | ||
| 547 | (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind_cache->metric_pebs); | ||
| 548 | (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind_cache->metric_vert); | ||
| 549 | } | ||
| 550 | |||
| 551 | (void)checking_wrmsrl(escr_addr, escr_conf); | ||
| 552 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | ||
| 553 | (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE); | ||
| 554 | } | ||
| 555 | |||
| 556 | static void p4_pmu_enable_all(int added) | ||
| 557 | { | ||
| 558 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 559 | int idx; | ||
| 560 | |||
| 561 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
| 562 | struct perf_event *event = cpuc->events[idx]; | ||
| 563 | if (!test_bit(idx, cpuc->active_mask)) | ||
| 564 | continue; | ||
| 565 | p4_pmu_enable_event(event); | ||
| 566 | } | ||
| 567 | } | ||
| 568 | |||
| 569 | static int p4_pmu_handle_irq(struct pt_regs *regs) | ||
| 570 | { | ||
| 571 | struct perf_sample_data data; | ||
| 572 | struct cpu_hw_events *cpuc; | ||
| 573 | struct perf_event *event; | ||
| 574 | struct hw_perf_event *hwc; | ||
| 575 | int idx, handled = 0; | ||
| 576 | u64 val; | ||
| 577 | |||
| 578 | data.addr = 0; | ||
| 579 | data.raw = NULL; | ||
| 580 | |||
| 581 | cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 582 | |||
| 583 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
| 584 | |||
| 585 | if (!test_bit(idx, cpuc->active_mask)) | ||
| 586 | continue; | ||
| 587 | |||
| 588 | event = cpuc->events[idx]; | ||
| 589 | hwc = &event->hw; | ||
| 590 | |||
| 591 | WARN_ON_ONCE(hwc->idx != idx); | ||
| 592 | |||
| 593 | /* it might be an unflagged overflow */ | ||
| 594 | handled = p4_pmu_clear_cccr_ovf(hwc); | ||
| 595 | |||
| 596 | val = x86_perf_event_update(event); | ||
| 597 | if (!handled && (val & (1ULL << (x86_pmu.cntval_bits - 1)))) | ||
| 598 | continue; | ||
| 599 | |||
| 600 | /* event overflow for sure */ | ||
| 601 | data.period = event->hw.last_period; | ||
| 602 | |||
| 603 | if (!x86_perf_event_set_period(event)) | ||
| 604 | continue; | ||
| 605 | if (perf_event_overflow(event, 1, &data, regs)) | ||
| 606 | p4_pmu_disable_event(event); | ||
| 607 | } | ||
| 608 | |||
| 609 | if (handled) { | ||
| 610 | /* p4 quirk: unmask it again */ | ||
| 611 | apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); | ||
| 612 | inc_irq_stat(apic_perf_irqs); | ||
| 613 | } | ||
| 614 | |||
| 615 | return handled; | ||
| 616 | } | ||
| 617 | |||
| 618 | /* | ||
| 619 | * swap the thread-specific fields according to the thread | ||
| 620 | * we are going to run on | ||
| 621 | */ | ||
| 622 | static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu) | ||
| 623 | { | ||
| 624 | u32 escr, cccr; | ||
| 625 | |||
| 626 | /* | ||
| 627 | * either we are lucky and continue on the same cpu, or there is no HT support | ||
| 628 | */ | ||
| 629 | if (!p4_should_swap_ts(hwc->config, cpu)) | ||
| 630 | return; | ||
| 631 | |||
| 632 | /* | ||
| 633 | * the event has migrated from another logical | ||
| 634 | * cpu, so we need to swap the thread-specific flags | ||
| 635 | */ | ||
| 636 | |||
| 637 | escr = p4_config_unpack_escr(hwc->config); | ||
| 638 | cccr = p4_config_unpack_cccr(hwc->config); | ||
| 639 | |||
| 640 | if (p4_ht_thread(cpu)) { | ||
| 641 | cccr &= ~P4_CCCR_OVF_PMI_T0; | ||
| 642 | cccr |= P4_CCCR_OVF_PMI_T1; | ||
| 643 | if (escr & P4_ESCR_T0_OS) { | ||
| 644 | escr &= ~P4_ESCR_T0_OS; | ||
| 645 | escr |= P4_ESCR_T1_OS; | ||
| 646 | } | ||
| 647 | if (escr & P4_ESCR_T0_USR) { | ||
| 648 | escr &= ~P4_ESCR_T0_USR; | ||
| 649 | escr |= P4_ESCR_T1_USR; | ||
| 650 | } | ||
| 651 | hwc->config = p4_config_pack_escr(escr); | ||
| 652 | hwc->config |= p4_config_pack_cccr(cccr); | ||
| 653 | hwc->config |= P4_CONFIG_HT; | ||
| 654 | } else { | ||
| 655 | cccr &= ~P4_CCCR_OVF_PMI_T1; | ||
| 656 | cccr |= P4_CCCR_OVF_PMI_T0; | ||
| 657 | if (escr & P4_ESCR_T1_OS) { | ||
| 658 | escr &= ~P4_ESCR_T1_OS; | ||
| 659 | escr |= P4_ESCR_T0_OS; | ||
| 660 | } | ||
| 661 | if (escr & P4_ESCR_T1_USR) { | ||
| 662 | escr &= ~P4_ESCR_T1_USR; | ||
| 663 | escr |= P4_ESCR_T0_USR; | ||
| 664 | } | ||
| 665 | hwc->config = p4_config_pack_escr(escr); | ||
| 666 | hwc->config |= p4_config_pack_cccr(cccr); | ||
| 667 | hwc->config &= ~P4_CONFIG_HT; | ||
| 668 | } | ||
| 669 | } | ||
| 670 | |||
| 671 | /* | ||
| 672 | * ESCR address hashing is tricky: ESCRs are not sequential | ||
| 673 | * in memory, but they all start from MSR_P4_BSU_ESCR0 (0x03a0) and | ||
| 674 | * every ESCR address lies in the range [0x3a0, 0x3e1], | ||
| 675 | * | ||
| 676 | * so we end up with a ~70% filled hash table | ||
| 677 | */ | ||
| 678 | |||
| 679 | #define P4_ESCR_MSR_BASE 0x000003a0 | ||
| 680 | #define P4_ESCR_MSR_MAX 0x000003e1 | ||
| 681 | #define P4_ESCR_MSR_TABLE_SIZE (P4_ESCR_MSR_MAX - P4_ESCR_MSR_BASE + 1) | ||
| 682 | #define P4_ESCR_MSR_IDX(msr) (msr - P4_ESCR_MSR_BASE) | ||
| 683 | #define P4_ESCR_MSR_TABLE_ENTRY(msr) [P4_ESCR_MSR_IDX(msr)] = msr | ||
| 684 | |||
| 685 | static const unsigned int p4_escr_table[P4_ESCR_MSR_TABLE_SIZE] = { | ||
| 686 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR0), | ||
| 687 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR1), | ||
| 688 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR0), | ||
| 689 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR1), | ||
| 690 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR0), | ||
| 691 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR1), | ||
| 692 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR0), | ||
| 693 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR1), | ||
| 694 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR2), | ||
| 695 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR3), | ||
| 696 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR4), | ||
| 697 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR5), | ||
| 698 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR0), | ||
| 699 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR1), | ||
| 700 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR0), | ||
| 701 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR1), | ||
| 702 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR0), | ||
| 703 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR1), | ||
| 704 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR0), | ||
| 705 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR1), | ||
| 706 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR0), | ||
| 707 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR1), | ||
| 708 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR0), | ||
| 709 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR1), | ||
| 710 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR0), | ||
| 711 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR1), | ||
| 712 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR0), | ||
| 713 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR1), | ||
| 714 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR0), | ||
| 715 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR1), | ||
| 716 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR0), | ||
| 717 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR1), | ||
| 718 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR0), | ||
| 719 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR1), | ||
| 720 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR0), | ||
| 721 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR1), | ||
| 722 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR0), | ||
| 723 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR1), | ||
| 724 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR0), | ||
| 725 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR1), | ||
| 726 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR0), | ||
| 727 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR1), | ||
| 728 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR0), | ||
| 729 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR1), | ||
| 730 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR0), | ||
| 731 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR1), | ||
| 732 | }; | ||
| 733 | |||
| 734 | static int p4_get_escr_idx(unsigned int addr) | ||
| 735 | { | ||
| 736 | unsigned int idx = P4_ESCR_MSR_IDX(addr); | ||
| 737 | |||
| 738 | if (unlikely(idx >= P4_ESCR_MSR_TABLE_SIZE || | ||
| 739 | !p4_escr_table[idx] || | ||
| 740 | p4_escr_table[idx] != addr)) { | ||
| 741 | WARN_ONCE(1, "P4 PMU: Wrong address passed: %x\n", addr); | ||
| 742 | return -1; | ||
| 743 | } | ||
| 744 | |||
| 745 | return idx; | ||
| 746 | } | ||
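All ESCR MSR addresses fall in a small window starting at MSR_P4_BSU_ESCR0 (0x3a0), so the table above is really a direct-mapped lookup: index = address - base, holes stay zero, and the lookup validates the entry. A standalone sketch of the same idea with two illustrative entries (the second address is only an example):

    /* Sketch of the direct-mapped ESCR lookup: idx = msr - 0x3a0, holes are 0. */
    #include <stdio.h>

    #define ESCR_BASE 0x3a0
    #define ESCR_MAX  0x3e1
    #define ESCR_TABLE_SIZE (ESCR_MAX - ESCR_BASE + 1)

    static const unsigned int escr_table[ESCR_TABLE_SIZE] = {
        [0x3a0 - ESCR_BASE] = 0x3a0,    /* e.g. MSR_P4_BSU_ESCR0 */
        [0x3b8 - ESCR_BASE] = 0x3b8,    /* some other ESCR, illustrative */
    };

    static int escr_idx(unsigned int msr)
    {
        unsigned int idx = msr - ESCR_BASE;

        if (idx >= ESCR_TABLE_SIZE || escr_table[idx] != msr)
            return -1;                  /* hole or out of range */
        return idx;
    }

    int main(void)
    {
        printf("%d %d %d\n", escr_idx(0x3a0), escr_idx(0x3b8), escr_idx(0x3a1));
        return 0;
    }

This prints "0 24 -1": valid entries map straight to their slot, anything else is rejected.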
| 747 | |||
| 748 | static int p4_next_cntr(int thread, unsigned long *used_mask, | ||
| 749 | struct p4_event_bind *bind) | ||
| 750 | { | ||
| 751 | int i, j; | ||
| 752 | |||
| 753 | for (i = 0; i < P4_CNTR_LIMIT; i++) { | ||
| 754 | j = bind->cntr[thread][i]; | ||
| 755 | if (j != -1 && !test_bit(j, used_mask)) | ||
| 756 | return j; | ||
| 757 | } | ||
| 758 | |||
| 759 | return -1; | ||
| 760 | } | ||
| 761 | |||
| 762 | static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | ||
| 763 | { | ||
| 764 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
| 765 | unsigned long escr_mask[BITS_TO_LONGS(P4_ESCR_MSR_TABLE_SIZE)]; | ||
| 766 | int cpu = smp_processor_id(); | ||
| 767 | struct hw_perf_event *hwc; | ||
| 768 | struct p4_event_bind *bind; | ||
| 769 | unsigned int i, thread, num; | ||
| 770 | int cntr_idx, escr_idx; | ||
| 771 | |||
| 772 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); | ||
| 773 | bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE); | ||
| 774 | |||
| 775 | for (i = 0, num = n; i < n; i++, num--) { | ||
| 776 | |||
| 777 | hwc = &cpuc->event_list[i]->hw; | ||
| 778 | thread = p4_ht_thread(cpu); | ||
| 779 | bind = p4_config_get_bind(hwc->config); | ||
| 780 | escr_idx = p4_get_escr_idx(bind->escr_msr[thread]); | ||
| 781 | if (unlikely(escr_idx == -1)) | ||
| 782 | goto done; | ||
| 783 | |||
| 784 | if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) { | ||
| 785 | cntr_idx = hwc->idx; | ||
| 786 | if (assign) | ||
| 787 | assign[i] = hwc->idx; | ||
| 788 | goto reserve; | ||
| 789 | } | ||
| 790 | |||
| 791 | cntr_idx = p4_next_cntr(thread, used_mask, bind); | ||
| 792 | if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) | ||
| 793 | goto done; | ||
| 794 | |||
| 795 | p4_pmu_swap_config_ts(hwc, cpu); | ||
| 796 | if (assign) | ||
| 797 | assign[i] = cntr_idx; | ||
| 798 | reserve: | ||
| 799 | set_bit(cntr_idx, used_mask); | ||
| 800 | set_bit(escr_idx, escr_mask); | ||
| 801 | } | ||
| 802 | |||
| 803 | done: | ||
| 804 | return num ? -ENOSPC : 0; | ||
| 805 | } | ||
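The scheduler above is a single greedy pass: an event keeps its previously assigned counter when no thread swap is needed, otherwise it takes the first free counter from its binding, and both the counter and the ESCR are reserved; if either cannot be had, the whole schedule fails with -ENOSPC. A compact standalone sketch of that greedy idea with plain bitmasks and invented bindings:

    /* Greedy counter/ESCR assignment in miniature. The bindings below are
     * invented; the real ones come from p4_event_bind_map. */
    #include <stdio.h>

    struct binding { int cntr[3]; int escr; };

    int main(void)
    {
        struct binding ev[2] = {
            { { 12, 13, 16 }, 0 },  /* hypothetical CRU-based event */
            { { 12, 13, 16 }, 1 },  /* same counters, different ESCR */
        };
        unsigned long used_cntr = 0, used_escr = 0;

        for (int i = 0; i < 2; i++) {
            int c = -1;

            for (int j = 0; j < 3; j++) {
                int cand = ev[i].cntr[j];
                if (cand != -1 && !(used_cntr & (1UL << cand))) {
                    c = cand;
                    break;
                }
            }
            if (c == -1 || (used_escr & (1UL << ev[i].escr))) {
                printf("-ENOSPC at event %d\n", i);
                return 1;
            }
            used_cntr |= 1UL << c;
            used_escr |= 1UL << ev[i].escr;
            printf("event %d -> counter %d\n", i, c);
        }
        return 0;
    }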
| 806 | |||
| 807 | static __initconst const struct x86_pmu p4_pmu = { | ||
| 808 | .name = "Netburst P4/Xeon", | ||
| 809 | .handle_irq = p4_pmu_handle_irq, | ||
| 810 | .disable_all = p4_pmu_disable_all, | ||
| 811 | .enable_all = p4_pmu_enable_all, | ||
| 812 | .enable = p4_pmu_enable_event, | ||
| 813 | .disable = p4_pmu_disable_event, | ||
| 814 | .eventsel = MSR_P4_BPU_CCCR0, | ||
| 815 | .perfctr = MSR_P4_BPU_PERFCTR0, | ||
| 816 | .event_map = p4_pmu_event_map, | ||
| 817 | .max_events = ARRAY_SIZE(p4_general_events), | ||
| 818 | .get_event_constraints = x86_get_event_constraints, | ||
| 819 | /* | ||
| 820 | * If HT is disabled we may need to use all | ||
| 821 | * ARCH_P4_MAX_CCCR counters simultaneously, | ||
| 822 | * though we leave it restricted for the moment, assuming | ||
| 823 | * HT is on | ||
| 824 | */ | ||
| 825 | .num_counters = ARCH_P4_MAX_CCCR, | ||
| 826 | .apic = 1, | ||
| 827 | .cntval_bits = 40, | ||
| 828 | .cntval_mask = (1ULL << 40) - 1, | ||
| 829 | .max_period = (1ULL << 39) - 1, | ||
| 830 | .hw_config = p4_hw_config, | ||
| 831 | .schedule_events = p4_pmu_schedule_events, | ||
| 832 | }; | ||
| 833 | |||
| 834 | static __init int p4_pmu_init(void) | ||
| 835 | { | ||
| 836 | unsigned int low, high; | ||
| 837 | |||
| 838 | /* If we get stripped -- indexing fails */ | ||
| 839 | BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC); | ||
| 840 | |||
| 841 | rdmsr(MSR_IA32_MISC_ENABLE, low, high); | ||
| 842 | if (!(low & (1 << 7))) { | ||
| 843 | pr_cont("unsupported Netburst CPU model %d ", | ||
| 844 | boot_cpu_data.x86_model); | ||
| 845 | return -ENODEV; | ||
| 846 | } | ||
| 847 | |||
| 848 | memcpy(hw_cache_event_ids, p4_hw_cache_event_ids, | ||
| 849 | sizeof(hw_cache_event_ids)); | ||
| 850 | |||
| 851 | pr_cont("Netburst events, "); | ||
| 852 | |||
| 853 | x86_pmu = p4_pmu; | ||
| 854 | |||
| 855 | return 0; | ||
| 856 | } | ||
| 857 | |||
| 858 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index a330485d14da..34ba07be2cda 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c | |||
| @@ -27,24 +27,6 @@ static u64 p6_pmu_event_map(int hw_event) | |||
| 27 | */ | 27 | */ |
| 28 | #define P6_NOP_EVENT 0x0000002EULL | 28 | #define P6_NOP_EVENT 0x0000002EULL |
| 29 | 29 | ||
| 30 | static u64 p6_pmu_raw_event(u64 hw_event) | ||
| 31 | { | ||
| 32 | #define P6_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
| 33 | #define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
| 34 | #define P6_EVNTSEL_EDGE_MASK 0x00040000ULL | ||
| 35 | #define P6_EVNTSEL_INV_MASK 0x00800000ULL | ||
| 36 | #define P6_EVNTSEL_REG_MASK 0xFF000000ULL | ||
| 37 | |||
| 38 | #define P6_EVNTSEL_MASK \ | ||
| 39 | (P6_EVNTSEL_EVENT_MASK | \ | ||
| 40 | P6_EVNTSEL_UNIT_MASK | \ | ||
| 41 | P6_EVNTSEL_EDGE_MASK | \ | ||
| 42 | P6_EVNTSEL_INV_MASK | \ | ||
| 43 | P6_EVNTSEL_REG_MASK) | ||
| 44 | |||
| 45 | return hw_event & P6_EVNTSEL_MASK; | ||
| 46 | } | ||
| 47 | |||
| 48 | static struct event_constraint p6_event_constraints[] = | 30 | static struct event_constraint p6_event_constraints[] = |
| 49 | { | 31 | { |
| 50 | INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ | 32 | INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ |
| @@ -66,7 +48,7 @@ static void p6_pmu_disable_all(void) | |||
| 66 | wrmsrl(MSR_P6_EVNTSEL0, val); | 48 | wrmsrl(MSR_P6_EVNTSEL0, val); |
| 67 | } | 49 | } |
| 68 | 50 | ||
| 69 | static void p6_pmu_enable_all(void) | 51 | static void p6_pmu_enable_all(int added) |
| 70 | { | 52 | { |
| 71 | unsigned long val; | 53 | unsigned long val; |
| 72 | 54 | ||
| @@ -102,22 +84,23 @@ static void p6_pmu_enable_event(struct perf_event *event) | |||
| 102 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); | 84 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); |
| 103 | } | 85 | } |
| 104 | 86 | ||
| 105 | static __initconst struct x86_pmu p6_pmu = { | 87 | static __initconst const struct x86_pmu p6_pmu = { |
| 106 | .name = "p6", | 88 | .name = "p6", |
| 107 | .handle_irq = x86_pmu_handle_irq, | 89 | .handle_irq = x86_pmu_handle_irq, |
| 108 | .disable_all = p6_pmu_disable_all, | 90 | .disable_all = p6_pmu_disable_all, |
| 109 | .enable_all = p6_pmu_enable_all, | 91 | .enable_all = p6_pmu_enable_all, |
| 110 | .enable = p6_pmu_enable_event, | 92 | .enable = p6_pmu_enable_event, |
| 111 | .disable = p6_pmu_disable_event, | 93 | .disable = p6_pmu_disable_event, |
| 94 | .hw_config = x86_pmu_hw_config, | ||
| 95 | .schedule_events = x86_schedule_events, | ||
| 112 | .eventsel = MSR_P6_EVNTSEL0, | 96 | .eventsel = MSR_P6_EVNTSEL0, |
| 113 | .perfctr = MSR_P6_PERFCTR0, | 97 | .perfctr = MSR_P6_PERFCTR0, |
| 114 | .event_map = p6_pmu_event_map, | 98 | .event_map = p6_pmu_event_map, |
| 115 | .raw_event = p6_pmu_raw_event, | ||
| 116 | .max_events = ARRAY_SIZE(p6_perfmon_event_map), | 99 | .max_events = ARRAY_SIZE(p6_perfmon_event_map), |
| 117 | .apic = 1, | 100 | .apic = 1, |
| 118 | .max_period = (1ULL << 31) - 1, | 101 | .max_period = (1ULL << 31) - 1, |
| 119 | .version = 0, | 102 | .version = 0, |
| 120 | .num_events = 2, | 103 | .num_counters = 2, |
| 121 | /* | 104 | /* |
| 122 | * Events have 40 bits implemented. However they are designed such | 105 | * Events have 40 bits implemented. However they are designed such |
| 123 | * that bits [32-39] are sign extensions of bit 31. As such the | 106 | * that bits [32-39] are sign extensions of bit 31. As such the |
| @@ -125,8 +108,8 @@ static __initconst struct x86_pmu p6_pmu = { | |||
| 125 | * | 108 | * |
| 126 | * See IA-32 Intel Architecture Software developer manual Vol 3B | 109 | * See IA-32 Intel Architecture Software developer manual Vol 3B |
| 127 | */ | 110 | */ |
| 128 | .event_bits = 32, | 111 | .cntval_bits = 32, |
| 129 | .event_mask = (1ULL << 32) - 1, | 112 | .cntval_mask = (1ULL << 32) - 1, |
| 130 | .get_event_constraints = x86_get_event_constraints, | 113 | .get_event_constraints = x86_get_event_constraints, |
| 131 | .event_constraints = p6_event_constraints, | 114 | .event_constraints = p6_event_constraints, |
| 132 | }; | 115 | }; |
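The closing comment is why the P6 PMU advertises 32-bit counters even though 40 bits are implemented: bits 32-39 merely sign-extend bit 31. A short sketch of how a delta computed with shifts (the approach the generic x86 update path takes) stays correct across a 32-bit wrap; the counter readings are made up:

    /* Sketch: computing a counter delta with only cntval_bits significant,
     * so a wrap from near 0xffffffff back to a small value still yields
     * the right small delta. */
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        const int cntval_bits = 32;
        const int shift = 64 - cntval_bits;
        uint64_t prev = 0xfffffff0, now = 0x00000010;   /* made-up readings */
        int64_t delta = ((int64_t)(now << shift) - (int64_t)(prev << shift)) >> shift;

        printf("delta = %" PRId64 "\n", delta);         /* 32, i.e. 0x20 */
        return 0;
    }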
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index dfdb4dba2320..b9d1ff588445 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c | |||
| @@ -24,8 +24,8 @@ | |||
| 24 | #include <linux/dmi.h> | 24 | #include <linux/dmi.h> |
| 25 | #include <linux/module.h> | 25 | #include <linux/module.h> |
| 26 | #include <asm/div64.h> | 26 | #include <asm/div64.h> |
| 27 | #include <asm/vmware.h> | ||
| 28 | #include <asm/x86_init.h> | 27 | #include <asm/x86_init.h> |
| 28 | #include <asm/hypervisor.h> | ||
| 29 | 29 | ||
| 30 | #define CPUID_VMWARE_INFO_LEAF 0x40000000 | 30 | #define CPUID_VMWARE_INFO_LEAF 0x40000000 |
| 31 | #define VMWARE_HYPERVISOR_MAGIC 0x564D5868 | 31 | #define VMWARE_HYPERVISOR_MAGIC 0x564D5868 |
| @@ -65,7 +65,7 @@ static unsigned long vmware_get_tsc_khz(void) | |||
| 65 | return tsc_hz; | 65 | return tsc_hz; |
| 66 | } | 66 | } |
| 67 | 67 | ||
| 68 | void __init vmware_platform_setup(void) | 68 | static void __init vmware_platform_setup(void) |
| 69 | { | 69 | { |
| 70 | uint32_t eax, ebx, ecx, edx; | 70 | uint32_t eax, ebx, ecx, edx; |
| 71 | 71 | ||
| @@ -83,26 +83,22 @@ void __init vmware_platform_setup(void) | |||
| 83 | * serial key should be enough, as this will always have a VMware | 83 | * serial key should be enough, as this will always have a VMware |
| 84 | * specific string when running under VMware hypervisor. | 84 | * specific string when running under VMware hypervisor. |
| 85 | */ | 85 | */ |
| 86 | int vmware_platform(void) | 86 | static bool __init vmware_platform(void) |
| 87 | { | 87 | { |
| 88 | if (cpu_has_hypervisor) { | 88 | if (cpu_has_hypervisor) { |
| 89 | unsigned int eax, ebx, ecx, edx; | 89 | unsigned int eax; |
| 90 | char hyper_vendor_id[13]; | 90 | unsigned int hyper_vendor_id[3]; |
| 91 | 91 | ||
| 92 | cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &ebx, &ecx, &edx); | 92 | cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &hyper_vendor_id[0], |
| 93 | memcpy(hyper_vendor_id + 0, &ebx, 4); | 93 | &hyper_vendor_id[1], &hyper_vendor_id[2]); |
| 94 | memcpy(hyper_vendor_id + 4, &ecx, 4); | 94 | if (!memcmp(hyper_vendor_id, "VMwareVMware", 12)) |
| 95 | memcpy(hyper_vendor_id + 8, &edx, 4); | 95 | return true; |
| 96 | hyper_vendor_id[12] = '\0'; | ||
| 97 | if (!strcmp(hyper_vendor_id, "VMwareVMware")) | ||
| 98 | return 1; | ||
| 99 | } else if (dmi_available && dmi_name_in_serial("VMware") && | 96 | } else if (dmi_available && dmi_name_in_serial("VMware") && |
| 100 | __vmware_platform()) | 97 | __vmware_platform()) |
| 101 | return 1; | 98 | return true; |
| 102 | 99 | ||
| 103 | return 0; | 100 | return false; |
| 104 | } | 101 | } |
| 105 | EXPORT_SYMBOL(vmware_platform); | ||
| 106 | 102 | ||
| 107 | /* | 103 | /* |
| 108 | * VMware hypervisor takes care of exporting a reliable TSC to the guest. | 104 | * VMware hypervisor takes care of exporting a reliable TSC to the guest. |
| @@ -116,8 +112,16 @@ EXPORT_SYMBOL(vmware_platform); | |||
| 116 | * so that the kernel could just trust the hypervisor with providing a | 112 | * so that the kernel could just trust the hypervisor with providing a |
| 117 | * reliable virtual TSC that is suitable for timekeeping. | 113 | * reliable virtual TSC that is suitable for timekeeping. |
| 118 | */ | 114 | */ |
| 119 | void __cpuinit vmware_set_feature_bits(struct cpuinfo_x86 *c) | 115 | static void __cpuinit vmware_set_cpu_features(struct cpuinfo_x86 *c) |
| 120 | { | 116 | { |
| 121 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 117 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
| 122 | set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); | 118 | set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); |
| 123 | } | 119 | } |
| 120 | |||
| 121 | const __refconst struct hypervisor_x86 x86_hyper_vmware = { | ||
| 122 | .name = "VMware", | ||
| 123 | .detect = vmware_platform, | ||
| 124 | .set_cpu_features = vmware_set_cpu_features, | ||
| 125 | .init_platform = vmware_platform_setup, | ||
| 126 | }; | ||
| 127 | EXPORT_SYMBOL(x86_hyper_vmware); | ||
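The rewritten detection compares the three CPUID output registers of leaf 0x40000000 directly against the "VMwareVMware" signature instead of copying them into a NUL-terminated string. A user-space sketch of the same check using GCC's <cpuid.h> helper; note that the leaf is only meaningful when the hypervisor bit in CPUID.1:ECX is set, which this sketch does not verify:

    /* Sketch of the hypervisor-signature check: CPUID leaf 0x40000000
     * returns the vendor string in EBX, ECX, EDX. Build with gcc on x86. */
    #include <cpuid.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        unsigned int eax, sig[3];

        __cpuid(0x40000000, eax, sig[0], sig[1], sig[2]);
        printf("max hypervisor leaf: %#x\n", eax);
        printf("hypervisor signature: %.12s\n", (const char *)sig);
        return memcmp(sig, "VMwareVMware", 12) ? 1 : 0;
    }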
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 8b862d5900fe..1b7b31ab7d86 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c | |||
| @@ -170,7 +170,7 @@ static int __cpuinit cpuid_class_cpu_callback(struct notifier_block *nfb, | |||
| 170 | cpuid_device_destroy(cpu); | 170 | cpuid_device_destroy(cpu); |
| 171 | break; | 171 | break; |
| 172 | } | 172 | } |
| 173 | return err ? NOTIFY_BAD : NOTIFY_OK; | 173 | return notifier_from_errno(err); |
| 174 | } | 174 | } |
| 175 | 175 | ||
| 176 | static struct notifier_block __refdata cpuid_class_cpu_notifier = | 176 | static struct notifier_block __refdata cpuid_class_cpu_notifier = |
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c deleted file mode 100644 index 1c47390dd0e5..000000000000 --- a/arch/x86/kernel/ds.c +++ /dev/null | |||
| @@ -1,1437 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Debug Store support | ||
| 3 | * | ||
| 4 | * This provides a low-level interface to the hardware's Debug Store | ||
| 5 | * feature that is used for branch trace store (BTS) and | ||
| 6 | * precise-event based sampling (PEBS). | ||
| 7 | * | ||
| 8 | * It manages: | ||
| 9 | * - DS and BTS hardware configuration | ||
| 10 | * - buffer overflow handling (to be done) | ||
| 11 | * - buffer access | ||
| 12 | * | ||
| 13 | * It does not do: | ||
| 14 | * - security checking (is the caller allowed to trace the task) | ||
| 15 | * - buffer allocation (memory accounting) | ||
| 16 | * | ||
| 17 | * | ||
| 18 | * Copyright (C) 2007-2009 Intel Corporation. | ||
| 19 | * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009 | ||
| 20 | */ | ||
| 21 | |||
| 22 | #include <linux/kernel.h> | ||
| 23 | #include <linux/string.h> | ||
| 24 | #include <linux/errno.h> | ||
| 25 | #include <linux/sched.h> | ||
| 26 | #include <linux/slab.h> | ||
| 27 | #include <linux/mm.h> | ||
| 28 | #include <linux/trace_clock.h> | ||
| 29 | |||
| 30 | #include <asm/ds.h> | ||
| 31 | |||
| 32 | #include "ds_selftest.h" | ||
| 33 | |||
| 34 | /* | ||
| 35 | * The configuration for a particular DS hardware implementation: | ||
| 36 | */ | ||
| 37 | struct ds_configuration { | ||
| 38 | /* The name of the configuration: */ | ||
| 39 | const char *name; | ||
| 40 | |||
| 41 | /* The size of pointer-typed fields in DS, BTS, and PEBS: */ | ||
| 42 | unsigned char sizeof_ptr_field; | ||
| 43 | |||
| 44 | /* The size of a BTS/PEBS record in bytes: */ | ||
| 45 | unsigned char sizeof_rec[2]; | ||
| 46 | |||
| 47 | /* The number of pebs counter reset values in the DS structure. */ | ||
| 48 | unsigned char nr_counter_reset; | ||
| 49 | |||
| 50 | /* Control bit-masks indexed by enum ds_feature: */ | ||
| 51 | unsigned long ctl[dsf_ctl_max]; | ||
| 52 | }; | ||
| 53 | static struct ds_configuration ds_cfg __read_mostly; | ||
| 54 | |||
| 55 | |||
| 56 | /* Maximal size of a DS configuration: */ | ||
| 57 | #define MAX_SIZEOF_DS 0x80 | ||
| 58 | |||
| 59 | /* Maximal size of a BTS record: */ | ||
| 60 | #define MAX_SIZEOF_BTS (3 * 8) | ||
| 61 | |||
| 62 | /* BTS and PEBS buffer alignment: */ | ||
| 63 | #define DS_ALIGNMENT (1 << 3) | ||
| 64 | |||
| 65 | /* Number of buffer pointers in DS: */ | ||
| 66 | #define NUM_DS_PTR_FIELDS 8 | ||
| 67 | |||
| 68 | /* Size of a pebs reset value in DS: */ | ||
| 69 | #define PEBS_RESET_FIELD_SIZE 8 | ||
| 70 | |||
| 71 | /* Mask of control bits in the DS MSR register: */ | ||
| 72 | #define BTS_CONTROL \ | ||
| 73 | ( ds_cfg.ctl[dsf_bts] | \ | ||
| 74 | ds_cfg.ctl[dsf_bts_kernel] | \ | ||
| 75 | ds_cfg.ctl[dsf_bts_user] | \ | ||
| 76 | ds_cfg.ctl[dsf_bts_overflow] ) | ||
| 77 | |||
| 78 | /* | ||
| 79 | * A BTS or PEBS tracer. | ||
| 80 | * | ||
| 81 | * This holds the configuration of the tracer and serves as a handle | ||
| 82 | * to identify tracers. | ||
| 83 | */ | ||
| 84 | struct ds_tracer { | ||
| 85 | /* The DS context (partially) owned by this tracer. */ | ||
| 86 | struct ds_context *context; | ||
| 87 | /* The buffer provided on ds_request() and its size in bytes. */ | ||
| 88 | void *buffer; | ||
| 89 | size_t size; | ||
| 90 | }; | ||
| 91 | |||
| 92 | struct bts_tracer { | ||
| 93 | /* The common DS part: */ | ||
| 94 | struct ds_tracer ds; | ||
| 95 | |||
| 96 | /* The trace including the DS configuration: */ | ||
| 97 | struct bts_trace trace; | ||
| 98 | |||
| 99 | /* Buffer overflow notification function: */ | ||
| 100 | bts_ovfl_callback_t ovfl; | ||
| 101 | |||
| 102 | /* Active flags affecting trace collection. */ | ||
| 103 | unsigned int flags; | ||
| 104 | }; | ||
| 105 | |||
| 106 | struct pebs_tracer { | ||
| 107 | /* The common DS part: */ | ||
| 108 | struct ds_tracer ds; | ||
| 109 | |||
| 110 | /* The trace including the DS configuration: */ | ||
| 111 | struct pebs_trace trace; | ||
| 112 | |||
| 113 | /* Buffer overflow notification function: */ | ||
| 114 | pebs_ovfl_callback_t ovfl; | ||
| 115 | }; | ||
| 116 | |||
| 117 | /* | ||
| 118 | * Debug Store (DS) save area configuration (see Intel64 and IA32 | ||
| 119 | * Architectures Software Developer's Manual, section 18.5) | ||
| 120 | * | ||
| 121 | * The DS configuration consists of the following fields; different | ||
| 122 | * architectures vary in the size of those fields. | ||
| 123 | * | ||
| 124 | * - double-word aligned base linear address of the BTS buffer | ||
| 125 | * - write pointer into the BTS buffer | ||
| 126 | * - end linear address of the BTS buffer (one byte beyond the end of | ||
| 127 | * the buffer) | ||
| 128 | * - interrupt pointer into BTS buffer | ||
| 129 | * (interrupt occurs when write pointer passes interrupt pointer) | ||
| 130 | * - double-word aligned base linear address of the PEBS buffer | ||
| 131 | * - write pointer into the PEBS buffer | ||
| 132 | * - end linear address of the PEBS buffer (one byte beyond the end of | ||
| 133 | * the buffer) | ||
| 134 | * - interrupt pointer into PEBS buffer | ||
| 135 | * (interrupt occurs when write pointer passes interrupt pointer) | ||
| 136 | * - value to which counter is reset following counter overflow | ||
| 137 | * | ||
| 138 | * Later architectures use 64bit pointers throughout, whereas earlier | ||
| 139 | * architectures use 32bit pointers in 32bit mode. | ||
| 140 | * | ||
| 141 | * | ||
| 142 | * We compute the base address for the first 8 fields based on: | ||
| 143 | * - the field size stored in the DS configuration | ||
| 144 | * - the relative field position | ||
| 145 | * - an offset giving the start of the respective region | ||
| 146 | * | ||
| 147 | * This offset is further used to index various arrays holding | ||
| 148 | * information for BTS and PEBS at the respective index. | ||
| 149 | * | ||
| 150 | * On later 32bit processors, we only access the lower 32bit of the | ||
| 151 | * 64bit pointer fields. The upper halves will be zeroed out. | ||
| 152 | */ | ||
| 153 | |||
| 154 | enum ds_field { | ||
| 155 | ds_buffer_base = 0, | ||
| 156 | ds_index, | ||
| 157 | ds_absolute_maximum, | ||
| 158 | ds_interrupt_threshold, | ||
| 159 | }; | ||
| 160 | |||
| 161 | enum ds_qualifier { | ||
| 162 | ds_bts = 0, | ||
| 163 | ds_pebs | ||
| 164 | }; | ||
| 165 | |||
| 166 | static inline unsigned long | ||
| 167 | ds_get(const unsigned char *base, enum ds_qualifier qual, enum ds_field field) | ||
| 168 | { | ||
| 169 | base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual))); | ||
| 170 | return *(unsigned long *)base; | ||
| 171 | } | ||
| 172 | |||
| 173 | static inline void | ||
| 174 | ds_set(unsigned char *base, enum ds_qualifier qual, enum ds_field field, | ||
| 175 | unsigned long value) | ||
| 176 | { | ||
| 177 | base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual))); | ||
| 178 | (*(unsigned long *)base) = value; | ||
| 179 | } | ||
| 180 | |||
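As an aside, the offset arithmetic in ds_get()/ds_set() above maps a (qualifier, field) pair onto byte offset (4 * qual + field) * sizeof_ptr_field from the start of the DS area. A minimal stand-alone sketch of that mapping, assuming 8-byte fields (a DTES64-capable CPU) rather than reading the size from ds_cfg:

#include <stdio.h>

/* Mirrors enum ds_field and enum ds_qualifier above. */
enum { FIELD_BUFFER_BASE, FIELD_INDEX, FIELD_ABS_MAX, FIELD_INT_TH };
enum { QUAL_BTS, QUAL_PEBS };

int main(void)
{
	unsigned int sizeof_ptr_field = 8;	/* assumed, not probed */
	int qual, field;

	for (qual = QUAL_BTS; qual <= QUAL_PEBS; qual++)
		for (field = FIELD_BUFFER_BASE; field <= FIELD_INT_TH; field++)
			printf("qual %d, field %d -> byte offset %u\n",
			       qual, field,
			       sizeof_ptr_field * (field + 4 * qual));
	return 0;
}

This prints offsets 0..24 for the BTS fields and 32..56 for the PEBS fields, matching the layout described in the comment block above.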
| 181 | |||
| 182 | /* | ||
| 183 | * Locking is done only for allocating BTS or PEBS resources. | ||
| 184 | */ | ||
| 185 | static DEFINE_SPINLOCK(ds_lock); | ||
| 186 | |||
| 187 | /* | ||
| 188 | * We either support (system-wide) per-cpu or per-thread allocation. | ||
| 189 | * We distinguish the two based on the task_struct pointer, where a | ||
| 190 | * NULL pointer indicates per-cpu allocation for the current cpu. | ||
| 191 | * | ||
| 192 | * Allocations are use-counted. As soon as resources are allocated, | ||
| 193 | * further allocations must be of the same type (per-cpu or | ||
| 194 | * per-thread). We model this by counting allocations (i.e. the number | ||
| 195 | * of tracers of a certain type) for one type negatively: | ||
| 196 | * =0 no tracers | ||
| 197 | * >0 number of per-thread tracers | ||
| 198 | * <0 number of per-cpu tracers | ||
| 199 | * | ||
| 200 | * The tracer count essentially gives the number of ds contexts for a | ||
| 201 | * certain type of allocation. | ||
| 202 | */ | ||
| 203 | static atomic_t tracers = ATOMIC_INIT(0); | ||
| 204 | |||
| 205 | static inline int get_tracer(struct task_struct *task) | ||
| 206 | { | ||
| 207 | int error; | ||
| 208 | |||
| 209 | spin_lock_irq(&ds_lock); | ||
| 210 | |||
| 211 | if (task) { | ||
| 212 | error = -EPERM; | ||
| 213 | if (atomic_read(&tracers) < 0) | ||
| 214 | goto out; | ||
| 215 | atomic_inc(&tracers); | ||
| 216 | } else { | ||
| 217 | error = -EPERM; | ||
| 218 | if (atomic_read(&tracers) > 0) | ||
| 219 | goto out; | ||
| 220 | atomic_dec(&tracers); | ||
| 221 | } | ||
| 222 | |||
| 223 | error = 0; | ||
| 224 | out: | ||
| 225 | spin_unlock_irq(&ds_lock); | ||
| 226 | return error; | ||
| 227 | } | ||
| 228 | |||
| 229 | static inline void put_tracer(struct task_struct *task) | ||
| 230 | { | ||
| 231 | if (task) | ||
| 232 | atomic_dec(&tracers); | ||
| 233 | else | ||
| 234 | atomic_inc(&tracers); | ||
| 235 | } | ||
| 236 | |||
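The signed use count above boils down to: per-thread requests count up, per-cpu requests count down, and a request of the opposite kind while the count is non-zero is refused. A user-space sketch of just that invariant (no locking, unlike the kernel code):

#include <stdio.h>

static int tracers;	/* >0: per-thread tracers, <0: per-cpu tracers */

/* Return 0 on success, -1 if the other allocation type is active. */
static int get_tracer(int per_thread)
{
	if (per_thread) {
		if (tracers < 0)
			return -1;
		tracers++;
	} else {
		if (tracers > 0)
			return -1;
		tracers--;
	}
	return 0;
}

static void put_tracer(int per_thread)
{
	if (per_thread)
		tracers--;
	else
		tracers++;
}

int main(void)
{
	printf("per-thread request: %d\n", get_tracer(1));	/* 0: granted  */
	printf("per-cpu request:    %d\n", get_tracer(0));	/* -1: refused */
	put_tracer(1);
	printf("per-cpu request:    %d\n", get_tracer(0));	/* 0: granted  */
	return 0;
}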
| 237 | /* | ||
| 238 | * The DS context is either attached to a thread or to a cpu: | ||
| 239 | * - in the former case, the thread_struct contains a pointer to the | ||
| 240 | * attached context. | ||
| 241 | * - in the latter case, we use a static array of per-cpu context | ||
| 242 | * pointers. | ||
| 243 | * | ||
| 244 | * Contexts are use-counted. They are allocated on first access and | ||
| 245 | * deallocated when the last user puts the context. | ||
| 246 | */ | ||
| 247 | struct ds_context { | ||
| 248 | /* The DS configuration; goes into MSR_IA32_DS_AREA: */ | ||
| 249 | unsigned char ds[MAX_SIZEOF_DS]; | ||
| 250 | |||
| 251 | /* The owner of the BTS and PEBS configuration, respectively: */ | ||
| 252 | struct bts_tracer *bts_master; | ||
| 253 | struct pebs_tracer *pebs_master; | ||
| 254 | |||
| 255 | /* Use count: */ | ||
| 256 | unsigned long count; | ||
| 257 | |||
| 258 | /* Pointer to the context pointer field: */ | ||
| 259 | struct ds_context **this; | ||
| 260 | |||
| 261 | /* The traced task; NULL for cpu tracing: */ | ||
| 262 | struct task_struct *task; | ||
| 263 | |||
| 264 | /* The traced cpu; only valid if task is NULL: */ | ||
| 265 | int cpu; | ||
| 266 | }; | ||
| 267 | |||
| 268 | static DEFINE_PER_CPU(struct ds_context *, cpu_ds_context); | ||
| 269 | |||
| 270 | |||
| 271 | static struct ds_context *ds_get_context(struct task_struct *task, int cpu) | ||
| 272 | { | ||
| 273 | struct ds_context **p_context = | ||
| 274 | (task ? &task->thread.ds_ctx : &per_cpu(cpu_ds_context, cpu)); | ||
| 275 | struct ds_context *context = NULL; | ||
| 276 | struct ds_context *new_context = NULL; | ||
| 277 | |||
| 278 | /* Chances are small that we already have a context. */ | ||
| 279 | new_context = kzalloc(sizeof(*new_context), GFP_KERNEL); | ||
| 280 | if (!new_context) | ||
| 281 | return NULL; | ||
| 282 | |||
| 283 | spin_lock_irq(&ds_lock); | ||
| 284 | |||
| 285 | context = *p_context; | ||
| 286 | if (likely(!context)) { | ||
| 287 | context = new_context; | ||
| 288 | |||
| 289 | context->this = p_context; | ||
| 290 | context->task = task; | ||
| 291 | context->cpu = cpu; | ||
| 292 | context->count = 0; | ||
| 293 | |||
| 294 | *p_context = context; | ||
| 295 | } | ||
| 296 | |||
| 297 | context->count++; | ||
| 298 | |||
| 299 | spin_unlock_irq(&ds_lock); | ||
| 300 | |||
| 301 | if (context != new_context) | ||
| 302 | kfree(new_context); | ||
| 303 | |||
| 304 | return context; | ||
| 305 | } | ||
| 306 | |||
| 307 | static void ds_put_context(struct ds_context *context) | ||
| 308 | { | ||
| 309 | struct task_struct *task; | ||
| 310 | unsigned long irq; | ||
| 311 | |||
| 312 | if (!context) | ||
| 313 | return; | ||
| 314 | |||
| 315 | spin_lock_irqsave(&ds_lock, irq); | ||
| 316 | |||
| 317 | if (--context->count) { | ||
| 318 | spin_unlock_irqrestore(&ds_lock, irq); | ||
| 319 | return; | ||
| 320 | } | ||
| 321 | |||
| 322 | *(context->this) = NULL; | ||
| 323 | |||
| 324 | task = context->task; | ||
| 325 | |||
| 326 | if (task) | ||
| 327 | clear_tsk_thread_flag(task, TIF_DS_AREA_MSR); | ||
| 328 | |||
| 329 | /* | ||
| 330 | * We leave the (now dangling) pointer to the DS configuration in | ||
| 331 | * the DS_AREA msr. This is as good or as bad as replacing it with | ||
| 332 | * NULL - the hardware would crash if we enabled tracing. | ||
| 333 | * | ||
| 334 | * This saves us some problems with having to write an msr on a | ||
| 335 | * different cpu while preventing others from doing the same for the | ||
| 336 | * next context for that same cpu. | ||
| 337 | */ | ||
| 338 | |||
| 339 | spin_unlock_irqrestore(&ds_lock, irq); | ||
| 340 | |||
| 341 | /* The context might still be in use for context switching. */ | ||
| 342 | if (task && (task != current)) | ||
| 343 | wait_task_context_switch(task); | ||
| 344 | |||
| 345 | kfree(context); | ||
| 346 | } | ||
| 347 | |||
| 348 | static void ds_install_ds_area(struct ds_context *context) | ||
| 349 | { | ||
| 350 | unsigned long ds; | ||
| 351 | |||
| 352 | ds = (unsigned long)context->ds; | ||
| 353 | |||
| 354 | /* | ||
| 355 | * There is a race between the bts master and the pebs master. | ||
| 356 | * | ||
| 357 | * The thread/cpu access is synchronized via get/put_cpu() for | ||
| 358 | * task tracing and via wrmsr_on_cpu for cpu tracing. | ||
| 359 | * | ||
| 360 | * If bts and pebs are collected for the same task or same cpu, | ||
| 361 | * the same configuration is written twice. | ||
| 362 | */ | ||
| 363 | if (context->task) { | ||
| 364 | get_cpu(); | ||
| 365 | if (context->task == current) | ||
| 366 | wrmsrl(MSR_IA32_DS_AREA, ds); | ||
| 367 | set_tsk_thread_flag(context->task, TIF_DS_AREA_MSR); | ||
| 368 | put_cpu(); | ||
| 369 | } else | ||
| 370 | wrmsr_on_cpu(context->cpu, MSR_IA32_DS_AREA, | ||
| 371 | (u32)((u64)ds), (u32)((u64)ds >> 32)); | ||
| 372 | } | ||
| 373 | |||
| 374 | /* | ||
| 375 | * Call the tracer's callback on a buffer overflow. | ||
| 376 | * | ||
| 377 | * context: the ds context | ||
| 378 | * qual: the buffer type | ||
| 379 | */ | ||
| 380 | static void ds_overflow(struct ds_context *context, enum ds_qualifier qual) | ||
| 381 | { | ||
| 382 | switch (qual) { | ||
| 383 | case ds_bts: | ||
| 384 | if (context->bts_master && | ||
| 385 | context->bts_master->ovfl) | ||
| 386 | context->bts_master->ovfl(context->bts_master); | ||
| 387 | break; | ||
| 388 | case ds_pebs: | ||
| 389 | if (context->pebs_master && | ||
| 390 | context->pebs_master->ovfl) | ||
| 391 | context->pebs_master->ovfl(context->pebs_master); | ||
| 392 | break; | ||
| 393 | } | ||
| 394 | } | ||
| 395 | |||
| 396 | |||
| 397 | /* | ||
| 398 | * Write raw data into the BTS or PEBS buffer. | ||
| 399 | * | ||
| 400 | * The remainder of any partially written record is zeroed out. | ||
| 401 | * | ||
| 402 | * context: the DS context | ||
| 403 | * qual: the buffer type | ||
| 404 | * record: the data to write | ||
| 405 | * size: the size of the data | ||
| 406 | */ | ||
| 407 | static int ds_write(struct ds_context *context, enum ds_qualifier qual, | ||
| 408 | const void *record, size_t size) | ||
| 409 | { | ||
| 410 | int bytes_written = 0; | ||
| 411 | |||
| 412 | if (!record) | ||
| 413 | return -EINVAL; | ||
| 414 | |||
| 415 | while (size) { | ||
| 416 | unsigned long base, index, end, write_end, int_th; | ||
| 417 | unsigned long write_size, adj_write_size; | ||
| 418 | |||
| 419 | /* | ||
| 420 | * Write as much as possible without producing an | ||
| 421 | * overflow interrupt. | ||
| 422 | * | ||
| 423 | * Interrupt_threshold must either be | ||
| 424 | * - bigger than absolute_maximum or | ||
| 425 | * - point to a record between buffer_base and absolute_maximum | ||
| 426 | * | ||
| 427 | * Index points to a valid record. | ||
| 428 | */ | ||
| 429 | base = ds_get(context->ds, qual, ds_buffer_base); | ||
| 430 | index = ds_get(context->ds, qual, ds_index); | ||
| 431 | end = ds_get(context->ds, qual, ds_absolute_maximum); | ||
| 432 | int_th = ds_get(context->ds, qual, ds_interrupt_threshold); | ||
| 433 | |||
| 434 | write_end = min(end, int_th); | ||
| 435 | |||
| 436 | /* | ||
| 437 | * If we are already beyond the interrupt threshold, | ||
| 438 | * we fill the entire buffer. | ||
| 439 | */ | ||
| 440 | if (write_end <= index) | ||
| 441 | write_end = end; | ||
| 442 | |||
| 443 | if (write_end <= index) | ||
| 444 | break; | ||
| 445 | |||
| 446 | write_size = min((unsigned long) size, write_end - index); | ||
| 447 | memcpy((void *)index, record, write_size); | ||
| 448 | |||
| 449 | record = (const char *)record + write_size; | ||
| 450 | size -= write_size; | ||
| 451 | bytes_written += write_size; | ||
| 452 | |||
| 453 | adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; | ||
| 454 | adj_write_size *= ds_cfg.sizeof_rec[qual]; | ||
| 455 | |||
| 456 | /* Zero out trailing bytes. */ | ||
| 457 | memset((char *)index + write_size, 0, | ||
| 458 | adj_write_size - write_size); | ||
| 459 | index += adj_write_size; | ||
| 460 | |||
| 461 | if (index >= end) | ||
| 462 | index = base; | ||
| 463 | ds_set(context->ds, qual, ds_index, index); | ||
| 464 | |||
| 465 | if (index >= int_th) | ||
| 466 | ds_overflow(context, qual); | ||
| 467 | } | ||
| 468 | |||
| 469 | return bytes_written; | ||
| 470 | } | ||
| 471 | |||
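To make the loop above concrete: each chunk is written up to the interrupt threshold (or the buffer end), padded with zeroes to the next record boundary, and the write index wraps back to the base when it reaches the end. A rough single-record model of the padding and wrapping, with an illustrative 24-byte record size (it leaves out the interrupt threshold entirely):

#include <stdio.h>
#include <string.h>

#define REC_SIZE 24			/* illustrative BTS record size */
#define NRECS     4

static unsigned char buf[REC_SIZE * NRECS];
static size_t pos;			/* write index, in bytes */

/* Write one (possibly partial) record, zero-pad it, and wrap like ds_write(). */
static void write_record(const void *data, size_t size)
{
	size_t n = size < REC_SIZE ? size : REC_SIZE;

	memcpy(buf + pos, data, n);
	memset(buf + pos + n, 0, REC_SIZE - n);	/* zero out trailing bytes */
	pos += REC_SIZE;
	if (pos >= sizeof(buf))
		pos = 0;			/* wrap to the buffer base */
}

int main(void)
{
	write_record("partial record", 15);	/* 15 of 24 bytes provided */
	printf("write index after one record: %zu\n", pos);	/* 24 */
	return 0;
}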
| 472 | |||
| 473 | /* | ||
| 474 | * Branch Trace Store (BTS) uses the following format. Different | ||
| 475 | * architectures vary in the size of those fields. | ||
| 476 | * - source linear address | ||
| 477 | * - destination linear address | ||
| 478 | * - flags | ||
| 479 | * | ||
| 480 | * Later architectures use 64bit pointers throughout, whereas earlier | ||
| 481 | * architectures use 32bit pointers in 32bit mode. | ||
| 482 | * | ||
| 483 | * We compute the base address for the fields based on: | ||
| 484 | * - the field size stored in the DS configuration | ||
| 485 | * - the relative field position | ||
| 486 | * | ||
| 487 | * In order to store additional information in the BTS buffer, we use | ||
| 488 | * a special source address to indicate that the record requires | ||
| 489 | * special interpretation. | ||
| 490 | * | ||
| 491 | * Netburst indicated via a bit in the flags field whether the branch | ||
| 492 | * was predicted; this is ignored. | ||
| 493 | * | ||
| 494 | * We use two levels of abstraction: | ||
| 495 | * - the raw data level defined here | ||
| 496 | * - an arch-independent level defined in ds.h | ||
| 497 | */ | ||
| 498 | |||
| 499 | enum bts_field { | ||
| 500 | bts_from, | ||
| 501 | bts_to, | ||
| 502 | bts_flags, | ||
| 503 | |||
| 504 | bts_qual = bts_from, | ||
| 505 | bts_clock = bts_to, | ||
| 506 | bts_pid = bts_flags, | ||
| 507 | |||
| 508 | bts_qual_mask = (bts_qual_max - 1), | ||
| 509 | bts_escape = ((unsigned long)-1 & ~bts_qual_mask) | ||
| 510 | }; | ||
| 511 | |||
| 512 | static inline unsigned long bts_get(const char *base, unsigned long field) | ||
| 513 | { | ||
| 514 | base += (ds_cfg.sizeof_ptr_field * field); | ||
| 515 | return *(unsigned long *)base; | ||
| 516 | } | ||
| 517 | |||
| 518 | static inline void bts_set(char *base, unsigned long field, unsigned long val) | ||
| 519 | { | ||
| 520 | base += (ds_cfg.sizeof_ptr_field * field); | ||
| 521 | (*(unsigned long *)base) = val; | ||
| 522 | } | ||
| 523 | |||
| 524 | |||
| 525 | /* | ||
| 526 | * The raw BTS data is architecture dependent. | ||
| 527 | * | ||
| 528 | * For higher-level users, we give an arch-independent view. | ||
| 529 | * - ds.h defines struct bts_struct | ||
| 530 | * - bts_read translates one raw bts record into a bts_struct | ||
| 531 | * - bts_write translates one bts_struct into the raw format and | ||
| 532 | * writes it into the top of the parameter tracer's buffer. | ||
| 533 | * | ||
| 534 | * return: bytes read/written on success; -Eerrno, otherwise | ||
| 535 | */ | ||
| 536 | static int | ||
| 537 | bts_read(struct bts_tracer *tracer, const void *at, struct bts_struct *out) | ||
| 538 | { | ||
| 539 | if (!tracer) | ||
| 540 | return -EINVAL; | ||
| 541 | |||
| 542 | if (at < tracer->trace.ds.begin) | ||
| 543 | return -EINVAL; | ||
| 544 | |||
| 545 | if (tracer->trace.ds.end < (at + tracer->trace.ds.size)) | ||
| 546 | return -EINVAL; | ||
| 547 | |||
| 548 | memset(out, 0, sizeof(*out)); | ||
| 549 | if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) { | ||
| 550 | out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask); | ||
| 551 | out->variant.event.clock = bts_get(at, bts_clock); | ||
| 552 | out->variant.event.pid = bts_get(at, bts_pid); | ||
| 553 | } else { | ||
| 554 | out->qualifier = bts_branch; | ||
| 555 | out->variant.lbr.from = bts_get(at, bts_from); | ||
| 556 | out->variant.lbr.to = bts_get(at, bts_to); | ||
| 557 | |||
| 558 | if (!out->variant.lbr.from && !out->variant.lbr.to) | ||
| 559 | out->qualifier = bts_invalid; | ||
| 560 | } | ||
| 561 | |||
| 562 | return ds_cfg.sizeof_rec[ds_bts]; | ||
| 563 | } | ||
| 564 | |||
| 565 | static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in) | ||
| 566 | { | ||
| 567 | unsigned char raw[MAX_SIZEOF_BTS]; | ||
| 568 | |||
| 569 | if (!tracer) | ||
| 570 | return -EINVAL; | ||
| 571 | |||
| 572 | if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts]) | ||
| 573 | return -EOVERFLOW; | ||
| 574 | |||
| 575 | switch (in->qualifier) { | ||
| 576 | case bts_invalid: | ||
| 577 | bts_set(raw, bts_from, 0); | ||
| 578 | bts_set(raw, bts_to, 0); | ||
| 579 | bts_set(raw, bts_flags, 0); | ||
| 580 | break; | ||
| 581 | case bts_branch: | ||
| 582 | bts_set(raw, bts_from, in->variant.lbr.from); | ||
| 583 | bts_set(raw, bts_to, in->variant.lbr.to); | ||
| 584 | bts_set(raw, bts_flags, 0); | ||
| 585 | break; | ||
| 586 | case bts_task_arrives: | ||
| 587 | case bts_task_departs: | ||
| 588 | bts_set(raw, bts_qual, (bts_escape | in->qualifier)); | ||
| 589 | bts_set(raw, bts_clock, in->variant.event.clock); | ||
| 590 | bts_set(raw, bts_pid, in->variant.event.pid); | ||
| 591 | break; | ||
| 592 | default: | ||
| 593 | return -EINVAL; | ||
| 594 | } | ||
| 595 | |||
| 596 | return ds_write(tracer->ds.context, ds_bts, raw, | ||
| 597 | ds_cfg.sizeof_rec[ds_bts]); | ||
| 598 | } | ||
| 599 | |||
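The escape encoding used by bts_read()/bts_write() above reserves a source address of all ones (with the low qualifier bits cleared) for event records; anything else is treated as an ordinary branch record. A stand-alone sketch of encoding and classifying a record under that scheme, assuming 8-byte fields and a 64-bit unsigned long (both assumptions; the kernel reads the field size from ds_cfg):

#include <stdio.h>
#include <string.h>

#define PTR_FIELD 8				/* assumed field width       */
#define QUAL_MAX  4				/* stand-in for bts_qual_max */
#define QUAL_MASK ((unsigned long)(QUAL_MAX - 1))
#define ESCAPE    ((unsigned long)-1 & ~QUAL_MASK)

enum { FIELD_FROM, FIELD_TO, FIELD_FLAGS };	/* relative field positions  */

static void set_field(unsigned char *rec, int field, unsigned long val)
{
	memcpy(rec + PTR_FIELD * field, &val, sizeof(val));
}

static unsigned long get_field(const unsigned char *rec, int field)
{
	unsigned long val;

	memcpy(&val, rec + PTR_FIELD * field, sizeof(val));
	return val;
}

int main(void)
{
	unsigned char rec[PTR_FIELD * 3] = { 0 };

	/* Encode an event record: escape marker plus a small qualifier. */
	set_field(rec, FIELD_FROM, ESCAPE | 2);
	set_field(rec, FIELD_TO, 12345);	/* e.g. a clock value */
	set_field(rec, FIELD_FLAGS, 42);	/* e.g. a pid         */

	if ((get_field(rec, FIELD_FROM) & ~QUAL_MASK) == ESCAPE)
		printf("event record, qualifier %lu\n",
		       get_field(rec, FIELD_FROM) & QUAL_MASK);
	else
		printf("branch record\n");
	return 0;
}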
| 600 | |||
| 601 | static void ds_write_config(struct ds_context *context, | ||
| 602 | struct ds_trace *cfg, enum ds_qualifier qual) | ||
| 603 | { | ||
| 604 | unsigned char *ds = context->ds; | ||
| 605 | |||
| 606 | ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin); | ||
| 607 | ds_set(ds, qual, ds_index, (unsigned long)cfg->top); | ||
| 608 | ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end); | ||
| 609 | ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith); | ||
| 610 | } | ||
| 611 | |||
| 612 | static void ds_read_config(struct ds_context *context, | ||
| 613 | struct ds_trace *cfg, enum ds_qualifier qual) | ||
| 614 | { | ||
| 615 | unsigned char *ds = context->ds; | ||
| 616 | |||
| 617 | cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base); | ||
| 618 | cfg->top = (void *)ds_get(ds, qual, ds_index); | ||
| 619 | cfg->end = (void *)ds_get(ds, qual, ds_absolute_maximum); | ||
| 620 | cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold); | ||
| 621 | } | ||
| 622 | |||
| 623 | static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, | ||
| 624 | void *base, size_t size, size_t ith, | ||
| 625 | unsigned int flags) { | ||
| 626 | unsigned long buffer, adj; | ||
| 627 | |||
| 628 | /* | ||
| 629 | * Adjust the buffer address and size to meet alignment | ||
| 630 | * constraints: | ||
| 631 | * - buffer is double-word aligned | ||
| 632 | * - size is multiple of record size | ||
| 633 | * | ||
| 634 | * We checked the size at the very beginning; we have enough | ||
| 635 | * space to do the adjustment. | ||
| 636 | */ | ||
| 637 | buffer = (unsigned long)base; | ||
| 638 | |||
| 639 | adj = ALIGN(buffer, DS_ALIGNMENT) - buffer; | ||
| 640 | buffer += adj; | ||
| 641 | size -= adj; | ||
| 642 | |||
| 643 | trace->n = size / ds_cfg.sizeof_rec[qual]; | ||
| 644 | trace->size = ds_cfg.sizeof_rec[qual]; | ||
| 645 | |||
| 646 | size = (trace->n * trace->size); | ||
| 647 | |||
| 648 | trace->begin = (void *)buffer; | ||
| 649 | trace->top = trace->begin; | ||
| 650 | trace->end = (void *)(buffer + size); | ||
| 651 | /* | ||
| 652 | * The value for 'no threshold' is -1, which will set the | ||
| 653 | * threshold outside the buffer, which is exactly what we want. | ||
| 654 | */ | ||
| 655 | ith *= ds_cfg.sizeof_rec[qual]; | ||
| 656 | trace->ith = (void *)(buffer + size - ith); | ||
| 657 | |||
| 658 | trace->flags = flags; | ||
| 659 | } | ||
| 660 | |||
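Worked through once with concrete numbers, the adjustment above bumps the buffer to the next 8-byte boundary, keeps only whole records, and places a threshold of (size_t)-1 records one record past the end so it can never trigger. A stand-alone version of the same arithmetic (the alignment and record size are assumptions, and the 521-byte size simply mirrors the selftest buffer):

#include <stdio.h>

#define DS_ALIGN  8			/* assumed DS_ALIGNMENT     */
#define REC_SIZE  24			/* illustrative record size */

int main(void)
{
	unsigned long base = 1003;	/* arbitrary, unaligned     */
	unsigned long size = 521;
	unsigned long ith  = (unsigned long)-1;	/* "no threshold"   */
	unsigned long adj, n;

	adj = ((base + DS_ALIGN - 1) & ~(DS_ALIGN - 1UL)) - base;
	base += adj;
	size -= adj;

	n = size / REC_SIZE;		/* whole records only       */
	size = n * REC_SIZE;

	ith *= REC_SIZE;		/* wraps around, as in ds_init_ds_trace() */
	printf("begin %lu, end %lu, %lu records\n", base, base + size, n);
	printf("interrupt threshold %lu (one record past the end)\n",
	       base + size - ith);
	return 0;
}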
| 661 | |||
| 662 | static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, | ||
| 663 | enum ds_qualifier qual, struct task_struct *task, | ||
| 664 | int cpu, void *base, size_t size, size_t th) | ||
| 665 | { | ||
| 666 | struct ds_context *context; | ||
| 667 | int error; | ||
| 668 | size_t req_size; | ||
| 669 | |||
| 670 | error = -EOPNOTSUPP; | ||
| 671 | if (!ds_cfg.sizeof_rec[qual]) | ||
| 672 | goto out; | ||
| 673 | |||
| 674 | error = -EINVAL; | ||
| 675 | if (!base) | ||
| 676 | goto out; | ||
| 677 | |||
| 678 | req_size = ds_cfg.sizeof_rec[qual]; | ||
| 679 | /* We might need space for alignment adjustments. */ | ||
| 680 | if (!IS_ALIGNED((unsigned long)base, DS_ALIGNMENT)) | ||
| 681 | req_size += DS_ALIGNMENT; | ||
| 682 | |||
| 683 | error = -EINVAL; | ||
| 684 | if (size < req_size) | ||
| 685 | goto out; | ||
| 686 | |||
| 687 | if (th != (size_t)-1) { | ||
| 688 | th *= ds_cfg.sizeof_rec[qual]; | ||
| 689 | |||
| 690 | error = -EINVAL; | ||
| 691 | if (size <= th) | ||
| 692 | goto out; | ||
| 693 | } | ||
| 694 | |||
| 695 | tracer->buffer = base; | ||
| 696 | tracer->size = size; | ||
| 697 | |||
| 698 | error = -ENOMEM; | ||
| 699 | context = ds_get_context(task, cpu); | ||
| 700 | if (!context) | ||
| 701 | goto out; | ||
| 702 | tracer->context = context; | ||
| 703 | |||
| 704 | /* | ||
| 705 | * Defer any tracer-specific initialization work for the context until | ||
| 706 | * context ownership has been clarified. | ||
| 707 | */ | ||
| 708 | |||
| 709 | error = 0; | ||
| 710 | out: | ||
| 711 | return error; | ||
| 712 | } | ||
| 713 | |||
| 714 | static struct bts_tracer *ds_request_bts(struct task_struct *task, int cpu, | ||
| 715 | void *base, size_t size, | ||
| 716 | bts_ovfl_callback_t ovfl, size_t th, | ||
| 717 | unsigned int flags) | ||
| 718 | { | ||
| 719 | struct bts_tracer *tracer; | ||
| 720 | int error; | ||
| 721 | |||
| 722 | /* Buffer overflow notification is not yet implemented. */ | ||
| 723 | error = -EOPNOTSUPP; | ||
| 724 | if (ovfl) | ||
| 725 | goto out; | ||
| 726 | |||
| 727 | error = get_tracer(task); | ||
| 728 | if (error < 0) | ||
| 729 | goto out; | ||
| 730 | |||
| 731 | error = -ENOMEM; | ||
| 732 | tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); | ||
| 733 | if (!tracer) | ||
| 734 | goto out_put_tracer; | ||
| 735 | tracer->ovfl = ovfl; | ||
| 736 | |||
| 737 | /* Do some more error checking and acquire a tracing context. */ | ||
| 738 | error = ds_request(&tracer->ds, &tracer->trace.ds, | ||
| 739 | ds_bts, task, cpu, base, size, th); | ||
| 740 | if (error < 0) | ||
| 741 | goto out_tracer; | ||
| 742 | |||
| 743 | /* Claim the bts part of the tracing context we acquired above. */ | ||
| 744 | spin_lock_irq(&ds_lock); | ||
| 745 | |||
| 746 | error = -EPERM; | ||
| 747 | if (tracer->ds.context->bts_master) | ||
| 748 | goto out_unlock; | ||
| 749 | tracer->ds.context->bts_master = tracer; | ||
| 750 | |||
| 751 | spin_unlock_irq(&ds_lock); | ||
| 752 | |||
| 753 | /* | ||
| 754 | * Now that we own the bts part of the context, let's complete the | ||
| 755 | * initialization for that part. | ||
| 756 | */ | ||
| 757 | ds_init_ds_trace(&tracer->trace.ds, ds_bts, base, size, th, flags); | ||
| 758 | ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); | ||
| 759 | ds_install_ds_area(tracer->ds.context); | ||
| 760 | |||
| 761 | tracer->trace.read = bts_read; | ||
| 762 | tracer->trace.write = bts_write; | ||
| 763 | |||
| 764 | /* Start tracing. */ | ||
| 765 | ds_resume_bts(tracer); | ||
| 766 | |||
| 767 | return tracer; | ||
| 768 | |||
| 769 | out_unlock: | ||
| 770 | spin_unlock_irq(&ds_lock); | ||
| 771 | ds_put_context(tracer->ds.context); | ||
| 772 | out_tracer: | ||
| 773 | kfree(tracer); | ||
| 774 | out_put_tracer: | ||
| 775 | put_tracer(task); | ||
| 776 | out: | ||
| 777 | return ERR_PTR(error); | ||
| 778 | } | ||
| 779 | |||
| 780 | struct bts_tracer *ds_request_bts_task(struct task_struct *task, | ||
| 781 | void *base, size_t size, | ||
| 782 | bts_ovfl_callback_t ovfl, | ||
| 783 | size_t th, unsigned int flags) | ||
| 784 | { | ||
| 785 | return ds_request_bts(task, 0, base, size, ovfl, th, flags); | ||
| 786 | } | ||
| 787 | |||
| 788 | struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size, | ||
| 789 | bts_ovfl_callback_t ovfl, | ||
| 790 | size_t th, unsigned int flags) | ||
| 791 | { | ||
| 792 | return ds_request_bts(NULL, cpu, base, size, ovfl, th, flags); | ||
| 793 | } | ||
| 794 | |||
| 795 | static struct pebs_tracer *ds_request_pebs(struct task_struct *task, int cpu, | ||
| 796 | void *base, size_t size, | ||
| 797 | pebs_ovfl_callback_t ovfl, size_t th, | ||
| 798 | unsigned int flags) | ||
| 799 | { | ||
| 800 | struct pebs_tracer *tracer; | ||
| 801 | int error; | ||
| 802 | |||
| 803 | /* Buffer overflow notification is not yet implemented. */ | ||
| 804 | error = -EOPNOTSUPP; | ||
| 805 | if (ovfl) | ||
| 806 | goto out; | ||
| 807 | |||
| 808 | error = get_tracer(task); | ||
| 809 | if (error < 0) | ||
| 810 | goto out; | ||
| 811 | |||
| 812 | error = -ENOMEM; | ||
| 813 | tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); | ||
| 814 | if (!tracer) | ||
| 815 | goto out_put_tracer; | ||
| 816 | tracer->ovfl = ovfl; | ||
| 817 | |||
| 818 | /* Do some more error checking and acquire a tracing context. */ | ||
| 819 | error = ds_request(&tracer->ds, &tracer->trace.ds, | ||
| 820 | ds_pebs, task, cpu, base, size, th); | ||
| 821 | if (error < 0) | ||
| 822 | goto out_tracer; | ||
| 823 | |||
| 824 | /* Claim the pebs part of the tracing context we acquired above. */ | ||
| 825 | spin_lock_irq(&ds_lock); | ||
| 826 | |||
| 827 | error = -EPERM; | ||
| 828 | if (tracer->ds.context->pebs_master) | ||
| 829 | goto out_unlock; | ||
| 830 | tracer->ds.context->pebs_master = tracer; | ||
| 831 | |||
| 832 | spin_unlock_irq(&ds_lock); | ||
| 833 | |||
| 834 | /* | ||
| 835 | * Now that we own the pebs part of the context, let's complete the | ||
| 836 | * initialization for that part. | ||
| 837 | */ | ||
| 838 | ds_init_ds_trace(&tracer->trace.ds, ds_pebs, base, size, th, flags); | ||
| 839 | ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); | ||
| 840 | ds_install_ds_area(tracer->ds.context); | ||
| 841 | |||
| 842 | /* Start tracing. */ | ||
| 843 | ds_resume_pebs(tracer); | ||
| 844 | |||
| 845 | return tracer; | ||
| 846 | |||
| 847 | out_unlock: | ||
| 848 | spin_unlock_irq(&ds_lock); | ||
| 849 | ds_put_context(tracer->ds.context); | ||
| 850 | out_tracer: | ||
| 851 | kfree(tracer); | ||
| 852 | out_put_tracer: | ||
| 853 | put_tracer(task); | ||
| 854 | out: | ||
| 855 | return ERR_PTR(error); | ||
| 856 | } | ||
| 857 | |||
| 858 | struct pebs_tracer *ds_request_pebs_task(struct task_struct *task, | ||
| 859 | void *base, size_t size, | ||
| 860 | pebs_ovfl_callback_t ovfl, | ||
| 861 | size_t th, unsigned int flags) | ||
| 862 | { | ||
| 863 | return ds_request_pebs(task, 0, base, size, ovfl, th, flags); | ||
| 864 | } | ||
| 865 | |||
| 866 | struct pebs_tracer *ds_request_pebs_cpu(int cpu, void *base, size_t size, | ||
| 867 | pebs_ovfl_callback_t ovfl, | ||
| 868 | size_t th, unsigned int flags) | ||
| 869 | { | ||
| 870 | return ds_request_pebs(NULL, cpu, base, size, ovfl, th, flags); | ||
| 871 | } | ||
| 872 | |||
| 873 | static void ds_free_bts(struct bts_tracer *tracer) | ||
| 874 | { | ||
| 875 | struct task_struct *task; | ||
| 876 | |||
| 877 | task = tracer->ds.context->task; | ||
| 878 | |||
| 879 | WARN_ON_ONCE(tracer->ds.context->bts_master != tracer); | ||
| 880 | tracer->ds.context->bts_master = NULL; | ||
| 881 | |||
| 882 | /* Make sure tracing stopped and the tracer is not in use. */ | ||
| 883 | if (task && (task != current)) | ||
| 884 | wait_task_context_switch(task); | ||
| 885 | |||
| 886 | ds_put_context(tracer->ds.context); | ||
| 887 | put_tracer(task); | ||
| 888 | |||
| 889 | kfree(tracer); | ||
| 890 | } | ||
| 891 | |||
| 892 | void ds_release_bts(struct bts_tracer *tracer) | ||
| 893 | { | ||
| 894 | might_sleep(); | ||
| 895 | |||
| 896 | if (!tracer) | ||
| 897 | return; | ||
| 898 | |||
| 899 | ds_suspend_bts(tracer); | ||
| 900 | ds_free_bts(tracer); | ||
| 901 | } | ||
| 902 | |||
| 903 | int ds_release_bts_noirq(struct bts_tracer *tracer) | ||
| 904 | { | ||
| 905 | struct task_struct *task; | ||
| 906 | unsigned long irq; | ||
| 907 | int error; | ||
| 908 | |||
| 909 | if (!tracer) | ||
| 910 | return 0; | ||
| 911 | |||
| 912 | task = tracer->ds.context->task; | ||
| 913 | |||
| 914 | local_irq_save(irq); | ||
| 915 | |||
| 916 | error = -EPERM; | ||
| 917 | if (!task && | ||
| 918 | (tracer->ds.context->cpu != smp_processor_id())) | ||
| 919 | goto out; | ||
| 920 | |||
| 921 | error = -EPERM; | ||
| 922 | if (task && (task != current)) | ||
| 923 | goto out; | ||
| 924 | |||
| 925 | ds_suspend_bts_noirq(tracer); | ||
| 926 | ds_free_bts(tracer); | ||
| 927 | |||
| 928 | error = 0; | ||
| 929 | out: | ||
| 930 | local_irq_restore(irq); | ||
| 931 | return error; | ||
| 932 | } | ||
| 933 | |||
| 934 | static void update_task_debugctlmsr(struct task_struct *task, | ||
| 935 | unsigned long debugctlmsr) | ||
| 936 | { | ||
| 937 | task->thread.debugctlmsr = debugctlmsr; | ||
| 938 | |||
| 939 | get_cpu(); | ||
| 940 | if (task == current) | ||
| 941 | update_debugctlmsr(debugctlmsr); | ||
| 942 | put_cpu(); | ||
| 943 | } | ||
| 944 | |||
| 945 | void ds_suspend_bts(struct bts_tracer *tracer) | ||
| 946 | { | ||
| 947 | struct task_struct *task; | ||
| 948 | unsigned long debugctlmsr; | ||
| 949 | int cpu; | ||
| 950 | |||
| 951 | if (!tracer) | ||
| 952 | return; | ||
| 953 | |||
| 954 | tracer->flags = 0; | ||
| 955 | |||
| 956 | task = tracer->ds.context->task; | ||
| 957 | cpu = tracer->ds.context->cpu; | ||
| 958 | |||
| 959 | WARN_ON(!task && irqs_disabled()); | ||
| 960 | |||
| 961 | debugctlmsr = (task ? | ||
| 962 | task->thread.debugctlmsr : | ||
| 963 | get_debugctlmsr_on_cpu(cpu)); | ||
| 964 | debugctlmsr &= ~BTS_CONTROL; | ||
| 965 | |||
| 966 | if (task) | ||
| 967 | update_task_debugctlmsr(task, debugctlmsr); | ||
| 968 | else | ||
| 969 | update_debugctlmsr_on_cpu(cpu, debugctlmsr); | ||
| 970 | } | ||
| 971 | |||
| 972 | int ds_suspend_bts_noirq(struct bts_tracer *tracer) | ||
| 973 | { | ||
| 974 | struct task_struct *task; | ||
| 975 | unsigned long debugctlmsr, irq; | ||
| 976 | int cpu, error = 0; | ||
| 977 | |||
| 978 | if (!tracer) | ||
| 979 | return 0; | ||
| 980 | |||
| 981 | tracer->flags = 0; | ||
| 982 | |||
| 983 | task = tracer->ds.context->task; | ||
| 984 | cpu = tracer->ds.context->cpu; | ||
| 985 | |||
| 986 | local_irq_save(irq); | ||
| 987 | |||
| 988 | error = -EPERM; | ||
| 989 | if (!task && (cpu != smp_processor_id())) | ||
| 990 | goto out; | ||
| 991 | |||
| 992 | debugctlmsr = (task ? | ||
| 993 | task->thread.debugctlmsr : | ||
| 994 | get_debugctlmsr()); | ||
| 995 | debugctlmsr &= ~BTS_CONTROL; | ||
| 996 | |||
| 997 | if (task) | ||
| 998 | update_task_debugctlmsr(task, debugctlmsr); | ||
| 999 | else | ||
| 1000 | update_debugctlmsr(debugctlmsr); | ||
| 1001 | |||
| 1002 | error = 0; | ||
| 1003 | out: | ||
| 1004 | local_irq_restore(irq); | ||
| 1005 | return error; | ||
| 1006 | } | ||
| 1007 | |||
| 1008 | static unsigned long ds_bts_control(struct bts_tracer *tracer) | ||
| 1009 | { | ||
| 1010 | unsigned long control; | ||
| 1011 | |||
| 1012 | control = ds_cfg.ctl[dsf_bts]; | ||
| 1013 | if (!(tracer->trace.ds.flags & BTS_KERNEL)) | ||
| 1014 | control |= ds_cfg.ctl[dsf_bts_kernel]; | ||
| 1015 | if (!(tracer->trace.ds.flags & BTS_USER)) | ||
| 1016 | control |= ds_cfg.ctl[dsf_bts_user]; | ||
| 1017 | |||
| 1018 | return control; | ||
| 1019 | } | ||
| 1020 | |||
| 1021 | void ds_resume_bts(struct bts_tracer *tracer) | ||
| 1022 | { | ||
| 1023 | struct task_struct *task; | ||
| 1024 | unsigned long debugctlmsr; | ||
| 1025 | int cpu; | ||
| 1026 | |||
| 1027 | if (!tracer) | ||
| 1028 | return; | ||
| 1029 | |||
| 1030 | tracer->flags = tracer->trace.ds.flags; | ||
| 1031 | |||
| 1032 | task = tracer->ds.context->task; | ||
| 1033 | cpu = tracer->ds.context->cpu; | ||
| 1034 | |||
| 1035 | WARN_ON(!task && irqs_disabled()); | ||
| 1036 | |||
| 1037 | debugctlmsr = (task ? | ||
| 1038 | task->thread.debugctlmsr : | ||
| 1039 | get_debugctlmsr_on_cpu(cpu)); | ||
| 1040 | debugctlmsr |= ds_bts_control(tracer); | ||
| 1041 | |||
| 1042 | if (task) | ||
| 1043 | update_task_debugctlmsr(task, debugctlmsr); | ||
| 1044 | else | ||
| 1045 | update_debugctlmsr_on_cpu(cpu, debugctlmsr); | ||
| 1046 | } | ||
| 1047 | |||
| 1048 | int ds_resume_bts_noirq(struct bts_tracer *tracer) | ||
| 1049 | { | ||
| 1050 | struct task_struct *task; | ||
| 1051 | unsigned long debugctlmsr, irq; | ||
| 1052 | int cpu, error = 0; | ||
| 1053 | |||
| 1054 | if (!tracer) | ||
| 1055 | return 0; | ||
| 1056 | |||
| 1057 | tracer->flags = tracer->trace.ds.flags; | ||
| 1058 | |||
| 1059 | task = tracer->ds.context->task; | ||
| 1060 | cpu = tracer->ds.context->cpu; | ||
| 1061 | |||
| 1062 | local_irq_save(irq); | ||
| 1063 | |||
| 1064 | error = -EPERM; | ||
| 1065 | if (!task && (cpu != smp_processor_id())) | ||
| 1066 | goto out; | ||
| 1067 | |||
| 1068 | debugctlmsr = (task ? | ||
| 1069 | task->thread.debugctlmsr : | ||
| 1070 | get_debugctlmsr()); | ||
| 1071 | debugctlmsr |= ds_bts_control(tracer); | ||
| 1072 | |||
| 1073 | if (task) | ||
| 1074 | update_task_debugctlmsr(task, debugctlmsr); | ||
| 1075 | else | ||
| 1076 | update_debugctlmsr(debugctlmsr); | ||
| 1077 | |||
| 1078 | error = 0; | ||
| 1079 | out: | ||
| 1080 | local_irq_restore(irq); | ||
| 1081 | return error; | ||
| 1082 | } | ||
| 1083 | |||
| 1084 | static void ds_free_pebs(struct pebs_tracer *tracer) | ||
| 1085 | { | ||
| 1086 | struct task_struct *task; | ||
| 1087 | |||
| 1088 | task = tracer->ds.context->task; | ||
| 1089 | |||
| 1090 | WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer); | ||
| 1091 | tracer->ds.context->pebs_master = NULL; | ||
| 1092 | |||
| 1093 | ds_put_context(tracer->ds.context); | ||
| 1094 | put_tracer(task); | ||
| 1095 | |||
| 1096 | kfree(tracer); | ||
| 1097 | } | ||
| 1098 | |||
| 1099 | void ds_release_pebs(struct pebs_tracer *tracer) | ||
| 1100 | { | ||
| 1101 | might_sleep(); | ||
| 1102 | |||
| 1103 | if (!tracer) | ||
| 1104 | return; | ||
| 1105 | |||
| 1106 | ds_suspend_pebs(tracer); | ||
| 1107 | ds_free_pebs(tracer); | ||
| 1108 | } | ||
| 1109 | |||
| 1110 | int ds_release_pebs_noirq(struct pebs_tracer *tracer) | ||
| 1111 | { | ||
| 1112 | struct task_struct *task; | ||
| 1113 | unsigned long irq; | ||
| 1114 | int error; | ||
| 1115 | |||
| 1116 | if (!tracer) | ||
| 1117 | return 0; | ||
| 1118 | |||
| 1119 | task = tracer->ds.context->task; | ||
| 1120 | |||
| 1121 | local_irq_save(irq); | ||
| 1122 | |||
| 1123 | error = -EPERM; | ||
| 1124 | if (!task && | ||
| 1125 | (tracer->ds.context->cpu != smp_processor_id())) | ||
| 1126 | goto out; | ||
| 1127 | |||
| 1128 | error = -EPERM; | ||
| 1129 | if (task && (task != current)) | ||
| 1130 | goto out; | ||
| 1131 | |||
| 1132 | ds_suspend_pebs_noirq(tracer); | ||
| 1133 | ds_free_pebs(tracer); | ||
| 1134 | |||
| 1135 | error = 0; | ||
| 1136 | out: | ||
| 1137 | local_irq_restore(irq); | ||
| 1138 | return error; | ||
| 1139 | } | ||
| 1140 | |||
| 1141 | void ds_suspend_pebs(struct pebs_tracer *tracer) | ||
| 1142 | { | ||
| 1143 | |||
| 1144 | } | ||
| 1145 | |||
| 1146 | int ds_suspend_pebs_noirq(struct pebs_tracer *tracer) | ||
| 1147 | { | ||
| 1148 | return 0; | ||
| 1149 | } | ||
| 1150 | |||
| 1151 | void ds_resume_pebs(struct pebs_tracer *tracer) | ||
| 1152 | { | ||
| 1153 | |||
| 1154 | } | ||
| 1155 | |||
| 1156 | int ds_resume_pebs_noirq(struct pebs_tracer *tracer) | ||
| 1157 | { | ||
| 1158 | return 0; | ||
| 1159 | } | ||
| 1160 | |||
| 1161 | const struct bts_trace *ds_read_bts(struct bts_tracer *tracer) | ||
| 1162 | { | ||
| 1163 | if (!tracer) | ||
| 1164 | return NULL; | ||
| 1165 | |||
| 1166 | ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_bts); | ||
| 1167 | return &tracer->trace; | ||
| 1168 | } | ||
| 1169 | |||
| 1170 | const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer) | ||
| 1171 | { | ||
| 1172 | if (!tracer) | ||
| 1173 | return NULL; | ||
| 1174 | |||
| 1175 | ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); | ||
| 1176 | |||
| 1177 | tracer->trace.counters = ds_cfg.nr_counter_reset; | ||
| 1178 | memcpy(tracer->trace.counter_reset, | ||
| 1179 | tracer->ds.context->ds + | ||
| 1180 | (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field), | ||
| 1181 | ds_cfg.nr_counter_reset * PEBS_RESET_FIELD_SIZE); | ||
| 1182 | |||
| 1183 | return &tracer->trace; | ||
| 1184 | } | ||
| 1185 | |||
| 1186 | int ds_reset_bts(struct bts_tracer *tracer) | ||
| 1187 | { | ||
| 1188 | if (!tracer) | ||
| 1189 | return -EINVAL; | ||
| 1190 | |||
| 1191 | tracer->trace.ds.top = tracer->trace.ds.begin; | ||
| 1192 | |||
| 1193 | ds_set(tracer->ds.context->ds, ds_bts, ds_index, | ||
| 1194 | (unsigned long)tracer->trace.ds.top); | ||
| 1195 | |||
| 1196 | return 0; | ||
| 1197 | } | ||
| 1198 | |||
| 1199 | int ds_reset_pebs(struct pebs_tracer *tracer) | ||
| 1200 | { | ||
| 1201 | if (!tracer) | ||
| 1202 | return -EINVAL; | ||
| 1203 | |||
| 1204 | tracer->trace.ds.top = tracer->trace.ds.begin; | ||
| 1205 | |||
| 1206 | ds_set(tracer->ds.context->ds, ds_pebs, ds_index, | ||
| 1207 | (unsigned long)tracer->trace.ds.top); | ||
| 1208 | |||
| 1209 | return 0; | ||
| 1210 | } | ||
| 1211 | |||
| 1212 | int ds_set_pebs_reset(struct pebs_tracer *tracer, | ||
| 1213 | unsigned int counter, u64 value) | ||
| 1214 | { | ||
| 1215 | if (!tracer) | ||
| 1216 | return -EINVAL; | ||
| 1217 | |||
| 1218 | if (ds_cfg.nr_counter_reset < counter) | ||
| 1219 | return -EINVAL; | ||
| 1220 | |||
| 1221 | *(u64 *)(tracer->ds.context->ds + | ||
| 1222 | (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field) + | ||
| 1223 | (counter * PEBS_RESET_FIELD_SIZE)) = value; | ||
| 1224 | |||
| 1225 | return 0; | ||
| 1226 | } | ||
| 1227 | |||
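For the offsets used in ds_read_pebs() and ds_set_pebs_reset() above: the per-counter reset values live directly behind the eight pointer-sized BTS/PEBS fields, one 8-byte slot per counter. A sketch of that offset computation, with field and slot sizes assumed to match a 64-bit DS area:

#include <stdio.h>

#define NUM_DS_PTR_FIELDS	8	/* 4 BTS + 4 PEBS pointer fields   */
#define SIZEOF_PTR_FIELD	8	/* assumed: 64-bit DS fields       */
#define PEBS_RESET_FIELD_SIZE	8	/* one u64 reset value per counter */

int main(void)
{
	unsigned int counter;

	for (counter = 0; counter < 4; counter++)	/* e.g. Core i7: 4 counters */
		printf("counter %u reset value at DS byte offset %u\n", counter,
		       NUM_DS_PTR_FIELDS * SIZEOF_PTR_FIELD +
		       counter * PEBS_RESET_FIELD_SIZE);
	return 0;
}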
| 1228 | static const struct ds_configuration ds_cfg_netburst = { | ||
| 1229 | .name = "Netburst", | ||
| 1230 | .ctl[dsf_bts] = (1 << 2) | (1 << 3), | ||
| 1231 | .ctl[dsf_bts_kernel] = (1 << 5), | ||
| 1232 | .ctl[dsf_bts_user] = (1 << 6), | ||
| 1233 | .nr_counter_reset = 1, | ||
| 1234 | }; | ||
| 1235 | static const struct ds_configuration ds_cfg_pentium_m = { | ||
| 1236 | .name = "Pentium M", | ||
| 1237 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), | ||
| 1238 | .nr_counter_reset = 1, | ||
| 1239 | }; | ||
| 1240 | static const struct ds_configuration ds_cfg_core2_atom = { | ||
| 1241 | .name = "Core 2/Atom", | ||
| 1242 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), | ||
| 1243 | .ctl[dsf_bts_kernel] = (1 << 9), | ||
| 1244 | .ctl[dsf_bts_user] = (1 << 10), | ||
| 1245 | .nr_counter_reset = 1, | ||
| 1246 | }; | ||
| 1247 | static const struct ds_configuration ds_cfg_core_i7 = { | ||
| 1248 | .name = "Core i7", | ||
| 1249 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), | ||
| 1250 | .ctl[dsf_bts_kernel] = (1 << 9), | ||
| 1251 | .ctl[dsf_bts_user] = (1 << 10), | ||
| 1252 | .nr_counter_reset = 4, | ||
| 1253 | }; | ||
| 1254 | |||
| 1255 | static void | ||
| 1256 | ds_configure(const struct ds_configuration *cfg, | ||
| 1257 | struct cpuinfo_x86 *cpu) | ||
| 1258 | { | ||
| 1259 | unsigned long nr_pebs_fields = 0; | ||
| 1260 | |||
| 1261 | printk(KERN_INFO "[ds] using %s configuration\n", cfg->name); | ||
| 1262 | |||
| 1263 | #ifdef __i386__ | ||
| 1264 | nr_pebs_fields = 10; | ||
| 1265 | #else | ||
| 1266 | nr_pebs_fields = 18; | ||
| 1267 | #endif | ||
| 1268 | |||
| 1269 | /* | ||
| 1270 | * Starting with version 2, architectural performance | ||
| 1271 | * monitoring supports a format specifier. | ||
| 1272 | */ | ||
| 1273 | if ((cpuid_eax(0xa) & 0xff) > 1) { | ||
| 1274 | unsigned long perf_capabilities, format; | ||
| 1275 | |||
| 1276 | rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_capabilities); | ||
| 1277 | |||
| 1278 | format = (perf_capabilities >> 8) & 0xf; | ||
| 1279 | |||
| 1280 | switch (format) { | ||
| 1281 | case 0: | ||
| 1282 | nr_pebs_fields = 18; | ||
| 1283 | break; | ||
| 1284 | case 1: | ||
| 1285 | nr_pebs_fields = 22; | ||
| 1286 | break; | ||
| 1287 | default: | ||
| 1288 | printk(KERN_INFO | ||
| 1289 | "[ds] unknown PEBS format: %lu\n", format); | ||
| 1290 | nr_pebs_fields = 0; | ||
| 1291 | break; | ||
| 1292 | } | ||
| 1293 | } | ||
| 1294 | |||
| 1295 | memset(&ds_cfg, 0, sizeof(ds_cfg)); | ||
| 1296 | ds_cfg = *cfg; | ||
| 1297 | |||
| 1298 | ds_cfg.sizeof_ptr_field = | ||
| 1299 | (cpu_has(cpu, X86_FEATURE_DTES64) ? 8 : 4); | ||
| 1300 | |||
| 1301 | ds_cfg.sizeof_rec[ds_bts] = ds_cfg.sizeof_ptr_field * 3; | ||
| 1302 | ds_cfg.sizeof_rec[ds_pebs] = ds_cfg.sizeof_ptr_field * nr_pebs_fields; | ||
| 1303 | |||
| 1304 | if (!cpu_has(cpu, X86_FEATURE_BTS)) { | ||
| 1305 | ds_cfg.sizeof_rec[ds_bts] = 0; | ||
| 1306 | printk(KERN_INFO "[ds] bts not available\n"); | ||
| 1307 | } | ||
| 1308 | if (!cpu_has(cpu, X86_FEATURE_PEBS)) { | ||
| 1309 | ds_cfg.sizeof_rec[ds_pebs] = 0; | ||
| 1310 | printk(KERN_INFO "[ds] pebs not available\n"); | ||
| 1311 | } | ||
| 1312 | |||
| 1313 | printk(KERN_INFO "[ds] sizes: address: %u bit, ", | ||
| 1314 | 8 * ds_cfg.sizeof_ptr_field); | ||
| 1315 | printk("bts/pebs record: %u/%u bytes\n", | ||
| 1316 | ds_cfg.sizeof_rec[ds_bts], ds_cfg.sizeof_rec[ds_pebs]); | ||
| 1317 | |||
| 1318 | WARN_ON_ONCE(MAX_PEBS_COUNTERS < ds_cfg.nr_counter_reset); | ||
| 1319 | } | ||
| 1320 | |||
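Tying the sizing above together: with 64-bit fields (DTES64) a BTS record is 3 * 8 = 24 bytes, while a PEBS record has 18 or 22 fields depending on the PERF_CAPABILITIES format specifier. A tiny sketch of that record sizing, where the format value is a stand-in rather than something read from the MSR:

#include <stdio.h>

int main(void)
{
	unsigned int sizeof_ptr_field = 8;	/* assumed: DTES64 present   */
	unsigned int format = 1;		/* stand-in PEBS format spec */
	unsigned int nr_pebs_fields;

	switch (format) {
	case 0:
		nr_pebs_fields = 18;
		break;
	case 1:
		nr_pebs_fields = 22;
		break;
	default:
		nr_pebs_fields = 0;		/* unknown format: disable PEBS */
		break;
	}

	printf("bts record:  %u bytes\n", sizeof_ptr_field * 3);
	printf("pebs record: %u bytes\n", sizeof_ptr_field * nr_pebs_fields);
	return 0;
}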
| 1321 | void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | ||
| 1322 | { | ||
| 1323 | /* Only configure the first cpu. Others are identical. */ | ||
| 1324 | if (ds_cfg.name) | ||
| 1325 | return; | ||
| 1326 | |||
| 1327 | switch (c->x86) { | ||
| 1328 | case 0x6: | ||
| 1329 | switch (c->x86_model) { | ||
| 1330 | case 0x9: | ||
| 1331 | case 0xd: /* Pentium M */ | ||
| 1332 | ds_configure(&ds_cfg_pentium_m, c); | ||
| 1333 | break; | ||
| 1334 | case 0xf: | ||
| 1335 | case 0x17: /* Core2 */ | ||
| 1336 | case 0x1c: /* Atom */ | ||
| 1337 | ds_configure(&ds_cfg_core2_atom, c); | ||
| 1338 | break; | ||
| 1339 | case 0x1a: /* Core i7 */ | ||
| 1340 | ds_configure(&ds_cfg_core_i7, c); | ||
| 1341 | break; | ||
| 1342 | default: | ||
| 1343 | /* Sorry, don't know about them. */ | ||
| 1344 | break; | ||
| 1345 | } | ||
| 1346 | break; | ||
| 1347 | case 0xf: | ||
| 1348 | switch (c->x86_model) { | ||
| 1349 | case 0x0: | ||
| 1350 | case 0x1: | ||
| 1351 | case 0x2: /* Netburst */ | ||
| 1352 | ds_configure(&ds_cfg_netburst, c); | ||
| 1353 | break; | ||
| 1354 | default: | ||
| 1355 | /* Sorry, don't know about them. */ | ||
| 1356 | break; | ||
| 1357 | } | ||
| 1358 | break; | ||
| 1359 | default: | ||
| 1360 | /* Sorry, don't know about them. */ | ||
| 1361 | break; | ||
| 1362 | } | ||
| 1363 | } | ||
| 1364 | |||
| 1365 | static inline void ds_take_timestamp(struct ds_context *context, | ||
| 1366 | enum bts_qualifier qualifier, | ||
| 1367 | struct task_struct *task) | ||
| 1368 | { | ||
| 1369 | struct bts_tracer *tracer = context->bts_master; | ||
| 1370 | struct bts_struct ts; | ||
| 1371 | |||
| 1372 | /* Prevent compilers from reading the tracer pointer twice. */ | ||
| 1373 | barrier(); | ||
| 1374 | |||
| 1375 | if (!tracer || !(tracer->flags & BTS_TIMESTAMPS)) | ||
| 1376 | return; | ||
| 1377 | |||
| 1378 | memset(&ts, 0, sizeof(ts)); | ||
| 1379 | ts.qualifier = qualifier; | ||
| 1380 | ts.variant.event.clock = trace_clock_global(); | ||
| 1381 | ts.variant.event.pid = task->pid; | ||
| 1382 | |||
| 1383 | bts_write(tracer, &ts); | ||
| 1384 | } | ||
| 1385 | |||
| 1386 | /* | ||
| 1387 | * Change the DS configuration from tracing prev to tracing next. | ||
| 1388 | */ | ||
| 1389 | void ds_switch_to(struct task_struct *prev, struct task_struct *next) | ||
| 1390 | { | ||
| 1391 | struct ds_context *prev_ctx = prev->thread.ds_ctx; | ||
| 1392 | struct ds_context *next_ctx = next->thread.ds_ctx; | ||
| 1393 | unsigned long debugctlmsr = next->thread.debugctlmsr; | ||
| 1394 | |||
| 1395 | /* Make sure all data is read before we start. */ | ||
| 1396 | barrier(); | ||
| 1397 | |||
| 1398 | if (prev_ctx) { | ||
| 1399 | update_debugctlmsr(0); | ||
| 1400 | |||
| 1401 | ds_take_timestamp(prev_ctx, bts_task_departs, prev); | ||
| 1402 | } | ||
| 1403 | |||
| 1404 | if (next_ctx) { | ||
| 1405 | ds_take_timestamp(next_ctx, bts_task_arrives, next); | ||
| 1406 | |||
| 1407 | wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds); | ||
| 1408 | } | ||
| 1409 | |||
| 1410 | update_debugctlmsr(debugctlmsr); | ||
| 1411 | } | ||
| 1412 | |||
| 1413 | static __init int ds_selftest(void) | ||
| 1414 | { | ||
| 1415 | if (ds_cfg.sizeof_rec[ds_bts]) { | ||
| 1416 | int error; | ||
| 1417 | |||
| 1418 | error = ds_selftest_bts(); | ||
| 1419 | if (error) { | ||
| 1420 | WARN(1, "[ds] selftest failed. disabling bts.\n"); | ||
| 1421 | ds_cfg.sizeof_rec[ds_bts] = 0; | ||
| 1422 | } | ||
| 1423 | } | ||
| 1424 | |||
| 1425 | if (ds_cfg.sizeof_rec[ds_pebs]) { | ||
| 1426 | int error; | ||
| 1427 | |||
| 1428 | error = ds_selftest_pebs(); | ||
| 1429 | if (error) { | ||
| 1430 | WARN(1, "[ds] selftest failed. disabling pebs.\n"); | ||
| 1431 | ds_cfg.sizeof_rec[ds_pebs] = 0; | ||
| 1432 | } | ||
| 1433 | } | ||
| 1434 | |||
| 1435 | return 0; | ||
| 1436 | } | ||
| 1437 | device_initcall(ds_selftest); | ||
diff --git a/arch/x86/kernel/ds_selftest.c b/arch/x86/kernel/ds_selftest.c deleted file mode 100644 index 6bc7c199ab99..000000000000 --- a/arch/x86/kernel/ds_selftest.c +++ /dev/null | |||
| @@ -1,408 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Debug Store support - selftest | ||
| 3 | * | ||
| 4 | * | ||
| 5 | * Copyright (C) 2009 Intel Corporation. | ||
| 6 | * Markus Metzger <markus.t.metzger@intel.com>, 2009 | ||
| 7 | */ | ||
| 8 | |||
| 9 | #include "ds_selftest.h" | ||
| 10 | |||
| 11 | #include <linux/kernel.h> | ||
| 12 | #include <linux/string.h> | ||
| 13 | #include <linux/smp.h> | ||
| 14 | #include <linux/cpu.h> | ||
| 15 | |||
| 16 | #include <asm/ds.h> | ||
| 17 | |||
| 18 | |||
| 19 | #define BUFFER_SIZE 521 /* Intentionally chose an odd size. */ | ||
| 20 | #define SMALL_BUFFER_SIZE 24 /* A single bts entry. */ | ||
| 21 | |||
| 22 | struct ds_selftest_bts_conf { | ||
| 23 | struct bts_tracer *tracer; | ||
| 24 | int error; | ||
| 25 | int (*suspend)(struct bts_tracer *); | ||
| 26 | int (*resume)(struct bts_tracer *); | ||
| 27 | }; | ||
| 28 | |||
| 29 | static int ds_selftest_bts_consistency(const struct bts_trace *trace) | ||
| 30 | { | ||
| 31 | int error = 0; | ||
| 32 | |||
| 33 | if (!trace) { | ||
| 34 | printk(KERN_CONT "failed to access trace..."); | ||
| 35 | /* Bail out. Other tests are pointless. */ | ||
| 36 | return -1; | ||
| 37 | } | ||
| 38 | |||
| 39 | if (!trace->read) { | ||
| 40 | printk(KERN_CONT "bts read not available..."); | ||
| 41 | error = -1; | ||
| 42 | } | ||
| 43 | |||
| 44 | /* Do some sanity checks on the trace configuration. */ | ||
| 45 | if (!trace->ds.n) { | ||
| 46 | printk(KERN_CONT "empty bts buffer..."); | ||
| 47 | error = -1; | ||
| 48 | } | ||
| 49 | if (!trace->ds.size) { | ||
| 50 | printk(KERN_CONT "bad bts trace setup..."); | ||
| 51 | error = -1; | ||
| 52 | } | ||
| 53 | if (trace->ds.end != | ||
| 54 | (char *)trace->ds.begin + (trace->ds.n * trace->ds.size)) { | ||
| 55 | printk(KERN_CONT "bad bts buffer setup..."); | ||
| 56 | error = -1; | ||
| 57 | } | ||
| 58 | /* | ||
| 59 | * We allow top in [begin; end], since it's not clear when the | ||
| 60 | * overflow adjustment happens: after the increment or before the | ||
| 61 | * write. | ||
| 62 | */ | ||
| 63 | if ((trace->ds.top < trace->ds.begin) || | ||
| 64 | (trace->ds.end < trace->ds.top)) { | ||
| 65 | printk(KERN_CONT "bts top out of bounds..."); | ||
| 66 | error = -1; | ||
| 67 | } | ||
| 68 | |||
| 69 | return error; | ||
| 70 | } | ||
| 71 | |||
| 72 | static int ds_selftest_bts_read(struct bts_tracer *tracer, | ||
| 73 | const struct bts_trace *trace, | ||
| 74 | const void *from, const void *to) | ||
| 75 | { | ||
| 76 | const unsigned char *at; | ||
| 77 | |||
| 78 | /* | ||
| 79 | * Check a few things which do not belong to this test. | ||
| 80 | * They should be covered by other tests. | ||
| 81 | */ | ||
| 82 | if (!trace) | ||
| 83 | return -1; | ||
| 84 | |||
| 85 | if (!trace->read) | ||
| 86 | return -1; | ||
| 87 | |||
| 88 | if (to < from) | ||
| 89 | return -1; | ||
| 90 | |||
| 91 | if (from < trace->ds.begin) | ||
| 92 | return -1; | ||
| 93 | |||
| 94 | if (trace->ds.end < to) | ||
| 95 | return -1; | ||
| 96 | |||
| 97 | if (!trace->ds.size) | ||
| 98 | return -1; | ||
| 99 | |||
| 100 | /* Now to the test itself. */ | ||
| 101 | for (at = from; (void *)at < to; at += trace->ds.size) { | ||
| 102 | struct bts_struct bts; | ||
| 103 | unsigned long index; | ||
| 104 | int error; | ||
| 105 | |||
| 106 | if (((void *)at - trace->ds.begin) % trace->ds.size) { | ||
| 107 | printk(KERN_CONT | ||
| 108 | "read from non-integer index..."); | ||
| 109 | return -1; | ||
| 110 | } | ||
| 111 | index = ((void *)at - trace->ds.begin) / trace->ds.size; | ||
| 112 | |||
| 113 | memset(&bts, 0, sizeof(bts)); | ||
| 114 | error = trace->read(tracer, at, &bts); | ||
| 115 | if (error < 0) { | ||
| 116 | printk(KERN_CONT | ||
| 117 | "error reading bts trace at [%lu] (0x%p)...", | ||
| 118 | index, at); | ||
| 119 | return error; | ||
| 120 | } | ||
| 121 | |||
| 122 | switch (bts.qualifier) { | ||
| 123 | case BTS_BRANCH: | ||
| 124 | break; | ||
| 125 | default: | ||
| 126 | printk(KERN_CONT | ||
| 127 | "unexpected bts entry %llu at [%lu] (0x%p)...", | ||
| 128 | bts.qualifier, index, at); | ||
| 129 | return -1; | ||
| 130 | } | ||
| 131 | } | ||
| 132 | |||
| 133 | return 0; | ||
| 134 | } | ||
| 135 | |||
| 136 | static void ds_selftest_bts_cpu(void *arg) | ||
| 137 | { | ||
| 138 | struct ds_selftest_bts_conf *conf = arg; | ||
| 139 | const struct bts_trace *trace; | ||
| 140 | void *top; | ||
| 141 | |||
| 142 | if (IS_ERR(conf->tracer)) { | ||
| 143 | conf->error = PTR_ERR(conf->tracer); | ||
| 144 | conf->tracer = NULL; | ||
| 145 | |||
| 146 | printk(KERN_CONT | ||
| 147 | "initialization failed (err: %d)...", conf->error); | ||
| 148 | return; | ||
| 149 | } | ||
| 150 | |||
| 151 | /* We should meanwhile have enough trace. */ | ||
| 152 | conf->error = conf->suspend(conf->tracer); | ||
| 153 | if (conf->error < 0) | ||
| 154 | return; | ||
| 155 | |||
| 156 | /* Let's see if we can access the trace. */ | ||
| 157 | trace = ds_read_bts(conf->tracer); | ||
| 158 | |||
| 159 | conf->error = ds_selftest_bts_consistency(trace); | ||
| 160 | if (conf->error < 0) | ||
| 161 | return; | ||
| 162 | |||
| 163 | /* If everything went well, we should have a few trace entries. */ | ||
| 164 | if (trace->ds.top == trace->ds.begin) { | ||
| 165 | /* | ||
| 166 | * It is possible but highly unlikely that we got a | ||
| 167 | * buffer overflow and end up at exactly the same | ||
| 168 | * position we started from. | ||
| 169 | * Let's issue a warning, but continue. | ||
| 170 | */ | ||
| 171 | printk(KERN_CONT "no trace/overflow..."); | ||
| 172 | } | ||
| 173 | |||
| 174 | /* Let's try to read the trace we collected. */ | ||
| 175 | conf->error = | ||
| 176 | ds_selftest_bts_read(conf->tracer, trace, | ||
| 177 | trace->ds.begin, trace->ds.top); | ||
| 178 | if (conf->error < 0) | ||
| 179 | return; | ||
| 180 | |||
| 181 | /* | ||
| 182 | * Let's read the trace again. | ||
| 183 | * Since we suspended tracing, we should get the same result. | ||
| 184 | */ | ||
| 185 | top = trace->ds.top; | ||
| 186 | |||
| 187 | trace = ds_read_bts(conf->tracer); | ||
| 188 | conf->error = ds_selftest_bts_consistency(trace); | ||
| 189 | if (conf->error < 0) | ||
| 190 | return; | ||
| 191 | |||
| 192 | if (top != trace->ds.top) { | ||
| 193 | printk(KERN_CONT "suspend not working..."); | ||
| 194 | conf->error = -1; | ||
| 195 | return; | ||
| 196 | } | ||
| 197 | |||
| 198 | /* Let's collect some more trace - see if resume is working. */ | ||
| 199 | conf->error = conf->resume(conf->tracer); | ||
| 200 | if (conf->error < 0) | ||
| 201 | return; | ||
| 202 | |||
| 203 | conf->error = conf->suspend(conf->tracer); | ||
| 204 | if (conf->error < 0) | ||
| 205 | return; | ||
| 206 | |||
| 207 | trace = ds_read_bts(conf->tracer); | ||
| 208 | |||
| 209 | conf->error = ds_selftest_bts_consistency(trace); | ||
| 210 | if (conf->error < 0) | ||
| 211 | return; | ||
| 212 | |||
| 213 | if (trace->ds.top == top) { | ||
| 214 | /* | ||
| 215 | * It is possible but highly unlikely that we got a | ||
| 216 | * buffer overflow and end up at exactly the same | ||
| 217 | * position we started from. | ||
| 218 | * Let's issue a warning and check the full trace. | ||
| 219 | */ | ||
| 220 | printk(KERN_CONT | ||
| 221 | "no resume progress/overflow..."); | ||
| 222 | |||
| 223 | conf->error = | ||
| 224 | ds_selftest_bts_read(conf->tracer, trace, | ||
| 225 | trace->ds.begin, trace->ds.end); | ||
| 226 | } else if (trace->ds.top < top) { | ||
| 227 | /* | ||
| 228 | * We had a buffer overflow - the entire buffer should | ||
| 229 | * contain trace records. | ||
| 230 | */ | ||
| 231 | conf->error = | ||
| 232 | ds_selftest_bts_read(conf->tracer, trace, | ||
| 233 | trace->ds.begin, trace->ds.end); | ||
| 234 | } else { | ||
| 235 | /* | ||
| 236 | * It is quite likely that the buffer did not overflow. | ||
| 237 | * Let's just check the delta trace. | ||
| 238 | */ | ||
| 239 | conf->error = | ||
| 240 | ds_selftest_bts_read(conf->tracer, trace, top, | ||
| 241 | trace->ds.top); | ||
| 242 | } | ||
| 243 | if (conf->error < 0) | ||
| 244 | return; | ||
| 245 | |||
| 246 | conf->error = 0; | ||
| 247 | } | ||
| 248 | |||
| 249 | static int ds_suspend_bts_wrap(struct bts_tracer *tracer) | ||
| 250 | { | ||
| 251 | ds_suspend_bts(tracer); | ||
| 252 | return 0; | ||
| 253 | } | ||
| 254 | |||
| 255 | static int ds_resume_bts_wrap(struct bts_tracer *tracer) | ||
| 256 | { | ||
| 257 | ds_resume_bts(tracer); | ||
| 258 | return 0; | ||
| 259 | } | ||
| 260 | |||
| 261 | static void ds_release_bts_noirq_wrap(void *tracer) | ||
| 262 | { | ||
| 263 | (void)ds_release_bts_noirq(tracer); | ||
| 264 | } | ||
| 265 | |||
| 266 | static int ds_selftest_bts_bad_release_noirq(int cpu, | ||
| 267 | struct bts_tracer *tracer) | ||
| 268 | { | ||
| 269 | int error = -EPERM; | ||
| 270 | |||
| 271 | /* Try to release the tracer on the wrong cpu. */ | ||
| 272 | get_cpu(); | ||
| 273 | if (cpu != smp_processor_id()) { | ||
| 274 | error = ds_release_bts_noirq(tracer); | ||
| 275 | if (error != -EPERM) | ||
| 276 | printk(KERN_CONT "release on wrong cpu..."); | ||
| 277 | } | ||
| 278 | put_cpu(); | ||
| 279 | |||
| 280 | return error ? 0 : -1; | ||
| 281 | } | ||
| 282 | |||
| 283 | static int ds_selftest_bts_bad_request_cpu(int cpu, void *buffer) | ||
| 284 | { | ||
| 285 | struct bts_tracer *tracer; | ||
| 286 | int error; | ||
| 287 | |||
| 288 | /* Try to request cpu tracing while task tracing is active. */ | ||
| 289 | tracer = ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, NULL, | ||
| 290 | (size_t)-1, BTS_KERNEL); | ||
| 291 | error = PTR_ERR(tracer); | ||
| 292 | if (!IS_ERR(tracer)) { | ||
| 293 | ds_release_bts(tracer); | ||
| 294 | error = 0; | ||
| 295 | } | ||
| 296 | |||
| 297 | if (error != -EPERM) | ||
| 298 | printk(KERN_CONT "cpu/task tracing overlap..."); | ||
| 299 | |||
| 300 | return error ? 0 : -1; | ||
| 301 | } | ||
| 302 | |||
| 303 | static int ds_selftest_bts_bad_request_task(void *buffer) | ||
| 304 | { | ||
| 305 | struct bts_tracer *tracer; | ||
| 306 | int error; | ||
| 307 | |||
| 308 | /* Try to request task tracing while cpu tracing is active. */ | ||
| 309 | tracer = ds_request_bts_task(current, buffer, BUFFER_SIZE, NULL, | ||
| 310 | (size_t)-1, BTS_KERNEL); | ||
| 311 | error = PTR_ERR(tracer); | ||
| 312 | if (!IS_ERR(tracer)) { | ||
| 313 | error = 0; | ||
| 314 | ds_release_bts(tracer); | ||
| 315 | } | ||
| 316 | |||
| 317 | if (error != -EPERM) | ||
| 318 | printk(KERN_CONT "task/cpu tracing overlap..."); | ||
| 319 | |||
| 320 | return error ? 0 : -1; | ||
| 321 | } | ||
| 322 | |||
| 323 | int ds_selftest_bts(void) | ||
| 324 | { | ||
| 325 | struct ds_selftest_bts_conf conf; | ||
| 326 | unsigned char buffer[BUFFER_SIZE], *small_buffer; | ||
| 327 | unsigned long irq; | ||
| 328 | int cpu; | ||
| 329 | |||
| 330 | printk(KERN_INFO "[ds] bts selftest..."); | ||
| 331 | conf.error = 0; | ||
| 332 | |||
| 333 | small_buffer = (unsigned char *)ALIGN((unsigned long)buffer, 8) + 8; | ||
| 334 | |||
| 335 | get_online_cpus(); | ||
| 336 | for_each_online_cpu(cpu) { | ||
| 337 | conf.suspend = ds_suspend_bts_wrap; | ||
| 338 | conf.resume = ds_resume_bts_wrap; | ||
| 339 | conf.tracer = | ||
| 340 | ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, | ||
| 341 | NULL, (size_t)-1, BTS_KERNEL); | ||
| 342 | ds_selftest_bts_cpu(&conf); | ||
| 343 | if (conf.error >= 0) | ||
| 344 | conf.error = ds_selftest_bts_bad_request_task(buffer); | ||
| 345 | ds_release_bts(conf.tracer); | ||
| 346 | if (conf.error < 0) | ||
| 347 | goto out; | ||
| 348 | |||
| 349 | conf.suspend = ds_suspend_bts_noirq; | ||
| 350 | conf.resume = ds_resume_bts_noirq; | ||
| 351 | conf.tracer = | ||
| 352 | ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, | ||
| 353 | NULL, (size_t)-1, BTS_KERNEL); | ||
| 354 | smp_call_function_single(cpu, ds_selftest_bts_cpu, &conf, 1); | ||
| 355 | if (conf.error >= 0) { | ||
| 356 | conf.error = | ||
| 357 | ds_selftest_bts_bad_release_noirq(cpu, | ||
| 358 | conf.tracer); | ||
| 359 | /* We must not release the tracer twice. */ | ||
| 360 | if (conf.error < 0) | ||
| 361 | conf.tracer = NULL; | ||
| 362 | } | ||
| 363 | if (conf.error >= 0) | ||
| 364 | conf.error = ds_selftest_bts_bad_request_task(buffer); | ||
| 365 | smp_call_function_single(cpu, ds_release_bts_noirq_wrap, | ||
| 366 | conf.tracer, 1); | ||
| 367 | if (conf.error < 0) | ||
| 368 | goto out; | ||
| 369 | } | ||
| 370 | |||
| 371 | conf.suspend = ds_suspend_bts_wrap; | ||
| 372 | conf.resume = ds_resume_bts_wrap; | ||
| 373 | conf.tracer = | ||
| 374 | ds_request_bts_task(current, buffer, BUFFER_SIZE, | ||
| 375 | NULL, (size_t)-1, BTS_KERNEL); | ||
| 376 | ds_selftest_bts_cpu(&conf); | ||
| 377 | if (conf.error >= 0) | ||
| 378 | conf.error = ds_selftest_bts_bad_request_cpu(0, buffer); | ||
| 379 | ds_release_bts(conf.tracer); | ||
| 380 | if (conf.error < 0) | ||
| 381 | goto out; | ||
| 382 | |||
| 383 | conf.suspend = ds_suspend_bts_noirq; | ||
| 384 | conf.resume = ds_resume_bts_noirq; | ||
| 385 | conf.tracer = | ||
| 386 | ds_request_bts_task(current, small_buffer, SMALL_BUFFER_SIZE, | ||
| 387 | NULL, (size_t)-1, BTS_KERNEL); | ||
| 388 | local_irq_save(irq); | ||
| 389 | ds_selftest_bts_cpu(&conf); | ||
| 390 | if (conf.error >= 0) | ||
| 391 | conf.error = ds_selftest_bts_bad_request_cpu(0, buffer); | ||
| 392 | ds_release_bts_noirq(conf.tracer); | ||
| 393 | local_irq_restore(irq); | ||
| 394 | if (conf.error < 0) | ||
| 395 | goto out; | ||
| 396 | |||
| 397 | conf.error = 0; | ||
| 398 | out: | ||
| 399 | put_online_cpus(); | ||
| 400 | printk(KERN_CONT "%s.\n", (conf.error ? "failed" : "passed")); | ||
| 401 | |||
| 402 | return conf.error; | ||
| 403 | } | ||
| 404 | |||
| 405 | int ds_selftest_pebs(void) | ||
| 406 | { | ||
| 407 | return 0; | ||
| 408 | } | ||
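The bad-request helpers above lean on the kernel's error-pointer idiom: ds_request_bts_task()/ds_request_bts_cpu() return either a valid tracer or an errno encoded in the pointer, and the selftest only passes when the overlapping request is rejected with -EPERM. A minimal sketch of that check, reusing only names visible in the code above (everything else is illustrative):

    #include <linux/err.h>

    /* Sketch: a request that overlaps an active tracer must fail with -EPERM. */
    static int check_overlap_rejected(struct bts_tracer *tracer)
    {
            if (!IS_ERR(tracer)) {          /* real pointer: the request wrongly succeeded */
                    ds_release_bts(tracer);
                    return -1;
            }
            return PTR_ERR(tracer) == -EPERM ? 0 : -1;   /* only -EPERM counts as a pass */
    }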
diff --git a/arch/x86/kernel/ds_selftest.h b/arch/x86/kernel/ds_selftest.h deleted file mode 100644 index 2ba8745c6663..000000000000 --- a/arch/x86/kernel/ds_selftest.h +++ /dev/null | |||
| @@ -1,15 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Debug Store support - selftest | ||
| 3 | * | ||
| 4 | * | ||
| 5 | * Copyright (C) 2009 Intel Corporation. | ||
| 6 | * Markus Metzger <markus.t.metzger@intel.com>, 2009 | ||
| 7 | */ | ||
| 8 | |||
| 9 | #ifdef CONFIG_X86_DS_SELFTEST | ||
| 10 | extern int ds_selftest_bts(void); | ||
| 11 | extern int ds_selftest_pebs(void); | ||
| 12 | #else | ||
| 13 | static inline int ds_selftest_bts(void) { return 0; } | ||
| 14 | static inline int ds_selftest_pebs(void) { return 0; } | ||
| 15 | #endif | ||
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 6d817554780a..c89a386930b7 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c | |||
| @@ -224,11 +224,6 @@ unsigned __kprobes long oops_begin(void) | |||
| 224 | int cpu; | 224 | int cpu; |
| 225 | unsigned long flags; | 225 | unsigned long flags; |
| 226 | 226 | ||
| 227 | /* notify the hw-branch tracer so it may disable tracing and | ||
| 228 | add the last trace to the trace buffer - | ||
| 229 | the earlier this happens, the more useful the trace. */ | ||
| 230 | trace_hw_branch_oops(); | ||
| 231 | |||
| 232 | oops_enter(); | 227 | oops_enter(); |
| 233 | 228 | ||
| 234 | /* racy, but better than risking deadlock. */ | 229 | /* racy, but better than risking deadlock. */ |
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index b9c830c12b4a..fa99bae75ace 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c | |||
| @@ -41,6 +41,14 @@ static void early_vga_write(struct console *con, const char *str, unsigned n) | |||
| 41 | writew(0x720, VGABASE + 2*(max_xpos*j + i)); | 41 | writew(0x720, VGABASE + 2*(max_xpos*j + i)); |
| 42 | current_ypos = max_ypos-1; | 42 | current_ypos = max_ypos-1; |
| 43 | } | 43 | } |
| 44 | #ifdef CONFIG_KGDB_KDB | ||
| 45 | if (c == '\b') { | ||
| 46 | if (current_xpos > 0) | ||
| 47 | current_xpos--; | ||
| 48 | } else if (c == '\r') { | ||
| 49 | current_xpos = 0; | ||
| 50 | } else | ||
| 51 | #endif | ||
| 44 | if (c == '\n') { | 52 | if (c == '\n') { |
| 45 | current_xpos = 0; | 53 | current_xpos = 0; |
| 46 | current_ypos++; | 54 | current_ypos++; |
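The early_vga_write() hunk above teaches the early VGA console about backspace and carriage return so kdb line editing works before a real console driver is up; the control characters reduce to simple cursor arithmetic. A standalone sketch of the same cursor logic (current_xpos/current_ypos mirror the variables in the hunk; the VGA memory writes are omitted):

    #include <stdio.h>

    static int current_xpos, current_ypos;

    /* '\b' backs up one column, '\r' rewinds the line, '\n' starts a new one;
     * any other character advances the cursor. */
    static void early_cursor(char c)
    {
            if (c == '\b') {
                    if (current_xpos > 0)
                            current_xpos--;
            } else if (c == '\r') {
                    current_xpos = 0;
            } else if (c == '\n') {
                    current_xpos = 0;
                    current_ypos++;
            } else {
                    current_xpos++;
            }
    }

    int main(void)
    {
            const char *s = "ab\b\rc\n";
            while (*s)
                    early_cursor(*s++);
            printf("x=%d y=%d\n", current_xpos, current_ypos);   /* prints x=0 y=1 */
            return 0;
    }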
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 44a8e0dc6737..cd49141cf153 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
| @@ -53,6 +53,7 @@ | |||
| 53 | #include <asm/processor-flags.h> | 53 | #include <asm/processor-flags.h> |
| 54 | #include <asm/ftrace.h> | 54 | #include <asm/ftrace.h> |
| 55 | #include <asm/irq_vectors.h> | 55 | #include <asm/irq_vectors.h> |
| 56 | #include <asm/cpufeature.h> | ||
| 56 | 57 | ||
| 57 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ | 58 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ |
| 58 | #include <linux/elf-em.h> | 59 | #include <linux/elf-em.h> |
| @@ -905,7 +906,25 @@ ENTRY(simd_coprocessor_error) | |||
| 905 | RING0_INT_FRAME | 906 | RING0_INT_FRAME |
| 906 | pushl $0 | 907 | pushl $0 |
| 907 | CFI_ADJUST_CFA_OFFSET 4 | 908 | CFI_ADJUST_CFA_OFFSET 4 |
| 909 | #ifdef CONFIG_X86_INVD_BUG | ||
| 910 | /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */ | ||
| 911 | 661: pushl $do_general_protection | ||
| 912 | 662: | ||
| 913 | .section .altinstructions,"a" | ||
| 914 | .balign 4 | ||
| 915 | .long 661b | ||
| 916 | .long 663f | ||
| 917 | .byte X86_FEATURE_XMM | ||
| 918 | .byte 662b-661b | ||
| 919 | .byte 664f-663f | ||
| 920 | .previous | ||
| 921 | .section .altinstr_replacement,"ax" | ||
| 922 | 663: pushl $do_simd_coprocessor_error | ||
| 923 | 664: | ||
| 924 | .previous | ||
| 925 | #else | ||
| 908 | pushl $do_simd_coprocessor_error | 926 | pushl $do_simd_coprocessor_error |
| 927 | #endif | ||
| 909 | CFI_ADJUST_CFA_OFFSET 4 | 928 | CFI_ADJUST_CFA_OFFSET 4 |
| 910 | jmp error_code | 929 | jmp error_code |
| 911 | CFI_ENDPROC | 930 | CFI_ENDPROC |
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 23b4ecdffa9b..a198b7c87a12 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
| @@ -36,6 +36,7 @@ | |||
| 36 | unsigned long hpet_address; | 36 | unsigned long hpet_address; |
| 37 | u8 hpet_blockid; /* OS timer block num */ | 37 | u8 hpet_blockid; /* OS timer block num */ |
| 38 | u8 hpet_msi_disable; | 38 | u8 hpet_msi_disable; |
| 39 | u8 hpet_readback_cmp; | ||
| 39 | 40 | ||
| 40 | #ifdef CONFIG_PCI_MSI | 41 | #ifdef CONFIG_PCI_MSI |
| 41 | static unsigned long hpet_num_timers; | 42 | static unsigned long hpet_num_timers; |
| @@ -395,19 +396,23 @@ static int hpet_next_event(unsigned long delta, | |||
| 395 | * at that point and we would wait for the next hpet interrupt | 396 | * at that point and we would wait for the next hpet interrupt |
| 396 | * forever. We found out that reading the CMP register back | 397 | * forever. We found out that reading the CMP register back |
| 397 | * forces the transfer so we can rely on the comparison with | 398 | * forces the transfer so we can rely on the comparison with |
| 398 | * the counter register below. If the read back from the | 399 | * the counter register below. |
| 399 | * compare register does not match the value we programmed | 400 | * |
| 400 | * then we might have a real hardware problem. We can not do | 401 | * That works fine on those ATI chipsets, but on newer Intel |
| 401 | * much about it here, but at least alert the user/admin with | 402 | * chipsets (ICH9...) this triggers due to an erratum: Reading |
| 402 | * a prominent warning. | 403 | * the comparator immediately following a write is returning |
| 403 | * An erratum on some chipsets (ICH9,..), results in comparator read | 404 | * the old value. |
| 404 | * immediately following a write returning old value. Workaround | 405 | * |
| 405 | * for this is to read this value second time, when first | 406 | * We restrict the read back to the affected ATI chipsets (set |
| 406 | * read returns old value. | 407 | * by quirks) and also run it with hpet=verbose for debugging |
| 408 | * purposes. | ||
| 407 | */ | 409 | */ |
| 408 | if (unlikely((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt)) { | 410 | if (hpet_readback_cmp || hpet_verbose) { |
| 409 | WARN_ONCE(hpet_readl(HPET_Tn_CMP(timer)) != cnt, | 411 | u32 cmp = hpet_readl(HPET_Tn_CMP(timer)); |
| 410 | KERN_WARNING "hpet: compare register read back failed.\n"); | 412 | |
| 413 | if (cmp != cnt) | ||
| 414 | printk_once(KERN_WARNING | ||
| 415 | "hpet: compare register read back failed.\n"); | ||
| 411 | } | 416 | } |
| 412 | 417 | ||
| 413 | return (s32)(hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; | 418 | return (s32)(hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; |
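Two details in the hpet_next_event() hunk are worth spelling out: the comparator read-back is now limited to the quirk-flagged ATI chipsets (hpet_readback_cmp) or hpet=verbose runs, and the final return statement uses a wraparound-safe comparison, subtracting the programmed value from the free-running 32-bit counter and reinterpreting the difference as signed. A small standalone illustration of that comparison idiom (the values are made up):

    #include <stdint.h>
    #include <stdio.h>

    /* Nonzero when 'counter' has already passed 'cnt', even across a 32-bit
     * rollover -- the same test as the -ETIME check above. */
    static int already_expired(uint32_t counter, uint32_t cnt)
    {
            return (int32_t)(counter - cnt) >= 0;
    }

    int main(void)
    {
            printf("%d\n", already_expired(0x00000005u, 0xfffffff0u));   /* 1: passed after rollover */
            printf("%d\n", already_expired(0xfffffff0u, 0x00000005u));   /* 0: deadline still ahead */
            return 0;
    }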
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index d6cc065f519f..a8f1b803d2fd 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c | |||
| @@ -189,25 +189,16 @@ static int get_hbp_len(u8 hbp_len) | |||
| 189 | } | 189 | } |
| 190 | 190 | ||
| 191 | /* | 191 | /* |
| 192 | * Check for virtual address in user space. | ||
| 193 | */ | ||
| 194 | int arch_check_va_in_userspace(unsigned long va, u8 hbp_len) | ||
| 195 | { | ||
| 196 | unsigned int len; | ||
| 197 | |||
| 198 | len = get_hbp_len(hbp_len); | ||
| 199 | |||
| 200 | return (va <= TASK_SIZE - len); | ||
| 201 | } | ||
| 202 | |||
| 203 | /* | ||
| 204 | * Check for virtual address in kernel space. | 192 | * Check for virtual address in kernel space. |
| 205 | */ | 193 | */ |
| 206 | static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len) | 194 | int arch_check_bp_in_kernelspace(struct perf_event *bp) |
| 207 | { | 195 | { |
| 208 | unsigned int len; | 196 | unsigned int len; |
| 197 | unsigned long va; | ||
| 198 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | ||
| 209 | 199 | ||
| 210 | len = get_hbp_len(hbp_len); | 200 | va = info->address; |
| 201 | len = get_hbp_len(info->len); | ||
| 211 | 202 | ||
| 212 | return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); | 203 | return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); |
| 213 | } | 204 | } |
| @@ -300,8 +291,7 @@ static int arch_build_bp_info(struct perf_event *bp) | |||
| 300 | /* | 291 | /* |
| 301 | * Validate the arch-specific HW Breakpoint register settings | 292 | * Validate the arch-specific HW Breakpoint register settings |
| 302 | */ | 293 | */ |
| 303 | int arch_validate_hwbkpt_settings(struct perf_event *bp, | 294 | int arch_validate_hwbkpt_settings(struct perf_event *bp) |
| 304 | struct task_struct *tsk) | ||
| 305 | { | 295 | { |
| 306 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | 296 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); |
| 307 | unsigned int align; | 297 | unsigned int align; |
| @@ -314,16 +304,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp, | |||
| 314 | 304 | ||
| 315 | ret = -EINVAL; | 305 | ret = -EINVAL; |
| 316 | 306 | ||
| 317 | if (info->type == X86_BREAKPOINT_EXECUTE) | ||
| 318 | /* | ||
| 319 | * Ptrace-refactoring code | ||
| 320 | * For now, we'll allow instruction breakpoint only for user-space | ||
| 321 | * addresses | ||
| 322 | */ | ||
| 323 | if ((!arch_check_va_in_userspace(info->address, info->len)) && | ||
| 324 | info->len != X86_BREAKPOINT_EXECUTE) | ||
| 325 | return ret; | ||
| 326 | |||
| 327 | switch (info->len) { | 307 | switch (info->len) { |
| 328 | case X86_BREAKPOINT_LEN_1: | 308 | case X86_BREAKPOINT_LEN_1: |
| 329 | align = 0; | 309 | align = 0; |
| @@ -350,15 +330,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp, | |||
| 350 | if (info->address & align) | 330 | if (info->address & align) |
| 351 | return -EINVAL; | 331 | return -EINVAL; |
| 352 | 332 | ||
| 353 | /* Check that the virtual address is in the proper range */ | ||
| 354 | if (tsk) { | ||
| 355 | if (!arch_check_va_in_userspace(info->address, info->len)) | ||
| 356 | return -EFAULT; | ||
| 357 | } else { | ||
| 358 | if (!arch_check_va_in_kernelspace(info->address, info->len)) | ||
| 359 | return -EFAULT; | ||
| 360 | } | ||
| 361 | |||
| 362 | return 0; | 333 | return 0; |
| 363 | } | 334 | } |
| 364 | 335 | ||
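With arch_check_va_in_userspace() gone, the only arch-side range test left is arch_check_bp_in_kernelspace(): both the first and the last byte covered by the breakpoint must sit at or above TASK_SIZE. A hedged standalone sketch of that predicate (TASK_SIZE is stubbed with an illustrative 3G/1G split; in the kernel it is per-architecture):

    #include <stdio.h>

    #define TASK_SIZE 0xc0000000UL   /* illustrative only, not the real definition */

    static int bp_in_kernelspace(unsigned long va, unsigned long len)
    {
            return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
    }

    int main(void)
    {
            printf("%d\n", bp_in_kernelspace(0xc1000000UL, 4));   /* 1: fully in kernel space */
            printf("%d\n", bp_in_kernelspace(0xbfffffffUL, 4));   /* 0: starts in user space */
            return 0;
    }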
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 54c31c285488..86cef6b32253 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
| @@ -102,65 +102,62 @@ void __cpuinit fpu_init(void) | |||
| 102 | 102 | ||
| 103 | mxcsr_feature_mask_init(); | 103 | mxcsr_feature_mask_init(); |
| 104 | /* clean state in init */ | 104 | /* clean state in init */ |
| 105 | if (cpu_has_xsave) | 105 | current_thread_info()->status = 0; |
| 106 | current_thread_info()->status = TS_XSAVE; | ||
| 107 | else | ||
| 108 | current_thread_info()->status = 0; | ||
| 109 | clear_used_math(); | 106 | clear_used_math(); |
| 110 | } | 107 | } |
| 111 | #endif /* CONFIG_X86_64 */ | 108 | #endif /* CONFIG_X86_64 */ |
| 112 | 109 | ||
| 113 | /* | 110 | static void fpu_finit(struct fpu *fpu) |
| 114 | * The _current_ task is using the FPU for the first time | ||
| 115 | * so initialize it and set the mxcsr to its default | ||
| 116 | * value at reset if we support XMM instructions and then | ||
| 117 | * remeber the current task has used the FPU. | ||
| 118 | */ | ||
| 119 | int init_fpu(struct task_struct *tsk) | ||
| 120 | { | 111 | { |
| 121 | if (tsk_used_math(tsk)) { | ||
| 122 | if (HAVE_HWFP && tsk == current) | ||
| 123 | unlazy_fpu(tsk); | ||
| 124 | return 0; | ||
| 125 | } | ||
| 126 | |||
| 127 | /* | ||
| 128 | * Memory allocation at the first usage of the FPU and other state. | ||
| 129 | */ | ||
| 130 | if (!tsk->thread.xstate) { | ||
| 131 | tsk->thread.xstate = kmem_cache_alloc(task_xstate_cachep, | ||
| 132 | GFP_KERNEL); | ||
| 133 | if (!tsk->thread.xstate) | ||
| 134 | return -ENOMEM; | ||
| 135 | } | ||
| 136 | |||
| 137 | #ifdef CONFIG_X86_32 | 112 | #ifdef CONFIG_X86_32 |
| 138 | if (!HAVE_HWFP) { | 113 | if (!HAVE_HWFP) { |
| 139 | memset(tsk->thread.xstate, 0, xstate_size); | 114 | finit_soft_fpu(&fpu->state->soft); |
| 140 | finit_task(tsk); | 115 | return; |
| 141 | set_stopped_child_used_math(tsk); | ||
| 142 | return 0; | ||
| 143 | } | 116 | } |
| 144 | #endif | 117 | #endif |
| 145 | 118 | ||
| 146 | if (cpu_has_fxsr) { | 119 | if (cpu_has_fxsr) { |
| 147 | struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; | 120 | struct i387_fxsave_struct *fx = &fpu->state->fxsave; |
| 148 | 121 | ||
| 149 | memset(fx, 0, xstate_size); | 122 | memset(fx, 0, xstate_size); |
| 150 | fx->cwd = 0x37f; | 123 | fx->cwd = 0x37f; |
| 151 | if (cpu_has_xmm) | 124 | if (cpu_has_xmm) |
| 152 | fx->mxcsr = MXCSR_DEFAULT; | 125 | fx->mxcsr = MXCSR_DEFAULT; |
| 153 | } else { | 126 | } else { |
| 154 | struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave; | 127 | struct i387_fsave_struct *fp = &fpu->state->fsave; |
| 155 | memset(fp, 0, xstate_size); | 128 | memset(fp, 0, xstate_size); |
| 156 | fp->cwd = 0xffff037fu; | 129 | fp->cwd = 0xffff037fu; |
| 157 | fp->swd = 0xffff0000u; | 130 | fp->swd = 0xffff0000u; |
| 158 | fp->twd = 0xffffffffu; | 131 | fp->twd = 0xffffffffu; |
| 159 | fp->fos = 0xffff0000u; | 132 | fp->fos = 0xffff0000u; |
| 160 | } | 133 | } |
| 134 | } | ||
| 135 | |||
| 136 | /* | ||
| 137 | * The _current_ task is using the FPU for the first time | ||
| 138 | * so initialize it and set the mxcsr to its default | ||
| 139 | * value at reset if we support XMM instructions and then | ||
| 140 | * remeber the current task has used the FPU. | ||
| 141 | */ | ||
| 142 | int init_fpu(struct task_struct *tsk) | ||
| 143 | { | ||
| 144 | int ret; | ||
| 145 | |||
| 146 | if (tsk_used_math(tsk)) { | ||
| 147 | if (HAVE_HWFP && tsk == current) | ||
| 148 | unlazy_fpu(tsk); | ||
| 149 | return 0; | ||
| 150 | } | ||
| 151 | |||
| 161 | /* | 152 | /* |
| 162 | * Only the device not available exception or ptrace can call init_fpu. | 153 | * Memory allocation at the first usage of the FPU and other state. |
| 163 | */ | 154 | */ |
| 155 | ret = fpu_alloc(&tsk->thread.fpu); | ||
| 156 | if (ret) | ||
| 157 | return ret; | ||
| 158 | |||
| 159 | fpu_finit(&tsk->thread.fpu); | ||
| 160 | |||
| 164 | set_stopped_child_used_math(tsk); | 161 | set_stopped_child_used_math(tsk); |
| 165 | return 0; | 162 | return 0; |
| 166 | } | 163 | } |
| @@ -194,7 +191,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, | |||
| 194 | return ret; | 191 | return ret; |
| 195 | 192 | ||
| 196 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, | 193 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, |
| 197 | &target->thread.xstate->fxsave, 0, -1); | 194 | &target->thread.fpu.state->fxsave, 0, -1); |
| 198 | } | 195 | } |
| 199 | 196 | ||
| 200 | int xfpregs_set(struct task_struct *target, const struct user_regset *regset, | 197 | int xfpregs_set(struct task_struct *target, const struct user_regset *regset, |
| @@ -211,19 +208,19 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
| 211 | return ret; | 208 | return ret; |
| 212 | 209 | ||
| 213 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, | 210 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, |
| 214 | &target->thread.xstate->fxsave, 0, -1); | 211 | &target->thread.fpu.state->fxsave, 0, -1); |
| 215 | 212 | ||
| 216 | /* | 213 | /* |
| 217 | * mxcsr reserved bits must be masked to zero for security reasons. | 214 | * mxcsr reserved bits must be masked to zero for security reasons. |
| 218 | */ | 215 | */ |
| 219 | target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; | 216 | target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask; |
| 220 | 217 | ||
| 221 | /* | 218 | /* |
| 222 | * update the header bits in the xsave header, indicating the | 219 | * update the header bits in the xsave header, indicating the |
| 223 | * presence of FP and SSE state. | 220 | * presence of FP and SSE state. |
| 224 | */ | 221 | */ |
| 225 | if (cpu_has_xsave) | 222 | if (cpu_has_xsave) |
| 226 | target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; | 223 | target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; |
| 227 | 224 | ||
| 228 | return ret; | 225 | return ret; |
| 229 | } | 226 | } |
| @@ -246,14 +243,14 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset, | |||
| 246 | * memory layout in the thread struct, so that we can copy the entire | 243 | * memory layout in the thread struct, so that we can copy the entire |
| 247 | * xstateregs to the user using one user_regset_copyout(). | 244 | * xstateregs to the user using one user_regset_copyout(). |
| 248 | */ | 245 | */ |
| 249 | memcpy(&target->thread.xstate->fxsave.sw_reserved, | 246 | memcpy(&target->thread.fpu.state->fxsave.sw_reserved, |
| 250 | xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes)); | 247 | xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes)); |
| 251 | 248 | ||
| 252 | /* | 249 | /* |
| 253 | * Copy the xstate memory layout. | 250 | * Copy the xstate memory layout. |
| 254 | */ | 251 | */ |
| 255 | ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, | 252 | ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, |
| 256 | &target->thread.xstate->xsave, 0, -1); | 253 | &target->thread.fpu.state->xsave, 0, -1); |
| 257 | return ret; | 254 | return ret; |
| 258 | } | 255 | } |
| 259 | 256 | ||
| @@ -272,14 +269,14 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, | |||
| 272 | return ret; | 269 | return ret; |
| 273 | 270 | ||
| 274 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, | 271 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, |
| 275 | &target->thread.xstate->xsave, 0, -1); | 272 | &target->thread.fpu.state->xsave, 0, -1); |
| 276 | 273 | ||
| 277 | /* | 274 | /* |
| 278 | * mxcsr reserved bits must be masked to zero for security reasons. | 275 | * mxcsr reserved bits must be masked to zero for security reasons. |
| 279 | */ | 276 | */ |
| 280 | target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; | 277 | target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask; |
| 281 | 278 | ||
| 282 | xsave_hdr = &target->thread.xstate->xsave.xsave_hdr; | 279 | xsave_hdr = &target->thread.fpu.state->xsave.xsave_hdr; |
| 283 | 280 | ||
| 284 | xsave_hdr->xstate_bv &= pcntxt_mask; | 281 | xsave_hdr->xstate_bv &= pcntxt_mask; |
| 285 | /* | 282 | /* |
| @@ -365,7 +362,7 @@ static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave) | |||
| 365 | static void | 362 | static void |
| 366 | convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) | 363 | convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) |
| 367 | { | 364 | { |
| 368 | struct i387_fxsave_struct *fxsave = &tsk->thread.xstate->fxsave; | 365 | struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; |
| 369 | struct _fpreg *to = (struct _fpreg *) &env->st_space[0]; | 366 | struct _fpreg *to = (struct _fpreg *) &env->st_space[0]; |
| 370 | struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0]; | 367 | struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0]; |
| 371 | int i; | 368 | int i; |
| @@ -405,7 +402,7 @@ static void convert_to_fxsr(struct task_struct *tsk, | |||
| 405 | const struct user_i387_ia32_struct *env) | 402 | const struct user_i387_ia32_struct *env) |
| 406 | 403 | ||
| 407 | { | 404 | { |
| 408 | struct i387_fxsave_struct *fxsave = &tsk->thread.xstate->fxsave; | 405 | struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; |
| 409 | struct _fpreg *from = (struct _fpreg *) &env->st_space[0]; | 406 | struct _fpreg *from = (struct _fpreg *) &env->st_space[0]; |
| 410 | struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0]; | 407 | struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0]; |
| 411 | int i; | 408 | int i; |
| @@ -445,7 +442,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, | |||
| 445 | 442 | ||
| 446 | if (!cpu_has_fxsr) { | 443 | if (!cpu_has_fxsr) { |
| 447 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, | 444 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, |
| 448 | &target->thread.xstate->fsave, 0, | 445 | &target->thread.fpu.state->fsave, 0, |
| 449 | -1); | 446 | -1); |
| 450 | } | 447 | } |
| 451 | 448 | ||
| @@ -475,7 +472,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
| 475 | 472 | ||
| 476 | if (!cpu_has_fxsr) { | 473 | if (!cpu_has_fxsr) { |
| 477 | return user_regset_copyin(&pos, &count, &kbuf, &ubuf, | 474 | return user_regset_copyin(&pos, &count, &kbuf, &ubuf, |
| 478 | &target->thread.xstate->fsave, 0, -1); | 475 | &target->thread.fpu.state->fsave, 0, -1); |
| 479 | } | 476 | } |
| 480 | 477 | ||
| 481 | if (pos > 0 || count < sizeof(env)) | 478 | if (pos > 0 || count < sizeof(env)) |
| @@ -490,7 +487,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
| 490 | * presence of FP. | 487 | * presence of FP. |
| 491 | */ | 488 | */ |
| 492 | if (cpu_has_xsave) | 489 | if (cpu_has_xsave) |
| 493 | target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FP; | 490 | target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FP; |
| 494 | return ret; | 491 | return ret; |
| 495 | } | 492 | } |
| 496 | 493 | ||
| @@ -501,7 +498,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
| 501 | static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) | 498 | static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) |
| 502 | { | 499 | { |
| 503 | struct task_struct *tsk = current; | 500 | struct task_struct *tsk = current; |
| 504 | struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave; | 501 | struct i387_fsave_struct *fp = &tsk->thread.fpu.state->fsave; |
| 505 | 502 | ||
| 506 | fp->status = fp->swd; | 503 | fp->status = fp->swd; |
| 507 | if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct))) | 504 | if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct))) |
| @@ -512,7 +509,7 @@ static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) | |||
| 512 | static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) | 509 | static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) |
| 513 | { | 510 | { |
| 514 | struct task_struct *tsk = current; | 511 | struct task_struct *tsk = current; |
| 515 | struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; | 512 | struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave; |
| 516 | struct user_i387_ia32_struct env; | 513 | struct user_i387_ia32_struct env; |
| 517 | int err = 0; | 514 | int err = 0; |
| 518 | 515 | ||
| @@ -547,7 +544,7 @@ static int save_i387_xsave(void __user *buf) | |||
| 547 | * header as well as change any contents in the memory layout. | 544 | * header as well as change any contents in the memory layout. |
| 548 | * xrestore as part of sigreturn will capture all the changes. | 545 | * xrestore as part of sigreturn will capture all the changes. |
| 549 | */ | 546 | */ |
| 550 | tsk->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; | 547 | tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; |
| 551 | 548 | ||
| 552 | if (save_i387_fxsave(fx) < 0) | 549 | if (save_i387_fxsave(fx) < 0) |
| 553 | return -1; | 550 | return -1; |
| @@ -599,7 +596,7 @@ static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) | |||
| 599 | { | 596 | { |
| 600 | struct task_struct *tsk = current; | 597 | struct task_struct *tsk = current; |
| 601 | 598 | ||
| 602 | return __copy_from_user(&tsk->thread.xstate->fsave, buf, | 599 | return __copy_from_user(&tsk->thread.fpu.state->fsave, buf, |
| 603 | sizeof(struct i387_fsave_struct)); | 600 | sizeof(struct i387_fsave_struct)); |
| 604 | } | 601 | } |
| 605 | 602 | ||
| @@ -610,10 +607,10 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf, | |||
| 610 | struct user_i387_ia32_struct env; | 607 | struct user_i387_ia32_struct env; |
| 611 | int err; | 608 | int err; |
| 612 | 609 | ||
| 613 | err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0], | 610 | err = __copy_from_user(&tsk->thread.fpu.state->fxsave, &buf->_fxsr_env[0], |
| 614 | size); | 611 | size); |
| 615 | /* mxcsr reserved bits must be masked to zero for security reasons */ | 612 | /* mxcsr reserved bits must be masked to zero for security reasons */ |
| 616 | tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; | 613 | tsk->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask; |
| 617 | if (err || __copy_from_user(&env, buf, sizeof(env))) | 614 | if (err || __copy_from_user(&env, buf, sizeof(env))) |
| 618 | return 1; | 615 | return 1; |
| 619 | convert_to_fxsr(tsk, &env); | 616 | convert_to_fxsr(tsk, &env); |
| @@ -629,7 +626,7 @@ static int restore_i387_xsave(void __user *buf) | |||
| 629 | struct i387_fxsave_struct __user *fx = | 626 | struct i387_fxsave_struct __user *fx = |
| 630 | (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0]; | 627 | (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0]; |
| 631 | struct xsave_hdr_struct *xsave_hdr = | 628 | struct xsave_hdr_struct *xsave_hdr = |
| 632 | &current->thread.xstate->xsave.xsave_hdr; | 629 | &current->thread.fpu.state->xsave.xsave_hdr; |
| 633 | u64 mask; | 630 | u64 mask; |
| 634 | int err; | 631 | int err; |
| 635 | 632 | ||
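The i387.c changes move per-task FPU state from tsk->thread.xstate to tsk->thread.fpu.state and route allocation through fpu_alloc()/fpu_finit(), turning init_fpu() into a lazy first-use allocator. The helper bodies are not shown in this hunk; a plausible shape, assuming they are thin wrappers around the task_xstate_cachep slab the old code used directly, might look like the sketch below (names and details beyond those in the diff are assumptions):

    /* Sketch only -- assumed wrappers, not the actual kernel definitions. */
    static inline int fpu_alloc(struct fpu *fpu)
    {
            if (fpu_allocated(fpu))
                    return 0;
            fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);
            if (!fpu->state)
                    return -ENOMEM;
            WARN_ON((unsigned long)fpu->state & 15);   /* fxsave area must be 16-byte aligned */
            return 0;
    }

    static inline void fpu_free(struct fpu *fpu)
    {
            if (fpu->state) {
                    kmem_cache_free(task_xstate_cachep, fpu->state);
                    fpu->state = NULL;
            }
    }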
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index 23c167925a5c..2dfd31597443 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c | |||
| @@ -16,7 +16,7 @@ | |||
| 16 | #include <asm/hpet.h> | 16 | #include <asm/hpet.h> |
| 17 | #include <asm/smp.h> | 17 | #include <asm/smp.h> |
| 18 | 18 | ||
| 19 | DEFINE_SPINLOCK(i8253_lock); | 19 | DEFINE_RAW_SPINLOCK(i8253_lock); |
| 20 | EXPORT_SYMBOL(i8253_lock); | 20 | EXPORT_SYMBOL(i8253_lock); |
| 21 | 21 | ||
| 22 | /* | 22 | /* |
| @@ -33,7 +33,7 @@ struct clock_event_device *global_clock_event; | |||
| 33 | static void init_pit_timer(enum clock_event_mode mode, | 33 | static void init_pit_timer(enum clock_event_mode mode, |
| 34 | struct clock_event_device *evt) | 34 | struct clock_event_device *evt) |
| 35 | { | 35 | { |
| 36 | spin_lock(&i8253_lock); | 36 | raw_spin_lock(&i8253_lock); |
| 37 | 37 | ||
| 38 | switch (mode) { | 38 | switch (mode) { |
| 39 | case CLOCK_EVT_MODE_PERIODIC: | 39 | case CLOCK_EVT_MODE_PERIODIC: |
| @@ -62,7 +62,7 @@ static void init_pit_timer(enum clock_event_mode mode, | |||
| 62 | /* Nothing to do here */ | 62 | /* Nothing to do here */ |
| 63 | break; | 63 | break; |
| 64 | } | 64 | } |
| 65 | spin_unlock(&i8253_lock); | 65 | raw_spin_unlock(&i8253_lock); |
| 66 | } | 66 | } |
| 67 | 67 | ||
| 68 | /* | 68 | /* |
| @@ -72,10 +72,10 @@ static void init_pit_timer(enum clock_event_mode mode, | |||
| 72 | */ | 72 | */ |
| 73 | static int pit_next_event(unsigned long delta, struct clock_event_device *evt) | 73 | static int pit_next_event(unsigned long delta, struct clock_event_device *evt) |
| 74 | { | 74 | { |
| 75 | spin_lock(&i8253_lock); | 75 | raw_spin_lock(&i8253_lock); |
| 76 | outb_pit(delta & 0xff , PIT_CH0); /* LSB */ | 76 | outb_pit(delta & 0xff , PIT_CH0); /* LSB */ |
| 77 | outb_pit(delta >> 8 , PIT_CH0); /* MSB */ | 77 | outb_pit(delta >> 8 , PIT_CH0); /* MSB */ |
| 78 | spin_unlock(&i8253_lock); | 78 | raw_spin_unlock(&i8253_lock); |
| 79 | 79 | ||
| 80 | return 0; | 80 | return 0; |
| 81 | } | 81 | } |
| @@ -130,7 +130,7 @@ static cycle_t pit_read(struct clocksource *cs) | |||
| 130 | int count; | 130 | int count; |
| 131 | u32 jifs; | 131 | u32 jifs; |
| 132 | 132 | ||
| 133 | spin_lock_irqsave(&i8253_lock, flags); | 133 | raw_spin_lock_irqsave(&i8253_lock, flags); |
| 134 | /* | 134 | /* |
| 135 | * Although our caller may have the read side of xtime_lock, | 135 | * Although our caller may have the read side of xtime_lock, |
| 136 | * this is now a seqlock, and we are cheating in this routine | 136 | * this is now a seqlock, and we are cheating in this routine |
| @@ -176,7 +176,7 @@ static cycle_t pit_read(struct clocksource *cs) | |||
| 176 | old_count = count; | 176 | old_count = count; |
| 177 | old_jifs = jifs; | 177 | old_jifs = jifs; |
| 178 | 178 | ||
| 179 | spin_unlock_irqrestore(&i8253_lock, flags); | 179 | raw_spin_unlock_irqrestore(&i8253_lock, flags); |
| 180 | 180 | ||
| 181 | count = (LATCH - 1) - count; | 181 | count = (LATCH - 1) - count; |
| 182 | 182 | ||
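Converting i8253_lock to DEFINE_RAW_SPINLOCK() keeps it a true spinning lock on preempt-rt kernels, where ordinary spinlocks can sleep; the PIT is programmed from interrupt-disabled and timekeeping paths that must never sleep. The call pattern is unchanged apart from the raw_ prefix, e.g. (a sketch assuming the same headers as the file above):

    static DEFINE_RAW_SPINLOCK(demo_pit_lock);

    /* Same shape as the pit_read()/pit_next_event() hunks above: take the raw
     * lock with interrupts off around the two port writes for channel 0. */
    static void demo_pit_program(unsigned long delta)
    {
            unsigned long flags;

            raw_spin_lock_irqsave(&demo_pit_lock, flags);
            outb_pit(delta & 0xff, PIT_CH0);        /* LSB */
            outb_pit(delta >> 8, PIT_CH0);          /* MSB */
            raw_spin_unlock_irqrestore(&demo_pit_lock, flags);
    }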
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 0ed2d300cd46..990ae7cfc578 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
| @@ -60,7 +60,7 @@ static irqreturn_t math_error_irq(int cpl, void *dev_id) | |||
| 60 | outb(0, 0xF0); | 60 | outb(0, 0xF0); |
| 61 | if (ignore_fpu_irq || !boot_cpu_data.hard_math) | 61 | if (ignore_fpu_irq || !boot_cpu_data.hard_math) |
| 62 | return IRQ_NONE; | 62 | return IRQ_NONE; |
| 63 | math_error((void __user *)get_irq_regs()->ip); | 63 | math_error(get_irq_regs(), 0, 16); |
| 64 | return IRQ_HANDLED; | 64 | return IRQ_HANDLED; |
| 65 | } | 65 | } |
| 66 | 66 | ||
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index b2258ca91003..4f4af75b9482 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c | |||
| @@ -47,20 +47,8 @@ | |||
| 47 | #include <asm/debugreg.h> | 47 | #include <asm/debugreg.h> |
| 48 | #include <asm/apicdef.h> | 48 | #include <asm/apicdef.h> |
| 49 | #include <asm/system.h> | 49 | #include <asm/system.h> |
| 50 | |||
| 51 | #include <asm/apic.h> | 50 | #include <asm/apic.h> |
| 52 | 51 | ||
| 53 | /* | ||
| 54 | * Put the error code here just in case the user cares: | ||
| 55 | */ | ||
| 56 | static int gdb_x86errcode; | ||
| 57 | |||
| 58 | /* | ||
| 59 | * Likewise, the vector number here (since GDB only gets the signal | ||
| 60 | * number through the usual means, and that's not very specific): | ||
| 61 | */ | ||
| 62 | static int gdb_x86vector = -1; | ||
| 63 | |||
| 64 | /** | 52 | /** |
| 65 | * pt_regs_to_gdb_regs - Convert ptrace regs to GDB regs | 53 | * pt_regs_to_gdb_regs - Convert ptrace regs to GDB regs |
| 66 | * @gdb_regs: A pointer to hold the registers in the order GDB wants. | 54 | * @gdb_regs: A pointer to hold the registers in the order GDB wants. |
| @@ -211,6 +199,8 @@ static struct hw_breakpoint { | |||
| 211 | struct perf_event **pev; | 199 | struct perf_event **pev; |
| 212 | } breakinfo[4]; | 200 | } breakinfo[4]; |
| 213 | 201 | ||
| 202 | static unsigned long early_dr7; | ||
| 203 | |||
| 214 | static void kgdb_correct_hw_break(void) | 204 | static void kgdb_correct_hw_break(void) |
| 215 | { | 205 | { |
| 216 | int breakno; | 206 | int breakno; |
| @@ -222,6 +212,14 @@ static void kgdb_correct_hw_break(void) | |||
| 222 | int cpu = raw_smp_processor_id(); | 212 | int cpu = raw_smp_processor_id(); |
| 223 | if (!breakinfo[breakno].enabled) | 213 | if (!breakinfo[breakno].enabled) |
| 224 | continue; | 214 | continue; |
| 215 | if (dbg_is_early) { | ||
| 216 | set_debugreg(breakinfo[breakno].addr, breakno); | ||
| 217 | early_dr7 |= encode_dr7(breakno, | ||
| 218 | breakinfo[breakno].len, | ||
| 219 | breakinfo[breakno].type); | ||
| 220 | set_debugreg(early_dr7, 7); | ||
| 221 | continue; | ||
| 222 | } | ||
| 225 | bp = *per_cpu_ptr(breakinfo[breakno].pev, cpu); | 223 | bp = *per_cpu_ptr(breakinfo[breakno].pev, cpu); |
| 226 | info = counter_arch_bp(bp); | 224 | info = counter_arch_bp(bp); |
| 227 | if (bp->attr.disabled != 1) | 225 | if (bp->attr.disabled != 1) |
| @@ -236,7 +234,8 @@ static void kgdb_correct_hw_break(void) | |||
| 236 | if (!val) | 234 | if (!val) |
| 237 | bp->attr.disabled = 0; | 235 | bp->attr.disabled = 0; |
| 238 | } | 236 | } |
| 239 | hw_breakpoint_restore(); | 237 | if (!dbg_is_early) |
| 238 | hw_breakpoint_restore(); | ||
| 240 | } | 239 | } |
| 241 | 240 | ||
| 242 | static int hw_break_reserve_slot(int breakno) | 241 | static int hw_break_reserve_slot(int breakno) |
| @@ -245,6 +244,9 @@ static int hw_break_reserve_slot(int breakno) | |||
| 245 | int cnt = 0; | 244 | int cnt = 0; |
| 246 | struct perf_event **pevent; | 245 | struct perf_event **pevent; |
| 247 | 246 | ||
| 247 | if (dbg_is_early) | ||
| 248 | return 0; | ||
| 249 | |||
| 248 | for_each_online_cpu(cpu) { | 250 | for_each_online_cpu(cpu) { |
| 249 | cnt++; | 251 | cnt++; |
| 250 | pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); | 252 | pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); |
| @@ -270,6 +272,9 @@ static int hw_break_release_slot(int breakno) | |||
| 270 | struct perf_event **pevent; | 272 | struct perf_event **pevent; |
| 271 | int cpu; | 273 | int cpu; |
| 272 | 274 | ||
| 275 | if (dbg_is_early) | ||
| 276 | return 0; | ||
| 277 | |||
| 273 | for_each_online_cpu(cpu) { | 278 | for_each_online_cpu(cpu) { |
| 274 | pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); | 279 | pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); |
| 275 | if (dbg_release_bp_slot(*pevent)) | 280 | if (dbg_release_bp_slot(*pevent)) |
| @@ -314,7 +319,11 @@ static void kgdb_remove_all_hw_break(void) | |||
| 314 | bp = *per_cpu_ptr(breakinfo[i].pev, cpu); | 319 | bp = *per_cpu_ptr(breakinfo[i].pev, cpu); |
| 315 | if (bp->attr.disabled == 1) | 320 | if (bp->attr.disabled == 1) |
| 316 | continue; | 321 | continue; |
| 317 | arch_uninstall_hw_breakpoint(bp); | 322 | if (dbg_is_early) |
| 323 | early_dr7 &= ~encode_dr7(i, breakinfo[i].len, | ||
| 324 | breakinfo[i].type); | ||
| 325 | else | ||
| 326 | arch_uninstall_hw_breakpoint(bp); | ||
| 318 | bp->attr.disabled = 1; | 327 | bp->attr.disabled = 1; |
| 319 | } | 328 | } |
| 320 | } | 329 | } |
| @@ -391,6 +400,11 @@ void kgdb_disable_hw_debug(struct pt_regs *regs) | |||
| 391 | for (i = 0; i < 4; i++) { | 400 | for (i = 0; i < 4; i++) { |
| 392 | if (!breakinfo[i].enabled) | 401 | if (!breakinfo[i].enabled) |
| 393 | continue; | 402 | continue; |
| 403 | if (dbg_is_early) { | ||
| 404 | early_dr7 &= ~encode_dr7(i, breakinfo[i].len, | ||
| 405 | breakinfo[i].type); | ||
| 406 | continue; | ||
| 407 | } | ||
| 394 | bp = *per_cpu_ptr(breakinfo[i].pev, cpu); | 408 | bp = *per_cpu_ptr(breakinfo[i].pev, cpu); |
| 395 | if (bp->attr.disabled == 1) | 409 | if (bp->attr.disabled == 1) |
| 396 | continue; | 410 | continue; |
| @@ -399,23 +413,6 @@ void kgdb_disable_hw_debug(struct pt_regs *regs) | |||
| 399 | } | 413 | } |
| 400 | } | 414 | } |
| 401 | 415 | ||
| 402 | /** | ||
| 403 | * kgdb_post_primary_code - Save error vector/code numbers. | ||
| 404 | * @regs: Original pt_regs. | ||
| 405 | * @e_vector: Original error vector. | ||
| 406 | * @err_code: Original error code. | ||
| 407 | * | ||
| 408 | * This is needed on architectures which support SMP and KGDB. | ||
| 409 | * This function is called after all the slave cpus have been put | ||
| 410 | * to a know spin state and the primary CPU has control over KGDB. | ||
| 411 | */ | ||
| 412 | void kgdb_post_primary_code(struct pt_regs *regs, int e_vector, int err_code) | ||
| 413 | { | ||
| 414 | /* primary processor is completely in the debugger */ | ||
| 415 | gdb_x86vector = e_vector; | ||
| 416 | gdb_x86errcode = err_code; | ||
| 417 | } | ||
| 418 | |||
| 419 | #ifdef CONFIG_SMP | 416 | #ifdef CONFIG_SMP |
| 420 | /** | 417 | /** |
| 421 | * kgdb_roundup_cpus - Get other CPUs into a holding pattern | 418 | * kgdb_roundup_cpus - Get other CPUs into a holding pattern |
| @@ -567,7 +564,7 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) | |||
| 567 | return NOTIFY_DONE; | 564 | return NOTIFY_DONE; |
| 568 | } | 565 | } |
| 569 | 566 | ||
| 570 | if (kgdb_handle_exception(args->trapnr, args->signr, args->err, regs)) | 567 | if (kgdb_handle_exception(args->trapnr, args->signr, cmd, regs)) |
| 571 | return NOTIFY_DONE; | 568 | return NOTIFY_DONE; |
| 572 | 569 | ||
| 573 | /* Must touch watchdog before return to normal operation */ | 570 | /* Must touch watchdog before return to normal operation */ |
| @@ -575,6 +572,26 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) | |||
| 575 | return NOTIFY_STOP; | 572 | return NOTIFY_STOP; |
| 576 | } | 573 | } |
| 577 | 574 | ||
| 575 | #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP | ||
| 576 | int kgdb_ll_trap(int cmd, const char *str, | ||
| 577 | struct pt_regs *regs, long err, int trap, int sig) | ||
| 578 | { | ||
| 579 | struct die_args args = { | ||
| 580 | .regs = regs, | ||
| 581 | .str = str, | ||
| 582 | .err = err, | ||
| 583 | .trapnr = trap, | ||
| 584 | .signr = sig, | ||
| 585 | |||
| 586 | }; | ||
| 587 | |||
| 588 | if (!kgdb_io_module_registered) | ||
| 589 | return NOTIFY_DONE; | ||
| 590 | |||
| 591 | return __kgdb_notify(&args, cmd); | ||
| 592 | } | ||
| 593 | #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ | ||
| 594 | |||
| 578 | static int | 595 | static int |
| 579 | kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr) | 596 | kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr) |
| 580 | { | 597 | { |
| @@ -605,14 +622,15 @@ static struct notifier_block kgdb_notifier = { | |||
| 605 | */ | 622 | */ |
| 606 | int kgdb_arch_init(void) | 623 | int kgdb_arch_init(void) |
| 607 | { | 624 | { |
| 625 | return register_die_notifier(&kgdb_notifier); | ||
| 626 | } | ||
| 627 | |||
| 628 | void kgdb_arch_late(void) | ||
| 629 | { | ||
| 608 | int i, cpu; | 630 | int i, cpu; |
| 609 | int ret; | ||
| 610 | struct perf_event_attr attr; | 631 | struct perf_event_attr attr; |
| 611 | struct perf_event **pevent; | 632 | struct perf_event **pevent; |
| 612 | 633 | ||
| 613 | ret = register_die_notifier(&kgdb_notifier); | ||
| 614 | if (ret != 0) | ||
| 615 | return ret; | ||
| 616 | /* | 634 | /* |
| 617 | * Pre-allocate the hw breakpoint structions in the non-atomic | 635 | * Pre-allocate the hw breakpoint structions in the non-atomic |
| 618 | * portion of kgdb because this operation requires mutexs to | 636 | * portion of kgdb because this operation requires mutexs to |
| @@ -624,12 +642,15 @@ int kgdb_arch_init(void) | |||
| 624 | attr.bp_type = HW_BREAKPOINT_W; | 642 | attr.bp_type = HW_BREAKPOINT_W; |
| 625 | attr.disabled = 1; | 643 | attr.disabled = 1; |
| 626 | for (i = 0; i < 4; i++) { | 644 | for (i = 0; i < 4; i++) { |
| 645 | if (breakinfo[i].pev) | ||
| 646 | continue; | ||
| 627 | breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL); | 647 | breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL); |
| 628 | if (IS_ERR(breakinfo[i].pev)) { | 648 | if (IS_ERR(breakinfo[i].pev)) { |
| 629 | printk(KERN_ERR "kgdb: Could not allocate hw breakpoints\n"); | 649 | printk(KERN_ERR "kgdb: Could not allocate hw" |
| 650 | "breakpoints\nDisabling the kernel debugger\n"); | ||
| 630 | breakinfo[i].pev = NULL; | 651 | breakinfo[i].pev = NULL; |
| 631 | kgdb_arch_exit(); | 652 | kgdb_arch_exit(); |
| 632 | return -1; | 653 | return; |
| 633 | } | 654 | } |
| 634 | for_each_online_cpu(cpu) { | 655 | for_each_online_cpu(cpu) { |
| 635 | pevent = per_cpu_ptr(breakinfo[i].pev, cpu); | 656 | pevent = per_cpu_ptr(breakinfo[i].pev, cpu); |
| @@ -640,7 +661,6 @@ int kgdb_arch_init(void) | |||
| 640 | } | 661 | } |
| 641 | } | 662 | } |
| 642 | } | 663 | } |
| 643 | return ret; | ||
| 644 | } | 664 | } |
| 645 | 665 | ||
| 646 | /** | 666 | /** |
| @@ -690,6 +710,11 @@ unsigned long kgdb_arch_pc(int exception, struct pt_regs *regs) | |||
| 690 | return instruction_pointer(regs); | 710 | return instruction_pointer(regs); |
| 691 | } | 711 | } |
| 692 | 712 | ||
| 713 | void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip) | ||
| 714 | { | ||
| 715 | regs->ip = ip; | ||
| 716 | } | ||
| 717 | |||
| 693 | struct kgdb_arch arch_kgdb_ops = { | 718 | struct kgdb_arch arch_kgdb_ops = { |
| 694 | /* Breakpoint instruction: */ | 719 | /* Breakpoint instruction: */ |
| 695 | .gdb_bpt_instr = { 0xcc }, | 720 | .gdb_bpt_instr = { 0xcc }, |
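The dbg_is_early paths added above let kgdb arm hardware breakpoints before the perf-backed slots exist: the address is written straight into DR0-DR3 and the enable/len/type bits are accumulated in the early_dr7 shadow built with encode_dr7(). A sketch of that shadow-register handling, using only helpers that appear in the hunk:

    static unsigned long demo_dr7;   /* shadow of the DR7 control register */

    static void demo_arm(int slot, unsigned long addr, int len, int type)
    {
            set_debugreg(addr, slot);                  /* program DR0..DR3 directly */
            demo_dr7 |= encode_dr7(slot, len, type);   /* add this slot's enable bits */
            set_debugreg(demo_dr7, 7);                 /* commit the control register */
    }

    static void demo_disarm(int slot, int len, int type)
    {
            demo_dr7 &= ~encode_dr7(slot, len, type);  /* drop this slot's bits */
            set_debugreg(demo_dr7, 7);
    }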
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index b43bbaebe2c0..345a4b1fe144 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
| @@ -422,14 +422,22 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, | |||
| 422 | 422 | ||
| 423 | static void __kprobes clear_btf(void) | 423 | static void __kprobes clear_btf(void) |
| 424 | { | 424 | { |
| 425 | if (test_thread_flag(TIF_DEBUGCTLMSR)) | 425 | if (test_thread_flag(TIF_BLOCKSTEP)) { |
| 426 | update_debugctlmsr(0); | 426 | unsigned long debugctl = get_debugctlmsr(); |
| 427 | |||
| 428 | debugctl &= ~DEBUGCTLMSR_BTF; | ||
| 429 | update_debugctlmsr(debugctl); | ||
| 430 | } | ||
| 427 | } | 431 | } |
| 428 | 432 | ||
| 429 | static void __kprobes restore_btf(void) | 433 | static void __kprobes restore_btf(void) |
| 430 | { | 434 | { |
| 431 | if (test_thread_flag(TIF_DEBUGCTLMSR)) | 435 | if (test_thread_flag(TIF_BLOCKSTEP)) { |
| 432 | update_debugctlmsr(current->thread.debugctlmsr); | 436 | unsigned long debugctl = get_debugctlmsr(); |
| 437 | |||
| 438 | debugctl |= DEBUGCTLMSR_BTF; | ||
| 439 | update_debugctlmsr(debugctl); | ||
| 440 | } | ||
| 433 | } | 441 | } |
| 434 | 442 | ||
| 435 | void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, | 443 | void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, |
| @@ -534,20 +542,6 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) | |||
| 534 | struct kprobe_ctlblk *kcb; | 542 | struct kprobe_ctlblk *kcb; |
| 535 | 543 | ||
| 536 | addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t)); | 544 | addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t)); |
| 537 | if (*addr != BREAKPOINT_INSTRUCTION) { | ||
| 538 | /* | ||
| 539 | * The breakpoint instruction was removed right | ||
| 540 | * after we hit it. Another cpu has removed | ||
| 541 | * either a probepoint or a debugger breakpoint | ||
| 542 | * at this address. In either case, no further | ||
| 543 | * handling of this interrupt is appropriate. | ||
| 544 | * Back up over the (now missing) int3 and run | ||
| 545 | * the original instruction. | ||
| 546 | */ | ||
| 547 | regs->ip = (unsigned long)addr; | ||
| 548 | return 1; | ||
| 549 | } | ||
| 550 | |||
| 551 | /* | 545 | /* |
| 552 | * We don't want to be preempted for the entire | 546 | * We don't want to be preempted for the entire |
| 553 | * duration of kprobe processing. We conditionally | 547 | * duration of kprobe processing. We conditionally |
| @@ -579,6 +573,19 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) | |||
| 579 | setup_singlestep(p, regs, kcb, 0); | 573 | setup_singlestep(p, regs, kcb, 0); |
| 580 | return 1; | 574 | return 1; |
| 581 | } | 575 | } |
| 576 | } else if (*addr != BREAKPOINT_INSTRUCTION) { | ||
| 577 | /* | ||
| 578 | * The breakpoint instruction was removed right | ||
| 579 | * after we hit it. Another cpu has removed | ||
| 580 | * either a probepoint or a debugger breakpoint | ||
| 581 | * at this address. In either case, no further | ||
| 582 | * handling of this interrupt is appropriate. | ||
| 583 | * Back up over the (now missing) int3 and run | ||
| 584 | * the original instruction. | ||
| 585 | */ | ||
| 586 | regs->ip = (unsigned long)addr; | ||
| 587 | preempt_enable_no_resched(); | ||
| 588 | return 1; | ||
| 582 | } else if (kprobe_running()) { | 589 | } else if (kprobe_running()) { |
| 583 | p = __get_cpu_var(current_kprobe); | 590 | p = __get_cpu_var(current_kprobe); |
| 584 | if (p->break_handler && p->break_handler(p, regs)) { | 591 | if (p->break_handler && p->break_handler(p, regs)) { |
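clear_btf() and restore_btf() now do a read-modify-write of only the BTF (branch trap flag) bit in DEBUGCTL instead of writing zero or the cached per-thread value wholesale, so other DEBUGCTL bits survive a kprobe single-step. The pattern boils down to the helper below (a sketch; only get_debugctlmsr()/update_debugctlmsr() and DEBUGCTLMSR_BTF are taken from the hunk):

    /* Set or clear a single DEBUGCTL bit while preserving the rest of the MSR. */
    static void set_debugctl_bit(unsigned long bit, int on)
    {
            unsigned long debugctl = get_debugctlmsr();

            if (on)
                    debugctl |= bit;
            else
                    debugctl &= ~bit;
            update_debugctlmsr(debugctl);
    }

    /* Usage mirroring the hunk:
     *   set_debugctl_bit(DEBUGCTLMSR_BTF, 0);   -- clear_btf(), before single-stepping
     *   set_debugctl_bit(DEBUGCTLMSR_BTF, 1);   -- restore_btf(), afterwards
     */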
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index feaeb0d3aa4f..eb9b76c716c2 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
| @@ -29,6 +29,8 @@ | |||
| 29 | #define KVM_SCALE 22 | 29 | #define KVM_SCALE 22 |
| 30 | 30 | ||
| 31 | static int kvmclock = 1; | 31 | static int kvmclock = 1; |
| 32 | static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; | ||
| 33 | static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK; | ||
| 32 | 34 | ||
| 33 | static int parse_no_kvmclock(char *arg) | 35 | static int parse_no_kvmclock(char *arg) |
| 34 | { | 36 | { |
| @@ -54,7 +56,8 @@ static unsigned long kvm_get_wallclock(void) | |||
| 54 | 56 | ||
| 55 | low = (int)__pa_symbol(&wall_clock); | 57 | low = (int)__pa_symbol(&wall_clock); |
| 56 | high = ((u64)__pa_symbol(&wall_clock) >> 32); | 58 | high = ((u64)__pa_symbol(&wall_clock) >> 32); |
| 57 | native_write_msr(MSR_KVM_WALL_CLOCK, low, high); | 59 | |
| 60 | native_write_msr(msr_kvm_wall_clock, low, high); | ||
| 58 | 61 | ||
| 59 | vcpu_time = &get_cpu_var(hv_clock); | 62 | vcpu_time = &get_cpu_var(hv_clock); |
| 60 | pvclock_read_wallclock(&wall_clock, vcpu_time, &ts); | 63 | pvclock_read_wallclock(&wall_clock, vcpu_time, &ts); |
| @@ -130,7 +133,8 @@ static int kvm_register_clock(char *txt) | |||
| 130 | high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); | 133 | high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); |
| 131 | printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n", | 134 | printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n", |
| 132 | cpu, high, low, txt); | 135 | cpu, high, low, txt); |
| 133 | return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high); | 136 | |
| 137 | return native_write_msr_safe(msr_kvm_system_time, low, high); | ||
| 134 | } | 138 | } |
| 135 | 139 | ||
| 136 | #ifdef CONFIG_X86_LOCAL_APIC | 140 | #ifdef CONFIG_X86_LOCAL_APIC |
| @@ -165,14 +169,14 @@ static void __init kvm_smp_prepare_boot_cpu(void) | |||
| 165 | #ifdef CONFIG_KEXEC | 169 | #ifdef CONFIG_KEXEC |
| 166 | static void kvm_crash_shutdown(struct pt_regs *regs) | 170 | static void kvm_crash_shutdown(struct pt_regs *regs) |
| 167 | { | 171 | { |
| 168 | native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0); | 172 | native_write_msr(msr_kvm_system_time, 0, 0); |
| 169 | native_machine_crash_shutdown(regs); | 173 | native_machine_crash_shutdown(regs); |
| 170 | } | 174 | } |
| 171 | #endif | 175 | #endif |
| 172 | 176 | ||
| 173 | static void kvm_shutdown(void) | 177 | static void kvm_shutdown(void) |
| 174 | { | 178 | { |
| 175 | native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0); | 179 | native_write_msr(msr_kvm_system_time, 0, 0); |
| 176 | native_machine_shutdown(); | 180 | native_machine_shutdown(); |
| 177 | } | 181 | } |
| 178 | 182 | ||
| @@ -181,27 +185,37 @@ void __init kvmclock_init(void) | |||
| 181 | if (!kvm_para_available()) | 185 | if (!kvm_para_available()) |
| 182 | return; | 186 | return; |
| 183 | 187 | ||
| 184 | if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) { | 188 | if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE2)) { |
| 185 | if (kvm_register_clock("boot clock")) | 189 | msr_kvm_system_time = MSR_KVM_SYSTEM_TIME_NEW; |
| 186 | return; | 190 | msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK_NEW; |
| 187 | pv_time_ops.sched_clock = kvm_clock_read; | 191 | } else if (!(kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE))) |
| 188 | x86_platform.calibrate_tsc = kvm_get_tsc_khz; | 192 | return; |
| 189 | x86_platform.get_wallclock = kvm_get_wallclock; | 193 | |
| 190 | x86_platform.set_wallclock = kvm_set_wallclock; | 194 | printk(KERN_INFO "kvm-clock: Using msrs %x and %x", |
| 195 | msr_kvm_system_time, msr_kvm_wall_clock); | ||
| 196 | |||
| 197 | if (kvm_register_clock("boot clock")) | ||
| 198 | return; | ||
| 199 | pv_time_ops.sched_clock = kvm_clock_read; | ||
| 200 | x86_platform.calibrate_tsc = kvm_get_tsc_khz; | ||
| 201 | x86_platform.get_wallclock = kvm_get_wallclock; | ||
| 202 | x86_platform.set_wallclock = kvm_set_wallclock; | ||
| 191 | #ifdef CONFIG_X86_LOCAL_APIC | 203 | #ifdef CONFIG_X86_LOCAL_APIC |
| 192 | x86_cpuinit.setup_percpu_clockev = | 204 | x86_cpuinit.setup_percpu_clockev = |
| 193 | kvm_setup_secondary_clock; | 205 | kvm_setup_secondary_clock; |
| 194 | #endif | 206 | #endif |
| 195 | #ifdef CONFIG_SMP | 207 | #ifdef CONFIG_SMP |
| 196 | smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; | 208 | smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; |
| 197 | #endif | 209 | #endif |
| 198 | machine_ops.shutdown = kvm_shutdown; | 210 | machine_ops.shutdown = kvm_shutdown; |
| 199 | #ifdef CONFIG_KEXEC | 211 | #ifdef CONFIG_KEXEC |
| 200 | machine_ops.crash_shutdown = kvm_crash_shutdown; | 212 | machine_ops.crash_shutdown = kvm_crash_shutdown; |
| 201 | #endif | 213 | #endif |
| 202 | kvm_get_preset_lpj(); | 214 | kvm_get_preset_lpj(); |
| 203 | clocksource_register(&kvm_clock); | 215 | clocksource_register(&kvm_clock); |
| 204 | pv_info.paravirt_enabled = 1; | 216 | pv_info.paravirt_enabled = 1; |
| 205 | pv_info.name = "KVM"; | 217 | pv_info.name = "KVM"; |
| 206 | } | 218 | |
| 219 | if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) | ||
| 220 | pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT); | ||
| 207 | } | 221 | } |
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index cceb5bc3c3c2..fa6551d36c10 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c | |||
| @@ -201,9 +201,9 @@ static int do_microcode_update(const void __user *buf, size_t size) | |||
| 201 | return error; | 201 | return error; |
| 202 | } | 202 | } |
| 203 | 203 | ||
| 204 | static int microcode_open(struct inode *unused1, struct file *unused2) | 204 | static int microcode_open(struct inode *inode, struct file *file) |
| 205 | { | 205 | { |
| 206 | return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; | 206 | return capable(CAP_SYS_RAWIO) ? nonseekable_open(inode, file) : -EPERM; |
| 207 | } | 207 | } |
| 208 | 208 | ||
| 209 | static ssize_t microcode_write(struct file *file, const char __user *buf, | 209 | static ssize_t microcode_write(struct file *file, const char __user *buf, |
| @@ -260,6 +260,7 @@ static void microcode_dev_exit(void) | |||
| 260 | } | 260 | } |
| 261 | 261 | ||
| 262 | MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); | 262 | MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); |
| 263 | MODULE_ALIAS("devname:cpu/microcode"); | ||
| 263 | #else | 264 | #else |
| 264 | #define microcode_dev_init() 0 | 265 | #define microcode_dev_init() 0 |
| 265 | #define microcode_dev_exit() do { } while (0) | 266 | #define microcode_dev_exit() do { } while (0) |
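The microcode_open() change does two things at once: it keeps the CAP_SYS_RAWIO gate and marks the file non-seekable via nonseekable_open(), while the new devname alias lets the device node be created on demand. The open path reads more naturally when unrolled (a sketch equivalent to the one-liner above):

    static int demo_microcode_open(struct inode *inode, struct file *file)
    {
            if (!capable(CAP_SYS_RAWIO))
                    return -EPERM;                    /* raw hardware access only */
            return nonseekable_open(inode, file);     /* seeking on this device makes no sense */
    }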
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 85a343e28937..356170262a93 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c | |||
| @@ -343,10 +343,11 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, | |||
| 343 | int (*get_ucode_data)(void *, const void *, size_t)) | 343 | int (*get_ucode_data)(void *, const void *, size_t)) |
| 344 | { | 344 | { |
| 345 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 345 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
| 346 | u8 *ucode_ptr = data, *new_mc = NULL, *mc; | 346 | u8 *ucode_ptr = data, *new_mc = NULL, *mc = NULL; |
| 347 | int new_rev = uci->cpu_sig.rev; | 347 | int new_rev = uci->cpu_sig.rev; |
| 348 | unsigned int leftover = size; | 348 | unsigned int leftover = size; |
| 349 | enum ucode_state state = UCODE_OK; | 349 | enum ucode_state state = UCODE_OK; |
| 350 | unsigned int curr_mc_size = 0; | ||
| 350 | 351 | ||
| 351 | while (leftover) { | 352 | while (leftover) { |
| 352 | struct microcode_header_intel mc_header; | 353 | struct microcode_header_intel mc_header; |
| @@ -361,9 +362,15 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, | |||
| 361 | break; | 362 | break; |
| 362 | } | 363 | } |
| 363 | 364 | ||
| 364 | mc = vmalloc(mc_size); | 365 | /* For performance reasons, reuse mc area when possible */ |
| 365 | if (!mc) | 366 | if (!mc || mc_size > curr_mc_size) { |
| 366 | break; | 367 | if (mc) |
| 368 | vfree(mc); | ||
| 369 | mc = vmalloc(mc_size); | ||
| 370 | if (!mc) | ||
| 371 | break; | ||
| 372 | curr_mc_size = mc_size; | ||
| 373 | } | ||
| 367 | 374 | ||
| 368 | if (get_ucode_data(mc, ucode_ptr, mc_size) || | 375 | if (get_ucode_data(mc, ucode_ptr, mc_size) || |
| 369 | microcode_sanity_check(mc) < 0) { | 376 | microcode_sanity_check(mc) < 0) { |
| @@ -376,13 +383,16 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, | |||
| 376 | vfree(new_mc); | 383 | vfree(new_mc); |
| 377 | new_rev = mc_header.rev; | 384 | new_rev = mc_header.rev; |
| 378 | new_mc = mc; | 385 | new_mc = mc; |
| 379 | } else | 386 | mc = NULL; /* trigger new vmalloc */ |
| 380 | vfree(mc); | 387 | } |
| 381 | 388 | ||
| 382 | ucode_ptr += mc_size; | 389 | ucode_ptr += mc_size; |
| 383 | leftover -= mc_size; | 390 | leftover -= mc_size; |
| 384 | } | 391 | } |
| 385 | 392 | ||
| 393 | if (mc) | ||
| 394 | vfree(mc); | ||
| 395 | |||
| 386 | if (leftover) { | 396 | if (leftover) { |
| 387 | if (new_mc) | 397 | if (new_mc) |
| 388 | vfree(new_mc); | 398 | vfree(new_mc); |
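generic_load_microcode() now keeps its vmalloc'd scratch buffer across loop iterations, reallocating only when the next update is larger, and sets mc to NULL whenever a candidate is kept as new_mc so ownership passes cleanly. The allocation discipline is easier to see in isolation (a standalone sketch of the same grow-only/hand-off pattern, using libc allocators instead of vmalloc):

    #include <stdlib.h>

    static unsigned char *scratch;
    static size_t scratch_size;

    /* Grow-only scratch buffer: reuse it when big enough, otherwise reallocate. */
    static unsigned char *get_scratch(size_t need)
    {
            if (!scratch || need > scratch_size) {
                    free(scratch);
                    scratch = malloc(need);
                    scratch_size = scratch ? need : 0;
            }
            return scratch;
    }

    /* Keep the current contents: ownership moves to the caller and the next
     * get_scratch() call is forced to allocate a fresh buffer. */
    static unsigned char *keep_candidate(void)
    {
            unsigned char *kept = scratch;

            scratch = NULL;
            scratch_size = 0;
            return kept;
    }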
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index e81030f71a8f..5ae5d2426edf 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
| @@ -115,21 +115,6 @@ static void __init MP_bus_info(struct mpc_bus *m) | |||
| 115 | printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); | 115 | printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); |
| 116 | } | 116 | } |
| 117 | 117 | ||
| 118 | static int bad_ioapic(unsigned long address) | ||
| 119 | { | ||
| 120 | if (nr_ioapics >= MAX_IO_APICS) { | ||
| 121 | printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " | ||
| 122 | "(found %d)\n", MAX_IO_APICS, nr_ioapics); | ||
| 123 | panic("Recompile kernel with bigger MAX_IO_APICS!\n"); | ||
| 124 | } | ||
| 125 | if (!address) { | ||
| 126 | printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address" | ||
| 127 | " found in table, skipping!\n"); | ||
| 128 | return 1; | ||
| 129 | } | ||
| 130 | return 0; | ||
| 131 | } | ||
| 132 | |||
| 133 | static void __init MP_ioapic_info(struct mpc_ioapic *m) | 118 | static void __init MP_ioapic_info(struct mpc_ioapic *m) |
| 134 | { | 119 | { |
| 135 | if (!(m->flags & MPC_APIC_USABLE)) | 120 | if (!(m->flags & MPC_APIC_USABLE)) |
| @@ -138,15 +123,7 @@ static void __init MP_ioapic_info(struct mpc_ioapic *m) | |||
| 138 | printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n", | 123 | printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n", |
| 139 | m->apicid, m->apicver, m->apicaddr); | 124 | m->apicid, m->apicver, m->apicaddr); |
| 140 | 125 | ||
| 141 | if (bad_ioapic(m->apicaddr)) | 126 | mp_register_ioapic(m->apicid, m->apicaddr, gsi_end + 1); |
| 142 | return; | ||
| 143 | |||
| 144 | mp_ioapics[nr_ioapics].apicaddr = m->apicaddr; | ||
| 145 | mp_ioapics[nr_ioapics].apicid = m->apicid; | ||
| 146 | mp_ioapics[nr_ioapics].type = m->type; | ||
| 147 | mp_ioapics[nr_ioapics].apicver = m->apicver; | ||
| 148 | mp_ioapics[nr_ioapics].flags = m->flags; | ||
| 149 | nr_ioapics++; | ||
| 150 | } | 127 | } |
| 151 | 128 | ||
| 152 | static void print_MP_intsrc_info(struct mpc_intsrc *m) | 129 | static void print_MP_intsrc_info(struct mpc_intsrc *m) |
diff --git a/arch/x86/kernel/mrst.c b/arch/x86/kernel/mrst.c index 0aad8670858e..e796448f0eb5 100644 --- a/arch/x86/kernel/mrst.c +++ b/arch/x86/kernel/mrst.c | |||
| @@ -237,4 +237,9 @@ void __init x86_mrst_early_setup(void) | |||
| 237 | x86_init.pci.fixup_irqs = x86_init_noop; | 237 | x86_init.pci.fixup_irqs = x86_init_noop; |
| 238 | 238 | ||
| 239 | legacy_pic = &null_legacy_pic; | 239 | legacy_pic = &null_legacy_pic; |
| 240 | |||
| 241 | /* Avoid searching for BIOS MP tables */ | ||
| 242 | x86_init.mpparse.find_smp_config = x86_init_noop; | ||
| 243 | x86_init.mpparse.get_smp_config = x86_init_uint_noop; | ||
| 244 | |||
| 240 | } | 245 | } |
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 4d4468e9f47c..7bf2dc4c8f70 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c | |||
| @@ -230,7 +230,7 @@ static int __cpuinit msr_class_cpu_callback(struct notifier_block *nfb, | |||
| 230 | msr_device_destroy(cpu); | 230 | msr_device_destroy(cpu); |
| 231 | break; | 231 | break; |
| 232 | } | 232 | } |
| 233 | return err ? NOTIFY_BAD : NOTIFY_OK; | 233 | return notifier_from_errno(err); |
| 234 | } | 234 | } |
| 235 | 235 | ||
| 236 | static struct notifier_block __refdata msr_class_cpu_notifier = { | 236 | static struct notifier_block __refdata msr_class_cpu_notifier = { |
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 7d2829dde20e..a5bc528d4328 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c | |||
| @@ -31,8 +31,6 @@ static struct dma_map_ops swiotlb_dma_ops = { | |||
| 31 | .free_coherent = swiotlb_free_coherent, | 31 | .free_coherent = swiotlb_free_coherent, |
| 32 | .sync_single_for_cpu = swiotlb_sync_single_for_cpu, | 32 | .sync_single_for_cpu = swiotlb_sync_single_for_cpu, |
| 33 | .sync_single_for_device = swiotlb_sync_single_for_device, | 33 | .sync_single_for_device = swiotlb_sync_single_for_device, |
| 34 | .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu, | ||
| 35 | .sync_single_range_for_device = swiotlb_sync_single_range_for_device, | ||
| 36 | .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, | 34 | .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, |
| 37 | .sync_sg_for_device = swiotlb_sync_sg_for_device, | 35 | .sync_sg_for_device = swiotlb_sync_sg_for_device, |
| 38 | .map_sg = swiotlb_map_sg_attrs, | 36 | .map_sg = swiotlb_map_sg_attrs, |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 28ad9f4d8b94..e7e35219b32f 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
| @@ -20,7 +20,6 @@ | |||
| 20 | #include <asm/idle.h> | 20 | #include <asm/idle.h> |
| 21 | #include <asm/uaccess.h> | 21 | #include <asm/uaccess.h> |
| 22 | #include <asm/i387.h> | 22 | #include <asm/i387.h> |
| 23 | #include <asm/ds.h> | ||
| 24 | #include <asm/debugreg.h> | 23 | #include <asm/debugreg.h> |
| 25 | 24 | ||
| 26 | unsigned long idle_halt; | 25 | unsigned long idle_halt; |
| @@ -32,26 +31,22 @@ struct kmem_cache *task_xstate_cachep; | |||
| 32 | 31 | ||
| 33 | int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) | 32 | int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) |
| 34 | { | 33 | { |
| 34 | int ret; | ||
| 35 | |||
| 35 | *dst = *src; | 36 | *dst = *src; |
| 36 | if (src->thread.xstate) { | 37 | if (fpu_allocated(&src->thread.fpu)) { |
| 37 | dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep, | 38 | memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu)); |
| 38 | GFP_KERNEL); | 39 | ret = fpu_alloc(&dst->thread.fpu); |
| 39 | if (!dst->thread.xstate) | 40 | if (ret) |
| 40 | return -ENOMEM; | 41 | return ret; |
| 41 | WARN_ON((unsigned long)dst->thread.xstate & 15); | 42 | fpu_copy(&dst->thread.fpu, &src->thread.fpu); |
| 42 | memcpy(dst->thread.xstate, src->thread.xstate, xstate_size); | ||
| 43 | } | 43 | } |
| 44 | return 0; | 44 | return 0; |
| 45 | } | 45 | } |
| 46 | 46 | ||
| 47 | void free_thread_xstate(struct task_struct *tsk) | 47 | void free_thread_xstate(struct task_struct *tsk) |
| 48 | { | 48 | { |
| 49 | if (tsk->thread.xstate) { | 49 | fpu_free(&tsk->thread.fpu); |
| 50 | kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); | ||
| 51 | tsk->thread.xstate = NULL; | ||
| 52 | } | ||
| 53 | |||
| 54 | WARN(tsk->thread.ds_ctx, "leaking DS context\n"); | ||
| 55 | } | 50 | } |
| 56 | 51 | ||
| 57 | void free_thread_info(struct thread_info *ti) | 52 | void free_thread_info(struct thread_info *ti) |
| @@ -198,11 +193,16 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
| 198 | prev = &prev_p->thread; | 193 | prev = &prev_p->thread; |
| 199 | next = &next_p->thread; | 194 | next = &next_p->thread; |
| 200 | 195 | ||
| 201 | if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) || | 196 | if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^ |
| 202 | test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR)) | 197 | test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) { |
| 203 | ds_switch_to(prev_p, next_p); | 198 | unsigned long debugctl = get_debugctlmsr(); |
| 204 | else if (next->debugctlmsr != prev->debugctlmsr) | 199 | |
| 205 | update_debugctlmsr(next->debugctlmsr); | 200 | debugctl &= ~DEBUGCTLMSR_BTF; |
| 201 | if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) | ||
| 202 | debugctl |= DEBUGCTLMSR_BTF; | ||
| 203 | |||
| 204 | update_debugctlmsr(debugctl); | ||
| 205 | } | ||
| 206 | 206 | ||
| 207 | if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ | 207 | if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ |
| 208 | test_tsk_thread_flag(next_p, TIF_NOTSC)) { | 208 | test_tsk_thread_flag(next_p, TIF_NOTSC)) { |
| @@ -546,11 +546,13 @@ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c) | |||
| 546 | * check OSVW bit for CPUs that are not affected | 546 | * check OSVW bit for CPUs that are not affected |
| 547 | * by erratum #400 | 547 | * by erratum #400 |
| 548 | */ | 548 | */ |
| 549 | rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, val); | 549 | if (cpu_has(c, X86_FEATURE_OSVW)) { |
| 550 | if (val >= 2) { | 550 | rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, val); |
| 551 | rdmsrl(MSR_AMD64_OSVW_STATUS, val); | 551 | if (val >= 2) { |
| 552 | if (!(val & BIT(1))) | 552 | rdmsrl(MSR_AMD64_OSVW_STATUS, val); |
| 553 | goto no_c1e_idle; | 553 | if (!(val & BIT(1))) |
| 554 | goto no_c1e_idle; | ||
| 555 | } | ||
| 554 | } | 556 | } |
| 555 | return 1; | 557 | return 1; |
| 556 | } | 558 | } |
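In the __switch_to_xtra() hunk above, the old DS-area/debugctlmsr bookkeeping is replaced by keying the BTF (branch single-step) bit purely off TIF_BLOCKSTEP, and the XOR of the outgoing and incoming tasks' flags means the MSR is read and rewritten only when the two tasks actually disagree. The same pattern restated in isolation, as a sketch (the standalone helper name is illustrative; the real logic stays inline in __switch_to_xtra()):

/* Toggle block-step only when prev and next differ, so the common case
 * of neither (or both) tasks using block-step costs no MSR access. */
static void switch_blockstep(struct task_struct *prev_p,
                             struct task_struct *next_p)
{
        if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^
            test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) {
                unsigned long debugctl = get_debugctlmsr();

                debugctl &= ~DEBUGCTLMSR_BTF;
                if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP))
                        debugctl |= DEBUGCTLMSR_BTF;

                update_debugctlmsr(debugctl);
        }
}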
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index f6c62667e30c..8d128783af47 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
| @@ -55,7 +55,6 @@ | |||
| 55 | #include <asm/cpu.h> | 55 | #include <asm/cpu.h> |
| 56 | #include <asm/idle.h> | 56 | #include <asm/idle.h> |
| 57 | #include <asm/syscalls.h> | 57 | #include <asm/syscalls.h> |
| 58 | #include <asm/ds.h> | ||
| 59 | #include <asm/debugreg.h> | 58 | #include <asm/debugreg.h> |
| 60 | 59 | ||
| 61 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | 60 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); |
| @@ -238,13 +237,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
| 238 | kfree(p->thread.io_bitmap_ptr); | 237 | kfree(p->thread.io_bitmap_ptr); |
| 239 | p->thread.io_bitmap_max = 0; | 238 | p->thread.io_bitmap_max = 0; |
| 240 | } | 239 | } |
| 241 | |||
| 242 | clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); | ||
| 243 | p->thread.ds_ctx = NULL; | ||
| 244 | |||
| 245 | clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); | ||
| 246 | p->thread.debugctlmsr = 0; | ||
| 247 | |||
| 248 | return err; | 240 | return err; |
| 249 | } | 241 | } |
| 250 | 242 | ||
| @@ -317,7 +309,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
| 317 | 309 | ||
| 318 | /* we're going to use this soon, after a few expensive things */ | 310 | /* we're going to use this soon, after a few expensive things */ |
| 319 | if (preload_fpu) | 311 | if (preload_fpu) |
| 320 | prefetch(next->xstate); | 312 | prefetch(next->fpu.state); |
| 321 | 313 | ||
| 322 | /* | 314 | /* |
| 323 | * Reload esp0. | 315 | * Reload esp0. |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 17cb3295cbf7..3c2422a99f1f 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
| @@ -49,7 +49,6 @@ | |||
| 49 | #include <asm/ia32.h> | 49 | #include <asm/ia32.h> |
| 50 | #include <asm/idle.h> | 50 | #include <asm/idle.h> |
| 51 | #include <asm/syscalls.h> | 51 | #include <asm/syscalls.h> |
| 52 | #include <asm/ds.h> | ||
| 53 | #include <asm/debugreg.h> | 52 | #include <asm/debugreg.h> |
| 54 | 53 | ||
| 55 | asmlinkage extern void ret_from_fork(void); | 54 | asmlinkage extern void ret_from_fork(void); |
| @@ -313,13 +312,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
| 313 | if (err) | 312 | if (err) |
| 314 | goto out; | 313 | goto out; |
| 315 | } | 314 | } |
| 316 | |||
| 317 | clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); | ||
| 318 | p->thread.ds_ctx = NULL; | ||
| 319 | |||
| 320 | clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); | ||
| 321 | p->thread.debugctlmsr = 0; | ||
| 322 | |||
| 323 | err = 0; | 315 | err = 0; |
| 324 | out: | 316 | out: |
| 325 | if (err && p->thread.io_bitmap_ptr) { | 317 | if (err && p->thread.io_bitmap_ptr) { |
| @@ -396,7 +388,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
| 396 | 388 | ||
| 397 | /* we're going to use this soon, after a few expensive things */ | 389 | /* we're going to use this soon, after a few expensive things */ |
| 398 | if (preload_fpu) | 390 | if (preload_fpu) |
| 399 | prefetch(next->xstate); | 391 | prefetch(next->fpu.state); |
| 400 | 392 | ||
| 401 | /* | 393 | /* |
| 402 | * Reload esp0, LDT and the page table pointer: | 394 | * Reload esp0, LDT and the page table pointer: |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 2e9b55027b7e..70c4872cd8aa 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
| @@ -2,9 +2,6 @@ | |||
| 2 | /* | 2 | /* |
| 3 | * Pentium III FXSR, SSE support | 3 | * Pentium III FXSR, SSE support |
| 4 | * Gareth Hughes <gareth@valinux.com>, May 2000 | 4 | * Gareth Hughes <gareth@valinux.com>, May 2000 |
| 5 | * | ||
| 6 | * BTS tracing | ||
| 7 | * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007 | ||
| 8 | */ | 5 | */ |
| 9 | 6 | ||
| 10 | #include <linux/kernel.h> | 7 | #include <linux/kernel.h> |
| @@ -22,7 +19,6 @@ | |||
| 22 | #include <linux/audit.h> | 19 | #include <linux/audit.h> |
| 23 | #include <linux/seccomp.h> | 20 | #include <linux/seccomp.h> |
| 24 | #include <linux/signal.h> | 21 | #include <linux/signal.h> |
| 25 | #include <linux/workqueue.h> | ||
| 26 | #include <linux/perf_event.h> | 22 | #include <linux/perf_event.h> |
| 27 | #include <linux/hw_breakpoint.h> | 23 | #include <linux/hw_breakpoint.h> |
| 28 | 24 | ||
| @@ -36,7 +32,6 @@ | |||
| 36 | #include <asm/desc.h> | 32 | #include <asm/desc.h> |
| 37 | #include <asm/prctl.h> | 33 | #include <asm/prctl.h> |
| 38 | #include <asm/proto.h> | 34 | #include <asm/proto.h> |
| 39 | #include <asm/ds.h> | ||
| 40 | #include <asm/hw_breakpoint.h> | 35 | #include <asm/hw_breakpoint.h> |
| 41 | 36 | ||
| 42 | #include "tls.h" | 37 | #include "tls.h" |
| @@ -693,7 +688,7 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, | |||
| 693 | struct perf_event_attr attr; | 688 | struct perf_event_attr attr; |
| 694 | 689 | ||
| 695 | if (!t->ptrace_bps[nr]) { | 690 | if (!t->ptrace_bps[nr]) { |
| 696 | hw_breakpoint_init(&attr); | 691 | ptrace_breakpoint_init(&attr); |
| 697 | /* | 692 | /* |
| 698 | * Put stub len and type to register (reserve) an inactive but | 693 | * Put stub len and type to register (reserve) an inactive but |
| 699 | * correct bp | 694 | * correct bp |
| @@ -789,342 +784,6 @@ static int ioperm_get(struct task_struct *target, | |||
| 789 | 0, IO_BITMAP_BYTES); | 784 | 0, IO_BITMAP_BYTES); |
| 790 | } | 785 | } |
| 791 | 786 | ||
| 792 | #ifdef CONFIG_X86_PTRACE_BTS | ||
| 793 | /* | ||
| 794 | * A branch trace store context. | ||
| 795 | * | ||
| 796 | * Contexts may only be installed by ptrace_bts_config() and only for | ||
| 797 | * ptraced tasks. | ||
| 798 | * | ||
| 799 | * Contexts are destroyed when the tracee is detached from the tracer. | ||
| 800 | * The actual destruction work requires interrupts enabled, so the | ||
| 801 | * work is deferred and will be scheduled during __ptrace_unlink(). | ||
| 802 | * | ||
| 803 | * Contexts hold an additional task_struct reference on the traced | ||
| 804 | * task, as well as a reference on the tracer's mm. | ||
| 805 | * | ||
| 806 | * Ptrace already holds a task_struct for the duration of ptrace operations, | ||
| 807 | * but since destruction is deferred, it may be executed after both | ||
| 808 | * tracer and tracee exited. | ||
| 809 | */ | ||
| 810 | struct bts_context { | ||
| 811 | /* The branch trace handle. */ | ||
| 812 | struct bts_tracer *tracer; | ||
| 813 | |||
| 814 | /* The buffer used to store the branch trace and its size. */ | ||
| 815 | void *buffer; | ||
| 816 | unsigned int size; | ||
| 817 | |||
| 818 | /* The mm that paid for the above buffer. */ | ||
| 819 | struct mm_struct *mm; | ||
| 820 | |||
| 821 | /* The task this context belongs to. */ | ||
| 822 | struct task_struct *task; | ||
| 823 | |||
| 824 | /* The signal to send on a bts buffer overflow. */ | ||
| 825 | unsigned int bts_ovfl_signal; | ||
| 826 | |||
| 827 | /* The work struct to destroy a context. */ | ||
| 828 | struct work_struct work; | ||
| 829 | }; | ||
| 830 | |||
| 831 | static int alloc_bts_buffer(struct bts_context *context, unsigned int size) | ||
| 832 | { | ||
| 833 | void *buffer = NULL; | ||
| 834 | int err = -ENOMEM; | ||
| 835 | |||
| 836 | err = account_locked_memory(current->mm, current->signal->rlim, size); | ||
| 837 | if (err < 0) | ||
| 838 | return err; | ||
| 839 | |||
| 840 | buffer = kzalloc(size, GFP_KERNEL); | ||
| 841 | if (!buffer) | ||
| 842 | goto out_refund; | ||
| 843 | |||
| 844 | context->buffer = buffer; | ||
| 845 | context->size = size; | ||
| 846 | context->mm = get_task_mm(current); | ||
| 847 | |||
| 848 | return 0; | ||
| 849 | |||
| 850 | out_refund: | ||
| 851 | refund_locked_memory(current->mm, size); | ||
| 852 | return err; | ||
| 853 | } | ||
| 854 | |||
| 855 | static inline void free_bts_buffer(struct bts_context *context) | ||
| 856 | { | ||
| 857 | if (!context->buffer) | ||
| 858 | return; | ||
| 859 | |||
| 860 | kfree(context->buffer); | ||
| 861 | context->buffer = NULL; | ||
| 862 | |||
| 863 | refund_locked_memory(context->mm, context->size); | ||
| 864 | context->size = 0; | ||
| 865 | |||
| 866 | mmput(context->mm); | ||
| 867 | context->mm = NULL; | ||
| 868 | } | ||
| 869 | |||
| 870 | static void free_bts_context_work(struct work_struct *w) | ||
| 871 | { | ||
| 872 | struct bts_context *context; | ||
| 873 | |||
| 874 | context = container_of(w, struct bts_context, work); | ||
| 875 | |||
| 876 | ds_release_bts(context->tracer); | ||
| 877 | put_task_struct(context->task); | ||
| 878 | free_bts_buffer(context); | ||
| 879 | kfree(context); | ||
| 880 | } | ||
| 881 | |||
| 882 | static inline void free_bts_context(struct bts_context *context) | ||
| 883 | { | ||
| 884 | INIT_WORK(&context->work, free_bts_context_work); | ||
| 885 | schedule_work(&context->work); | ||
| 886 | } | ||
| 887 | |||
| 888 | static inline struct bts_context *alloc_bts_context(struct task_struct *task) | ||
| 889 | { | ||
| 890 | struct bts_context *context = kzalloc(sizeof(*context), GFP_KERNEL); | ||
| 891 | if (context) { | ||
| 892 | context->task = task; | ||
| 893 | task->bts = context; | ||
| 894 | |||
| 895 | get_task_struct(task); | ||
| 896 | } | ||
| 897 | |||
| 898 | return context; | ||
| 899 | } | ||
| 900 | |||
| 901 | static int ptrace_bts_read_record(struct task_struct *child, size_t index, | ||
| 902 | struct bts_struct __user *out) | ||
| 903 | { | ||
| 904 | struct bts_context *context; | ||
| 905 | const struct bts_trace *trace; | ||
| 906 | struct bts_struct bts; | ||
| 907 | const unsigned char *at; | ||
| 908 | int error; | ||
| 909 | |||
| 910 | context = child->bts; | ||
| 911 | if (!context) | ||
| 912 | return -ESRCH; | ||
| 913 | |||
| 914 | trace = ds_read_bts(context->tracer); | ||
| 915 | if (!trace) | ||
| 916 | return -ESRCH; | ||
| 917 | |||
| 918 | at = trace->ds.top - ((index + 1) * trace->ds.size); | ||
| 919 | if ((void *)at < trace->ds.begin) | ||
| 920 | at += (trace->ds.n * trace->ds.size); | ||
| 921 | |||
| 922 | if (!trace->read) | ||
| 923 | return -EOPNOTSUPP; | ||
| 924 | |||
| 925 | error = trace->read(context->tracer, at, &bts); | ||
| 926 | if (error < 0) | ||
| 927 | return error; | ||
| 928 | |||
| 929 | if (copy_to_user(out, &bts, sizeof(bts))) | ||
| 930 | return -EFAULT; | ||
| 931 | |||
| 932 | return sizeof(bts); | ||
| 933 | } | ||
| 934 | |||
| 935 | static int ptrace_bts_drain(struct task_struct *child, | ||
| 936 | long size, | ||
| 937 | struct bts_struct __user *out) | ||
| 938 | { | ||
| 939 | struct bts_context *context; | ||
| 940 | const struct bts_trace *trace; | ||
| 941 | const unsigned char *at; | ||
| 942 | int error, drained = 0; | ||
| 943 | |||
| 944 | context = child->bts; | ||
| 945 | if (!context) | ||
| 946 | return -ESRCH; | ||
| 947 | |||
| 948 | trace = ds_read_bts(context->tracer); | ||
| 949 | if (!trace) | ||
| 950 | return -ESRCH; | ||
| 951 | |||
| 952 | if (!trace->read) | ||
| 953 | return -EOPNOTSUPP; | ||
| 954 | |||
| 955 | if (size < (trace->ds.top - trace->ds.begin)) | ||
| 956 | return -EIO; | ||
| 957 | |||
| 958 | for (at = trace->ds.begin; (void *)at < trace->ds.top; | ||
| 959 | out++, drained++, at += trace->ds.size) { | ||
| 960 | struct bts_struct bts; | ||
| 961 | |||
| 962 | error = trace->read(context->tracer, at, &bts); | ||
| 963 | if (error < 0) | ||
| 964 | return error; | ||
| 965 | |||
| 966 | if (copy_to_user(out, &bts, sizeof(bts))) | ||
| 967 | return -EFAULT; | ||
| 968 | } | ||
| 969 | |||
| 970 | memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); | ||
| 971 | |||
| 972 | error = ds_reset_bts(context->tracer); | ||
| 973 | if (error < 0) | ||
| 974 | return error; | ||
| 975 | |||
| 976 | return drained; | ||
| 977 | } | ||
| 978 | |||
| 979 | static int ptrace_bts_config(struct task_struct *child, | ||
| 980 | long cfg_size, | ||
| 981 | const struct ptrace_bts_config __user *ucfg) | ||
| 982 | { | ||
| 983 | struct bts_context *context; | ||
| 984 | struct ptrace_bts_config cfg; | ||
| 985 | unsigned int flags = 0; | ||
| 986 | |||
| 987 | if (cfg_size < sizeof(cfg)) | ||
| 988 | return -EIO; | ||
| 989 | |||
| 990 | if (copy_from_user(&cfg, ucfg, sizeof(cfg))) | ||
| 991 | return -EFAULT; | ||
| 992 | |||
| 993 | context = child->bts; | ||
| 994 | if (!context) | ||
| 995 | context = alloc_bts_context(child); | ||
| 996 | if (!context) | ||
| 997 | return -ENOMEM; | ||
| 998 | |||
| 999 | if (cfg.flags & PTRACE_BTS_O_SIGNAL) { | ||
| 1000 | if (!cfg.signal) | ||
| 1001 | return -EINVAL; | ||
| 1002 | |||
| 1003 | return -EOPNOTSUPP; | ||
| 1004 | context->bts_ovfl_signal = cfg.signal; | ||
| 1005 | } | ||
| 1006 | |||
| 1007 | ds_release_bts(context->tracer); | ||
| 1008 | context->tracer = NULL; | ||
| 1009 | |||
| 1010 | if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != context->size)) { | ||
| 1011 | int err; | ||
| 1012 | |||
| 1013 | free_bts_buffer(context); | ||
| 1014 | if (!cfg.size) | ||
| 1015 | return 0; | ||
| 1016 | |||
| 1017 | err = alloc_bts_buffer(context, cfg.size); | ||
| 1018 | if (err < 0) | ||
| 1019 | return err; | ||
| 1020 | } | ||
| 1021 | |||
| 1022 | if (cfg.flags & PTRACE_BTS_O_TRACE) | ||
| 1023 | flags |= BTS_USER; | ||
| 1024 | |||
| 1025 | if (cfg.flags & PTRACE_BTS_O_SCHED) | ||
| 1026 | flags |= BTS_TIMESTAMPS; | ||
| 1027 | |||
| 1028 | context->tracer = | ||
| 1029 | ds_request_bts_task(child, context->buffer, context->size, | ||
| 1030 | NULL, (size_t)-1, flags); | ||
| 1031 | if (unlikely(IS_ERR(context->tracer))) { | ||
| 1032 | int error = PTR_ERR(context->tracer); | ||
| 1033 | |||
| 1034 | free_bts_buffer(context); | ||
| 1035 | context->tracer = NULL; | ||
| 1036 | return error; | ||
| 1037 | } | ||
| 1038 | |||
| 1039 | return sizeof(cfg); | ||
| 1040 | } | ||
| 1041 | |||
| 1042 | static int ptrace_bts_status(struct task_struct *child, | ||
| 1043 | long cfg_size, | ||
| 1044 | struct ptrace_bts_config __user *ucfg) | ||
| 1045 | { | ||
| 1046 | struct bts_context *context; | ||
| 1047 | const struct bts_trace *trace; | ||
| 1048 | struct ptrace_bts_config cfg; | ||
| 1049 | |||
| 1050 | context = child->bts; | ||
| 1051 | if (!context) | ||
| 1052 | return -ESRCH; | ||
| 1053 | |||
| 1054 | if (cfg_size < sizeof(cfg)) | ||
| 1055 | return -EIO; | ||
| 1056 | |||
| 1057 | trace = ds_read_bts(context->tracer); | ||
| 1058 | if (!trace) | ||
| 1059 | return -ESRCH; | ||
| 1060 | |||
| 1061 | memset(&cfg, 0, sizeof(cfg)); | ||
| 1062 | cfg.size = trace->ds.end - trace->ds.begin; | ||
| 1063 | cfg.signal = context->bts_ovfl_signal; | ||
| 1064 | cfg.bts_size = sizeof(struct bts_struct); | ||
| 1065 | |||
| 1066 | if (cfg.signal) | ||
| 1067 | cfg.flags |= PTRACE_BTS_O_SIGNAL; | ||
| 1068 | |||
| 1069 | if (trace->ds.flags & BTS_USER) | ||
| 1070 | cfg.flags |= PTRACE_BTS_O_TRACE; | ||
| 1071 | |||
| 1072 | if (trace->ds.flags & BTS_TIMESTAMPS) | ||
| 1073 | cfg.flags |= PTRACE_BTS_O_SCHED; | ||
| 1074 | |||
| 1075 | if (copy_to_user(ucfg, &cfg, sizeof(cfg))) | ||
| 1076 | return -EFAULT; | ||
| 1077 | |||
| 1078 | return sizeof(cfg); | ||
| 1079 | } | ||
| 1080 | |||
| 1081 | static int ptrace_bts_clear(struct task_struct *child) | ||
| 1082 | { | ||
| 1083 | struct bts_context *context; | ||
| 1084 | const struct bts_trace *trace; | ||
| 1085 | |||
| 1086 | context = child->bts; | ||
| 1087 | if (!context) | ||
| 1088 | return -ESRCH; | ||
| 1089 | |||
| 1090 | trace = ds_read_bts(context->tracer); | ||
| 1091 | if (!trace) | ||
| 1092 | return -ESRCH; | ||
| 1093 | |||
| 1094 | memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); | ||
| 1095 | |||
| 1096 | return ds_reset_bts(context->tracer); | ||
| 1097 | } | ||
| 1098 | |||
| 1099 | static int ptrace_bts_size(struct task_struct *child) | ||
| 1100 | { | ||
| 1101 | struct bts_context *context; | ||
| 1102 | const struct bts_trace *trace; | ||
| 1103 | |||
| 1104 | context = child->bts; | ||
| 1105 | if (!context) | ||
| 1106 | return -ESRCH; | ||
| 1107 | |||
| 1108 | trace = ds_read_bts(context->tracer); | ||
| 1109 | if (!trace) | ||
| 1110 | return -ESRCH; | ||
| 1111 | |||
| 1112 | return (trace->ds.top - trace->ds.begin) / trace->ds.size; | ||
| 1113 | } | ||
| 1114 | |||
| 1115 | /* | ||
| 1116 | * Called from __ptrace_unlink() after the child has been moved back | ||
| 1117 | * to its original parent. | ||
| 1118 | */ | ||
| 1119 | void ptrace_bts_untrace(struct task_struct *child) | ||
| 1120 | { | ||
| 1121 | if (unlikely(child->bts)) { | ||
| 1122 | free_bts_context(child->bts); | ||
| 1123 | child->bts = NULL; | ||
| 1124 | } | ||
| 1125 | } | ||
| 1126 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
| 1127 | |||
| 1128 | /* | 787 | /* |
| 1129 | * Called by kernel/ptrace.c when detaching.. | 788 | * Called by kernel/ptrace.c when detaching.. |
| 1130 | * | 789 | * |
| @@ -1252,39 +911,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) | |||
| 1252 | break; | 911 | break; |
| 1253 | #endif | 912 | #endif |
| 1254 | 913 | ||
| 1255 | /* | ||
| 1256 | * These bits need more cooking - not enabled yet: | ||
| 1257 | */ | ||
| 1258 | #ifdef CONFIG_X86_PTRACE_BTS | ||
| 1259 | case PTRACE_BTS_CONFIG: | ||
| 1260 | ret = ptrace_bts_config | ||
| 1261 | (child, data, (struct ptrace_bts_config __user *)addr); | ||
| 1262 | break; | ||
| 1263 | |||
| 1264 | case PTRACE_BTS_STATUS: | ||
| 1265 | ret = ptrace_bts_status | ||
| 1266 | (child, data, (struct ptrace_bts_config __user *)addr); | ||
| 1267 | break; | ||
| 1268 | |||
| 1269 | case PTRACE_BTS_SIZE: | ||
| 1270 | ret = ptrace_bts_size(child); | ||
| 1271 | break; | ||
| 1272 | |||
| 1273 | case PTRACE_BTS_GET: | ||
| 1274 | ret = ptrace_bts_read_record | ||
| 1275 | (child, data, (struct bts_struct __user *) addr); | ||
| 1276 | break; | ||
| 1277 | |||
| 1278 | case PTRACE_BTS_CLEAR: | ||
| 1279 | ret = ptrace_bts_clear(child); | ||
| 1280 | break; | ||
| 1281 | |||
| 1282 | case PTRACE_BTS_DRAIN: | ||
| 1283 | ret = ptrace_bts_drain | ||
| 1284 | (child, data, (struct bts_struct __user *) addr); | ||
| 1285 | break; | ||
| 1286 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
| 1287 | |||
| 1288 | default: | 914 | default: |
| 1289 | ret = ptrace_request(child, request, addr, data); | 915 | ret = ptrace_request(child, request, addr, data); |
| 1290 | break; | 916 | break; |
| @@ -1544,14 +1170,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, | |||
| 1544 | 1170 | ||
| 1545 | case PTRACE_GET_THREAD_AREA: | 1171 | case PTRACE_GET_THREAD_AREA: |
| 1546 | case PTRACE_SET_THREAD_AREA: | 1172 | case PTRACE_SET_THREAD_AREA: |
| 1547 | #ifdef CONFIG_X86_PTRACE_BTS | ||
| 1548 | case PTRACE_BTS_CONFIG: | ||
| 1549 | case PTRACE_BTS_STATUS: | ||
| 1550 | case PTRACE_BTS_SIZE: | ||
| 1551 | case PTRACE_BTS_GET: | ||
| 1552 | case PTRACE_BTS_CLEAR: | ||
| 1553 | case PTRACE_BTS_DRAIN: | ||
| 1554 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
| 1555 | return arch_ptrace(child, request, addr, data); | 1173 | return arch_ptrace(child, request, addr, data); |
| 1556 | 1174 | ||
| 1557 | default: | 1175 | default: |
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 03801f2f761f..239427ca02af 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c | |||
| @@ -31,8 +31,16 @@ struct pvclock_shadow_time { | |||
| 31 | u32 tsc_to_nsec_mul; | 31 | u32 tsc_to_nsec_mul; |
| 32 | int tsc_shift; | 32 | int tsc_shift; |
| 33 | u32 version; | 33 | u32 version; |
| 34 | u8 flags; | ||
| 34 | }; | 35 | }; |
| 35 | 36 | ||
| 37 | static u8 valid_flags __read_mostly = 0; | ||
| 38 | |||
| 39 | void pvclock_set_flags(u8 flags) | ||
| 40 | { | ||
| 41 | valid_flags = flags; | ||
| 42 | } | ||
| 43 | |||
| 36 | /* | 44 | /* |
| 37 | * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, | 45 | * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, |
| 38 | * yielding a 64-bit result. | 46 | * yielding a 64-bit result. |
| @@ -91,6 +99,7 @@ static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst, | |||
| 91 | dst->system_timestamp = src->system_time; | 99 | dst->system_timestamp = src->system_time; |
| 92 | dst->tsc_to_nsec_mul = src->tsc_to_system_mul; | 100 | dst->tsc_to_nsec_mul = src->tsc_to_system_mul; |
| 93 | dst->tsc_shift = src->tsc_shift; | 101 | dst->tsc_shift = src->tsc_shift; |
| 102 | dst->flags = src->flags; | ||
| 94 | rmb(); /* test version after fetching data */ | 103 | rmb(); /* test version after fetching data */ |
| 95 | } while ((src->version & 1) || (dst->version != src->version)); | 104 | } while ((src->version & 1) || (dst->version != src->version)); |
| 96 | 105 | ||
| @@ -109,11 +118,14 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src) | |||
| 109 | return pv_tsc_khz; | 118 | return pv_tsc_khz; |
| 110 | } | 119 | } |
| 111 | 120 | ||
| 121 | static atomic64_t last_value = ATOMIC64_INIT(0); | ||
| 122 | |||
| 112 | cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) | 123 | cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) |
| 113 | { | 124 | { |
| 114 | struct pvclock_shadow_time shadow; | 125 | struct pvclock_shadow_time shadow; |
| 115 | unsigned version; | 126 | unsigned version; |
| 116 | cycle_t ret, offset; | 127 | cycle_t ret, offset; |
| 128 | u64 last; | ||
| 117 | 129 | ||
| 118 | do { | 130 | do { |
| 119 | version = pvclock_get_time_values(&shadow, src); | 131 | version = pvclock_get_time_values(&shadow, src); |
| @@ -123,6 +135,31 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) | |||
| 123 | barrier(); | 135 | barrier(); |
| 124 | } while (version != src->version); | 136 | } while (version != src->version); |
| 125 | 137 | ||
| 138 | if ((valid_flags & PVCLOCK_TSC_STABLE_BIT) && | ||
| 139 | (shadow.flags & PVCLOCK_TSC_STABLE_BIT)) | ||
| 140 | return ret; | ||
| 141 | |||
| 142 | /* | ||
| 143 | * Assumption here is that last_value, a global accumulator, always goes | ||
| 144 | * forward. If we are less than that, we should not be much smaller. | ||
| 145 | * We assume there is an error margin we're inside, and then the correction | ||
| 146 | * does not sacrifice accuracy. | ||
| 147 | * | ||
| 148 | * For reads: global may have changed between test and return, | ||
| 149 | * but this means someone else poked the clock at a later time. | ||
| 150 | * We just need to make sure we are not seeing a backwards event. | ||
| 151 | * | ||
| 152 | * For updates: last_value = ret is not enough, since two vcpus could be | ||
| 153 | * updating at the same time, and one of them could be slightly behind, | ||
| 154 | * making the assumption that last_value always goes forward fail to hold. | ||
| 155 | */ | ||
| 156 | last = atomic64_read(&last_value); | ||
| 157 | do { | ||
| 158 | if (ret < last) | ||
| 159 | return last; | ||
| 160 | last = atomic64_cmpxchg(&last_value, last, ret); | ||
| 161 | } while (unlikely(last != ret)); | ||
| 162 | |||
| 126 | return ret; | 163 | return ret; |
| 127 | } | 164 | } |
| 128 | 165 | ||
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 12e9feaa2f7a..e72d3fc6547d 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c | |||
| @@ -495,10 +495,18 @@ void force_hpet_resume(void) | |||
| 495 | /* | 495 | /* |
| 496 | * HPET MSI on some boards (ATI SB700/SB800) has side effect on | 496 | * HPET MSI on some boards (ATI SB700/SB800) has side effect on |
| 497 | * floppy DMA. Disable HPET MSI on such platforms. | 497 | * floppy DMA. Disable HPET MSI on such platforms. |
| 498 | * See erratum #27 (Misinterpreted MSI Requests May Result in | ||
| 499 | * Corrupted LPC DMA Data) in AMD Publication #46837, | ||
| 500 | * "SB700 Family Product Errata", Rev. 1.0, March 2010. | ||
| 501 | * | ||
| 502 | * Also force the read back of the CMP register in hpet_next_event() | ||
| 503 | * to work around the problem that the CMP register write seems to be | ||
| 504 | * delayed. See hpet_next_event() for details. | ||
| 498 | */ | 505 | */ |
| 499 | static void force_disable_hpet_msi(struct pci_dev *unused) | 506 | static void force_disable_hpet_msi(struct pci_dev *unused) |
| 500 | { | 507 | { |
| 501 | hpet_msi_disable = 1; | 508 | hpet_msi_disable = 1; |
| 509 | hpet_readback_cmp = 1; | ||
| 502 | } | 510 | } |
| 503 | 511 | ||
| 504 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, | 512 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index c4851eff57b3..b4ae4acbd031 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
| @@ -676,6 +676,17 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = { | |||
| 676 | DMI_MATCH(DMI_BOARD_NAME, "DG45FC"), | 676 | DMI_MATCH(DMI_BOARD_NAME, "DG45FC"), |
| 677 | }, | 677 | }, |
| 678 | }, | 678 | }, |
| 679 | /* | ||
| 680 | * The Dell Inspiron Mini 1012 has DMI_BIOS_VENDOR = "Dell Inc.", so | ||
| 681 | * match on the product name. | ||
| 682 | */ | ||
| 683 | { | ||
| 684 | .callback = dmi_low_memory_corruption, | ||
| 685 | .ident = "Phoenix BIOS", | ||
| 686 | .matches = { | ||
| 687 | DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 1012"), | ||
| 688 | }, | ||
| 689 | }, | ||
| 679 | #endif | 690 | #endif |
| 680 | {} | 691 | {} |
| 681 | }; | 692 | }; |
| @@ -725,6 +736,7 @@ void __init setup_arch(char **cmdline_p) | |||
| 725 | /* VMI may relocate the fixmap; do this before touching ioremap area */ | 736 | /* VMI may relocate the fixmap; do this before touching ioremap area */ |
| 726 | vmi_init(); | 737 | vmi_init(); |
| 727 | 738 | ||
| 739 | early_trap_init(); | ||
| 728 | early_cpu_init(); | 740 | early_cpu_init(); |
| 729 | early_ioremap_init(); | 741 | early_ioremap_init(); |
| 730 | 742 | ||
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index ef6370b00e70..a867940a6dfc 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
| @@ -265,10 +265,10 @@ void __init setup_per_cpu_areas(void) | |||
| 265 | 265 | ||
| 266 | #if defined(CONFIG_X86_64) && defined(CONFIG_NUMA) | 266 | #if defined(CONFIG_X86_64) && defined(CONFIG_NUMA) |
| 267 | /* | 267 | /* |
| 268 | * make sure boot cpu node_number is right, when boot cpu is on the | 268 | * make sure boot cpu numa_node is right, when boot cpu is on the |
| 269 | * node that doesn't have mem installed | 269 | * node that doesn't have mem installed |
| 270 | */ | 270 | */ |
| 271 | per_cpu(node_number, boot_cpu_id) = cpu_to_node(boot_cpu_id); | 271 | set_cpu_numa_node(boot_cpu_id, early_cpu_to_node(boot_cpu_id)); |
| 272 | #endif | 272 | #endif |
| 273 | 273 | ||
| 274 | /* Setup node to cpumask map */ | 274 | /* Setup node to cpumask map */ |
diff --git a/arch/x86/kernel/sfi.c b/arch/x86/kernel/sfi.c index 34e099382651..7ded57896c0a 100644 --- a/arch/x86/kernel/sfi.c +++ b/arch/x86/kernel/sfi.c | |||
| @@ -81,7 +81,6 @@ static int __init sfi_parse_cpus(struct sfi_table_header *table) | |||
| 81 | #endif /* CONFIG_X86_LOCAL_APIC */ | 81 | #endif /* CONFIG_X86_LOCAL_APIC */ |
| 82 | 82 | ||
| 83 | #ifdef CONFIG_X86_IO_APIC | 83 | #ifdef CONFIG_X86_IO_APIC |
| 84 | static u32 gsi_base; | ||
| 85 | 84 | ||
| 86 | static int __init sfi_parse_ioapic(struct sfi_table_header *table) | 85 | static int __init sfi_parse_ioapic(struct sfi_table_header *table) |
| 87 | { | 86 | { |
| @@ -94,8 +93,7 @@ static int __init sfi_parse_ioapic(struct sfi_table_header *table) | |||
| 94 | pentry = (struct sfi_apic_table_entry *)sb->pentry; | 93 | pentry = (struct sfi_apic_table_entry *)sb->pentry; |
| 95 | 94 | ||
| 96 | for (i = 0; i < num; i++) { | 95 | for (i = 0; i < num; i++) { |
| 97 | mp_register_ioapic(i, pentry->phys_addr, gsi_base); | 96 | mp_register_ioapic(i, pentry->phys_addr, gsi_end + 1); |
| 98 | gsi_base += io_apic_get_redir_entries(i); | ||
| 99 | pentry++; | 97 | pentry++; |
| 100 | } | 98 | } |
| 101 | 99 | ||
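Both this sfi.c hunk and the mpparse.c hunk earlier now pass gsi_end + 1 as the GSI base for each newly registered IO-APIC instead of keeping a local gsi_base accumulator, relying on the io_apic.c side of this series to advance the global gsi_end as each controller is registered. A comment-style illustration, with made-up addresses and assuming the common 24-entry redirection table per IO-APIC:

/*
 * Illustration only (hypothetical values):
 *
 *   mp_register_ioapic(0, 0xfec00000, 0);            GSIs  0..23, gsi_end = 23
 *   mp_register_ioapic(1, 0xfec10000, gsi_end + 1);  GSIs 24..47, gsi_end = 47
 *
 * Successive controllers thus get back-to-back GSI ranges without each
 * caller tracking a running base of its own.
 */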
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 763d815e27a0..37462f1ddba5 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
| @@ -1215,9 +1215,17 @@ __init void prefill_possible_map(void) | |||
| 1215 | if (!num_processors) | 1215 | if (!num_processors) |
| 1216 | num_processors = 1; | 1216 | num_processors = 1; |
| 1217 | 1217 | ||
| 1218 | if (setup_possible_cpus == -1) | 1218 | i = setup_max_cpus ?: 1; |
| 1219 | possible = num_processors + disabled_cpus; | 1219 | if (setup_possible_cpus == -1) { |
| 1220 | else | 1220 | possible = num_processors; |
| 1221 | #ifdef CONFIG_HOTPLUG_CPU | ||
| 1222 | if (setup_max_cpus) | ||
| 1223 | possible += disabled_cpus; | ||
| 1224 | #else | ||
| 1225 | if (possible > i) | ||
| 1226 | possible = i; | ||
| 1227 | #endif | ||
| 1228 | } else | ||
| 1221 | possible = setup_possible_cpus; | 1229 | possible = setup_possible_cpus; |
| 1222 | 1230 | ||
| 1223 | total_cpus = max_t(int, possible, num_processors + disabled_cpus); | 1231 | total_cpus = max_t(int, possible, num_processors + disabled_cpus); |
| @@ -1230,11 +1238,23 @@ __init void prefill_possible_map(void) | |||
| 1230 | possible = nr_cpu_ids; | 1238 | possible = nr_cpu_ids; |
| 1231 | } | 1239 | } |
| 1232 | 1240 | ||
| 1241 | #ifdef CONFIG_HOTPLUG_CPU | ||
| 1242 | if (!setup_max_cpus) | ||
| 1243 | #endif | ||
| 1244 | if (possible > i) { | ||
| 1245 | printk(KERN_WARNING | ||
| 1246 | "%d Processors exceeds max_cpus limit of %u\n", | ||
| 1247 | possible, setup_max_cpus); | ||
| 1248 | possible = i; | ||
| 1249 | } | ||
| 1250 | |||
| 1233 | printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", | 1251 | printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", |
| 1234 | possible, max_t(int, possible - num_processors, 0)); | 1252 | possible, max_t(int, possible - num_processors, 0)); |
| 1235 | 1253 | ||
| 1236 | for (i = 0; i < possible; i++) | 1254 | for (i = 0; i < possible; i++) |
| 1237 | set_cpu_possible(i, true); | 1255 | set_cpu_possible(i, true); |
| 1256 | for (; i < NR_CPUS; i++) | ||
| 1257 | set_cpu_possible(i, false); | ||
| 1238 | 1258 | ||
| 1239 | nr_cpu_ids = possible; | 1259 | nr_cpu_ids = possible; |
| 1240 | } | 1260 | } |
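The reworked prefill_possible_map() above makes the possible-CPU map honor the maxcpus= boot parameter: without CPU hotplug the count is clamped to maxcpus, with hotplug the disabled (hot-addable) CPUs are reserved only when maxcpus is non-zero, and maxcpus=0 falls back to the boot CPU alone; the trailing loop then explicitly clears every remaining bit up to NR_CPUS. A flattened sketch of the hotplug-enabled policy (the standalone function and names are illustrative; the setup_possible_cpus= override and the nr_cpu_ids clamp are ignored):

/* CONFIG_HOTPLUG_CPU=y case only. */
static int compute_possible(int num_processors, int disabled_cpus,
                            int setup_max_cpus)
{
        int possible = num_processors;

        if (setup_max_cpus)
                possible += disabled_cpus;      /* leave room to hot-add them */
        else if (possible > 1)
                possible = 1;                   /* maxcpus=0: boot CPU only */

        return possible;
}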
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 3149032ff107..58de45ee08b6 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c | |||
| @@ -158,22 +158,6 @@ static int enable_single_step(struct task_struct *child) | |||
| 158 | } | 158 | } |
| 159 | 159 | ||
| 160 | /* | 160 | /* |
| 161 | * Install this value in MSR_IA32_DEBUGCTLMSR whenever child is running. | ||
| 162 | */ | ||
| 163 | static void write_debugctlmsr(struct task_struct *child, unsigned long val) | ||
| 164 | { | ||
| 165 | if (child->thread.debugctlmsr == val) | ||
| 166 | return; | ||
| 167 | |||
| 168 | child->thread.debugctlmsr = val; | ||
| 169 | |||
| 170 | if (child != current) | ||
| 171 | return; | ||
| 172 | |||
| 173 | update_debugctlmsr(val); | ||
| 174 | } | ||
| 175 | |||
| 176 | /* | ||
| 177 | * Enable single or block step. | 161 | * Enable single or block step. |
| 178 | */ | 162 | */ |
| 179 | static void enable_step(struct task_struct *child, bool block) | 163 | static void enable_step(struct task_struct *child, bool block) |
| @@ -186,15 +170,17 @@ static void enable_step(struct task_struct *child, bool block) | |||
| 186 | * that uses user-mode single stepping itself. | 170 | * that uses user-mode single stepping itself. |
| 187 | */ | 171 | */ |
| 188 | if (enable_single_step(child) && block) { | 172 | if (enable_single_step(child) && block) { |
| 189 | set_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | 173 | unsigned long debugctl = get_debugctlmsr(); |
| 190 | write_debugctlmsr(child, | 174 | |
| 191 | child->thread.debugctlmsr | DEBUGCTLMSR_BTF); | 175 | debugctl |= DEBUGCTLMSR_BTF; |
| 192 | } else { | 176 | update_debugctlmsr(debugctl); |
| 193 | write_debugctlmsr(child, | 177 | set_tsk_thread_flag(child, TIF_BLOCKSTEP); |
| 194 | child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF); | 178 | } else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) { |
| 195 | 179 | unsigned long debugctl = get_debugctlmsr(); | |
| 196 | if (!child->thread.debugctlmsr) | 180 | |
| 197 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | 181 | debugctl &= ~DEBUGCTLMSR_BTF; |
| 182 | update_debugctlmsr(debugctl); | ||
| 183 | clear_tsk_thread_flag(child, TIF_BLOCKSTEP); | ||
| 198 | } | 184 | } |
| 199 | } | 185 | } |
| 200 | 186 | ||
| @@ -213,11 +199,13 @@ void user_disable_single_step(struct task_struct *child) | |||
| 213 | /* | 199 | /* |
| 214 | * Make sure block stepping (BTF) is disabled. | 200 | * Make sure block stepping (BTF) is disabled. |
| 215 | */ | 201 | */ |
| 216 | write_debugctlmsr(child, | 202 | if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) { |
| 217 | child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF); | 203 | unsigned long debugctl = get_debugctlmsr(); |
| 218 | 204 | ||
| 219 | if (!child->thread.debugctlmsr) | 205 | debugctl &= ~DEBUGCTLMSR_BTF; |
| 220 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | 206 | update_debugctlmsr(debugctl); |
| 207 | clear_tsk_thread_flag(child, TIF_BLOCKSTEP); | ||
| 208 | } | ||
| 221 | 209 | ||
| 222 | /* Always clear TIF_SINGLESTEP... */ | 210 | /* Always clear TIF_SINGLESTEP... */ |
| 223 | clear_tsk_thread_flag(child, TIF_SINGLESTEP); | 211 | clear_tsk_thread_flag(child, TIF_SINGLESTEP); |
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 86c9f91b48ae..c2f1b26141e2 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c | |||
| @@ -46,6 +46,7 @@ | |||
| 46 | 46 | ||
| 47 | /* Global pointer to shared data; NULL means no measured launch. */ | 47 | /* Global pointer to shared data; NULL means no measured launch. */ |
| 48 | struct tboot *tboot __read_mostly; | 48 | struct tboot *tboot __read_mostly; |
| 49 | EXPORT_SYMBOL(tboot); | ||
| 49 | 50 | ||
| 50 | /* timeout for APs (in secs) to enter wait-for-SIPI state during shutdown */ | 51 | /* timeout for APs (in secs) to enter wait-for-SIPI state during shutdown */ |
| 51 | #define AP_WAIT_TIMEOUT 1 | 52 | #define AP_WAIT_TIMEOUT 1 |
| @@ -175,6 +176,9 @@ static void add_mac_region(phys_addr_t start, unsigned long size) | |||
| 175 | struct tboot_mac_region *mr; | 176 | struct tboot_mac_region *mr; |
| 176 | phys_addr_t end = start + size; | 177 | phys_addr_t end = start + size; |
| 177 | 178 | ||
| 179 | if (tboot->num_mac_regions >= MAX_TB_MAC_REGIONS) | ||
| 180 | panic("tboot: Too many MAC regions\n"); | ||
| 181 | |||
| 178 | if (start && size) { | 182 | if (start && size) { |
| 179 | mr = &tboot->mac_regions[tboot->num_mac_regions++]; | 183 | mr = &tboot->mac_regions[tboot->num_mac_regions++]; |
| 180 | mr->start = round_down(start, PAGE_SIZE); | 184 | mr->start = round_down(start, PAGE_SIZE); |
| @@ -184,18 +188,17 @@ static void add_mac_region(phys_addr_t start, unsigned long size) | |||
| 184 | 188 | ||
| 185 | static int tboot_setup_sleep(void) | 189 | static int tboot_setup_sleep(void) |
| 186 | { | 190 | { |
| 191 | int i; | ||
| 192 | |||
| 187 | tboot->num_mac_regions = 0; | 193 | tboot->num_mac_regions = 0; |
| 188 | 194 | ||
| 189 | /* S3 resume code */ | 195 | for (i = 0; i < e820.nr_map; i++) { |
| 190 | add_mac_region(acpi_wakeup_address, WAKEUP_SIZE); | 196 | if ((e820.map[i].type != E820_RAM) |
| 197 | && (e820.map[i].type != E820_RESERVED_KERN)) | ||
| 198 | continue; | ||
| 191 | 199 | ||
| 192 | #ifdef CONFIG_X86_TRAMPOLINE | 200 | add_mac_region(e820.map[i].addr, e820.map[i].size); |
| 193 | /* AP trampoline code */ | 201 | } |
| 194 | add_mac_region(virt_to_phys(trampoline_base), TRAMPOLINE_SIZE); | ||
| 195 | #endif | ||
| 196 | |||
| 197 | /* kernel code + data + bss */ | ||
| 198 | add_mac_region(virt_to_phys(_text), _end - _text); | ||
| 199 | 202 | ||
| 200 | tboot->acpi_sinfo.kernel_s3_resume_vector = acpi_wakeup_address; | 203 | tboot->acpi_sinfo.kernel_s3_resume_vector = acpi_wakeup_address; |
| 201 | 204 | ||
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 17b03dd3a6b5..7fea555929e2 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c | |||
| @@ -1,7 +1,7 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * SGI UltraViolet TLB flush routines. | 2 | * SGI UltraViolet TLB flush routines. |
| 3 | * | 3 | * |
| 4 | * (c) 2008 Cliff Wickman <cpw@sgi.com>, SGI. | 4 | * (c) 2008-2010 Cliff Wickman <cpw@sgi.com>, SGI. |
| 5 | * | 5 | * |
| 6 | * This code is released under the GNU General Public License version 2 or | 6 | * This code is released under the GNU General Public License version 2 or |
| 7 | * later. | 7 | * later. |
| @@ -20,42 +20,67 @@ | |||
| 20 | #include <asm/idle.h> | 20 | #include <asm/idle.h> |
| 21 | #include <asm/tsc.h> | 21 | #include <asm/tsc.h> |
| 22 | #include <asm/irq_vectors.h> | 22 | #include <asm/irq_vectors.h> |
| 23 | #include <asm/timer.h> | ||
| 23 | 24 | ||
| 24 | static struct bau_control **uv_bau_table_bases __read_mostly; | 25 | struct msg_desc { |
| 25 | static int uv_bau_retry_limit __read_mostly; | 26 | struct bau_payload_queue_entry *msg; |
| 27 | int msg_slot; | ||
| 28 | int sw_ack_slot; | ||
| 29 | struct bau_payload_queue_entry *va_queue_first; | ||
| 30 | struct bau_payload_queue_entry *va_queue_last; | ||
| 31 | }; | ||
| 26 | 32 | ||
| 27 | /* base pnode in this partition */ | 33 | #define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD 0x000000000bUL |
| 28 | static int uv_partition_base_pnode __read_mostly; | 34 | |
| 35 | static int uv_bau_max_concurrent __read_mostly; | ||
| 36 | |||
| 37 | static int nobau; | ||
| 38 | static int __init setup_nobau(char *arg) | ||
| 39 | { | ||
| 40 | nobau = 1; | ||
| 41 | return 0; | ||
| 42 | } | ||
| 43 | early_param("nobau", setup_nobau); | ||
| 29 | 44 | ||
| 30 | static unsigned long uv_mmask __read_mostly; | 45 | /* base pnode in this partition */ |
| 46 | static int uv_partition_base_pnode __read_mostly; | ||
| 47 | /* position of pnode (which is nasid>>1): */ | ||
| 48 | static int uv_nshift __read_mostly; | ||
| 49 | static unsigned long uv_mmask __read_mostly; | ||
| 31 | 50 | ||
| 32 | static DEFINE_PER_CPU(struct ptc_stats, ptcstats); | 51 | static DEFINE_PER_CPU(struct ptc_stats, ptcstats); |
| 33 | static DEFINE_PER_CPU(struct bau_control, bau_control); | 52 | static DEFINE_PER_CPU(struct bau_control, bau_control); |
| 53 | static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask); | ||
| 54 | |||
| 55 | struct reset_args { | ||
| 56 | int sender; | ||
| 57 | }; | ||
| 34 | 58 | ||
| 35 | /* | 59 | /* |
| 36 | * Determine the first node on a blade. | 60 | * Determine the first node on a uvhub. 'Nodes' are used for kernel |
| 61 | * memory allocation. | ||
| 37 | */ | 62 | */ |
| 38 | static int __init blade_to_first_node(int blade) | 63 | static int __init uvhub_to_first_node(int uvhub) |
| 39 | { | 64 | { |
| 40 | int node, b; | 65 | int node, b; |
| 41 | 66 | ||
| 42 | for_each_online_node(node) { | 67 | for_each_online_node(node) { |
| 43 | b = uv_node_to_blade_id(node); | 68 | b = uv_node_to_blade_id(node); |
| 44 | if (blade == b) | 69 | if (uvhub == b) |
| 45 | return node; | 70 | return node; |
| 46 | } | 71 | } |
| 47 | return -1; /* shouldn't happen */ | 72 | return -1; |
| 48 | } | 73 | } |
| 49 | 74 | ||
| 50 | /* | 75 | /* |
| 51 | * Determine the apicid of the first cpu on a blade. | 76 | * Determine the apicid of the first cpu on a uvhub. |
| 52 | */ | 77 | */ |
| 53 | static int __init blade_to_first_apicid(int blade) | 78 | static int __init uvhub_to_first_apicid(int uvhub) |
| 54 | { | 79 | { |
| 55 | int cpu; | 80 | int cpu; |
| 56 | 81 | ||
| 57 | for_each_present_cpu(cpu) | 82 | for_each_present_cpu(cpu) |
| 58 | if (blade == uv_cpu_to_blade_id(cpu)) | 83 | if (uvhub == uv_cpu_to_blade_id(cpu)) |
| 59 | return per_cpu(x86_cpu_to_apicid, cpu); | 84 | return per_cpu(x86_cpu_to_apicid, cpu); |
| 60 | return -1; | 85 | return -1; |
| 61 | } | 86 | } |
| @@ -68,195 +93,459 @@ static int __init blade_to_first_apicid(int blade) | |||
| 68 | * clear of the Timeout bit (as well) will free the resource. No reply will | 93 | * clear of the Timeout bit (as well) will free the resource. No reply will |
| 69 | * be sent (the hardware will only do one reply per message). | 94 | * be sent (the hardware will only do one reply per message). |
| 70 | */ | 95 | */ |
| 71 | static void uv_reply_to_message(int resource, | 96 | static inline void uv_reply_to_message(struct msg_desc *mdp, |
| 72 | struct bau_payload_queue_entry *msg, | 97 | struct bau_control *bcp) |
| 73 | struct bau_msg_status *msp) | ||
| 74 | { | 98 | { |
| 75 | unsigned long dw; | 99 | unsigned long dw; |
| 100 | struct bau_payload_queue_entry *msg; | ||
| 76 | 101 | ||
| 77 | dw = (1 << (resource + UV_SW_ACK_NPENDING)) | (1 << resource); | 102 | msg = mdp->msg; |
| 103 | if (!msg->canceled) { | ||
| 104 | dw = (msg->sw_ack_vector << UV_SW_ACK_NPENDING) | | ||
| 105 | msg->sw_ack_vector; | ||
| 106 | uv_write_local_mmr( | ||
| 107 | UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, dw); | ||
| 108 | } | ||
| 78 | msg->replied_to = 1; | 109 | msg->replied_to = 1; |
| 79 | msg->sw_ack_vector = 0; | 110 | msg->sw_ack_vector = 0; |
| 80 | if (msp) | ||
| 81 | msp->seen_by.bits = 0; | ||
| 82 | uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, dw); | ||
| 83 | } | 111 | } |
| 84 | 112 | ||
| 85 | /* | 113 | /* |
| 86 | * Do all the things a cpu should do for a TLB shootdown message. | 114 | * Process the receipt of a RETRY message |
| 87 | * Other cpu's may come here at the same time for this message. | ||
| 88 | */ | 115 | */ |
| 89 | static void uv_bau_process_message(struct bau_payload_queue_entry *msg, | 116 | static inline void uv_bau_process_retry_msg(struct msg_desc *mdp, |
| 90 | int msg_slot, int sw_ack_slot) | 117 | struct bau_control *bcp) |
| 91 | { | 118 | { |
| 92 | unsigned long this_cpu_mask; | 119 | int i; |
| 93 | struct bau_msg_status *msp; | 120 | int cancel_count = 0; |
| 94 | int cpu; | 121 | int slot2; |
| 122 | unsigned long msg_res; | ||
| 123 | unsigned long mmr = 0; | ||
| 124 | struct bau_payload_queue_entry *msg; | ||
| 125 | struct bau_payload_queue_entry *msg2; | ||
| 126 | struct ptc_stats *stat; | ||
| 95 | 127 | ||
| 96 | msp = __get_cpu_var(bau_control).msg_statuses + msg_slot; | 128 | msg = mdp->msg; |
| 97 | cpu = uv_blade_processor_id(); | 129 | stat = &per_cpu(ptcstats, bcp->cpu); |
| 98 | msg->number_of_cpus = | 130 | stat->d_retries++; |
| 99 | uv_blade_nr_online_cpus(uv_node_to_blade_id(numa_node_id())); | 131 | /* |
| 100 | this_cpu_mask = 1UL << cpu; | 132 | * cancel any message from msg+1 to the retry itself |
| 101 | if (msp->seen_by.bits & this_cpu_mask) | 133 | */ |
| 102 | return; | 134 | for (msg2 = msg+1, i = 0; i < DEST_Q_SIZE; msg2++, i++) { |
| 103 | atomic_or_long(&msp->seen_by.bits, this_cpu_mask); | 135 | if (msg2 > mdp->va_queue_last) |
| 136 | msg2 = mdp->va_queue_first; | ||
| 137 | if (msg2 == msg) | ||
| 138 | break; | ||
| 139 | |||
| 140 | /* same conditions for cancellation as uv_do_reset */ | ||
| 141 | if ((msg2->replied_to == 0) && (msg2->canceled == 0) && | ||
| 142 | (msg2->sw_ack_vector) && ((msg2->sw_ack_vector & | ||
| 143 | msg->sw_ack_vector) == 0) && | ||
| 144 | (msg2->sending_cpu == msg->sending_cpu) && | ||
| 145 | (msg2->msg_type != MSG_NOOP)) { | ||
| 146 | slot2 = msg2 - mdp->va_queue_first; | ||
| 147 | mmr = uv_read_local_mmr | ||
| 148 | (UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); | ||
| 149 | msg_res = ((msg2->sw_ack_vector << 8) | | ||
| 150 | msg2->sw_ack_vector); | ||
| 151 | /* | ||
| 152 | * This is a message retry; clear the resources held | ||
| 153 | * by the previous message only if they timed out. | ||
| 154 | * If it has not timed out we have an unexpected | ||
| 155 | * situation to report. | ||
| 156 | */ | ||
| 157 | if (mmr & (msg_res << 8)) { | ||
| 158 | /* | ||
| 159 | * is the resource timed out? | ||
| 160 | * make everyone ignore the cancelled message. | ||
| 161 | */ | ||
| 162 | msg2->canceled = 1; | ||
| 163 | stat->d_canceled++; | ||
| 164 | cancel_count++; | ||
| 165 | uv_write_local_mmr( | ||
| 166 | UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, | ||
| 167 | (msg_res << 8) | msg_res); | ||
| 168 | } else | ||
| 169 | printk(KERN_INFO "note bau retry: no effect\n"); | ||
| 170 | } | ||
| 171 | } | ||
| 172 | if (!cancel_count) | ||
| 173 | stat->d_nocanceled++; | ||
| 174 | } | ||
| 104 | 175 | ||
| 105 | if (msg->replied_to == 1) | 176 | /* |
| 106 | return; | 177 | * Do all the things a cpu should do for a TLB shootdown message. |
| 178 | * Other cpu's may come here at the same time for this message. | ||
| 179 | */ | ||
| 180 | static void uv_bau_process_message(struct msg_desc *mdp, | ||
| 181 | struct bau_control *bcp) | ||
| 182 | { | ||
| 183 | int msg_ack_count; | ||
| 184 | short socket_ack_count = 0; | ||
| 185 | struct ptc_stats *stat; | ||
| 186 | struct bau_payload_queue_entry *msg; | ||
| 187 | struct bau_control *smaster = bcp->socket_master; | ||
| 107 | 188 | ||
| 189 | /* | ||
| 190 | * This must be a normal message, or retry of a normal message | ||
| 191 | */ | ||
| 192 | msg = mdp->msg; | ||
| 193 | stat = &per_cpu(ptcstats, bcp->cpu); | ||
| 108 | if (msg->address == TLB_FLUSH_ALL) { | 194 | if (msg->address == TLB_FLUSH_ALL) { |
| 109 | local_flush_tlb(); | 195 | local_flush_tlb(); |
| 110 | __get_cpu_var(ptcstats).alltlb++; | 196 | stat->d_alltlb++; |
| 111 | } else { | 197 | } else { |
| 112 | __flush_tlb_one(msg->address); | 198 | __flush_tlb_one(msg->address); |
| 113 | __get_cpu_var(ptcstats).onetlb++; | 199 | stat->d_onetlb++; |
| 114 | } | 200 | } |
| 201 | stat->d_requestee++; | ||
| 202 | |||
| 203 | /* | ||
| 204 | * One cpu on each uvhub has the additional job on a RETRY | ||
| 205 | * of releasing the resource held by the message that is | ||
| 206 | * being retried. That message is identified by sending | ||
| 207 | * cpu number. | ||
| 208 | */ | ||
| 209 | if (msg->msg_type == MSG_RETRY && bcp == bcp->uvhub_master) | ||
| 210 | uv_bau_process_retry_msg(mdp, bcp); | ||
| 115 | 211 | ||
| 116 | __get_cpu_var(ptcstats).requestee++; | 212 | /* |
| 213 | * This is a sw_ack message, so we have to reply to it. | ||
| 214 | * Count each responding cpu on the socket. This avoids | ||
| 215 | * pinging the count's cache line back and forth between | ||
| 216 | * the sockets. | ||
| 217 | */ | ||
| 218 | socket_ack_count = atomic_add_short_return(1, (struct atomic_short *) | ||
| 219 | &smaster->socket_acknowledge_count[mdp->msg_slot]); | ||
| 220 | if (socket_ack_count == bcp->cpus_in_socket) { | ||
| 221 | /* | ||
| 222 | * Both sockets dump their completed count total into | ||
| 223 | * the message's count. | ||
| 224 | */ | ||
| 225 | smaster->socket_acknowledge_count[mdp->msg_slot] = 0; | ||
| 226 | msg_ack_count = atomic_add_short_return(socket_ack_count, | ||
| 227 | (struct atomic_short *)&msg->acknowledge_count); | ||
| 228 | |||
| 229 | if (msg_ack_count == bcp->cpus_in_uvhub) { | ||
| 230 | /* | ||
| 231 | * All cpus in uvhub saw it; reply | ||
| 232 | */ | ||
| 233 | uv_reply_to_message(mdp, bcp); | ||
| 234 | } | ||
| 235 | } | ||
| 117 | 236 | ||
| 118 | atomic_inc_short(&msg->acknowledge_count); | 237 | return; |
| 119 | if (msg->number_of_cpus == msg->acknowledge_count) | ||
| 120 | uv_reply_to_message(sw_ack_slot, msg, msp); | ||
| 121 | } | 238 | } |
| 122 | 239 | ||
| 123 | /* | 240 | /* |
| 124 | * Examine the payload queue on one distribution node to see | 241 | * Determine the first cpu on a uvhub. |
| 125 | * which messages have not been seen, and which cpu(s) have not seen them. | 242 | */ |
| 243 | static int uvhub_to_first_cpu(int uvhub) | ||
| 244 | { | ||
| 245 | int cpu; | ||
| 246 | for_each_present_cpu(cpu) | ||
| 247 | if (uvhub == uv_cpu_to_blade_id(cpu)) | ||
| 248 | return cpu; | ||
| 249 | return -1; | ||
| 250 | } | ||
| 251 | |||
| 252 | /* | ||
| 253 | * Last resort when we get a large number of destination timeouts is | ||
| 254 | * to clear resources held by a given cpu. | ||
| 255 | * Do this with IPI so that all messages in the BAU message queue | ||
| 256 | * can be identified by their nonzero sw_ack_vector field. | ||
| 126 | * | 257 | * |
| 127 | * Returns the number of cpu's that have not responded. | 258 | * This is entered for a single cpu on the uvhub. |
| 259 | * The sender wants this uvhub to free a specific message's | ||
| 260 | * sw_ack resources. | ||
| 128 | */ | 261 | */ |
| 129 | static int uv_examine_destination(struct bau_control *bau_tablesp, int sender) | 262 | static void |
| 263 | uv_do_reset(void *ptr) | ||
| 130 | { | 264 | { |
| 131 | struct bau_payload_queue_entry *msg; | ||
| 132 | struct bau_msg_status *msp; | ||
| 133 | int count = 0; | ||
| 134 | int i; | 265 | int i; |
| 135 | int j; | 266 | int slot; |
| 267 | int count = 0; | ||
| 268 | unsigned long mmr; | ||
| 269 | unsigned long msg_res; | ||
| 270 | struct bau_control *bcp; | ||
| 271 | struct reset_args *rap; | ||
| 272 | struct bau_payload_queue_entry *msg; | ||
| 273 | struct ptc_stats *stat; | ||
| 136 | 274 | ||
| 137 | for (msg = bau_tablesp->va_queue_first, i = 0; i < DEST_Q_SIZE; | 275 | bcp = &per_cpu(bau_control, smp_processor_id()); |
| 138 | msg++, i++) { | 276 | rap = (struct reset_args *)ptr; |
| 139 | if ((msg->sending_cpu == sender) && (!msg->replied_to)) { | 277 | stat = &per_cpu(ptcstats, bcp->cpu); |
| 140 | msp = bau_tablesp->msg_statuses + i; | 278 | stat->d_resets++; |
| 141 | printk(KERN_DEBUG | 279 | |
| 142 | "blade %d: address:%#lx %d of %d, not cpu(s): ", | 280 | /* |
| 143 | i, msg->address, msg->acknowledge_count, | 281 | * We're looking for the given sender, and |
| 144 | msg->number_of_cpus); | 282 | * will free its sw_ack resource. |
| 145 | for (j = 0; j < msg->number_of_cpus; j++) { | 283 | * If all cpu's finally responded after the timeout, its |
| 146 | if (!((1L << j) & msp->seen_by.bits)) { | 284 | * message 'replied_to' was set. |
| 147 | count++; | 285 | */ |
| 148 | printk("%d ", j); | 286 | for (msg = bcp->va_queue_first, i = 0; i < DEST_Q_SIZE; msg++, i++) { |
| 149 | } | 287 | /* uv_do_reset: same conditions for cancellation as |
| 288 | uv_bau_process_retry_msg() */ | ||
| 289 | if ((msg->replied_to == 0) && | ||
| 290 | (msg->canceled == 0) && | ||
| 291 | (msg->sending_cpu == rap->sender) && | ||
| 292 | (msg->sw_ack_vector) && | ||
| 293 | (msg->msg_type != MSG_NOOP)) { | ||
| 294 | /* | ||
| 295 | * make everyone else ignore this message | ||
| 296 | */ | ||
| 297 | msg->canceled = 1; | ||
| 298 | slot = msg - bcp->va_queue_first; | ||
| 299 | count++; | ||
| 300 | /* | ||
| 301 | * only reset the resource if it is still pending | ||
| 302 | */ | ||
| 303 | mmr = uv_read_local_mmr | ||
| 304 | (UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); | ||
| 305 | msg_res = ((msg->sw_ack_vector << 8) | | ||
| 306 | msg->sw_ack_vector); | ||
| 307 | if (mmr & msg_res) { | ||
| 308 | stat->d_rcanceled++; | ||
| 309 | uv_write_local_mmr( | ||
| 310 | UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, | ||
| 311 | msg_res); | ||
| 150 | } | 312 | } |
| 151 | printk("\n"); | ||
| 152 | } | 313 | } |
| 153 | } | 314 | } |
| 154 | return count; | 315 | return; |
| 155 | } | 316 | } |
| 156 | 317 | ||
| 157 | /* | 318 | /* |
| 158 | * Examine the payload queue on all the distribution nodes to see | 319 | * Use IPI to get all target uvhubs to release resources held by |
| 159 | * which messages have not been seen, and which cpu(s) have not seen them. | 320 | * a given sending cpu number. |
| 160 | * | ||
| 161 | * Returns the number of cpu's that have not responded. | ||
| 162 | */ | 321 | */ |
| 163 | static int uv_examine_destinations(struct bau_target_nodemask *distribution) | 322 | static void uv_reset_with_ipi(struct bau_target_uvhubmask *distribution, |
| 323 | int sender) | ||
| 164 | { | 324 | { |
| 165 | int sender; | 325 | int uvhub; |
| 166 | int i; | 326 | int cpu; |
| 167 | int count = 0; | 327 | cpumask_t mask; |
| 328 | struct reset_args reset_args; | ||
| 329 | |||
| 330 | reset_args.sender = sender; | ||
| 168 | 331 | ||
| 169 | sender = smp_processor_id(); | 332 | cpus_clear(mask); |
| 170 | for (i = 0; i < sizeof(struct bau_target_nodemask) * BITSPERBYTE; i++) { | 333 | /* find a single cpu for each uvhub in this distribution mask */ |
| 171 | if (!bau_node_isset(i, distribution)) | 334 | for (uvhub = 0; |
| 335 | uvhub < sizeof(struct bau_target_uvhubmask) * BITSPERBYTE; | ||
| 336 | uvhub++) { | ||
| 337 | if (!bau_uvhub_isset(uvhub, distribution)) | ||
| 172 | continue; | 338 | continue; |
| 173 | count += uv_examine_destination(uv_bau_table_bases[i], sender); | 339 | /* find a cpu for this uvhub */ |
| 340 | cpu = uvhub_to_first_cpu(uvhub); | ||
| 341 | cpu_set(cpu, mask); | ||
| 174 | } | 342 | } |
| 175 | return count; | 343 | /* IPI all cpus; Preemption is already disabled */ |
| 344 | smp_call_function_many(&mask, uv_do_reset, (void *)&reset_args, 1); | ||
| 345 | return; | ||
| 346 | } | ||
| 347 | |||
| 348 | static inline unsigned long | ||
| 349 | cycles_2_us(unsigned long long cyc) | ||
| 350 | { | ||
| 351 | unsigned long long ns; | ||
| 352 | unsigned long us; | ||
| 353 | ns = (cyc * per_cpu(cyc2ns, smp_processor_id())) | ||
| 354 | >> CYC2NS_SCALE_FACTOR; | ||
| 355 | us = ns / 1000; | ||
| 356 | return us; | ||
| 176 | } | 357 | } |
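cycles_2_us() above (and sec_2_cycles()/millisec_2_cycles() further down) are fixed-point conversions built on the per-cpu cyc2ns scale. A minimal user-space sketch of the same arithmetic, assuming x86's CYC2NS_SCALE_FACTOR of 10 and a purely illustrative cyc2ns value for a 2 GHz TSC (both are assumptions, not taken from this patch):

    #include <stdio.h>

    #define CYC2NS_SCALE_FACTOR 10                /* assumed: ns = (cyc * cyc2ns) >> 10 */

    int main(void)
    {
            unsigned long long cyc2ns = 512;      /* illustrative: 0.5 ns/cycle * 2^10 (2 GHz) */
            unsigned long long cyc = 4000000;     /* e.g. time2 - time1 */

            /* forward conversion, as in cycles_2_us() */
            unsigned long long ns = (cyc * cyc2ns) >> CYC2NS_SCALE_FACTOR;
            unsigned long long us = ns / 1000;

            /* reverse conversion, as in sec_2_cycles()/millisec_2_cycles() */
            unsigned long long ms_ns = 1000000;   /* one millisecond */
            unsigned long long ms_cyc = (ms_ns << CYC2NS_SCALE_FACTOR) / cyc2ns;

            printf("%llu cycles ~ %llu us; 1 ms ~ %llu cycles\n", cyc, us, ms_cyc);
            return 0;
    }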
| 177 | 358 | ||
| 178 | /* | 359 | /* |
| 179 | * wait for completion of a broadcast message | 360 | * wait for all cpus on this hub to finish their sends and go quiet |
| 180 | * | 361 | * leaves uvhub_quiesce set so that no new broadcasts are started by |
| 181 | * return COMPLETE, RETRY or GIVEUP | 362 | * bau_flush_send_and_wait() |
| 363 | */ | ||
| 364 | static inline void | ||
| 365 | quiesce_local_uvhub(struct bau_control *hmaster) | ||
| 366 | { | ||
| 367 | atomic_add_short_return(1, (struct atomic_short *) | ||
| 368 | &hmaster->uvhub_quiesce); | ||
| 369 | } | ||
| 370 | |||
| 371 | /* | ||
| 372 | * mark this quiet-requestor as done | ||
| 373 | */ | ||
| 374 | static inline void | ||
| 375 | end_uvhub_quiesce(struct bau_control *hmaster) | ||
| 376 | { | ||
| 377 | atomic_add_short_return(-1, (struct atomic_short *) | ||
| 378 | &hmaster->uvhub_quiesce); | ||
| 379 | } | ||
| 380 | |||
| 381 | /* | ||
| 382 | * Wait for completion of a broadcast software ack message | ||
| 383 | * return COMPLETE, RETRY(PLUGGED or TIMEOUT) or GIVEUP | ||
| 182 | */ | 384 | */ |
| 183 | static int uv_wait_completion(struct bau_desc *bau_desc, | 385 | static int uv_wait_completion(struct bau_desc *bau_desc, |
| 184 | unsigned long mmr_offset, int right_shift) | 386 | unsigned long mmr_offset, int right_shift, int this_cpu, |
| 387 | struct bau_control *bcp, struct bau_control *smaster, long try) | ||
| 185 | { | 388 | { |
| 186 | int exams = 0; | 389 | int relaxes = 0; |
| 187 | long destination_timeouts = 0; | ||
| 188 | long source_timeouts = 0; | ||
| 189 | unsigned long descriptor_status; | 390 | unsigned long descriptor_status; |
| 391 | unsigned long mmr; | ||
| 392 | unsigned long mask; | ||
| 393 | cycles_t ttime; | ||
| 394 | cycles_t timeout_time; | ||
| 395 | struct ptc_stats *stat = &per_cpu(ptcstats, this_cpu); | ||
| 396 | struct bau_control *hmaster; | ||
| 397 | |||
| 398 | hmaster = bcp->uvhub_master; | ||
| 399 | timeout_time = get_cycles() + bcp->timeout_interval; | ||
| 190 | 400 | ||
| 401 | /* spin on the status MMR, waiting for it to go idle */ | ||
| 191 | while ((descriptor_status = (((unsigned long) | 402 | while ((descriptor_status = (((unsigned long) |
| 192 | uv_read_local_mmr(mmr_offset) >> | 403 | uv_read_local_mmr(mmr_offset) >> |
| 193 | right_shift) & UV_ACT_STATUS_MASK)) != | 404 | right_shift) & UV_ACT_STATUS_MASK)) != |
| 194 | DESC_STATUS_IDLE) { | 405 | DESC_STATUS_IDLE) { |
| 195 | if (descriptor_status == DESC_STATUS_SOURCE_TIMEOUT) { | ||
| 196 | source_timeouts++; | ||
| 197 | if (source_timeouts > SOURCE_TIMEOUT_LIMIT) | ||
| 198 | source_timeouts = 0; | ||
| 199 | __get_cpu_var(ptcstats).s_retry++; | ||
| 200 | return FLUSH_RETRY; | ||
| 201 | } | ||
| 202 | /* | 406 | /* |
| 203 | * spin here looking for progress at the destinations | 407 | * Our software ack messages may be blocked because there are |
| 408 | * no swack resources available. As long as none of them | ||
| 409 | * has timed out hardware will NACK our message and its | ||
| 410 | * state will stay IDLE. | ||
| 204 | */ | 411 | */ |
| 205 | if (descriptor_status == DESC_STATUS_DESTINATION_TIMEOUT) { | 412 | if (descriptor_status == DESC_STATUS_SOURCE_TIMEOUT) { |
| 206 | destination_timeouts++; | 413 | stat->s_stimeout++; |
| 207 | if (destination_timeouts > DESTINATION_TIMEOUT_LIMIT) { | 414 | return FLUSH_GIVEUP; |
| 208 | /* | 415 | } else if (descriptor_status == |
| 209 | * returns number of cpus not responding | 416 | DESC_STATUS_DESTINATION_TIMEOUT) { |
| 210 | */ | 417 | stat->s_dtimeout++; |
| 211 | if (uv_examine_destinations | 418 | ttime = get_cycles(); |
| 212 | (&bau_desc->distribution) == 0) { | 419 | |
| 213 | __get_cpu_var(ptcstats).d_retry++; | 420 | /* |
| 214 | return FLUSH_RETRY; | 421 | * Our retries may be blocked by all destination |
| 215 | } | 422 | * swack resources being consumed, and a timeout |
| 216 | exams++; | 423 | * pending. In that case hardware returns the |
| 217 | if (exams >= uv_bau_retry_limit) { | 424 | * ERROR that looks like a destination timeout. |
| 218 | printk(KERN_DEBUG | 425 | */ |
| 219 | "uv_flush_tlb_others"); | 426 | if (cycles_2_us(ttime - bcp->send_message) < BIOS_TO) { |
| 220 | printk("giving up on cpu %d\n", | 427 | bcp->conseccompletes = 0; |
| 221 | smp_processor_id()); | 428 | return FLUSH_RETRY_PLUGGED; |
| 429 | } | ||
| 430 | |||
| 431 | bcp->conseccompletes = 0; | ||
| 432 | return FLUSH_RETRY_TIMEOUT; | ||
| 433 | } else { | ||
| 434 | /* | ||
| 435 | * descriptor_status is still BUSY | ||
| 436 | */ | ||
| 437 | cpu_relax(); | ||
| 438 | relaxes++; | ||
| 439 | if (relaxes >= 10000) { | ||
| 440 | relaxes = 0; | ||
| 441 | if (get_cycles() > timeout_time) { | ||
| 442 | quiesce_local_uvhub(hmaster); | ||
| 443 | |||
| 444 | /* single-thread the register change */ | ||
| 445 | spin_lock(&hmaster->masks_lock); | ||
| 446 | mmr = uv_read_local_mmr(mmr_offset); | ||
| 447 | mask = 0UL; | ||
| 448 | mask |= (3UL << right_shift); | ||
| 449 | mask = ~mask; | ||
| 450 | mmr &= mask; | ||
| 451 | uv_write_local_mmr(mmr_offset, mmr); | ||
| 452 | spin_unlock(&hmaster->masks_lock); | ||
| 453 | end_uvhub_quiesce(hmaster); | ||
| 454 | stat->s_busy++; | ||
| 222 | return FLUSH_GIVEUP; | 455 | return FLUSH_GIVEUP; |
| 223 | } | 456 | } |
| 224 | /* | ||
| 225 | * delays can hang the simulator | ||
| 226 | udelay(1000); | ||
| 227 | */ | ||
| 228 | destination_timeouts = 0; | ||
| 229 | } | 457 | } |
| 230 | } | 458 | } |
| 231 | cpu_relax(); | ||
| 232 | } | 459 | } |
| 460 | bcp->conseccompletes++; | ||
| 233 | return FLUSH_COMPLETE; | 461 | return FLUSH_COMPLETE; |
| 234 | } | 462 | } |
| 235 | 463 | ||
| 464 | static inline cycles_t | ||
| 465 | sec_2_cycles(unsigned long sec) | ||
| 466 | { | ||
| 467 | unsigned long ns; | ||
| 468 | cycles_t cyc; | ||
| 469 | |||
| 470 | ns = sec * 1000000000; | ||
| 471 | cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id())); | ||
| 472 | return cyc; | ||
| 473 | } | ||
| 474 | |||
| 475 | /* | ||
| 476 | * conditionally add 1 to *v, unless *v is >= u | ||
| 477 | * return 0 if we cannot add 1 to *v because it is >= u | ||
| 478 | * return 1 if we can add 1 to *v because it is < u | ||
| 479 | * the add is atomic | ||
| 480 | * | ||
| 481 | * This is close to atomic_add_unless(), but this allows the 'u' value | ||
| 482 | * to be lowered below the current 'v'. atomic_add_unless can only stop | ||
| 483 | * on equal. | ||
| 484 | */ | ||
| 485 | static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u) | ||
| 486 | { | ||
| 487 | spin_lock(lock); | ||
| 488 | if (atomic_read(v) >= u) { | ||
| 489 | spin_unlock(lock); | ||
| 490 | return 0; | ||
| 491 | } | ||
| 492 | atomic_inc(v); | ||
| 493 | spin_unlock(lock); | ||
| 494 | return 1; | ||
| 495 | } | ||
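atomic_inc_unless_ge() is what implements the per-uvhub 'throttle' in uv_flush_send_and_wait() below: a sender may only activate a descriptor once it can bump active_descriptor_count while staying under max_concurrent, and max_concurrent can later be lowered below the current count (which atomic_add_unless could not express). A stand-alone sketch of that semantics with a mutex-protected counter; the names here are illustrative, not the kernel's:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static int active;                    /* stands in for active_descriptor_count */
    static int limit = 2;                 /* stands in for max_concurrent; may drop below 'active' */

    /* add 1 to 'active' only while it is below 'limit'; 1 on success, 0 otherwise */
    static int inc_unless_ge(void)
    {
            int ok = 0;

            pthread_mutex_lock(&lock);
            if (active < limit) {
                    active++;
                    ok = 1;
            }
            pthread_mutex_unlock(&lock);
            return ok;
    }

    int main(void)
    {
            while (!inc_unless_ge())
                    ;                     /* the kernel spins with cpu_relax() here */
            printf("got a slot: active=%d limit=%d\n", active, limit);
            return 0;
    }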
| 496 | |||
| 236 | /** | 497 | /** |
| 237 | * uv_flush_send_and_wait | 498 | * uv_flush_send_and_wait |
| 238 | * | 499 | * |
| 239 | * Send a broadcast and wait for a broadcast message to complete. | 500 | * Send a broadcast and wait for it to complete. |
| 240 | * | 501 | * |
| 241 | * The flush_mask contains the cpus the broadcast was sent to. | 502 | * The flush_mask contains the cpus the broadcast is to be sent to, plus |
| 503 | * cpus that are on the local uvhub. | ||
| 242 | * | 504 | * |
| 243 | * Returns NULL if all remote flushing was done. The mask is zeroed. | 505 | * Returns NULL if all flushing represented in the mask was done. The mask |
| 506 | * is zeroed. | ||
| 244 | * Returns @flush_mask if some remote flushing remains to be done. The | 507 | * Returns @flush_mask if some remote flushing remains to be done. The |
| 245 | * mask will have some bits still set. | 508 | * mask will have some bits still set, representing any cpus on the local |
| 509 | * uvhub (not current cpu) and any on remote uvhubs if the broadcast failed. | ||
| 246 | */ | 510 | */ |
| 247 | const struct cpumask *uv_flush_send_and_wait(int cpu, int this_pnode, | 511 | const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc, |
| 248 | struct bau_desc *bau_desc, | 512 | struct cpumask *flush_mask, |
| 249 | struct cpumask *flush_mask) | 513 | struct bau_control *bcp) |
| 250 | { | 514 | { |
| 251 | int completion_status = 0; | ||
| 252 | int right_shift; | 515 | int right_shift; |
| 253 | int tries = 0; | 516 | int uvhub; |
| 254 | int pnode; | ||
| 255 | int bit; | 517 | int bit; |
| 518 | int completion_status = 0; | ||
| 519 | int seq_number = 0; | ||
| 520 | long try = 0; | ||
| 521 | int cpu = bcp->uvhub_cpu; | ||
| 522 | int this_cpu = bcp->cpu; | ||
| 523 | int this_uvhub = bcp->uvhub; | ||
| 256 | unsigned long mmr_offset; | 524 | unsigned long mmr_offset; |
| 257 | unsigned long index; | 525 | unsigned long index; |
| 258 | cycles_t time1; | 526 | cycles_t time1; |
| 259 | cycles_t time2; | 527 | cycles_t time2; |
| 528 | struct ptc_stats *stat = &per_cpu(ptcstats, bcp->cpu); | ||
| 529 | struct bau_control *smaster = bcp->socket_master; | ||
| 530 | struct bau_control *hmaster = bcp->uvhub_master; | ||
| 531 | |||
| 532 | /* | ||
| 533 | * Spin here while there are hmaster->max_concurrent or more active | ||
| 534 | * descriptors. This is the per-uvhub 'throttle'. | ||
| 535 | */ | ||
| 536 | if (!atomic_inc_unless_ge(&hmaster->uvhub_lock, | ||
| 537 | &hmaster->active_descriptor_count, | ||
| 538 | hmaster->max_concurrent)) { | ||
| 539 | stat->s_throttles++; | ||
| 540 | do { | ||
| 541 | cpu_relax(); | ||
| 542 | } while (!atomic_inc_unless_ge(&hmaster->uvhub_lock, | ||
| 543 | &hmaster->active_descriptor_count, | ||
| 544 | hmaster->max_concurrent)); | ||
| 545 | } | ||
| 546 | |||
| 547 | while (hmaster->uvhub_quiesce) | ||
| 548 | cpu_relax(); | ||
| 260 | 549 | ||
| 261 | if (cpu < UV_CPUS_PER_ACT_STATUS) { | 550 | if (cpu < UV_CPUS_PER_ACT_STATUS) { |
| 262 | mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; | 551 | mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; |
| @@ -268,24 +557,108 @@ const struct cpumask *uv_flush_send_and_wait(int cpu, int this_pnode, | |||
| 268 | } | 557 | } |
| 269 | time1 = get_cycles(); | 558 | time1 = get_cycles(); |
| 270 | do { | 559 | do { |
| 271 | tries++; | 560 | /* |
| 561 | * Every message from any given cpu gets a unique message | ||
| 562 | * sequence number. But retries use that same number. | ||
| 563 | * Our message may have timed out at the destination because | ||
| 564 | * all sw-ack resources are in use and there is a timeout | ||
| 565 | * pending there. In that case, our last send never got | ||
| 566 | * placed into the queue and we need to persist until it | ||
| 567 | * does. | ||
| 568 | * | ||
| 569 | * Make any retry a type MSG_RETRY so that the destination will | ||
| 570 | * free any resource held by a previous message from this cpu. | ||
| 571 | */ | ||
| 572 | if (try == 0) { | ||
| 573 | /* use message type set by the caller the first time */ | ||
| 574 | seq_number = bcp->message_number++; | ||
| 575 | } else { | ||
| 576 | /* use RETRY type on all the rest; same sequence */ | ||
| 577 | bau_desc->header.msg_type = MSG_RETRY; | ||
| 578 | stat->s_retry_messages++; | ||
| 579 | } | ||
| 580 | bau_desc->header.sequence = seq_number; | ||
| 272 | index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) | | 581 | index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) | |
| 273 | cpu; | 582 | bcp->uvhub_cpu; |
| 583 | bcp->send_message = get_cycles(); | ||
| 584 | |||
| 274 | uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index); | 585 | uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index); |
| 586 | |||
| 587 | try++; | ||
| 275 | completion_status = uv_wait_completion(bau_desc, mmr_offset, | 588 | completion_status = uv_wait_completion(bau_desc, mmr_offset, |
| 276 | right_shift); | 589 | right_shift, this_cpu, bcp, smaster, try); |
| 277 | } while (completion_status == FLUSH_RETRY); | 590 | |
| 591 | if (completion_status == FLUSH_RETRY_PLUGGED) { | ||
| 592 | /* | ||
| 593 | * Our retries may be blocked by all destination swack | ||
| 594 | * resources being consumed, and a timeout pending. In | ||
| 595 | * that case hardware immediately returns the ERROR | ||
| 596 | * that looks like a destination timeout. | ||
| 597 | */ | ||
| 598 | udelay(TIMEOUT_DELAY); | ||
| 599 | bcp->plugged_tries++; | ||
| 600 | if (bcp->plugged_tries >= PLUGSB4RESET) { | ||
| 601 | bcp->plugged_tries = 0; | ||
| 602 | quiesce_local_uvhub(hmaster); | ||
| 603 | spin_lock(&hmaster->queue_lock); | ||
| 604 | uv_reset_with_ipi(&bau_desc->distribution, | ||
| 605 | this_cpu); | ||
| 606 | spin_unlock(&hmaster->queue_lock); | ||
| 607 | end_uvhub_quiesce(hmaster); | ||
| 608 | bcp->ipi_attempts++; | ||
| 609 | stat->s_resets_plug++; | ||
| 610 | } | ||
| 611 | } else if (completion_status == FLUSH_RETRY_TIMEOUT) { | ||
| 612 | hmaster->max_concurrent = 1; | ||
| 613 | bcp->timeout_tries++; | ||
| 614 | udelay(TIMEOUT_DELAY); | ||
| 615 | if (bcp->timeout_tries >= TIMEOUTSB4RESET) { | ||
| 616 | bcp->timeout_tries = 0; | ||
| 617 | quiesce_local_uvhub(hmaster); | ||
| 618 | spin_lock(&hmaster->queue_lock); | ||
| 619 | uv_reset_with_ipi(&bau_desc->distribution, | ||
| 620 | this_cpu); | ||
| 621 | spin_unlock(&hmaster->queue_lock); | ||
| 622 | end_uvhub_quiesce(hmaster); | ||
| 623 | bcp->ipi_attempts++; | ||
| 624 | stat->s_resets_timeout++; | ||
| 625 | } | ||
| 626 | } | ||
| 627 | if (bcp->ipi_attempts >= 3) { | ||
| 628 | bcp->ipi_attempts = 0; | ||
| 629 | completion_status = FLUSH_GIVEUP; | ||
| 630 | break; | ||
| 631 | } | ||
| 632 | cpu_relax(); | ||
| 633 | } while ((completion_status == FLUSH_RETRY_PLUGGED) || | ||
| 634 | (completion_status == FLUSH_RETRY_TIMEOUT)); | ||
| 278 | time2 = get_cycles(); | 635 | time2 = get_cycles(); |
| 279 | __get_cpu_var(ptcstats).sflush += (time2 - time1); | ||
| 280 | if (tries > 1) | ||
| 281 | __get_cpu_var(ptcstats).retriesok++; | ||
| 282 | 636 | ||
| 283 | if (completion_status == FLUSH_GIVEUP) { | 637 | if ((completion_status == FLUSH_COMPLETE) && (bcp->conseccompletes > 5) |
| 638 | && (hmaster->max_concurrent < hmaster->max_concurrent_constant)) | ||
| 639 | hmaster->max_concurrent++; | ||
| 640 | |||
| 641 | /* | ||
| 642 | * hold any cpu not timing out here; no other cpu currently held by | ||
| 643 | * the 'throttle' should enter the activation code | ||
| 644 | */ | ||
| 645 | while (hmaster->uvhub_quiesce) | ||
| 646 | cpu_relax(); | ||
| 647 | atomic_dec(&hmaster->active_descriptor_count); | ||
| 648 | |||
| 649 | /* guard against cycles wrap */ | ||
| 650 | if (time2 > time1) | ||
| 651 | stat->s_time += (time2 - time1); | ||
| 652 | else | ||
| 653 | stat->s_requestor--; /* don't count this one */ | ||
| 654 | if (completion_status == FLUSH_COMPLETE && try > 1) | ||
| 655 | stat->s_retriesok++; | ||
| 656 | else if (completion_status == FLUSH_GIVEUP) { | ||
| 284 | /* | 657 | /* |
| 285 | * Cause the caller to do an IPI-style TLB shootdown on | 658 | * Cause the caller to do an IPI-style TLB shootdown on |
| 286 | * the cpu's, all of which are still in the mask. | 659 | * the target cpu's, all of which are still in the mask. |
| 287 | */ | 660 | */ |
| 288 | __get_cpu_var(ptcstats).ptc_i++; | 661 | stat->s_giveup++; |
| 289 | return flush_mask; | 662 | return flush_mask; |
| 290 | } | 663 | } |
| 291 | 664 | ||
| @@ -294,18 +667,17 @@ const struct cpumask *uv_flush_send_and_wait(int cpu, int this_pnode, | |||
| 294 | * use the IPI method of shootdown on them. | 667 | * use the IPI method of shootdown on them. |
| 295 | */ | 668 | */ |
| 296 | for_each_cpu(bit, flush_mask) { | 669 | for_each_cpu(bit, flush_mask) { |
| 297 | pnode = uv_cpu_to_pnode(bit); | 670 | uvhub = uv_cpu_to_blade_id(bit); |
| 298 | if (pnode == this_pnode) | 671 | if (uvhub == this_uvhub) |
| 299 | continue; | 672 | continue; |
| 300 | cpumask_clear_cpu(bit, flush_mask); | 673 | cpumask_clear_cpu(bit, flush_mask); |
| 301 | } | 674 | } |
| 302 | if (!cpumask_empty(flush_mask)) | 675 | if (!cpumask_empty(flush_mask)) |
| 303 | return flush_mask; | 676 | return flush_mask; |
| 677 | |||
| 304 | return NULL; | 678 | return NULL; |
| 305 | } | 679 | } |
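The NULL-versus-mask return convention documented above is what the generic shootdown path keys off: a non-NULL mask means some cpus still need an old-fashioned IPI flush. A hedged, self-contained sketch of that caller-side contract using stub types (the real caller lives in arch/x86/mm/tlb.c and is not part of this hunk; everything below is a stand-in, not kernel code):

    #include <stdio.h>
    #include <stddef.h>

    struct cpumask { unsigned long bits; };   /* stand-in type, not the kernel's */

    /* pretend BAU path: NULL means the broadcast reached everyone,
     * otherwise the returned mask holds the cpus still to be flushed */
    static const struct cpumask *bau_flush_stub(const struct cpumask *m)
    {
            static struct cpumask remaining;

            remaining.bits = m->bits & 0x5;   /* pretend cpus 0 and 2 were not reached */
            return remaining.bits ? &remaining : NULL;
    }

    int main(void)
    {
            struct cpumask targets = { 0xf };
            const struct cpumask *left = bau_flush_stub(&targets);

            if (left)
                    printf("fall back to IPI flush for mask 0x%lx\n", left->bits);
            else
                    printf("broadcast covered everything\n");
            return 0;
    }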
| 306 | 680 | ||
| 307 | static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask); | ||
| 308 | |||
| 309 | /** | 681 | /** |
| 310 | * uv_flush_tlb_others - globally purge translation cache of a virtual | 682 | * uv_flush_tlb_others - globally purge translation cache of a virtual |
| 311 | * address or all TLB's | 683 | * address or all TLB's |
| @@ -322,8 +694,8 @@ static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask); | |||
| 322 | * The caller has derived the cpumask from the mm_struct. This function | 694 | * The caller has derived the cpumask from the mm_struct. This function |
| 323 | * is called only if there are bits set in the mask. (e.g. flush_tlb_page()) | 695 | * is called only if there are bits set in the mask. (e.g. flush_tlb_page()) |
| 324 | * | 696 | * |
| 325 | * The cpumask is converted into a nodemask of the nodes containing | 697 | * The cpumask is converted into a uvhubmask of the uvhubs containing |
| 326 | * the cpus. | 698 | * those cpus. |
| 327 | * | 699 | * |
| 328 | * Note that this function should be called with preemption disabled. | 700 | * Note that this function should be called with preemption disabled. |
| 329 | * | 701 | * |
| @@ -335,52 +707,82 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, | |||
| 335 | struct mm_struct *mm, | 707 | struct mm_struct *mm, |
| 336 | unsigned long va, unsigned int cpu) | 708 | unsigned long va, unsigned int cpu) |
| 337 | { | 709 | { |
| 338 | struct cpumask *flush_mask = __get_cpu_var(uv_flush_tlb_mask); | 710 | int remotes; |
| 339 | int i; | 711 | int tcpu; |
| 340 | int bit; | 712 | int uvhub; |
| 341 | int pnode; | ||
| 342 | int uv_cpu; | ||
| 343 | int this_pnode; | ||
| 344 | int locals = 0; | 713 | int locals = 0; |
| 345 | struct bau_desc *bau_desc; | 714 | struct bau_desc *bau_desc; |
| 715 | struct cpumask *flush_mask; | ||
| 716 | struct ptc_stats *stat; | ||
| 717 | struct bau_control *bcp; | ||
| 346 | 718 | ||
| 347 | cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); | 719 | if (nobau) |
| 720 | return cpumask; | ||
| 348 | 721 | ||
| 349 | uv_cpu = uv_blade_processor_id(); | 722 | bcp = &per_cpu(bau_control, cpu); |
| 350 | this_pnode = uv_hub_info->pnode; | 723 | /* |
| 351 | bau_desc = __get_cpu_var(bau_control).descriptor_base; | 724 | * Each sending cpu has a per-cpu mask which it fills from the caller's |
| 352 | bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu; | 725 | * cpu mask. Only remote cpus are converted to uvhubs and copied. |
| 726 | */ | ||
| 727 | flush_mask = (struct cpumask *)per_cpu(uv_flush_tlb_mask, cpu); | ||
| 728 | /* | ||
| 729 | * copy cpumask to flush_mask, removing current cpu | ||
| 730 | * (current cpu should already have been flushed by the caller and | ||
| 731 | * should never be returned if we return flush_mask) | ||
| 732 | */ | ||
| 733 | cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); | ||
| 734 | if (cpu_isset(cpu, *cpumask)) | ||
| 735 | locals++; /* current cpu was targeted */ | ||
| 353 | 736 | ||
| 354 | bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); | 737 | bau_desc = bcp->descriptor_base; |
| 738 | bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu; | ||
| 355 | 739 | ||
| 356 | i = 0; | 740 | bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); |
| 357 | for_each_cpu(bit, flush_mask) { | 741 | remotes = 0; |
| 358 | pnode = uv_cpu_to_pnode(bit); | 742 | for_each_cpu(tcpu, flush_mask) { |
| 359 | BUG_ON(pnode > (UV_DISTRIBUTION_SIZE - 1)); | 743 | uvhub = uv_cpu_to_blade_id(tcpu); |
| 360 | if (pnode == this_pnode) { | 744 | if (uvhub == bcp->uvhub) { |
| 361 | locals++; | 745 | locals++; |
| 362 | continue; | 746 | continue; |
| 363 | } | 747 | } |
| 364 | bau_node_set(pnode - uv_partition_base_pnode, | 748 | bau_uvhub_set(uvhub, &bau_desc->distribution); |
| 365 | &bau_desc->distribution); | 749 | remotes++; |
| 366 | i++; | ||
| 367 | } | 750 | } |
| 368 | if (i == 0) { | 751 | if (remotes == 0) { |
| 369 | /* | 752 | /* |
| 370 | * no off_node flushing; return status for local node | 753 | * No off_hub flushing; return status for local hub. |
| 754 | * Return the caller's mask if all were local (the current | ||
| 755 | * cpu may be in that mask). | ||
| 371 | */ | 756 | */ |
| 372 | if (locals) | 757 | if (locals) |
| 373 | return flush_mask; | 758 | return cpumask; |
| 374 | else | 759 | else |
| 375 | return NULL; | 760 | return NULL; |
| 376 | } | 761 | } |
| 377 | __get_cpu_var(ptcstats).requestor++; | 762 | stat = &per_cpu(ptcstats, cpu); |
| 378 | __get_cpu_var(ptcstats).ntargeted += i; | 763 | stat->s_requestor++; |
| 764 | stat->s_ntargcpu += remotes; | ||
| 765 | remotes = bau_uvhub_weight(&bau_desc->distribution); | ||
| 766 | stat->s_ntarguvhub += remotes; | ||
| 767 | if (remotes >= 16) | ||
| 768 | stat->s_ntarguvhub16++; | ||
| 769 | else if (remotes >= 8) | ||
| 770 | stat->s_ntarguvhub8++; | ||
| 771 | else if (remotes >= 4) | ||
| 772 | stat->s_ntarguvhub4++; | ||
| 773 | else if (remotes >= 2) | ||
| 774 | stat->s_ntarguvhub2++; | ||
| 775 | else | ||
| 776 | stat->s_ntarguvhub1++; | ||
| 379 | 777 | ||
| 380 | bau_desc->payload.address = va; | 778 | bau_desc->payload.address = va; |
| 381 | bau_desc->payload.sending_cpu = cpu; | 779 | bau_desc->payload.sending_cpu = cpu; |
| 382 | 780 | ||
| 383 | return uv_flush_send_and_wait(uv_cpu, this_pnode, bau_desc, flush_mask); | 781 | /* |
| 782 | * uv_flush_send_and_wait returns null if all cpu's were messaged, or | ||
| 783 | * the adjusted flush_mask if any cpu's were not messaged. | ||
| 784 | */ | ||
| 785 | return uv_flush_send_and_wait(bau_desc, flush_mask, bcp); | ||
| 384 | } | 786 | } |
| 385 | 787 | ||
| 386 | /* | 788 | /* |
| @@ -389,87 +791,70 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, | |||
| 389 | * | 791 | * |
| 390 | * We received a broadcast assist message. | 792 | * We received a broadcast assist message. |
| 391 | * | 793 | * |
| 392 | * Interrupts may have been disabled; this interrupt could represent | 794 | * Interrupts are disabled; this interrupt could represent |
| 393 | * the receipt of several messages. | 795 | * the receipt of several messages. |
| 394 | * | 796 | * |
| 395 | * All cores/threads on this node get this interrupt. | 797 | * All cores/threads on this hub get this interrupt. |
| 396 | * The last one to see it does the s/w ack. | 798 | * The last one to see it does the software ack. |
| 397 | * (the resource will not be freed until noninterruptable cpus see this | 799 | * (the resource will not be freed until noninterruptable cpus see this |
| 398 | * interrupt; hardware will timeout the s/w ack and reply ERROR) | 800 | * interrupt; hardware may timeout the s/w ack and reply ERROR) |
| 399 | */ | 801 | */ |
| 400 | void uv_bau_message_interrupt(struct pt_regs *regs) | 802 | void uv_bau_message_interrupt(struct pt_regs *regs) |
| 401 | { | 803 | { |
| 402 | struct bau_payload_queue_entry *va_queue_first; | ||
| 403 | struct bau_payload_queue_entry *va_queue_last; | ||
| 404 | struct bau_payload_queue_entry *msg; | ||
| 405 | struct pt_regs *old_regs = set_irq_regs(regs); | ||
| 406 | cycles_t time1; | ||
| 407 | cycles_t time2; | ||
| 408 | int msg_slot; | ||
| 409 | int sw_ack_slot; | ||
| 410 | int fw; | ||
| 411 | int count = 0; | 804 | int count = 0; |
| 412 | unsigned long local_pnode; | 805 | cycles_t time_start; |
| 413 | 806 | struct bau_payload_queue_entry *msg; | |
| 414 | ack_APIC_irq(); | 807 | struct bau_control *bcp; |
| 415 | exit_idle(); | 808 | struct ptc_stats *stat; |
| 416 | irq_enter(); | 809 | struct msg_desc msgdesc; |
| 417 | 810 | ||
| 418 | time1 = get_cycles(); | 811 | time_start = get_cycles(); |
| 419 | 812 | bcp = &per_cpu(bau_control, smp_processor_id()); | |
| 420 | local_pnode = uv_blade_to_pnode(uv_numa_blade_id()); | 813 | stat = &per_cpu(ptcstats, smp_processor_id()); |
| 421 | 814 | msgdesc.va_queue_first = bcp->va_queue_first; | |
| 422 | va_queue_first = __get_cpu_var(bau_control).va_queue_first; | 815 | msgdesc.va_queue_last = bcp->va_queue_last; |
| 423 | va_queue_last = __get_cpu_var(bau_control).va_queue_last; | 816 | msg = bcp->bau_msg_head; |
| 424 | |||
| 425 | msg = __get_cpu_var(bau_control).bau_msg_head; | ||
| 426 | while (msg->sw_ack_vector) { | 817 | while (msg->sw_ack_vector) { |
| 427 | count++; | 818 | count++; |
| 428 | fw = msg->sw_ack_vector; | 819 | msgdesc.msg_slot = msg - msgdesc.va_queue_first; |
| 429 | msg_slot = msg - va_queue_first; | 820 | msgdesc.sw_ack_slot = ffs(msg->sw_ack_vector) - 1; |
| 430 | sw_ack_slot = ffs(fw) - 1; | 821 | msgdesc.msg = msg; |
| 431 | 822 | uv_bau_process_message(&msgdesc, bcp); | |
| 432 | uv_bau_process_message(msg, msg_slot, sw_ack_slot); | ||
| 433 | |||
| 434 | msg++; | 823 | msg++; |
| 435 | if (msg > va_queue_last) | 824 | if (msg > msgdesc.va_queue_last) |
| 436 | msg = va_queue_first; | 825 | msg = msgdesc.va_queue_first; |
| 437 | __get_cpu_var(bau_control).bau_msg_head = msg; | 826 | bcp->bau_msg_head = msg; |
| 438 | } | 827 | } |
| 828 | stat->d_time += (get_cycles() - time_start); | ||
| 439 | if (!count) | 829 | if (!count) |
| 440 | __get_cpu_var(ptcstats).nomsg++; | 830 | stat->d_nomsg++; |
| 441 | else if (count > 1) | 831 | else if (count > 1) |
| 442 | __get_cpu_var(ptcstats).multmsg++; | 832 | stat->d_multmsg++; |
| 443 | 833 | ack_APIC_irq(); | |
| 444 | time2 = get_cycles(); | ||
| 445 | __get_cpu_var(ptcstats).dflush += (time2 - time1); | ||
| 446 | |||
| 447 | irq_exit(); | ||
| 448 | set_irq_regs(old_regs); | ||
| 449 | } | 834 | } |
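uv_bau_message_interrupt() consumes the payload queue as a ring: it keeps advancing while entries have a sw_ack_vector, wraps from va_queue_last back to va_queue_first, and leaves bau_msg_head pointing at the next slot for the following interrupt. A small stand-alone sketch of that wrap-around walk over an array standing in for the queue (entry layout and size are illustrative):

    #include <stdio.h>

    #define DEST_Q_SIZE 4                  /* illustrative; the real queue is larger */

    struct entry { int pending; };         /* stands in for msg->sw_ack_vector != 0 */

    int main(void)
    {
            struct entry q[DEST_Q_SIZE] = { {0}, {1}, {1}, {0} };
            struct entry *first = q, *last = q + DEST_Q_SIZE - 1;
            struct entry *head = q + 1;    /* stands in for bcp->bau_msg_head */
            int count = 0;

            while (head->pending) {
                    printf("processing slot %ld\n", (long)(head - first));
                    head->pending = 0;     /* the real handler frees the sw_ack resource */
                    count++;
                    head++;
                    if (head > last)       /* wrap, exactly as the interrupt handler does */
                            head = first;
            }
            printf("%d message(s) handled\n", count);
            return 0;
    }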
| 450 | 835 | ||
| 451 | /* | 836 | /* |
| 452 | * uv_enable_timeouts | 837 | * uv_enable_timeouts |
| 453 | * | 838 | * |
| 454 | * Each target blade (i.e. blades that have cpu's) needs to have | 839 | * Each target uvhub (i.e. a uvhub that has cpu's) needs to have |
| 455 | * shootdown message timeouts enabled. The timeout does not cause | 840 | * shootdown message timeouts enabled. The timeout does not cause |
| 456 | * an interrupt, but causes an error message to be returned to | 841 | * an interrupt, but causes an error message to be returned to |
| 457 | * the sender. | 842 | * the sender. |
| 458 | */ | 843 | */ |
| 459 | static void uv_enable_timeouts(void) | 844 | static void uv_enable_timeouts(void) |
| 460 | { | 845 | { |
| 461 | int blade; | 846 | int uvhub; |
| 462 | int nblades; | 847 | int nuvhubs; |
| 463 | int pnode; | 848 | int pnode; |
| 464 | unsigned long mmr_image; | 849 | unsigned long mmr_image; |
| 465 | 850 | ||
| 466 | nblades = uv_num_possible_blades(); | 851 | nuvhubs = uv_num_possible_blades(); |
| 467 | 852 | ||
| 468 | for (blade = 0; blade < nblades; blade++) { | 853 | for (uvhub = 0; uvhub < nuvhubs; uvhub++) { |
| 469 | if (!uv_blade_nr_possible_cpus(blade)) | 854 | if (!uv_blade_nr_possible_cpus(uvhub)) |
| 470 | continue; | 855 | continue; |
| 471 | 856 | ||
| 472 | pnode = uv_blade_to_pnode(blade); | 857 | pnode = uv_blade_to_pnode(uvhub); |
| 473 | mmr_image = | 858 | mmr_image = |
| 474 | uv_read_global_mmr64(pnode, UVH_LB_BAU_MISC_CONTROL); | 859 | uv_read_global_mmr64(pnode, UVH_LB_BAU_MISC_CONTROL); |
| 475 | /* | 860 | /* |
| @@ -479,16 +864,16 @@ static void uv_enable_timeouts(void) | |||
| 479 | * To program the period, the SOFT_ACK_MODE must be off. | 864 | * To program the period, the SOFT_ACK_MODE must be off. |
| 480 | */ | 865 | */ |
| 481 | mmr_image &= ~((unsigned long)1 << | 866 | mmr_image &= ~((unsigned long)1 << |
| 482 | UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT); | 867 | UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT); |
| 483 | uv_write_global_mmr64 | 868 | uv_write_global_mmr64 |
| 484 | (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); | 869 | (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); |
| 485 | /* | 870 | /* |
| 486 | * Set the 4-bit period. | 871 | * Set the 4-bit period. |
| 487 | */ | 872 | */ |
| 488 | mmr_image &= ~((unsigned long)0xf << | 873 | mmr_image &= ~((unsigned long)0xf << |
| 489 | UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT); | 874 | UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT); |
| 490 | mmr_image |= (UV_INTD_SOFT_ACK_TIMEOUT_PERIOD << | 875 | mmr_image |= (UV_INTD_SOFT_ACK_TIMEOUT_PERIOD << |
| 491 | UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT); | 876 | UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT); |
| 492 | uv_write_global_mmr64 | 877 | uv_write_global_mmr64 |
| 493 | (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); | 878 | (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); |
| 494 | /* | 879 | /* |
| @@ -497,7 +882,7 @@ static void uv_enable_timeouts(void) | |||
| 497 | * indicated in bits 2:0 (7 causes all of them to timeout). | 882 | * indicated in bits 2:0 (7 causes all of them to timeout). |
| 498 | */ | 883 | */ |
| 499 | mmr_image |= ((unsigned long)1 << | 884 | mmr_image |= ((unsigned long)1 << |
| 500 | UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT); | 885 | UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT); |
| 501 | uv_write_global_mmr64 | 886 | uv_write_global_mmr64 |
| 502 | (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); | 887 | (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); |
| 503 | } | 888 | } |
| @@ -522,9 +907,20 @@ static void uv_ptc_seq_stop(struct seq_file *file, void *data) | |||
| 522 | { | 907 | { |
| 523 | } | 908 | } |
| 524 | 909 | ||
| 910 | static inline unsigned long long | ||
| 911 | millisec_2_cycles(unsigned long millisec) | ||
| 912 | { | ||
| 913 | unsigned long ns; | ||
| 914 | unsigned long long cyc; | ||
| 915 | |||
| 916 | ns = millisec * 1000; | ||
| 917 | cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id())); | ||
| 918 | return cyc; | ||
| 919 | } | ||
| 920 | |||
| 525 | /* | 921 | /* |
| 526 | * Display the statistics thru /proc | 922 | * Display the statistics thru /proc. |
| 527 | * data points to the cpu number | 923 | * 'data' points to the cpu number |
| 528 | */ | 924 | */ |
| 529 | static int uv_ptc_seq_show(struct seq_file *file, void *data) | 925 | static int uv_ptc_seq_show(struct seq_file *file, void *data) |
| 530 | { | 926 | { |
| @@ -535,78 +931,155 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data) | |||
| 535 | 931 | ||
| 536 | if (!cpu) { | 932 | if (!cpu) { |
| 537 | seq_printf(file, | 933 | seq_printf(file, |
| 538 | "# cpu requestor requestee one all sretry dretry ptc_i "); | 934 | "# cpu sent stime numuvhubs numuvhubs16 numuvhubs8 "); |
| 539 | seq_printf(file, | 935 | seq_printf(file, |
| 540 | "sw_ack sflush dflush sok dnomsg dmult starget\n"); | 936 | "numuvhubs4 numuvhubs2 numuvhubs1 numcpus dto "); |
| 937 | seq_printf(file, | ||
| 938 | "retries rok resetp resett giveup sto bz throt "); | ||
| 939 | seq_printf(file, | ||
| 940 | "sw_ack recv rtime all "); | ||
| 941 | seq_printf(file, | ||
| 942 | "one mult none retry canc nocan reset rcan\n"); | ||
| 541 | } | 943 | } |
| 542 | if (cpu < num_possible_cpus() && cpu_online(cpu)) { | 944 | if (cpu < num_possible_cpus() && cpu_online(cpu)) { |
| 543 | stat = &per_cpu(ptcstats, cpu); | 945 | stat = &per_cpu(ptcstats, cpu); |
| 544 | seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld ", | 946 | /* source side statistics */ |
| 545 | cpu, stat->requestor, | 947 | seq_printf(file, |
| 546 | stat->requestee, stat->onetlb, stat->alltlb, | 948 | "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", |
| 547 | stat->s_retry, stat->d_retry, stat->ptc_i); | 949 | cpu, stat->s_requestor, cycles_2_us(stat->s_time), |
| 548 | seq_printf(file, "%lx %ld %ld %ld %ld %ld %ld\n", | 950 | stat->s_ntarguvhub, stat->s_ntarguvhub16, |
| 951 | stat->s_ntarguvhub8, stat->s_ntarguvhub4, | ||
| 952 | stat->s_ntarguvhub2, stat->s_ntarguvhub1, | ||
| 953 | stat->s_ntargcpu, stat->s_dtimeout); | ||
| 954 | seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ", | ||
| 955 | stat->s_retry_messages, stat->s_retriesok, | ||
| 956 | stat->s_resets_plug, stat->s_resets_timeout, | ||
| 957 | stat->s_giveup, stat->s_stimeout, | ||
| 958 | stat->s_busy, stat->s_throttles); | ||
| 959 | /* destination side statistics */ | ||
| 960 | seq_printf(file, | ||
| 961 | "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", | ||
| 549 | uv_read_global_mmr64(uv_cpu_to_pnode(cpu), | 962 | uv_read_global_mmr64(uv_cpu_to_pnode(cpu), |
| 550 | UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE), | 963 | UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE), |
| 551 | stat->sflush, stat->dflush, | 964 | stat->d_requestee, cycles_2_us(stat->d_time), |
| 552 | stat->retriesok, stat->nomsg, | 965 | stat->d_alltlb, stat->d_onetlb, stat->d_multmsg, |
| 553 | stat->multmsg, stat->ntargeted); | 966 | stat->d_nomsg, stat->d_retries, stat->d_canceled, |
| 967 | stat->d_nocanceled, stat->d_resets, | ||
| 968 | stat->d_rcanceled); | ||
| 554 | } | 969 | } |
| 555 | 970 | ||
| 556 | return 0; | 971 | return 0; |
| 557 | } | 972 | } |
| 558 | 973 | ||
| 559 | /* | 974 | /* |
| 975 | * -1: reset the statistics | ||
| 560 | * 0: display meaning of the statistics | 976 | * 0: display meaning of the statistics |
| 561 | * >0: retry limit | 977 | * >0: maximum concurrent active descriptors per uvhub (throttle) |
| 562 | */ | 978 | */ |
| 563 | static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user, | 979 | static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user, |
| 564 | size_t count, loff_t *data) | 980 | size_t count, loff_t *data) |
| 565 | { | 981 | { |
| 566 | long newmode; | 982 | int cpu; |
| 983 | long input_arg; | ||
| 567 | char optstr[64]; | 984 | char optstr[64]; |
| 985 | struct ptc_stats *stat; | ||
| 986 | struct bau_control *bcp; | ||
| 568 | 987 | ||
| 569 | if (count == 0 || count > sizeof(optstr)) | 988 | if (count == 0 || count > sizeof(optstr)) |
| 570 | return -EINVAL; | 989 | return -EINVAL; |
| 571 | if (copy_from_user(optstr, user, count)) | 990 | if (copy_from_user(optstr, user, count)) |
| 572 | return -EFAULT; | 991 | return -EFAULT; |
| 573 | optstr[count - 1] = '\0'; | 992 | optstr[count - 1] = '\0'; |
| 574 | if (strict_strtoul(optstr, 10, &newmode) < 0) { | 993 | if (strict_strtol(optstr, 10, &input_arg) < 0) { |
| 575 | printk(KERN_DEBUG "%s is invalid\n", optstr); | 994 | printk(KERN_DEBUG "%s is invalid\n", optstr); |
| 576 | return -EINVAL; | 995 | return -EINVAL; |
| 577 | } | 996 | } |
| 578 | 997 | ||
| 579 | if (newmode == 0) { | 998 | if (input_arg == 0) { |
| 580 | printk(KERN_DEBUG "# cpu: cpu number\n"); | 999 | printk(KERN_DEBUG "# cpu: cpu number\n"); |
| 1000 | printk(KERN_DEBUG "Sender statistics:\n"); | ||
| 1001 | printk(KERN_DEBUG | ||
| 1002 | "sent: number of shootdown messages sent\n"); | ||
| 1003 | printk(KERN_DEBUG | ||
| 1004 | "stime: time spent sending messages\n"); | ||
| 1005 | printk(KERN_DEBUG | ||
| 1006 | "numuvhubs: number of hubs targeted with shootdown\n"); | ||
| 1007 | printk(KERN_DEBUG | ||
| 1008 | "numuvhubs16: number times 16 or more hubs targeted\n"); | ||
| 1009 | printk(KERN_DEBUG | ||
| 1010 | "numuvhubs8: number times 8 or more hubs targeted\n"); | ||
| 1011 | printk(KERN_DEBUG | ||
| 1012 | "numuvhubs4: number times 4 or more hubs targeted\n"); | ||
| 1013 | printk(KERN_DEBUG | ||
| 1014 | "numuvhubs2: number times 2 or more hubs targeted\n"); | ||
| 1015 | printk(KERN_DEBUG | ||
| 1016 | "numuvhubs1: number times 1 hub targeted\n"); | ||
| 1017 | printk(KERN_DEBUG | ||
| 1018 | "numcpus: number of cpus targeted with shootdown\n"); | ||
| 1019 | printk(KERN_DEBUG | ||
| 1020 | "dto: number of destination timeouts\n"); | ||
| 1021 | printk(KERN_DEBUG | ||
| 1022 | "retries: destination timeout retries sent\n"); | ||
| 1023 | printk(KERN_DEBUG | ||
| 1024 | "rok: : destination timeouts successfully retried\n"); | ||
| 1025 | printk(KERN_DEBUG | ||
| 1026 | "resetp: ipi-style resource resets for plugs\n"); | ||
| 1027 | printk(KERN_DEBUG | ||
| 1028 | "resett: ipi-style resource resets for timeouts\n"); | ||
| 1029 | printk(KERN_DEBUG | ||
| 1030 | "giveup: fall-backs to ipi-style shootdowns\n"); | ||
| 1031 | printk(KERN_DEBUG | ||
| 1032 | "sto: number of source timeouts\n"); | ||
| 1033 | printk(KERN_DEBUG | ||
| 1034 | "bz: number of stay-busy's\n"); | ||
| 1035 | printk(KERN_DEBUG | ||
| 1036 | "throt: number times spun in throttle\n"); | ||
| 1037 | printk(KERN_DEBUG "Destination side statistics:\n"); | ||
| 581 | printk(KERN_DEBUG | 1038 | printk(KERN_DEBUG |
| 582 | "requestor: times this cpu was the flush requestor\n"); | 1039 | "sw_ack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE\n"); |
| 583 | printk(KERN_DEBUG | 1040 | printk(KERN_DEBUG |
| 584 | "requestee: times this cpu was requested to flush its TLBs\n"); | 1041 | "recv: shootdown messages received\n"); |
| 585 | printk(KERN_DEBUG | 1042 | printk(KERN_DEBUG |
| 586 | "one: times requested to flush a single address\n"); | 1043 | "rtime: time spent processing messages\n"); |
| 587 | printk(KERN_DEBUG | 1044 | printk(KERN_DEBUG |
| 588 | "all: times requested to flush all TLB's\n"); | 1045 | "all: shootdown all-tlb messages\n"); |
| 589 | printk(KERN_DEBUG | 1046 | printk(KERN_DEBUG |
| 590 | "sretry: number of retries of source-side timeouts\n"); | 1047 | "one: shootdown one-tlb messages\n"); |
| 591 | printk(KERN_DEBUG | 1048 | printk(KERN_DEBUG |
| 592 | "dretry: number of retries of destination-side timeouts\n"); | 1049 | "mult: interrupts that found multiple messages\n"); |
| 593 | printk(KERN_DEBUG | 1050 | printk(KERN_DEBUG |
| 594 | "ptc_i: times UV fell through to IPI-style flushes\n"); | 1051 | "none: interrupts that found no messages\n"); |
| 595 | printk(KERN_DEBUG | 1052 | printk(KERN_DEBUG |
| 596 | "sw_ack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE\n"); | 1053 | "retry: number of retry messages processed\n"); |
| 597 | printk(KERN_DEBUG | 1054 | printk(KERN_DEBUG |
| 598 | "sflush_us: cycles spent in uv_flush_tlb_others()\n"); | 1055 | "canc: number messages canceled by retries\n"); |
| 599 | printk(KERN_DEBUG | 1056 | printk(KERN_DEBUG |
| 600 | "dflush_us: cycles spent in handling flush requests\n"); | 1057 | "nocan: number retries that found nothing to cancel\n"); |
| 601 | printk(KERN_DEBUG "sok: successes on retry\n"); | ||
| 602 | printk(KERN_DEBUG "dnomsg: interrupts with no message\n"); | ||
| 603 | printk(KERN_DEBUG | 1058 | printk(KERN_DEBUG |
| 604 | "dmult: interrupts with multiple messages\n"); | 1059 | "reset: number of ipi-style reset requests processed\n"); |
| 605 | printk(KERN_DEBUG "starget: nodes targeted\n"); | 1060 | printk(KERN_DEBUG |
| 1061 | "rcan: number messages canceled by reset requests\n"); | ||
| 1062 | } else if (input_arg == -1) { | ||
| 1063 | for_each_present_cpu(cpu) { | ||
| 1064 | stat = &per_cpu(ptcstats, cpu); | ||
| 1065 | memset(stat, 0, sizeof(struct ptc_stats)); | ||
| 1066 | } | ||
| 606 | } else { | 1067 | } else { |
| 607 | uv_bau_retry_limit = newmode; | 1068 | uv_bau_max_concurrent = input_arg; |
| 608 | printk(KERN_DEBUG "timeout retry limit:%d\n", | 1069 | bcp = &per_cpu(bau_control, smp_processor_id()); |
| 609 | uv_bau_retry_limit); | 1070 | if (uv_bau_max_concurrent < 1 || |
| 1071 | uv_bau_max_concurrent > bcp->cpus_in_uvhub) { | ||
| 1072 | printk(KERN_DEBUG | ||
| 1073 | "Error: BAU max concurrent %d; %d is invalid\n", | ||
| 1074 | bcp->max_concurrent, uv_bau_max_concurrent); | ||
| 1075 | return -EINVAL; | ||
| 1076 | } | ||
| 1077 | printk(KERN_DEBUG "Set BAU max concurrent:%d\n", | ||
| 1078 | uv_bau_max_concurrent); | ||
| 1079 | for_each_present_cpu(cpu) { | ||
| 1080 | bcp = &per_cpu(bau_control, cpu); | ||
| 1081 | bcp->max_concurrent = uv_bau_max_concurrent; | ||
| 1082 | } | ||
| 610 | } | 1083 | } |
| 611 | 1084 | ||
| 612 | return count; | 1085 | return count; |
| @@ -650,79 +1123,30 @@ static int __init uv_ptc_init(void) | |||
| 650 | } | 1123 | } |
| 651 | 1124 | ||
| 652 | /* | 1125 | /* |
| 653 | * begin the initialization of the per-blade control structures | ||
| 654 | */ | ||
| 655 | static struct bau_control * __init uv_table_bases_init(int blade, int node) | ||
| 656 | { | ||
| 657 | int i; | ||
| 658 | struct bau_msg_status *msp; | ||
| 659 | struct bau_control *bau_tabp; | ||
| 660 | |||
| 661 | bau_tabp = | ||
| 662 | kmalloc_node(sizeof(struct bau_control), GFP_KERNEL, node); | ||
| 663 | BUG_ON(!bau_tabp); | ||
| 664 | |||
| 665 | bau_tabp->msg_statuses = | ||
| 666 | kmalloc_node(sizeof(struct bau_msg_status) * | ||
| 667 | DEST_Q_SIZE, GFP_KERNEL, node); | ||
| 668 | BUG_ON(!bau_tabp->msg_statuses); | ||
| 669 | |||
| 670 | for (i = 0, msp = bau_tabp->msg_statuses; i < DEST_Q_SIZE; i++, msp++) | ||
| 671 | bau_cpubits_clear(&msp->seen_by, (int) | ||
| 672 | uv_blade_nr_possible_cpus(blade)); | ||
| 673 | |||
| 674 | uv_bau_table_bases[blade] = bau_tabp; | ||
| 675 | |||
| 676 | return bau_tabp; | ||
| 677 | } | ||
| 678 | |||
| 679 | /* | ||
| 680 | * finish the initialization of the per-blade control structures | ||
| 681 | */ | ||
| 682 | static void __init | ||
| 683 | uv_table_bases_finish(int blade, | ||
| 684 | struct bau_control *bau_tablesp, | ||
| 685 | struct bau_desc *adp) | ||
| 686 | { | ||
| 687 | struct bau_control *bcp; | ||
| 688 | int cpu; | ||
| 689 | |||
| 690 | for_each_present_cpu(cpu) { | ||
| 691 | if (blade != uv_cpu_to_blade_id(cpu)) | ||
| 692 | continue; | ||
| 693 | |||
| 694 | bcp = (struct bau_control *)&per_cpu(bau_control, cpu); | ||
| 695 | bcp->bau_msg_head = bau_tablesp->va_queue_first; | ||
| 696 | bcp->va_queue_first = bau_tablesp->va_queue_first; | ||
| 697 | bcp->va_queue_last = bau_tablesp->va_queue_last; | ||
| 698 | bcp->msg_statuses = bau_tablesp->msg_statuses; | ||
| 699 | bcp->descriptor_base = adp; | ||
| 700 | } | ||
| 701 | } | ||
| 702 | |||
| 703 | /* | ||
| 704 | * initialize the sending side's sending buffers | 1126 | * initialize the sending side's sending buffers |
| 705 | */ | 1127 | */ |
| 706 | static struct bau_desc * __init | 1128 | static void |
| 707 | uv_activation_descriptor_init(int node, int pnode) | 1129 | uv_activation_descriptor_init(int node, int pnode) |
| 708 | { | 1130 | { |
| 709 | int i; | 1131 | int i; |
| 1132 | int cpu; | ||
| 710 | unsigned long pa; | 1133 | unsigned long pa; |
| 711 | unsigned long m; | 1134 | unsigned long m; |
| 712 | unsigned long n; | 1135 | unsigned long n; |
| 713 | struct bau_desc *adp; | 1136 | struct bau_desc *bau_desc; |
| 714 | struct bau_desc *ad2; | 1137 | struct bau_desc *bd2; |
| 1138 | struct bau_control *bcp; | ||
| 715 | 1139 | ||
| 716 | /* | 1140 | /* |
| 717 | * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR) | 1141 | * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR) |
| 718 | * per cpu; and up to 32 (UV_ADP_SIZE) cpu's per blade | 1142 | * per cpu; and up to 32 (UV_ADP_SIZE) cpu's per uvhub |
| 719 | */ | 1143 | */ |
| 720 | adp = (struct bau_desc *)kmalloc_node(sizeof(struct bau_desc)* | 1144 | bau_desc = (struct bau_desc *)kmalloc_node(sizeof(struct bau_desc)* |
| 721 | UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node); | 1145 | UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node); |
| 722 | BUG_ON(!adp); | 1146 | BUG_ON(!bau_desc); |
| 723 | 1147 | ||
| 724 | pa = uv_gpa(adp); /* need the real nasid*/ | 1148 | pa = uv_gpa(bau_desc); /* need the real nasid*/ |
| 725 | n = uv_gpa_to_pnode(pa); | 1149 | n = pa >> uv_nshift; |
| 726 | m = pa & uv_mmask; | 1150 | m = pa & uv_mmask; |
| 727 | 1151 | ||
| 728 | uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, | 1152 | uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, |
| @@ -731,96 +1155,188 @@ uv_activation_descriptor_init(int node, int pnode) | |||
| 731 | /* | 1155 | /* |
| 732 | * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each | 1156 | * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each |
| 733 | * cpu even though we only use the first one; one descriptor can | 1157 | * cpu even though we only use the first one; one descriptor can |
| 734 | * describe a broadcast to 256 nodes. | 1158 | * describe a broadcast to 256 uv hubs. |
| 735 | */ | 1159 | */ |
| 736 | for (i = 0, ad2 = adp; i < (UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR); | 1160 | for (i = 0, bd2 = bau_desc; i < (UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR); |
| 737 | i++, ad2++) { | 1161 | i++, bd2++) { |
| 738 | memset(ad2, 0, sizeof(struct bau_desc)); | 1162 | memset(bd2, 0, sizeof(struct bau_desc)); |
| 739 | ad2->header.sw_ack_flag = 1; | 1163 | bd2->header.sw_ack_flag = 1; |
| 740 | /* | 1164 | /* |
| 741 | * base_dest_nodeid is the first node in the partition, so | 1165 | * base_dest_nodeid is the nasid (pnode<<1) of the first uvhub |
| 742 | * the bit map will indicate partition-relative node numbers. | 1166 | * in the partition. The bit map will indicate uvhub numbers, |
| 743 | * note that base_dest_nodeid is actually a nasid. | 1167 | * which are 0-N in a partition. Pnodes are unique system-wide. |
| 744 | */ | 1168 | */ |
| 745 | ad2->header.base_dest_nodeid = uv_partition_base_pnode << 1; | 1169 | bd2->header.base_dest_nodeid = uv_partition_base_pnode << 1; |
| 746 | ad2->header.dest_subnodeid = 0x10; /* the LB */ | 1170 | bd2->header.dest_subnodeid = 0x10; /* the LB */ |
| 747 | ad2->header.command = UV_NET_ENDPOINT_INTD; | 1171 | bd2->header.command = UV_NET_ENDPOINT_INTD; |
| 748 | ad2->header.int_both = 1; | 1172 | bd2->header.int_both = 1; |
| 749 | /* | 1173 | /* |
| 750 | * all others need to be set to zero: | 1174 | * all others need to be set to zero: |
| 751 | * fairness chaining multilevel count replied_to | 1175 | * fairness chaining multilevel count replied_to |
| 752 | */ | 1176 | */ |
| 753 | } | 1177 | } |
| 754 | return adp; | 1178 | for_each_present_cpu(cpu) { |
| 1179 | if (pnode != uv_blade_to_pnode(uv_cpu_to_blade_id(cpu))) | ||
| 1180 | continue; | ||
| 1181 | bcp = &per_cpu(bau_control, cpu); | ||
| 1182 | bcp->descriptor_base = bau_desc; | ||
| 1183 | } | ||
| 755 | } | 1184 | } |
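Both the descriptor-base setup above and the payload-queue setup below split a global physical address into a node number and a node-local offset: n = pa >> uv_nshift and m = pa & uv_mmask, where uv_bau_init() takes uv_nshift and uv_mmask from uv_hub_info->m_val. A stand-alone sketch of that split, with an m_val of 40 assumed purely for illustration (64-bit arithmetic):

    #include <stdio.h>

    int main(void)
    {
            unsigned int m_val = 40;                         /* assumed; really read from uv_hub_info */
            unsigned long uv_nshift = m_val;
            unsigned long uv_mmask = (1UL << m_val) - 1;

            unsigned long pa = (3UL << m_val) | 0x12345000;  /* pretend: node 3, offset 0x12345000 */
            unsigned long n = pa >> uv_nshift;               /* node part, programmed into the MMR */
            unsigned long m = pa & uv_mmask;                 /* node-local offset part */

            printf("pa=%#lx -> n=%lu m=%#lx\n", pa, n, m);
            return 0;
    }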
| 756 | 1185 | ||
| 757 | /* | 1186 | /* |
| 758 | * initialize the destination side's receiving buffers | 1187 | * initialize the destination side's receiving buffers |
| 1188 | * entered for each uvhub in the partition | ||
| 1189 | * - node is first node (kernel memory notion) on the uvhub | ||
| 1190 | * - pnode is the uvhub's physical identifier | ||
| 759 | */ | 1191 | */ |
| 760 | static struct bau_payload_queue_entry * __init | 1192 | static void |
| 761 | uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp) | 1193 | uv_payload_queue_init(int node, int pnode) |
| 762 | { | 1194 | { |
| 763 | struct bau_payload_queue_entry *pqp; | ||
| 764 | unsigned long pa; | ||
| 765 | int pn; | 1195 | int pn; |
| 1196 | int cpu; | ||
| 766 | char *cp; | 1197 | char *cp; |
| 1198 | unsigned long pa; | ||
| 1199 | struct bau_payload_queue_entry *pqp; | ||
| 1200 | struct bau_payload_queue_entry *pqp_malloc; | ||
| 1201 | struct bau_control *bcp; | ||
| 767 | 1202 | ||
| 768 | pqp = (struct bau_payload_queue_entry *) kmalloc_node( | 1203 | pqp = (struct bau_payload_queue_entry *) kmalloc_node( |
| 769 | (DEST_Q_SIZE + 1) * sizeof(struct bau_payload_queue_entry), | 1204 | (DEST_Q_SIZE + 1) * sizeof(struct bau_payload_queue_entry), |
| 770 | GFP_KERNEL, node); | 1205 | GFP_KERNEL, node); |
| 771 | BUG_ON(!pqp); | 1206 | BUG_ON(!pqp); |
| 1207 | pqp_malloc = pqp; | ||
| 772 | 1208 | ||
| 773 | cp = (char *)pqp + 31; | 1209 | cp = (char *)pqp + 31; |
| 774 | pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5); | 1210 | pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5); |
| 775 | bau_tablesp->va_queue_first = pqp; | 1211 | |
| 1212 | for_each_present_cpu(cpu) { | ||
| 1213 | if (pnode != uv_cpu_to_pnode(cpu)) | ||
| 1214 | continue; | ||
| 1215 | /* for every cpu on this pnode: */ | ||
| 1216 | bcp = &per_cpu(bau_control, cpu); | ||
| 1217 | bcp->va_queue_first = pqp; | ||
| 1218 | bcp->bau_msg_head = pqp; | ||
| 1219 | bcp->va_queue_last = pqp + (DEST_Q_SIZE - 1); | ||
| 1220 | } | ||
| 776 | /* | 1221 | /* |
| 777 | * need the pnode of where the memory was really allocated | 1222 | * need the pnode of where the memory was really allocated |
| 778 | */ | 1223 | */ |
| 779 | pa = uv_gpa(pqp); | 1224 | pa = uv_gpa(pqp); |
| 780 | pn = uv_gpa_to_pnode(pa); | 1225 | pn = pa >> uv_nshift; |
| 781 | uv_write_global_mmr64(pnode, | 1226 | uv_write_global_mmr64(pnode, |
| 782 | UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, | 1227 | UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, |
| 783 | ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | | 1228 | ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | |
| 784 | uv_physnodeaddr(pqp)); | 1229 | uv_physnodeaddr(pqp)); |
| 785 | uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL, | 1230 | uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL, |
| 786 | uv_physnodeaddr(pqp)); | 1231 | uv_physnodeaddr(pqp)); |
| 787 | bau_tablesp->va_queue_last = pqp + (DEST_Q_SIZE - 1); | ||
| 788 | uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST, | 1232 | uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST, |
| 789 | (unsigned long) | 1233 | (unsigned long) |
| 790 | uv_physnodeaddr(bau_tablesp->va_queue_last)); | 1234 | uv_physnodeaddr(pqp + (DEST_Q_SIZE - 1))); |
| 1235 | /* in effect, all msg_type's are set to MSG_NOOP */ | ||
| 791 | memset(pqp, 0, sizeof(struct bau_payload_queue_entry) * DEST_Q_SIZE); | 1236 | memset(pqp, 0, sizeof(struct bau_payload_queue_entry) * DEST_Q_SIZE); |
| 792 | |||
| 793 | return pqp; | ||
| 794 | } | 1237 | } |
| 795 | 1238 | ||
| 796 | /* | 1239 | /* |
| 797 | * Initialization of each UV blade's structures | 1240 | * Initialization of each UV hub's structures |
| 798 | */ | 1241 | */ |
| 799 | static int __init uv_init_blade(int blade) | 1242 | static void __init uv_init_uvhub(int uvhub, int vector) |
| 800 | { | 1243 | { |
| 801 | int node; | 1244 | int node; |
| 802 | int pnode; | 1245 | int pnode; |
| 803 | unsigned long pa; | ||
| 804 | unsigned long apicid; | 1246 | unsigned long apicid; |
| 805 | struct bau_desc *adp; | 1247 | |
| 806 | struct bau_payload_queue_entry *pqp; | 1248 | node = uvhub_to_first_node(uvhub); |
| 807 | struct bau_control *bau_tablesp; | 1249 | pnode = uv_blade_to_pnode(uvhub); |
| 808 | 1250 | uv_activation_descriptor_init(node, pnode); | |
| 809 | node = blade_to_first_node(blade); | 1251 | uv_payload_queue_init(node, pnode); |
| 810 | bau_tablesp = uv_table_bases_init(blade, node); | ||
| 811 | pnode = uv_blade_to_pnode(blade); | ||
| 812 | adp = uv_activation_descriptor_init(node, pnode); | ||
| 813 | pqp = uv_payload_queue_init(node, pnode, bau_tablesp); | ||
| 814 | uv_table_bases_finish(blade, bau_tablesp, adp); | ||
| 815 | /* | 1252 | /* |
| 816 | * the below initialization can't be in firmware because the | 1253 | * the below initialization can't be in firmware because the |
| 817 | * messaging IRQ will be determined by the OS | 1254 | * messaging IRQ will be determined by the OS |
| 818 | */ | 1255 | */ |
| 819 | apicid = blade_to_first_apicid(blade); | 1256 | apicid = uvhub_to_first_apicid(uvhub); |
| 820 | pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG); | ||
| 821 | uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, | 1257 | uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, |
| 822 | ((apicid << 32) | UV_BAU_MESSAGE)); | 1258 | ((apicid << 32) | vector)); |
| 823 | return 0; | 1259 | } |
| 1260 | |||
| 1261 | /* | ||
| 1262 | * initialize the bau_control structure for each cpu | ||
| 1263 | */ | ||
| 1264 | static void uv_init_per_cpu(int nuvhubs) | ||
| 1265 | { | ||
| 1266 | int i, j, k; | ||
| 1267 | int cpu; | ||
| 1268 | int pnode; | ||
| 1269 | int uvhub; | ||
| 1270 | short socket = 0; | ||
| 1271 | struct bau_control *bcp; | ||
| 1272 | struct uvhub_desc *bdp; | ||
| 1273 | struct socket_desc *sdp; | ||
| 1274 | struct bau_control *hmaster = NULL; | ||
| 1275 | struct bau_control *smaster = NULL; | ||
| 1276 | struct socket_desc { | ||
| 1277 | short num_cpus; | ||
| 1278 | short cpu_number[16]; | ||
| 1279 | }; | ||
| 1280 | struct uvhub_desc { | ||
| 1281 | short num_sockets; | ||
| 1282 | short num_cpus; | ||
| 1283 | short uvhub; | ||
| 1284 | short pnode; | ||
| 1285 | struct socket_desc socket[2]; | ||
| 1286 | }; | ||
| 1287 | struct uvhub_desc *uvhub_descs; | ||
| 1288 | |||
| 1289 | uvhub_descs = (struct uvhub_desc *) | ||
| 1290 | kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL); | ||
| 1291 | memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc)); | ||
| 1292 | for_each_present_cpu(cpu) { | ||
| 1293 | bcp = &per_cpu(bau_control, cpu); | ||
| 1294 | memset(bcp, 0, sizeof(struct bau_control)); | ||
| 1295 | spin_lock_init(&bcp->masks_lock); | ||
| 1296 | bcp->max_concurrent = uv_bau_max_concurrent; | ||
| 1297 | pnode = uv_cpu_hub_info(cpu)->pnode; | ||
| 1298 | uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; | ||
| 1299 | bdp = &uvhub_descs[uvhub]; | ||
| 1300 | bdp->num_cpus++; | ||
| 1301 | bdp->uvhub = uvhub; | ||
| 1302 | bdp->pnode = pnode; | ||
| 1303 | /* time interval to catch a hardware stay-busy bug */ | ||
| 1304 | bcp->timeout_interval = millisec_2_cycles(3); | ||
| 1305 | /* kludge: assume uv_hub.h is constant */ | ||
| 1306 | socket = (cpu_physical_id(cpu)>>5)&1; | ||
| 1307 | if (socket >= bdp->num_sockets) | ||
| 1308 | bdp->num_sockets = socket+1; | ||
| 1309 | sdp = &bdp->socket[socket]; | ||
| 1310 | sdp->cpu_number[sdp->num_cpus] = cpu; | ||
| 1311 | sdp->num_cpus++; | ||
| 1312 | } | ||
| 1313 | socket = 0; | ||
| 1314 | for_each_possible_blade(uvhub) { | ||
| 1315 | bdp = &uvhub_descs[uvhub]; | ||
| 1316 | for (i = 0; i < bdp->num_sockets; i++) { | ||
| 1317 | sdp = &bdp->socket[i]; | ||
| 1318 | for (j = 0; j < sdp->num_cpus; j++) { | ||
| 1319 | cpu = sdp->cpu_number[j]; | ||
| 1320 | bcp = &per_cpu(bau_control, cpu); | ||
| 1321 | bcp->cpu = cpu; | ||
| 1322 | if (j == 0) { | ||
| 1323 | smaster = bcp; | ||
| 1324 | if (i == 0) | ||
| 1325 | hmaster = bcp; | ||
| 1326 | } | ||
| 1327 | bcp->cpus_in_uvhub = bdp->num_cpus; | ||
| 1328 | bcp->cpus_in_socket = sdp->num_cpus; | ||
| 1329 | bcp->socket_master = smaster; | ||
| 1330 | bcp->uvhub_master = hmaster; | ||
| 1331 | for (k = 0; k < DEST_Q_SIZE; k++) | ||
| 1332 | bcp->socket_acknowledge_count[k] = 0; | ||
| 1333 | bcp->uvhub_cpu = | ||
| 1334 | uv_cpu_hub_info(cpu)->blade_processor_id; | ||
| 1335 | } | ||
| 1336 | socket++; | ||
| 1337 | } | ||
| 1338 | } | ||
| 1339 | kfree(uvhub_descs); | ||
| 824 | } | 1340 | } |
| 825 | 1341 | ||
| 826 | /* | 1342 | /* |
| @@ -828,38 +1344,54 @@ static int __init uv_init_blade(int blade) | |||
| 828 | */ | 1344 | */ |
| 829 | static int __init uv_bau_init(void) | 1345 | static int __init uv_bau_init(void) |
| 830 | { | 1346 | { |
| 831 | int blade; | 1347 | int uvhub; |
| 832 | int nblades; | 1348 | int pnode; |
| 1349 | int nuvhubs; | ||
| 833 | int cur_cpu; | 1350 | int cur_cpu; |
| 1351 | int vector; | ||
| 1352 | unsigned long mmr; | ||
| 834 | 1353 | ||
| 835 | if (!is_uv_system()) | 1354 | if (!is_uv_system()) |
| 836 | return 0; | 1355 | return 0; |
| 837 | 1356 | ||
| 1357 | if (nobau) | ||
| 1358 | return 0; | ||
| 1359 | |||
| 838 | for_each_possible_cpu(cur_cpu) | 1360 | for_each_possible_cpu(cur_cpu) |
| 839 | zalloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu), | 1361 | zalloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu), |
| 840 | GFP_KERNEL, cpu_to_node(cur_cpu)); | 1362 | GFP_KERNEL, cpu_to_node(cur_cpu)); |
| 841 | 1363 | ||
| 842 | uv_bau_retry_limit = 1; | 1364 | uv_bau_max_concurrent = MAX_BAU_CONCURRENT; |
| 1365 | uv_nshift = uv_hub_info->m_val; | ||
| 843 | uv_mmask = (1UL << uv_hub_info->m_val) - 1; | 1366 | uv_mmask = (1UL << uv_hub_info->m_val) - 1; |
| 844 | nblades = uv_num_possible_blades(); | 1367 | nuvhubs = uv_num_possible_blades(); |
| 845 | 1368 | ||
| 846 | uv_bau_table_bases = (struct bau_control **) | 1369 | uv_init_per_cpu(nuvhubs); |
| 847 | kmalloc(nblades * sizeof(struct bau_control *), GFP_KERNEL); | ||
| 848 | BUG_ON(!uv_bau_table_bases); | ||
| 849 | 1370 | ||
| 850 | uv_partition_base_pnode = 0x7fffffff; | 1371 | uv_partition_base_pnode = 0x7fffffff; |
| 851 | for (blade = 0; blade < nblades; blade++) | 1372 | for (uvhub = 0; uvhub < nuvhubs; uvhub++) |
| 852 | if (uv_blade_nr_possible_cpus(blade) && | 1373 | if (uv_blade_nr_possible_cpus(uvhub) && |
| 853 | (uv_blade_to_pnode(blade) < uv_partition_base_pnode)) | 1374 | (uv_blade_to_pnode(uvhub) < uv_partition_base_pnode)) |
| 854 | uv_partition_base_pnode = uv_blade_to_pnode(blade); | 1375 | uv_partition_base_pnode = uv_blade_to_pnode(uvhub); |
| 855 | for (blade = 0; blade < nblades; blade++) | 1376 | |
| 856 | if (uv_blade_nr_possible_cpus(blade)) | 1377 | vector = UV_BAU_MESSAGE; |
| 857 | uv_init_blade(blade); | 1378 | for_each_possible_blade(uvhub) |
| 858 | 1379 | if (uv_blade_nr_possible_cpus(uvhub)) | |
| 859 | alloc_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1); | 1380 | uv_init_uvhub(uvhub, vector); |
| 1381 | |||
| 860 | uv_enable_timeouts(); | 1382 | uv_enable_timeouts(); |
| 1383 | alloc_intr_gate(vector, uv_bau_message_intr1); | ||
| 1384 | |||
| 1385 | for_each_possible_blade(uvhub) { | ||
| 1386 | pnode = uv_blade_to_pnode(uvhub); | ||
| 1387 | /* INIT the bau */ | ||
| 1388 | uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_ACTIVATION_CONTROL, | ||
| 1389 | ((unsigned long)1 << 63)); | ||
| 1390 | mmr = 1; /* should be 1 to broadcast to both sockets */ | ||
| 1391 | uv_write_global_mmr64(pnode, UVH_BAU_DATA_BROADCAST, mmr); | ||
| 1392 | } | ||
| 861 | 1393 | ||
| 862 | return 0; | 1394 | return 0; |
| 863 | } | 1395 | } |
| 864 | __initcall(uv_bau_init); | 1396 | core_initcall(uv_bau_init); |
| 865 | __initcall(uv_ptc_init); | 1397 | core_initcall(uv_ptc_init); |
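The per-CPU setup added above wires every CPU's bau_control to two masters: the first CPU enumerated on its socket becomes the socket_master (the j == 0 case) and the first CPU of the hub's first socket becomes the uvhub_master (the i == 0 case inside it). A minimal user-space sketch of that selection pattern, with made-up counts and a stripped-down struct (the real code also fills in CPU counts, timeout intervals and the acknowledge arrays), might look like this:

```c
#include <stdio.h>

struct ctl {
	int cpu;
	struct ctl *socket_master;	/* first CPU on this socket */
	struct ctl *hub_master;		/* first CPU on this hub    */
};

int main(void)
{
	/* hypothetical layout: one hub, 2 sockets, 4 CPUs per socket */
	struct ctl ctl[8];
	struct ctl *smaster = NULL, *hmaster = NULL;
	int i, j, cpu = 0;

	for (i = 0; i < 2; i++) {		/* sockets on the hub  */
		for (j = 0; j < 4; j++, cpu++) {/* CPUs on the socket  */
			struct ctl *bcp = &ctl[cpu];

			bcp->cpu = cpu;
			if (j == 0) {		/* first CPU of socket */
				smaster = bcp;
				if (i == 0)	/* first socket of hub */
					hmaster = bcp;
			}
			bcp->socket_master = smaster;
			bcp->hub_master = hmaster;
		}
	}

	for (cpu = 0; cpu < 8; cpu++)
		printf("cpu %d: socket master %d, hub master %d\n", cpu,
		       ctl[cpu].socket_master->cpu, ctl[cpu].hub_master->cpu);
	return 0;
}
```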
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 1168e4454188..142d70c74b02 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
| @@ -15,6 +15,7 @@ | |||
| 15 | #include <linux/kprobes.h> | 15 | #include <linux/kprobes.h> |
| 16 | #include <linux/uaccess.h> | 16 | #include <linux/uaccess.h> |
| 17 | #include <linux/kdebug.h> | 17 | #include <linux/kdebug.h> |
| 18 | #include <linux/kgdb.h> | ||
| 18 | #include <linux/kernel.h> | 19 | #include <linux/kernel.h> |
| 19 | #include <linux/module.h> | 20 | #include <linux/module.h> |
| 20 | #include <linux/ptrace.h> | 21 | #include <linux/ptrace.h> |
| @@ -108,15 +109,6 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) | |||
| 108 | dec_preempt_count(); | 109 | dec_preempt_count(); |
| 109 | } | 110 | } |
| 110 | 111 | ||
| 111 | #ifdef CONFIG_X86_32 | ||
| 112 | static inline void | ||
| 113 | die_if_kernel(const char *str, struct pt_regs *regs, long err) | ||
| 114 | { | ||
| 115 | if (!user_mode_vm(regs)) | ||
| 116 | die(str, regs, err); | ||
| 117 | } | ||
| 118 | #endif | ||
| 119 | |||
| 120 | static void __kprobes | 112 | static void __kprobes |
| 121 | do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, | 113 | do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, |
| 122 | long error_code, siginfo_t *info) | 114 | long error_code, siginfo_t *info) |
| @@ -460,6 +452,11 @@ void restart_nmi(void) | |||
| 460 | /* May run on IST stack. */ | 452 | /* May run on IST stack. */ |
| 461 | dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) | 453 | dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) |
| 462 | { | 454 | { |
| 455 | #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP | ||
| 456 | if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) | ||
| 457 | == NOTIFY_STOP) | ||
| 458 | return; | ||
| 459 | #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ | ||
| 463 | #ifdef CONFIG_KPROBES | 460 | #ifdef CONFIG_KPROBES |
| 464 | if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) | 461 | if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) |
| 465 | == NOTIFY_STOP) | 462 | == NOTIFY_STOP) |
| @@ -543,11 +540,11 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
| 543 | 540 | ||
| 544 | /* DR6 may or may not be cleared by the CPU */ | 541 | /* DR6 may or may not be cleared by the CPU */ |
| 545 | set_debugreg(0, 6); | 542 | set_debugreg(0, 6); |
| 543 | |||
| 546 | /* | 544 | /* |
| 547 | * The processor cleared BTF, so don't mark that we need it set. | 545 | * The processor cleared BTF, so don't mark that we need it set. |
| 548 | */ | 546 | */ |
| 549 | clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); | 547 | clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP); |
| 550 | tsk->thread.debugctlmsr = 0; | ||
| 551 | 548 | ||
| 552 | /* Store the virtualized DR6 value */ | 549 | /* Store the virtualized DR6 value */ |
| 553 | tsk->thread.debugreg6 = dr6; | 550 | tsk->thread.debugreg6 = dr6; |
| @@ -585,55 +582,67 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
| 585 | return; | 582 | return; |
| 586 | } | 583 | } |
| 587 | 584 | ||
| 588 | #ifdef CONFIG_X86_64 | ||
| 589 | static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) | ||
| 590 | { | ||
| 591 | if (fixup_exception(regs)) | ||
| 592 | return 1; | ||
| 593 | |||
| 594 | notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE); | ||
| 595 | /* Illegal floating point operation in the kernel */ | ||
| 596 | current->thread.trap_no = trapnr; | ||
| 597 | die(str, regs, 0); | ||
| 598 | return 0; | ||
| 599 | } | ||
| 600 | #endif | ||
| 601 | |||
| 602 | /* | 585 | /* |
| 603 | * Note that we play around with the 'TS' bit in an attempt to get | 586 | * Note that we play around with the 'TS' bit in an attempt to get |
| 604 | * the correct behaviour even in the presence of the asynchronous | 587 | * the correct behaviour even in the presence of the asynchronous |
| 605 | * IRQ13 behaviour | 588 | * IRQ13 behaviour |
| 606 | */ | 589 | */ |
| 607 | void math_error(void __user *ip) | 590 | void math_error(struct pt_regs *regs, int error_code, int trapnr) |
| 608 | { | 591 | { |
| 609 | struct task_struct *task; | 592 | struct task_struct *task = current; |
| 610 | siginfo_t info; | 593 | siginfo_t info; |
| 611 | unsigned short cwd, swd, err; | 594 | unsigned short err; |
| 595 | char *str = (trapnr == 16) ? "fpu exception" : "simd exception"; | ||
| 596 | |||
| 597 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, SIGFPE) == NOTIFY_STOP) | ||
| 598 | return; | ||
| 599 | conditional_sti(regs); | ||
| 600 | |||
| 601 | if (!user_mode_vm(regs)) | ||
| 602 | { | ||
| 603 | if (!fixup_exception(regs)) { | ||
| 604 | task->thread.error_code = error_code; | ||
| 605 | task->thread.trap_no = trapnr; | ||
| 606 | die(str, regs, error_code); | ||
| 607 | } | ||
| 608 | return; | ||
| 609 | } | ||
| 612 | 610 | ||
| 613 | /* | 611 | /* |
| 614 | * Save the info for the exception handler and clear the error. | 612 | * Save the info for the exception handler and clear the error. |
| 615 | */ | 613 | */ |
| 616 | task = current; | ||
| 617 | save_init_fpu(task); | 614 | save_init_fpu(task); |
| 618 | task->thread.trap_no = 16; | 615 | task->thread.trap_no = trapnr; |
| 619 | task->thread.error_code = 0; | 616 | task->thread.error_code = error_code; |
| 620 | info.si_signo = SIGFPE; | 617 | info.si_signo = SIGFPE; |
| 621 | info.si_errno = 0; | 618 | info.si_errno = 0; |
| 622 | info.si_addr = ip; | 619 | info.si_addr = (void __user *)regs->ip; |
| 623 | /* | 620 | if (trapnr == 16) { |
| 624 | * (~cwd & swd) will mask out exceptions that are not set to unmasked | 621 | unsigned short cwd, swd; |
| 625 | * status. 0x3f is the exception bits in these regs, 0x200 is the | 622 | /* |
| 626 | * C1 reg you need in case of a stack fault, 0x040 is the stack | 623 | * (~cwd & swd) will mask out exceptions that are not set to unmasked |
| 627 | * fault bit. We should only be taking one exception at a time, | 624 | * status. 0x3f is the exception bits in these regs, 0x200 is the |
| 628 | * so if this combination doesn't produce any single exception, | 625 | * C1 reg you need in case of a stack fault, 0x040 is the stack |
| 629 | * then we have a bad program that isn't synchronizing its FPU usage | 626 | * fault bit. We should only be taking one exception at a time, |
| 630 | * and it will suffer the consequences since we won't be able to | 627 | * so if this combination doesn't produce any single exception, |
| 631 | * fully reproduce the context of the exception | 628 | * then we have a bad program that isn't synchronizing its FPU usage |
| 632 | */ | 629 | * and it will suffer the consequences since we won't be able to |
| 633 | cwd = get_fpu_cwd(task); | 630 | * fully reproduce the context of the exception |
| 634 | swd = get_fpu_swd(task); | 631 | */ |
| 632 | cwd = get_fpu_cwd(task); | ||
| 633 | swd = get_fpu_swd(task); | ||
| 635 | 634 | ||
| 636 | err = swd & ~cwd; | 635 | err = swd & ~cwd; |
| 636 | } else { | ||
| 637 | /* | ||
| 638 | * The SIMD FPU exceptions are handled a little differently, as there | ||
| 639 | * is only a single status/control register. Thus, to determine which | ||
| 640 | * unmasked exception was caught we must mask the exception mask bits | ||
| 641 | * at 0x1f80, and then use these to mask the exception bits at 0x3f. | ||
| 642 | */ | ||
| 643 | unsigned short mxcsr = get_fpu_mxcsr(task); | ||
| 644 | err = ~(mxcsr >> 7) & mxcsr; | ||
| 645 | } | ||
| 637 | 646 | ||
| 638 | if (err & 0x001) { /* Invalid op */ | 647 | if (err & 0x001) { /* Invalid op */ |
| 639 | /* | 648 | /* |
| @@ -662,97 +671,17 @@ void math_error(void __user *ip) | |||
| 662 | 671 | ||
| 663 | dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) | 672 | dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) |
| 664 | { | 673 | { |
| 665 | conditional_sti(regs); | ||
| 666 | |||
| 667 | #ifdef CONFIG_X86_32 | 674 | #ifdef CONFIG_X86_32 |
| 668 | ignore_fpu_irq = 1; | 675 | ignore_fpu_irq = 1; |
| 669 | #else | ||
| 670 | if (!user_mode(regs) && | ||
| 671 | kernel_math_error(regs, "kernel x87 math error", 16)) | ||
| 672 | return; | ||
| 673 | #endif | 676 | #endif |
| 674 | 677 | ||
| 675 | math_error((void __user *)regs->ip); | 678 | math_error(regs, error_code, 16); |
| 676 | } | ||
| 677 | |||
| 678 | static void simd_math_error(void __user *ip) | ||
| 679 | { | ||
| 680 | struct task_struct *task; | ||
| 681 | siginfo_t info; | ||
| 682 | unsigned short mxcsr; | ||
| 683 | |||
| 684 | /* | ||
| 685 | * Save the info for the exception handler and clear the error. | ||
| 686 | */ | ||
| 687 | task = current; | ||
| 688 | save_init_fpu(task); | ||
| 689 | task->thread.trap_no = 19; | ||
| 690 | task->thread.error_code = 0; | ||
| 691 | info.si_signo = SIGFPE; | ||
| 692 | info.si_errno = 0; | ||
| 693 | info.si_code = __SI_FAULT; | ||
| 694 | info.si_addr = ip; | ||
| 695 | /* | ||
| 696 | * The SIMD FPU exceptions are handled a little differently, as there | ||
| 697 | * is only a single status/control register. Thus, to determine which | ||
| 698 | * unmasked exception was caught we must mask the exception mask bits | ||
| 699 | * at 0x1f80, and then use these to mask the exception bits at 0x3f. | ||
| 700 | */ | ||
| 701 | mxcsr = get_fpu_mxcsr(task); | ||
| 702 | switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) { | ||
| 703 | case 0x000: | ||
| 704 | default: | ||
| 705 | break; | ||
| 706 | case 0x001: /* Invalid Op */ | ||
| 707 | info.si_code = FPE_FLTINV; | ||
| 708 | break; | ||
| 709 | case 0x002: /* Denormalize */ | ||
| 710 | case 0x010: /* Underflow */ | ||
| 711 | info.si_code = FPE_FLTUND; | ||
| 712 | break; | ||
| 713 | case 0x004: /* Zero Divide */ | ||
| 714 | info.si_code = FPE_FLTDIV; | ||
| 715 | break; | ||
| 716 | case 0x008: /* Overflow */ | ||
| 717 | info.si_code = FPE_FLTOVF; | ||
| 718 | break; | ||
| 719 | case 0x020: /* Precision */ | ||
| 720 | info.si_code = FPE_FLTRES; | ||
| 721 | break; | ||
| 722 | } | ||
| 723 | force_sig_info(SIGFPE, &info, task); | ||
| 724 | } | 679 | } |
| 725 | 680 | ||
| 726 | dotraplinkage void | 681 | dotraplinkage void |
| 727 | do_simd_coprocessor_error(struct pt_regs *regs, long error_code) | 682 | do_simd_coprocessor_error(struct pt_regs *regs, long error_code) |
| 728 | { | 683 | { |
| 729 | conditional_sti(regs); | 684 | math_error(regs, error_code, 19); |
| 730 | |||
| 731 | #ifdef CONFIG_X86_32 | ||
| 732 | if (cpu_has_xmm) { | ||
| 733 | /* Handle SIMD FPU exceptions on PIII+ processors. */ | ||
| 734 | ignore_fpu_irq = 1; | ||
| 735 | simd_math_error((void __user *)regs->ip); | ||
| 736 | return; | ||
| 737 | } | ||
| 738 | /* | ||
| 739 | * Handle strange cache flush from user space exception | ||
| 740 | * in all other cases. This is undocumented behaviour. | ||
| 741 | */ | ||
| 742 | if (regs->flags & X86_VM_MASK) { | ||
| 743 | handle_vm86_fault((struct kernel_vm86_regs *)regs, error_code); | ||
| 744 | return; | ||
| 745 | } | ||
| 746 | current->thread.trap_no = 19; | ||
| 747 | current->thread.error_code = error_code; | ||
| 748 | die_if_kernel("cache flush denied", regs, error_code); | ||
| 749 | force_sig(SIGSEGV, current); | ||
| 750 | #else | ||
| 751 | if (!user_mode(regs) && | ||
| 752 | kernel_math_error(regs, "kernel simd math error", 19)) | ||
| 753 | return; | ||
| 754 | simd_math_error((void __user *)regs->ip); | ||
| 755 | #endif | ||
| 756 | } | 685 | } |
| 757 | 686 | ||
| 758 | dotraplinkage void | 687 | dotraplinkage void |
| @@ -879,6 +808,16 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) | |||
| 879 | } | 808 | } |
| 880 | #endif | 809 | #endif |
| 881 | 810 | ||
| 811 | /* Set of traps needed for early debugging. */ | ||
| 812 | void __init early_trap_init(void) | ||
| 813 | { | ||
| 814 | set_intr_gate_ist(1, &debug, DEBUG_STACK); | ||
| 815 | /* int3 can be called from all */ | ||
| 816 | set_system_intr_gate_ist(3, &int3, DEBUG_STACK); | ||
| 817 | set_intr_gate(14, &page_fault); | ||
| 818 | load_idt(&idt_descr); | ||
| 819 | } | ||
| 820 | |||
| 882 | void __init trap_init(void) | 821 | void __init trap_init(void) |
| 883 | { | 822 | { |
| 884 | int i; | 823 | int i; |
| @@ -892,10 +831,7 @@ void __init trap_init(void) | |||
| 892 | #endif | 831 | #endif |
| 893 | 832 | ||
| 894 | set_intr_gate(0, ÷_error); | 833 | set_intr_gate(0, ÷_error); |
| 895 | set_intr_gate_ist(1, &debug, DEBUG_STACK); | ||
| 896 | set_intr_gate_ist(2, &nmi, NMI_STACK); | 834 | set_intr_gate_ist(2, &nmi, NMI_STACK); |
| 897 | /* int3 can be called from all */ | ||
| 898 | set_system_intr_gate_ist(3, &int3, DEBUG_STACK); | ||
| 899 | /* int4 can be called from all */ | 835 | /* int4 can be called from all */ |
| 900 | set_system_intr_gate(4, &overflow); | 836 | set_system_intr_gate(4, &overflow); |
| 901 | set_intr_gate(5, &bounds); | 837 | set_intr_gate(5, &bounds); |
| @@ -911,7 +847,6 @@ void __init trap_init(void) | |||
| 911 | set_intr_gate(11, &segment_not_present); | 847 | set_intr_gate(11, &segment_not_present); |
| 912 | set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK); | 848 | set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK); |
| 913 | set_intr_gate(13, &general_protection); | 849 | set_intr_gate(13, &general_protection); |
| 914 | set_intr_gate(14, &page_fault); | ||
| 915 | set_intr_gate(15, &spurious_interrupt_bug); | 850 | set_intr_gate(15, &spurious_interrupt_bug); |
| 916 | set_intr_gate(16, &coprocessor_error); | 851 | set_intr_gate(16, &coprocessor_error); |
| 917 | set_intr_gate(17, &alignment_check); | 852 | set_intr_gate(17, &alignment_check); |
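The merged math_error() above computes, on both paths, a bitmask of exceptions that are signalled and unmasked (err = swd & ~cwd for the x87 case, err = ~(mxcsr >> 7) & mxcsr for the SIMD case, since MXCSR keeps the mask bits at 0x1f80 and the flag bits at 0x3f), then maps that to a SIGFPE si_code following the same table the old simd_math_error() switch used. A stand-alone sketch of that bit arithmetic, leaving out the x87 stack-fault special case:

```c
#include <stdio.h>

/* x87: flags live in the status word, masks in the control word */
static unsigned x87_unmasked(unsigned short cwd, unsigned short swd)
{
	return swd & ~cwd & 0x3f;
}

/* SSE: flags (0x3f) and masks (0x1f80) share the single MXCSR register */
static unsigned simd_unmasked(unsigned mxcsr)
{
	return ~(mxcsr >> 7) & mxcsr & 0x3f;
}

static const char *fpe_name(unsigned err)
{
	if (err & 0x001) return "FPE_FLTINV";	/* invalid operation     */
	if (err & 0x004) return "FPE_FLTDIV";	/* divide by zero        */
	if (err & 0x008) return "FPE_FLTOVF";	/* overflow              */
	if (err & 0x012) return "FPE_FLTUND";	/* denormal or underflow */
	if (err & 0x020) return "FPE_FLTRES";	/* inexact result        */
	return "none";
}

int main(void)
{
	/* example: control word masks everything except invalid-op (bit 0) */
	unsigned short cwd = 0x033e, swd = 0x0241;
	/* example: everything masked except divide-by-zero, ZE flagged */
	unsigned mxcsr = 0x1d84;

	printf("x87 : %s\n", fpe_name(x87_unmasked(cwd, swd)));
	printf("simd: %s\n", fpe_name(simd_unmasked(mxcsr)));
	return 0;
}
```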
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c index 1d40336b030a..1132129db792 100644 --- a/arch/x86/kernel/uv_irq.c +++ b/arch/x86/kernel/uv_irq.c | |||
| @@ -44,7 +44,7 @@ static void uv_ack_apic(unsigned int irq) | |||
| 44 | ack_APIC_irq(); | 44 | ack_APIC_irq(); |
| 45 | } | 45 | } |
| 46 | 46 | ||
| 47 | struct irq_chip uv_irq_chip = { | 47 | static struct irq_chip uv_irq_chip = { |
| 48 | .name = "UV-CORE", | 48 | .name = "UV-CORE", |
| 49 | .startup = uv_noop_ret, | 49 | .startup = uv_noop_ret, |
| 50 | .shutdown = uv_noop, | 50 | .shutdown = uv_noop, |
| @@ -141,7 +141,7 @@ int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode) | |||
| 141 | */ | 141 | */ |
| 142 | static int | 142 | static int |
| 143 | arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | 143 | arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, |
| 144 | unsigned long mmr_offset, int restrict) | 144 | unsigned long mmr_offset, int limit) |
| 145 | { | 145 | { |
| 146 | const struct cpumask *eligible_cpu = cpumask_of(cpu); | 146 | const struct cpumask *eligible_cpu = cpumask_of(cpu); |
| 147 | struct irq_desc *desc = irq_to_desc(irq); | 147 | struct irq_desc *desc = irq_to_desc(irq); |
| @@ -160,7 +160,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | |||
| 160 | if (err != 0) | 160 | if (err != 0) |
| 161 | return err; | 161 | return err; |
| 162 | 162 | ||
| 163 | if (restrict == UV_AFFINITY_CPU) | 163 | if (limit == UV_AFFINITY_CPU) |
| 164 | desc->status |= IRQ_NO_BALANCING; | 164 | desc->status |= IRQ_NO_BALANCING; |
| 165 | else | 165 | else |
| 166 | desc->status |= IRQ_MOVE_PCNTXT; | 166 | desc->status |= IRQ_MOVE_PCNTXT; |
| @@ -214,7 +214,7 @@ static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask) | |||
| 214 | unsigned long mmr_value; | 214 | unsigned long mmr_value; |
| 215 | struct uv_IO_APIC_route_entry *entry; | 215 | struct uv_IO_APIC_route_entry *entry; |
| 216 | unsigned long mmr_offset; | 216 | unsigned long mmr_offset; |
| 217 | unsigned mmr_pnode; | 217 | int mmr_pnode; |
| 218 | 218 | ||
| 219 | if (set_desc_affinity(desc, mask, &dest)) | 219 | if (set_desc_affinity(desc, mask, &dest)) |
| 220 | return -1; | 220 | return -1; |
| @@ -248,7 +248,7 @@ static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask) | |||
| 248 | * interrupt is raised. | 248 | * interrupt is raised. |
| 249 | */ | 249 | */ |
| 250 | int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, | 250 | int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, |
| 251 | unsigned long mmr_offset, int restrict) | 251 | unsigned long mmr_offset, int limit) |
| 252 | { | 252 | { |
| 253 | int irq, ret; | 253 | int irq, ret; |
| 254 | 254 | ||
| @@ -258,7 +258,7 @@ int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, | |||
| 258 | return -EBUSY; | 258 | return -EBUSY; |
| 259 | 259 | ||
| 260 | ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset, | 260 | ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset, |
| 261 | restrict); | 261 | limit); |
| 262 | if (ret == irq) | 262 | if (ret == irq) |
| 263 | uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade); | 263 | uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade); |
| 264 | else | 264 | else |
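The parameter rename from "restrict" to "limit" in uv_irq.c is presumably motivated by the fact that restrict is a reserved word in C99 (and a pointer qualifier), so reusing it as an identifier trips up static checkers and stricter compile modes. The qualifier's legitimate use looks like this small, non-kernel example:

```c
#include <stdio.h>

/*
 * In C99 "restrict" qualifies a pointer, promising the compiler that the
 * pointed-to objects are not accessed through any other pointer in scope,
 * which is why it cannot also serve as a parameter name.
 */
static void copy_words(unsigned long *restrict dst,
		       const unsigned long *restrict src, int n)
{
	int i;

	for (i = 0; i < n; i++)		/* dst and src are promised not to alias */
		dst[i] = src[i];
}

int main(void)
{
	unsigned long a[4] = { 1, 2, 3, 4 }, b[4];

	copy_words(b, a, 4);
	printf("%lu %lu %lu %lu\n", b[0], b[1], b[2], b[3]);
	return 0;
}
```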
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 693920b22496..1b950d151e58 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c | |||
| @@ -54,7 +54,6 @@ EXPORT_SYMBOL(memcpy); | |||
| 54 | EXPORT_SYMBOL(__memcpy); | 54 | EXPORT_SYMBOL(__memcpy); |
| 55 | 55 | ||
| 56 | EXPORT_SYMBOL(empty_zero_page); | 56 | EXPORT_SYMBOL(empty_zero_page); |
| 57 | EXPORT_SYMBOL(init_level4_pgt); | ||
| 58 | #ifndef CONFIG_PARAVIRT | 57 | #ifndef CONFIG_PARAVIRT |
| 59 | EXPORT_SYMBOL(native_load_gs_index); | 58 | EXPORT_SYMBOL(native_load_gs_index); |
| 60 | #endif | 59 | #endif |
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 782c3a362ec6..37e68fc5e24a 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c | |||
| @@ -99,7 +99,7 @@ int save_i387_xstate(void __user *buf) | |||
| 99 | if (err) | 99 | if (err) |
| 100 | return err; | 100 | return err; |
| 101 | 101 | ||
| 102 | if (task_thread_info(tsk)->status & TS_XSAVE) | 102 | if (use_xsave()) |
| 103 | err = xsave_user(buf); | 103 | err = xsave_user(buf); |
| 104 | else | 104 | else |
| 105 | err = fxsave_user(buf); | 105 | err = fxsave_user(buf); |
| @@ -109,14 +109,14 @@ int save_i387_xstate(void __user *buf) | |||
| 109 | task_thread_info(tsk)->status &= ~TS_USEDFPU; | 109 | task_thread_info(tsk)->status &= ~TS_USEDFPU; |
| 110 | stts(); | 110 | stts(); |
| 111 | } else { | 111 | } else { |
| 112 | if (__copy_to_user(buf, &tsk->thread.xstate->fxsave, | 112 | if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave, |
| 113 | xstate_size)) | 113 | xstate_size)) |
| 114 | return -1; | 114 | return -1; |
| 115 | } | 115 | } |
| 116 | 116 | ||
| 117 | clear_used_math(); /* trigger finit */ | 117 | clear_used_math(); /* trigger finit */ |
| 118 | 118 | ||
| 119 | if (task_thread_info(tsk)->status & TS_XSAVE) { | 119 | if (use_xsave()) { |
| 120 | struct _fpstate __user *fx = buf; | 120 | struct _fpstate __user *fx = buf; |
| 121 | struct _xstate __user *x = buf; | 121 | struct _xstate __user *x = buf; |
| 122 | u64 xstate_bv; | 122 | u64 xstate_bv; |
| @@ -225,7 +225,7 @@ int restore_i387_xstate(void __user *buf) | |||
| 225 | clts(); | 225 | clts(); |
| 226 | task_thread_info(current)->status |= TS_USEDFPU; | 226 | task_thread_info(current)->status |= TS_USEDFPU; |
| 227 | } | 227 | } |
| 228 | if (task_thread_info(tsk)->status & TS_XSAVE) | 228 | if (use_xsave()) |
| 229 | err = restore_user_xstate(buf); | 229 | err = restore_user_xstate(buf); |
| 230 | else | 230 | else |
| 231 | err = fxrstor_checking((__force struct i387_fxsave_struct *) | 231 | err = fxrstor_checking((__force struct i387_fxsave_struct *) |
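The use_xsave() helper that replaces the per-thread TS_XSAVE status test above is, presumably, a CPU-feature predicate rather than per-task state. As a rough user-space analogue (not the kernel's implementation, which goes through its own cpu-feature machinery), XSAVE support can be probed via CPUID leaf 1, ECX bit 26:

```c
#include <stdio.h>
#include <cpuid.h>

/*
 * User-space sketch of a "does this CPU implement XSAVE?" predicate.
 * CPUID.1:ECX bit 26 is the XSAVE feature flag; bit 27 (OSXSAVE) would
 * additionally report whether the OS enabled it in CR4.
 */
static int cpu_has_xsave(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
		return 0;
	return (ecx >> 26) & 1;
}

int main(void)
{
	printf("XSAVE %ssupported\n", cpu_has_xsave() ? "" : "not ");
	return 0;
}
```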
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 4dade6ac0827..5ac0bb465ed6 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
| @@ -33,6 +33,7 @@ | |||
| 33 | #include <asm/kvm_emulate.h> | 33 | #include <asm/kvm_emulate.h> |
| 34 | 34 | ||
| 35 | #include "x86.h" | 35 | #include "x86.h" |
| 36 | #include "tss.h" | ||
| 36 | 37 | ||
| 37 | /* | 38 | /* |
| 38 | * Opcode effective-address decode tables. | 39 | * Opcode effective-address decode tables. |
| @@ -50,6 +51,8 @@ | |||
| 50 | #define DstReg (2<<1) /* Register operand. */ | 51 | #define DstReg (2<<1) /* Register operand. */ |
| 51 | #define DstMem (3<<1) /* Memory operand. */ | 52 | #define DstMem (3<<1) /* Memory operand. */ |
| 52 | #define DstAcc (4<<1) /* Destination Accumulator */ | 53 | #define DstAcc (4<<1) /* Destination Accumulator */ |
| 54 | #define DstDI (5<<1) /* Destination is in ES:(E)DI */ | ||
| 55 | #define DstMem64 (6<<1) /* 64bit memory operand */ | ||
| 53 | #define DstMask (7<<1) | 56 | #define DstMask (7<<1) |
| 54 | /* Source operand type. */ | 57 | /* Source operand type. */ |
| 55 | #define SrcNone (0<<4) /* No source operand. */ | 58 | #define SrcNone (0<<4) /* No source operand. */ |
| @@ -63,6 +66,7 @@ | |||
| 63 | #define SrcOne (7<<4) /* Implied '1' */ | 66 | #define SrcOne (7<<4) /* Implied '1' */ |
| 64 | #define SrcImmUByte (8<<4) /* 8-bit unsigned immediate operand. */ | 67 | #define SrcImmUByte (8<<4) /* 8-bit unsigned immediate operand. */ |
| 65 | #define SrcImmU (9<<4) /* Immediate operand, unsigned */ | 68 | #define SrcImmU (9<<4) /* Immediate operand, unsigned */ |
| 69 | #define SrcSI (0xa<<4) /* Source is in the DS:RSI */ | ||
| 66 | #define SrcMask (0xf<<4) | 70 | #define SrcMask (0xf<<4) |
| 67 | /* Generic ModRM decode. */ | 71 | /* Generic ModRM decode. */ |
| 68 | #define ModRM (1<<8) | 72 | #define ModRM (1<<8) |
| @@ -85,6 +89,9 @@ | |||
| 85 | #define Src2ImmByte (2<<29) | 89 | #define Src2ImmByte (2<<29) |
| 86 | #define Src2One (3<<29) | 90 | #define Src2One (3<<29) |
| 87 | #define Src2Imm16 (4<<29) | 91 | #define Src2Imm16 (4<<29) |
| 92 | #define Src2Mem16 (5<<29) /* Used for Ep encoding. First argument has to be | ||
| 93 | in memory and second argument is located | ||
| 94 | immediately after the first one in memory. */ | ||
| 88 | #define Src2Mask (7<<29) | 95 | #define Src2Mask (7<<29) |
| 89 | 96 | ||
| 90 | enum { | 97 | enum { |
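The operand-type constants being extended here are bit-fields packed into a single u32 per opcode-table entry: bits 1-3 hold the destination kind (DstMask = 7<<1), bits 4-7 the source kind (SrcMask = 0xf<<4), and bits 29-31 the second source (Src2Mask = 7<<29). The decoder masks the entry and switches on the field. A stand-alone sketch of the packing and extraction, using only the values visible in the hunks above:

```c
#include <stdio.h>

/* Field layout of an opcode-table entry (values mirror the #defines above):
 * bit 0       ByteOp
 * bits 1-3    destination operand kind (DstMask  = 7 << 1)
 * bits 4-7    source operand kind      (SrcMask  = 0xf << 4)
 * bits 29-31  second source kind       (Src2Mask = 7 << 29)
 */
#define ByteOp   (1u << 0)
#define DstDI    (5u << 1)	/* destination is ES:(E)DI */
#define DstMask  (7u << 1)
#define SrcSI    (0xAu << 4)	/* source is DS:(E)SI      */
#define SrcMask  (0xFu << 4)

int main(void)
{
	/* roughly what a movs-style entry carries (the real table also ORs
	 * in Mov and String, whose bit positions are defined elsewhere) */
	unsigned int d = ByteOp | SrcSI | DstDI;

	printf("dst kind %u (DstDI is %u)\n", (d & DstMask) >> 1, DstDI >> 1);
	printf("src kind %u (SrcSI is %u)\n", (d & SrcMask) >> 4, SrcSI >> 4);
	printf("byte-sized operands: %s\n", (d & ByteOp) ? "yes" : "no");
	return 0;
}
```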
| @@ -147,8 +154,8 @@ static u32 opcode_table[256] = { | |||
| 147 | 0, 0, 0, 0, | 154 | 0, 0, 0, 0, |
| 148 | /* 0x68 - 0x6F */ | 155 | /* 0x68 - 0x6F */ |
| 149 | SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0, | 156 | SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0, |
| 150 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */ | 157 | DstDI | ByteOp | Mov | String, DstDI | Mov | String, /* insb, insw/insd */ |
| 151 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */ | 158 | SrcSI | ByteOp | ImplicitOps | String, SrcSI | ImplicitOps | String, /* outsb, outsw/outsd */ |
| 152 | /* 0x70 - 0x77 */ | 159 | /* 0x70 - 0x77 */ |
| 153 | SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, | 160 | SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, |
| 154 | SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, | 161 | SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, |
| @@ -173,12 +180,12 @@ static u32 opcode_table[256] = { | |||
| 173 | /* 0xA0 - 0xA7 */ | 180 | /* 0xA0 - 0xA7 */ |
| 174 | ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, | 181 | ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, |
| 175 | ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs, | 182 | ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs, |
| 176 | ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, | 183 | ByteOp | SrcSI | DstDI | Mov | String, SrcSI | DstDI | Mov | String, |
| 177 | ByteOp | ImplicitOps | String, ImplicitOps | String, | 184 | ByteOp | SrcSI | DstDI | String, SrcSI | DstDI | String, |
| 178 | /* 0xA8 - 0xAF */ | 185 | /* 0xA8 - 0xAF */ |
| 179 | 0, 0, ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, | 186 | 0, 0, ByteOp | DstDI | Mov | String, DstDI | Mov | String, |
| 180 | ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, | 187 | ByteOp | SrcSI | DstAcc | Mov | String, SrcSI | DstAcc | Mov | String, |
| 181 | ByteOp | ImplicitOps | String, ImplicitOps | String, | 188 | ByteOp | DstDI | String, DstDI | String, |
| 182 | /* 0xB0 - 0xB7 */ | 189 | /* 0xB0 - 0xB7 */ |
| 183 | ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, | 190 | ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, |
| 184 | ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, | 191 | ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, |
| @@ -204,13 +211,13 @@ static u32 opcode_table[256] = { | |||
| 204 | 0, 0, 0, 0, 0, 0, 0, 0, | 211 | 0, 0, 0, 0, 0, 0, 0, 0, |
| 205 | /* 0xE0 - 0xE7 */ | 212 | /* 0xE0 - 0xE7 */ |
| 206 | 0, 0, 0, 0, | 213 | 0, 0, 0, 0, |
| 207 | ByteOp | SrcImmUByte, SrcImmUByte, | 214 | ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc, |
| 208 | ByteOp | SrcImmUByte, SrcImmUByte, | 215 | ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc, |
| 209 | /* 0xE8 - 0xEF */ | 216 | /* 0xE8 - 0xEF */ |
| 210 | SrcImm | Stack, SrcImm | ImplicitOps, | 217 | SrcImm | Stack, SrcImm | ImplicitOps, |
| 211 | SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps, | 218 | SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps, |
| 212 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, | 219 | SrcNone | ByteOp | DstAcc, SrcNone | DstAcc, |
| 213 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, | 220 | SrcNone | ByteOp | DstAcc, SrcNone | DstAcc, |
| 214 | /* 0xF0 - 0xF7 */ | 221 | /* 0xF0 - 0xF7 */ |
| 215 | 0, 0, 0, 0, | 222 | 0, 0, 0, 0, |
| 216 | ImplicitOps | Priv, ImplicitOps, Group | Group3_Byte, Group | Group3, | 223 | ImplicitOps | Priv, ImplicitOps, Group | Group3_Byte, Group | Group3, |
| @@ -343,7 +350,8 @@ static u32 group_table[] = { | |||
| 343 | [Group5*8] = | 350 | [Group5*8] = |
| 344 | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, | 351 | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, |
| 345 | SrcMem | ModRM | Stack, 0, | 352 | SrcMem | ModRM | Stack, 0, |
| 346 | SrcMem | ModRM | Stack, 0, SrcMem | ModRM | Stack, 0, | 353 | SrcMem | ModRM | Stack, SrcMem | ModRM | Src2Mem16 | ImplicitOps, |
| 354 | SrcMem | ModRM | Stack, 0, | ||
| 347 | [Group7*8] = | 355 | [Group7*8] = |
| 348 | 0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv, | 356 | 0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv, |
| 349 | SrcNone | ModRM | DstMem | Mov, 0, | 357 | SrcNone | ModRM | DstMem | Mov, 0, |
| @@ -353,14 +361,14 @@ static u32 group_table[] = { | |||
| 353 | DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM | Lock, | 361 | DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM | Lock, |
| 354 | DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock, | 362 | DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock, |
| 355 | [Group9*8] = | 363 | [Group9*8] = |
| 356 | 0, ImplicitOps | ModRM | Lock, 0, 0, 0, 0, 0, 0, | 364 | 0, DstMem64 | ModRM | Lock, 0, 0, 0, 0, 0, 0, |
| 357 | }; | 365 | }; |
| 358 | 366 | ||
| 359 | static u32 group2_table[] = { | 367 | static u32 group2_table[] = { |
| 360 | [Group7*8] = | 368 | [Group7*8] = |
| 361 | SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM, | 369 | SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM | Priv, |
| 362 | SrcNone | ModRM | DstMem | Mov, 0, | 370 | SrcNone | ModRM | DstMem | Mov, 0, |
| 363 | SrcMem16 | ModRM | Mov, 0, | 371 | SrcMem16 | ModRM | Mov | Priv, 0, |
| 364 | [Group9*8] = | 372 | [Group9*8] = |
| 365 | 0, 0, 0, 0, 0, 0, 0, 0, | 373 | 0, 0, 0, 0, 0, 0, 0, 0, |
| 366 | }; | 374 | }; |
| @@ -562,7 +570,7 @@ static u32 group2_table[] = { | |||
| 562 | #define insn_fetch(_type, _size, _eip) \ | 570 | #define insn_fetch(_type, _size, _eip) \ |
| 563 | ({ unsigned long _x; \ | 571 | ({ unsigned long _x; \ |
| 564 | rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size)); \ | 572 | rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size)); \ |
| 565 | if (rc != 0) \ | 573 | if (rc != X86EMUL_CONTINUE) \ |
| 566 | goto done; \ | 574 | goto done; \ |
| 567 | (_eip) += (_size); \ | 575 | (_eip) += (_size); \ |
| 568 | (_type)_x; \ | 576 | (_type)_x; \ |
| @@ -638,40 +646,40 @@ static unsigned long ss_base(struct x86_emulate_ctxt *ctxt) | |||
| 638 | 646 | ||
| 639 | static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, | 647 | static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, |
| 640 | struct x86_emulate_ops *ops, | 648 | struct x86_emulate_ops *ops, |
| 641 | unsigned long linear, u8 *dest) | 649 | unsigned long eip, u8 *dest) |
| 642 | { | 650 | { |
| 643 | struct fetch_cache *fc = &ctxt->decode.fetch; | 651 | struct fetch_cache *fc = &ctxt->decode.fetch; |
| 644 | int rc; | 652 | int rc; |
| 645 | int size; | 653 | int size, cur_size; |
| 646 | 654 | ||
| 647 | if (linear < fc->start || linear >= fc->end) { | 655 | if (eip == fc->end) { |
| 648 | size = min(15UL, PAGE_SIZE - offset_in_page(linear)); | 656 | cur_size = fc->end - fc->start; |
| 649 | rc = ops->fetch(linear, fc->data, size, ctxt->vcpu, NULL); | 657 | size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip)); |
| 650 | if (rc) | 658 | rc = ops->fetch(ctxt->cs_base + eip, fc->data + cur_size, |
| 659 | size, ctxt->vcpu, NULL); | ||
| 660 | if (rc != X86EMUL_CONTINUE) | ||
| 651 | return rc; | 661 | return rc; |
| 652 | fc->start = linear; | 662 | fc->end += size; |
| 653 | fc->end = linear + size; | ||
| 654 | } | 663 | } |
| 655 | *dest = fc->data[linear - fc->start]; | 664 | *dest = fc->data[eip - fc->start]; |
| 656 | return 0; | 665 | return X86EMUL_CONTINUE; |
| 657 | } | 666 | } |
| 658 | 667 | ||
| 659 | static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, | 668 | static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, |
| 660 | struct x86_emulate_ops *ops, | 669 | struct x86_emulate_ops *ops, |
| 661 | unsigned long eip, void *dest, unsigned size) | 670 | unsigned long eip, void *dest, unsigned size) |
| 662 | { | 671 | { |
| 663 | int rc = 0; | 672 | int rc; |
| 664 | 673 | ||
| 665 | /* x86 instructions are limited to 15 bytes. */ | 674 | /* x86 instructions are limited to 15 bytes. */ |
| 666 | if (eip + size - ctxt->decode.eip_orig > 15) | 675 | if (eip + size - ctxt->eip > 15) |
| 667 | return X86EMUL_UNHANDLEABLE; | 676 | return X86EMUL_UNHANDLEABLE; |
| 668 | eip += ctxt->cs_base; | ||
| 669 | while (size--) { | 677 | while (size--) { |
| 670 | rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++); | 678 | rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++); |
| 671 | if (rc) | 679 | if (rc != X86EMUL_CONTINUE) |
| 672 | return rc; | 680 | return rc; |
| 673 | } | 681 | } |
| 674 | return 0; | 682 | return X86EMUL_CONTINUE; |
| 675 | } | 683 | } |
| 676 | 684 | ||
| 677 | /* | 685 | /* |
| @@ -702,7 +710,7 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt, | |||
| 702 | *address = 0; | 710 | *address = 0; |
| 703 | rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2, | 711 | rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2, |
| 704 | ctxt->vcpu, NULL); | 712 | ctxt->vcpu, NULL); |
| 705 | if (rc) | 713 | if (rc != X86EMUL_CONTINUE) |
| 706 | return rc; | 714 | return rc; |
| 707 | rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes, | 715 | rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes, |
| 708 | ctxt->vcpu, NULL); | 716 | ctxt->vcpu, NULL); |
| @@ -782,7 +790,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, | |||
| 782 | struct decode_cache *c = &ctxt->decode; | 790 | struct decode_cache *c = &ctxt->decode; |
| 783 | u8 sib; | 791 | u8 sib; |
| 784 | int index_reg = 0, base_reg = 0, scale; | 792 | int index_reg = 0, base_reg = 0, scale; |
| 785 | int rc = 0; | 793 | int rc = X86EMUL_CONTINUE; |
| 786 | 794 | ||
| 787 | if (c->rex_prefix) { | 795 | if (c->rex_prefix) { |
| 788 | c->modrm_reg = (c->rex_prefix & 4) << 1; /* REX.R */ | 796 | c->modrm_reg = (c->rex_prefix & 4) << 1; /* REX.R */ |
| @@ -895,7 +903,7 @@ static int decode_abs(struct x86_emulate_ctxt *ctxt, | |||
| 895 | struct x86_emulate_ops *ops) | 903 | struct x86_emulate_ops *ops) |
| 896 | { | 904 | { |
| 897 | struct decode_cache *c = &ctxt->decode; | 905 | struct decode_cache *c = &ctxt->decode; |
| 898 | int rc = 0; | 906 | int rc = X86EMUL_CONTINUE; |
| 899 | 907 | ||
| 900 | switch (c->ad_bytes) { | 908 | switch (c->ad_bytes) { |
| 901 | case 2: | 909 | case 2: |
| @@ -916,14 +924,18 @@ int | |||
| 916 | x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | 924 | x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) |
| 917 | { | 925 | { |
| 918 | struct decode_cache *c = &ctxt->decode; | 926 | struct decode_cache *c = &ctxt->decode; |
| 919 | int rc = 0; | 927 | int rc = X86EMUL_CONTINUE; |
| 920 | int mode = ctxt->mode; | 928 | int mode = ctxt->mode; |
| 921 | int def_op_bytes, def_ad_bytes, group; | 929 | int def_op_bytes, def_ad_bytes, group; |
| 922 | 930 | ||
| 923 | /* Shadow copy of register state. Committed on successful emulation. */ | ||
| 924 | 931 | ||
| 932 | /* we cannot decode insn before we complete previous rep insn */ | ||
| 933 | WARN_ON(ctxt->restart); | ||
| 934 | |||
| 935 | /* Shadow copy of register state. Committed on successful emulation. */ | ||
| 925 | memset(c, 0, sizeof(struct decode_cache)); | 936 | memset(c, 0, sizeof(struct decode_cache)); |
| 926 | c->eip = c->eip_orig = kvm_rip_read(ctxt->vcpu); | 937 | c->eip = ctxt->eip; |
| 938 | c->fetch.start = c->fetch.end = c->eip; | ||
| 927 | ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS); | 939 | ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS); |
| 928 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); | 940 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); |
| 929 | 941 | ||
| @@ -1015,11 +1027,6 @@ done_prefixes: | |||
| 1015 | } | 1027 | } |
| 1016 | } | 1028 | } |
| 1017 | 1029 | ||
| 1018 | if (mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { | ||
| 1019 | kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction"); | ||
| 1020 | return -1; | ||
| 1021 | } | ||
| 1022 | |||
| 1023 | if (c->d & Group) { | 1030 | if (c->d & Group) { |
| 1024 | group = c->d & GroupMask; | 1031 | group = c->d & GroupMask; |
| 1025 | c->modrm = insn_fetch(u8, 1, c->eip); | 1032 | c->modrm = insn_fetch(u8, 1, c->eip); |
| @@ -1046,7 +1053,7 @@ done_prefixes: | |||
| 1046 | rc = decode_modrm(ctxt, ops); | 1053 | rc = decode_modrm(ctxt, ops); |
| 1047 | else if (c->d & MemAbs) | 1054 | else if (c->d & MemAbs) |
| 1048 | rc = decode_abs(ctxt, ops); | 1055 | rc = decode_abs(ctxt, ops); |
| 1049 | if (rc) | 1056 | if (rc != X86EMUL_CONTINUE) |
| 1050 | goto done; | 1057 | goto done; |
| 1051 | 1058 | ||
| 1052 | if (!c->has_seg_override) | 1059 | if (!c->has_seg_override) |
| @@ -1057,6 +1064,10 @@ done_prefixes: | |||
| 1057 | 1064 | ||
| 1058 | if (c->ad_bytes != 8) | 1065 | if (c->ad_bytes != 8) |
| 1059 | c->modrm_ea = (u32)c->modrm_ea; | 1066 | c->modrm_ea = (u32)c->modrm_ea; |
| 1067 | |||
| 1068 | if (c->rip_relative) | ||
| 1069 | c->modrm_ea += c->eip; | ||
| 1070 | |||
| 1060 | /* | 1071 | /* |
| 1061 | * Decode and fetch the source operand: register, memory | 1072 | * Decode and fetch the source operand: register, memory |
| 1062 | * or immediate. | 1073 | * or immediate. |
| @@ -1091,6 +1102,8 @@ done_prefixes: | |||
| 1091 | break; | 1102 | break; |
| 1092 | } | 1103 | } |
| 1093 | c->src.type = OP_MEM; | 1104 | c->src.type = OP_MEM; |
| 1105 | c->src.ptr = (unsigned long *)c->modrm_ea; | ||
| 1106 | c->src.val = 0; | ||
| 1094 | break; | 1107 | break; |
| 1095 | case SrcImm: | 1108 | case SrcImm: |
| 1096 | case SrcImmU: | 1109 | case SrcImmU: |
| @@ -1139,6 +1152,14 @@ done_prefixes: | |||
| 1139 | c->src.bytes = 1; | 1152 | c->src.bytes = 1; |
| 1140 | c->src.val = 1; | 1153 | c->src.val = 1; |
| 1141 | break; | 1154 | break; |
| 1155 | case SrcSI: | ||
| 1156 | c->src.type = OP_MEM; | ||
| 1157 | c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
| 1158 | c->src.ptr = (unsigned long *) | ||
| 1159 | register_address(c, seg_override_base(ctxt, c), | ||
| 1160 | c->regs[VCPU_REGS_RSI]); | ||
| 1161 | c->src.val = 0; | ||
| 1162 | break; | ||
| 1142 | } | 1163 | } |
| 1143 | 1164 | ||
| 1144 | /* | 1165 | /* |
| @@ -1168,6 +1189,12 @@ done_prefixes: | |||
| 1168 | c->src2.bytes = 1; | 1189 | c->src2.bytes = 1; |
| 1169 | c->src2.val = 1; | 1190 | c->src2.val = 1; |
| 1170 | break; | 1191 | break; |
| 1192 | case Src2Mem16: | ||
| 1193 | c->src2.type = OP_MEM; | ||
| 1194 | c->src2.bytes = 2; | ||
| 1195 | c->src2.ptr = (unsigned long *)(c->modrm_ea + c->src.bytes); | ||
| 1196 | c->src2.val = 0; | ||
| 1197 | break; | ||
| 1171 | } | 1198 | } |
| 1172 | 1199 | ||
| 1173 | /* Decode and fetch the destination operand: register or memory. */ | 1200 | /* Decode and fetch the destination operand: register or memory. */ |
| @@ -1180,6 +1207,7 @@ done_prefixes: | |||
| 1180 | c->twobyte && (c->b == 0xb6 || c->b == 0xb7)); | 1207 | c->twobyte && (c->b == 0xb6 || c->b == 0xb7)); |
| 1181 | break; | 1208 | break; |
| 1182 | case DstMem: | 1209 | case DstMem: |
| 1210 | case DstMem64: | ||
| 1183 | if ((c->d & ModRM) && c->modrm_mod == 3) { | 1211 | if ((c->d & ModRM) && c->modrm_mod == 3) { |
| 1184 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 1212 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
| 1185 | c->dst.type = OP_REG; | 1213 | c->dst.type = OP_REG; |
| @@ -1188,12 +1216,24 @@ done_prefixes: | |||
| 1188 | break; | 1216 | break; |
| 1189 | } | 1217 | } |
| 1190 | c->dst.type = OP_MEM; | 1218 | c->dst.type = OP_MEM; |
| 1219 | c->dst.ptr = (unsigned long *)c->modrm_ea; | ||
| 1220 | if ((c->d & DstMask) == DstMem64) | ||
| 1221 | c->dst.bytes = 8; | ||
| 1222 | else | ||
| 1223 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
| 1224 | c->dst.val = 0; | ||
| 1225 | if (c->d & BitOp) { | ||
| 1226 | unsigned long mask = ~(c->dst.bytes * 8 - 1); | ||
| 1227 | |||
| 1228 | c->dst.ptr = (void *)c->dst.ptr + | ||
| 1229 | (c->src.val & mask) / 8; | ||
| 1230 | } | ||
| 1191 | break; | 1231 | break; |
| 1192 | case DstAcc: | 1232 | case DstAcc: |
| 1193 | c->dst.type = OP_REG; | 1233 | c->dst.type = OP_REG; |
| 1194 | c->dst.bytes = c->op_bytes; | 1234 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
| 1195 | c->dst.ptr = &c->regs[VCPU_REGS_RAX]; | 1235 | c->dst.ptr = &c->regs[VCPU_REGS_RAX]; |
| 1196 | switch (c->op_bytes) { | 1236 | switch (c->dst.bytes) { |
| 1197 | case 1: | 1237 | case 1: |
| 1198 | c->dst.val = *(u8 *)c->dst.ptr; | 1238 | c->dst.val = *(u8 *)c->dst.ptr; |
| 1199 | break; | 1239 | break; |
| @@ -1203,18 +1243,248 @@ done_prefixes: | |||
| 1203 | case 4: | 1243 | case 4: |
| 1204 | c->dst.val = *(u32 *)c->dst.ptr; | 1244 | c->dst.val = *(u32 *)c->dst.ptr; |
| 1205 | break; | 1245 | break; |
| 1246 | case 8: | ||
| 1247 | c->dst.val = *(u64 *)c->dst.ptr; | ||
| 1248 | break; | ||
| 1206 | } | 1249 | } |
| 1207 | c->dst.orig_val = c->dst.val; | 1250 | c->dst.orig_val = c->dst.val; |
| 1208 | break; | 1251 | break; |
| 1252 | case DstDI: | ||
| 1253 | c->dst.type = OP_MEM; | ||
| 1254 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
| 1255 | c->dst.ptr = (unsigned long *) | ||
| 1256 | register_address(c, es_base(ctxt), | ||
| 1257 | c->regs[VCPU_REGS_RDI]); | ||
| 1258 | c->dst.val = 0; | ||
| 1259 | break; | ||
| 1209 | } | 1260 | } |
| 1210 | 1261 | ||
| 1211 | if (c->rip_relative) | ||
| 1212 | c->modrm_ea += c->eip; | ||
| 1213 | |||
| 1214 | done: | 1262 | done: |
| 1215 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; | 1263 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; |
| 1216 | } | 1264 | } |
| 1217 | 1265 | ||
| 1266 | static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, | ||
| 1267 | struct x86_emulate_ops *ops, | ||
| 1268 | unsigned int size, unsigned short port, | ||
| 1269 | void *dest) | ||
| 1270 | { | ||
| 1271 | struct read_cache *rc = &ctxt->decode.io_read; | ||
| 1272 | |||
| 1273 | if (rc->pos == rc->end) { /* refill pio read ahead */ | ||
| 1274 | struct decode_cache *c = &ctxt->decode; | ||
| 1275 | unsigned int in_page, n; | ||
| 1276 | unsigned int count = c->rep_prefix ? | ||
| 1277 | address_mask(c, c->regs[VCPU_REGS_RCX]) : 1; | ||
| 1278 | in_page = (ctxt->eflags & EFLG_DF) ? | ||
| 1279 | offset_in_page(c->regs[VCPU_REGS_RDI]) : | ||
| 1280 | PAGE_SIZE - offset_in_page(c->regs[VCPU_REGS_RDI]); | ||
| 1281 | n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size, | ||
| 1282 | count); | ||
| 1283 | if (n == 0) | ||
| 1284 | n = 1; | ||
| 1285 | rc->pos = rc->end = 0; | ||
| 1286 | if (!ops->pio_in_emulated(size, port, rc->data, n, ctxt->vcpu)) | ||
| 1287 | return 0; | ||
| 1288 | rc->end = n * size; | ||
| 1289 | } | ||
| 1290 | |||
| 1291 | memcpy(dest, rc->data + rc->pos, size); | ||
| 1292 | rc->pos += size; | ||
| 1293 | return 1; | ||
| 1294 | } | ||
| 1295 | |||
| 1296 | static u32 desc_limit_scaled(struct desc_struct *desc) | ||
| 1297 | { | ||
| 1298 | u32 limit = get_desc_limit(desc); | ||
| 1299 | |||
| 1300 | return desc->g ? (limit << 12) | 0xfff : limit; | ||
| 1301 | } | ||
| 1302 | |||
| 1303 | static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, | ||
| 1304 | struct x86_emulate_ops *ops, | ||
| 1305 | u16 selector, struct desc_ptr *dt) | ||
| 1306 | { | ||
| 1307 | if (selector & 1 << 2) { | ||
| 1308 | struct desc_struct desc; | ||
| 1309 | memset (dt, 0, sizeof *dt); | ||
| 1310 | if (!ops->get_cached_descriptor(&desc, VCPU_SREG_LDTR, ctxt->vcpu)) | ||
| 1311 | return; | ||
| 1312 | |||
| 1313 | dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */ | ||
| 1314 | dt->address = get_desc_base(&desc); | ||
| 1315 | } else | ||
| 1316 | ops->get_gdt(dt, ctxt->vcpu); | ||
| 1317 | } | ||
| 1318 | |||
| 1319 | /* allowed just for 8 bytes segments */ | ||
| 1320 | static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, | ||
| 1321 | struct x86_emulate_ops *ops, | ||
| 1322 | u16 selector, struct desc_struct *desc) | ||
| 1323 | { | ||
| 1324 | struct desc_ptr dt; | ||
| 1325 | u16 index = selector >> 3; | ||
| 1326 | int ret; | ||
| 1327 | u32 err; | ||
| 1328 | ulong addr; | ||
| 1329 | |||
| 1330 | get_descriptor_table_ptr(ctxt, ops, selector, &dt); | ||
| 1331 | |||
| 1332 | if (dt.size < index * 8 + 7) { | ||
| 1333 | kvm_inject_gp(ctxt->vcpu, selector & 0xfffc); | ||
| 1334 | return X86EMUL_PROPAGATE_FAULT; | ||
| 1335 | } | ||
| 1336 | addr = dt.address + index * 8; | ||
| 1337 | ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); | ||
| 1338 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
| 1339 | kvm_inject_page_fault(ctxt->vcpu, addr, err); | ||
| 1340 | |||
| 1341 | return ret; | ||
| 1342 | } | ||
| 1343 | |||
| 1344 | /* allowed just for 8 bytes segments */ | ||
| 1345 | static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, | ||
| 1346 | struct x86_emulate_ops *ops, | ||
| 1347 | u16 selector, struct desc_struct *desc) | ||
| 1348 | { | ||
| 1349 | struct desc_ptr dt; | ||
| 1350 | u16 index = selector >> 3; | ||
| 1351 | u32 err; | ||
| 1352 | ulong addr; | ||
| 1353 | int ret; | ||
| 1354 | |||
| 1355 | get_descriptor_table_ptr(ctxt, ops, selector, &dt); | ||
| 1356 | |||
| 1357 | if (dt.size < index * 8 + 7) { | ||
| 1358 | kvm_inject_gp(ctxt->vcpu, selector & 0xfffc); | ||
| 1359 | return X86EMUL_PROPAGATE_FAULT; | ||
| 1360 | } | ||
| 1361 | |||
| 1362 | addr = dt.address + index * 8; | ||
| 1363 | ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); | ||
| 1364 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
| 1365 | kvm_inject_page_fault(ctxt->vcpu, addr, err); | ||
| 1366 | |||
| 1367 | return ret; | ||
| 1368 | } | ||
| 1369 | |||
| 1370 | static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | ||
| 1371 | struct x86_emulate_ops *ops, | ||
| 1372 | u16 selector, int seg) | ||
| 1373 | { | ||
| 1374 | struct desc_struct seg_desc; | ||
| 1375 | u8 dpl, rpl, cpl; | ||
| 1376 | unsigned err_vec = GP_VECTOR; | ||
| 1377 | u32 err_code = 0; | ||
| 1378 | bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */ | ||
| 1379 | int ret; | ||
| 1380 | |||
| 1381 | memset(&seg_desc, 0, sizeof seg_desc); | ||
| 1382 | |||
| 1383 | if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) | ||
| 1384 | || ctxt->mode == X86EMUL_MODE_REAL) { | ||
| 1385 | /* set real mode segment descriptor */ | ||
| 1386 | set_desc_base(&seg_desc, selector << 4); | ||
| 1387 | set_desc_limit(&seg_desc, 0xffff); | ||
| 1388 | seg_desc.type = 3; | ||
| 1389 | seg_desc.p = 1; | ||
| 1390 | seg_desc.s = 1; | ||
| 1391 | goto load; | ||
| 1392 | } | ||
| 1393 | |||
| 1394 | /* NULL selector is not valid for TR, CS and SS */ | ||
| 1395 | if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR) | ||
| 1396 | && null_selector) | ||
| 1397 | goto exception; | ||
| 1398 | |||
| 1399 | /* TR should be in GDT only */ | ||
| 1400 | if (seg == VCPU_SREG_TR && (selector & (1 << 2))) | ||
| 1401 | goto exception; | ||
| 1402 | |||
| 1403 | if (null_selector) /* for NULL selector skip all following checks */ | ||
| 1404 | goto load; | ||
| 1405 | |||
| 1406 | ret = read_segment_descriptor(ctxt, ops, selector, &seg_desc); | ||
| 1407 | if (ret != X86EMUL_CONTINUE) | ||
| 1408 | return ret; | ||
| 1409 | |||
| 1410 | err_code = selector & 0xfffc; | ||
| 1411 | err_vec = GP_VECTOR; | ||
| 1412 | |||
| 1413 | /* can't load system descriptor into segment selector */ | ||
| 1414 | if (seg <= VCPU_SREG_GS && !seg_desc.s) | ||
| 1415 | goto exception; | ||
| 1416 | |||
| 1417 | if (!seg_desc.p) { | ||
| 1418 | err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR; | ||
| 1419 | goto exception; | ||
| 1420 | } | ||
| 1421 | |||
| 1422 | rpl = selector & 3; | ||
| 1423 | dpl = seg_desc.dpl; | ||
| 1424 | cpl = ops->cpl(ctxt->vcpu); | ||
| 1425 | |||
| 1426 | switch (seg) { | ||
| 1427 | case VCPU_SREG_SS: | ||
| 1428 | /* | ||
| 1429 | * segment is not a writable data segment or segment | ||
| 1430 | * selector's RPL != CPL or segment descriptor's DPL != CPL | ||
| 1431 | */ | ||
| 1432 | if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl) | ||
| 1433 | goto exception; | ||
| 1434 | break; | ||
| 1435 | case VCPU_SREG_CS: | ||
| 1436 | if (!(seg_desc.type & 8)) | ||
| 1437 | goto exception; | ||
| 1438 | |||
| 1439 | if (seg_desc.type & 4) { | ||
| 1440 | /* conforming */ | ||
| 1441 | if (dpl > cpl) | ||
| 1442 | goto exception; | ||
| 1443 | } else { | ||
| 1444 | /* nonconforming */ | ||
| 1445 | if (rpl > cpl || dpl != cpl) | ||
| 1446 | goto exception; | ||
| 1447 | } | ||
| 1448 | /* CS(RPL) <- CPL */ | ||
| 1449 | selector = (selector & 0xfffc) | cpl; | ||
| 1450 | break; | ||
| 1451 | case VCPU_SREG_TR: | ||
| 1452 | if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9)) | ||
| 1453 | goto exception; | ||
| 1454 | break; | ||
| 1455 | case VCPU_SREG_LDTR: | ||
| 1456 | if (seg_desc.s || seg_desc.type != 2) | ||
| 1457 | goto exception; | ||
| 1458 | break; | ||
| 1459 | default: /* DS, ES, FS, or GS */ | ||
| 1460 | /* | ||
| 1461 | * segment is not a data or readable code segment or | ||
| 1462 | * ((segment is a data or nonconforming code segment) | ||
| 1463 | * and (both RPL and CPL > DPL)) | ||
| 1464 | */ | ||
| 1465 | if ((seg_desc.type & 0xa) == 0x8 || | ||
| 1466 | (((seg_desc.type & 0xc) != 0xc) && | ||
| 1467 | (rpl > dpl && cpl > dpl))) | ||
| 1468 | goto exception; | ||
| 1469 | break; | ||
| 1470 | } | ||
| 1471 | |||
| 1472 | if (seg_desc.s) { | ||
| 1473 | /* mark segment as accessed */ | ||
| 1474 | seg_desc.type |= 1; | ||
| 1475 | ret = write_segment_descriptor(ctxt, ops, selector, &seg_desc); | ||
| 1476 | if (ret != X86EMUL_CONTINUE) | ||
| 1477 | return ret; | ||
| 1478 | } | ||
| 1479 | load: | ||
| 1480 | ops->set_segment_selector(selector, seg, ctxt->vcpu); | ||
| 1481 | ops->set_cached_descriptor(&seg_desc, seg, ctxt->vcpu); | ||
| 1482 | return X86EMUL_CONTINUE; | ||
| 1483 | exception: | ||
| 1484 | kvm_queue_exception_e(ctxt->vcpu, err_vec, err_code); | ||
| 1485 | return X86EMUL_PROPAGATE_FAULT; | ||
| 1486 | } | ||
| 1487 | |||
| 1218 | static inline void emulate_push(struct x86_emulate_ctxt *ctxt) | 1488 | static inline void emulate_push(struct x86_emulate_ctxt *ctxt) |
| 1219 | { | 1489 | { |
| 1220 | struct decode_cache *c = &ctxt->decode; | 1490 | struct decode_cache *c = &ctxt->decode; |
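The new read_segment_descriptor() and write_segment_descriptor() helpers above both derive the table slot from the selector (index in the upper 13 bits, bit 2 choosing LDT versus GDT, RPL in the low two bits) and reject selectors whose 8-byte descriptor would run past the table limit (the dt.size < index * 8 + 7 check). A small stand-alone sketch of that selector arithmetic:

```c
#include <stdio.h>

/* A selector is: | index (13 bits) | TI (1 bit: 0=GDT, 1=LDT) | RPL (2 bits) | */
struct selector_info {
	unsigned index;		/* descriptor slot                       */
	unsigned ti;		/* table indicator                       */
	unsigned rpl;		/* requested privilege level             */
	unsigned long offset;	/* byte offset of the 8-byte descriptor  */
};

static struct selector_info decode_selector(unsigned short sel)
{
	struct selector_info s;

	s.index  = sel >> 3;
	s.ti     = (sel >> 2) & 1;
	s.rpl    = sel & 3;
	s.offset = (unsigned long)s.index * 8;
	return s;
}

/* Same bounds test the emulator applies before touching the table:
 * the whole 8-byte descriptor must lie within the table limit. */
static int selector_in_table(unsigned short sel, unsigned table_limit)
{
	return table_limit >= (sel >> 3) * 8u + 7u;
}

int main(void)
{
	unsigned short sel = 0x0067;		/* index 12, LDT, RPL 3 */
	struct selector_info s = decode_selector(sel);

	printf("index %u, %s, rpl %u, offset %lu, fits in 0x7f-limit table: %s\n",
	       s.index, s.ti ? "LDT" : "GDT", s.rpl, s.offset,
	       selector_in_table(sel, 0x7f) ? "yes" : "no");
	return 0;
}
```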
| @@ -1251,7 +1521,7 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, | |||
| 1251 | int rc; | 1521 | int rc; |
| 1252 | unsigned long val, change_mask; | 1522 | unsigned long val, change_mask; |
| 1253 | int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; | 1523 | int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; |
| 1254 | int cpl = kvm_x86_ops->get_cpl(ctxt->vcpu); | 1524 | int cpl = ops->cpl(ctxt->vcpu); |
| 1255 | 1525 | ||
| 1256 | rc = emulate_pop(ctxt, ops, &val, len); | 1526 | rc = emulate_pop(ctxt, ops, &val, len); |
| 1257 | if (rc != X86EMUL_CONTINUE) | 1527 | if (rc != X86EMUL_CONTINUE) |
| @@ -1306,10 +1576,10 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, | |||
| 1306 | int rc; | 1576 | int rc; |
| 1307 | 1577 | ||
| 1308 | rc = emulate_pop(ctxt, ops, &selector, c->op_bytes); | 1578 | rc = emulate_pop(ctxt, ops, &selector, c->op_bytes); |
| 1309 | if (rc != 0) | 1579 | if (rc != X86EMUL_CONTINUE) |
| 1310 | return rc; | 1580 | return rc; |
| 1311 | 1581 | ||
| 1312 | rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, seg); | 1582 | rc = load_segment_descriptor(ctxt, ops, (u16)selector, seg); |
| 1313 | return rc; | 1583 | return rc; |
| 1314 | } | 1584 | } |
| 1315 | 1585 | ||
| @@ -1332,7 +1602,7 @@ static int emulate_popa(struct x86_emulate_ctxt *ctxt, | |||
| 1332 | struct x86_emulate_ops *ops) | 1602 | struct x86_emulate_ops *ops) |
| 1333 | { | 1603 | { |
| 1334 | struct decode_cache *c = &ctxt->decode; | 1604 | struct decode_cache *c = &ctxt->decode; |
| 1335 | int rc = 0; | 1605 | int rc = X86EMUL_CONTINUE; |
| 1336 | int reg = VCPU_REGS_RDI; | 1606 | int reg = VCPU_REGS_RDI; |
| 1337 | 1607 | ||
| 1338 | while (reg >= VCPU_REGS_RAX) { | 1608 | while (reg >= VCPU_REGS_RAX) { |
| @@ -1343,7 +1613,7 @@ static int emulate_popa(struct x86_emulate_ctxt *ctxt, | |||
| 1343 | } | 1613 | } |
| 1344 | 1614 | ||
| 1345 | rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes); | 1615 | rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes); |
| 1346 | if (rc != 0) | 1616 | if (rc != X86EMUL_CONTINUE) |
| 1347 | break; | 1617 | break; |
| 1348 | --reg; | 1618 | --reg; |
| 1349 | } | 1619 | } |
| @@ -1354,12 +1624,8 @@ static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, | |||
| 1354 | struct x86_emulate_ops *ops) | 1624 | struct x86_emulate_ops *ops) |
| 1355 | { | 1625 | { |
| 1356 | struct decode_cache *c = &ctxt->decode; | 1626 | struct decode_cache *c = &ctxt->decode; |
| 1357 | int rc; | ||
| 1358 | 1627 | ||
| 1359 | rc = emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes); | 1628 | return emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes); |
| 1360 | if (rc != 0) | ||
| 1361 | return rc; | ||
| 1362 | return 0; | ||
| 1363 | } | 1629 | } |
| 1364 | 1630 | ||
| 1365 | static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt) | 1631 | static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt) |
| @@ -1395,7 +1661,6 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt, | |||
| 1395 | struct x86_emulate_ops *ops) | 1661 | struct x86_emulate_ops *ops) |
| 1396 | { | 1662 | { |
| 1397 | struct decode_cache *c = &ctxt->decode; | 1663 | struct decode_cache *c = &ctxt->decode; |
| 1398 | int rc = 0; | ||
| 1399 | 1664 | ||
| 1400 | switch (c->modrm_reg) { | 1665 | switch (c->modrm_reg) { |
| 1401 | case 0 ... 1: /* test */ | 1666 | case 0 ... 1: /* test */ |
| @@ -1408,11 +1673,9 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt, | |||
| 1408 | emulate_1op("neg", c->dst, ctxt->eflags); | 1673 | emulate_1op("neg", c->dst, ctxt->eflags); |
| 1409 | break; | 1674 | break; |
| 1410 | default: | 1675 | default: |
| 1411 | DPRINTF("Cannot emulate %02x\n", c->b); | 1676 | return 0; |
| 1412 | rc = X86EMUL_UNHANDLEABLE; | ||
| 1413 | break; | ||
| 1414 | } | 1677 | } |
| 1415 | return rc; | 1678 | return 1; |
| 1416 | } | 1679 | } |
| 1417 | 1680 | ||
| 1418 | static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, | 1681 | static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, |
| @@ -1442,20 +1705,14 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, | |||
| 1442 | emulate_push(ctxt); | 1705 | emulate_push(ctxt); |
| 1443 | break; | 1706 | break; |
| 1444 | } | 1707 | } |
| 1445 | return 0; | 1708 | return X86EMUL_CONTINUE; |
| 1446 | } | 1709 | } |
| 1447 | 1710 | ||
| 1448 | static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, | 1711 | static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, |
| 1449 | struct x86_emulate_ops *ops, | 1712 | struct x86_emulate_ops *ops) |
| 1450 | unsigned long memop) | ||
| 1451 | { | 1713 | { |
| 1452 | struct decode_cache *c = &ctxt->decode; | 1714 | struct decode_cache *c = &ctxt->decode; |
| 1453 | u64 old, new; | 1715 | u64 old = c->dst.orig_val; |
| 1454 | int rc; | ||
| 1455 | |||
| 1456 | rc = ops->read_emulated(memop, &old, 8, ctxt->vcpu); | ||
| 1457 | if (rc != X86EMUL_CONTINUE) | ||
| 1458 | return rc; | ||
| 1459 | 1716 | ||
| 1460 | if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) || | 1717 | if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) || |
| 1461 | ((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) { | 1718 | ((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) { |
| @@ -1463,17 +1720,13 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, | |||
| 1463 | c->regs[VCPU_REGS_RAX] = (u32) (old >> 0); | 1720 | c->regs[VCPU_REGS_RAX] = (u32) (old >> 0); |
| 1464 | c->regs[VCPU_REGS_RDX] = (u32) (old >> 32); | 1721 | c->regs[VCPU_REGS_RDX] = (u32) (old >> 32); |
| 1465 | ctxt->eflags &= ~EFLG_ZF; | 1722 | ctxt->eflags &= ~EFLG_ZF; |
| 1466 | |||
| 1467 | } else { | 1723 | } else { |
| 1468 | new = ((u64)c->regs[VCPU_REGS_RCX] << 32) | | 1724 | c->dst.val = ((u64)c->regs[VCPU_REGS_RCX] << 32) | |
| 1469 | (u32) c->regs[VCPU_REGS_RBX]; | 1725 | (u32) c->regs[VCPU_REGS_RBX]; |
| 1470 | 1726 | ||
| 1471 | rc = ops->cmpxchg_emulated(memop, &old, &new, 8, ctxt->vcpu); | ||
| 1472 | if (rc != X86EMUL_CONTINUE) | ||
| 1473 | return rc; | ||
| 1474 | ctxt->eflags |= EFLG_ZF; | 1727 | ctxt->eflags |= EFLG_ZF; |
| 1475 | } | 1728 | } |
| 1476 | return 0; | 1729 | return X86EMUL_CONTINUE; |
| 1477 | } | 1730 | } |
| 1478 | 1731 | ||
| 1479 | static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, | 1732 | static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, |
| @@ -1484,14 +1737,14 @@ static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, | |||
| 1484 | unsigned long cs; | 1737 | unsigned long cs; |
| 1485 | 1738 | ||
| 1486 | rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes); | 1739 | rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes); |
| 1487 | if (rc) | 1740 | if (rc != X86EMUL_CONTINUE) |
| 1488 | return rc; | 1741 | return rc; |
| 1489 | if (c->op_bytes == 4) | 1742 | if (c->op_bytes == 4) |
| 1490 | c->eip = (u32)c->eip; | 1743 | c->eip = (u32)c->eip; |
| 1491 | rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); | 1744 | rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); |
| 1492 | if (rc) | 1745 | if (rc != X86EMUL_CONTINUE) |
| 1493 | return rc; | 1746 | return rc; |
| 1494 | rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, VCPU_SREG_CS); | 1747 | rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS); |
| 1495 | return rc; | 1748 | return rc; |
| 1496 | } | 1749 | } |
| 1497 | 1750 | ||
| @@ -1544,7 +1797,7 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, | |||
| 1544 | default: | 1797 | default: |
| 1545 | break; | 1798 | break; |
| 1546 | } | 1799 | } |
| 1547 | return 0; | 1800 | return X86EMUL_CONTINUE; |
| 1548 | } | 1801 | } |
| 1549 | 1802 | ||
| 1550 | static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask) | 1803 | static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask) |
| @@ -1598,8 +1851,11 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt) | |||
| 1598 | u64 msr_data; | 1851 | u64 msr_data; |
| 1599 | 1852 | ||
| 1600 | /* syscall is not available in real mode */ | 1853 | /* syscall is not available in real mode */ |
| 1601 | if (ctxt->mode == X86EMUL_MODE_REAL || ctxt->mode == X86EMUL_MODE_VM86) | 1854 | if (ctxt->mode == X86EMUL_MODE_REAL || |
| 1602 | return X86EMUL_UNHANDLEABLE; | 1855 | ctxt->mode == X86EMUL_MODE_VM86) { |
| 1856 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | ||
| 1857 | return X86EMUL_PROPAGATE_FAULT; | ||
| 1858 | } | ||
| 1603 | 1859 | ||
| 1604 | setup_syscalls_segments(ctxt, &cs, &ss); | 1860 | setup_syscalls_segments(ctxt, &cs, &ss); |
| 1605 | 1861 | ||
| @@ -1649,14 +1905,16 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt) | |||
| 1649 | /* inject #GP if in real mode */ | 1905 | /* inject #GP if in real mode */ |
| 1650 | if (ctxt->mode == X86EMUL_MODE_REAL) { | 1906 | if (ctxt->mode == X86EMUL_MODE_REAL) { |
| 1651 | kvm_inject_gp(ctxt->vcpu, 0); | 1907 | kvm_inject_gp(ctxt->vcpu, 0); |
| 1652 | return X86EMUL_UNHANDLEABLE; | 1908 | return X86EMUL_PROPAGATE_FAULT; |
| 1653 | } | 1909 | } |
| 1654 | 1910 | ||
| 1655 | /* XXX sysenter/sysexit have not been tested in 64bit mode. | 1911 | /* XXX sysenter/sysexit have not been tested in 64bit mode. |
| 1656 | * Therefore, we inject an #UD. | 1912 | * Therefore, we inject an #UD. |
| 1657 | */ | 1913 | */ |
| 1658 | if (ctxt->mode == X86EMUL_MODE_PROT64) | 1914 | if (ctxt->mode == X86EMUL_MODE_PROT64) { |
| 1659 | return X86EMUL_UNHANDLEABLE; | 1915 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); |
| 1916 | return X86EMUL_PROPAGATE_FAULT; | ||
| 1917 | } | ||
| 1660 | 1918 | ||
| 1661 | setup_syscalls_segments(ctxt, &cs, &ss); | 1919 | setup_syscalls_segments(ctxt, &cs, &ss); |
| 1662 | 1920 | ||
| @@ -1711,7 +1969,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt) | |||
| 1711 | if (ctxt->mode == X86EMUL_MODE_REAL || | 1969 | if (ctxt->mode == X86EMUL_MODE_REAL || |
| 1712 | ctxt->mode == X86EMUL_MODE_VM86) { | 1970 | ctxt->mode == X86EMUL_MODE_VM86) { |
| 1713 | kvm_inject_gp(ctxt->vcpu, 0); | 1971 | kvm_inject_gp(ctxt->vcpu, 0); |
| 1714 | return X86EMUL_UNHANDLEABLE; | 1972 | return X86EMUL_PROPAGATE_FAULT; |
| 1715 | } | 1973 | } |
| 1716 | 1974 | ||
| 1717 | setup_syscalls_segments(ctxt, &cs, &ss); | 1975 | setup_syscalls_segments(ctxt, &cs, &ss); |
| @@ -1756,7 +2014,8 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt) | |||
| 1756 | return X86EMUL_CONTINUE; | 2014 | return X86EMUL_CONTINUE; |
| 1757 | } | 2015 | } |
| 1758 | 2016 | ||
| 1759 | static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt) | 2017 | static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt, |
| 2018 | struct x86_emulate_ops *ops) | ||
| 1760 | { | 2019 | { |
| 1761 | int iopl; | 2020 | int iopl; |
| 1762 | if (ctxt->mode == X86EMUL_MODE_REAL) | 2021 | if (ctxt->mode == X86EMUL_MODE_REAL) |
| @@ -1764,7 +2023,7 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt) | |||
| 1764 | if (ctxt->mode == X86EMUL_MODE_VM86) | 2023 | if (ctxt->mode == X86EMUL_MODE_VM86) |
| 1765 | return true; | 2024 | return true; |
| 1766 | iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; | 2025 | iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; |
| 1767 | return kvm_x86_ops->get_cpl(ctxt->vcpu) > iopl; | 2026 | return ops->cpl(ctxt->vcpu) > iopl; |
| 1768 | } | 2027 | } |
| 1769 | 2028 | ||
| 1770 | static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, | 2029 | static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, |
| @@ -1801,22 +2060,419 @@ static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt, | |||
| 1801 | struct x86_emulate_ops *ops, | 2060 | struct x86_emulate_ops *ops, |
| 1802 | u16 port, u16 len) | 2061 | u16 port, u16 len) |
| 1803 | { | 2062 | { |
| 1804 | if (emulator_bad_iopl(ctxt)) | 2063 | if (emulator_bad_iopl(ctxt, ops)) |
| 1805 | if (!emulator_io_port_access_allowed(ctxt, ops, port, len)) | 2064 | if (!emulator_io_port_access_allowed(ctxt, ops, port, len)) |
| 1806 | return false; | 2065 | return false; |
| 1807 | return true; | 2066 | return true; |
| 1808 | } | 2067 | } |
| 1809 | 2068 | ||
| 2069 | static u32 get_cached_descriptor_base(struct x86_emulate_ctxt *ctxt, | ||
| 2070 | struct x86_emulate_ops *ops, | ||
| 2071 | int seg) | ||
| 2072 | { | ||
| 2073 | struct desc_struct desc; | ||
| 2074 | if (ops->get_cached_descriptor(&desc, seg, ctxt->vcpu)) | ||
| 2075 | return get_desc_base(&desc); | ||
| 2076 | else | ||
| 2077 | return ~0; | ||
| 2078 | } | ||
| 2079 | |||
| 2080 | static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, | ||
| 2081 | struct x86_emulate_ops *ops, | ||
| 2082 | struct tss_segment_16 *tss) | ||
| 2083 | { | ||
| 2084 | struct decode_cache *c = &ctxt->decode; | ||
| 2085 | |||
| 2086 | tss->ip = c->eip; | ||
| 2087 | tss->flag = ctxt->eflags; | ||
| 2088 | tss->ax = c->regs[VCPU_REGS_RAX]; | ||
| 2089 | tss->cx = c->regs[VCPU_REGS_RCX]; | ||
| 2090 | tss->dx = c->regs[VCPU_REGS_RDX]; | ||
| 2091 | tss->bx = c->regs[VCPU_REGS_RBX]; | ||
| 2092 | tss->sp = c->regs[VCPU_REGS_RSP]; | ||
| 2093 | tss->bp = c->regs[VCPU_REGS_RBP]; | ||
| 2094 | tss->si = c->regs[VCPU_REGS_RSI]; | ||
| 2095 | tss->di = c->regs[VCPU_REGS_RDI]; | ||
| 2096 | |||
| 2097 | tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu); | ||
| 2098 | tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); | ||
| 2099 | tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu); | ||
| 2100 | tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu); | ||
| 2101 | tss->ldt = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu); | ||
| 2102 | } | ||
| 2103 | |||
| 2104 | static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, | ||
| 2105 | struct x86_emulate_ops *ops, | ||
| 2106 | struct tss_segment_16 *tss) | ||
| 2107 | { | ||
| 2108 | struct decode_cache *c = &ctxt->decode; | ||
| 2109 | int ret; | ||
| 2110 | |||
| 2111 | c->eip = tss->ip; | ||
| 2112 | ctxt->eflags = tss->flag | 2; | ||
| 2113 | c->regs[VCPU_REGS_RAX] = tss->ax; | ||
| 2114 | c->regs[VCPU_REGS_RCX] = tss->cx; | ||
| 2115 | c->regs[VCPU_REGS_RDX] = tss->dx; | ||
| 2116 | c->regs[VCPU_REGS_RBX] = tss->bx; | ||
| 2117 | c->regs[VCPU_REGS_RSP] = tss->sp; | ||
| 2118 | c->regs[VCPU_REGS_RBP] = tss->bp; | ||
| 2119 | c->regs[VCPU_REGS_RSI] = tss->si; | ||
| 2120 | c->regs[VCPU_REGS_RDI] = tss->di; | ||
| 2121 | |||
| 2122 | /* | ||
| 2123 | * SDM says that segment selectors are loaded before segment | ||
| 2124 | * descriptors | ||
| 2125 | */ | ||
| 2126 | ops->set_segment_selector(tss->ldt, VCPU_SREG_LDTR, ctxt->vcpu); | ||
| 2127 | ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu); | ||
| 2128 | ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu); | ||
| 2129 | ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu); | ||
| 2130 | ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu); | ||
| 2131 | |||
| 2132 | /* | ||
| 2133 | * Now load segment descriptors. If a fault happens at this stage | ||
| 2134 | * it is handled in the context of the new task | ||
| 2135 | */ | ||
| 2136 | ret = load_segment_descriptor(ctxt, ops, tss->ldt, VCPU_SREG_LDTR); | ||
| 2137 | if (ret != X86EMUL_CONTINUE) | ||
| 2138 | return ret; | ||
| 2139 | ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES); | ||
| 2140 | if (ret != X86EMUL_CONTINUE) | ||
| 2141 | return ret; | ||
| 2142 | ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS); | ||
| 2143 | if (ret != X86EMUL_CONTINUE) | ||
| 2144 | return ret; | ||
| 2145 | ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS); | ||
| 2146 | if (ret != X86EMUL_CONTINUE) | ||
| 2147 | return ret; | ||
| 2148 | ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS); | ||
| 2149 | if (ret != X86EMUL_CONTINUE) | ||
| 2150 | return ret; | ||
| 2151 | |||
| 2152 | return X86EMUL_CONTINUE; | ||
| 2153 | } | ||
| 2154 | |||
| 2155 | static int task_switch_16(struct x86_emulate_ctxt *ctxt, | ||
| 2156 | struct x86_emulate_ops *ops, | ||
| 2157 | u16 tss_selector, u16 old_tss_sel, | ||
| 2158 | ulong old_tss_base, struct desc_struct *new_desc) | ||
| 2159 | { | ||
| 2160 | struct tss_segment_16 tss_seg; | ||
| 2161 | int ret; | ||
| 2162 | u32 err, new_tss_base = get_desc_base(new_desc); | ||
| 2163 | |||
| 2164 | ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | ||
| 2165 | &err); | ||
| 2166 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
| 2167 | /* FIXME: need to provide precise fault address */ | ||
| 2168 | kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); | ||
| 2169 | return ret; | ||
| 2170 | } | ||
| 2171 | |||
| 2172 | save_state_to_tss16(ctxt, ops, &tss_seg); | ||
| 2173 | |||
| 2174 | ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | ||
| 2175 | &err); | ||
| 2176 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
| 2177 | /* FIXME: need to provide precise fault address */ | ||
| 2178 | kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); | ||
| 2179 | return ret; | ||
| 2180 | } | ||
| 2181 | |||
| 2182 | ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | ||
| 2183 | &err); | ||
| 2184 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
| 2185 | /* FIXME: need to provide precise fault address */ | ||
| 2186 | kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); | ||
| 2187 | return ret; | ||
| 2188 | } | ||
| 2189 | |||
| 2190 | if (old_tss_sel != 0xffff) { | ||
| 2191 | tss_seg.prev_task_link = old_tss_sel; | ||
| 2192 | |||
| 2193 | ret = ops->write_std(new_tss_base, | ||
| 2194 | &tss_seg.prev_task_link, | ||
| 2195 | sizeof tss_seg.prev_task_link, | ||
| 2196 | ctxt->vcpu, &err); | ||
| 2197 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
| 2198 | /* FIXME: need to provide precise fault address */ | ||
| 2199 | kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); | ||
| 2200 | return ret; | ||
| 2201 | } | ||
| 2202 | } | ||
| 2203 | |||
| 2204 | return load_state_from_tss16(ctxt, ops, &tss_seg); | ||
| 2205 | } | ||
| 2206 | |||
| 2207 | static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt, | ||
| 2208 | struct x86_emulate_ops *ops, | ||
| 2209 | struct tss_segment_32 *tss) | ||
| 2210 | { | ||
| 2211 | struct decode_cache *c = &ctxt->decode; | ||
| 2212 | |||
| 2213 | tss->cr3 = ops->get_cr(3, ctxt->vcpu); | ||
| 2214 | tss->eip = c->eip; | ||
| 2215 | tss->eflags = ctxt->eflags; | ||
| 2216 | tss->eax = c->regs[VCPU_REGS_RAX]; | ||
| 2217 | tss->ecx = c->regs[VCPU_REGS_RCX]; | ||
| 2218 | tss->edx = c->regs[VCPU_REGS_RDX]; | ||
| 2219 | tss->ebx = c->regs[VCPU_REGS_RBX]; | ||
| 2220 | tss->esp = c->regs[VCPU_REGS_RSP]; | ||
| 2221 | tss->ebp = c->regs[VCPU_REGS_RBP]; | ||
| 2222 | tss->esi = c->regs[VCPU_REGS_RSI]; | ||
| 2223 | tss->edi = c->regs[VCPU_REGS_RDI]; | ||
| 2224 | |||
| 2225 | tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu); | ||
| 2226 | tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); | ||
| 2227 | tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu); | ||
| 2228 | tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu); | ||
| 2229 | tss->fs = ops->get_segment_selector(VCPU_SREG_FS, ctxt->vcpu); | ||
| 2230 | tss->gs = ops->get_segment_selector(VCPU_SREG_GS, ctxt->vcpu); | ||
| 2231 | tss->ldt_selector = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu); | ||
| 2232 | } | ||
| 2233 | |||
| 2234 | static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, | ||
| 2235 | struct x86_emulate_ops *ops, | ||
| 2236 | struct tss_segment_32 *tss) | ||
| 2237 | { | ||
| 2238 | struct decode_cache *c = &ctxt->decode; | ||
| 2239 | int ret; | ||
| 2240 | |||
| 2241 | ops->set_cr(3, tss->cr3, ctxt->vcpu); | ||
| 2242 | c->eip = tss->eip; | ||
| 2243 | ctxt->eflags = tss->eflags | 2; | ||
| 2244 | c->regs[VCPU_REGS_RAX] = tss->eax; | ||
| 2245 | c->regs[VCPU_REGS_RCX] = tss->ecx; | ||
| 2246 | c->regs[VCPU_REGS_RDX] = tss->edx; | ||
| 2247 | c->regs[VCPU_REGS_RBX] = tss->ebx; | ||
| 2248 | c->regs[VCPU_REGS_RSP] = tss->esp; | ||
| 2249 | c->regs[VCPU_REGS_RBP] = tss->ebp; | ||
| 2250 | c->regs[VCPU_REGS_RSI] = tss->esi; | ||
| 2251 | c->regs[VCPU_REGS_RDI] = tss->edi; | ||
| 2252 | |||
| 2253 | /* | ||
| 2254 | * SDM says that segment selectors are loaded before segment | ||
| 2255 | * descriptors | ||
| 2256 | */ | ||
| 2257 | ops->set_segment_selector(tss->ldt_selector, VCPU_SREG_LDTR, ctxt->vcpu); | ||
| 2258 | ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu); | ||
| 2259 | ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu); | ||
| 2260 | ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu); | ||
| 2261 | ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu); | ||
| 2262 | ops->set_segment_selector(tss->fs, VCPU_SREG_FS, ctxt->vcpu); | ||
| 2263 | ops->set_segment_selector(tss->gs, VCPU_SREG_GS, ctxt->vcpu); | ||
| 2264 | |||
| 2265 | /* | ||
| 2266 | * Now load segment descriptors. If a fault happens at this stage | ||
| 2267 | * it is handled in the context of the new task | ||
| 2268 | */ | ||
| 2269 | ret = load_segment_descriptor(ctxt, ops, tss->ldt_selector, VCPU_SREG_LDTR); | ||
| 2270 | if (ret != X86EMUL_CONTINUE) | ||
| 2271 | return ret; | ||
| 2272 | ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES); | ||
| 2273 | if (ret != X86EMUL_CONTINUE) | ||
| 2274 | return ret; | ||
| 2275 | ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS); | ||
| 2276 | if (ret != X86EMUL_CONTINUE) | ||
| 2277 | return ret; | ||
| 2278 | ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS); | ||
| 2279 | if (ret != X86EMUL_CONTINUE) | ||
| 2280 | return ret; | ||
| 2281 | ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS); | ||
| 2282 | if (ret != X86EMUL_CONTINUE) | ||
| 2283 | return ret; | ||
| 2284 | ret = load_segment_descriptor(ctxt, ops, tss->fs, VCPU_SREG_FS); | ||
| 2285 | if (ret != X86EMUL_CONTINUE) | ||
| 2286 | return ret; | ||
| 2287 | ret = load_segment_descriptor(ctxt, ops, tss->gs, VCPU_SREG_GS); | ||
| 2288 | if (ret != X86EMUL_CONTINUE) | ||
| 2289 | return ret; | ||
| 2290 | |||
| 2291 | return X86EMUL_CONTINUE; | ||
| 2292 | } | ||
| 2293 | |||
| 2294 | static int task_switch_32(struct x86_emulate_ctxt *ctxt, | ||
| 2295 | struct x86_emulate_ops *ops, | ||
| 2296 | u16 tss_selector, u16 old_tss_sel, | ||
| 2297 | ulong old_tss_base, struct desc_struct *new_desc) | ||
| 2298 | { | ||
| 2299 | struct tss_segment_32 tss_seg; | ||
| 2300 | int ret; | ||
| 2301 | u32 err, new_tss_base = get_desc_base(new_desc); | ||
| 2302 | |||
| 2303 | ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | ||
| 2304 | &err); | ||
| 2305 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
| 2306 | /* FIXME: need to provide precise fault address */ | ||
| 2307 | kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); | ||
| 2308 | return ret; | ||
| 2309 | } | ||
| 2310 | |||
| 2311 | save_state_to_tss32(ctxt, ops, &tss_seg); | ||
| 2312 | |||
| 2313 | ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | ||
| 2314 | &err); | ||
| 2315 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
| 2316 | /* FIXME: need to provide precise fault address */ | ||
| 2317 | kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); | ||
| 2318 | return ret; | ||
| 2319 | } | ||
| 2320 | |||
| 2321 | ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, | ||
| 2322 | &err); | ||
| 2323 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
| 2324 | /* FIXME: need to provide precise fault address */ | ||
| 2325 | kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); | ||
| 2326 | return ret; | ||
| 2327 | } | ||
| 2328 | |||
| 2329 | if (old_tss_sel != 0xffff) { | ||
| 2330 | tss_seg.prev_task_link = old_tss_sel; | ||
| 2331 | |||
| 2332 | ret = ops->write_std(new_tss_base, | ||
| 2333 | &tss_seg.prev_task_link, | ||
| 2334 | sizeof tss_seg.prev_task_link, | ||
| 2335 | ctxt->vcpu, &err); | ||
| 2336 | if (ret == X86EMUL_PROPAGATE_FAULT) { | ||
| 2337 | /* FIXME: need to provide precise fault address */ | ||
| 2338 | kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); | ||
| 2339 | return ret; | ||
| 2340 | } | ||
| 2341 | } | ||
| 2342 | |||
| 2343 | return load_state_from_tss32(ctxt, ops, &tss_seg); | ||
| 2344 | } | ||
| 2345 | |||
| 2346 | static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | ||
| 2347 | struct x86_emulate_ops *ops, | ||
| 2348 | u16 tss_selector, int reason, | ||
| 2349 | bool has_error_code, u32 error_code) | ||
| 2350 | { | ||
| 2351 | struct desc_struct curr_tss_desc, next_tss_desc; | ||
| 2352 | int ret; | ||
| 2353 | u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu); | ||
| 2354 | ulong old_tss_base = | ||
| 2355 | get_cached_descriptor_base(ctxt, ops, VCPU_SREG_TR); | ||
| 2356 | u32 desc_limit; | ||
| 2357 | |||
| 2358 | /* FIXME: old_tss_base == ~0 ? */ | ||
| 2359 | |||
| 2360 | ret = read_segment_descriptor(ctxt, ops, tss_selector, &next_tss_desc); | ||
| 2361 | if (ret != X86EMUL_CONTINUE) | ||
| 2362 | return ret; | ||
| 2363 | ret = read_segment_descriptor(ctxt, ops, old_tss_sel, &curr_tss_desc); | ||
| 2364 | if (ret != X86EMUL_CONTINUE) | ||
| 2365 | return ret; | ||
| 2366 | |||
| 2367 | /* FIXME: check that next_tss_desc is tss */ | ||
| 2368 | |||
| 2369 | if (reason != TASK_SWITCH_IRET) { | ||
| 2370 | if ((tss_selector & 3) > next_tss_desc.dpl || | ||
| 2371 | ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) { | ||
| 2372 | kvm_inject_gp(ctxt->vcpu, 0); | ||
| 2373 | return X86EMUL_PROPAGATE_FAULT; | ||
| 2374 | } | ||
| 2375 | } | ||
| 2376 | |||
| 2377 | desc_limit = desc_limit_scaled(&next_tss_desc); | ||
| 2378 | if (!next_tss_desc.p || | ||
| 2379 | ((desc_limit < 0x67 && (next_tss_desc.type & 8)) || | ||
| 2380 | desc_limit < 0x2b)) { | ||
| 2381 | kvm_queue_exception_e(ctxt->vcpu, TS_VECTOR, | ||
| 2382 | tss_selector & 0xfffc); | ||
| 2383 | return X86EMUL_PROPAGATE_FAULT; | ||
| 2384 | } | ||
| 2385 | |||
| 2386 | if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { | ||
| 2387 | curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */ | ||
| 2388 | write_segment_descriptor(ctxt, ops, old_tss_sel, | ||
| 2389 | &curr_tss_desc); | ||
| 2390 | } | ||
| 2391 | |||
| 2392 | if (reason == TASK_SWITCH_IRET) | ||
| 2393 | ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT; | ||
| 2394 | |||
| 2395 | /* set back link to prev task only if NT bit is set in eflags | ||
| 2396 | note that old_tss_sel is not used after this point */ | ||
| 2397 | if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) | ||
| 2398 | old_tss_sel = 0xffff; | ||
| 2399 | |||
| 2400 | if (next_tss_desc.type & 8) | ||
| 2401 | ret = task_switch_32(ctxt, ops, tss_selector, old_tss_sel, | ||
| 2402 | old_tss_base, &next_tss_desc); | ||
| 2403 | else | ||
| 2404 | ret = task_switch_16(ctxt, ops, tss_selector, old_tss_sel, | ||
| 2405 | old_tss_base, &next_tss_desc); | ||
| 2406 | if (ret != X86EMUL_CONTINUE) | ||
| 2407 | return ret; | ||
| 2408 | |||
| 2409 | if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) | ||
| 2410 | ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT; | ||
| 2411 | |||
| 2412 | if (reason != TASK_SWITCH_IRET) { | ||
| 2413 | next_tss_desc.type |= (1 << 1); /* set busy flag */ | ||
| 2414 | write_segment_descriptor(ctxt, ops, tss_selector, | ||
| 2415 | &next_tss_desc); | ||
| 2416 | } | ||
| 2417 | |||
| 2418 | ops->set_cr(0, ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu); | ||
| 2419 | ops->set_cached_descriptor(&next_tss_desc, VCPU_SREG_TR, ctxt->vcpu); | ||
| 2420 | ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu); | ||
| 2421 | |||
| 2422 | if (has_error_code) { | ||
| 2423 | struct decode_cache *c = &ctxt->decode; | ||
| 2424 | |||
| 2425 | c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2; | ||
| 2426 | c->lock_prefix = 0; | ||
| 2427 | c->src.val = (unsigned long) error_code; | ||
| 2428 | emulate_push(ctxt); | ||
| 2429 | } | ||
| 2430 | |||
| 2431 | return ret; | ||
| 2432 | } | ||
| 2433 | |||
| 2434 | int emulator_task_switch(struct x86_emulate_ctxt *ctxt, | ||
| 2435 | struct x86_emulate_ops *ops, | ||
| 2436 | u16 tss_selector, int reason, | ||
| 2437 | bool has_error_code, u32 error_code) | ||
| 2438 | { | ||
| 2439 | struct decode_cache *c = &ctxt->decode; | ||
| 2440 | int rc; | ||
| 2441 | |||
| 2442 | memset(c, 0, sizeof(struct decode_cache)); | ||
| 2443 | c->eip = ctxt->eip; | ||
| 2444 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); | ||
| 2445 | c->dst.type = OP_NONE; | ||
| 2446 | |||
| 2447 | rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason, | ||
| 2448 | has_error_code, error_code); | ||
| 2449 | |||
| 2450 | if (rc == X86EMUL_CONTINUE) { | ||
| 2451 | memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); | ||
| 2452 | kvm_rip_write(ctxt->vcpu, c->eip); | ||
| 2453 | rc = writeback(ctxt, ops); | ||
| 2454 | } | ||
| 2455 | |||
| 2456 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; | ||
| 2457 | } | ||
| 2458 | |||
| 2459 | static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned long base, | ||
| 2460 | int reg, struct operand *op) | ||
| 2461 | { | ||
| 2462 | struct decode_cache *c = &ctxt->decode; | ||
| 2463 | int df = (ctxt->eflags & EFLG_DF) ? -1 : 1; | ||
| 2464 | |||
| 2465 | register_address_increment(c, &c->regs[reg], df * op->bytes); | ||
| 2466 | op->ptr = (unsigned long *)register_address(c, base, c->regs[reg]); | ||
| 2467 | } | ||
| 2468 | |||
| 1810 | int | 2469 | int |
| 1811 | x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | 2470 | x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) |
| 1812 | { | 2471 | { |
| 1813 | unsigned long memop = 0; | ||
| 1814 | u64 msr_data; | 2472 | u64 msr_data; |
| 1815 | unsigned long saved_eip = 0; | ||
| 1816 | struct decode_cache *c = &ctxt->decode; | 2473 | struct decode_cache *c = &ctxt->decode; |
| 1817 | unsigned int port; | 2474 | int rc = X86EMUL_CONTINUE; |
| 1818 | int io_dir_in; | 2475 | int saved_dst_type = c->dst.type; |
| 1819 | int rc = 0; | ||
| 1820 | 2476 | ||
| 1821 | ctxt->interruptibility = 0; | 2477 | ctxt->interruptibility = 0; |
| 1822 | 2478 | ||
| @@ -1826,26 +2482,30 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
| 1826 | */ | 2482 | */ |
| 1827 | 2483 | ||
| 1828 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); | 2484 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); |
| 1829 | saved_eip = c->eip; | 2485 | |
| 2486 | if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { | ||
| 2487 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | ||
| 2488 | goto done; | ||
| 2489 | } | ||
| 1830 | 2490 | ||
| 1831 | /* LOCK prefix is allowed only with some instructions */ | 2491 | /* LOCK prefix is allowed only with some instructions */ |
| 1832 | if (c->lock_prefix && !(c->d & Lock)) { | 2492 | if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) { |
| 1833 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | 2493 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); |
| 1834 | goto done; | 2494 | goto done; |
| 1835 | } | 2495 | } |
| 1836 | 2496 | ||
| 1837 | /* Privileged instruction can be executed only in CPL=0 */ | 2497 | /* Privileged instruction can be executed only in CPL=0 */ |
| 1838 | if ((c->d & Priv) && kvm_x86_ops->get_cpl(ctxt->vcpu)) { | 2498 | if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) { |
| 1839 | kvm_inject_gp(ctxt->vcpu, 0); | 2499 | kvm_inject_gp(ctxt->vcpu, 0); |
| 1840 | goto done; | 2500 | goto done; |
| 1841 | } | 2501 | } |
| 1842 | 2502 | ||
| 1843 | if (((c->d & ModRM) && (c->modrm_mod != 3)) || (c->d & MemAbs)) | ||
| 1844 | memop = c->modrm_ea; | ||
| 1845 | |||
| 1846 | if (c->rep_prefix && (c->d & String)) { | 2503 | if (c->rep_prefix && (c->d & String)) { |
| 2504 | ctxt->restart = true; | ||
| 1847 | /* All REP prefixes have the same first termination condition */ | 2505 | /* All REP prefixes have the same first termination condition */ |
| 1848 | if (c->regs[VCPU_REGS_RCX] == 0) { | 2506 | if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) { |
| 2507 | string_done: | ||
| 2508 | ctxt->restart = false; | ||
| 1849 | kvm_rip_write(ctxt->vcpu, c->eip); | 2509 | kvm_rip_write(ctxt->vcpu, c->eip); |
| 1850 | goto done; | 2510 | goto done; |
| 1851 | } | 2511 | } |
| @@ -1857,25 +2517,18 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
| 1857 | * - if REPNE/REPNZ and ZF = 1 then done | 2517 | * - if REPNE/REPNZ and ZF = 1 then done |
| 1858 | */ | 2518 | */ |
| 1859 | if ((c->b == 0xa6) || (c->b == 0xa7) || | 2519 | if ((c->b == 0xa6) || (c->b == 0xa7) || |
| 1860 | (c->b == 0xae) || (c->b == 0xaf)) { | 2520 | (c->b == 0xae) || (c->b == 0xaf)) { |
| 1861 | if ((c->rep_prefix == REPE_PREFIX) && | 2521 | if ((c->rep_prefix == REPE_PREFIX) && |
| 1862 | ((ctxt->eflags & EFLG_ZF) == 0)) { | 2522 | ((ctxt->eflags & EFLG_ZF) == 0)) |
| 1863 | kvm_rip_write(ctxt->vcpu, c->eip); | 2523 | goto string_done; |
| 1864 | goto done; | ||
| 1865 | } | ||
| 1866 | if ((c->rep_prefix == REPNE_PREFIX) && | 2524 | if ((c->rep_prefix == REPNE_PREFIX) && |
| 1867 | ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)) { | 2525 | ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)) |
| 1868 | kvm_rip_write(ctxt->vcpu, c->eip); | 2526 | goto string_done; |
| 1869 | goto done; | ||
| 1870 | } | ||
| 1871 | } | 2527 | } |
| 1872 | c->regs[VCPU_REGS_RCX]--; | 2528 | c->eip = ctxt->eip; |
| 1873 | c->eip = kvm_rip_read(ctxt->vcpu); | ||
| 1874 | } | 2529 | } |
| 1875 | 2530 | ||
| 1876 | if (c->src.type == OP_MEM) { | 2531 | if (c->src.type == OP_MEM) { |
| 1877 | c->src.ptr = (unsigned long *)memop; | ||
| 1878 | c->src.val = 0; | ||
| 1879 | rc = ops->read_emulated((unsigned long)c->src.ptr, | 2532 | rc = ops->read_emulated((unsigned long)c->src.ptr, |
| 1880 | &c->src.val, | 2533 | &c->src.val, |
| 1881 | c->src.bytes, | 2534 | c->src.bytes, |
| @@ -1885,29 +2538,25 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
| 1885 | c->src.orig_val = c->src.val; | 2538 | c->src.orig_val = c->src.val; |
| 1886 | } | 2539 | } |
| 1887 | 2540 | ||
| 2541 | if (c->src2.type == OP_MEM) { | ||
| 2542 | rc = ops->read_emulated((unsigned long)c->src2.ptr, | ||
| 2543 | &c->src2.val, | ||
| 2544 | c->src2.bytes, | ||
| 2545 | ctxt->vcpu); | ||
| 2546 | if (rc != X86EMUL_CONTINUE) | ||
| 2547 | goto done; | ||
| 2548 | } | ||
| 2549 | |||
| 1888 | if ((c->d & DstMask) == ImplicitOps) | 2550 | if ((c->d & DstMask) == ImplicitOps) |
| 1889 | goto special_insn; | 2551 | goto special_insn; |
| 1890 | 2552 | ||
| 1891 | 2553 | ||
| 1892 | if (c->dst.type == OP_MEM) { | 2554 | if ((c->dst.type == OP_MEM) && !(c->d & Mov)) { |
| 1893 | c->dst.ptr = (unsigned long *)memop; | 2555 | /* optimisation - avoid slow emulated read if Mov */ |
| 1894 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 2556 | rc = ops->read_emulated((unsigned long)c->dst.ptr, &c->dst.val, |
| 1895 | c->dst.val = 0; | 2557 | c->dst.bytes, ctxt->vcpu); |
| 1896 | if (c->d & BitOp) { | 2558 | if (rc != X86EMUL_CONTINUE) |
| 1897 | unsigned long mask = ~(c->dst.bytes * 8 - 1); | 2559 | goto done; |
| 1898 | |||
| 1899 | c->dst.ptr = (void *)c->dst.ptr + | ||
| 1900 | (c->src.val & mask) / 8; | ||
| 1901 | } | ||
| 1902 | if (!(c->d & Mov)) { | ||
| 1903 | /* optimisation - avoid slow emulated read */ | ||
| 1904 | rc = ops->read_emulated((unsigned long)c->dst.ptr, | ||
| 1905 | &c->dst.val, | ||
| 1906 | c->dst.bytes, | ||
| 1907 | ctxt->vcpu); | ||
| 1908 | if (rc != X86EMUL_CONTINUE) | ||
| 1909 | goto done; | ||
| 1910 | } | ||
| 1911 | } | 2560 | } |
| 1912 | c->dst.orig_val = c->dst.val; | 2561 | c->dst.orig_val = c->dst.val; |
| 1913 | 2562 | ||
| @@ -1926,7 +2575,7 @@ special_insn: | |||
| 1926 | break; | 2575 | break; |
| 1927 | case 0x07: /* pop es */ | 2576 | case 0x07: /* pop es */ |
| 1928 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); | 2577 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); |
| 1929 | if (rc != 0) | 2578 | if (rc != X86EMUL_CONTINUE) |
| 1930 | goto done; | 2579 | goto done; |
| 1931 | break; | 2580 | break; |
| 1932 | case 0x08 ... 0x0d: | 2581 | case 0x08 ... 0x0d: |
| @@ -1945,7 +2594,7 @@ special_insn: | |||
| 1945 | break; | 2594 | break; |
| 1946 | case 0x17: /* pop ss */ | 2595 | case 0x17: /* pop ss */ |
| 1947 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); | 2596 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); |
| 1948 | if (rc != 0) | 2597 | if (rc != X86EMUL_CONTINUE) |
| 1949 | goto done; | 2598 | goto done; |
| 1950 | break; | 2599 | break; |
| 1951 | case 0x18 ... 0x1d: | 2600 | case 0x18 ... 0x1d: |
| @@ -1957,7 +2606,7 @@ special_insn: | |||
| 1957 | break; | 2606 | break; |
| 1958 | case 0x1f: /* pop ds */ | 2607 | case 0x1f: /* pop ds */ |
| 1959 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); | 2608 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); |
| 1960 | if (rc != 0) | 2609 | if (rc != X86EMUL_CONTINUE) |
| 1961 | goto done; | 2610 | goto done; |
| 1962 | break; | 2611 | break; |
| 1963 | case 0x20 ... 0x25: | 2612 | case 0x20 ... 0x25: |
| @@ -1988,7 +2637,7 @@ special_insn: | |||
| 1988 | case 0x58 ... 0x5f: /* pop reg */ | 2637 | case 0x58 ... 0x5f: /* pop reg */ |
| 1989 | pop_instruction: | 2638 | pop_instruction: |
| 1990 | rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes); | 2639 | rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes); |
| 1991 | if (rc != 0) | 2640 | if (rc != X86EMUL_CONTINUE) |
| 1992 | goto done; | 2641 | goto done; |
| 1993 | break; | 2642 | break; |
| 1994 | case 0x60: /* pusha */ | 2643 | case 0x60: /* pusha */ |
| @@ -1996,7 +2645,7 @@ special_insn: | |||
| 1996 | break; | 2645 | break; |
| 1997 | case 0x61: /* popa */ | 2646 | case 0x61: /* popa */ |
| 1998 | rc = emulate_popa(ctxt, ops); | 2647 | rc = emulate_popa(ctxt, ops); |
| 1999 | if (rc != 0) | 2648 | if (rc != X86EMUL_CONTINUE) |
| 2000 | goto done; | 2649 | goto done; |
| 2001 | break; | 2650 | break; |
| 2002 | case 0x63: /* movsxd */ | 2651 | case 0x63: /* movsxd */ |
| @@ -2010,47 +2659,29 @@ special_insn: | |||
| 2010 | break; | 2659 | break; |
| 2011 | case 0x6c: /* insb */ | 2660 | case 0x6c: /* insb */ |
| 2012 | case 0x6d: /* insw/insd */ | 2661 | case 0x6d: /* insw/insd */ |
| 2662 | c->dst.bytes = min(c->dst.bytes, 4u); | ||
| 2013 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], | 2663 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], |
| 2014 | (c->d & ByteOp) ? 1 : c->op_bytes)) { | 2664 | c->dst.bytes)) { |
| 2015 | kvm_inject_gp(ctxt->vcpu, 0); | 2665 | kvm_inject_gp(ctxt->vcpu, 0); |
| 2016 | goto done; | 2666 | goto done; |
| 2017 | } | 2667 | } |
| 2018 | if (kvm_emulate_pio_string(ctxt->vcpu, | 2668 | if (!pio_in_emulated(ctxt, ops, c->dst.bytes, |
| 2019 | 1, | 2669 | c->regs[VCPU_REGS_RDX], &c->dst.val)) |
| 2020 | (c->d & ByteOp) ? 1 : c->op_bytes, | 2670 | goto done; /* IO is needed, skip writeback */ |
| 2021 | c->rep_prefix ? | 2671 | break; |
| 2022 | address_mask(c, c->regs[VCPU_REGS_RCX]) : 1, | ||
| 2023 | (ctxt->eflags & EFLG_DF), | ||
| 2024 | register_address(c, es_base(ctxt), | ||
| 2025 | c->regs[VCPU_REGS_RDI]), | ||
| 2026 | c->rep_prefix, | ||
| 2027 | c->regs[VCPU_REGS_RDX]) == 0) { | ||
| 2028 | c->eip = saved_eip; | ||
| 2029 | return -1; | ||
| 2030 | } | ||
| 2031 | return 0; | ||
| 2032 | case 0x6e: /* outsb */ | 2672 | case 0x6e: /* outsb */ |
| 2033 | case 0x6f: /* outsw/outsd */ | 2673 | case 0x6f: /* outsw/outsd */ |
| 2674 | c->src.bytes = min(c->src.bytes, 4u); | ||
| 2034 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], | 2675 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], |
| 2035 | (c->d & ByteOp) ? 1 : c->op_bytes)) { | 2676 | c->src.bytes)) { |
| 2036 | kvm_inject_gp(ctxt->vcpu, 0); | 2677 | kvm_inject_gp(ctxt->vcpu, 0); |
| 2037 | goto done; | 2678 | goto done; |
| 2038 | } | 2679 | } |
| 2039 | if (kvm_emulate_pio_string(ctxt->vcpu, | 2680 | ops->pio_out_emulated(c->src.bytes, c->regs[VCPU_REGS_RDX], |
| 2040 | 0, | 2681 | &c->src.val, 1, ctxt->vcpu); |
| 2041 | (c->d & ByteOp) ? 1 : c->op_bytes, | 2682 | |
| 2042 | c->rep_prefix ? | 2683 | c->dst.type = OP_NONE; /* nothing to writeback */ |
| 2043 | address_mask(c, c->regs[VCPU_REGS_RCX]) : 1, | 2684 | break; |
| 2044 | (ctxt->eflags & EFLG_DF), | ||
| 2045 | register_address(c, | ||
| 2046 | seg_override_base(ctxt, c), | ||
| 2047 | c->regs[VCPU_REGS_RSI]), | ||
| 2048 | c->rep_prefix, | ||
| 2049 | c->regs[VCPU_REGS_RDX]) == 0) { | ||
| 2050 | c->eip = saved_eip; | ||
| 2051 | return -1; | ||
| 2052 | } | ||
| 2053 | return 0; | ||
| 2054 | case 0x70 ... 0x7f: /* jcc (short) */ | 2685 | case 0x70 ... 0x7f: /* jcc (short) */ |
| 2055 | if (test_cc(c->b, ctxt->eflags)) | 2686 | if (test_cc(c->b, ctxt->eflags)) |
| 2056 | jmp_rel(c, c->src.val); | 2687 | jmp_rel(c, c->src.val); |
| @@ -2107,12 +2738,11 @@ special_insn: | |||
| 2107 | case 0x8c: { /* mov r/m, sreg */ | 2738 | case 0x8c: { /* mov r/m, sreg */ |
| 2108 | struct kvm_segment segreg; | 2739 | struct kvm_segment segreg; |
| 2109 | 2740 | ||
| 2110 | if (c->modrm_reg <= 5) | 2741 | if (c->modrm_reg <= VCPU_SREG_GS) |
| 2111 | kvm_get_segment(ctxt->vcpu, &segreg, c->modrm_reg); | 2742 | kvm_get_segment(ctxt->vcpu, &segreg, c->modrm_reg); |
| 2112 | else { | 2743 | else { |
| 2113 | printk(KERN_INFO "0x8c: Invalid segreg in modrm byte 0x%02x\n", | 2744 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); |
| 2114 | c->modrm); | 2745 | goto done; |
| 2115 | goto cannot_emulate; | ||
| 2116 | } | 2746 | } |
| 2117 | c->dst.val = segreg.selector; | 2747 | c->dst.val = segreg.selector; |
| 2118 | break; | 2748 | break; |
| @@ -2132,16 +2762,16 @@ special_insn: | |||
| 2132 | } | 2762 | } |
| 2133 | 2763 | ||
| 2134 | if (c->modrm_reg == VCPU_SREG_SS) | 2764 | if (c->modrm_reg == VCPU_SREG_SS) |
| 2135 | toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS); | 2765 | toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_MOV_SS); |
| 2136 | 2766 | ||
| 2137 | rc = kvm_load_segment_descriptor(ctxt->vcpu, sel, c->modrm_reg); | 2767 | rc = load_segment_descriptor(ctxt, ops, sel, c->modrm_reg); |
| 2138 | 2768 | ||
| 2139 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2769 | c->dst.type = OP_NONE; /* Disable writeback. */ |
| 2140 | break; | 2770 | break; |
| 2141 | } | 2771 | } |
| 2142 | case 0x8f: /* pop (sole member of Grp1a) */ | 2772 | case 0x8f: /* pop (sole member of Grp1a) */ |
| 2143 | rc = emulate_grp1a(ctxt, ops); | 2773 | rc = emulate_grp1a(ctxt, ops); |
| 2144 | if (rc != 0) | 2774 | if (rc != X86EMUL_CONTINUE) |
| 2145 | goto done; | 2775 | goto done; |
| 2146 | break; | 2776 | break; |
| 2147 | case 0x90: /* nop / xchg r8,rax */ | 2777 | case 0x90: /* nop / xchg r8,rax */ |
| @@ -2175,89 +2805,16 @@ special_insn: | |||
| 2175 | c->dst.val = (unsigned long)c->regs[VCPU_REGS_RAX]; | 2805 | c->dst.val = (unsigned long)c->regs[VCPU_REGS_RAX]; |
| 2176 | break; | 2806 | break; |
| 2177 | case 0xa4 ... 0xa5: /* movs */ | 2807 | case 0xa4 ... 0xa5: /* movs */ |
| 2178 | c->dst.type = OP_MEM; | 2808 | goto mov; |
| 2179 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
| 2180 | c->dst.ptr = (unsigned long *)register_address(c, | ||
| 2181 | es_base(ctxt), | ||
| 2182 | c->regs[VCPU_REGS_RDI]); | ||
| 2183 | rc = ops->read_emulated(register_address(c, | ||
| 2184 | seg_override_base(ctxt, c), | ||
| 2185 | c->regs[VCPU_REGS_RSI]), | ||
| 2186 | &c->dst.val, | ||
| 2187 | c->dst.bytes, ctxt->vcpu); | ||
| 2188 | if (rc != X86EMUL_CONTINUE) | ||
| 2189 | goto done; | ||
| 2190 | register_address_increment(c, &c->regs[VCPU_REGS_RSI], | ||
| 2191 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | ||
| 2192 | : c->dst.bytes); | ||
| 2193 | register_address_increment(c, &c->regs[VCPU_REGS_RDI], | ||
| 2194 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | ||
| 2195 | : c->dst.bytes); | ||
| 2196 | break; | ||
| 2197 | case 0xa6 ... 0xa7: /* cmps */ | 2809 | case 0xa6 ... 0xa7: /* cmps */ |
| 2198 | c->src.type = OP_NONE; /* Disable writeback. */ | ||
| 2199 | c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
| 2200 | c->src.ptr = (unsigned long *)register_address(c, | ||
| 2201 | seg_override_base(ctxt, c), | ||
| 2202 | c->regs[VCPU_REGS_RSI]); | ||
| 2203 | rc = ops->read_emulated((unsigned long)c->src.ptr, | ||
| 2204 | &c->src.val, | ||
| 2205 | c->src.bytes, | ||
| 2206 | ctxt->vcpu); | ||
| 2207 | if (rc != X86EMUL_CONTINUE) | ||
| 2208 | goto done; | ||
| 2209 | |||
| 2210 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2810 | c->dst.type = OP_NONE; /* Disable writeback. */ |
| 2211 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
| 2212 | c->dst.ptr = (unsigned long *)register_address(c, | ||
| 2213 | es_base(ctxt), | ||
| 2214 | c->regs[VCPU_REGS_RDI]); | ||
| 2215 | rc = ops->read_emulated((unsigned long)c->dst.ptr, | ||
| 2216 | &c->dst.val, | ||
| 2217 | c->dst.bytes, | ||
| 2218 | ctxt->vcpu); | ||
| 2219 | if (rc != X86EMUL_CONTINUE) | ||
| 2220 | goto done; | ||
| 2221 | |||
| 2222 | DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr); | 2811 | DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr); |
| 2223 | 2812 | goto cmp; | |
| 2224 | emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags); | ||
| 2225 | |||
| 2226 | register_address_increment(c, &c->regs[VCPU_REGS_RSI], | ||
| 2227 | (ctxt->eflags & EFLG_DF) ? -c->src.bytes | ||
| 2228 | : c->src.bytes); | ||
| 2229 | register_address_increment(c, &c->regs[VCPU_REGS_RDI], | ||
| 2230 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | ||
| 2231 | : c->dst.bytes); | ||
| 2232 | |||
| 2233 | break; | ||
| 2234 | case 0xaa ... 0xab: /* stos */ | 2813 | case 0xaa ... 0xab: /* stos */ |
| 2235 | c->dst.type = OP_MEM; | ||
| 2236 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
| 2237 | c->dst.ptr = (unsigned long *)register_address(c, | ||
| 2238 | es_base(ctxt), | ||
| 2239 | c->regs[VCPU_REGS_RDI]); | ||
| 2240 | c->dst.val = c->regs[VCPU_REGS_RAX]; | 2814 | c->dst.val = c->regs[VCPU_REGS_RAX]; |
| 2241 | register_address_increment(c, &c->regs[VCPU_REGS_RDI], | ||
| 2242 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | ||
| 2243 | : c->dst.bytes); | ||
| 2244 | break; | 2815 | break; |
| 2245 | case 0xac ... 0xad: /* lods */ | 2816 | case 0xac ... 0xad: /* lods */ |
| 2246 | c->dst.type = OP_REG; | 2817 | goto mov; |
| 2247 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
| 2248 | c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; | ||
| 2249 | rc = ops->read_emulated(register_address(c, | ||
| 2250 | seg_override_base(ctxt, c), | ||
| 2251 | c->regs[VCPU_REGS_RSI]), | ||
| 2252 | &c->dst.val, | ||
| 2253 | c->dst.bytes, | ||
| 2254 | ctxt->vcpu); | ||
| 2255 | if (rc != X86EMUL_CONTINUE) | ||
| 2256 | goto done; | ||
| 2257 | register_address_increment(c, &c->regs[VCPU_REGS_RSI], | ||
| 2258 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | ||
| 2259 | : c->dst.bytes); | ||
| 2260 | break; | ||
| 2261 | case 0xae ... 0xaf: /* scas */ | 2818 | case 0xae ... 0xaf: /* scas */ |
| 2262 | DPRINTF("Urk! I don't handle SCAS.\n"); | 2819 | DPRINTF("Urk! I don't handle SCAS.\n"); |
| 2263 | goto cannot_emulate; | 2820 | goto cannot_emulate; |
| @@ -2277,7 +2834,7 @@ special_insn: | |||
| 2277 | break; | 2834 | break; |
| 2278 | case 0xcb: /* ret far */ | 2835 | case 0xcb: /* ret far */ |
| 2279 | rc = emulate_ret_far(ctxt, ops); | 2836 | rc = emulate_ret_far(ctxt, ops); |
| 2280 | if (rc) | 2837 | if (rc != X86EMUL_CONTINUE) |
| 2281 | goto done; | 2838 | goto done; |
| 2282 | break; | 2839 | break; |
| 2283 | case 0xd0 ... 0xd1: /* Grp2 */ | 2840 | case 0xd0 ... 0xd1: /* Grp2 */ |
| @@ -2290,14 +2847,10 @@ special_insn: | |||
| 2290 | break; | 2847 | break; |
| 2291 | case 0xe4: /* inb */ | 2848 | case 0xe4: /* inb */ |
| 2292 | case 0xe5: /* in */ | 2849 | case 0xe5: /* in */ |
| 2293 | port = c->src.val; | 2850 | goto do_io_in; |
| 2294 | io_dir_in = 1; | ||
| 2295 | goto do_io; | ||
| 2296 | case 0xe6: /* outb */ | 2851 | case 0xe6: /* outb */ |
| 2297 | case 0xe7: /* out */ | 2852 | case 0xe7: /* out */ |
| 2298 | port = c->src.val; | 2853 | goto do_io_out; |
| 2299 | io_dir_in = 0; | ||
| 2300 | goto do_io; | ||
| 2301 | case 0xe8: /* call (near) */ { | 2854 | case 0xe8: /* call (near) */ { |
| 2302 | long int rel = c->src.val; | 2855 | long int rel = c->src.val; |
| 2303 | c->src.val = (unsigned long) c->eip; | 2856 | c->src.val = (unsigned long) c->eip; |
| @@ -2308,8 +2861,9 @@ special_insn: | |||
| 2308 | case 0xe9: /* jmp rel */ | 2861 | case 0xe9: /* jmp rel */ |
| 2309 | goto jmp; | 2862 | goto jmp; |
| 2310 | case 0xea: /* jmp far */ | 2863 | case 0xea: /* jmp far */ |
| 2311 | if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val, | 2864 | jump_far: |
| 2312 | VCPU_SREG_CS)) | 2865 | if (load_segment_descriptor(ctxt, ops, c->src2.val, |
| 2866 | VCPU_SREG_CS)) | ||
| 2313 | goto done; | 2867 | goto done; |
| 2314 | 2868 | ||
| 2315 | c->eip = c->src.val; | 2869 | c->eip = c->src.val; |
| @@ -2321,25 +2875,29 @@ special_insn: | |||
| 2321 | break; | 2875 | break; |
| 2322 | case 0xec: /* in al,dx */ | 2876 | case 0xec: /* in al,dx */ |
| 2323 | case 0xed: /* in (e/r)ax,dx */ | 2877 | case 0xed: /* in (e/r)ax,dx */ |
| 2324 | port = c->regs[VCPU_REGS_RDX]; | 2878 | c->src.val = c->regs[VCPU_REGS_RDX]; |
| 2325 | io_dir_in = 1; | 2879 | do_io_in: |
| 2326 | goto do_io; | 2880 | c->dst.bytes = min(c->dst.bytes, 4u); |
| 2881 | if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { | ||
| 2882 | kvm_inject_gp(ctxt->vcpu, 0); | ||
| 2883 | goto done; | ||
| 2884 | } | ||
| 2885 | if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val, | ||
| 2886 | &c->dst.val)) | ||
| 2887 | goto done; /* IO is needed */ | ||
| 2888 | break; | ||
| 2327 | case 0xee: /* out al,dx */ | 2889 | case 0xee: /* out al,dx */ |
| 2328 | case 0xef: /* out (e/r)ax,dx */ | 2890 | case 0xef: /* out (e/r)ax,dx */ |
| 2329 | port = c->regs[VCPU_REGS_RDX]; | 2891 | c->src.val = c->regs[VCPU_REGS_RDX]; |
| 2330 | io_dir_in = 0; | 2892 | do_io_out: |
| 2331 | do_io: | 2893 | c->dst.bytes = min(c->dst.bytes, 4u); |
| 2332 | if (!emulator_io_permited(ctxt, ops, port, | 2894 | if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { |
| 2333 | (c->d & ByteOp) ? 1 : c->op_bytes)) { | ||
| 2334 | kvm_inject_gp(ctxt->vcpu, 0); | 2895 | kvm_inject_gp(ctxt->vcpu, 0); |
| 2335 | goto done; | 2896 | goto done; |
| 2336 | } | 2897 | } |
| 2337 | if (kvm_emulate_pio(ctxt->vcpu, io_dir_in, | 2898 | ops->pio_out_emulated(c->dst.bytes, c->src.val, &c->dst.val, 1, |
| 2338 | (c->d & ByteOp) ? 1 : c->op_bytes, | 2899 | ctxt->vcpu); |
| 2339 | port) != 0) { | 2900 | c->dst.type = OP_NONE; /* Disable writeback. */ |
| 2340 | c->eip = saved_eip; | ||
| 2341 | goto cannot_emulate; | ||
| 2342 | } | ||
| 2343 | break; | 2901 | break; |
| 2344 | case 0xf4: /* hlt */ | 2902 | case 0xf4: /* hlt */ |
| 2345 | ctxt->vcpu->arch.halt_request = 1; | 2903 | ctxt->vcpu->arch.halt_request = 1; |
| @@ -2350,16 +2908,15 @@ special_insn: | |||
| 2350 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2908 | c->dst.type = OP_NONE; /* Disable writeback. */ |
| 2351 | break; | 2909 | break; |
| 2352 | case 0xf6 ... 0xf7: /* Grp3 */ | 2910 | case 0xf6 ... 0xf7: /* Grp3 */ |
| 2353 | rc = emulate_grp3(ctxt, ops); | 2911 | if (!emulate_grp3(ctxt, ops)) |
| 2354 | if (rc != 0) | 2912 | goto cannot_emulate; |
| 2355 | goto done; | ||
| 2356 | break; | 2913 | break; |
| 2357 | case 0xf8: /* clc */ | 2914 | case 0xf8: /* clc */ |
| 2358 | ctxt->eflags &= ~EFLG_CF; | 2915 | ctxt->eflags &= ~EFLG_CF; |
| 2359 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2916 | c->dst.type = OP_NONE; /* Disable writeback. */ |
| 2360 | break; | 2917 | break; |
| 2361 | case 0xfa: /* cli */ | 2918 | case 0xfa: /* cli */ |
| 2362 | if (emulator_bad_iopl(ctxt)) | 2919 | if (emulator_bad_iopl(ctxt, ops)) |
| 2363 | kvm_inject_gp(ctxt->vcpu, 0); | 2920 | kvm_inject_gp(ctxt->vcpu, 0); |
| 2364 | else { | 2921 | else { |
| 2365 | ctxt->eflags &= ~X86_EFLAGS_IF; | 2922 | ctxt->eflags &= ~X86_EFLAGS_IF; |
| @@ -2367,10 +2924,10 @@ special_insn: | |||
| 2367 | } | 2924 | } |
| 2368 | break; | 2925 | break; |
| 2369 | case 0xfb: /* sti */ | 2926 | case 0xfb: /* sti */ |
| 2370 | if (emulator_bad_iopl(ctxt)) | 2927 | if (emulator_bad_iopl(ctxt, ops)) |
| 2371 | kvm_inject_gp(ctxt->vcpu, 0); | 2928 | kvm_inject_gp(ctxt->vcpu, 0); |
| 2372 | else { | 2929 | else { |
| 2373 | toggle_interruptibility(ctxt, X86_SHADOW_INT_STI); | 2930 | toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_STI); |
| 2374 | ctxt->eflags |= X86_EFLAGS_IF; | 2931 | ctxt->eflags |= X86_EFLAGS_IF; |
| 2375 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2932 | c->dst.type = OP_NONE; /* Disable writeback. */ |
| 2376 | } | 2933 | } |
| @@ -2383,28 +2940,55 @@ special_insn: | |||
| 2383 | ctxt->eflags |= EFLG_DF; | 2940 | ctxt->eflags |= EFLG_DF; |
| 2384 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2941 | c->dst.type = OP_NONE; /* Disable writeback. */ |
| 2385 | break; | 2942 | break; |
| 2386 | case 0xfe ... 0xff: /* Grp4/Grp5 */ | 2943 | case 0xfe: /* Grp4 */ |
| 2944 | grp45: | ||
| 2387 | rc = emulate_grp45(ctxt, ops); | 2945 | rc = emulate_grp45(ctxt, ops); |
| 2388 | if (rc != 0) | 2946 | if (rc != X86EMUL_CONTINUE) |
| 2389 | goto done; | 2947 | goto done; |
| 2390 | break; | 2948 | break; |
| 2949 | case 0xff: /* Grp5 */ | ||
| 2950 | if (c->modrm_reg == 5) | ||
| 2951 | goto jump_far; | ||
| 2952 | goto grp45; | ||
| 2391 | } | 2953 | } |
| 2392 | 2954 | ||
| 2393 | writeback: | 2955 | writeback: |
| 2394 | rc = writeback(ctxt, ops); | 2956 | rc = writeback(ctxt, ops); |
| 2395 | if (rc != 0) | 2957 | if (rc != X86EMUL_CONTINUE) |
| 2396 | goto done; | 2958 | goto done; |
| 2397 | 2959 | ||
| 2960 | /* | ||
| 2961 | * restore dst type in case the decoding will be reused | ||
| 2962 | * (happens for string instruction ) | ||
| 2963 | */ | ||
| 2964 | c->dst.type = saved_dst_type; | ||
| 2965 | |||
| 2966 | if ((c->d & SrcMask) == SrcSI) | ||
| 2967 | string_addr_inc(ctxt, seg_override_base(ctxt, c), VCPU_REGS_RSI, | ||
| 2968 | &c->src); | ||
| 2969 | |||
| 2970 | if ((c->d & DstMask) == DstDI) | ||
| 2971 | string_addr_inc(ctxt, es_base(ctxt), VCPU_REGS_RDI, &c->dst); | ||
| 2972 | |||
| 2973 | if (c->rep_prefix && (c->d & String)) { | ||
| 2974 | struct read_cache *rc = &ctxt->decode.io_read; | ||
| 2975 | register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1); | ||
| 2976 | /* | ||
| 2977 | * Re-enter guest when pio read ahead buffer is empty or, | ||
| 2978 | * if it is not used, after each 1024 iteration. | ||
| 2979 | */ | ||
| 2980 | if ((rc->end == 0 && !(c->regs[VCPU_REGS_RCX] & 0x3ff)) || | ||
| 2981 | (rc->end != 0 && rc->end == rc->pos)) | ||
| 2982 | ctxt->restart = false; | ||
| 2983 | } | ||
| 2984 | |||
| 2398 | /* Commit shadow register state. */ | 2985 | /* Commit shadow register state. */ |
| 2399 | memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); | 2986 | memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); |
| 2400 | kvm_rip_write(ctxt->vcpu, c->eip); | 2987 | kvm_rip_write(ctxt->vcpu, c->eip); |
| 2988 | ops->set_rflags(ctxt->vcpu, ctxt->eflags); | ||
| 2401 | 2989 | ||
| 2402 | done: | 2990 | done: |
| 2403 | if (rc == X86EMUL_UNHANDLEABLE) { | 2991 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; |
| 2404 | c->eip = saved_eip; | ||
| 2405 | return -1; | ||
| 2406 | } | ||
| 2407 | return 0; | ||
| 2408 | 2992 | ||
| 2409 | twobyte_insn: | 2993 | twobyte_insn: |
| 2410 | switch (c->b) { | 2994 | switch (c->b) { |
| @@ -2418,18 +3002,18 @@ twobyte_insn: | |||
| 2418 | goto cannot_emulate; | 3002 | goto cannot_emulate; |
| 2419 | 3003 | ||
| 2420 | rc = kvm_fix_hypercall(ctxt->vcpu); | 3004 | rc = kvm_fix_hypercall(ctxt->vcpu); |
| 2421 | if (rc) | 3005 | if (rc != X86EMUL_CONTINUE) |
| 2422 | goto done; | 3006 | goto done; |
| 2423 | 3007 | ||
| 2424 | /* Let the processor re-execute the fixed hypercall */ | 3008 | /* Let the processor re-execute the fixed hypercall */ |
| 2425 | c->eip = kvm_rip_read(ctxt->vcpu); | 3009 | c->eip = ctxt->eip; |
| 2426 | /* Disable writeback. */ | 3010 | /* Disable writeback. */ |
| 2427 | c->dst.type = OP_NONE; | 3011 | c->dst.type = OP_NONE; |
| 2428 | break; | 3012 | break; |
| 2429 | case 2: /* lgdt */ | 3013 | case 2: /* lgdt */ |
| 2430 | rc = read_descriptor(ctxt, ops, c->src.ptr, | 3014 | rc = read_descriptor(ctxt, ops, c->src.ptr, |
| 2431 | &size, &address, c->op_bytes); | 3015 | &size, &address, c->op_bytes); |
| 2432 | if (rc) | 3016 | if (rc != X86EMUL_CONTINUE) |
| 2433 | goto done; | 3017 | goto done; |
| 2434 | realmode_lgdt(ctxt->vcpu, size, address); | 3018 | realmode_lgdt(ctxt->vcpu, size, address); |
| 2435 | /* Disable writeback. */ | 3019 | /* Disable writeback. */ |
| @@ -2440,7 +3024,7 @@ twobyte_insn: | |||
| 2440 | switch (c->modrm_rm) { | 3024 | switch (c->modrm_rm) { |
| 2441 | case 1: | 3025 | case 1: |
| 2442 | rc = kvm_fix_hypercall(ctxt->vcpu); | 3026 | rc = kvm_fix_hypercall(ctxt->vcpu); |
| 2443 | if (rc) | 3027 | if (rc != X86EMUL_CONTINUE) |
| 2444 | goto done; | 3028 | goto done; |
| 2445 | break; | 3029 | break; |
| 2446 | default: | 3030 | default: |
| @@ -2450,7 +3034,7 @@ twobyte_insn: | |||
| 2450 | rc = read_descriptor(ctxt, ops, c->src.ptr, | 3034 | rc = read_descriptor(ctxt, ops, c->src.ptr, |
| 2451 | &size, &address, | 3035 | &size, &address, |
| 2452 | c->op_bytes); | 3036 | c->op_bytes); |
| 2453 | if (rc) | 3037 | if (rc != X86EMUL_CONTINUE) |
| 2454 | goto done; | 3038 | goto done; |
| 2455 | realmode_lidt(ctxt->vcpu, size, address); | 3039 | realmode_lidt(ctxt->vcpu, size, address); |
| 2456 | } | 3040 | } |
| @@ -2459,15 +3043,18 @@ twobyte_insn: | |||
| 2459 | break; | 3043 | break; |
| 2460 | case 4: /* smsw */ | 3044 | case 4: /* smsw */ |
| 2461 | c->dst.bytes = 2; | 3045 | c->dst.bytes = 2; |
| 2462 | c->dst.val = realmode_get_cr(ctxt->vcpu, 0); | 3046 | c->dst.val = ops->get_cr(0, ctxt->vcpu); |
| 2463 | break; | 3047 | break; |
| 2464 | case 6: /* lmsw */ | 3048 | case 6: /* lmsw */ |
| 2465 | realmode_lmsw(ctxt->vcpu, (u16)c->src.val, | 3049 | ops->set_cr(0, (ops->get_cr(0, ctxt->vcpu) & ~0x0ful) | |
| 2466 | &ctxt->eflags); | 3050 | (c->src.val & 0x0f), ctxt->vcpu); |
| 2467 | c->dst.type = OP_NONE; | 3051 | c->dst.type = OP_NONE; |
| 2468 | break; | 3052 | break; |
| 3053 | case 5: /* not defined */ | ||
| 3054 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | ||
| 3055 | goto done; | ||
| 2469 | case 7: /* invlpg*/ | 3056 | case 7: /* invlpg*/ |
| 2470 | emulate_invlpg(ctxt->vcpu, memop); | 3057 | emulate_invlpg(ctxt->vcpu, c->modrm_ea); |
| 2471 | /* Disable writeback. */ | 3058 | /* Disable writeback. */ |
| 2472 | c->dst.type = OP_NONE; | 3059 | c->dst.type = OP_NONE; |
| 2473 | break; | 3060 | break; |
| @@ -2493,54 +3080,54 @@ twobyte_insn: | |||
| 2493 | c->dst.type = OP_NONE; | 3080 | c->dst.type = OP_NONE; |
| 2494 | break; | 3081 | break; |
| 2495 | case 0x20: /* mov cr, reg */ | 3082 | case 0x20: /* mov cr, reg */ |
| 2496 | if (c->modrm_mod != 3) | 3083 | switch (c->modrm_reg) { |
| 2497 | goto cannot_emulate; | 3084 | case 1: |
| 2498 | c->regs[c->modrm_rm] = | 3085 | case 5 ... 7: |
| 2499 | realmode_get_cr(ctxt->vcpu, c->modrm_reg); | 3086 | case 9 ... 15: |
| 3087 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | ||
| 3088 | goto done; | ||
| 3089 | } | ||
| 3090 | c->regs[c->modrm_rm] = ops->get_cr(c->modrm_reg, ctxt->vcpu); | ||
| 2500 | c->dst.type = OP_NONE; /* no writeback */ | 3091 | c->dst.type = OP_NONE; /* no writeback */ |
| 2501 | break; | 3092 | break; |
| 2502 | case 0x21: /* mov from dr to reg */ | 3093 | case 0x21: /* mov from dr to reg */ |
| 2503 | if (c->modrm_mod != 3) | 3094 | if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && |
| 2504 | goto cannot_emulate; | 3095 | (c->modrm_reg == 4 || c->modrm_reg == 5)) { |
| 2505 | rc = emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]); | 3096 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); |
| 2506 | if (rc) | 3097 | goto done; |
| 2507 | goto cannot_emulate; | 3098 | } |
| 3099 | emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]); | ||
| 2508 | c->dst.type = OP_NONE; /* no writeback */ | 3100 | c->dst.type = OP_NONE; /* no writeback */ |
| 2509 | break; | 3101 | break; |
| 2510 | case 0x22: /* mov reg, cr */ | 3102 | case 0x22: /* mov reg, cr */ |
| 2511 | if (c->modrm_mod != 3) | 3103 | ops->set_cr(c->modrm_reg, c->modrm_val, ctxt->vcpu); |
| 2512 | goto cannot_emulate; | ||
| 2513 | realmode_set_cr(ctxt->vcpu, | ||
| 2514 | c->modrm_reg, c->modrm_val, &ctxt->eflags); | ||
| 2515 | c->dst.type = OP_NONE; | 3104 | c->dst.type = OP_NONE; |
| 2516 | break; | 3105 | break; |
| 2517 | case 0x23: /* mov from reg to dr */ | 3106 | case 0x23: /* mov from reg to dr */ |
| 2518 | if (c->modrm_mod != 3) | 3107 | if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && |
| 2519 | goto cannot_emulate; | 3108 | (c->modrm_reg == 4 || c->modrm_reg == 5)) { |
| 2520 | rc = emulator_set_dr(ctxt, c->modrm_reg, | 3109 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); |
| 2521 | c->regs[c->modrm_rm]); | 3110 | goto done; |
| 2522 | if (rc) | 3111 | } |
| 2523 | goto cannot_emulate; | 3112 | emulator_set_dr(ctxt, c->modrm_reg, c->regs[c->modrm_rm]); |
| 2524 | c->dst.type = OP_NONE; /* no writeback */ | 3113 | c->dst.type = OP_NONE; /* no writeback */ |
| 2525 | break; | 3114 | break; |
| 2526 | case 0x30: | 3115 | case 0x30: |
| 2527 | /* wrmsr */ | 3116 | /* wrmsr */ |
| 2528 | msr_data = (u32)c->regs[VCPU_REGS_RAX] | 3117 | msr_data = (u32)c->regs[VCPU_REGS_RAX] |
| 2529 | | ((u64)c->regs[VCPU_REGS_RDX] << 32); | 3118 | | ((u64)c->regs[VCPU_REGS_RDX] << 32); |
| 2530 | rc = kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data); | 3119 | if (kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) { |
| 2531 | if (rc) { | ||
| 2532 | kvm_inject_gp(ctxt->vcpu, 0); | 3120 | kvm_inject_gp(ctxt->vcpu, 0); |
| 2533 | c->eip = kvm_rip_read(ctxt->vcpu); | 3121 | goto done; |
| 2534 | } | 3122 | } |
| 2535 | rc = X86EMUL_CONTINUE; | 3123 | rc = X86EMUL_CONTINUE; |
| 2536 | c->dst.type = OP_NONE; | 3124 | c->dst.type = OP_NONE; |
| 2537 | break; | 3125 | break; |
| 2538 | case 0x32: | 3126 | case 0x32: |
| 2539 | /* rdmsr */ | 3127 | /* rdmsr */ |
| 2540 | rc = kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data); | 3128 | if (kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) { |
| 2541 | if (rc) { | ||
| 2542 | kvm_inject_gp(ctxt->vcpu, 0); | 3129 | kvm_inject_gp(ctxt->vcpu, 0); |
| 2543 | c->eip = kvm_rip_read(ctxt->vcpu); | 3130 | goto done; |
| 2544 | } else { | 3131 | } else { |
| 2545 | c->regs[VCPU_REGS_RAX] = (u32)msr_data; | 3132 | c->regs[VCPU_REGS_RAX] = (u32)msr_data; |
| 2546 | c->regs[VCPU_REGS_RDX] = msr_data >> 32; | 3133 | c->regs[VCPU_REGS_RDX] = msr_data >> 32; |
| @@ -2577,7 +3164,7 @@ twobyte_insn: | |||
| 2577 | break; | 3164 | break; |
| 2578 | case 0xa1: /* pop fs */ | 3165 | case 0xa1: /* pop fs */ |
| 2579 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); | 3166 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); |
| 2580 | if (rc != 0) | 3167 | if (rc != X86EMUL_CONTINUE) |
| 2581 | goto done; | 3168 | goto done; |
| 2582 | break; | 3169 | break; |
| 2583 | case 0xa3: | 3170 | case 0xa3: |
| @@ -2596,7 +3183,7 @@ twobyte_insn: | |||
| 2596 | break; | 3183 | break; |
| 2597 | case 0xa9: /* pop gs */ | 3184 | case 0xa9: /* pop gs */ |
| 2598 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); | 3185 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); |
| 2599 | if (rc != 0) | 3186 | if (rc != X86EMUL_CONTINUE) |
| 2600 | goto done; | 3187 | goto done; |
| 2601 | break; | 3188 | break; |
| 2602 | case 0xab: | 3189 | case 0xab: |
| @@ -2668,16 +3255,14 @@ twobyte_insn: | |||
| 2668 | (u64) c->src.val; | 3255 | (u64) c->src.val; |
| 2669 | break; | 3256 | break; |
| 2670 | case 0xc7: /* Grp9 (cmpxchg8b) */ | 3257 | case 0xc7: /* Grp9 (cmpxchg8b) */ |
| 2671 | rc = emulate_grp9(ctxt, ops, memop); | 3258 | rc = emulate_grp9(ctxt, ops); |
| 2672 | if (rc != 0) | 3259 | if (rc != X86EMUL_CONTINUE) |
| 2673 | goto done; | 3260 | goto done; |
| 2674 | c->dst.type = OP_NONE; | ||
| 2675 | break; | 3261 | break; |
| 2676 | } | 3262 | } |
| 2677 | goto writeback; | 3263 | goto writeback; |
| 2678 | 3264 | ||
| 2679 | cannot_emulate: | 3265 | cannot_emulate: |
| 2680 | DPRINTF("Cannot emulate %02x\n", c->b); | 3266 | DPRINTF("Cannot emulate %02x\n", c->b); |
| 2681 | c->eip = saved_eip; | ||
| 2682 | return -1; | 3267 | return -1; |
| 2683 | } | 3268 | } |
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index a790fa128a9f..93825ff3338f 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
| @@ -33,6 +33,29 @@ | |||
| 33 | #include <linux/kvm_host.h> | 33 | #include <linux/kvm_host.h> |
| 34 | #include "trace.h" | 34 | #include "trace.h" |
| 35 | 35 | ||
| 36 | static void pic_lock(struct kvm_pic *s) | ||
| 37 | __acquires(&s->lock) | ||
| 38 | { | ||
| 39 | raw_spin_lock(&s->lock); | ||
| 40 | } | ||
| 41 | |||
| 42 | static void pic_unlock(struct kvm_pic *s) | ||
| 43 | __releases(&s->lock) | ||
| 44 | { | ||
| 45 | bool wakeup = s->wakeup_needed; | ||
| 46 | struct kvm_vcpu *vcpu; | ||
| 47 | |||
| 48 | s->wakeup_needed = false; | ||
| 49 | |||
| 50 | raw_spin_unlock(&s->lock); | ||
| 51 | |||
| 52 | if (wakeup) { | ||
| 53 | vcpu = s->kvm->bsp_vcpu; | ||
| 54 | if (vcpu) | ||
| 55 | kvm_vcpu_kick(vcpu); | ||
| 56 | } | ||
| 57 | } | ||
| 58 | |||
| 36 | static void pic_clear_isr(struct kvm_kpic_state *s, int irq) | 59 | static void pic_clear_isr(struct kvm_kpic_state *s, int irq) |
| 37 | { | 60 | { |
| 38 | s->isr &= ~(1 << irq); | 61 | s->isr &= ~(1 << irq); |
| @@ -45,19 +68,19 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq) | |||
| 45 | * Other interrupt may be delivered to PIC while lock is dropped but | 68 | * Other interrupt may be delivered to PIC while lock is dropped but |
| 46 | * it should be safe since PIC state is already updated at this stage. | 69 | * it should be safe since PIC state is already updated at this stage. |
| 47 | */ | 70 | */ |
| 48 | raw_spin_unlock(&s->pics_state->lock); | 71 | pic_unlock(s->pics_state); |
| 49 | kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq); | 72 | kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq); |
| 50 | raw_spin_lock(&s->pics_state->lock); | 73 | pic_lock(s->pics_state); |
| 51 | } | 74 | } |
| 52 | 75 | ||
| 53 | void kvm_pic_clear_isr_ack(struct kvm *kvm) | 76 | void kvm_pic_clear_isr_ack(struct kvm *kvm) |
| 54 | { | 77 | { |
| 55 | struct kvm_pic *s = pic_irqchip(kvm); | 78 | struct kvm_pic *s = pic_irqchip(kvm); |
| 56 | 79 | ||
| 57 | raw_spin_lock(&s->lock); | 80 | pic_lock(s); |
| 58 | s->pics[0].isr_ack = 0xff; | 81 | s->pics[0].isr_ack = 0xff; |
| 59 | s->pics[1].isr_ack = 0xff; | 82 | s->pics[1].isr_ack = 0xff; |
| 60 | raw_spin_unlock(&s->lock); | 83 | pic_unlock(s); |
| 61 | } | 84 | } |
| 62 | 85 | ||
| 63 | /* | 86 | /* |
| @@ -158,9 +181,9 @@ static void pic_update_irq(struct kvm_pic *s) | |||
| 158 | 181 | ||
| 159 | void kvm_pic_update_irq(struct kvm_pic *s) | 182 | void kvm_pic_update_irq(struct kvm_pic *s) |
| 160 | { | 183 | { |
| 161 | raw_spin_lock(&s->lock); | 184 | pic_lock(s); |
| 162 | pic_update_irq(s); | 185 | pic_update_irq(s); |
| 163 | raw_spin_unlock(&s->lock); | 186 | pic_unlock(s); |
| 164 | } | 187 | } |
| 165 | 188 | ||
| 166 | int kvm_pic_set_irq(void *opaque, int irq, int level) | 189 | int kvm_pic_set_irq(void *opaque, int irq, int level) |
| @@ -168,14 +191,14 @@ int kvm_pic_set_irq(void *opaque, int irq, int level) | |||
| 168 | struct kvm_pic *s = opaque; | 191 | struct kvm_pic *s = opaque; |
| 169 | int ret = -1; | 192 | int ret = -1; |
| 170 | 193 | ||
| 171 | raw_spin_lock(&s->lock); | 194 | pic_lock(s); |
| 172 | if (irq >= 0 && irq < PIC_NUM_PINS) { | 195 | if (irq >= 0 && irq < PIC_NUM_PINS) { |
| 173 | ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); | 196 | ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); |
| 174 | pic_update_irq(s); | 197 | pic_update_irq(s); |
| 175 | trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, | 198 | trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, |
| 176 | s->pics[irq >> 3].imr, ret == 0); | 199 | s->pics[irq >> 3].imr, ret == 0); |
| 177 | } | 200 | } |
| 178 | raw_spin_unlock(&s->lock); | 201 | pic_unlock(s); |
| 179 | 202 | ||
| 180 | return ret; | 203 | return ret; |
| 181 | } | 204 | } |
| @@ -205,7 +228,7 @@ int kvm_pic_read_irq(struct kvm *kvm) | |||
| 205 | int irq, irq2, intno; | 228 | int irq, irq2, intno; |
| 206 | struct kvm_pic *s = pic_irqchip(kvm); | 229 | struct kvm_pic *s = pic_irqchip(kvm); |
| 207 | 230 | ||
| 208 | raw_spin_lock(&s->lock); | 231 | pic_lock(s); |
| 209 | irq = pic_get_irq(&s->pics[0]); | 232 | irq = pic_get_irq(&s->pics[0]); |
| 210 | if (irq >= 0) { | 233 | if (irq >= 0) { |
| 211 | pic_intack(&s->pics[0], irq); | 234 | pic_intack(&s->pics[0], irq); |
| @@ -230,7 +253,7 @@ int kvm_pic_read_irq(struct kvm *kvm) | |||
| 230 | intno = s->pics[0].irq_base + irq; | 253 | intno = s->pics[0].irq_base + irq; |
| 231 | } | 254 | } |
| 232 | pic_update_irq(s); | 255 | pic_update_irq(s); |
| 233 | raw_spin_unlock(&s->lock); | 256 | pic_unlock(s); |
| 234 | 257 | ||
| 235 | return intno; | 258 | return intno; |
| 236 | } | 259 | } |
| @@ -444,7 +467,7 @@ static int picdev_write(struct kvm_io_device *this, | |||
| 444 | printk(KERN_ERR "PIC: non byte write\n"); | 467 | printk(KERN_ERR "PIC: non byte write\n"); |
| 445 | return 0; | 468 | return 0; |
| 446 | } | 469 | } |
| 447 | raw_spin_lock(&s->lock); | 470 | pic_lock(s); |
| 448 | switch (addr) { | 471 | switch (addr) { |
| 449 | case 0x20: | 472 | case 0x20: |
| 450 | case 0x21: | 473 | case 0x21: |
| @@ -457,7 +480,7 @@ static int picdev_write(struct kvm_io_device *this, | |||
| 457 | elcr_ioport_write(&s->pics[addr & 1], addr, data); | 480 | elcr_ioport_write(&s->pics[addr & 1], addr, data); |
| 458 | break; | 481 | break; |
| 459 | } | 482 | } |
| 460 | raw_spin_unlock(&s->lock); | 483 | pic_unlock(s); |
| 461 | return 0; | 484 | return 0; |
| 462 | } | 485 | } |
| 463 | 486 | ||
| @@ -474,7 +497,7 @@ static int picdev_read(struct kvm_io_device *this, | |||
| 474 | printk(KERN_ERR "PIC: non byte read\n"); | 497 | printk(KERN_ERR "PIC: non byte read\n"); |
| 475 | return 0; | 498 | return 0; |
| 476 | } | 499 | } |
| 477 | raw_spin_lock(&s->lock); | 500 | pic_lock(s); |
| 478 | switch (addr) { | 501 | switch (addr) { |
| 479 | case 0x20: | 502 | case 0x20: |
| 480 | case 0x21: | 503 | case 0x21: |
| @@ -488,7 +511,7 @@ static int picdev_read(struct kvm_io_device *this, | |||
| 488 | break; | 511 | break; |
| 489 | } | 512 | } |
| 490 | *(unsigned char *)val = data; | 513 | *(unsigned char *)val = data; |
| 491 | raw_spin_unlock(&s->lock); | 514 | pic_unlock(s); |
| 492 | return 0; | 515 | return 0; |
| 493 | } | 516 | } |
| 494 | 517 | ||
| @@ -505,7 +528,7 @@ static void pic_irq_request(void *opaque, int level) | |||
| 505 | s->output = level; | 528 | s->output = level; |
| 506 | if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) { | 529 | if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) { |
| 507 | s->pics[0].isr_ack &= ~(1 << irq); | 530 | s->pics[0].isr_ack &= ~(1 << irq); |
| 508 | kvm_vcpu_kick(vcpu); | 531 | s->wakeup_needed = true; |
| 509 | } | 532 | } |
| 510 | } | 533 | } |
| 511 | 534 | ||
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 34b15915754d..cd1f362f413d 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h | |||
| @@ -63,6 +63,7 @@ struct kvm_kpic_state { | |||
| 63 | 63 | ||
| 64 | struct kvm_pic { | 64 | struct kvm_pic { |
| 65 | raw_spinlock_t lock; | 65 | raw_spinlock_t lock; |
| 66 | bool wakeup_needed; | ||
| 66 | unsigned pending_acks; | 67 | unsigned pending_acks; |
| 67 | struct kvm *kvm; | 68 | struct kvm *kvm; |
| 68 | struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ | 69 | struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ |
diff --git a/arch/x86/kvm/kvm_timer.h b/arch/x86/kvm/kvm_timer.h index 55c7524dda54..64bc6ea78d90 100644 --- a/arch/x86/kvm/kvm_timer.h +++ b/arch/x86/kvm/kvm_timer.h | |||
| @@ -10,9 +10,7 @@ struct kvm_timer { | |||
| 10 | }; | 10 | }; |
| 11 | 11 | ||
| 12 | struct kvm_timer_ops { | 12 | struct kvm_timer_ops { |
| 13 | bool (*is_periodic)(struct kvm_timer *); | 13 | bool (*is_periodic)(struct kvm_timer *); |
| 14 | }; | 14 | }; |
| 15 | 15 | ||
| 16 | |||
| 17 | enum hrtimer_restart kvm_timer_fn(struct hrtimer *data); | 16 | enum hrtimer_restart kvm_timer_fn(struct hrtimer *data); |
| 18 | |||
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 19a8906bcaa2..81563e76e28f 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
| @@ -148,7 +148,6 @@ module_param(oos_shadow, bool, 0644); | |||
| 148 | 148 | ||
| 149 | #include <trace/events/kvm.h> | 149 | #include <trace/events/kvm.h> |
| 150 | 150 | ||
| 151 | #undef TRACE_INCLUDE_FILE | ||
| 152 | #define CREATE_TRACE_POINTS | 151 | #define CREATE_TRACE_POINTS |
| 153 | #include "mmutrace.h" | 152 | #include "mmutrace.h" |
| 154 | 153 | ||
| @@ -174,12 +173,7 @@ struct kvm_shadow_walk_iterator { | |||
| 174 | shadow_walk_okay(&(_walker)); \ | 173 | shadow_walk_okay(&(_walker)); \ |
| 175 | shadow_walk_next(&(_walker))) | 174 | shadow_walk_next(&(_walker))) |
| 176 | 175 | ||
| 177 | 176 | typedef int (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp); | |
| 178 | struct kvm_unsync_walk { | ||
| 179 | int (*entry) (struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk); | ||
| 180 | }; | ||
| 181 | |||
| 182 | typedef int (*mmu_parent_walk_fn) (struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp); | ||
| 183 | 177 | ||
| 184 | static struct kmem_cache *pte_chain_cache; | 178 | static struct kmem_cache *pte_chain_cache; |
| 185 | static struct kmem_cache *rmap_desc_cache; | 179 | static struct kmem_cache *rmap_desc_cache; |
| @@ -223,7 +217,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, | |||
| 223 | } | 217 | } |
| 224 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); | 218 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); |
| 225 | 219 | ||
| 226 | static int is_write_protection(struct kvm_vcpu *vcpu) | 220 | static bool is_write_protection(struct kvm_vcpu *vcpu) |
| 227 | { | 221 | { |
| 228 | return kvm_read_cr0_bits(vcpu, X86_CR0_WP); | 222 | return kvm_read_cr0_bits(vcpu, X86_CR0_WP); |
| 229 | } | 223 | } |
| @@ -327,7 +321,6 @@ static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache, | |||
| 327 | page = alloc_page(GFP_KERNEL); | 321 | page = alloc_page(GFP_KERNEL); |
| 328 | if (!page) | 322 | if (!page) |
| 329 | return -ENOMEM; | 323 | return -ENOMEM; |
| 330 | set_page_private(page, 0); | ||
| 331 | cache->objects[cache->nobjs++] = page_address(page); | 324 | cache->objects[cache->nobjs++] = page_address(page); |
| 332 | } | 325 | } |
| 333 | return 0; | 326 | return 0; |
| @@ -438,9 +431,9 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn) | |||
| 438 | int i; | 431 | int i; |
| 439 | 432 | ||
| 440 | gfn = unalias_gfn(kvm, gfn); | 433 | gfn = unalias_gfn(kvm, gfn); |
| 434 | slot = gfn_to_memslot_unaliased(kvm, gfn); | ||
| 441 | for (i = PT_DIRECTORY_LEVEL; | 435 | for (i = PT_DIRECTORY_LEVEL; |
| 442 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { | 436 | i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { |
| 443 | slot = gfn_to_memslot_unaliased(kvm, gfn); | ||
| 444 | write_count = slot_largepage_idx(gfn, slot, i); | 437 | write_count = slot_largepage_idx(gfn, slot, i); |
| 445 | *write_count -= 1; | 438 | *write_count -= 1; |
| 446 | WARN_ON(*write_count < 0); | 439 | WARN_ON(*write_count < 0); |
| @@ -654,7 +647,6 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) | |||
| 654 | static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) | 647 | static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) |
| 655 | { | 648 | { |
| 656 | struct kvm_rmap_desc *desc; | 649 | struct kvm_rmap_desc *desc; |
| 657 | struct kvm_rmap_desc *prev_desc; | ||
| 658 | u64 *prev_spte; | 650 | u64 *prev_spte; |
| 659 | int i; | 651 | int i; |
| 660 | 652 | ||
| @@ -666,7 +658,6 @@ static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) | |||
| 666 | return NULL; | 658 | return NULL; |
| 667 | } | 659 | } |
| 668 | desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); | 660 | desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); |
| 669 | prev_desc = NULL; | ||
| 670 | prev_spte = NULL; | 661 | prev_spte = NULL; |
| 671 | while (desc) { | 662 | while (desc) { |
| 672 | for (i = 0; i < RMAP_EXT && desc->sptes[i]; ++i) { | 663 | for (i = 0; i < RMAP_EXT && desc->sptes[i]; ++i) { |
| @@ -794,7 +785,7 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | |||
| 794 | int retval = 0; | 785 | int retval = 0; |
| 795 | struct kvm_memslots *slots; | 786 | struct kvm_memslots *slots; |
| 796 | 787 | ||
| 797 | slots = rcu_dereference(kvm->memslots); | 788 | slots = kvm_memslots(kvm); |
| 798 | 789 | ||
| 799 | for (i = 0; i < slots->nmemslots; i++) { | 790 | for (i = 0; i < slots->nmemslots; i++) { |
| 800 | struct kvm_memory_slot *memslot = &slots->memslots[i]; | 791 | struct kvm_memory_slot *memslot = &slots->memslots[i]; |
| @@ -925,7 +916,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, | |||
| 925 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); | 916 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); |
| 926 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); | 917 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); |
| 927 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); | 918 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); |
| 928 | INIT_LIST_HEAD(&sp->oos_link); | ||
| 929 | bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); | 919 | bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); |
| 930 | sp->multimapped = 0; | 920 | sp->multimapped = 0; |
| 931 | sp->parent_pte = parent_pte; | 921 | sp->parent_pte = parent_pte; |
| @@ -1009,8 +999,7 @@ static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp, | |||
| 1009 | } | 999 | } |
| 1010 | 1000 | ||
| 1011 | 1001 | ||
| 1012 | static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | 1002 | static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn) |
| 1013 | mmu_parent_walk_fn fn) | ||
| 1014 | { | 1003 | { |
| 1015 | struct kvm_pte_chain *pte_chain; | 1004 | struct kvm_pte_chain *pte_chain; |
| 1016 | struct hlist_node *node; | 1005 | struct hlist_node *node; |
| @@ -1019,8 +1008,8 @@ static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
| 1019 | 1008 | ||
| 1020 | if (!sp->multimapped && sp->parent_pte) { | 1009 | if (!sp->multimapped && sp->parent_pte) { |
| 1021 | parent_sp = page_header(__pa(sp->parent_pte)); | 1010 | parent_sp = page_header(__pa(sp->parent_pte)); |
| 1022 | fn(vcpu, parent_sp); | 1011 | fn(parent_sp); |
| 1023 | mmu_parent_walk(vcpu, parent_sp, fn); | 1012 | mmu_parent_walk(parent_sp, fn); |
| 1024 | return; | 1013 | return; |
| 1025 | } | 1014 | } |
| 1026 | hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) | 1015 | hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) |
| @@ -1028,8 +1017,8 @@ static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
| 1028 | if (!pte_chain->parent_ptes[i]) | 1017 | if (!pte_chain->parent_ptes[i]) |
| 1029 | break; | 1018 | break; |
| 1030 | parent_sp = page_header(__pa(pte_chain->parent_ptes[i])); | 1019 | parent_sp = page_header(__pa(pte_chain->parent_ptes[i])); |
| 1031 | fn(vcpu, parent_sp); | 1020 | fn(parent_sp); |
| 1032 | mmu_parent_walk(vcpu, parent_sp, fn); | 1021 | mmu_parent_walk(parent_sp, fn); |
| 1033 | } | 1022 | } |
| 1034 | } | 1023 | } |
| 1035 | 1024 | ||
| @@ -1066,16 +1055,15 @@ static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) | |||
| 1066 | } | 1055 | } |
| 1067 | } | 1056 | } |
| 1068 | 1057 | ||
| 1069 | static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 1058 | static int unsync_walk_fn(struct kvm_mmu_page *sp) |
| 1070 | { | 1059 | { |
| 1071 | kvm_mmu_update_parents_unsync(sp); | 1060 | kvm_mmu_update_parents_unsync(sp); |
| 1072 | return 1; | 1061 | return 1; |
| 1073 | } | 1062 | } |
| 1074 | 1063 | ||
| 1075 | static void kvm_mmu_mark_parents_unsync(struct kvm_vcpu *vcpu, | 1064 | static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp) |
| 1076 | struct kvm_mmu_page *sp) | ||
| 1077 | { | 1065 | { |
| 1078 | mmu_parent_walk(vcpu, sp, unsync_walk_fn); | 1066 | mmu_parent_walk(sp, unsync_walk_fn); |
| 1079 | kvm_mmu_update_parents_unsync(sp); | 1067 | kvm_mmu_update_parents_unsync(sp); |
| 1080 | } | 1068 | } |
| 1081 | 1069 | ||
| @@ -1201,6 +1189,7 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) | |||
| 1201 | static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) | 1189 | static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) |
| 1202 | { | 1190 | { |
| 1203 | WARN_ON(!sp->unsync); | 1191 | WARN_ON(!sp->unsync); |
| 1192 | trace_kvm_mmu_sync_page(sp); | ||
| 1204 | sp->unsync = 0; | 1193 | sp->unsync = 0; |
| 1205 | --kvm->stat.mmu_unsync; | 1194 | --kvm->stat.mmu_unsync; |
| 1206 | } | 1195 | } |
| @@ -1209,12 +1198,11 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp); | |||
| 1209 | 1198 | ||
| 1210 | static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 1199 | static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) |
| 1211 | { | 1200 | { |
| 1212 | if (sp->role.glevels != vcpu->arch.mmu.root_level) { | 1201 | if (sp->role.cr4_pae != !!is_pae(vcpu)) { |
| 1213 | kvm_mmu_zap_page(vcpu->kvm, sp); | 1202 | kvm_mmu_zap_page(vcpu->kvm, sp); |
| 1214 | return 1; | 1203 | return 1; |
| 1215 | } | 1204 | } |
| 1216 | 1205 | ||
| 1217 | trace_kvm_mmu_sync_page(sp); | ||
| 1218 | if (rmap_write_protect(vcpu->kvm, sp->gfn)) | 1206 | if (rmap_write_protect(vcpu->kvm, sp->gfn)) |
| 1219 | kvm_flush_remote_tlbs(vcpu->kvm); | 1207 | kvm_flush_remote_tlbs(vcpu->kvm); |
| 1220 | kvm_unlink_unsync_page(vcpu->kvm, sp); | 1208 | kvm_unlink_unsync_page(vcpu->kvm, sp); |
| @@ -1331,6 +1319,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
| 1331 | role = vcpu->arch.mmu.base_role; | 1319 | role = vcpu->arch.mmu.base_role; |
| 1332 | role.level = level; | 1320 | role.level = level; |
| 1333 | role.direct = direct; | 1321 | role.direct = direct; |
| 1322 | if (role.direct) | ||
| 1323 | role.cr4_pae = 0; | ||
| 1334 | role.access = access; | 1324 | role.access = access; |
| 1335 | if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { | 1325 | if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { |
| 1336 | quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); | 1326 | quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); |
| @@ -1351,7 +1341,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
| 1351 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); | 1341 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); |
| 1352 | if (sp->unsync_children) { | 1342 | if (sp->unsync_children) { |
| 1353 | set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests); | 1343 | set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests); |
| 1354 | kvm_mmu_mark_parents_unsync(vcpu, sp); | 1344 | kvm_mmu_mark_parents_unsync(sp); |
| 1355 | } | 1345 | } |
| 1356 | trace_kvm_mmu_get_page(sp, false); | 1346 | trace_kvm_mmu_get_page(sp, false); |
| 1357 | return sp; | 1347 | return sp; |
| @@ -1573,13 +1563,14 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) | |||
| 1573 | r = 0; | 1563 | r = 0; |
| 1574 | index = kvm_page_table_hashfn(gfn); | 1564 | index = kvm_page_table_hashfn(gfn); |
| 1575 | bucket = &kvm->arch.mmu_page_hash[index]; | 1565 | bucket = &kvm->arch.mmu_page_hash[index]; |
| 1566 | restart: | ||
| 1576 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) | 1567 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) |
| 1577 | if (sp->gfn == gfn && !sp->role.direct) { | 1568 | if (sp->gfn == gfn && !sp->role.direct) { |
| 1578 | pgprintk("%s: gfn %lx role %x\n", __func__, gfn, | 1569 | pgprintk("%s: gfn %lx role %x\n", __func__, gfn, |
| 1579 | sp->role.word); | 1570 | sp->role.word); |
| 1580 | r = 1; | 1571 | r = 1; |
| 1581 | if (kvm_mmu_zap_page(kvm, sp)) | 1572 | if (kvm_mmu_zap_page(kvm, sp)) |
| 1582 | n = bucket->first; | 1573 | goto restart; |
| 1583 | } | 1574 | } |
| 1584 | return r; | 1575 | return r; |
| 1585 | } | 1576 | } |
| @@ -1593,13 +1584,14 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) | |||
| 1593 | 1584 | ||
| 1594 | index = kvm_page_table_hashfn(gfn); | 1585 | index = kvm_page_table_hashfn(gfn); |
| 1595 | bucket = &kvm->arch.mmu_page_hash[index]; | 1586 | bucket = &kvm->arch.mmu_page_hash[index]; |
| 1587 | restart: | ||
| 1596 | hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) { | 1588 | hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) { |
| 1597 | if (sp->gfn == gfn && !sp->role.direct | 1589 | if (sp->gfn == gfn && !sp->role.direct |
| 1598 | && !sp->role.invalid) { | 1590 | && !sp->role.invalid) { |
| 1599 | pgprintk("%s: zap %lx %x\n", | 1591 | pgprintk("%s: zap %lx %x\n", |
| 1600 | __func__, gfn, sp->role.word); | 1592 | __func__, gfn, sp->role.word); |
| 1601 | if (kvm_mmu_zap_page(kvm, sp)) | 1593 | if (kvm_mmu_zap_page(kvm, sp)) |
| 1602 | nn = bucket->first; | 1594 | goto restart; |
| 1603 | } | 1595 | } |
| 1604 | } | 1596 | } |
| 1605 | } | 1597 | } |
| @@ -1626,20 +1618,6 @@ static void mmu_convert_notrap(struct kvm_mmu_page *sp) | |||
| 1626 | } | 1618 | } |
| 1627 | } | 1619 | } |
| 1628 | 1620 | ||
| 1629 | struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) | ||
| 1630 | { | ||
| 1631 | struct page *page; | ||
| 1632 | |||
| 1633 | gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL); | ||
| 1634 | |||
| 1635 | if (gpa == UNMAPPED_GVA) | ||
| 1636 | return NULL; | ||
| 1637 | |||
| 1638 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); | ||
| 1639 | |||
| 1640 | return page; | ||
| 1641 | } | ||
| 1642 | |||
| 1643 | /* | 1621 | /* |
| 1644 | * The function is based on mtrr_type_lookup() in | 1622 | * The function is based on mtrr_type_lookup() in |
| 1645 | * arch/x86/kernel/cpu/mtrr/generic.c | 1623 | * arch/x86/kernel/cpu/mtrr/generic.c |
| @@ -1752,7 +1730,6 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
| 1752 | struct kvm_mmu_page *s; | 1730 | struct kvm_mmu_page *s; |
| 1753 | struct hlist_node *node, *n; | 1731 | struct hlist_node *node, *n; |
| 1754 | 1732 | ||
| 1755 | trace_kvm_mmu_unsync_page(sp); | ||
| 1756 | index = kvm_page_table_hashfn(sp->gfn); | 1733 | index = kvm_page_table_hashfn(sp->gfn); |
| 1757 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; | 1734 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; |
| 1758 | /* don't unsync if pagetable is shadowed with multiple roles */ | 1735 | /* don't unsync if pagetable is shadowed with multiple roles */ |
| @@ -1762,10 +1739,11 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
| 1762 | if (s->role.word != sp->role.word) | 1739 | if (s->role.word != sp->role.word) |
| 1763 | return 1; | 1740 | return 1; |
| 1764 | } | 1741 | } |
| 1742 | trace_kvm_mmu_unsync_page(sp); | ||
| 1765 | ++vcpu->kvm->stat.mmu_unsync; | 1743 | ++vcpu->kvm->stat.mmu_unsync; |
| 1766 | sp->unsync = 1; | 1744 | sp->unsync = 1; |
| 1767 | 1745 | ||
| 1768 | kvm_mmu_mark_parents_unsync(vcpu, sp); | 1746 | kvm_mmu_mark_parents_unsync(sp); |
| 1769 | 1747 | ||
| 1770 | mmu_convert_notrap(sp); | 1748 | mmu_convert_notrap(sp); |
| 1771 | return 0; | 1749 | return 0; |
| @@ -2081,21 +2059,23 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
| 2081 | hpa_t root = vcpu->arch.mmu.root_hpa; | 2059 | hpa_t root = vcpu->arch.mmu.root_hpa; |
| 2082 | 2060 | ||
| 2083 | ASSERT(!VALID_PAGE(root)); | 2061 | ASSERT(!VALID_PAGE(root)); |
| 2084 | if (tdp_enabled) | ||
| 2085 | direct = 1; | ||
| 2086 | if (mmu_check_root(vcpu, root_gfn)) | 2062 | if (mmu_check_root(vcpu, root_gfn)) |
| 2087 | return 1; | 2063 | return 1; |
| 2064 | if (tdp_enabled) { | ||
| 2065 | direct = 1; | ||
| 2066 | root_gfn = 0; | ||
| 2067 | } | ||
| 2068 | spin_lock(&vcpu->kvm->mmu_lock); | ||
| 2088 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, | 2069 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, |
| 2089 | PT64_ROOT_LEVEL, direct, | 2070 | PT64_ROOT_LEVEL, direct, |
| 2090 | ACC_ALL, NULL); | 2071 | ACC_ALL, NULL); |
| 2091 | root = __pa(sp->spt); | 2072 | root = __pa(sp->spt); |
| 2092 | ++sp->root_count; | 2073 | ++sp->root_count; |
| 2074 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
| 2093 | vcpu->arch.mmu.root_hpa = root; | 2075 | vcpu->arch.mmu.root_hpa = root; |
| 2094 | return 0; | 2076 | return 0; |
| 2095 | } | 2077 | } |
| 2096 | direct = !is_paging(vcpu); | 2078 | direct = !is_paging(vcpu); |
| 2097 | if (tdp_enabled) | ||
| 2098 | direct = 1; | ||
| 2099 | for (i = 0; i < 4; ++i) { | 2079 | for (i = 0; i < 4; ++i) { |
| 2100 | hpa_t root = vcpu->arch.mmu.pae_root[i]; | 2080 | hpa_t root = vcpu->arch.mmu.pae_root[i]; |
| 2101 | 2081 | ||
| @@ -2111,11 +2091,18 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
| 2111 | root_gfn = 0; | 2091 | root_gfn = 0; |
| 2112 | if (mmu_check_root(vcpu, root_gfn)) | 2092 | if (mmu_check_root(vcpu, root_gfn)) |
| 2113 | return 1; | 2093 | return 1; |
| 2094 | if (tdp_enabled) { | ||
| 2095 | direct = 1; | ||
| 2096 | root_gfn = i << 30; | ||
| 2097 | } | ||
| 2098 | spin_lock(&vcpu->kvm->mmu_lock); | ||
| 2114 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, | 2099 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, |
| 2115 | PT32_ROOT_LEVEL, direct, | 2100 | PT32_ROOT_LEVEL, direct, |
| 2116 | ACC_ALL, NULL); | 2101 | ACC_ALL, NULL); |
| 2117 | root = __pa(sp->spt); | 2102 | root = __pa(sp->spt); |
| 2118 | ++sp->root_count; | 2103 | ++sp->root_count; |
| 2104 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
| 2105 | |||
| 2119 | vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; | 2106 | vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; |
| 2120 | } | 2107 | } |
| 2121 | vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root); | 2108 | vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root); |
| @@ -2299,13 +2286,19 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) | |||
| 2299 | /* no rsvd bits for 2 level 4K page table entries */ | 2286 | /* no rsvd bits for 2 level 4K page table entries */ |
| 2300 | context->rsvd_bits_mask[0][1] = 0; | 2287 | context->rsvd_bits_mask[0][1] = 0; |
| 2301 | context->rsvd_bits_mask[0][0] = 0; | 2288 | context->rsvd_bits_mask[0][0] = 0; |
| 2289 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; | ||
| 2290 | |||
| 2291 | if (!is_pse(vcpu)) { | ||
| 2292 | context->rsvd_bits_mask[1][1] = 0; | ||
| 2293 | break; | ||
| 2294 | } | ||
| 2295 | |||
| 2302 | if (is_cpuid_PSE36()) | 2296 | if (is_cpuid_PSE36()) |
| 2303 | /* 36bits PSE 4MB page */ | 2297 | /* 36bits PSE 4MB page */ |
| 2304 | context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21); | 2298 | context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21); |
| 2305 | else | 2299 | else |
| 2306 | /* 32 bits PSE 4MB page */ | 2300 | /* 32 bits PSE 4MB page */ |
| 2307 | context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); | 2301 | context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); |
| 2308 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; | ||
| 2309 | break; | 2302 | break; |
| 2310 | case PT32E_ROOT_LEVEL: | 2303 | case PT32E_ROOT_LEVEL: |
| 2311 | context->rsvd_bits_mask[0][2] = | 2304 | context->rsvd_bits_mask[0][2] = |
| @@ -2318,7 +2311,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) | |||
| 2318 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | | 2311 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | |
| 2319 | rsvd_bits(maxphyaddr, 62) | | 2312 | rsvd_bits(maxphyaddr, 62) | |
| 2320 | rsvd_bits(13, 20); /* large page */ | 2313 | rsvd_bits(13, 20); /* large page */ |
| 2321 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; | 2314 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; |
| 2322 | break; | 2315 | break; |
| 2323 | case PT64_ROOT_LEVEL: | 2316 | case PT64_ROOT_LEVEL: |
| 2324 | context->rsvd_bits_mask[0][3] = exb_bit_rsvd | | 2317 | context->rsvd_bits_mask[0][3] = exb_bit_rsvd | |
| @@ -2336,7 +2329,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) | |||
| 2336 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | | 2329 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | |
| 2337 | rsvd_bits(maxphyaddr, 51) | | 2330 | rsvd_bits(maxphyaddr, 51) | |
| 2338 | rsvd_bits(13, 20); /* large page */ | 2331 | rsvd_bits(13, 20); /* large page */ |
| 2339 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; | 2332 | context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; |
| 2340 | break; | 2333 | break; |
| 2341 | } | 2334 | } |
| 2342 | } | 2335 | } |
| @@ -2438,7 +2431,8 @@ static int init_kvm_softmmu(struct kvm_vcpu *vcpu) | |||
| 2438 | else | 2431 | else |
| 2439 | r = paging32_init_context(vcpu); | 2432 | r = paging32_init_context(vcpu); |
| 2440 | 2433 | ||
| 2441 | vcpu->arch.mmu.base_role.glevels = vcpu->arch.mmu.root_level; | 2434 | vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu); |
| 2435 | vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu); | ||
| 2442 | 2436 | ||
| 2443 | return r; | 2437 | return r; |
| 2444 | } | 2438 | } |
| @@ -2478,7 +2472,9 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu) | |||
| 2478 | goto out; | 2472 | goto out; |
| 2479 | spin_lock(&vcpu->kvm->mmu_lock); | 2473 | spin_lock(&vcpu->kvm->mmu_lock); |
| 2480 | kvm_mmu_free_some_pages(vcpu); | 2474 | kvm_mmu_free_some_pages(vcpu); |
| 2475 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
| 2481 | r = mmu_alloc_roots(vcpu); | 2476 | r = mmu_alloc_roots(vcpu); |
| 2477 | spin_lock(&vcpu->kvm->mmu_lock); | ||
| 2482 | mmu_sync_roots(vcpu); | 2478 | mmu_sync_roots(vcpu); |
| 2483 | spin_unlock(&vcpu->kvm->mmu_lock); | 2479 | spin_unlock(&vcpu->kvm->mmu_lock); |
| 2484 | if (r) | 2480 | if (r) |
| @@ -2527,7 +2523,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, | |||
| 2527 | } | 2523 | } |
| 2528 | 2524 | ||
| 2529 | ++vcpu->kvm->stat.mmu_pte_updated; | 2525 | ++vcpu->kvm->stat.mmu_pte_updated; |
| 2530 | if (sp->role.glevels == PT32_ROOT_LEVEL) | 2526 | if (!sp->role.cr4_pae) |
| 2531 | paging32_update_pte(vcpu, sp, spte, new); | 2527 | paging32_update_pte(vcpu, sp, spte, new); |
| 2532 | else | 2528 | else |
| 2533 | paging64_update_pte(vcpu, sp, spte, new); | 2529 | paging64_update_pte(vcpu, sp, spte, new); |
| @@ -2562,36 +2558,11 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu) | |||
| 2562 | } | 2558 | } |
| 2563 | 2559 | ||
| 2564 | static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | 2560 | static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
| 2565 | const u8 *new, int bytes) | 2561 | u64 gpte) |
| 2566 | { | 2562 | { |
| 2567 | gfn_t gfn; | 2563 | gfn_t gfn; |
| 2568 | int r; | ||
| 2569 | u64 gpte = 0; | ||
| 2570 | pfn_t pfn; | 2564 | pfn_t pfn; |
| 2571 | 2565 | ||
| 2572 | if (bytes != 4 && bytes != 8) | ||
| 2573 | return; | ||
| 2574 | |||
| 2575 | /* | ||
| 2576 | * Assume that the pte write on a page table of the same type | ||
| 2577 | * as the current vcpu paging mode. This is nearly always true | ||
| 2578 | * (might be false while changing modes). Note it is verified later | ||
| 2579 | * by update_pte(). | ||
| 2580 | */ | ||
| 2581 | if (is_pae(vcpu)) { | ||
| 2582 | /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ | ||
| 2583 | if ((bytes == 4) && (gpa % 4 == 0)) { | ||
| 2584 | r = kvm_read_guest(vcpu->kvm, gpa & ~(u64)7, &gpte, 8); | ||
| 2585 | if (r) | ||
| 2586 | return; | ||
| 2587 | memcpy((void *)&gpte + (gpa % 8), new, 4); | ||
| 2588 | } else if ((bytes == 8) && (gpa % 8 == 0)) { | ||
| 2589 | memcpy((void *)&gpte, new, 8); | ||
| 2590 | } | ||
| 2591 | } else { | ||
| 2592 | if ((bytes == 4) && (gpa % 4 == 0)) | ||
| 2593 | memcpy((void *)&gpte, new, 4); | ||
| 2594 | } | ||
| 2595 | if (!is_present_gpte(gpte)) | 2566 | if (!is_present_gpte(gpte)) |
| 2596 | return; | 2567 | return; |
| 2597 | gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; | 2568 | gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; |
| @@ -2640,10 +2611,46 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
| 2640 | int flooded = 0; | 2611 | int flooded = 0; |
| 2641 | int npte; | 2612 | int npte; |
| 2642 | int r; | 2613 | int r; |
| 2614 | int invlpg_counter; | ||
| 2643 | 2615 | ||
| 2644 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); | 2616 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); |
| 2645 | mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes); | 2617 | |
| 2618 | invlpg_counter = atomic_read(&vcpu->kvm->arch.invlpg_counter); | ||
| 2619 | |||
| 2620 | /* | ||
| 2621 | * Assume that the pte write on a page table of the same type | ||
| 2622 | * as the current vcpu paging mode. This is nearly always true | ||
| 2623 | * (might be false while changing modes). Note it is verified later | ||
| 2624 | * by update_pte(). | ||
| 2625 | */ | ||
| 2626 | if ((is_pae(vcpu) && bytes == 4) || !new) { | ||
| 2627 | /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ | ||
| 2628 | if (is_pae(vcpu)) { | ||
| 2629 | gpa &= ~(gpa_t)7; | ||
| 2630 | bytes = 8; | ||
| 2631 | } | ||
| 2632 | r = kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8)); | ||
| 2633 | if (r) | ||
| 2634 | gentry = 0; | ||
| 2635 | new = (const u8 *)&gentry; | ||
| 2636 | } | ||
| 2637 | |||
| 2638 | switch (bytes) { | ||
| 2639 | case 4: | ||
| 2640 | gentry = *(const u32 *)new; | ||
| 2641 | break; | ||
| 2642 | case 8: | ||
| 2643 | gentry = *(const u64 *)new; | ||
| 2644 | break; | ||
| 2645 | default: | ||
| 2646 | gentry = 0; | ||
| 2647 | break; | ||
| 2648 | } | ||
| 2649 | |||
| 2650 | mmu_guess_page_from_pte_write(vcpu, gpa, gentry); | ||
| 2646 | spin_lock(&vcpu->kvm->mmu_lock); | 2651 | spin_lock(&vcpu->kvm->mmu_lock); |
| 2652 | if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter) | ||
| 2653 | gentry = 0; | ||
| 2647 | kvm_mmu_access_page(vcpu, gfn); | 2654 | kvm_mmu_access_page(vcpu, gfn); |
| 2648 | kvm_mmu_free_some_pages(vcpu); | 2655 | kvm_mmu_free_some_pages(vcpu); |
| 2649 | ++vcpu->kvm->stat.mmu_pte_write; | 2656 | ++vcpu->kvm->stat.mmu_pte_write; |
| @@ -2662,10 +2669,12 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
| 2662 | } | 2669 | } |
| 2663 | index = kvm_page_table_hashfn(gfn); | 2670 | index = kvm_page_table_hashfn(gfn); |
| 2664 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; | 2671 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; |
| 2672 | |||
| 2673 | restart: | ||
| 2665 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { | 2674 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { |
| 2666 | if (sp->gfn != gfn || sp->role.direct || sp->role.invalid) | 2675 | if (sp->gfn != gfn || sp->role.direct || sp->role.invalid) |
| 2667 | continue; | 2676 | continue; |
| 2668 | pte_size = sp->role.glevels == PT32_ROOT_LEVEL ? 4 : 8; | 2677 | pte_size = sp->role.cr4_pae ? 8 : 4; |
| 2669 | misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); | 2678 | misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); |
| 2670 | misaligned |= bytes < 4; | 2679 | misaligned |= bytes < 4; |
| 2671 | if (misaligned || flooded) { | 2680 | if (misaligned || flooded) { |
| @@ -2682,14 +2691,14 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
| 2682 | pgprintk("misaligned: gpa %llx bytes %d role %x\n", | 2691 | pgprintk("misaligned: gpa %llx bytes %d role %x\n", |
| 2683 | gpa, bytes, sp->role.word); | 2692 | gpa, bytes, sp->role.word); |
| 2684 | if (kvm_mmu_zap_page(vcpu->kvm, sp)) | 2693 | if (kvm_mmu_zap_page(vcpu->kvm, sp)) |
| 2685 | n = bucket->first; | 2694 | goto restart; |
| 2686 | ++vcpu->kvm->stat.mmu_flooded; | 2695 | ++vcpu->kvm->stat.mmu_flooded; |
| 2687 | continue; | 2696 | continue; |
| 2688 | } | 2697 | } |
| 2689 | page_offset = offset; | 2698 | page_offset = offset; |
| 2690 | level = sp->role.level; | 2699 | level = sp->role.level; |
| 2691 | npte = 1; | 2700 | npte = 1; |
| 2692 | if (sp->role.glevels == PT32_ROOT_LEVEL) { | 2701 | if (!sp->role.cr4_pae) { |
| 2693 | page_offset <<= 1; /* 32->64 */ | 2702 | page_offset <<= 1; /* 32->64 */ |
| 2694 | /* | 2703 | /* |
| 2695 | * A 32-bit pde maps 4MB while the shadow pdes map | 2704 | * A 32-bit pde maps 4MB while the shadow pdes map |
| @@ -2707,20 +2716,11 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
| 2707 | continue; | 2716 | continue; |
| 2708 | } | 2717 | } |
| 2709 | spte = &sp->spt[page_offset / sizeof(*spte)]; | 2718 | spte = &sp->spt[page_offset / sizeof(*spte)]; |
| 2710 | if ((gpa & (pte_size - 1)) || (bytes < pte_size)) { | ||
| 2711 | gentry = 0; | ||
| 2712 | r = kvm_read_guest_atomic(vcpu->kvm, | ||
| 2713 | gpa & ~(u64)(pte_size - 1), | ||
| 2714 | &gentry, pte_size); | ||
| 2715 | new = (const void *)&gentry; | ||
| 2716 | if (r < 0) | ||
| 2717 | new = NULL; | ||
| 2718 | } | ||
| 2719 | while (npte--) { | 2719 | while (npte--) { |
| 2720 | entry = *spte; | 2720 | entry = *spte; |
| 2721 | mmu_pte_write_zap_pte(vcpu, sp, spte); | 2721 | mmu_pte_write_zap_pte(vcpu, sp, spte); |
| 2722 | if (new) | 2722 | if (gentry) |
| 2723 | mmu_pte_write_new_pte(vcpu, sp, spte, new); | 2723 | mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); |
| 2724 | mmu_pte_write_flush_tlb(vcpu, entry, *spte); | 2724 | mmu_pte_write_flush_tlb(vcpu, entry, *spte); |
| 2725 | ++spte; | 2725 | ++spte; |
| 2726 | } | 2726 | } |
| @@ -2900,22 +2900,23 @@ void kvm_mmu_zap_all(struct kvm *kvm) | |||
| 2900 | struct kvm_mmu_page *sp, *node; | 2900 | struct kvm_mmu_page *sp, *node; |
| 2901 | 2901 | ||
| 2902 | spin_lock(&kvm->mmu_lock); | 2902 | spin_lock(&kvm->mmu_lock); |
| 2903 | restart: | ||
| 2903 | list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) | 2904 | list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) |
| 2904 | if (kvm_mmu_zap_page(kvm, sp)) | 2905 | if (kvm_mmu_zap_page(kvm, sp)) |
| 2905 | node = container_of(kvm->arch.active_mmu_pages.next, | 2906 | goto restart; |
| 2906 | struct kvm_mmu_page, link); | 2907 | |
| 2907 | spin_unlock(&kvm->mmu_lock); | 2908 | spin_unlock(&kvm->mmu_lock); |
| 2908 | 2909 | ||
| 2909 | kvm_flush_remote_tlbs(kvm); | 2910 | kvm_flush_remote_tlbs(kvm); |
| 2910 | } | 2911 | } |
| 2911 | 2912 | ||
| 2912 | static void kvm_mmu_remove_one_alloc_mmu_page(struct kvm *kvm) | 2913 | static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm) |
| 2913 | { | 2914 | { |
| 2914 | struct kvm_mmu_page *page; | 2915 | struct kvm_mmu_page *page; |
| 2915 | 2916 | ||
| 2916 | page = container_of(kvm->arch.active_mmu_pages.prev, | 2917 | page = container_of(kvm->arch.active_mmu_pages.prev, |
| 2917 | struct kvm_mmu_page, link); | 2918 | struct kvm_mmu_page, link); |
| 2918 | kvm_mmu_zap_page(kvm, page); | 2919 | return kvm_mmu_zap_page(kvm, page) + 1; |
| 2919 | } | 2920 | } |
| 2920 | 2921 | ||
| 2921 | static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) | 2922 | static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) |
| @@ -2927,7 +2928,7 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) | |||
| 2927 | spin_lock(&kvm_lock); | 2928 | spin_lock(&kvm_lock); |
| 2928 | 2929 | ||
| 2929 | list_for_each_entry(kvm, &vm_list, vm_list) { | 2930 | list_for_each_entry(kvm, &vm_list, vm_list) { |
| 2930 | int npages, idx; | 2931 | int npages, idx, freed_pages; |
| 2931 | 2932 | ||
| 2932 | idx = srcu_read_lock(&kvm->srcu); | 2933 | idx = srcu_read_lock(&kvm->srcu); |
| 2933 | spin_lock(&kvm->mmu_lock); | 2934 | spin_lock(&kvm->mmu_lock); |
| @@ -2935,8 +2936,8 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) | |||
| 2935 | kvm->arch.n_free_mmu_pages; | 2936 | kvm->arch.n_free_mmu_pages; |
| 2936 | cache_count += npages; | 2937 | cache_count += npages; |
| 2937 | if (!kvm_freed && nr_to_scan > 0 && npages > 0) { | 2938 | if (!kvm_freed && nr_to_scan > 0 && npages > 0) { |
| 2938 | kvm_mmu_remove_one_alloc_mmu_page(kvm); | 2939 | freed_pages = kvm_mmu_remove_some_alloc_mmu_pages(kvm); |
| 2939 | cache_count--; | 2940 | cache_count -= freed_pages; |
| 2940 | kvm_freed = kvm; | 2941 | kvm_freed = kvm; |
| 2941 | } | 2942 | } |
| 2942 | nr_to_scan--; | 2943 | nr_to_scan--; |
| @@ -3011,7 +3012,8 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm) | |||
| 3011 | unsigned int nr_pages = 0; | 3012 | unsigned int nr_pages = 0; |
| 3012 | struct kvm_memslots *slots; | 3013 | struct kvm_memslots *slots; |
| 3013 | 3014 | ||
| 3014 | slots = rcu_dereference(kvm->memslots); | 3015 | slots = kvm_memslots(kvm); |
| 3016 | |||
| 3015 | for (i = 0; i < slots->nmemslots; i++) | 3017 | for (i = 0; i < slots->nmemslots; i++) |
| 3016 | nr_pages += slots->memslots[i].npages; | 3018 | nr_pages += slots->memslots[i].npages; |
| 3017 | 3019 | ||
| @@ -3174,8 +3176,7 @@ static gva_t canonicalize(gva_t gva) | |||
| 3174 | } | 3176 | } |
| 3175 | 3177 | ||
| 3176 | 3178 | ||
| 3177 | typedef void (*inspect_spte_fn) (struct kvm *kvm, struct kvm_mmu_page *sp, | 3179 | typedef void (*inspect_spte_fn) (struct kvm *kvm, u64 *sptep); |
| 3178 | u64 *sptep); | ||
| 3179 | 3180 | ||
| 3180 | static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp, | 3181 | static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp, |
| 3181 | inspect_spte_fn fn) | 3182 | inspect_spte_fn fn) |
| @@ -3191,7 +3192,7 @@ static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp, | |||
| 3191 | child = page_header(ent & PT64_BASE_ADDR_MASK); | 3192 | child = page_header(ent & PT64_BASE_ADDR_MASK); |
| 3192 | __mmu_spte_walk(kvm, child, fn); | 3193 | __mmu_spte_walk(kvm, child, fn); |
| 3193 | } else | 3194 | } else |
| 3194 | fn(kvm, sp, &sp->spt[i]); | 3195 | fn(kvm, &sp->spt[i]); |
| 3195 | } | 3196 | } |
| 3196 | } | 3197 | } |
| 3197 | } | 3198 | } |
| @@ -3282,11 +3283,13 @@ static void audit_mappings(struct kvm_vcpu *vcpu) | |||
| 3282 | 3283 | ||
| 3283 | static int count_rmaps(struct kvm_vcpu *vcpu) | 3284 | static int count_rmaps(struct kvm_vcpu *vcpu) |
| 3284 | { | 3285 | { |
| 3286 | struct kvm *kvm = vcpu->kvm; | ||
| 3287 | struct kvm_memslots *slots; | ||
| 3285 | int nmaps = 0; | 3288 | int nmaps = 0; |
| 3286 | int i, j, k, idx; | 3289 | int i, j, k, idx; |
| 3287 | 3290 | ||
| 3288 | idx = srcu_read_lock(&kvm->srcu); | 3291 | idx = srcu_read_lock(&kvm->srcu); |
| 3289 | slots = rcu_dereference(kvm->memslots); | 3292 | slots = kvm_memslots(kvm); |
| 3290 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { | 3293 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { |
| 3291 | struct kvm_memory_slot *m = &slots->memslots[i]; | 3294 | struct kvm_memory_slot *m = &slots->memslots[i]; |
| 3292 | struct kvm_rmap_desc *d; | 3295 | struct kvm_rmap_desc *d; |
| @@ -3315,7 +3318,7 @@ static int count_rmaps(struct kvm_vcpu *vcpu) | |||
| 3315 | return nmaps; | 3318 | return nmaps; |
| 3316 | } | 3319 | } |
| 3317 | 3320 | ||
| 3318 | void inspect_spte_has_rmap(struct kvm *kvm, struct kvm_mmu_page *sp, u64 *sptep) | 3321 | void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) |
| 3319 | { | 3322 | { |
| 3320 | unsigned long *rmapp; | 3323 | unsigned long *rmapp; |
| 3321 | struct kvm_mmu_page *rev_sp; | 3324 | struct kvm_mmu_page *rev_sp; |
| @@ -3331,14 +3334,14 @@ void inspect_spte_has_rmap(struct kvm *kvm, struct kvm_mmu_page *sp, u64 *sptep) | |||
| 3331 | printk(KERN_ERR "%s: no memslot for gfn %ld\n", | 3334 | printk(KERN_ERR "%s: no memslot for gfn %ld\n", |
| 3332 | audit_msg, gfn); | 3335 | audit_msg, gfn); |
| 3333 | printk(KERN_ERR "%s: index %ld of sp (gfn=%lx)\n", | 3336 | printk(KERN_ERR "%s: index %ld of sp (gfn=%lx)\n", |
| 3334 | audit_msg, sptep - rev_sp->spt, | 3337 | audit_msg, (long int)(sptep - rev_sp->spt), |
| 3335 | rev_sp->gfn); | 3338 | rev_sp->gfn); |
| 3336 | dump_stack(); | 3339 | dump_stack(); |
| 3337 | return; | 3340 | return; |
| 3338 | } | 3341 | } |
| 3339 | 3342 | ||
| 3340 | rmapp = gfn_to_rmap(kvm, rev_sp->gfns[sptep - rev_sp->spt], | 3343 | rmapp = gfn_to_rmap(kvm, rev_sp->gfns[sptep - rev_sp->spt], |
| 3341 | is_large_pte(*sptep)); | 3344 | rev_sp->role.level); |
| 3342 | if (!*rmapp) { | 3345 | if (!*rmapp) { |
| 3343 | if (!printk_ratelimit()) | 3346 | if (!printk_ratelimit()) |
| 3344 | return; | 3347 | return; |
| @@ -3373,7 +3376,7 @@ static void check_writable_mappings_rmap(struct kvm_vcpu *vcpu) | |||
| 3373 | continue; | 3376 | continue; |
| 3374 | if (!(ent & PT_WRITABLE_MASK)) | 3377 | if (!(ent & PT_WRITABLE_MASK)) |
| 3375 | continue; | 3378 | continue; |
| 3376 | inspect_spte_has_rmap(vcpu->kvm, sp, &pt[i]); | 3379 | inspect_spte_has_rmap(vcpu->kvm, &pt[i]); |
| 3377 | } | 3380 | } |
| 3378 | } | 3381 | } |
| 3379 | return; | 3382 | return; |
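
A recurring change in the mmu.c diff is the new "restart:" label: the hash-bucket and active-list walks jump back to it after kvm_mmu_zap_page() reports that zapping removed more than the current entry, because the saved next pointer of the safe iterator may then point at freed memory. The sketch below only models that restart idiom on a plain singly linked list; the page_model type and zap() helper are simplified stand-ins, not KVM code.

    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>

    /*
     * Restart-on-mutation idiom from the mmu.c hunks above: when removing an
     * entry can also free other entries in the same list, the walk starts
     * over from the head instead of trusting a saved next pointer.
     */
    struct page_model {
        struct page_model *next;
        int gfn;
        bool direct;
    };

    /* Unlink and free one entry; in KVM the return value reports whether
     * additional pages were zapped as a side effect. */
    static bool zap(struct page_model **head, struct page_model *victim)
    {
        struct page_model **pp = head;

        while (*pp && *pp != victim)
            pp = &(*pp)->next;
        if (*pp) {
            *pp = victim->next;
            free(victim);
        }
        return false;
    }

    static void unprotect(struct page_model **head, int gfn)
    {
        struct page_model *sp, *next;

    restart:
        for (sp = *head; sp; sp = next) {
            next = sp->next;            /* "safe" iteration */
            if (sp->gfn == gfn && !sp->direct && zap(head, sp))
                goto restart;           /* other entries may be gone: rescan */
        }
    }

    int main(void)
    {
        struct page_model *a = calloc(1, sizeof(*a));
        struct page_model *b = calloc(1, sizeof(*b));
        struct page_model *head = a;

        a->gfn = 42;
        a->next = b;
        b->gfn = 7;

        unprotect(&head, 42);
        printf("remaining gfn: %d\n", head->gfn);   /* 7 */
        free(head);
        return 0;
    }
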
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index 3e4a5c6ca2a9..42f07b1bfbc9 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h | |||
| @@ -6,14 +6,12 @@ | |||
| 6 | 6 | ||
| 7 | #undef TRACE_SYSTEM | 7 | #undef TRACE_SYSTEM |
| 8 | #define TRACE_SYSTEM kvmmmu | 8 | #define TRACE_SYSTEM kvmmmu |
| 9 | #define TRACE_INCLUDE_PATH . | ||
| 10 | #define TRACE_INCLUDE_FILE mmutrace | ||
| 11 | 9 | ||
| 12 | #define KVM_MMU_PAGE_FIELDS \ | 10 | #define KVM_MMU_PAGE_FIELDS \ |
| 13 | __field(__u64, gfn) \ | 11 | __field(__u64, gfn) \ |
| 14 | __field(__u32, role) \ | 12 | __field(__u32, role) \ |
| 15 | __field(__u32, root_count) \ | 13 | __field(__u32, root_count) \ |
| 16 | __field(__u32, unsync) | 14 | __field(bool, unsync) |
| 17 | 15 | ||
| 18 | #define KVM_MMU_PAGE_ASSIGN(sp) \ | 16 | #define KVM_MMU_PAGE_ASSIGN(sp) \ |
| 19 | __entry->gfn = sp->gfn; \ | 17 | __entry->gfn = sp->gfn; \ |
| @@ -30,14 +28,14 @@ | |||
| 30 | \ | 28 | \ |
| 31 | role.word = __entry->role; \ | 29 | role.word = __entry->role; \ |
| 32 | \ | 30 | \ |
| 33 | trace_seq_printf(p, "sp gfn %llx %u/%u q%u%s %s%s %spge" \ | 31 | trace_seq_printf(p, "sp gfn %llx %u%s q%u%s %s%s" \ |
| 34 | " %snxe root %u %s%c", \ | 32 | " %snxe root %u %s%c", \ |
| 35 | __entry->gfn, role.level, role.glevels, \ | 33 | __entry->gfn, role.level, \ |
| 34 | role.cr4_pae ? " pae" : "", \ | ||
| 36 | role.quadrant, \ | 35 | role.quadrant, \ |
| 37 | role.direct ? " direct" : "", \ | 36 | role.direct ? " direct" : "", \ |
| 38 | access_str[role.access], \ | 37 | access_str[role.access], \ |
| 39 | role.invalid ? " invalid" : "", \ | 38 | role.invalid ? " invalid" : "", \ |
| 40 | role.cr4_pge ? "" : "!", \ | ||
| 41 | role.nxe ? "" : "!", \ | 39 | role.nxe ? "" : "!", \ |
| 42 | __entry->root_count, \ | 40 | __entry->root_count, \ |
| 43 | __entry->unsync ? "unsync" : "sync", 0); \ | 41 | __entry->unsync ? "unsync" : "sync", 0); \ |
| @@ -94,15 +92,15 @@ TRACE_EVENT( | |||
| 94 | TP_printk("pte %llx level %u", __entry->pte, __entry->level) | 92 | TP_printk("pte %llx level %u", __entry->pte, __entry->level) |
| 95 | ); | 93 | ); |
| 96 | 94 | ||
| 97 | /* We set a pte accessed bit */ | 95 | DECLARE_EVENT_CLASS(kvm_mmu_set_bit_class, |
| 98 | TRACE_EVENT( | 96 | |
| 99 | kvm_mmu_set_accessed_bit, | ||
| 100 | TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), | 97 | TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), |
| 98 | |||
| 101 | TP_ARGS(table_gfn, index, size), | 99 | TP_ARGS(table_gfn, index, size), |
| 102 | 100 | ||
| 103 | TP_STRUCT__entry( | 101 | TP_STRUCT__entry( |
| 104 | __field(__u64, gpa) | 102 | __field(__u64, gpa) |
| 105 | ), | 103 | ), |
| 106 | 104 | ||
| 107 | TP_fast_assign( | 105 | TP_fast_assign( |
| 108 | __entry->gpa = ((u64)table_gfn << PAGE_SHIFT) | 106 | __entry->gpa = ((u64)table_gfn << PAGE_SHIFT) |
| @@ -112,22 +110,20 @@ TRACE_EVENT( | |||
| 112 | TP_printk("gpa %llx", __entry->gpa) | 110 | TP_printk("gpa %llx", __entry->gpa) |
| 113 | ); | 111 | ); |
| 114 | 112 | ||
| 115 | /* We set a pte dirty bit */ | 113 | /* We set a pte accessed bit */ |
| 116 | TRACE_EVENT( | 114 | DEFINE_EVENT(kvm_mmu_set_bit_class, kvm_mmu_set_accessed_bit, |
| 117 | kvm_mmu_set_dirty_bit, | 115 | |
| 118 | TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), | 116 | TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), |
| 119 | TP_ARGS(table_gfn, index, size), | ||
| 120 | 117 | ||
| 121 | TP_STRUCT__entry( | 118 | TP_ARGS(table_gfn, index, size) |
| 122 | __field(__u64, gpa) | 119 | ); |
| 123 | ), | ||
| 124 | 120 | ||
| 125 | TP_fast_assign( | 121 | /* We set a pte dirty bit */ |
| 126 | __entry->gpa = ((u64)table_gfn << PAGE_SHIFT) | 122 | DEFINE_EVENT(kvm_mmu_set_bit_class, kvm_mmu_set_dirty_bit, |
| 127 | + index * size; | ||
| 128 | ), | ||
| 129 | 123 | ||
| 130 | TP_printk("gpa %llx", __entry->gpa) | 124 | TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size), |
| 125 | |||
| 126 | TP_ARGS(table_gfn, index, size) | ||
| 131 | ); | 127 | ); |
| 132 | 128 | ||
| 133 | TRACE_EVENT( | 129 | TRACE_EVENT( |
| @@ -166,55 +162,45 @@ TRACE_EVENT( | |||
| 166 | __entry->created ? "new" : "existing") | 162 | __entry->created ? "new" : "existing") |
| 167 | ); | 163 | ); |
| 168 | 164 | ||
| 169 | TRACE_EVENT( | 165 | DECLARE_EVENT_CLASS(kvm_mmu_page_class, |
| 170 | kvm_mmu_sync_page, | 166 | |
| 171 | TP_PROTO(struct kvm_mmu_page *sp), | 167 | TP_PROTO(struct kvm_mmu_page *sp), |
| 172 | TP_ARGS(sp), | 168 | TP_ARGS(sp), |
| 173 | 169 | ||
| 174 | TP_STRUCT__entry( | 170 | TP_STRUCT__entry( |
| 175 | KVM_MMU_PAGE_FIELDS | 171 | KVM_MMU_PAGE_FIELDS |
| 176 | ), | 172 | ), |
| 177 | 173 | ||
| 178 | TP_fast_assign( | 174 | TP_fast_assign( |
| 179 | KVM_MMU_PAGE_ASSIGN(sp) | 175 | KVM_MMU_PAGE_ASSIGN(sp) |
| 180 | ), | 176 | ), |
| 181 | 177 | ||
| 182 | TP_printk("%s", KVM_MMU_PAGE_PRINTK()) | 178 | TP_printk("%s", KVM_MMU_PAGE_PRINTK()) |
| 183 | ); | 179 | ); |
| 184 | 180 | ||
| 185 | TRACE_EVENT( | 181 | DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_sync_page, |
| 186 | kvm_mmu_unsync_page, | ||
| 187 | TP_PROTO(struct kvm_mmu_page *sp), | 182 | TP_PROTO(struct kvm_mmu_page *sp), |
| 188 | TP_ARGS(sp), | ||
| 189 | |||
| 190 | TP_STRUCT__entry( | ||
| 191 | KVM_MMU_PAGE_FIELDS | ||
| 192 | ), | ||
| 193 | 183 | ||
| 194 | TP_fast_assign( | 184 | TP_ARGS(sp) |
| 195 | KVM_MMU_PAGE_ASSIGN(sp) | ||
| 196 | ), | ||
| 197 | |||
| 198 | TP_printk("%s", KVM_MMU_PAGE_PRINTK()) | ||
| 199 | ); | 185 | ); |
| 200 | 186 | ||
| 201 | TRACE_EVENT( | 187 | DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_unsync_page, |
| 202 | kvm_mmu_zap_page, | ||
| 203 | TP_PROTO(struct kvm_mmu_page *sp), | 188 | TP_PROTO(struct kvm_mmu_page *sp), |
| 204 | TP_ARGS(sp), | ||
| 205 | 189 | ||
| 206 | TP_STRUCT__entry( | 190 | TP_ARGS(sp) |
| 207 | KVM_MMU_PAGE_FIELDS | 191 | ); |
| 208 | ), | ||
| 209 | 192 | ||
| 210 | TP_fast_assign( | 193 | DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_zap_page, |
| 211 | KVM_MMU_PAGE_ASSIGN(sp) | 194 | TP_PROTO(struct kvm_mmu_page *sp), |
| 212 | ), | ||
| 213 | 195 | ||
| 214 | TP_printk("%s", KVM_MMU_PAGE_PRINTK()) | 196 | TP_ARGS(sp) |
| 215 | ); | 197 | ); |
| 216 | |||
| 217 | #endif /* _TRACE_KVMMMU_H */ | 198 | #endif /* _TRACE_KVMMMU_H */ |
| 218 | 199 | ||
| 200 | #undef TRACE_INCLUDE_PATH | ||
| 201 | #define TRACE_INCLUDE_PATH . | ||
| 202 | #undef TRACE_INCLUDE_FILE | ||
| 203 | #define TRACE_INCLUDE_FILE mmutrace | ||
| 204 | |||
| 219 | /* This part must be outside protection */ | 205 | /* This part must be outside protection */ |
| 220 | #include <trace/define_trace.h> | 206 | #include <trace/define_trace.h> |
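
The mmutrace.h rewrite folds the three identical TRACE_EVENT() bodies (sync, unsync, zap) and the two set-bit events into shared DECLARE_EVENT_CLASS() definitions with per-event DEFINE_EVENT() stubs, and moves the TRACE_INCLUDE_PATH/TRACE_INCLUDE_FILE defines down next to the final define_trace.h include. Outside the tracing framework, the same "define the body once, stamp out named instances" idea looks roughly like the sketch below; the macro names here are invented for illustration and are unrelated to the kernel tracing infrastructure.

    #include <stdio.h>

    /* Shared event body defined once ... */
    #define DECLARE_PAGE_EVENT_CLASS(body) \
        static void page_event_common(const char *name, int gfn) body

    /* ... and stamped out as named trace_* wrappers. */
    #define DEFINE_PAGE_EVENT(event)                        \
        static void trace_##event(int gfn)                  \
        {                                                   \
            page_event_common(#event, gfn);                 \
        }

    DECLARE_PAGE_EVENT_CLASS({ printf("%s: gfn %d\n", name, gfn); })
    DEFINE_PAGE_EVENT(kvm_mmu_sync_page)
    DEFINE_PAGE_EVENT(kvm_mmu_unsync_page)
    DEFINE_PAGE_EVENT(kvm_mmu_zap_page)

    int main(void)
    {
        trace_kvm_mmu_sync_page(42);
        trace_kvm_mmu_unsync_page(42);
        trace_kvm_mmu_zap_page(7);
        return 0;
    }
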
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 81eab9a50e6a..89d66ca4d87c 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
| @@ -170,7 +170,7 @@ walk: | |||
| 170 | goto access_error; | 170 | goto access_error; |
| 171 | 171 | ||
| 172 | #if PTTYPE == 64 | 172 | #if PTTYPE == 64 |
| 173 | if (fetch_fault && is_nx(vcpu) && (pte & PT64_NX_MASK)) | 173 | if (fetch_fault && (pte & PT64_NX_MASK)) |
| 174 | goto access_error; | 174 | goto access_error; |
| 175 | #endif | 175 | #endif |
| 176 | 176 | ||
| @@ -190,10 +190,10 @@ walk: | |||
| 190 | 190 | ||
| 191 | if ((walker->level == PT_PAGE_TABLE_LEVEL) || | 191 | if ((walker->level == PT_PAGE_TABLE_LEVEL) || |
| 192 | ((walker->level == PT_DIRECTORY_LEVEL) && | 192 | ((walker->level == PT_DIRECTORY_LEVEL) && |
| 193 | (pte & PT_PAGE_SIZE_MASK) && | 193 | is_large_pte(pte) && |
| 194 | (PTTYPE == 64 || is_pse(vcpu))) || | 194 | (PTTYPE == 64 || is_pse(vcpu))) || |
| 195 | ((walker->level == PT_PDPE_LEVEL) && | 195 | ((walker->level == PT_PDPE_LEVEL) && |
| 196 | (pte & PT_PAGE_SIZE_MASK) && | 196 | is_large_pte(pte) && |
| 197 | is_long_mode(vcpu))) { | 197 | is_long_mode(vcpu))) { |
| 198 | int lvl = walker->level; | 198 | int lvl = walker->level; |
| 199 | 199 | ||
| @@ -258,11 +258,17 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | |||
| 258 | pt_element_t gpte; | 258 | pt_element_t gpte; |
| 259 | unsigned pte_access; | 259 | unsigned pte_access; |
| 260 | pfn_t pfn; | 260 | pfn_t pfn; |
| 261 | u64 new_spte; | ||
| 261 | 262 | ||
| 262 | gpte = *(const pt_element_t *)pte; | 263 | gpte = *(const pt_element_t *)pte; |
| 263 | if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { | 264 | if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { |
| 264 | if (!is_present_gpte(gpte)) | 265 | if (!is_present_gpte(gpte)) { |
| 265 | __set_spte(spte, shadow_notrap_nonpresent_pte); | 266 | if (page->unsync) |
| 267 | new_spte = shadow_trap_nonpresent_pte; | ||
| 268 | else | ||
| 269 | new_spte = shadow_notrap_nonpresent_pte; | ||
| 270 | __set_spte(spte, new_spte); | ||
| 271 | } | ||
| 266 | return; | 272 | return; |
| 267 | } | 273 | } |
| 268 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); | 274 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); |
| @@ -457,6 +463,7 @@ out_unlock: | |||
| 457 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | 463 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) |
| 458 | { | 464 | { |
| 459 | struct kvm_shadow_walk_iterator iterator; | 465 | struct kvm_shadow_walk_iterator iterator; |
| 466 | gpa_t pte_gpa = -1; | ||
| 460 | int level; | 467 | int level; |
| 461 | u64 *sptep; | 468 | u64 *sptep; |
| 462 | int need_flush = 0; | 469 | int need_flush = 0; |
| @@ -467,9 +474,16 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
| 467 | level = iterator.level; | 474 | level = iterator.level; |
| 468 | sptep = iterator.sptep; | 475 | sptep = iterator.sptep; |
| 469 | 476 | ||
| 470 | if (level == PT_PAGE_TABLE_LEVEL || | 477 | if (is_last_spte(*sptep, level)) { |
| 471 | ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) || | 478 | struct kvm_mmu_page *sp = page_header(__pa(sptep)); |
| 472 | ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) { | 479 | int offset, shift; |
| 480 | |||
| 481 | shift = PAGE_SHIFT - | ||
| 482 | (PT_LEVEL_BITS - PT64_LEVEL_BITS) * level; | ||
| 483 | offset = sp->role.quadrant << shift; | ||
| 484 | |||
| 485 | pte_gpa = (sp->gfn << PAGE_SHIFT) + offset; | ||
| 486 | pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); | ||
| 473 | 487 | ||
| 474 | if (is_shadow_present_pte(*sptep)) { | 488 | if (is_shadow_present_pte(*sptep)) { |
| 475 | rmap_remove(vcpu->kvm, sptep); | 489 | rmap_remove(vcpu->kvm, sptep); |
| @@ -487,7 +501,17 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
| 487 | 501 | ||
| 488 | if (need_flush) | 502 | if (need_flush) |
| 489 | kvm_flush_remote_tlbs(vcpu->kvm); | 503 | kvm_flush_remote_tlbs(vcpu->kvm); |
| 504 | |||
| 505 | atomic_inc(&vcpu->kvm->arch.invlpg_counter); | ||
| 506 | |||
| 490 | spin_unlock(&vcpu->kvm->mmu_lock); | 507 | spin_unlock(&vcpu->kvm->mmu_lock); |
| 508 | |||
| 509 | if (pte_gpa == -1) | ||
| 510 | return; | ||
| 511 | |||
| 512 | if (mmu_topup_memory_caches(vcpu)) | ||
| 513 | return; | ||
| 514 | kvm_mmu_pte_write(vcpu, pte_gpa, NULL, sizeof(pt_element_t), 0); | ||
| 491 | } | 515 | } |
| 492 | 516 | ||
| 493 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, | 517 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, |
| @@ -551,12 +575,15 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
| 551 | { | 575 | { |
| 552 | int i, offset, nr_present; | 576 | int i, offset, nr_present; |
| 553 | bool reset_host_protection; | 577 | bool reset_host_protection; |
| 578 | gpa_t first_pte_gpa; | ||
| 554 | 579 | ||
| 555 | offset = nr_present = 0; | 580 | offset = nr_present = 0; |
| 556 | 581 | ||
| 557 | if (PTTYPE == 32) | 582 | if (PTTYPE == 32) |
| 558 | offset = sp->role.quadrant << PT64_LEVEL_BITS; | 583 | offset = sp->role.quadrant << PT64_LEVEL_BITS; |
| 559 | 584 | ||
| 585 | first_pte_gpa = gfn_to_gpa(sp->gfn) + offset * sizeof(pt_element_t); | ||
| 586 | |||
| 560 | for (i = 0; i < PT64_ENT_PER_PAGE; i++) { | 587 | for (i = 0; i < PT64_ENT_PER_PAGE; i++) { |
| 561 | unsigned pte_access; | 588 | unsigned pte_access; |
| 562 | pt_element_t gpte; | 589 | pt_element_t gpte; |
| @@ -566,8 +593,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
| 566 | if (!is_shadow_present_pte(sp->spt[i])) | 593 | if (!is_shadow_present_pte(sp->spt[i])) |
| 567 | continue; | 594 | continue; |
| 568 | 595 | ||
| 569 | pte_gpa = gfn_to_gpa(sp->gfn); | 596 | pte_gpa = first_pte_gpa + i * sizeof(pt_element_t); |
| 570 | pte_gpa += (i+offset) * sizeof(pt_element_t); | ||
| 571 | 597 | ||
| 572 | if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte, | 598 | if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte, |
| 573 | sizeof(pt_element_t))) | 599 | sizeof(pt_element_t))) |
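Before moving on to svm.c, a worked restatement of the address arithmetic the paging_tmpl.h hunks above now use twice: invlpg derives the guest PTE's gpa from the shadow page's gfn, its role.quadrant and the sptep index, and sync_page precomputes the same base as first_pte_gpa. The sketch below is illustrative only (the helper name and sample gfn are made up); it assumes the PTTYPE == 32 case, where PT64_LEVEL_BITS is 9, PAGE_SHIFT is 12 and pt_element_t is 4 bytes.

#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative only (not part of the patch): for PTTYPE == 32 a shadow
 * page covers half of the 1024-entry guest page table, so role.quadrant
 * selects the 512-entry half and guest PTEs are 4 bytes each.
 */
static uint64_t gpte_addr(uint64_t gfn, unsigned quadrant, unsigned index)
{
	uint64_t first_pte_gpa = (gfn << 12) + (quadrant << 9) * 4;

	return first_pte_gpa + index * 4;
}

int main(void)
{
	/* quadrant 1, entry 0 -> gfn * 4096 + 2048 */
	printf("%#llx\n", (unsigned long long)gpte_addr(0x1234, 1, 0));
	return 0;
}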
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 2ba58206812a..96dc232bfc56 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
| @@ -44,10 +44,11 @@ MODULE_LICENSE("GPL"); | |||
| 44 | #define SEG_TYPE_LDT 2 | 44 | #define SEG_TYPE_LDT 2 |
| 45 | #define SEG_TYPE_BUSY_TSS16 3 | 45 | #define SEG_TYPE_BUSY_TSS16 3 |
| 46 | 46 | ||
| 47 | #define SVM_FEATURE_NPT (1 << 0) | 47 | #define SVM_FEATURE_NPT (1 << 0) |
| 48 | #define SVM_FEATURE_LBRV (1 << 1) | 48 | #define SVM_FEATURE_LBRV (1 << 1) |
| 49 | #define SVM_FEATURE_SVML (1 << 2) | 49 | #define SVM_FEATURE_SVML (1 << 2) |
| 50 | #define SVM_FEATURE_PAUSE_FILTER (1 << 10) | 50 | #define SVM_FEATURE_NRIP (1 << 3) |
| 51 | #define SVM_FEATURE_PAUSE_FILTER (1 << 10) | ||
| 51 | 52 | ||
| 52 | #define NESTED_EXIT_HOST 0 /* Exit handled on host level */ | 53 | #define NESTED_EXIT_HOST 0 /* Exit handled on host level */ |
| 53 | #define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */ | 54 | #define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */ |
| @@ -70,6 +71,7 @@ struct kvm_vcpu; | |||
| 70 | struct nested_state { | 71 | struct nested_state { |
| 71 | struct vmcb *hsave; | 72 | struct vmcb *hsave; |
| 72 | u64 hsave_msr; | 73 | u64 hsave_msr; |
| 74 | u64 vm_cr_msr; | ||
| 73 | u64 vmcb; | 75 | u64 vmcb; |
| 74 | 76 | ||
| 75 | /* These are the merged vectors */ | 77 | /* These are the merged vectors */ |
| @@ -77,6 +79,7 @@ struct nested_state { | |||
| 77 | 79 | ||
| 78 | /* gpa pointers to the real vectors */ | 80 | /* gpa pointers to the real vectors */ |
| 79 | u64 vmcb_msrpm; | 81 | u64 vmcb_msrpm; |
| 82 | u64 vmcb_iopm; | ||
| 80 | 83 | ||
| 81 | /* A VMEXIT is required but not yet emulated */ | 84 | /* A VMEXIT is required but not yet emulated */ |
| 82 | bool exit_required; | 85 | bool exit_required; |
| @@ -91,6 +94,9 @@ struct nested_state { | |||
| 91 | 94 | ||
| 92 | }; | 95 | }; |
| 93 | 96 | ||
| 97 | #define MSRPM_OFFSETS 16 | ||
| 98 | static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; | ||
| 99 | |||
| 94 | struct vcpu_svm { | 100 | struct vcpu_svm { |
| 95 | struct kvm_vcpu vcpu; | 101 | struct kvm_vcpu vcpu; |
| 96 | struct vmcb *vmcb; | 102 | struct vmcb *vmcb; |
| @@ -110,13 +116,39 @@ struct vcpu_svm { | |||
| 110 | struct nested_state nested; | 116 | struct nested_state nested; |
| 111 | 117 | ||
| 112 | bool nmi_singlestep; | 118 | bool nmi_singlestep; |
| 119 | |||
| 120 | unsigned int3_injected; | ||
| 121 | unsigned long int3_rip; | ||
| 122 | }; | ||
| 123 | |||
| 124 | #define MSR_INVALID 0xffffffffU | ||
| 125 | |||
| 126 | static struct svm_direct_access_msrs { | ||
| 127 | u32 index; /* Index of the MSR */ | ||
| 128 | bool always; /* True if intercept is always on */ | ||
| 129 | } direct_access_msrs[] = { | ||
| 130 | { .index = MSR_K6_STAR, .always = true }, | ||
| 131 | { .index = MSR_IA32_SYSENTER_CS, .always = true }, | ||
| 132 | #ifdef CONFIG_X86_64 | ||
| 133 | { .index = MSR_GS_BASE, .always = true }, | ||
| 134 | { .index = MSR_FS_BASE, .always = true }, | ||
| 135 | { .index = MSR_KERNEL_GS_BASE, .always = true }, | ||
| 136 | { .index = MSR_LSTAR, .always = true }, | ||
| 137 | { .index = MSR_CSTAR, .always = true }, | ||
| 138 | { .index = MSR_SYSCALL_MASK, .always = true }, | ||
| 139 | #endif | ||
| 140 | { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, | ||
| 141 | { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, | ||
| 142 | { .index = MSR_IA32_LASTINTFROMIP, .always = false }, | ||
| 143 | { .index = MSR_IA32_LASTINTTOIP, .always = false }, | ||
| 144 | { .index = MSR_INVALID, .always = false }, | ||
| 113 | }; | 145 | }; |
| 114 | 146 | ||
| 115 | /* enable NPT for AMD64 and X86 with PAE */ | 147 | /* enable NPT for AMD64 and X86 with PAE */ |
| 116 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) | 148 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) |
| 117 | static bool npt_enabled = true; | 149 | static bool npt_enabled = true; |
| 118 | #else | 150 | #else |
| 119 | static bool npt_enabled = false; | 151 | static bool npt_enabled; |
| 120 | #endif | 152 | #endif |
| 121 | static int npt = 1; | 153 | static int npt = 1; |
| 122 | 154 | ||
| @@ -129,6 +161,7 @@ static void svm_flush_tlb(struct kvm_vcpu *vcpu); | |||
| 129 | static void svm_complete_interrupts(struct vcpu_svm *svm); | 161 | static void svm_complete_interrupts(struct vcpu_svm *svm); |
| 130 | 162 | ||
| 131 | static int nested_svm_exit_handled(struct vcpu_svm *svm); | 163 | static int nested_svm_exit_handled(struct vcpu_svm *svm); |
| 164 | static int nested_svm_intercept(struct vcpu_svm *svm); | ||
| 132 | static int nested_svm_vmexit(struct vcpu_svm *svm); | 165 | static int nested_svm_vmexit(struct vcpu_svm *svm); |
| 133 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, | 166 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, |
| 134 | bool has_error_code, u32 error_code); | 167 | bool has_error_code, u32 error_code); |
| @@ -163,8 +196,8 @@ static unsigned long iopm_base; | |||
| 163 | struct kvm_ldttss_desc { | 196 | struct kvm_ldttss_desc { |
| 164 | u16 limit0; | 197 | u16 limit0; |
| 165 | u16 base0; | 198 | u16 base0; |
| 166 | unsigned base1 : 8, type : 5, dpl : 2, p : 1; | 199 | unsigned base1:8, type:5, dpl:2, p:1; |
| 167 | unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8; | 200 | unsigned limit1:4, zero0:3, g:1, base2:8; |
| 168 | u32 base3; | 201 | u32 base3; |
| 169 | u32 zero1; | 202 | u32 zero1; |
| 170 | } __attribute__((packed)); | 203 | } __attribute__((packed)); |
| @@ -194,6 +227,27 @@ static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000}; | |||
| 194 | #define MSRS_RANGE_SIZE 2048 | 227 | #define MSRS_RANGE_SIZE 2048 |
| 195 | #define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2) | 228 | #define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2) |
| 196 | 229 | ||
| 230 | static u32 svm_msrpm_offset(u32 msr) | ||
| 231 | { | ||
| 232 | u32 offset; | ||
| 233 | int i; | ||
| 234 | |||
| 235 | for (i = 0; i < NUM_MSR_MAPS; i++) { | ||
| 236 | if (msr < msrpm_ranges[i] || | ||
| 237 | msr >= msrpm_ranges[i] + MSRS_IN_RANGE) | ||
| 238 | continue; | ||
| 239 | |||
| 240 | offset = (msr - msrpm_ranges[i]) / 4; /* 4 msrs per u8 */ | ||
| 241 | offset += (i * MSRS_RANGE_SIZE); /* add range offset */ | ||
| 242 | |||
| 243 | /* Now we have the u8 offset - but need the u32 offset */ | ||
| 244 | return offset / 4; | ||
| 245 | } | ||
| 246 | |||
| 247 | /* MSR not in any range */ | ||
| 248 | return MSR_INVALID; | ||
| 249 | } | ||
| 250 | |||
| 197 | #define MAX_INST_SIZE 15 | 251 | #define MAX_INST_SIZE 15 |
| 198 | 252 | ||
| 199 | static inline u32 svm_has(u32 feat) | 253 | static inline u32 svm_has(u32 feat) |
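The new svm_msrpm_offset() above encodes the MSR permission map layout: three ranges of 8192 MSRs (based at 0, 0xc0000000 and 0xc0010000), two intercept bits per MSR, hence 2048 bytes per range, with the result returned in u32 units. A standalone restatement with a worked example follows; the program and its names are illustrative only, and MSR_LSTAR's number (0xc0000082) is assumed from the architectural MSR list rather than taken from this patch.

#include <stdint.h>
#include <stdio.h>

/* Illustrative restatement of svm_msrpm_offset(): 2 bits per MSR means
 * 4 MSRs per byte, 2048 bytes per 8192-MSR range, result in u32 units. */
static const uint32_t ranges[3] = { 0, 0xc0000000u, 0xc0010000u };

static uint32_t msrpm_u32_offset(uint32_t msr)
{
	for (int i = 0; i < 3; i++) {
		if (msr < ranges[i] || msr >= ranges[i] + 8192)
			continue;
		return ((msr - ranges[i]) / 4 + i * 2048) / 4;
	}
	return 0xffffffffu;	/* MSR_INVALID */
}

int main(void)
{
	/* MSR_LSTAR, 0xc0000082: (0x82/4 + 2048) / 4 == 0x208 */
	printf("%#x\n", msrpm_u32_offset(0xc0000082u));
	return 0;
}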
| @@ -213,7 +267,7 @@ static inline void stgi(void) | |||
| 213 | 267 | ||
| 214 | static inline void invlpga(unsigned long addr, u32 asid) | 268 | static inline void invlpga(unsigned long addr, u32 asid) |
| 215 | { | 269 | { |
| 216 | asm volatile (__ex(SVM_INVLPGA) :: "a"(addr), "c"(asid)); | 270 | asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid)); |
| 217 | } | 271 | } |
| 218 | 272 | ||
| 219 | static inline void force_new_asid(struct kvm_vcpu *vcpu) | 273 | static inline void force_new_asid(struct kvm_vcpu *vcpu) |
| @@ -235,23 +289,6 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
| 235 | vcpu->arch.efer = efer; | 289 | vcpu->arch.efer = efer; |
| 236 | } | 290 | } |
| 237 | 291 | ||
| 238 | static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | ||
| 239 | bool has_error_code, u32 error_code) | ||
| 240 | { | ||
| 241 | struct vcpu_svm *svm = to_svm(vcpu); | ||
| 242 | |||
| 243 | /* If we are within a nested VM we'd better #VMEXIT and let the | ||
| 244 | guest handle the exception */ | ||
| 245 | if (nested_svm_check_exception(svm, nr, has_error_code, error_code)) | ||
| 246 | return; | ||
| 247 | |||
| 248 | svm->vmcb->control.event_inj = nr | ||
| 249 | | SVM_EVTINJ_VALID | ||
| 250 | | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0) | ||
| 251 | | SVM_EVTINJ_TYPE_EXEPT; | ||
| 252 | svm->vmcb->control.event_inj_err = error_code; | ||
| 253 | } | ||
| 254 | |||
| 255 | static int is_external_interrupt(u32 info) | 292 | static int is_external_interrupt(u32 info) |
| 256 | { | 293 | { |
| 257 | info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; | 294 | info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; |
| @@ -264,7 +301,7 @@ static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) | |||
| 264 | u32 ret = 0; | 301 | u32 ret = 0; |
| 265 | 302 | ||
| 266 | if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) | 303 | if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) |
| 267 | ret |= X86_SHADOW_INT_STI | X86_SHADOW_INT_MOV_SS; | 304 | ret |= KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS; |
| 268 | return ret & mask; | 305 | return ret & mask; |
| 269 | } | 306 | } |
| 270 | 307 | ||
| @@ -283,6 +320,9 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
| 283 | { | 320 | { |
| 284 | struct vcpu_svm *svm = to_svm(vcpu); | 321 | struct vcpu_svm *svm = to_svm(vcpu); |
| 285 | 322 | ||
| 323 | if (svm->vmcb->control.next_rip != 0) | ||
| 324 | svm->next_rip = svm->vmcb->control.next_rip; | ||
| 325 | |||
| 286 | if (!svm->next_rip) { | 326 | if (!svm->next_rip) { |
| 287 | if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) != | 327 | if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) != |
| 288 | EMULATE_DONE) | 328 | EMULATE_DONE) |
| @@ -297,6 +337,43 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
| 297 | svm_set_interrupt_shadow(vcpu, 0); | 337 | svm_set_interrupt_shadow(vcpu, 0); |
| 298 | } | 338 | } |
| 299 | 339 | ||
| 340 | static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | ||
| 341 | bool has_error_code, u32 error_code, | ||
| 342 | bool reinject) | ||
| 343 | { | ||
| 344 | struct vcpu_svm *svm = to_svm(vcpu); | ||
| 345 | |||
| 346 | /* | ||
| 347 | * If we are within a nested VM we'd better #VMEXIT and let the guest | ||
| 348 | * handle the exception | ||
| 349 | */ | ||
| 350 | if (!reinject && | ||
| 351 | nested_svm_check_exception(svm, nr, has_error_code, error_code)) | ||
| 352 | return; | ||
| 353 | |||
| 354 | if (nr == BP_VECTOR && !svm_has(SVM_FEATURE_NRIP)) { | ||
| 355 | unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu); | ||
| 356 | |||
| 357 | /* | ||
| 358 | * For guest debugging where we have to reinject #BP if some | ||
| 359 | * INT3 is guest-owned: | ||
| 360 | * Emulate nRIP by moving RIP forward. Will fail if injection | ||
| 361 | * raises a fault that is not intercepted. Still better than | ||
| 362 | * failing in all cases. | ||
| 363 | */ | ||
| 364 | skip_emulated_instruction(&svm->vcpu); | ||
| 365 | rip = kvm_rip_read(&svm->vcpu); | ||
| 366 | svm->int3_rip = rip + svm->vmcb->save.cs.base; | ||
| 367 | svm->int3_injected = rip - old_rip; | ||
| 368 | } | ||
| 369 | |||
| 370 | svm->vmcb->control.event_inj = nr | ||
| 371 | | SVM_EVTINJ_VALID | ||
| 372 | | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0) | ||
| 373 | | SVM_EVTINJ_TYPE_EXEPT; | ||
| 374 | svm->vmcb->control.event_inj_err = error_code; | ||
| 375 | } | ||
| 376 | |||
| 300 | static int has_svm(void) | 377 | static int has_svm(void) |
| 301 | { | 378 | { |
| 302 | const char *msg; | 379 | const char *msg; |
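The #BP handling added to svm_queue_exception() above records two things when the nRIP feature is not available: how many bytes skip_emulated_instruction() advanced RIP (int3_injected) and the resulting linear address (int3_rip). Presumably the interrupt-completion path uses exactly this pair to rewind RIP if the INT3 turns out not to have been delivered; that path is not part of this hunk, so the arithmetic below is only an illustration with made-up values.

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: the bookkeeping done for a re-injected #BP. */
int main(void)
{
	uint64_t old_rip = 0x401000;		/* RIP of the INT3 (made up)   */
	uint64_t rip = old_rip + 1;		/* after skipping 1-byte INT3  */
	uint64_t int3_injected = rip - old_rip;	/* bytes to rewind later       */

	printf("rewound rip = %#llx\n",
	       (unsigned long long)(rip - int3_injected));
	return 0;
}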
| @@ -319,7 +396,7 @@ static int svm_hardware_enable(void *garbage) | |||
| 319 | 396 | ||
| 320 | struct svm_cpu_data *sd; | 397 | struct svm_cpu_data *sd; |
| 321 | uint64_t efer; | 398 | uint64_t efer; |
| 322 | struct descriptor_table gdt_descr; | 399 | struct desc_ptr gdt_descr; |
| 323 | struct desc_struct *gdt; | 400 | struct desc_struct *gdt; |
| 324 | int me = raw_smp_processor_id(); | 401 | int me = raw_smp_processor_id(); |
| 325 | 402 | ||
| @@ -344,8 +421,8 @@ static int svm_hardware_enable(void *garbage) | |||
| 344 | sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; | 421 | sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; |
| 345 | sd->next_asid = sd->max_asid + 1; | 422 | sd->next_asid = sd->max_asid + 1; |
| 346 | 423 | ||
| 347 | kvm_get_gdt(&gdt_descr); | 424 | native_store_gdt(&gdt_descr); |
| 348 | gdt = (struct desc_struct *)gdt_descr.base; | 425 | gdt = (struct desc_struct *)gdt_descr.address; |
| 349 | sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); | 426 | sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); |
| 350 | 427 | ||
| 351 | wrmsrl(MSR_EFER, efer | EFER_SVME); | 428 | wrmsrl(MSR_EFER, efer | EFER_SVME); |
| @@ -391,42 +468,98 @@ err_1: | |||
| 391 | 468 | ||
| 392 | } | 469 | } |
| 393 | 470 | ||
| 471 | static bool valid_msr_intercept(u32 index) | ||
| 472 | { | ||
| 473 | int i; | ||
| 474 | |||
| 475 | for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) | ||
| 476 | if (direct_access_msrs[i].index == index) | ||
| 477 | return true; | ||
| 478 | |||
| 479 | return false; | ||
| 480 | } | ||
| 481 | |||
| 394 | static void set_msr_interception(u32 *msrpm, unsigned msr, | 482 | static void set_msr_interception(u32 *msrpm, unsigned msr, |
| 395 | int read, int write) | 483 | int read, int write) |
| 396 | { | 484 | { |
| 485 | u8 bit_read, bit_write; | ||
| 486 | unsigned long tmp; | ||
| 487 | u32 offset; | ||
| 488 | |||
| 489 | /* | ||
| 490 | * If this warning triggers extend the direct_access_msrs list at the | ||
| 491 | * beginning of the file | ||
| 492 | */ | ||
| 493 | WARN_ON(!valid_msr_intercept(msr)); | ||
| 494 | |||
| 495 | offset = svm_msrpm_offset(msr); | ||
| 496 | bit_read = 2 * (msr & 0x0f); | ||
| 497 | bit_write = 2 * (msr & 0x0f) + 1; | ||
| 498 | tmp = msrpm[offset]; | ||
| 499 | |||
| 500 | BUG_ON(offset == MSR_INVALID); | ||
| 501 | |||
| 502 | read ? clear_bit(bit_read, &tmp) : set_bit(bit_read, &tmp); | ||
| 503 | write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp); | ||
| 504 | |||
| 505 | msrpm[offset] = tmp; | ||
| 506 | } | ||
| 507 | |||
| 508 | static void svm_vcpu_init_msrpm(u32 *msrpm) | ||
| 509 | { | ||
| 397 | int i; | 510 | int i; |
| 398 | 511 | ||
| 399 | for (i = 0; i < NUM_MSR_MAPS; i++) { | 512 | memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER)); |
| 400 | if (msr >= msrpm_ranges[i] && | 513 | |
| 401 | msr < msrpm_ranges[i] + MSRS_IN_RANGE) { | 514 | for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) { |
| 402 | u32 msr_offset = (i * MSRS_IN_RANGE + msr - | 515 | if (!direct_access_msrs[i].always) |
| 403 | msrpm_ranges[i]) * 2; | 516 | continue; |
| 404 | 517 | ||
| 405 | u32 *base = msrpm + (msr_offset / 32); | 518 | set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1); |
| 406 | u32 msr_shift = msr_offset % 32; | 519 | } |
| 407 | u32 mask = ((write) ? 0 : 2) | ((read) ? 0 : 1); | 520 | } |
| 408 | *base = (*base & ~(0x3 << msr_shift)) | | 521 | |
| 409 | (mask << msr_shift); | 522 | static void add_msr_offset(u32 offset) |
| 523 | { | ||
| 524 | int i; | ||
| 525 | |||
| 526 | for (i = 0; i < MSRPM_OFFSETS; ++i) { | ||
| 527 | |||
| 528 | /* Offset already in list? */ | ||
| 529 | if (msrpm_offsets[i] == offset) | ||
| 410 | return; | 530 | return; |
| 411 | } | 531 | |
| 532 | /* Slot used by another offset? */ | ||
| 533 | if (msrpm_offsets[i] != MSR_INVALID) | ||
| 534 | continue; | ||
| 535 | |||
| 536 | /* Add offset to list */ | ||
| 537 | msrpm_offsets[i] = offset; | ||
| 538 | |||
| 539 | return; | ||
| 412 | } | 540 | } |
| 541 | |||
| 542 | /* | ||
| 543 | * If this BUG triggers the msrpm_offsets table has an overflow. Just | ||
| 544 | * increase MSRPM_OFFSETS in this case. | ||
| 545 | */ | ||
| 413 | BUG(); | 546 | BUG(); |
| 414 | } | 547 | } |
| 415 | 548 | ||
| 416 | static void svm_vcpu_init_msrpm(u32 *msrpm) | 549 | static void init_msrpm_offsets(void) |
| 417 | { | 550 | { |
| 418 | memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER)); | 551 | int i; |
| 419 | 552 | ||
| 420 | #ifdef CONFIG_X86_64 | 553 | memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets)); |
| 421 | set_msr_interception(msrpm, MSR_GS_BASE, 1, 1); | 554 | |
| 422 | set_msr_interception(msrpm, MSR_FS_BASE, 1, 1); | 555 | for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) { |
| 423 | set_msr_interception(msrpm, MSR_KERNEL_GS_BASE, 1, 1); | 556 | u32 offset; |
| 424 | set_msr_interception(msrpm, MSR_LSTAR, 1, 1); | 557 | |
| 425 | set_msr_interception(msrpm, MSR_CSTAR, 1, 1); | 558 | offset = svm_msrpm_offset(direct_access_msrs[i].index); |
| 426 | set_msr_interception(msrpm, MSR_SYSCALL_MASK, 1, 1); | 559 | BUG_ON(offset == MSR_INVALID); |
| 427 | #endif | 560 | |
| 428 | set_msr_interception(msrpm, MSR_K6_STAR, 1, 1); | 561 | add_msr_offset(offset); |
| 429 | set_msr_interception(msrpm, MSR_IA32_SYSENTER_CS, 1, 1); | 562 | } |
| 430 | } | 563 | } |
| 431 | 564 | ||
| 432 | static void svm_enable_lbrv(struct vcpu_svm *svm) | 565 | static void svm_enable_lbrv(struct vcpu_svm *svm) |
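Within the u32 word picked out by svm_msrpm_offset(), the reworked set_msr_interception() above addresses one read bit and one write bit per MSR: a word spans 16 consecutive MSRs and the low nibble of the MSR number selects the 2-bit slot, with a set bit meaning "intercept" (so read/write == 1 clears the bit to pass the MSR through). A minimal sketch of the bit selection; MSR_STAR's number (0xc0000081) is assumed for the example.

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: the 2-bit slot set_msr_interception() addresses
 * inside one u32 of the permission map. */
int main(void)
{
	uint32_t msr = 0xc0000081u;		/* MSR_STAR, assumed value */
	unsigned bit_read  = 2 * (msr & 0x0f);	/* -> bit 2                */
	unsigned bit_write = bit_read + 1;	/* -> bit 3                */

	printf("read bit %u, write bit %u\n", bit_read, bit_write);
	return 0;
}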
| @@ -467,6 +600,8 @@ static __init int svm_hardware_setup(void) | |||
| 467 | memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER)); | 600 | memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER)); |
| 468 | iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT; | 601 | iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT; |
| 469 | 602 | ||
| 603 | init_msrpm_offsets(); | ||
| 604 | |||
| 470 | if (boot_cpu_has(X86_FEATURE_NX)) | 605 | if (boot_cpu_has(X86_FEATURE_NX)) |
| 471 | kvm_enable_efer_bits(EFER_NX); | 606 | kvm_enable_efer_bits(EFER_NX); |
| 472 | 607 | ||
| @@ -523,7 +658,7 @@ static void init_seg(struct vmcb_seg *seg) | |||
| 523 | { | 658 | { |
| 524 | seg->selector = 0; | 659 | seg->selector = 0; |
| 525 | seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK | | 660 | seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK | |
| 526 | SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */ | 661 | SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */ |
| 527 | seg->limit = 0xffff; | 662 | seg->limit = 0xffff; |
| 528 | seg->base = 0; | 663 | seg->base = 0; |
| 529 | } | 664 | } |
| @@ -543,16 +678,16 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
| 543 | 678 | ||
| 544 | svm->vcpu.fpu_active = 1; | 679 | svm->vcpu.fpu_active = 1; |
| 545 | 680 | ||
| 546 | control->intercept_cr_read = INTERCEPT_CR0_MASK | | 681 | control->intercept_cr_read = INTERCEPT_CR0_MASK | |
| 547 | INTERCEPT_CR3_MASK | | 682 | INTERCEPT_CR3_MASK | |
| 548 | INTERCEPT_CR4_MASK; | 683 | INTERCEPT_CR4_MASK; |
| 549 | 684 | ||
| 550 | control->intercept_cr_write = INTERCEPT_CR0_MASK | | 685 | control->intercept_cr_write = INTERCEPT_CR0_MASK | |
| 551 | INTERCEPT_CR3_MASK | | 686 | INTERCEPT_CR3_MASK | |
| 552 | INTERCEPT_CR4_MASK | | 687 | INTERCEPT_CR4_MASK | |
| 553 | INTERCEPT_CR8_MASK; | 688 | INTERCEPT_CR8_MASK; |
| 554 | 689 | ||
| 555 | control->intercept_dr_read = INTERCEPT_DR0_MASK | | 690 | control->intercept_dr_read = INTERCEPT_DR0_MASK | |
| 556 | INTERCEPT_DR1_MASK | | 691 | INTERCEPT_DR1_MASK | |
| 557 | INTERCEPT_DR2_MASK | | 692 | INTERCEPT_DR2_MASK | |
| 558 | INTERCEPT_DR3_MASK | | 693 | INTERCEPT_DR3_MASK | |
| @@ -561,7 +696,7 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
| 561 | INTERCEPT_DR6_MASK | | 696 | INTERCEPT_DR6_MASK | |
| 562 | INTERCEPT_DR7_MASK; | 697 | INTERCEPT_DR7_MASK; |
| 563 | 698 | ||
| 564 | control->intercept_dr_write = INTERCEPT_DR0_MASK | | 699 | control->intercept_dr_write = INTERCEPT_DR0_MASK | |
| 565 | INTERCEPT_DR1_MASK | | 700 | INTERCEPT_DR1_MASK | |
| 566 | INTERCEPT_DR2_MASK | | 701 | INTERCEPT_DR2_MASK | |
| 567 | INTERCEPT_DR3_MASK | | 702 | INTERCEPT_DR3_MASK | |
| @@ -575,7 +710,7 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
| 575 | (1 << MC_VECTOR); | 710 | (1 << MC_VECTOR); |
| 576 | 711 | ||
| 577 | 712 | ||
| 578 | control->intercept = (1ULL << INTERCEPT_INTR) | | 713 | control->intercept = (1ULL << INTERCEPT_INTR) | |
| 579 | (1ULL << INTERCEPT_NMI) | | 714 | (1ULL << INTERCEPT_NMI) | |
| 580 | (1ULL << INTERCEPT_SMI) | | 715 | (1ULL << INTERCEPT_SMI) | |
| 581 | (1ULL << INTERCEPT_SELECTIVE_CR0) | | 716 | (1ULL << INTERCEPT_SELECTIVE_CR0) | |
| @@ -636,7 +771,8 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
| 636 | save->rip = 0x0000fff0; | 771 | save->rip = 0x0000fff0; |
| 637 | svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; | 772 | svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; |
| 638 | 773 | ||
| 639 | /* This is the guest-visible cr0 value. | 774 | /* |
| 775 | * This is the guest-visible cr0 value. | ||
| 640 | * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. | 776 | * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. |
| 641 | */ | 777 | */ |
| 642 | svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; | 778 | svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; |
| @@ -729,6 +865,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | |||
| 729 | svm_vcpu_init_msrpm(svm->msrpm); | 865 | svm_vcpu_init_msrpm(svm->msrpm); |
| 730 | 866 | ||
| 731 | svm->nested.msrpm = page_address(nested_msrpm_pages); | 867 | svm->nested.msrpm = page_address(nested_msrpm_pages); |
| 868 | svm_vcpu_init_msrpm(svm->nested.msrpm); | ||
| 732 | 869 | ||
| 733 | svm->vmcb = page_address(page); | 870 | svm->vmcb = page_address(page); |
| 734 | clear_page(svm->vmcb); | 871 | clear_page(svm->vmcb); |
| @@ -882,7 +1019,8 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, | |||
| 882 | var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; | 1019 | var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; |
| 883 | var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; | 1020 | var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; |
| 884 | 1021 | ||
| 885 | /* AMD's VMCB does not have an explicit unusable field, so emulate it | 1022 | /* |
| 1023 | * AMD's VMCB does not have an explicit unusable field, so emulate it | ||
| 886 | * for cross vendor migration purposes by "not present" | 1024 | * for cross vendor migration purposes by "not present" |
| 887 | */ | 1025 | */ |
| 888 | var->unusable = !var->present || (var->type == 0); | 1026 | var->unusable = !var->present || (var->type == 0); |
| @@ -918,7 +1056,8 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, | |||
| 918 | var->type |= 0x1; | 1056 | var->type |= 0x1; |
| 919 | break; | 1057 | break; |
| 920 | case VCPU_SREG_SS: | 1058 | case VCPU_SREG_SS: |
| 921 | /* On AMD CPUs sometimes the DB bit in the segment | 1059 | /* |
| 1060 | * On AMD CPUs sometimes the DB bit in the segment | ||
| 922 | * descriptor is left as 1, although the whole segment has | 1061 | * descriptor is left as 1, although the whole segment has |
| 923 | * been made unusable. Clear it here to pass an Intel VMX | 1062 | * been made unusable. Clear it here to pass an Intel VMX |
| 924 | * entry check when cross vendor migrating. | 1063 | * entry check when cross vendor migrating. |
| @@ -936,36 +1075,36 @@ static int svm_get_cpl(struct kvm_vcpu *vcpu) | |||
| 936 | return save->cpl; | 1075 | return save->cpl; |
| 937 | } | 1076 | } |
| 938 | 1077 | ||
| 939 | static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 1078 | static void svm_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
| 940 | { | 1079 | { |
| 941 | struct vcpu_svm *svm = to_svm(vcpu); | 1080 | struct vcpu_svm *svm = to_svm(vcpu); |
| 942 | 1081 | ||
| 943 | dt->limit = svm->vmcb->save.idtr.limit; | 1082 | dt->size = svm->vmcb->save.idtr.limit; |
| 944 | dt->base = svm->vmcb->save.idtr.base; | 1083 | dt->address = svm->vmcb->save.idtr.base; |
| 945 | } | 1084 | } |
| 946 | 1085 | ||
| 947 | static void svm_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 1086 | static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
| 948 | { | 1087 | { |
| 949 | struct vcpu_svm *svm = to_svm(vcpu); | 1088 | struct vcpu_svm *svm = to_svm(vcpu); |
| 950 | 1089 | ||
| 951 | svm->vmcb->save.idtr.limit = dt->limit; | 1090 | svm->vmcb->save.idtr.limit = dt->size; |
| 952 | svm->vmcb->save.idtr.base = dt->base ; | 1091 | svm->vmcb->save.idtr.base = dt->address ; |
| 953 | } | 1092 | } |
| 954 | 1093 | ||
| 955 | static void svm_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 1094 | static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
| 956 | { | 1095 | { |
| 957 | struct vcpu_svm *svm = to_svm(vcpu); | 1096 | struct vcpu_svm *svm = to_svm(vcpu); |
| 958 | 1097 | ||
| 959 | dt->limit = svm->vmcb->save.gdtr.limit; | 1098 | dt->size = svm->vmcb->save.gdtr.limit; |
| 960 | dt->base = svm->vmcb->save.gdtr.base; | 1099 | dt->address = svm->vmcb->save.gdtr.base; |
| 961 | } | 1100 | } |
| 962 | 1101 | ||
| 963 | static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 1102 | static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
| 964 | { | 1103 | { |
| 965 | struct vcpu_svm *svm = to_svm(vcpu); | 1104 | struct vcpu_svm *svm = to_svm(vcpu); |
| 966 | 1105 | ||
| 967 | svm->vmcb->save.gdtr.limit = dt->limit; | 1106 | svm->vmcb->save.gdtr.limit = dt->size; |
| 968 | svm->vmcb->save.gdtr.base = dt->base ; | 1107 | svm->vmcb->save.gdtr.base = dt->address ; |
| 969 | } | 1108 | } |
| 970 | 1109 | ||
| 971 | static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) | 1110 | static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) |
| @@ -978,6 +1117,7 @@ static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) | |||
| 978 | 1117 | ||
| 979 | static void update_cr0_intercept(struct vcpu_svm *svm) | 1118 | static void update_cr0_intercept(struct vcpu_svm *svm) |
| 980 | { | 1119 | { |
| 1120 | struct vmcb *vmcb = svm->vmcb; | ||
| 981 | ulong gcr0 = svm->vcpu.arch.cr0; | 1121 | ulong gcr0 = svm->vcpu.arch.cr0; |
| 982 | u64 *hcr0 = &svm->vmcb->save.cr0; | 1122 | u64 *hcr0 = &svm->vmcb->save.cr0; |
| 983 | 1123 | ||
| @@ -989,11 +1129,25 @@ static void update_cr0_intercept(struct vcpu_svm *svm) | |||
| 989 | 1129 | ||
| 990 | 1130 | ||
| 991 | if (gcr0 == *hcr0 && svm->vcpu.fpu_active) { | 1131 | if (gcr0 == *hcr0 && svm->vcpu.fpu_active) { |
| 992 | svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; | 1132 | vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; |
| 993 | svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; | 1133 | vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; |
| 1134 | if (is_nested(svm)) { | ||
| 1135 | struct vmcb *hsave = svm->nested.hsave; | ||
| 1136 | |||
| 1137 | hsave->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; | ||
| 1138 | hsave->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; | ||
| 1139 | vmcb->control.intercept_cr_read |= svm->nested.intercept_cr_read; | ||
| 1140 | vmcb->control.intercept_cr_write |= svm->nested.intercept_cr_write; | ||
| 1141 | } | ||
| 994 | } else { | 1142 | } else { |
| 995 | svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK; | 1143 | svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK; |
| 996 | svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR0_MASK; | 1144 | svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR0_MASK; |
| 1145 | if (is_nested(svm)) { | ||
| 1146 | struct vmcb *hsave = svm->nested.hsave; | ||
| 1147 | |||
| 1148 | hsave->control.intercept_cr_read |= INTERCEPT_CR0_MASK; | ||
| 1149 | hsave->control.intercept_cr_write |= INTERCEPT_CR0_MASK; | ||
| 1150 | } | ||
| 997 | } | 1151 | } |
| 998 | } | 1152 | } |
| 999 | 1153 | ||
| @@ -1001,6 +1155,27 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
| 1001 | { | 1155 | { |
| 1002 | struct vcpu_svm *svm = to_svm(vcpu); | 1156 | struct vcpu_svm *svm = to_svm(vcpu); |
| 1003 | 1157 | ||
| 1158 | if (is_nested(svm)) { | ||
| 1159 | /* | ||
| 1160 | * We are here because we run in nested mode, the host kvm | ||
| 1161 | * intercepts cr0 writes but the l1 hypervisor does not. | ||
| 1162 | * But the L1 hypervisor may intercept selective cr0 writes. | ||
| 1163 | * This needs to be checked here. | ||
| 1164 | */ | ||
| 1165 | unsigned long old, new; | ||
| 1166 | |||
| 1167 | /* Remove bits that would trigger a real cr0 write intercept */ | ||
| 1168 | old = vcpu->arch.cr0 & SVM_CR0_SELECTIVE_MASK; | ||
| 1169 | new = cr0 & SVM_CR0_SELECTIVE_MASK; | ||
| 1170 | |||
| 1171 | if (old == new) { | ||
| 1172 | /* cr0 write with ts and mp unchanged */ | ||
| 1173 | svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE; | ||
| 1174 | if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE) | ||
| 1175 | return; | ||
| 1176 | } | ||
| 1177 | } | ||
| 1178 | |||
| 1004 | #ifdef CONFIG_X86_64 | 1179 | #ifdef CONFIG_X86_64 |
| 1005 | if (vcpu->arch.efer & EFER_LME) { | 1180 | if (vcpu->arch.efer & EFER_LME) { |
| 1006 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { | 1181 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { |
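For the nested check added to svm_set_cr0() above: SVM's selective CR0 intercept only fires when one of the bits in SVM_CR0_SELECTIVE_MASK changes, which the hunk's own comment identifies as TS and MP. The comparison below restates that test with the architectural bit values (TS is bit 3, MP is bit 1); it is a sketch under those assumptions, not the kernel's definition.

#include <stdint.h>
#include <stdio.h>

#define CR0_MP (1ul << 1)
#define CR0_TS (1ul << 3)
#define SELECTIVE_MASK (CR0_TS | CR0_MP)   /* assumed SVM_CR0_SELECTIVE_MASK */

int main(void)
{
	unsigned long old_cr0 = 0x80050033ul;		/* made-up value      */
	unsigned long new_cr0 = old_cr0 | (1ul << 29);	/* toggles NW only    */

	/* TS and MP unchanged here, i.e. old == new in the hunk's terms */
	printf("ts/mp changed: %s\n",
	       ((old_cr0 ^ new_cr0) & SELECTIVE_MASK) ? "yes" : "no");
	return 0;
}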
| @@ -1134,70 +1309,11 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) | |||
| 1134 | svm->vmcb->control.asid = sd->next_asid++; | 1309 | svm->vmcb->control.asid = sd->next_asid++; |
| 1135 | } | 1310 | } |
| 1136 | 1311 | ||
| 1137 | static int svm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *dest) | 1312 | static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) |
| 1138 | { | 1313 | { |
| 1139 | struct vcpu_svm *svm = to_svm(vcpu); | 1314 | struct vcpu_svm *svm = to_svm(vcpu); |
| 1140 | 1315 | ||
| 1141 | switch (dr) { | 1316 | svm->vmcb->save.dr7 = value; |
| 1142 | case 0 ... 3: | ||
| 1143 | *dest = vcpu->arch.db[dr]; | ||
| 1144 | break; | ||
| 1145 | case 4: | ||
| 1146 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | ||
| 1147 | return EMULATE_FAIL; /* will re-inject UD */ | ||
| 1148 | /* fall through */ | ||
| 1149 | case 6: | ||
| 1150 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) | ||
| 1151 | *dest = vcpu->arch.dr6; | ||
| 1152 | else | ||
| 1153 | *dest = svm->vmcb->save.dr6; | ||
| 1154 | break; | ||
| 1155 | case 5: | ||
| 1156 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | ||
| 1157 | return EMULATE_FAIL; /* will re-inject UD */ | ||
| 1158 | /* fall through */ | ||
| 1159 | case 7: | ||
| 1160 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) | ||
| 1161 | *dest = vcpu->arch.dr7; | ||
| 1162 | else | ||
| 1163 | *dest = svm->vmcb->save.dr7; | ||
| 1164 | break; | ||
| 1165 | } | ||
| 1166 | |||
| 1167 | return EMULATE_DONE; | ||
| 1168 | } | ||
| 1169 | |||
| 1170 | static int svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value) | ||
| 1171 | { | ||
| 1172 | struct vcpu_svm *svm = to_svm(vcpu); | ||
| 1173 | |||
| 1174 | switch (dr) { | ||
| 1175 | case 0 ... 3: | ||
| 1176 | vcpu->arch.db[dr] = value; | ||
| 1177 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) | ||
| 1178 | vcpu->arch.eff_db[dr] = value; | ||
| 1179 | break; | ||
| 1180 | case 4: | ||
| 1181 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | ||
| 1182 | return EMULATE_FAIL; /* will re-inject UD */ | ||
| 1183 | /* fall through */ | ||
| 1184 | case 6: | ||
| 1185 | vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1; | ||
| 1186 | break; | ||
| 1187 | case 5: | ||
| 1188 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | ||
| 1189 | return EMULATE_FAIL; /* will re-inject UD */ | ||
| 1190 | /* fall through */ | ||
| 1191 | case 7: | ||
| 1192 | vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1; | ||
| 1193 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { | ||
| 1194 | svm->vmcb->save.dr7 = vcpu->arch.dr7; | ||
| 1195 | vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK); | ||
| 1196 | } | ||
| 1197 | break; | ||
| 1198 | } | ||
| 1199 | |||
| 1200 | return EMULATE_DONE; | ||
| 1201 | } | 1317 | } |
| 1202 | 1318 | ||
| 1203 | static int pf_interception(struct vcpu_svm *svm) | 1319 | static int pf_interception(struct vcpu_svm *svm) |
| @@ -1234,7 +1350,7 @@ static int db_interception(struct vcpu_svm *svm) | |||
| 1234 | } | 1350 | } |
| 1235 | 1351 | ||
| 1236 | if (svm->vcpu.guest_debug & | 1352 | if (svm->vcpu.guest_debug & |
| 1237 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)){ | 1353 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) { |
| 1238 | kvm_run->exit_reason = KVM_EXIT_DEBUG; | 1354 | kvm_run->exit_reason = KVM_EXIT_DEBUG; |
| 1239 | kvm_run->debug.arch.pc = | 1355 | kvm_run->debug.arch.pc = |
| 1240 | svm->vmcb->save.cs.base + svm->vmcb->save.rip; | 1356 | svm->vmcb->save.cs.base + svm->vmcb->save.rip; |
| @@ -1268,7 +1384,22 @@ static int ud_interception(struct vcpu_svm *svm) | |||
| 1268 | static void svm_fpu_activate(struct kvm_vcpu *vcpu) | 1384 | static void svm_fpu_activate(struct kvm_vcpu *vcpu) |
| 1269 | { | 1385 | { |
| 1270 | struct vcpu_svm *svm = to_svm(vcpu); | 1386 | struct vcpu_svm *svm = to_svm(vcpu); |
| 1271 | svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); | 1387 | u32 excp; |
| 1388 | |||
| 1389 | if (is_nested(svm)) { | ||
| 1390 | u32 h_excp, n_excp; | ||
| 1391 | |||
| 1392 | h_excp = svm->nested.hsave->control.intercept_exceptions; | ||
| 1393 | n_excp = svm->nested.intercept_exceptions; | ||
| 1394 | h_excp &= ~(1 << NM_VECTOR); | ||
| 1395 | excp = h_excp | n_excp; | ||
| 1396 | } else { | ||
| 1397 | excp = svm->vmcb->control.intercept_exceptions; | ||
| 1398 | excp &= ~(1 << NM_VECTOR); | ||
| 1399 | } | ||
| 1400 | |||
| 1401 | svm->vmcb->control.intercept_exceptions = excp; | ||
| 1402 | |||
| 1272 | svm->vcpu.fpu_active = 1; | 1403 | svm->vcpu.fpu_active = 1; |
| 1273 | update_cr0_intercept(svm); | 1404 | update_cr0_intercept(svm); |
| 1274 | } | 1405 | } |
| @@ -1309,29 +1440,23 @@ static int shutdown_interception(struct vcpu_svm *svm) | |||
| 1309 | 1440 | ||
| 1310 | static int io_interception(struct vcpu_svm *svm) | 1441 | static int io_interception(struct vcpu_svm *svm) |
| 1311 | { | 1442 | { |
| 1443 | struct kvm_vcpu *vcpu = &svm->vcpu; | ||
| 1312 | u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ | 1444 | u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ |
| 1313 | int size, in, string; | 1445 | int size, in, string; |
| 1314 | unsigned port; | 1446 | unsigned port; |
| 1315 | 1447 | ||
| 1316 | ++svm->vcpu.stat.io_exits; | 1448 | ++svm->vcpu.stat.io_exits; |
| 1317 | |||
| 1318 | svm->next_rip = svm->vmcb->control.exit_info_2; | ||
| 1319 | |||
| 1320 | string = (io_info & SVM_IOIO_STR_MASK) != 0; | 1449 | string = (io_info & SVM_IOIO_STR_MASK) != 0; |
| 1321 | |||
| 1322 | if (string) { | ||
| 1323 | if (emulate_instruction(&svm->vcpu, | ||
| 1324 | 0, 0, 0) == EMULATE_DO_MMIO) | ||
| 1325 | return 0; | ||
| 1326 | return 1; | ||
| 1327 | } | ||
| 1328 | |||
| 1329 | in = (io_info & SVM_IOIO_TYPE_MASK) != 0; | 1450 | in = (io_info & SVM_IOIO_TYPE_MASK) != 0; |
| 1451 | if (string || in) | ||
| 1452 | return !(emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO); | ||
| 1453 | |||
| 1330 | port = io_info >> 16; | 1454 | port = io_info >> 16; |
| 1331 | size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; | 1455 | size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; |
| 1332 | 1456 | svm->next_rip = svm->vmcb->control.exit_info_2; | |
| 1333 | skip_emulated_instruction(&svm->vcpu); | 1457 | skip_emulated_instruction(&svm->vcpu); |
| 1334 | return kvm_emulate_pio(&svm->vcpu, in, size, port); | 1458 | |
| 1459 | return kvm_fast_pio_out(vcpu, size, port); | ||
| 1335 | } | 1460 | } |
| 1336 | 1461 | ||
| 1337 | static int nmi_interception(struct vcpu_svm *svm) | 1462 | static int nmi_interception(struct vcpu_svm *svm) |
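io_interception() above now hands non-string output to kvm_fast_pio_out() after pulling the port and operand size out of exit_info_1. The decoder below restates the EXITINFO1 layout for IOIO intercepts as far as this hunk relies on it (direction in bit 0, string flag in bit 2, size flags in bits 4-6, port in bits 16-31); the sample value is made up and the low-bit positions are taken from AMD's programming manual rather than from this patch.

#include <stdint.h>
#include <stdio.h>

/* Illustrative decode of an IOIO exit_info_1 word (made-up sample). */
int main(void)
{
	uint32_t io_info = 0x00e90010u;		/* OUT, 1 byte, port 0xe9 */

	unsigned in     = io_info & 1;		 /* SVM_IOIO_TYPE_MASK */
	unsigned string = (io_info >> 2) & 1;	 /* SVM_IOIO_STR_MASK  */
	unsigned size   = (io_info & 0x70) >> 4; /* SVM_IOIO_SIZE_*    */
	unsigned port   = io_info >> 16;

	printf("in=%u string=%u size=%u port=%#x\n", in, string, size, port);
	return 0;
}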
| @@ -1384,6 +1509,8 @@ static int nested_svm_check_permissions(struct vcpu_svm *svm) | |||
| 1384 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, | 1509 | static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, |
| 1385 | bool has_error_code, u32 error_code) | 1510 | bool has_error_code, u32 error_code) |
| 1386 | { | 1511 | { |
| 1512 | int vmexit; | ||
| 1513 | |||
| 1387 | if (!is_nested(svm)) | 1514 | if (!is_nested(svm)) |
| 1388 | return 0; | 1515 | return 0; |
| 1389 | 1516 | ||
| @@ -1392,21 +1519,28 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, | |||
| 1392 | svm->vmcb->control.exit_info_1 = error_code; | 1519 | svm->vmcb->control.exit_info_1 = error_code; |
| 1393 | svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; | 1520 | svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; |
| 1394 | 1521 | ||
| 1395 | return nested_svm_exit_handled(svm); | 1522 | vmexit = nested_svm_intercept(svm); |
| 1523 | if (vmexit == NESTED_EXIT_DONE) | ||
| 1524 | svm->nested.exit_required = true; | ||
| 1525 | |||
| 1526 | return vmexit; | ||
| 1396 | } | 1527 | } |
| 1397 | 1528 | ||
| 1398 | static inline int nested_svm_intr(struct vcpu_svm *svm) | 1529 | /* This function returns true if it is safe to enable the irq window */ |
| 1530 | static inline bool nested_svm_intr(struct vcpu_svm *svm) | ||
| 1399 | { | 1531 | { |
| 1400 | if (!is_nested(svm)) | 1532 | if (!is_nested(svm)) |
| 1401 | return 0; | 1533 | return true; |
| 1402 | 1534 | ||
| 1403 | if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) | 1535 | if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) |
| 1404 | return 0; | 1536 | return true; |
| 1405 | 1537 | ||
| 1406 | if (!(svm->vcpu.arch.hflags & HF_HIF_MASK)) | 1538 | if (!(svm->vcpu.arch.hflags & HF_HIF_MASK)) |
| 1407 | return 0; | 1539 | return false; |
| 1408 | 1540 | ||
| 1409 | svm->vmcb->control.exit_code = SVM_EXIT_INTR; | 1541 | svm->vmcb->control.exit_code = SVM_EXIT_INTR; |
| 1542 | svm->vmcb->control.exit_info_1 = 0; | ||
| 1543 | svm->vmcb->control.exit_info_2 = 0; | ||
| 1410 | 1544 | ||
| 1411 | if (svm->nested.intercept & 1ULL) { | 1545 | if (svm->nested.intercept & 1ULL) { |
| 1412 | /* | 1546 | /* |
| @@ -1417,21 +1551,40 @@ static inline int nested_svm_intr(struct vcpu_svm *svm) | |||
| 1417 | */ | 1551 | */ |
| 1418 | svm->nested.exit_required = true; | 1552 | svm->nested.exit_required = true; |
| 1419 | trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip); | 1553 | trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip); |
| 1420 | return 1; | 1554 | return false; |
| 1421 | } | 1555 | } |
| 1422 | 1556 | ||
| 1423 | return 0; | 1557 | return true; |
| 1558 | } | ||
| 1559 | |||
| 1560 | /* This function returns true if it is safe to enable the nmi window */ | ||
| 1561 | static inline bool nested_svm_nmi(struct vcpu_svm *svm) | ||
| 1562 | { | ||
| 1563 | if (!is_nested(svm)) | ||
| 1564 | return true; | ||
| 1565 | |||
| 1566 | if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI))) | ||
| 1567 | return true; | ||
| 1568 | |||
| 1569 | svm->vmcb->control.exit_code = SVM_EXIT_NMI; | ||
| 1570 | svm->nested.exit_required = true; | ||
| 1571 | |||
| 1572 | return false; | ||
| 1424 | } | 1573 | } |
| 1425 | 1574 | ||
| 1426 | static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, enum km_type idx) | 1575 | static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page) |
| 1427 | { | 1576 | { |
| 1428 | struct page *page; | 1577 | struct page *page; |
| 1429 | 1578 | ||
| 1579 | might_sleep(); | ||
| 1580 | |||
| 1430 | page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT); | 1581 | page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT); |
| 1431 | if (is_error_page(page)) | 1582 | if (is_error_page(page)) |
| 1432 | goto error; | 1583 | goto error; |
| 1433 | 1584 | ||
| 1434 | return kmap_atomic(page, idx); | 1585 | *_page = page; |
| 1586 | |||
| 1587 | return kmap(page); | ||
| 1435 | 1588 | ||
| 1436 | error: | 1589 | error: |
| 1437 | kvm_release_page_clean(page); | 1590 | kvm_release_page_clean(page); |
| @@ -1440,61 +1593,55 @@ error: | |||
| 1440 | return NULL; | 1593 | return NULL; |
| 1441 | } | 1594 | } |
| 1442 | 1595 | ||
| 1443 | static void nested_svm_unmap(void *addr, enum km_type idx) | 1596 | static void nested_svm_unmap(struct page *page) |
| 1444 | { | 1597 | { |
| 1445 | struct page *page; | 1598 | kunmap(page); |
| 1599 | kvm_release_page_dirty(page); | ||
| 1600 | } | ||
| 1446 | 1601 | ||
| 1447 | if (!addr) | 1602 | static int nested_svm_intercept_ioio(struct vcpu_svm *svm) |
| 1448 | return; | 1603 | { |
| 1604 | unsigned port; | ||
| 1605 | u8 val, bit; | ||
| 1606 | u64 gpa; | ||
| 1449 | 1607 | ||
| 1450 | page = kmap_atomic_to_page(addr); | 1608 | if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT))) |
| 1609 | return NESTED_EXIT_HOST; | ||
| 1451 | 1610 | ||
| 1452 | kunmap_atomic(addr, idx); | 1611 | port = svm->vmcb->control.exit_info_1 >> 16; |
| 1453 | kvm_release_page_dirty(page); | 1612 | gpa = svm->nested.vmcb_iopm + (port / 8); |
| 1613 | bit = port % 8; | ||
| 1614 | val = 0; | ||
| 1615 | |||
| 1616 | if (kvm_read_guest(svm->vcpu.kvm, gpa, &val, 1)) | ||
| 1617 | val &= (1 << bit); | ||
| 1618 | |||
| 1619 | return val ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; | ||
| 1454 | } | 1620 | } |
| 1455 | 1621 | ||
| 1456 | static bool nested_svm_exit_handled_msr(struct vcpu_svm *svm) | 1622 | static int nested_svm_exit_handled_msr(struct vcpu_svm *svm) |
| 1457 | { | 1623 | { |
| 1458 | u32 param = svm->vmcb->control.exit_info_1 & 1; | 1624 | u32 offset, msr, value; |
| 1459 | u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX]; | 1625 | int write, mask; |
| 1460 | bool ret = false; | ||
| 1461 | u32 t0, t1; | ||
| 1462 | u8 *msrpm; | ||
| 1463 | 1626 | ||
| 1464 | if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) | 1627 | if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) |
| 1465 | return false; | 1628 | return NESTED_EXIT_HOST; |
| 1466 | 1629 | ||
| 1467 | msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, KM_USER0); | 1630 | msr = svm->vcpu.arch.regs[VCPU_REGS_RCX]; |
| 1631 | offset = svm_msrpm_offset(msr); | ||
| 1632 | write = svm->vmcb->control.exit_info_1 & 1; | ||
| 1633 | mask = 1 << ((2 * (msr & 0xf)) + write); | ||
| 1468 | 1634 | ||
| 1469 | if (!msrpm) | 1635 | if (offset == MSR_INVALID) |
| 1470 | goto out; | 1636 | return NESTED_EXIT_DONE; |
| 1471 | 1637 | ||
| 1472 | switch (msr) { | 1638 | /* Offset is in 32 bit units but need in 8 bit units */ |
| 1473 | case 0 ... 0x1fff: | 1639 | offset *= 4; |
| 1474 | t0 = (msr * 2) % 8; | ||
| 1475 | t1 = msr / 8; | ||
| 1476 | break; | ||
| 1477 | case 0xc0000000 ... 0xc0001fff: | ||
| 1478 | t0 = (8192 + msr - 0xc0000000) * 2; | ||
| 1479 | t1 = (t0 / 8); | ||
| 1480 | t0 %= 8; | ||
| 1481 | break; | ||
| 1482 | case 0xc0010000 ... 0xc0011fff: | ||
| 1483 | t0 = (16384 + msr - 0xc0010000) * 2; | ||
| 1484 | t1 = (t0 / 8); | ||
| 1485 | t0 %= 8; | ||
| 1486 | break; | ||
| 1487 | default: | ||
| 1488 | ret = true; | ||
| 1489 | goto out; | ||
| 1490 | } | ||
| 1491 | 1640 | ||
| 1492 | ret = msrpm[t1] & ((1 << param) << t0); | 1641 | if (kvm_read_guest(svm->vcpu.kvm, svm->nested.vmcb_msrpm + offset, &value, 4)) |
| 1493 | 1642 | return NESTED_EXIT_DONE; | |
| 1494 | out: | ||
| 1495 | nested_svm_unmap(msrpm, KM_USER0); | ||
| 1496 | 1643 | ||
| 1497 | return ret; | 1644 | return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; |
| 1498 | } | 1645 | } |
| 1499 | 1646 | ||
| 1500 | static int nested_svm_exit_special(struct vcpu_svm *svm) | 1647 | static int nested_svm_exit_special(struct vcpu_svm *svm) |
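The rewritten nested_svm_exit_handled_msr() above no longer maps the whole L1 MSR permission page; it reuses svm_msrpm_offset(), converts the u32 offset back to bytes, reads a single u32 with kvm_read_guest() and tests one bit. The new nested_svm_intercept_ioio() does the analogous one-byte lookup for I/O ports. A worked example of the MSR bit selection follows; MSR_LSTAR's number (0xc0000082) is assumed and a read access is taken as write == 0.

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: which bit of which byte the nested MSR check reads. */
int main(void)
{
	uint32_t msr   = 0xc0000082u;		/* MSR_LSTAR, assumed value   */
	int      write = 0;			/* rdmsr                      */

	uint32_t u32_offset  = 0x208;		/* svm_msrpm_offset(msr)      */
	uint32_t byte_offset = u32_offset * 4;	/* offset into the L1 bitmap  */
	uint32_t mask        = 1u << ((2 * (msr & 0xf)) + write);

	printf("byte %#x, mask %#x\n", byte_offset, mask);
	return 0;
}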
| @@ -1504,17 +1651,21 @@ static int nested_svm_exit_special(struct vcpu_svm *svm) | |||
| 1504 | switch (exit_code) { | 1651 | switch (exit_code) { |
| 1505 | case SVM_EXIT_INTR: | 1652 | case SVM_EXIT_INTR: |
| 1506 | case SVM_EXIT_NMI: | 1653 | case SVM_EXIT_NMI: |
| 1654 | case SVM_EXIT_EXCP_BASE + MC_VECTOR: | ||
| 1507 | return NESTED_EXIT_HOST; | 1655 | return NESTED_EXIT_HOST; |
| 1508 | /* For now we are always handling NPFs when using them */ | ||
| 1509 | case SVM_EXIT_NPF: | 1656 | case SVM_EXIT_NPF: |
| 1657 | /* For now we are always handling NPFs when using them */ | ||
| 1510 | if (npt_enabled) | 1658 | if (npt_enabled) |
| 1511 | return NESTED_EXIT_HOST; | 1659 | return NESTED_EXIT_HOST; |
| 1512 | break; | 1660 | break; |
| 1513 | /* When we're shadowing, trap PFs */ | ||
| 1514 | case SVM_EXIT_EXCP_BASE + PF_VECTOR: | 1661 | case SVM_EXIT_EXCP_BASE + PF_VECTOR: |
| 1662 | /* When we're shadowing, trap PFs */ | ||
| 1515 | if (!npt_enabled) | 1663 | if (!npt_enabled) |
| 1516 | return NESTED_EXIT_HOST; | 1664 | return NESTED_EXIT_HOST; |
| 1517 | break; | 1665 | break; |
| 1666 | case SVM_EXIT_EXCP_BASE + NM_VECTOR: | ||
| 1667 | nm_interception(svm); | ||
| 1668 | break; | ||
| 1518 | default: | 1669 | default: |
| 1519 | break; | 1670 | break; |
| 1520 | } | 1671 | } |
| @@ -1525,7 +1676,7 @@ static int nested_svm_exit_special(struct vcpu_svm *svm) | |||
| 1525 | /* | 1676 | /* |
| 1526 | * If this function returns true, this #vmexit was already handled | 1677 | * If this function returns true, this #vmexit was already handled |
| 1527 | */ | 1678 | */ |
| 1528 | static int nested_svm_exit_handled(struct vcpu_svm *svm) | 1679 | static int nested_svm_intercept(struct vcpu_svm *svm) |
| 1529 | { | 1680 | { |
| 1530 | u32 exit_code = svm->vmcb->control.exit_code; | 1681 | u32 exit_code = svm->vmcb->control.exit_code; |
| 1531 | int vmexit = NESTED_EXIT_HOST; | 1682 | int vmexit = NESTED_EXIT_HOST; |
| @@ -1534,6 +1685,9 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm) | |||
| 1534 | case SVM_EXIT_MSR: | 1685 | case SVM_EXIT_MSR: |
| 1535 | vmexit = nested_svm_exit_handled_msr(svm); | 1686 | vmexit = nested_svm_exit_handled_msr(svm); |
| 1536 | break; | 1687 | break; |
| 1688 | case SVM_EXIT_IOIO: | ||
| 1689 | vmexit = nested_svm_intercept_ioio(svm); | ||
| 1690 | break; | ||
| 1537 | case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: { | 1691 | case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: { |
| 1538 | u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0); | 1692 | u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0); |
| 1539 | if (svm->nested.intercept_cr_read & cr_bits) | 1693 | if (svm->nested.intercept_cr_read & cr_bits) |
| @@ -1564,6 +1718,10 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm) | |||
| 1564 | vmexit = NESTED_EXIT_DONE; | 1718 | vmexit = NESTED_EXIT_DONE; |
| 1565 | break; | 1719 | break; |
| 1566 | } | 1720 | } |
| 1721 | case SVM_EXIT_ERR: { | ||
| 1722 | vmexit = NESTED_EXIT_DONE; | ||
| 1723 | break; | ||
| 1724 | } | ||
| 1567 | default: { | 1725 | default: { |
| 1568 | u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR); | 1726 | u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR); |
| 1569 | if (svm->nested.intercept & exit_bits) | 1727 | if (svm->nested.intercept & exit_bits) |
| @@ -1571,9 +1729,17 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm) | |||
| 1571 | } | 1729 | } |
| 1572 | } | 1730 | } |
| 1573 | 1731 | ||
| 1574 | if (vmexit == NESTED_EXIT_DONE) { | 1732 | return vmexit; |
| 1733 | } | ||
| 1734 | |||
| 1735 | static int nested_svm_exit_handled(struct vcpu_svm *svm) | ||
| 1736 | { | ||
| 1737 | int vmexit; | ||
| 1738 | |||
| 1739 | vmexit = nested_svm_intercept(svm); | ||
| 1740 | |||
| 1741 | if (vmexit == NESTED_EXIT_DONE) | ||
| 1575 | nested_svm_vmexit(svm); | 1742 | nested_svm_vmexit(svm); |
| 1576 | } | ||
| 1577 | 1743 | ||
| 1578 | return vmexit; | 1744 | return vmexit; |
| 1579 | } | 1745 | } |
| @@ -1615,6 +1781,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
| 1615 | struct vmcb *nested_vmcb; | 1781 | struct vmcb *nested_vmcb; |
| 1616 | struct vmcb *hsave = svm->nested.hsave; | 1782 | struct vmcb *hsave = svm->nested.hsave; |
| 1617 | struct vmcb *vmcb = svm->vmcb; | 1783 | struct vmcb *vmcb = svm->vmcb; |
| 1784 | struct page *page; | ||
| 1618 | 1785 | ||
| 1619 | trace_kvm_nested_vmexit_inject(vmcb->control.exit_code, | 1786 | trace_kvm_nested_vmexit_inject(vmcb->control.exit_code, |
| 1620 | vmcb->control.exit_info_1, | 1787 | vmcb->control.exit_info_1, |
| @@ -1622,10 +1789,13 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
| 1622 | vmcb->control.exit_int_info, | 1789 | vmcb->control.exit_int_info, |
| 1623 | vmcb->control.exit_int_info_err); | 1790 | vmcb->control.exit_int_info_err); |
| 1624 | 1791 | ||
| 1625 | nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0); | 1792 | nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page); |
| 1626 | if (!nested_vmcb) | 1793 | if (!nested_vmcb) |
| 1627 | return 1; | 1794 | return 1; |
| 1628 | 1795 | ||
| 1796 | /* Exit nested SVM mode */ | ||
| 1797 | svm->nested.vmcb = 0; | ||
| 1798 | |||
| 1629 | /* Give the current vmcb to the guest */ | 1799 | /* Give the current vmcb to the guest */ |
| 1630 | disable_gif(svm); | 1800 | disable_gif(svm); |
| 1631 | 1801 | ||
| @@ -1635,9 +1805,10 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
| 1635 | nested_vmcb->save.ds = vmcb->save.ds; | 1805 | nested_vmcb->save.ds = vmcb->save.ds; |
| 1636 | nested_vmcb->save.gdtr = vmcb->save.gdtr; | 1806 | nested_vmcb->save.gdtr = vmcb->save.gdtr; |
| 1637 | nested_vmcb->save.idtr = vmcb->save.idtr; | 1807 | nested_vmcb->save.idtr = vmcb->save.idtr; |
| 1638 | if (npt_enabled) | 1808 | nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu); |
| 1639 | nested_vmcb->save.cr3 = vmcb->save.cr3; | 1809 | nested_vmcb->save.cr3 = svm->vcpu.arch.cr3; |
| 1640 | nested_vmcb->save.cr2 = vmcb->save.cr2; | 1810 | nested_vmcb->save.cr2 = vmcb->save.cr2; |
| 1811 | nested_vmcb->save.cr4 = svm->vcpu.arch.cr4; | ||
| 1641 | nested_vmcb->save.rflags = vmcb->save.rflags; | 1812 | nested_vmcb->save.rflags = vmcb->save.rflags; |
| 1642 | nested_vmcb->save.rip = vmcb->save.rip; | 1813 | nested_vmcb->save.rip = vmcb->save.rip; |
| 1643 | nested_vmcb->save.rsp = vmcb->save.rsp; | 1814 | nested_vmcb->save.rsp = vmcb->save.rsp; |
| @@ -1709,10 +1880,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
| 1709 | svm->vmcb->save.cpl = 0; | 1880 | svm->vmcb->save.cpl = 0; |
| 1710 | svm->vmcb->control.exit_int_info = 0; | 1881 | svm->vmcb->control.exit_int_info = 0; |
| 1711 | 1882 | ||
| 1712 | /* Exit nested SVM mode */ | 1883 | nested_svm_unmap(page); |
| 1713 | svm->nested.vmcb = 0; | ||
| 1714 | |||
| 1715 | nested_svm_unmap(nested_vmcb, KM_USER0); | ||
| 1716 | 1884 | ||
| 1717 | kvm_mmu_reset_context(&svm->vcpu); | 1885 | kvm_mmu_reset_context(&svm->vcpu); |
| 1718 | kvm_mmu_load(&svm->vcpu); | 1886 | kvm_mmu_load(&svm->vcpu); |
| @@ -1722,19 +1890,33 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
| 1722 | 1890 | ||
| 1723 | static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) | 1891 | static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) |
| 1724 | { | 1892 | { |
| 1725 | u32 *nested_msrpm; | 1893 | /* |
| 1894 | * This function merges the msr permission bitmaps of kvm and the | ||
| 1895 | * nested vmcb. It is optimized in that it only merges the parts where | ||
| 1896 | * the kvm msr permission bitmap may contain zero bits | ||
| 1897 | */ | ||
| 1726 | int i; | 1898 | int i; |
| 1727 | 1899 | ||
| 1728 | nested_msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, KM_USER0); | 1900 | if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) |
| 1729 | if (!nested_msrpm) | 1901 | return true; |
| 1730 | return false; | ||
| 1731 | 1902 | ||
| 1732 | for (i=0; i< PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER) / 4; i++) | 1903 | for (i = 0; i < MSRPM_OFFSETS; i++) { |
| 1733 | svm->nested.msrpm[i] = svm->msrpm[i] | nested_msrpm[i]; | 1904 | u32 value, p; |
| 1905 | u64 offset; | ||
| 1734 | 1906 | ||
| 1735 | svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm); | 1907 | if (msrpm_offsets[i] == 0xffffffff) |
| 1908 | break; | ||
| 1909 | |||
| 1910 | p = msrpm_offsets[i]; | ||
| 1911 | offset = svm->nested.vmcb_msrpm + (p * 4); | ||
| 1912 | |||
| 1913 | if (kvm_read_guest(svm->vcpu.kvm, offset, &value, 4)) | ||
| 1914 | return false; | ||
| 1915 | |||
| 1916 | svm->nested.msrpm[p] = svm->msrpm[p] | value; | ||
| 1917 | } | ||
| 1736 | 1918 | ||
| 1737 | nested_svm_unmap(nested_msrpm, KM_USER0); | 1919 | svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm); |
| 1738 | 1920 | ||
| 1739 | return true; | 1921 | return true; |
| 1740 | } | 1922 | } |
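nested_svm_vmrun_msrpm() above now merges only the words whose offsets were collected in msrpm_offsets (the only places where KVM's own bitmap can contain zero, i.e. pass-through, bits) and reads each L1 word with kvm_read_guest() instead of mapping the page. Since a set bit means "intercept", OR-ing KVM's word with the L1 word can only add intercepts, never remove ones KVM relies on. A minimal illustration with made-up words:

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: merging one MSR-permission word (set bit = intercept). */
int main(void)
{
	uint32_t kvm_word   = 0xfffffff0u;	/* KVM lets two MSRs through */
	uint32_t guest_word = 0x0000000cu;	/* L1 intercepts one of them */

	printf("merged = %#x\n", kvm_word | guest_word);	/* -> 0xfffffffc */
	return 0;
}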
| @@ -1744,26 +1926,34 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
| 1744 | struct vmcb *nested_vmcb; | 1926 | struct vmcb *nested_vmcb; |
| 1745 | struct vmcb *hsave = svm->nested.hsave; | 1927 | struct vmcb *hsave = svm->nested.hsave; |
| 1746 | struct vmcb *vmcb = svm->vmcb; | 1928 | struct vmcb *vmcb = svm->vmcb; |
| 1929 | struct page *page; | ||
| 1930 | u64 vmcb_gpa; | ||
| 1747 | 1931 | ||
| 1748 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0); | 1932 | vmcb_gpa = svm->vmcb->save.rax; |
| 1933 | |||
| 1934 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); | ||
| 1749 | if (!nested_vmcb) | 1935 | if (!nested_vmcb) |
| 1750 | return false; | 1936 | return false; |
| 1751 | 1937 | ||
| 1752 | /* nested_vmcb is our indicator if nested SVM is activated */ | 1938 | trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, vmcb_gpa, |
| 1753 | svm->nested.vmcb = svm->vmcb->save.rax; | ||
| 1754 | |||
| 1755 | trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, svm->nested.vmcb, | ||
| 1756 | nested_vmcb->save.rip, | 1939 | nested_vmcb->save.rip, |
| 1757 | nested_vmcb->control.int_ctl, | 1940 | nested_vmcb->control.int_ctl, |
| 1758 | nested_vmcb->control.event_inj, | 1941 | nested_vmcb->control.event_inj, |
| 1759 | nested_vmcb->control.nested_ctl); | 1942 | nested_vmcb->control.nested_ctl); |
| 1760 | 1943 | ||
| 1944 | trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr_read, | ||
| 1945 | nested_vmcb->control.intercept_cr_write, | ||
| 1946 | nested_vmcb->control.intercept_exceptions, | ||
| 1947 | nested_vmcb->control.intercept); | ||
| 1948 | |||
| 1761 | /* Clear internal status */ | 1949 | /* Clear internal status */ |
| 1762 | kvm_clear_exception_queue(&svm->vcpu); | 1950 | kvm_clear_exception_queue(&svm->vcpu); |
| 1763 | kvm_clear_interrupt_queue(&svm->vcpu); | 1951 | kvm_clear_interrupt_queue(&svm->vcpu); |
| 1764 | 1952 | ||
| 1765 | /* Save the old vmcb, so we don't need to pick what we save, but | 1953 | /* |
| 1766 | can restore everything when a VMEXIT occurs */ | 1954 | * Save the old vmcb, so we don't need to pick what we save, but can |
| 1955 | * restore everything when a VMEXIT occurs | ||
| 1956 | */ | ||
| 1767 | hsave->save.es = vmcb->save.es; | 1957 | hsave->save.es = vmcb->save.es; |
| 1768 | hsave->save.cs = vmcb->save.cs; | 1958 | hsave->save.cs = vmcb->save.cs; |
| 1769 | hsave->save.ss = vmcb->save.ss; | 1959 | hsave->save.ss = vmcb->save.ss; |
| @@ -1803,14 +1993,17 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
| 1803 | if (npt_enabled) { | 1993 | if (npt_enabled) { |
| 1804 | svm->vmcb->save.cr3 = nested_vmcb->save.cr3; | 1994 | svm->vmcb->save.cr3 = nested_vmcb->save.cr3; |
| 1805 | svm->vcpu.arch.cr3 = nested_vmcb->save.cr3; | 1995 | svm->vcpu.arch.cr3 = nested_vmcb->save.cr3; |
| 1806 | } else { | 1996 | } else |
| 1807 | kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); | 1997 | kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); |
| 1808 | kvm_mmu_reset_context(&svm->vcpu); | 1998 | |
| 1809 | } | 1999 | /* Guest paging mode is active - reset mmu */ |
| 2000 | kvm_mmu_reset_context(&svm->vcpu); | ||
| 2001 | |||
| 1810 | svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2; | 2002 | svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2; |
| 1811 | kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax); | 2003 | kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax); |
| 1812 | kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp); | 2004 | kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp); |
| 1813 | kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip); | 2005 | kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip); |
| 2006 | |||
| 1814 | /* In case we don't even reach vcpu_run, the fields are not updated */ | 2007 | /* In case we don't even reach vcpu_run, the fields are not updated */ |
| 1815 | svm->vmcb->save.rax = nested_vmcb->save.rax; | 2008 | svm->vmcb->save.rax = nested_vmcb->save.rax; |
| 1816 | svm->vmcb->save.rsp = nested_vmcb->save.rsp; | 2009 | svm->vmcb->save.rsp = nested_vmcb->save.rsp; |
| @@ -1819,22 +2012,8 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
| 1819 | svm->vmcb->save.dr6 = nested_vmcb->save.dr6; | 2012 | svm->vmcb->save.dr6 = nested_vmcb->save.dr6; |
| 1820 | svm->vmcb->save.cpl = nested_vmcb->save.cpl; | 2013 | svm->vmcb->save.cpl = nested_vmcb->save.cpl; |
| 1821 | 2014 | ||
| 1822 | /* We don't want a nested guest to be more powerful than the guest, | 2015 | svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL; |
| 1823 | so all intercepts are ORed */ | 2016 | svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL; |
| 1824 | svm->vmcb->control.intercept_cr_read |= | ||
| 1825 | nested_vmcb->control.intercept_cr_read; | ||
| 1826 | svm->vmcb->control.intercept_cr_write |= | ||
| 1827 | nested_vmcb->control.intercept_cr_write; | ||
| 1828 | svm->vmcb->control.intercept_dr_read |= | ||
| 1829 | nested_vmcb->control.intercept_dr_read; | ||
| 1830 | svm->vmcb->control.intercept_dr_write |= | ||
| 1831 | nested_vmcb->control.intercept_dr_write; | ||
| 1832 | svm->vmcb->control.intercept_exceptions |= | ||
| 1833 | nested_vmcb->control.intercept_exceptions; | ||
| 1834 | |||
| 1835 | svm->vmcb->control.intercept |= nested_vmcb->control.intercept; | ||
| 1836 | |||
| 1837 | svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa; | ||
| 1838 | 2017 | ||
| 1839 | /* cache intercepts */ | 2018 | /* cache intercepts */ |
| 1840 | svm->nested.intercept_cr_read = nested_vmcb->control.intercept_cr_read; | 2019 | svm->nested.intercept_cr_read = nested_vmcb->control.intercept_cr_read; |
| @@ -1851,13 +2030,43 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
| 1851 | else | 2030 | else |
| 1852 | svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; | 2031 | svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; |
| 1853 | 2032 | ||
| 2033 | if (svm->vcpu.arch.hflags & HF_VINTR_MASK) { | ||
| 2034 | /* We only want the cr8 intercept bits of the guest */ | ||
| 2035 | svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR8_MASK; | ||
| 2036 | svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK; | ||
| 2037 | } | ||
| 2038 | |||
| 2039 | /* We don't want to see VMMCALLs from a nested guest */ | ||
| 2040 | svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMMCALL); | ||
| 2041 | |||
| 2042 | /* | ||
| 2043 | * We don't want a nested guest to be more powerful than the guest, so | ||
| 2044 | * all intercepts are ORed | ||
| 2045 | */ | ||
| 2046 | svm->vmcb->control.intercept_cr_read |= | ||
| 2047 | nested_vmcb->control.intercept_cr_read; | ||
| 2048 | svm->vmcb->control.intercept_cr_write |= | ||
| 2049 | nested_vmcb->control.intercept_cr_write; | ||
| 2050 | svm->vmcb->control.intercept_dr_read |= | ||
| 2051 | nested_vmcb->control.intercept_dr_read; | ||
| 2052 | svm->vmcb->control.intercept_dr_write |= | ||
| 2053 | nested_vmcb->control.intercept_dr_write; | ||
| 2054 | svm->vmcb->control.intercept_exceptions |= | ||
| 2055 | nested_vmcb->control.intercept_exceptions; | ||
| 2056 | |||
| 2057 | svm->vmcb->control.intercept |= nested_vmcb->control.intercept; | ||
| 2058 | |||
| 2059 | svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl; | ||
| 1854 | svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; | 2060 | svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; |
| 1855 | svm->vmcb->control.int_state = nested_vmcb->control.int_state; | 2061 | svm->vmcb->control.int_state = nested_vmcb->control.int_state; |
| 1856 | svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset; | 2062 | svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset; |
| 1857 | svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; | 2063 | svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; |
| 1858 | svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; | 2064 | svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; |
| 1859 | 2065 | ||
| 1860 | nested_svm_unmap(nested_vmcb, KM_USER0); | 2066 | nested_svm_unmap(page); |
| 2067 | |||
| 2068 | /* nested_vmcb is our indicator if nested SVM is activated */ | ||
| 2069 | svm->nested.vmcb = vmcb_gpa; | ||
| 1861 | 2070 | ||
| 1862 | enable_gif(svm); | 2071 | enable_gif(svm); |
| 1863 | 2072 | ||
| @@ -1883,6 +2092,7 @@ static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb) | |||
| 1883 | static int vmload_interception(struct vcpu_svm *svm) | 2092 | static int vmload_interception(struct vcpu_svm *svm) |
| 1884 | { | 2093 | { |
| 1885 | struct vmcb *nested_vmcb; | 2094 | struct vmcb *nested_vmcb; |
| 2095 | struct page *page; | ||
| 1886 | 2096 | ||
| 1887 | if (nested_svm_check_permissions(svm)) | 2097 | if (nested_svm_check_permissions(svm)) |
| 1888 | return 1; | 2098 | return 1; |
| @@ -1890,12 +2100,12 @@ static int vmload_interception(struct vcpu_svm *svm) | |||
| 1890 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; | 2100 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; |
| 1891 | skip_emulated_instruction(&svm->vcpu); | 2101 | skip_emulated_instruction(&svm->vcpu); |
| 1892 | 2102 | ||
| 1893 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0); | 2103 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); |
| 1894 | if (!nested_vmcb) | 2104 | if (!nested_vmcb) |
| 1895 | return 1; | 2105 | return 1; |
| 1896 | 2106 | ||
| 1897 | nested_svm_vmloadsave(nested_vmcb, svm->vmcb); | 2107 | nested_svm_vmloadsave(nested_vmcb, svm->vmcb); |
| 1898 | nested_svm_unmap(nested_vmcb, KM_USER0); | 2108 | nested_svm_unmap(page); |
| 1899 | 2109 | ||
| 1900 | return 1; | 2110 | return 1; |
| 1901 | } | 2111 | } |
| @@ -1903,6 +2113,7 @@ static int vmload_interception(struct vcpu_svm *svm) | |||
| 1903 | static int vmsave_interception(struct vcpu_svm *svm) | 2113 | static int vmsave_interception(struct vcpu_svm *svm) |
| 1904 | { | 2114 | { |
| 1905 | struct vmcb *nested_vmcb; | 2115 | struct vmcb *nested_vmcb; |
| 2116 | struct page *page; | ||
| 1906 | 2117 | ||
| 1907 | if (nested_svm_check_permissions(svm)) | 2118 | if (nested_svm_check_permissions(svm)) |
| 1908 | return 1; | 2119 | return 1; |
| @@ -1910,12 +2121,12 @@ static int vmsave_interception(struct vcpu_svm *svm) | |||
| 1910 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; | 2121 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; |
| 1911 | skip_emulated_instruction(&svm->vcpu); | 2122 | skip_emulated_instruction(&svm->vcpu); |
| 1912 | 2123 | ||
| 1913 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0); | 2124 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); |
| 1914 | if (!nested_vmcb) | 2125 | if (!nested_vmcb) |
| 1915 | return 1; | 2126 | return 1; |
| 1916 | 2127 | ||
| 1917 | nested_svm_vmloadsave(svm->vmcb, nested_vmcb); | 2128 | nested_svm_vmloadsave(svm->vmcb, nested_vmcb); |
| 1918 | nested_svm_unmap(nested_vmcb, KM_USER0); | 2129 | nested_svm_unmap(page); |
| 1919 | 2130 | ||
| 1920 | return 1; | 2131 | return 1; |
| 1921 | } | 2132 | } |
| @@ -2018,6 +2229,8 @@ static int task_switch_interception(struct vcpu_svm *svm) | |||
| 2018 | svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK; | 2229 | svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK; |
| 2019 | uint32_t idt_v = | 2230 | uint32_t idt_v = |
| 2020 | svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID; | 2231 | svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID; |
| 2232 | bool has_error_code = false; | ||
| 2233 | u32 error_code = 0; | ||
| 2021 | 2234 | ||
| 2022 | tss_selector = (u16)svm->vmcb->control.exit_info_1; | 2235 | tss_selector = (u16)svm->vmcb->control.exit_info_1; |
| 2023 | 2236 | ||
| @@ -2038,6 +2251,12 @@ static int task_switch_interception(struct vcpu_svm *svm) | |||
| 2038 | svm->vcpu.arch.nmi_injected = false; | 2251 | svm->vcpu.arch.nmi_injected = false; |
| 2039 | break; | 2252 | break; |
| 2040 | case SVM_EXITINTINFO_TYPE_EXEPT: | 2253 | case SVM_EXITINTINFO_TYPE_EXEPT: |
| 2254 | if (svm->vmcb->control.exit_info_2 & | ||
| 2255 | (1ULL << SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE)) { | ||
| 2256 | has_error_code = true; | ||
| 2257 | error_code = | ||
| 2258 | (u32)svm->vmcb->control.exit_info_2; | ||
| 2259 | } | ||
| 2041 | kvm_clear_exception_queue(&svm->vcpu); | 2260 | kvm_clear_exception_queue(&svm->vcpu); |
| 2042 | break; | 2261 | break; |
| 2043 | case SVM_EXITINTINFO_TYPE_INTR: | 2262 | case SVM_EXITINTINFO_TYPE_INTR: |
| @@ -2054,7 +2273,14 @@ static int task_switch_interception(struct vcpu_svm *svm) | |||
| 2054 | (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) | 2273 | (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) |
| 2055 | skip_emulated_instruction(&svm->vcpu); | 2274 | skip_emulated_instruction(&svm->vcpu); |
| 2056 | 2275 | ||
| 2057 | return kvm_task_switch(&svm->vcpu, tss_selector, reason); | 2276 | if (kvm_task_switch(&svm->vcpu, tss_selector, reason, |
| 2277 | has_error_code, error_code) == EMULATE_FAIL) { | ||
| 2278 | svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
| 2279 | svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | ||
| 2280 | svm->vcpu.run->internal.ndata = 0; | ||
| 2281 | return 0; | ||
| 2282 | } | ||
| 2283 | return 1; | ||
| 2058 | } | 2284 | } |
| 2059 | 2285 | ||
| 2060 | static int cpuid_interception(struct vcpu_svm *svm) | 2286 | static int cpuid_interception(struct vcpu_svm *svm) |
| @@ -2067,7 +2293,7 @@ static int cpuid_interception(struct vcpu_svm *svm) | |||
| 2067 | static int iret_interception(struct vcpu_svm *svm) | 2293 | static int iret_interception(struct vcpu_svm *svm) |
| 2068 | { | 2294 | { |
| 2069 | ++svm->vcpu.stat.nmi_window_exits; | 2295 | ++svm->vcpu.stat.nmi_window_exits; |
| 2070 | svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET); | 2296 | svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_IRET); |
| 2071 | svm->vcpu.arch.hflags |= HF_IRET_MASK; | 2297 | svm->vcpu.arch.hflags |= HF_IRET_MASK; |
| 2072 | return 1; | 2298 | return 1; |
| 2073 | } | 2299 | } |
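
This hunk, and the NMI masking paths further down, widen the IRET intercept mask from 1UL to 1ULL. Since control.intercept is a 64-bit field, building its masks from unsigned long is only reliable when long happens to be 64 bits wide; the 1ULL form is correct on any ABI. A small self-contained illustration of the difference (not kernel code, bit numbers chosen only for the demo):

```c
/* Why masks for a 64-bit field should be built from 1ULL, not 1UL:
 * on an ILP32 build unsigned long is 32 bits wide and (1UL << 33)
 * is undefined behaviour, while (1ULL << 33) is always well defined. */
#include <stdint.h>
#include <stdio.h>

#define SET_BIT64(field, bit)   ((field) |  (1ULL << (bit)))
#define CLEAR_BIT64(field, bit) ((field) & ~(1ULL << (bit)))

int main(void)
{
	uint64_t intercept = 0;

	intercept = SET_BIT64(intercept, 20); /* fine with 1UL or 1ULL     */
	intercept = SET_BIT64(intercept, 33); /* needs 1ULL on 32-bit ABIs */
	printf("intercept = %#llx\n", (unsigned long long)intercept);

	intercept = CLEAR_BIT64(intercept, 33);
	printf("intercept = %#llx\n", (unsigned long long)intercept);
	return 0;
}
```
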
| @@ -2145,9 +2371,11 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) | |||
| 2145 | case MSR_IA32_SYSENTER_ESP: | 2371 | case MSR_IA32_SYSENTER_ESP: |
| 2146 | *data = svm->sysenter_esp; | 2372 | *data = svm->sysenter_esp; |
| 2147 | break; | 2373 | break; |
| 2148 | /* Nobody will change the following 5 values in the VMCB so | 2374 | /* |
| 2149 | we can safely return them on rdmsr. They will always be 0 | 2375 | * Nobody will change the following 5 values in the VMCB so we can |
| 2150 | until LBRV is implemented. */ | 2376 | * safely return them on rdmsr. They will always be 0 until LBRV is |
| 2377 | * implemented. | ||
| 2378 | */ | ||
| 2151 | case MSR_IA32_DEBUGCTLMSR: | 2379 | case MSR_IA32_DEBUGCTLMSR: |
| 2152 | *data = svm->vmcb->save.dbgctl; | 2380 | *data = svm->vmcb->save.dbgctl; |
| 2153 | break; | 2381 | break; |
| @@ -2167,7 +2395,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) | |||
| 2167 | *data = svm->nested.hsave_msr; | 2395 | *data = svm->nested.hsave_msr; |
| 2168 | break; | 2396 | break; |
| 2169 | case MSR_VM_CR: | 2397 | case MSR_VM_CR: |
| 2170 | *data = 0; | 2398 | *data = svm->nested.vm_cr_msr; |
| 2171 | break; | 2399 | break; |
| 2172 | case MSR_IA32_UCODE_REV: | 2400 | case MSR_IA32_UCODE_REV: |
| 2173 | *data = 0x01000065; | 2401 | *data = 0x01000065; |
| @@ -2197,6 +2425,31 @@ static int rdmsr_interception(struct vcpu_svm *svm) | |||
| 2197 | return 1; | 2425 | return 1; |
| 2198 | } | 2426 | } |
| 2199 | 2427 | ||
| 2428 | static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data) | ||
| 2429 | { | ||
| 2430 | struct vcpu_svm *svm = to_svm(vcpu); | ||
| 2431 | int svm_dis, chg_mask; | ||
| 2432 | |||
| 2433 | if (data & ~SVM_VM_CR_VALID_MASK) | ||
| 2434 | return 1; | ||
| 2435 | |||
| 2436 | chg_mask = SVM_VM_CR_VALID_MASK; | ||
| 2437 | |||
| 2438 | if (svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK) | ||
| 2439 | chg_mask &= ~(SVM_VM_CR_SVM_LOCK_MASK | SVM_VM_CR_SVM_DIS_MASK); | ||
| 2440 | |||
| 2441 | svm->nested.vm_cr_msr &= ~chg_mask; | ||
| 2442 | svm->nested.vm_cr_msr |= (data & chg_mask); | ||
| 2443 | |||
| 2444 | svm_dis = svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK; | ||
| 2445 | |||
| 2446 | /* check for svm_disable while efer.svme is set */ | ||
| 2447 | if (svm_dis && (vcpu->arch.efer & EFER_SVME)) | ||
| 2448 | return 1; | ||
| 2449 | |||
| 2450 | return 0; | ||
| 2451 | } | ||
| 2452 | |||
| 2200 | static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) | 2453 | static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) |
| 2201 | { | 2454 | { |
| 2202 | struct vcpu_svm *svm = to_svm(vcpu); | 2455 | struct vcpu_svm *svm = to_svm(vcpu); |
| @@ -2263,6 +2516,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) | |||
| 2263 | svm->nested.hsave_msr = data; | 2516 | svm->nested.hsave_msr = data; |
| 2264 | break; | 2517 | break; |
| 2265 | case MSR_VM_CR: | 2518 | case MSR_VM_CR: |
| 2519 | return svm_set_vm_cr(vcpu, data); | ||
| 2266 | case MSR_VM_IGNNE: | 2520 | case MSR_VM_IGNNE: |
| 2267 | pr_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); | 2521 | pr_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); |
| 2268 | break; | 2522 | break; |
| @@ -2326,16 +2580,16 @@ static int pause_interception(struct vcpu_svm *svm) | |||
| 2326 | } | 2580 | } |
| 2327 | 2581 | ||
| 2328 | static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | 2582 | static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { |
| 2329 | [SVM_EXIT_READ_CR0] = emulate_on_interception, | 2583 | [SVM_EXIT_READ_CR0] = emulate_on_interception, |
| 2330 | [SVM_EXIT_READ_CR3] = emulate_on_interception, | 2584 | [SVM_EXIT_READ_CR3] = emulate_on_interception, |
| 2331 | [SVM_EXIT_READ_CR4] = emulate_on_interception, | 2585 | [SVM_EXIT_READ_CR4] = emulate_on_interception, |
| 2332 | [SVM_EXIT_READ_CR8] = emulate_on_interception, | 2586 | [SVM_EXIT_READ_CR8] = emulate_on_interception, |
| 2333 | [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, | 2587 | [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, |
| 2334 | [SVM_EXIT_WRITE_CR0] = emulate_on_interception, | 2588 | [SVM_EXIT_WRITE_CR0] = emulate_on_interception, |
| 2335 | [SVM_EXIT_WRITE_CR3] = emulate_on_interception, | 2589 | [SVM_EXIT_WRITE_CR3] = emulate_on_interception, |
| 2336 | [SVM_EXIT_WRITE_CR4] = emulate_on_interception, | 2590 | [SVM_EXIT_WRITE_CR4] = emulate_on_interception, |
| 2337 | [SVM_EXIT_WRITE_CR8] = cr8_write_interception, | 2591 | [SVM_EXIT_WRITE_CR8] = cr8_write_interception, |
| 2338 | [SVM_EXIT_READ_DR0] = emulate_on_interception, | 2592 | [SVM_EXIT_READ_DR0] = emulate_on_interception, |
| 2339 | [SVM_EXIT_READ_DR1] = emulate_on_interception, | 2593 | [SVM_EXIT_READ_DR1] = emulate_on_interception, |
| 2340 | [SVM_EXIT_READ_DR2] = emulate_on_interception, | 2594 | [SVM_EXIT_READ_DR2] = emulate_on_interception, |
| 2341 | [SVM_EXIT_READ_DR3] = emulate_on_interception, | 2595 | [SVM_EXIT_READ_DR3] = emulate_on_interception, |
| @@ -2354,15 +2608,14 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
| 2354 | [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, | 2608 | [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, |
| 2355 | [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, | 2609 | [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, |
| 2356 | [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, | 2610 | [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, |
| 2357 | [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, | 2611 | [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, |
| 2358 | [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, | 2612 | [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, |
| 2359 | [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, | 2613 | [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, |
| 2360 | [SVM_EXIT_INTR] = intr_interception, | 2614 | [SVM_EXIT_INTR] = intr_interception, |
| 2361 | [SVM_EXIT_NMI] = nmi_interception, | 2615 | [SVM_EXIT_NMI] = nmi_interception, |
| 2362 | [SVM_EXIT_SMI] = nop_on_interception, | 2616 | [SVM_EXIT_SMI] = nop_on_interception, |
| 2363 | [SVM_EXIT_INIT] = nop_on_interception, | 2617 | [SVM_EXIT_INIT] = nop_on_interception, |
| 2364 | [SVM_EXIT_VINTR] = interrupt_window_interception, | 2618 | [SVM_EXIT_VINTR] = interrupt_window_interception, |
| 2365 | /* [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, */ | ||
| 2366 | [SVM_EXIT_CPUID] = cpuid_interception, | 2619 | [SVM_EXIT_CPUID] = cpuid_interception, |
| 2367 | [SVM_EXIT_IRET] = iret_interception, | 2620 | [SVM_EXIT_IRET] = iret_interception, |
| 2368 | [SVM_EXIT_INVD] = emulate_on_interception, | 2621 | [SVM_EXIT_INVD] = emulate_on_interception, |
| @@ -2370,7 +2623,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
| 2370 | [SVM_EXIT_HLT] = halt_interception, | 2623 | [SVM_EXIT_HLT] = halt_interception, |
| 2371 | [SVM_EXIT_INVLPG] = invlpg_interception, | 2624 | [SVM_EXIT_INVLPG] = invlpg_interception, |
| 2372 | [SVM_EXIT_INVLPGA] = invlpga_interception, | 2625 | [SVM_EXIT_INVLPGA] = invlpga_interception, |
| 2373 | [SVM_EXIT_IOIO] = io_interception, | 2626 | [SVM_EXIT_IOIO] = io_interception, |
| 2374 | [SVM_EXIT_MSR] = msr_interception, | 2627 | [SVM_EXIT_MSR] = msr_interception, |
| 2375 | [SVM_EXIT_TASK_SWITCH] = task_switch_interception, | 2628 | [SVM_EXIT_TASK_SWITCH] = task_switch_interception, |
| 2376 | [SVM_EXIT_SHUTDOWN] = shutdown_interception, | 2629 | [SVM_EXIT_SHUTDOWN] = shutdown_interception, |
| @@ -2393,7 +2646,12 @@ static int handle_exit(struct kvm_vcpu *vcpu) | |||
| 2393 | struct kvm_run *kvm_run = vcpu->run; | 2646 | struct kvm_run *kvm_run = vcpu->run; |
| 2394 | u32 exit_code = svm->vmcb->control.exit_code; | 2647 | u32 exit_code = svm->vmcb->control.exit_code; |
| 2395 | 2648 | ||
| 2396 | trace_kvm_exit(exit_code, svm->vmcb->save.rip); | 2649 | trace_kvm_exit(exit_code, vcpu); |
| 2650 | |||
| 2651 | if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR0_MASK)) | ||
| 2652 | vcpu->arch.cr0 = svm->vmcb->save.cr0; | ||
| 2653 | if (npt_enabled) | ||
| 2654 | vcpu->arch.cr3 = svm->vmcb->save.cr3; | ||
| 2397 | 2655 | ||
| 2398 | if (unlikely(svm->nested.exit_required)) { | 2656 | if (unlikely(svm->nested.exit_required)) { |
| 2399 | nested_svm_vmexit(svm); | 2657 | nested_svm_vmexit(svm); |
| @@ -2422,11 +2680,6 @@ static int handle_exit(struct kvm_vcpu *vcpu) | |||
| 2422 | 2680 | ||
| 2423 | svm_complete_interrupts(svm); | 2681 | svm_complete_interrupts(svm); |
| 2424 | 2682 | ||
| 2425 | if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR0_MASK)) | ||
| 2426 | vcpu->arch.cr0 = svm->vmcb->save.cr0; | ||
| 2427 | if (npt_enabled) | ||
| 2428 | vcpu->arch.cr3 = svm->vmcb->save.cr3; | ||
| 2429 | |||
| 2430 | if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { | 2683 | if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { |
| 2431 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; | 2684 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; |
| 2432 | kvm_run->fail_entry.hardware_entry_failure_reason | 2685 | kvm_run->fail_entry.hardware_entry_failure_reason |
| @@ -2479,7 +2732,7 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu) | |||
| 2479 | 2732 | ||
| 2480 | svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; | 2733 | svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; |
| 2481 | vcpu->arch.hflags |= HF_NMI_MASK; | 2734 | vcpu->arch.hflags |= HF_NMI_MASK; |
| 2482 | svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET); | 2735 | svm->vmcb->control.intercept |= (1ULL << INTERCEPT_IRET); |
| 2483 | ++vcpu->stat.nmi_injections; | 2736 | ++vcpu->stat.nmi_injections; |
| 2484 | } | 2737 | } |
| 2485 | 2738 | ||
| @@ -2511,6 +2764,9 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) | |||
| 2511 | { | 2764 | { |
| 2512 | struct vcpu_svm *svm = to_svm(vcpu); | 2765 | struct vcpu_svm *svm = to_svm(vcpu); |
| 2513 | 2766 | ||
| 2767 | if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK)) | ||
| 2768 | return; | ||
| 2769 | |||
| 2514 | if (irr == -1) | 2770 | if (irr == -1) |
| 2515 | return; | 2771 | return; |
| 2516 | 2772 | ||
| @@ -2522,8 +2778,12 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu) | |||
| 2522 | { | 2778 | { |
| 2523 | struct vcpu_svm *svm = to_svm(vcpu); | 2779 | struct vcpu_svm *svm = to_svm(vcpu); |
| 2524 | struct vmcb *vmcb = svm->vmcb; | 2780 | struct vmcb *vmcb = svm->vmcb; |
| 2525 | return !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) && | 2781 | int ret; |
| 2526 | !(svm->vcpu.arch.hflags & HF_NMI_MASK); | 2782 | ret = !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) && |
| 2783 | !(svm->vcpu.arch.hflags & HF_NMI_MASK); | ||
| 2784 | ret = ret && gif_set(svm) && nested_svm_nmi(svm); | ||
| 2785 | |||
| 2786 | return ret; | ||
| 2527 | } | 2787 | } |
| 2528 | 2788 | ||
| 2529 | static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu) | 2789 | static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu) |
| @@ -2539,10 +2799,10 @@ static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) | |||
| 2539 | 2799 | ||
| 2540 | if (masked) { | 2800 | if (masked) { |
| 2541 | svm->vcpu.arch.hflags |= HF_NMI_MASK; | 2801 | svm->vcpu.arch.hflags |= HF_NMI_MASK; |
| 2542 | svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET); | 2802 | svm->vmcb->control.intercept |= (1ULL << INTERCEPT_IRET); |
| 2543 | } else { | 2803 | } else { |
| 2544 | svm->vcpu.arch.hflags &= ~HF_NMI_MASK; | 2804 | svm->vcpu.arch.hflags &= ~HF_NMI_MASK; |
| 2545 | svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET); | 2805 | svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_IRET); |
| 2546 | } | 2806 | } |
| 2547 | } | 2807 | } |
| 2548 | 2808 | ||
| @@ -2568,13 +2828,13 @@ static void enable_irq_window(struct kvm_vcpu *vcpu) | |||
| 2568 | { | 2828 | { |
| 2569 | struct vcpu_svm *svm = to_svm(vcpu); | 2829 | struct vcpu_svm *svm = to_svm(vcpu); |
| 2570 | 2830 | ||
| 2571 | nested_svm_intr(svm); | 2831 | /* |
| 2572 | 2832 | * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes | |
| 2573 | /* In case GIF=0 we can't rely on the CPU to tell us when | 2833 | * 1, because that's a separate STGI/VMRUN intercept. The next time we |
| 2574 | * GIF becomes 1, because that's a separate STGI/VMRUN intercept. | 2834 | * get that intercept, this function will be called again though and |
| 2575 | * The next time we get that intercept, this function will be | 2835 | * we'll get the vintr intercept. |
| 2576 | * called again though and we'll get the vintr intercept. */ | 2836 | */ |
| 2577 | if (gif_set(svm)) { | 2837 | if (gif_set(svm) && nested_svm_intr(svm)) { |
| 2578 | svm_set_vintr(svm); | 2838 | svm_set_vintr(svm); |
| 2579 | svm_inject_irq(svm, 0x0); | 2839 | svm_inject_irq(svm, 0x0); |
| 2580 | } | 2840 | } |
| @@ -2588,9 +2848,10 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) | |||
| 2588 | == HF_NMI_MASK) | 2848 | == HF_NMI_MASK) |
| 2589 | return; /* IRET will cause a vm exit */ | 2849 | return; /* IRET will cause a vm exit */ |
| 2590 | 2850 | ||
| 2591 | /* Something prevents NMI from been injected. Single step over | 2851 | /* |
| 2592 | possible problem (IRET or exception injection or interrupt | 2852 | * Something prevents NMI from been injected. Single step over possible |
| 2593 | shadow) */ | 2853 | * problem (IRET or exception injection or interrupt shadow) |
| 2854 | */ | ||
| 2594 | svm->nmi_singlestep = true; | 2855 | svm->nmi_singlestep = true; |
| 2595 | svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); | 2856 | svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); |
| 2596 | update_db_intercept(vcpu); | 2857 | update_db_intercept(vcpu); |
| @@ -2614,6 +2875,9 @@ static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu) | |||
| 2614 | { | 2875 | { |
| 2615 | struct vcpu_svm *svm = to_svm(vcpu); | 2876 | struct vcpu_svm *svm = to_svm(vcpu); |
| 2616 | 2877 | ||
| 2878 | if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK)) | ||
| 2879 | return; | ||
| 2880 | |||
| 2617 | if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) { | 2881 | if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) { |
| 2618 | int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; | 2882 | int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; |
| 2619 | kvm_set_cr8(vcpu, cr8); | 2883 | kvm_set_cr8(vcpu, cr8); |
| @@ -2625,6 +2889,9 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu) | |||
| 2625 | struct vcpu_svm *svm = to_svm(vcpu); | 2889 | struct vcpu_svm *svm = to_svm(vcpu); |
| 2626 | u64 cr8; | 2890 | u64 cr8; |
| 2627 | 2891 | ||
| 2892 | if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK)) | ||
| 2893 | return; | ||
| 2894 | |||
| 2628 | cr8 = kvm_get_cr8(vcpu); | 2895 | cr8 = kvm_get_cr8(vcpu); |
| 2629 | svm->vmcb->control.int_ctl &= ~V_TPR_MASK; | 2896 | svm->vmcb->control.int_ctl &= ~V_TPR_MASK; |
| 2630 | svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK; | 2897 | svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK; |
| @@ -2635,6 +2902,9 @@ static void svm_complete_interrupts(struct vcpu_svm *svm) | |||
| 2635 | u8 vector; | 2902 | u8 vector; |
| 2636 | int type; | 2903 | int type; |
| 2637 | u32 exitintinfo = svm->vmcb->control.exit_int_info; | 2904 | u32 exitintinfo = svm->vmcb->control.exit_int_info; |
| 2905 | unsigned int3_injected = svm->int3_injected; | ||
| 2906 | |||
| 2907 | svm->int3_injected = 0; | ||
| 2638 | 2908 | ||
| 2639 | if (svm->vcpu.arch.hflags & HF_IRET_MASK) | 2909 | if (svm->vcpu.arch.hflags & HF_IRET_MASK) |
| 2640 | svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK); | 2910 | svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK); |
| @@ -2654,18 +2924,25 @@ static void svm_complete_interrupts(struct vcpu_svm *svm) | |||
| 2654 | svm->vcpu.arch.nmi_injected = true; | 2924 | svm->vcpu.arch.nmi_injected = true; |
| 2655 | break; | 2925 | break; |
| 2656 | case SVM_EXITINTINFO_TYPE_EXEPT: | 2926 | case SVM_EXITINTINFO_TYPE_EXEPT: |
| 2657 | /* In case of software exception do not reinject an exception | 2927 | /* |
| 2658 | vector, but re-execute and instruction instead */ | 2928 | * In case of software exceptions, do not reinject the vector, |
| 2659 | if (is_nested(svm)) | 2929 | * but re-execute the instruction instead. Rewind RIP first |
| 2660 | break; | 2930 | * if we emulated INT3 before. |
| 2661 | if (kvm_exception_is_soft(vector)) | 2931 | */ |
| 2932 | if (kvm_exception_is_soft(vector)) { | ||
| 2933 | if (vector == BP_VECTOR && int3_injected && | ||
| 2934 | kvm_is_linear_rip(&svm->vcpu, svm->int3_rip)) | ||
| 2935 | kvm_rip_write(&svm->vcpu, | ||
| 2936 | kvm_rip_read(&svm->vcpu) - | ||
| 2937 | int3_injected); | ||
| 2662 | break; | 2938 | break; |
| 2939 | } | ||
| 2663 | if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) { | 2940 | if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) { |
| 2664 | u32 err = svm->vmcb->control.exit_int_info_err; | 2941 | u32 err = svm->vmcb->control.exit_int_info_err; |
| 2665 | kvm_queue_exception_e(&svm->vcpu, vector, err); | 2942 | kvm_requeue_exception_e(&svm->vcpu, vector, err); |
| 2666 | 2943 | ||
| 2667 | } else | 2944 | } else |
| 2668 | kvm_queue_exception(&svm->vcpu, vector); | 2945 | kvm_requeue_exception(&svm->vcpu, vector); |
| 2669 | break; | 2946 | break; |
| 2670 | case SVM_EXITINTINFO_TYPE_INTR: | 2947 | case SVM_EXITINTINFO_TYPE_INTR: |
| 2671 | kvm_queue_interrupt(&svm->vcpu, vector, false); | 2948 | kvm_queue_interrupt(&svm->vcpu, vector, false); |
| @@ -2688,6 +2965,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
| 2688 | u16 gs_selector; | 2965 | u16 gs_selector; |
| 2689 | u16 ldt_selector; | 2966 | u16 ldt_selector; |
| 2690 | 2967 | ||
| 2968 | svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; | ||
| 2969 | svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; | ||
| 2970 | svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; | ||
| 2971 | |||
| 2691 | /* | 2972 | /* |
| 2692 | * A vmexit emulation is required before the vcpu can be executed | 2973 | * A vmexit emulation is required before the vcpu can be executed |
| 2693 | * again. | 2974 | * again. |
| @@ -2695,10 +2976,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
| 2695 | if (unlikely(svm->nested.exit_required)) | 2976 | if (unlikely(svm->nested.exit_required)) |
| 2696 | return; | 2977 | return; |
| 2697 | 2978 | ||
| 2698 | svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; | ||
| 2699 | svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; | ||
| 2700 | svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; | ||
| 2701 | |||
| 2702 | pre_svm_run(svm); | 2979 | pre_svm_run(svm); |
| 2703 | 2980 | ||
| 2704 | sync_lapic_to_cr8(vcpu); | 2981 | sync_lapic_to_cr8(vcpu); |
| @@ -2879,25 +3156,39 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu) | |||
| 2879 | { | 3156 | { |
| 2880 | } | 3157 | } |
| 2881 | 3158 | ||
| 3159 | static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) | ||
| 3160 | { | ||
| 3161 | switch (func) { | ||
| 3162 | case 0x8000000A: | ||
| 3163 | entry->eax = 1; /* SVM revision 1 */ | ||
| 3164 | entry->ebx = 8; /* Lets support 8 ASIDs in case we add proper | ||
| 3165 | ASID emulation to nested SVM */ | ||
| 3166 | entry->ecx = 0; /* Reserved */ | ||
| 3167 | entry->edx = 0; /* Do not support any additional features */ | ||
| 3168 | |||
| 3169 | break; | ||
| 3170 | } | ||
| 3171 | } | ||
| 3172 | |||
| 2882 | static const struct trace_print_flags svm_exit_reasons_str[] = { | 3173 | static const struct trace_print_flags svm_exit_reasons_str[] = { |
| 2883 | { SVM_EXIT_READ_CR0, "read_cr0" }, | 3174 | { SVM_EXIT_READ_CR0, "read_cr0" }, |
| 2884 | { SVM_EXIT_READ_CR3, "read_cr3" }, | 3175 | { SVM_EXIT_READ_CR3, "read_cr3" }, |
| 2885 | { SVM_EXIT_READ_CR4, "read_cr4" }, | 3176 | { SVM_EXIT_READ_CR4, "read_cr4" }, |
| 2886 | { SVM_EXIT_READ_CR8, "read_cr8" }, | 3177 | { SVM_EXIT_READ_CR8, "read_cr8" }, |
| 2887 | { SVM_EXIT_WRITE_CR0, "write_cr0" }, | 3178 | { SVM_EXIT_WRITE_CR0, "write_cr0" }, |
| 2888 | { SVM_EXIT_WRITE_CR3, "write_cr3" }, | 3179 | { SVM_EXIT_WRITE_CR3, "write_cr3" }, |
| 2889 | { SVM_EXIT_WRITE_CR4, "write_cr4" }, | 3180 | { SVM_EXIT_WRITE_CR4, "write_cr4" }, |
| 2890 | { SVM_EXIT_WRITE_CR8, "write_cr8" }, | 3181 | { SVM_EXIT_WRITE_CR8, "write_cr8" }, |
| 2891 | { SVM_EXIT_READ_DR0, "read_dr0" }, | 3182 | { SVM_EXIT_READ_DR0, "read_dr0" }, |
| 2892 | { SVM_EXIT_READ_DR1, "read_dr1" }, | 3183 | { SVM_EXIT_READ_DR1, "read_dr1" }, |
| 2893 | { SVM_EXIT_READ_DR2, "read_dr2" }, | 3184 | { SVM_EXIT_READ_DR2, "read_dr2" }, |
| 2894 | { SVM_EXIT_READ_DR3, "read_dr3" }, | 3185 | { SVM_EXIT_READ_DR3, "read_dr3" }, |
| 2895 | { SVM_EXIT_WRITE_DR0, "write_dr0" }, | 3186 | { SVM_EXIT_WRITE_DR0, "write_dr0" }, |
| 2896 | { SVM_EXIT_WRITE_DR1, "write_dr1" }, | 3187 | { SVM_EXIT_WRITE_DR1, "write_dr1" }, |
| 2897 | { SVM_EXIT_WRITE_DR2, "write_dr2" }, | 3188 | { SVM_EXIT_WRITE_DR2, "write_dr2" }, |
| 2898 | { SVM_EXIT_WRITE_DR3, "write_dr3" }, | 3189 | { SVM_EXIT_WRITE_DR3, "write_dr3" }, |
| 2899 | { SVM_EXIT_WRITE_DR5, "write_dr5" }, | 3190 | { SVM_EXIT_WRITE_DR5, "write_dr5" }, |
| 2900 | { SVM_EXIT_WRITE_DR7, "write_dr7" }, | 3191 | { SVM_EXIT_WRITE_DR7, "write_dr7" }, |
| 2901 | { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, | 3192 | { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, |
| 2902 | { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, | 3193 | { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, |
| 2903 | { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, | 3194 | { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, |
| @@ -2946,8 +3237,10 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) | |||
| 2946 | { | 3237 | { |
| 2947 | struct vcpu_svm *svm = to_svm(vcpu); | 3238 | struct vcpu_svm *svm = to_svm(vcpu); |
| 2948 | 3239 | ||
| 2949 | update_cr0_intercept(svm); | ||
| 2950 | svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR; | 3240 | svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR; |
| 3241 | if (is_nested(svm)) | ||
| 3242 | svm->nested.hsave->control.intercept_exceptions |= 1 << NM_VECTOR; | ||
| 3243 | update_cr0_intercept(svm); | ||
| 2951 | } | 3244 | } |
| 2952 | 3245 | ||
| 2953 | static struct kvm_x86_ops svm_x86_ops = { | 3246 | static struct kvm_x86_ops svm_x86_ops = { |
| @@ -2986,8 +3279,7 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
| 2986 | .set_idt = svm_set_idt, | 3279 | .set_idt = svm_set_idt, |
| 2987 | .get_gdt = svm_get_gdt, | 3280 | .get_gdt = svm_get_gdt, |
| 2988 | .set_gdt = svm_set_gdt, | 3281 | .set_gdt = svm_set_gdt, |
| 2989 | .get_dr = svm_get_dr, | 3282 | .set_dr7 = svm_set_dr7, |
| 2990 | .set_dr = svm_set_dr, | ||
| 2991 | .cache_reg = svm_cache_reg, | 3283 | .cache_reg = svm_cache_reg, |
| 2992 | .get_rflags = svm_get_rflags, | 3284 | .get_rflags = svm_get_rflags, |
| 2993 | .set_rflags = svm_set_rflags, | 3285 | .set_rflags = svm_set_rflags, |
| @@ -3023,12 +3315,14 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
| 3023 | .cpuid_update = svm_cpuid_update, | 3315 | .cpuid_update = svm_cpuid_update, |
| 3024 | 3316 | ||
| 3025 | .rdtscp_supported = svm_rdtscp_supported, | 3317 | .rdtscp_supported = svm_rdtscp_supported, |
| 3318 | |||
| 3319 | .set_supported_cpuid = svm_set_supported_cpuid, | ||
| 3026 | }; | 3320 | }; |
| 3027 | 3321 | ||
| 3028 | static int __init svm_init(void) | 3322 | static int __init svm_init(void) |
| 3029 | { | 3323 | { |
| 3030 | return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm), | 3324 | return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm), |
| 3031 | THIS_MODULE); | 3325 | __alignof__(struct vcpu_svm), THIS_MODULE); |
| 3032 | } | 3326 | } |
| 3033 | 3327 | ||
| 3034 | static void __exit svm_exit(void) | 3328 | static void __exit svm_exit(void) |
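
Across the nested-SVM changes in svm.c above, the controlling idea is least privilege: the VMRUN path first drops the intercepts the host must not force on L2 (its own CR8 intercepts while the virtual interrupt is in use, and its VMMCALL intercept, so those exits reach L1 only if L1 asked for them), then ORs every intercept class from the nested VMCB into the active one, so L2 can only ever be trapped more than L1 requested, never less. The sketch below captures that merge policy; the struct layout and bit positions are illustrative, not the kernel's definitions.

```c
/* Sketch of the nested-SVM intercept merge policy: clear the bits the
 * host hypervisor reserves for itself, then OR in everything L1 asked
 * for.  Field names and mask values are invented for the example. */
#include <stdint.h>
#include <stdio.h>

struct vmcb_ctl {
	uint16_t cr_read;     /* per-CR read intercepts          */
	uint16_t cr_write;    /* per-CR write intercepts         */
	uint32_t exceptions;  /* per-vector exception intercepts */
	uint64_t intercept;   /* instruction/event intercepts    */
};

#define CR8_MASK     (1u << 8)    /* illustrative bit positions */
#define VMMCALL_MASK (1ull << 1)

static void merge_nested(struct vmcb_ctl *run, const struct vmcb_ctl *l1,
			 int vintr_in_use)
{
	if (vintr_in_use) {
		/* CR8 is virtualised for L2: keep only L1's own CR8 traps */
		run->cr_read  &= ~CR8_MASK;
		run->cr_write &= ~CR8_MASK;
	}

	/* VMMCALLs from L2 are L1's business, not the host's */
	run->intercept &= ~VMMCALL_MASK;

	/* Least privilege: L2 may only be trapped more, never less */
	run->cr_read    |= l1->cr_read;
	run->cr_write   |= l1->cr_write;
	run->exceptions |= l1->exceptions;
	run->intercept  |= l1->intercept;
}

int main(void)
{
	struct vmcb_ctl run = { .cr_write = CR8_MASK, .intercept = VMMCALL_MASK };
	struct vmcb_ctl l1  = { .exceptions = 1u << 14 /* #PF */ };

	merge_nested(&run, &l1, 1);
	printf("cr_write %#x exceptions %#x intercept %#llx\n",
	       run.cr_write, run.exceptions,
	       (unsigned long long)run.intercept);
	return 0;
}
```
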
diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c index eea40439066c..4ddadb1a5ffe 100644 --- a/arch/x86/kvm/timer.c +++ b/arch/x86/kvm/timer.c | |||
| @@ -12,7 +12,8 @@ static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer) | |||
| 12 | /* | 12 | /* |
| 13 | * There is a race window between reading and incrementing, but we do | 13 | * There is a race window between reading and incrementing, but we do |
| 14 | * not care about potentially loosing timer events in the !reinject | 14 | * not care about potentially loosing timer events in the !reinject |
| 15 | * case anyway. | 15 | * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked |
| 16 | * in vcpu_enter_guest. | ||
| 16 | */ | 17 | */ |
| 17 | if (ktimer->reinject || !atomic_read(&ktimer->pending)) { | 18 | if (ktimer->reinject || !atomic_read(&ktimer->pending)) { |
| 18 | atomic_inc(&ktimer->pending); | 19 | atomic_inc(&ktimer->pending); |
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 6ad30a29f044..a6544b8e7c0f 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h | |||
| @@ -5,8 +5,6 @@ | |||
| 5 | 5 | ||
| 6 | #undef TRACE_SYSTEM | 6 | #undef TRACE_SYSTEM |
| 7 | #define TRACE_SYSTEM kvm | 7 | #define TRACE_SYSTEM kvm |
| 8 | #define TRACE_INCLUDE_PATH arch/x86/kvm | ||
| 9 | #define TRACE_INCLUDE_FILE trace | ||
| 10 | 8 | ||
| 11 | /* | 9 | /* |
| 12 | * Tracepoint for guest mode entry. | 10 | * Tracepoint for guest mode entry. |
| @@ -184,8 +182,8 @@ TRACE_EVENT(kvm_apic, | |||
| 184 | * Tracepoint for kvm guest exit: | 182 | * Tracepoint for kvm guest exit: |
| 185 | */ | 183 | */ |
| 186 | TRACE_EVENT(kvm_exit, | 184 | TRACE_EVENT(kvm_exit, |
| 187 | TP_PROTO(unsigned int exit_reason, unsigned long guest_rip), | 185 | TP_PROTO(unsigned int exit_reason, struct kvm_vcpu *vcpu), |
| 188 | TP_ARGS(exit_reason, guest_rip), | 186 | TP_ARGS(exit_reason, vcpu), |
| 189 | 187 | ||
| 190 | TP_STRUCT__entry( | 188 | TP_STRUCT__entry( |
| 191 | __field( unsigned int, exit_reason ) | 189 | __field( unsigned int, exit_reason ) |
| @@ -194,7 +192,7 @@ TRACE_EVENT(kvm_exit, | |||
| 194 | 192 | ||
| 195 | TP_fast_assign( | 193 | TP_fast_assign( |
| 196 | __entry->exit_reason = exit_reason; | 194 | __entry->exit_reason = exit_reason; |
| 197 | __entry->guest_rip = guest_rip; | 195 | __entry->guest_rip = kvm_rip_read(vcpu); |
| 198 | ), | 196 | ), |
| 199 | 197 | ||
| 200 | TP_printk("reason %s rip 0x%lx", | 198 | TP_printk("reason %s rip 0x%lx", |
| @@ -221,6 +219,38 @@ TRACE_EVENT(kvm_inj_virq, | |||
| 221 | TP_printk("irq %u", __entry->irq) | 219 | TP_printk("irq %u", __entry->irq) |
| 222 | ); | 220 | ); |
| 223 | 221 | ||
| 222 | #define EXS(x) { x##_VECTOR, "#" #x } | ||
| 223 | |||
| 224 | #define kvm_trace_sym_exc \ | ||
| 225 | EXS(DE), EXS(DB), EXS(BP), EXS(OF), EXS(BR), EXS(UD), EXS(NM), \ | ||
| 226 | EXS(DF), EXS(TS), EXS(NP), EXS(SS), EXS(GP), EXS(PF), \ | ||
| 227 | EXS(MF), EXS(MC) | ||
| 228 | |||
| 229 | /* | ||
| 230 | * Tracepoint for kvm interrupt injection: | ||
| 231 | */ | ||
| 232 | TRACE_EVENT(kvm_inj_exception, | ||
| 233 | TP_PROTO(unsigned exception, bool has_error, unsigned error_code), | ||
| 234 | TP_ARGS(exception, has_error, error_code), | ||
| 235 | |||
| 236 | TP_STRUCT__entry( | ||
| 237 | __field( u8, exception ) | ||
| 238 | __field( u8, has_error ) | ||
| 239 | __field( u32, error_code ) | ||
| 240 | ), | ||
| 241 | |||
| 242 | TP_fast_assign( | ||
| 243 | __entry->exception = exception; | ||
| 244 | __entry->has_error = has_error; | ||
| 245 | __entry->error_code = error_code; | ||
| 246 | ), | ||
| 247 | |||
| 248 | TP_printk("%s (0x%x)", | ||
| 249 | __print_symbolic(__entry->exception, kvm_trace_sym_exc), | ||
| 250 | /* FIXME: don't print error_code if not present */ | ||
| 251 | __entry->has_error ? __entry->error_code : 0) | ||
| 252 | ); | ||
| 253 | |||
| 224 | /* | 254 | /* |
| 225 | * Tracepoint for page fault. | 255 | * Tracepoint for page fault. |
| 226 | */ | 256 | */ |
| @@ -413,12 +443,34 @@ TRACE_EVENT(kvm_nested_vmrun, | |||
| 413 | ), | 443 | ), |
| 414 | 444 | ||
| 415 | TP_printk("rip: 0x%016llx vmcb: 0x%016llx nrip: 0x%016llx int_ctl: 0x%08x " | 445 | TP_printk("rip: 0x%016llx vmcb: 0x%016llx nrip: 0x%016llx int_ctl: 0x%08x " |
| 416 | "event_inj: 0x%08x npt: %s\n", | 446 | "event_inj: 0x%08x npt: %s", |
| 417 | __entry->rip, __entry->vmcb, __entry->nested_rip, | 447 | __entry->rip, __entry->vmcb, __entry->nested_rip, |
| 418 | __entry->int_ctl, __entry->event_inj, | 448 | __entry->int_ctl, __entry->event_inj, |
| 419 | __entry->npt ? "on" : "off") | 449 | __entry->npt ? "on" : "off") |
| 420 | ); | 450 | ); |
| 421 | 451 | ||
| 452 | TRACE_EVENT(kvm_nested_intercepts, | ||
| 453 | TP_PROTO(__u16 cr_read, __u16 cr_write, __u32 exceptions, __u64 intercept), | ||
| 454 | TP_ARGS(cr_read, cr_write, exceptions, intercept), | ||
| 455 | |||
| 456 | TP_STRUCT__entry( | ||
| 457 | __field( __u16, cr_read ) | ||
| 458 | __field( __u16, cr_write ) | ||
| 459 | __field( __u32, exceptions ) | ||
| 460 | __field( __u64, intercept ) | ||
| 461 | ), | ||
| 462 | |||
| 463 | TP_fast_assign( | ||
| 464 | __entry->cr_read = cr_read; | ||
| 465 | __entry->cr_write = cr_write; | ||
| 466 | __entry->exceptions = exceptions; | ||
| 467 | __entry->intercept = intercept; | ||
| 468 | ), | ||
| 469 | |||
| 470 | TP_printk("cr_read: %04x cr_write: %04x excp: %08x intercept: %016llx", | ||
| 471 | __entry->cr_read, __entry->cr_write, __entry->exceptions, | ||
| 472 | __entry->intercept) | ||
| 473 | ); | ||
| 422 | /* | 474 | /* |
| 423 | * Tracepoint for #VMEXIT while nested | 475 | * Tracepoint for #VMEXIT while nested |
| 424 | */ | 476 | */ |
| @@ -447,7 +499,7 @@ TRACE_EVENT(kvm_nested_vmexit, | |||
| 447 | __entry->exit_int_info_err = exit_int_info_err; | 499 | __entry->exit_int_info_err = exit_int_info_err; |
| 448 | ), | 500 | ), |
| 449 | TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx " | 501 | TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx " |
| 450 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n", | 502 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x", |
| 451 | __entry->rip, | 503 | __entry->rip, |
| 452 | ftrace_print_symbols_seq(p, __entry->exit_code, | 504 | ftrace_print_symbols_seq(p, __entry->exit_code, |
| 453 | kvm_x86_ops->exit_reasons_str), | 505 | kvm_x86_ops->exit_reasons_str), |
| @@ -482,7 +534,7 @@ TRACE_EVENT(kvm_nested_vmexit_inject, | |||
| 482 | ), | 534 | ), |
| 483 | 535 | ||
| 484 | TP_printk("reason: %s ext_inf1: 0x%016llx " | 536 | TP_printk("reason: %s ext_inf1: 0x%016llx " |
| 485 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n", | 537 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x", |
| 486 | ftrace_print_symbols_seq(p, __entry->exit_code, | 538 | ftrace_print_symbols_seq(p, __entry->exit_code, |
| 487 | kvm_x86_ops->exit_reasons_str), | 539 | kvm_x86_ops->exit_reasons_str), |
| 488 | __entry->exit_info1, __entry->exit_info2, | 540 | __entry->exit_info1, __entry->exit_info2, |
| @@ -504,7 +556,7 @@ TRACE_EVENT(kvm_nested_intr_vmexit, | |||
| 504 | __entry->rip = rip | 556 | __entry->rip = rip |
| 505 | ), | 557 | ), |
| 506 | 558 | ||
| 507 | TP_printk("rip: 0x%016llx\n", __entry->rip) | 559 | TP_printk("rip: 0x%016llx", __entry->rip) |
| 508 | ); | 560 | ); |
| 509 | 561 | ||
| 510 | /* | 562 | /* |
| @@ -526,7 +578,7 @@ TRACE_EVENT(kvm_invlpga, | |||
| 526 | __entry->address = address; | 578 | __entry->address = address; |
| 527 | ), | 579 | ), |
| 528 | 580 | ||
| 529 | TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx\n", | 581 | TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx", |
| 530 | __entry->rip, __entry->asid, __entry->address) | 582 | __entry->rip, __entry->asid, __entry->address) |
| 531 | ); | 583 | ); |
| 532 | 584 | ||
| @@ -547,11 +599,102 @@ TRACE_EVENT(kvm_skinit, | |||
| 547 | __entry->slb = slb; | 599 | __entry->slb = slb; |
| 548 | ), | 600 | ), |
| 549 | 601 | ||
| 550 | TP_printk("rip: 0x%016llx slb: 0x%08x\n", | 602 | TP_printk("rip: 0x%016llx slb: 0x%08x", |
| 551 | __entry->rip, __entry->slb) | 603 | __entry->rip, __entry->slb) |
| 552 | ); | 604 | ); |
| 553 | 605 | ||
| 606 | #define __print_insn(insn, ilen) ({ \ | ||
| 607 | int i; \ | ||
| 608 | const char *ret = p->buffer + p->len; \ | ||
| 609 | \ | ||
| 610 | for (i = 0; i < ilen; ++i) \ | ||
| 611 | trace_seq_printf(p, " %02x", insn[i]); \ | ||
| 612 | trace_seq_printf(p, "%c", 0); \ | ||
| 613 | ret; \ | ||
| 614 | }) | ||
| 615 | |||
| 616 | #define KVM_EMUL_INSN_F_CR0_PE (1 << 0) | ||
| 617 | #define KVM_EMUL_INSN_F_EFL_VM (1 << 1) | ||
| 618 | #define KVM_EMUL_INSN_F_CS_D (1 << 2) | ||
| 619 | #define KVM_EMUL_INSN_F_CS_L (1 << 3) | ||
| 620 | |||
| 621 | #define kvm_trace_symbol_emul_flags \ | ||
| 622 | { 0, "real" }, \ | ||
| 623 | { KVM_EMUL_INSN_F_CR0_PE \ | ||
| 624 | | KVM_EMUL_INSN_F_EFL_VM, "vm16" }, \ | ||
| 625 | { KVM_EMUL_INSN_F_CR0_PE, "prot16" }, \ | ||
| 626 | { KVM_EMUL_INSN_F_CR0_PE \ | ||
| 627 | | KVM_EMUL_INSN_F_CS_D, "prot32" }, \ | ||
| 628 | { KVM_EMUL_INSN_F_CR0_PE \ | ||
| 629 | | KVM_EMUL_INSN_F_CS_L, "prot64" } | ||
| 630 | |||
| 631 | #define kei_decode_mode(mode) ({ \ | ||
| 632 | u8 flags = 0xff; \ | ||
| 633 | switch (mode) { \ | ||
| 634 | case X86EMUL_MODE_REAL: \ | ||
| 635 | flags = 0; \ | ||
| 636 | break; \ | ||
| 637 | case X86EMUL_MODE_VM86: \ | ||
| 638 | flags = KVM_EMUL_INSN_F_EFL_VM; \ | ||
| 639 | break; \ | ||
| 640 | case X86EMUL_MODE_PROT16: \ | ||
| 641 | flags = KVM_EMUL_INSN_F_CR0_PE; \ | ||
| 642 | break; \ | ||
| 643 | case X86EMUL_MODE_PROT32: \ | ||
| 644 | flags = KVM_EMUL_INSN_F_CR0_PE \ | ||
| 645 | | KVM_EMUL_INSN_F_CS_D; \ | ||
| 646 | break; \ | ||
| 647 | case X86EMUL_MODE_PROT64: \ | ||
| 648 | flags = KVM_EMUL_INSN_F_CR0_PE \ | ||
| 649 | | KVM_EMUL_INSN_F_CS_L; \ | ||
| 650 | break; \ | ||
| 651 | } \ | ||
| 652 | flags; \ | ||
| 653 | }) | ||
| 654 | |||
| 655 | TRACE_EVENT(kvm_emulate_insn, | ||
| 656 | TP_PROTO(struct kvm_vcpu *vcpu, __u8 failed), | ||
| 657 | TP_ARGS(vcpu, failed), | ||
| 658 | |||
| 659 | TP_STRUCT__entry( | ||
| 660 | __field( __u64, rip ) | ||
| 661 | __field( __u32, csbase ) | ||
| 662 | __field( __u8, len ) | ||
| 663 | __array( __u8, insn, 15 ) | ||
| 664 | __field( __u8, flags ) | ||
| 665 | __field( __u8, failed ) | ||
| 666 | ), | ||
| 667 | |||
| 668 | TP_fast_assign( | ||
| 669 | __entry->rip = vcpu->arch.emulate_ctxt.decode.fetch.start; | ||
| 670 | __entry->csbase = kvm_x86_ops->get_segment_base(vcpu, VCPU_SREG_CS); | ||
| 671 | __entry->len = vcpu->arch.emulate_ctxt.decode.eip | ||
| 672 | - vcpu->arch.emulate_ctxt.decode.fetch.start; | ||
| 673 | memcpy(__entry->insn, | ||
| 674 | vcpu->arch.emulate_ctxt.decode.fetch.data, | ||
| 675 | 15); | ||
| 676 | __entry->flags = kei_decode_mode(vcpu->arch.emulate_ctxt.mode); | ||
| 677 | __entry->failed = failed; | ||
| 678 | ), | ||
| 679 | |||
| 680 | TP_printk("%x:%llx:%s (%s)%s", | ||
| 681 | __entry->csbase, __entry->rip, | ||
| 682 | __print_insn(__entry->insn, __entry->len), | ||
| 683 | __print_symbolic(__entry->flags, | ||
| 684 | kvm_trace_symbol_emul_flags), | ||
| 685 | __entry->failed ? " failed" : "" | ||
| 686 | ) | ||
| 687 | ); | ||
| 688 | |||
| 689 | #define trace_kvm_emulate_insn_start(vcpu) trace_kvm_emulate_insn(vcpu, 0) | ||
| 690 | #define trace_kvm_emulate_insn_failed(vcpu) trace_kvm_emulate_insn(vcpu, 1) | ||
| 691 | |||
| 554 | #endif /* _TRACE_KVM_H */ | 692 | #endif /* _TRACE_KVM_H */ |
| 555 | 693 | ||
| 694 | #undef TRACE_INCLUDE_PATH | ||
| 695 | #define TRACE_INCLUDE_PATH arch/x86/kvm | ||
| 696 | #undef TRACE_INCLUDE_FILE | ||
| 697 | #define TRACE_INCLUDE_FILE trace | ||
| 698 | |||
| 556 | /* This part must be outside protection */ | 699 | /* This part must be outside protection */ |
| 557 | #include <trace/define_trace.h> | 700 | #include <trace/define_trace.h> |
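
The new kvm_emulate_insn tracepoint above records the emulator's CPU mode as a small flag byte (CR0.PE, EFLAGS.VM, CS.D, CS.L) so __print_symbolic() can render it as "real", "vm16", "prot16/32/64" in the trace output. The stand-alone rendition below shows the same encode-then-name idea; the flag values mirror the patch, but the enum, the lookup table, and the helper names are invented and deliberately kept self-consistent rather than copied verbatim.

```c
/* Stand-alone rendition of the mode -> flag byte -> name mapping used by
 * the kvm_emulate_insn tracepoint; a simplified illustration, not the
 * kernel's macros. */
#include <stdint.h>
#include <stdio.h>

#define INSN_F_CR0_PE (1 << 0)
#define INSN_F_EFL_VM (1 << 1)
#define INSN_F_CS_D   (1 << 2)
#define INSN_F_CS_L   (1 << 3)

enum emul_mode { MODE_REAL, MODE_VM86, MODE_PROT16, MODE_PROT32, MODE_PROT64 };

static uint8_t encode_mode(enum emul_mode mode)
{
	switch (mode) {
	case MODE_REAL:   return 0;
	case MODE_VM86:   return INSN_F_EFL_VM;
	case MODE_PROT16: return INSN_F_CR0_PE;
	case MODE_PROT32: return INSN_F_CR0_PE | INSN_F_CS_D;
	case MODE_PROT64: return INSN_F_CR0_PE | INSN_F_CS_L;
	}
	return 0xff; /* unknown mode */
}

static const struct { uint8_t flags; const char *name; } mode_names[] = {
	{ 0,                           "real"   },
	{ INSN_F_EFL_VM,               "vm16"   },
	{ INSN_F_CR0_PE,               "prot16" },
	{ INSN_F_CR0_PE | INSN_F_CS_D, "prot32" },
	{ INSN_F_CR0_PE | INSN_F_CS_L, "prot64" },
};

static const char *mode_name(uint8_t flags)
{
	unsigned int i;

	for (i = 0; i < sizeof(mode_names) / sizeof(mode_names[0]); i++)
		if (mode_names[i].flags == flags)
			return mode_names[i].name;
	return "unknown";
}

int main(void)
{
	enum emul_mode m;

	for (m = MODE_REAL; m <= MODE_PROT64; m++)
		printf("mode %d -> flags %#x -> %s\n",
		       (int)m, encode_mode(m), mode_name(encode_mode(m)));
	return 0;
}
```
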
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bc933cfb4e66..859a01a07dbf 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
| @@ -27,6 +27,7 @@ | |||
| 27 | #include <linux/moduleparam.h> | 27 | #include <linux/moduleparam.h> |
| 28 | #include <linux/ftrace_event.h> | 28 | #include <linux/ftrace_event.h> |
| 29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
| 30 | #include <linux/tboot.h> | ||
| 30 | #include "kvm_cache_regs.h" | 31 | #include "kvm_cache_regs.h" |
| 31 | #include "x86.h" | 32 | #include "x86.h" |
| 32 | 33 | ||
| @@ -98,6 +99,8 @@ module_param(ple_gap, int, S_IRUGO); | |||
| 98 | static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; | 99 | static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; |
| 99 | module_param(ple_window, int, S_IRUGO); | 100 | module_param(ple_window, int, S_IRUGO); |
| 100 | 101 | ||
| 102 | #define NR_AUTOLOAD_MSRS 1 | ||
| 103 | |||
| 101 | struct vmcs { | 104 | struct vmcs { |
| 102 | u32 revision_id; | 105 | u32 revision_id; |
| 103 | u32 abort; | 106 | u32 abort; |
| @@ -125,6 +128,11 @@ struct vcpu_vmx { | |||
| 125 | u64 msr_guest_kernel_gs_base; | 128 | u64 msr_guest_kernel_gs_base; |
| 126 | #endif | 129 | #endif |
| 127 | struct vmcs *vmcs; | 130 | struct vmcs *vmcs; |
| 131 | struct msr_autoload { | ||
| 132 | unsigned nr; | ||
| 133 | struct vmx_msr_entry guest[NR_AUTOLOAD_MSRS]; | ||
| 134 | struct vmx_msr_entry host[NR_AUTOLOAD_MSRS]; | ||
| 135 | } msr_autoload; | ||
| 128 | struct { | 136 | struct { |
| 129 | int loaded; | 137 | int loaded; |
| 130 | u16 fs_sel, gs_sel, ldt_sel; | 138 | u16 fs_sel, gs_sel, ldt_sel; |
| @@ -234,56 +242,56 @@ static const u32 vmx_msr_index[] = { | |||
| 234 | }; | 242 | }; |
| 235 | #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) | 243 | #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) |
| 236 | 244 | ||
| 237 | static inline int is_page_fault(u32 intr_info) | 245 | static inline bool is_page_fault(u32 intr_info) |
| 238 | { | 246 | { |
| 239 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | 247 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | |
| 240 | INTR_INFO_VALID_MASK)) == | 248 | INTR_INFO_VALID_MASK)) == |
| 241 | (INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK); | 249 | (INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK); |
| 242 | } | 250 | } |
| 243 | 251 | ||
| 244 | static inline int is_no_device(u32 intr_info) | 252 | static inline bool is_no_device(u32 intr_info) |
| 245 | { | 253 | { |
| 246 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | 254 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | |
| 247 | INTR_INFO_VALID_MASK)) == | 255 | INTR_INFO_VALID_MASK)) == |
| 248 | (INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK); | 256 | (INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK); |
| 249 | } | 257 | } |
| 250 | 258 | ||
| 251 | static inline int is_invalid_opcode(u32 intr_info) | 259 | static inline bool is_invalid_opcode(u32 intr_info) |
| 252 | { | 260 | { |
| 253 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | 261 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | |
| 254 | INTR_INFO_VALID_MASK)) == | 262 | INTR_INFO_VALID_MASK)) == |
| 255 | (INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK); | 263 | (INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK); |
| 256 | } | 264 | } |
| 257 | 265 | ||
| 258 | static inline int is_external_interrupt(u32 intr_info) | 266 | static inline bool is_external_interrupt(u32 intr_info) |
| 259 | { | 267 | { |
| 260 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) | 268 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) |
| 261 | == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); | 269 | == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); |
| 262 | } | 270 | } |
| 263 | 271 | ||
| 264 | static inline int is_machine_check(u32 intr_info) | 272 | static inline bool is_machine_check(u32 intr_info) |
| 265 | { | 273 | { |
| 266 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | 274 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | |
| 267 | INTR_INFO_VALID_MASK)) == | 275 | INTR_INFO_VALID_MASK)) == |
| 268 | (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK); | 276 | (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK); |
| 269 | } | 277 | } |
| 270 | 278 | ||
| 271 | static inline int cpu_has_vmx_msr_bitmap(void) | 279 | static inline bool cpu_has_vmx_msr_bitmap(void) |
| 272 | { | 280 | { |
| 273 | return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS; | 281 | return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS; |
| 274 | } | 282 | } |
| 275 | 283 | ||
| 276 | static inline int cpu_has_vmx_tpr_shadow(void) | 284 | static inline bool cpu_has_vmx_tpr_shadow(void) |
| 277 | { | 285 | { |
| 278 | return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW; | 286 | return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW; |
| 279 | } | 287 | } |
| 280 | 288 | ||
| 281 | static inline int vm_need_tpr_shadow(struct kvm *kvm) | 289 | static inline bool vm_need_tpr_shadow(struct kvm *kvm) |
| 282 | { | 290 | { |
| 283 | return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm)); | 291 | return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm)); |
| 284 | } | 292 | } |
| 285 | 293 | ||
| 286 | static inline int cpu_has_secondary_exec_ctrls(void) | 294 | static inline bool cpu_has_secondary_exec_ctrls(void) |
| 287 | { | 295 | { |
| 288 | return vmcs_config.cpu_based_exec_ctrl & | 296 | return vmcs_config.cpu_based_exec_ctrl & |
| 289 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; | 297 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; |
| @@ -303,80 +311,80 @@ static inline bool cpu_has_vmx_flexpriority(void) | |||
| 303 | 311 | ||
| 304 | static inline bool cpu_has_vmx_ept_execute_only(void) | 312 | static inline bool cpu_has_vmx_ept_execute_only(void) |
| 305 | { | 313 | { |
| 306 | return !!(vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT); | 314 | return vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT; |
| 307 | } | 315 | } |
| 308 | 316 | ||
| 309 | static inline bool cpu_has_vmx_eptp_uncacheable(void) | 317 | static inline bool cpu_has_vmx_eptp_uncacheable(void) |
| 310 | { | 318 | { |
| 311 | return !!(vmx_capability.ept & VMX_EPTP_UC_BIT); | 319 | return vmx_capability.ept & VMX_EPTP_UC_BIT; |
| 312 | } | 320 | } |
| 313 | 321 | ||
| 314 | static inline bool cpu_has_vmx_eptp_writeback(void) | 322 | static inline bool cpu_has_vmx_eptp_writeback(void) |
| 315 | { | 323 | { |
| 316 | return !!(vmx_capability.ept & VMX_EPTP_WB_BIT); | 324 | return vmx_capability.ept & VMX_EPTP_WB_BIT; |
| 317 | } | 325 | } |
| 318 | 326 | ||
| 319 | static inline bool cpu_has_vmx_ept_2m_page(void) | 327 | static inline bool cpu_has_vmx_ept_2m_page(void) |
| 320 | { | 328 | { |
| 321 | return !!(vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT); | 329 | return vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT; |
| 322 | } | 330 | } |
| 323 | 331 | ||
| 324 | static inline bool cpu_has_vmx_ept_1g_page(void) | 332 | static inline bool cpu_has_vmx_ept_1g_page(void) |
| 325 | { | 333 | { |
| 326 | return !!(vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT); | 334 | return vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT; |
| 327 | } | 335 | } |
| 328 | 336 | ||
| 329 | static inline int cpu_has_vmx_invept_individual_addr(void) | 337 | static inline bool cpu_has_vmx_invept_individual_addr(void) |
| 330 | { | 338 | { |
| 331 | return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT); | 339 | return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT; |
| 332 | } | 340 | } |
| 333 | 341 | ||
| 334 | static inline int cpu_has_vmx_invept_context(void) | 342 | static inline bool cpu_has_vmx_invept_context(void) |
| 335 | { | 343 | { |
| 336 | return !!(vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT); | 344 | return vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT; |
| 337 | } | 345 | } |
| 338 | 346 | ||
| 339 | static inline int cpu_has_vmx_invept_global(void) | 347 | static inline bool cpu_has_vmx_invept_global(void) |
| 340 | { | 348 | { |
| 341 | return !!(vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT); | 349 | return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT; |
| 342 | } | 350 | } |
| 343 | 351 | ||
| 344 | static inline int cpu_has_vmx_ept(void) | 352 | static inline bool cpu_has_vmx_ept(void) |
| 345 | { | 353 | { |
| 346 | return vmcs_config.cpu_based_2nd_exec_ctrl & | 354 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
| 347 | SECONDARY_EXEC_ENABLE_EPT; | 355 | SECONDARY_EXEC_ENABLE_EPT; |
| 348 | } | 356 | } |
| 349 | 357 | ||
| 350 | static inline int cpu_has_vmx_unrestricted_guest(void) | 358 | static inline bool cpu_has_vmx_unrestricted_guest(void) |
| 351 | { | 359 | { |
| 352 | return vmcs_config.cpu_based_2nd_exec_ctrl & | 360 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
| 353 | SECONDARY_EXEC_UNRESTRICTED_GUEST; | 361 | SECONDARY_EXEC_UNRESTRICTED_GUEST; |
| 354 | } | 362 | } |
| 355 | 363 | ||
| 356 | static inline int cpu_has_vmx_ple(void) | 364 | static inline bool cpu_has_vmx_ple(void) |
| 357 | { | 365 | { |
| 358 | return vmcs_config.cpu_based_2nd_exec_ctrl & | 366 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
| 359 | SECONDARY_EXEC_PAUSE_LOOP_EXITING; | 367 | SECONDARY_EXEC_PAUSE_LOOP_EXITING; |
| 360 | } | 368 | } |
| 361 | 369 | ||
| 362 | static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) | 370 | static inline bool vm_need_virtualize_apic_accesses(struct kvm *kvm) |
| 363 | { | 371 | { |
| 364 | return flexpriority_enabled && irqchip_in_kernel(kvm); | 372 | return flexpriority_enabled && irqchip_in_kernel(kvm); |
| 365 | } | 373 | } |
| 366 | 374 | ||
| 367 | static inline int cpu_has_vmx_vpid(void) | 375 | static inline bool cpu_has_vmx_vpid(void) |
| 368 | { | 376 | { |
| 369 | return vmcs_config.cpu_based_2nd_exec_ctrl & | 377 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
| 370 | SECONDARY_EXEC_ENABLE_VPID; | 378 | SECONDARY_EXEC_ENABLE_VPID; |
| 371 | } | 379 | } |
| 372 | 380 | ||
| 373 | static inline int cpu_has_vmx_rdtscp(void) | 381 | static inline bool cpu_has_vmx_rdtscp(void) |
| 374 | { | 382 | { |
| 375 | return vmcs_config.cpu_based_2nd_exec_ctrl & | 383 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
| 376 | SECONDARY_EXEC_RDTSCP; | 384 | SECONDARY_EXEC_RDTSCP; |
| 377 | } | 385 | } |
| 378 | 386 | ||
| 379 | static inline int cpu_has_virtual_nmis(void) | 387 | static inline bool cpu_has_virtual_nmis(void) |
| 380 | { | 388 | { |
| 381 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; | 389 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; |
| 382 | } | 390 | } |
| @@ -595,16 +603,56 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) | |||
| 595 | vmcs_write32(EXCEPTION_BITMAP, eb); | 603 | vmcs_write32(EXCEPTION_BITMAP, eb); |
| 596 | } | 604 | } |
| 597 | 605 | ||
| 606 | static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) | ||
| 607 | { | ||
| 608 | unsigned i; | ||
| 609 | struct msr_autoload *m = &vmx->msr_autoload; | ||
| 610 | |||
| 611 | for (i = 0; i < m->nr; ++i) | ||
| 612 | if (m->guest[i].index == msr) | ||
| 613 | break; | ||
| 614 | |||
| 615 | if (i == m->nr) | ||
| 616 | return; | ||
| 617 | --m->nr; | ||
| 618 | m->guest[i] = m->guest[m->nr]; | ||
| 619 | m->host[i] = m->host[m->nr]; | ||
| 620 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr); | ||
| 621 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr); | ||
| 622 | } | ||
| 623 | |||
| 624 | static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, | ||
| 625 | u64 guest_val, u64 host_val) | ||
| 626 | { | ||
| 627 | unsigned i; | ||
| 628 | struct msr_autoload *m = &vmx->msr_autoload; | ||
| 629 | |||
| 630 | for (i = 0; i < m->nr; ++i) | ||
| 631 | if (m->guest[i].index == msr) | ||
| 632 | break; | ||
| 633 | |||
| 634 | if (i == m->nr) { | ||
| 635 | ++m->nr; | ||
| 636 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr); | ||
| 637 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr); | ||
| 638 | } | ||
| 639 | |||
| 640 | m->guest[i].index = msr; | ||
| 641 | m->guest[i].value = guest_val; | ||
| 642 | m->host[i].index = msr; | ||
| 643 | m->host[i].value = host_val; | ||
| 644 | } | ||
| 645 | |||
| 598 | static void reload_tss(void) | 646 | static void reload_tss(void) |
| 599 | { | 647 | { |
| 600 | /* | 648 | /* |
| 601 | * VT restores TR but not its size. Useless. | 649 | * VT restores TR but not its size. Useless. |
| 602 | */ | 650 | */ |
| 603 | struct descriptor_table gdt; | 651 | struct desc_ptr gdt; |
| 604 | struct desc_struct *descs; | 652 | struct desc_struct *descs; |
| 605 | 653 | ||
| 606 | kvm_get_gdt(&gdt); | 654 | native_store_gdt(&gdt); |
| 607 | descs = (void *)gdt.base; | 655 | descs = (void *)gdt.address; |
| 608 | descs[GDT_ENTRY_TSS].type = 9; /* available TSS */ | 656 | descs[GDT_ENTRY_TSS].type = 9; /* available TSS */ |
| 609 | load_TR_desc(); | 657 | load_TR_desc(); |
| 610 | } | 658 | } |
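The clear_atomic_switch_msr()/add_atomic_switch_msr() pair introduced above keeps the VM-entry and VM-exit MSR autoload lists in lockstep: the arrays are unordered, removal moves the last slot into the hole, and both load counts are rewritten so the CPU switches exactly the listed MSRs around guest entry and exit. A minimal user-space sketch of the same bookkeeping, using plain arrays and a made-up msr_slot type in place of the VMCS plumbing:

    #include <stdio.h>

    /* Hypothetical stand-in for the VMCS autoload entries. */
    struct msr_slot { unsigned index; unsigned long long value; };

    #define MAX_AUTOLOAD 8
    static struct msr_slot guest[MAX_AUTOLOAD], host[MAX_AUTOLOAD];
    static unsigned nr;   /* mirrors VM_{ENTRY,EXIT}_MSR_LOAD_COUNT */

    static void add_switch_msr(unsigned msr, unsigned long long gval,
                               unsigned long long hval)
    {
        unsigned i;

        for (i = 0; i < nr; ++i)        /* reuse an existing slot if present */
            if (guest[i].index == msr)
                break;

        if (i == nr) {
            if (nr == MAX_AUTOLOAD)
                return;                 /* sketch only: table full */
            ++nr;                       /* grow both lists together */
        }
        guest[i].index = msr; guest[i].value = gval;
        host[i].index  = msr; host[i].value  = hval;
    }

    static void clear_switch_msr(unsigned msr)
    {
        unsigned i;

        for (i = 0; i < nr; ++i)
            if (guest[i].index == msr)
                break;
        if (i == nr)
            return;                     /* not in the list */
        --nr;                           /* swap the last entry into the hole */
        guest[i] = guest[nr];
        host[i]  = host[nr];
    }

    int main(void)
    {
        add_switch_msr(0xc0000080 /* EFER */, 0x500, 0xd01);
        clear_switch_msr(0xc0000080);
        printf("entries: %u\n", nr);    /* back to 0 */
        return 0;
    }

Keeping the list unordered is what makes removal O(1); the kernel version additionally writes nr back into VM_ENTRY_MSR_LOAD_COUNT and VM_EXIT_MSR_LOAD_COUNT after every change, as the hunk above shows.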
| @@ -631,9 +679,57 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) | |||
| 631 | guest_efer |= host_efer & ignore_bits; | 679 | guest_efer |= host_efer & ignore_bits; |
| 632 | vmx->guest_msrs[efer_offset].data = guest_efer; | 680 | vmx->guest_msrs[efer_offset].data = guest_efer; |
| 633 | vmx->guest_msrs[efer_offset].mask = ~ignore_bits; | 681 | vmx->guest_msrs[efer_offset].mask = ~ignore_bits; |
| 682 | |||
| 683 | clear_atomic_switch_msr(vmx, MSR_EFER); | ||
| 684 | /* On ept, can't emulate nx, and must switch nx atomically */ | ||
| 685 | if (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX)) { | ||
| 686 | guest_efer = vmx->vcpu.arch.efer; | ||
| 687 | if (!(guest_efer & EFER_LMA)) | ||
| 688 | guest_efer &= ~EFER_LME; | ||
| 689 | add_atomic_switch_msr(vmx, MSR_EFER, guest_efer, host_efer); | ||
| 690 | return false; | ||
| 691 | } | ||
| 692 | |||
| 634 | return true; | 693 | return true; |
| 635 | } | 694 | } |
| 636 | 695 | ||
| 696 | static unsigned long segment_base(u16 selector) | ||
| 697 | { | ||
| 698 | struct desc_ptr gdt; | ||
| 699 | struct desc_struct *d; | ||
| 700 | unsigned long table_base; | ||
| 701 | unsigned long v; | ||
| 702 | |||
| 703 | if (!(selector & ~3)) | ||
| 704 | return 0; | ||
| 705 | |||
| 706 | native_store_gdt(&gdt); | ||
| 707 | table_base = gdt.address; | ||
| 708 | |||
| 709 | if (selector & 4) { /* from ldt */ | ||
| 710 | u16 ldt_selector = kvm_read_ldt(); | ||
| 711 | |||
| 712 | if (!(ldt_selector & ~3)) | ||
| 713 | return 0; | ||
| 714 | |||
| 715 | table_base = segment_base(ldt_selector); | ||
| 716 | } | ||
| 717 | d = (struct desc_struct *)(table_base + (selector & ~7)); | ||
| 718 | v = get_desc_base(d); | ||
| 719 | #ifdef CONFIG_X86_64 | ||
| 720 | if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11)) | ||
| 721 | v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32; | ||
| 722 | #endif | ||
| 723 | return v; | ||
| 724 | } | ||
| 725 | |||
| 726 | static inline unsigned long kvm_read_tr_base(void) | ||
| 727 | { | ||
| 728 | u16 tr; | ||
| 729 | asm("str %0" : "=g"(tr)); | ||
| 730 | return segment_base(tr); | ||
| 731 | } | ||
| 732 | |||
| 637 | static void vmx_save_host_state(struct kvm_vcpu *vcpu) | 733 | static void vmx_save_host_state(struct kvm_vcpu *vcpu) |
| 638 | { | 734 | { |
| 639 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 735 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| @@ -758,7 +854,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
| 758 | } | 854 | } |
| 759 | 855 | ||
| 760 | if (vcpu->cpu != cpu) { | 856 | if (vcpu->cpu != cpu) { |
| 761 | struct descriptor_table dt; | 857 | struct desc_ptr dt; |
| 762 | unsigned long sysenter_esp; | 858 | unsigned long sysenter_esp; |
| 763 | 859 | ||
| 764 | vcpu->cpu = cpu; | 860 | vcpu->cpu = cpu; |
| @@ -767,8 +863,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
| 767 | * processors. | 863 | * processors. |
| 768 | */ | 864 | */ |
| 769 | vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */ | 865 | vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */ |
| 770 | kvm_get_gdt(&dt); | 866 | native_store_gdt(&dt); |
| 771 | vmcs_writel(HOST_GDTR_BASE, dt.base); /* 22.2.4 */ | 867 | vmcs_writel(HOST_GDTR_BASE, dt.address); /* 22.2.4 */ |
| 772 | 868 | ||
| 773 | rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); | 869 | rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); |
| 774 | vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ | 870 | vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ |
| @@ -846,9 +942,9 @@ static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) | |||
| 846 | int ret = 0; | 942 | int ret = 0; |
| 847 | 943 | ||
| 848 | if (interruptibility & GUEST_INTR_STATE_STI) | 944 | if (interruptibility & GUEST_INTR_STATE_STI) |
| 849 | ret |= X86_SHADOW_INT_STI; | 945 | ret |= KVM_X86_SHADOW_INT_STI; |
| 850 | if (interruptibility & GUEST_INTR_STATE_MOV_SS) | 946 | if (interruptibility & GUEST_INTR_STATE_MOV_SS) |
| 851 | ret |= X86_SHADOW_INT_MOV_SS; | 947 | ret |= KVM_X86_SHADOW_INT_MOV_SS; |
| 852 | 948 | ||
| 853 | return ret & mask; | 949 | return ret & mask; |
| 854 | } | 950 | } |
| @@ -860,9 +956,9 @@ static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) | |||
| 860 | 956 | ||
| 861 | interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS); | 957 | interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS); |
| 862 | 958 | ||
| 863 | if (mask & X86_SHADOW_INT_MOV_SS) | 959 | if (mask & KVM_X86_SHADOW_INT_MOV_SS) |
| 864 | interruptibility |= GUEST_INTR_STATE_MOV_SS; | 960 | interruptibility |= GUEST_INTR_STATE_MOV_SS; |
| 865 | if (mask & X86_SHADOW_INT_STI) | 961 | else if (mask & KVM_X86_SHADOW_INT_STI) |
| 866 | interruptibility |= GUEST_INTR_STATE_STI; | 962 | interruptibility |= GUEST_INTR_STATE_STI; |
| 867 | 963 | ||
| 868 | if ((interruptibility != interruptibility_old)) | 964 | if ((interruptibility != interruptibility_old)) |
| @@ -882,7 +978,8 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
| 882 | } | 978 | } |
| 883 | 979 | ||
| 884 | static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | 980 | static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, |
| 885 | bool has_error_code, u32 error_code) | 981 | bool has_error_code, u32 error_code, |
| 982 | bool reinject) | ||
| 886 | { | 983 | { |
| 887 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 984 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 888 | u32 intr_info = nr | INTR_INFO_VALID_MASK; | 985 | u32 intr_info = nr | INTR_INFO_VALID_MASK; |
| @@ -1176,9 +1273,16 @@ static __init int vmx_disabled_by_bios(void) | |||
| 1176 | u64 msr; | 1273 | u64 msr; |
| 1177 | 1274 | ||
| 1178 | rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); | 1275 | rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); |
| 1179 | return (msr & (FEATURE_CONTROL_LOCKED | | 1276 | if (msr & FEATURE_CONTROL_LOCKED) { |
| 1180 | FEATURE_CONTROL_VMXON_ENABLED)) | 1277 | if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX) |
| 1181 | == FEATURE_CONTROL_LOCKED; | 1278 | && tboot_enabled()) |
| 1279 | return 1; | ||
| 1280 | if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) | ||
| 1281 | && !tboot_enabled()) | ||
| 1282 | return 1; | ||
| 1283 | } | ||
| 1284 | |||
| 1285 | return 0; | ||
| 1182 | /* locked but not enabled */ | 1286 | /* locked but not enabled */ |
| 1183 | } | 1287 | } |
| 1184 | 1288 | ||
| @@ -1186,21 +1290,23 @@ static int hardware_enable(void *garbage) | |||
| 1186 | { | 1290 | { |
| 1187 | int cpu = raw_smp_processor_id(); | 1291 | int cpu = raw_smp_processor_id(); |
| 1188 | u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); | 1292 | u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); |
| 1189 | u64 old; | 1293 | u64 old, test_bits; |
| 1190 | 1294 | ||
| 1191 | if (read_cr4() & X86_CR4_VMXE) | 1295 | if (read_cr4() & X86_CR4_VMXE) |
| 1192 | return -EBUSY; | 1296 | return -EBUSY; |
| 1193 | 1297 | ||
| 1194 | INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); | 1298 | INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); |
| 1195 | rdmsrl(MSR_IA32_FEATURE_CONTROL, old); | 1299 | rdmsrl(MSR_IA32_FEATURE_CONTROL, old); |
| 1196 | if ((old & (FEATURE_CONTROL_LOCKED | | 1300 | |
| 1197 | FEATURE_CONTROL_VMXON_ENABLED)) | 1301 | test_bits = FEATURE_CONTROL_LOCKED; |
| 1198 | != (FEATURE_CONTROL_LOCKED | | 1302 | test_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; |
| 1199 | FEATURE_CONTROL_VMXON_ENABLED)) | 1303 | if (tboot_enabled()) |
| 1304 | test_bits |= FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX; | ||
| 1305 | |||
| 1306 | if ((old & test_bits) != test_bits) { | ||
| 1200 | /* enable and lock */ | 1307 | /* enable and lock */ |
| 1201 | wrmsrl(MSR_IA32_FEATURE_CONTROL, old | | 1308 | wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits); |
| 1202 | FEATURE_CONTROL_LOCKED | | 1309 | } |
| 1203 | FEATURE_CONTROL_VMXON_ENABLED); | ||
| 1204 | write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ | 1310 | write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ |
| 1205 | asm volatile (ASM_VMX_VMXON_RAX | 1311 | asm volatile (ASM_VMX_VMXON_RAX |
| 1206 | : : "a"(&phys_addr), "m"(phys_addr) | 1312 | : : "a"(&phys_addr), "m"(phys_addr) |
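With Intel TXT in the picture, vmx_disabled_by_bios() and hardware_enable() now distinguish the inside-SMX and outside-SMX VMXON enable bits in IA32_FEATURE_CONTROL: the lock bit plus the outside-SMX enable are always required, the inside-SMX enable is required only when tboot is active, and if the MSR is still unlocked hardware_enable() sets and locks exactly those bits itself. A small stand-alone sketch of that predicate (bit positions as documented in the SDM; the helper name is invented):

    #include <stdbool.h>
    #include <stdint.h>

    #define FC_LOCKED            (1ULL << 0)
    #define FC_VMXON_INSIDE_SMX  (1ULL << 1)
    #define FC_VMXON_OUTSIDE_SMX (1ULL << 2)

    /* True when VMXON is usable, or the MSR is still unlocked so the
     * hypervisor may enable and lock it; mirrors the test_bits logic. */
    static bool vmxon_allowed(uint64_t feature_control, bool tboot)
    {
        uint64_t need = FC_LOCKED | FC_VMXON_OUTSIDE_SMX;

        if (tboot)
            need |= FC_VMXON_INSIDE_SMX;

        if ((feature_control & need) == need)
            return true;                    /* locked with everything we need */

        return !(feature_control & FC_LOCKED); /* unlocked: we can set it */
    }

    int main(void)
    {
        /* BIOS locked the MSR with only the outside-SMX enable bit set. */
        return vmxon_allowed(FC_LOCKED | FC_VMXON_OUTSIDE_SMX, false) ? 0 : 1;
    }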
| @@ -1521,7 +1627,7 @@ static gva_t rmode_tss_base(struct kvm *kvm) | |||
| 1521 | struct kvm_memslots *slots; | 1627 | struct kvm_memslots *slots; |
| 1522 | gfn_t base_gfn; | 1628 | gfn_t base_gfn; |
| 1523 | 1629 | ||
| 1524 | slots = rcu_dereference(kvm->memslots); | 1630 | slots = kvm_memslots(kvm); |
| 1525 | base_gfn = kvm->memslots->memslots[0].base_gfn + | 1631 | base_gfn = kvm->memslots->memslots[0].base_gfn + |
| 1526 | kvm->memslots->memslots[0].npages - 3; | 1632 | kvm->memslots->memslots[0].npages - 3; |
| 1527 | return base_gfn << PAGE_SHIFT; | 1633 | return base_gfn << PAGE_SHIFT; |
| @@ -1649,6 +1755,7 @@ static void exit_lmode(struct kvm_vcpu *vcpu) | |||
| 1649 | vmcs_write32(VM_ENTRY_CONTROLS, | 1755 | vmcs_write32(VM_ENTRY_CONTROLS, |
| 1650 | vmcs_read32(VM_ENTRY_CONTROLS) | 1756 | vmcs_read32(VM_ENTRY_CONTROLS) |
| 1651 | & ~VM_ENTRY_IA32E_MODE); | 1757 | & ~VM_ENTRY_IA32E_MODE); |
| 1758 | vmx_set_efer(vcpu, vcpu->arch.efer); | ||
| 1652 | } | 1759 | } |
| 1653 | 1760 | ||
| 1654 | #endif | 1761 | #endif |
| @@ -1934,28 +2041,28 @@ static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) | |||
| 1934 | *l = (ar >> 13) & 1; | 2041 | *l = (ar >> 13) & 1; |
| 1935 | } | 2042 | } |
| 1936 | 2043 | ||
| 1937 | static void vmx_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 2044 | static void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
| 1938 | { | 2045 | { |
| 1939 | dt->limit = vmcs_read32(GUEST_IDTR_LIMIT); | 2046 | dt->size = vmcs_read32(GUEST_IDTR_LIMIT); |
| 1940 | dt->base = vmcs_readl(GUEST_IDTR_BASE); | 2047 | dt->address = vmcs_readl(GUEST_IDTR_BASE); |
| 1941 | } | 2048 | } |
| 1942 | 2049 | ||
| 1943 | static void vmx_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 2050 | static void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
| 1944 | { | 2051 | { |
| 1945 | vmcs_write32(GUEST_IDTR_LIMIT, dt->limit); | 2052 | vmcs_write32(GUEST_IDTR_LIMIT, dt->size); |
| 1946 | vmcs_writel(GUEST_IDTR_BASE, dt->base); | 2053 | vmcs_writel(GUEST_IDTR_BASE, dt->address); |
| 1947 | } | 2054 | } |
| 1948 | 2055 | ||
| 1949 | static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 2056 | static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
| 1950 | { | 2057 | { |
| 1951 | dt->limit = vmcs_read32(GUEST_GDTR_LIMIT); | 2058 | dt->size = vmcs_read32(GUEST_GDTR_LIMIT); |
| 1952 | dt->base = vmcs_readl(GUEST_GDTR_BASE); | 2059 | dt->address = vmcs_readl(GUEST_GDTR_BASE); |
| 1953 | } | 2060 | } |
| 1954 | 2061 | ||
| 1955 | static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | 2062 | static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) |
| 1956 | { | 2063 | { |
| 1957 | vmcs_write32(GUEST_GDTR_LIMIT, dt->limit); | 2064 | vmcs_write32(GUEST_GDTR_LIMIT, dt->size); |
| 1958 | vmcs_writel(GUEST_GDTR_BASE, dt->base); | 2065 | vmcs_writel(GUEST_GDTR_BASE, dt->address); |
| 1959 | } | 2066 | } |
| 1960 | 2067 | ||
| 1961 | static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) | 2068 | static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) |
| @@ -2296,6 +2403,16 @@ static void allocate_vpid(struct vcpu_vmx *vmx) | |||
| 2296 | spin_unlock(&vmx_vpid_lock); | 2403 | spin_unlock(&vmx_vpid_lock); |
| 2297 | } | 2404 | } |
| 2298 | 2405 | ||
| 2406 | static void free_vpid(struct vcpu_vmx *vmx) | ||
| 2407 | { | ||
| 2408 | if (!enable_vpid) | ||
| 2409 | return; | ||
| 2410 | spin_lock(&vmx_vpid_lock); | ||
| 2411 | if (vmx->vpid != 0) | ||
| 2412 | __clear_bit(vmx->vpid, vmx_vpid_bitmap); | ||
| 2413 | spin_unlock(&vmx_vpid_lock); | ||
| 2414 | } | ||
| 2415 | |||
| 2299 | static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr) | 2416 | static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr) |
| 2300 | { | 2417 | { |
| 2301 | int f = sizeof(unsigned long); | 2418 | int f = sizeof(unsigned long); |
| @@ -2334,7 +2451,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
| 2334 | u32 junk; | 2451 | u32 junk; |
| 2335 | u64 host_pat, tsc_this, tsc_base; | 2452 | u64 host_pat, tsc_this, tsc_base; |
| 2336 | unsigned long a; | 2453 | unsigned long a; |
| 2337 | struct descriptor_table dt; | 2454 | struct desc_ptr dt; |
| 2338 | int i; | 2455 | int i; |
| 2339 | unsigned long kvm_vmx_return; | 2456 | unsigned long kvm_vmx_return; |
| 2340 | u32 exec_control; | 2457 | u32 exec_control; |
| @@ -2415,14 +2532,16 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
| 2415 | 2532 | ||
| 2416 | vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ | 2533 | vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ |
| 2417 | 2534 | ||
| 2418 | kvm_get_idt(&dt); | 2535 | native_store_idt(&dt); |
| 2419 | vmcs_writel(HOST_IDTR_BASE, dt.base); /* 22.2.4 */ | 2536 | vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */ |
| 2420 | 2537 | ||
| 2421 | asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return)); | 2538 | asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return)); |
| 2422 | vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */ | 2539 | vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */ |
| 2423 | vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); | 2540 | vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); |
| 2424 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); | 2541 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); |
| 2542 | vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host)); | ||
| 2425 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); | 2543 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); |
| 2544 | vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest)); | ||
| 2426 | 2545 | ||
| 2427 | rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk); | 2546 | rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk); |
| 2428 | vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs); | 2547 | vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs); |
| @@ -2703,8 +2822,7 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) | |||
| 2703 | return 0; | 2822 | return 0; |
| 2704 | 2823 | ||
| 2705 | return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & | 2824 | return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & |
| 2706 | (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS | | 2825 | (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_NMI)); |
| 2707 | GUEST_INTR_STATE_NMI)); | ||
| 2708 | } | 2826 | } |
| 2709 | 2827 | ||
| 2710 | static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) | 2828 | static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) |
| @@ -2948,22 +3066,20 @@ static int handle_io(struct kvm_vcpu *vcpu) | |||
| 2948 | int size, in, string; | 3066 | int size, in, string; |
| 2949 | unsigned port; | 3067 | unsigned port; |
| 2950 | 3068 | ||
| 2951 | ++vcpu->stat.io_exits; | ||
| 2952 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 3069 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
| 2953 | string = (exit_qualification & 16) != 0; | 3070 | string = (exit_qualification & 16) != 0; |
| 3071 | in = (exit_qualification & 8) != 0; | ||
| 2954 | 3072 | ||
| 2955 | if (string) { | 3073 | ++vcpu->stat.io_exits; |
| 2956 | if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO) | ||
| 2957 | return 0; | ||
| 2958 | return 1; | ||
| 2959 | } | ||
| 2960 | 3074 | ||
| 2961 | size = (exit_qualification & 7) + 1; | 3075 | if (string || in) |
| 2962 | in = (exit_qualification & 8) != 0; | 3076 | return !(emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO); |
| 2963 | port = exit_qualification >> 16; | ||
| 2964 | 3077 | ||
| 3078 | port = exit_qualification >> 16; | ||
| 3079 | size = (exit_qualification & 7) + 1; | ||
| 2965 | skip_emulated_instruction(vcpu); | 3080 | skip_emulated_instruction(vcpu); |
| 2966 | return kvm_emulate_pio(vcpu, in, size, port); | 3081 | |
| 3082 | return kvm_fast_pio_out(vcpu, size, port); | ||
| 2967 | } | 3083 | } |
| 2968 | 3084 | ||
| 2969 | static void | 3085 | static void |
| @@ -3054,19 +3170,9 @@ static int handle_cr(struct kvm_vcpu *vcpu) | |||
| 3054 | return 0; | 3170 | return 0; |
| 3055 | } | 3171 | } |
| 3056 | 3172 | ||
| 3057 | static int check_dr_alias(struct kvm_vcpu *vcpu) | ||
| 3058 | { | ||
| 3059 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | ||
| 3060 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
| 3061 | return -1; | ||
| 3062 | } | ||
| 3063 | return 0; | ||
| 3064 | } | ||
| 3065 | |||
| 3066 | static int handle_dr(struct kvm_vcpu *vcpu) | 3173 | static int handle_dr(struct kvm_vcpu *vcpu) |
| 3067 | { | 3174 | { |
| 3068 | unsigned long exit_qualification; | 3175 | unsigned long exit_qualification; |
| 3069 | unsigned long val; | ||
| 3070 | int dr, reg; | 3176 | int dr, reg; |
| 3071 | 3177 | ||
| 3072 | /* Do not handle if the CPL > 0, will trigger GP on re-entry */ | 3178 | /* Do not handle if the CPL > 0, will trigger GP on re-entry */ |
| @@ -3101,67 +3207,20 @@ static int handle_dr(struct kvm_vcpu *vcpu) | |||
| 3101 | dr = exit_qualification & DEBUG_REG_ACCESS_NUM; | 3207 | dr = exit_qualification & DEBUG_REG_ACCESS_NUM; |
| 3102 | reg = DEBUG_REG_ACCESS_REG(exit_qualification); | 3208 | reg = DEBUG_REG_ACCESS_REG(exit_qualification); |
| 3103 | if (exit_qualification & TYPE_MOV_FROM_DR) { | 3209 | if (exit_qualification & TYPE_MOV_FROM_DR) { |
| 3104 | switch (dr) { | 3210 | unsigned long val; |
| 3105 | case 0 ... 3: | 3211 | if (!kvm_get_dr(vcpu, dr, &val)) |
| 3106 | val = vcpu->arch.db[dr]; | 3212 | kvm_register_write(vcpu, reg, val); |
| 3107 | break; | 3213 | } else |
| 3108 | case 4: | 3214 | kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg]); |
| 3109 | if (check_dr_alias(vcpu) < 0) | ||
| 3110 | return 1; | ||
| 3111 | /* fall through */ | ||
| 3112 | case 6: | ||
| 3113 | val = vcpu->arch.dr6; | ||
| 3114 | break; | ||
| 3115 | case 5: | ||
| 3116 | if (check_dr_alias(vcpu) < 0) | ||
| 3117 | return 1; | ||
| 3118 | /* fall through */ | ||
| 3119 | default: /* 7 */ | ||
| 3120 | val = vcpu->arch.dr7; | ||
| 3121 | break; | ||
| 3122 | } | ||
| 3123 | kvm_register_write(vcpu, reg, val); | ||
| 3124 | } else { | ||
| 3125 | val = vcpu->arch.regs[reg]; | ||
| 3126 | switch (dr) { | ||
| 3127 | case 0 ... 3: | ||
| 3128 | vcpu->arch.db[dr] = val; | ||
| 3129 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) | ||
| 3130 | vcpu->arch.eff_db[dr] = val; | ||
| 3131 | break; | ||
| 3132 | case 4: | ||
| 3133 | if (check_dr_alias(vcpu) < 0) | ||
| 3134 | return 1; | ||
| 3135 | /* fall through */ | ||
| 3136 | case 6: | ||
| 3137 | if (val & 0xffffffff00000000ULL) { | ||
| 3138 | kvm_inject_gp(vcpu, 0); | ||
| 3139 | return 1; | ||
| 3140 | } | ||
| 3141 | vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; | ||
| 3142 | break; | ||
| 3143 | case 5: | ||
| 3144 | if (check_dr_alias(vcpu) < 0) | ||
| 3145 | return 1; | ||
| 3146 | /* fall through */ | ||
| 3147 | default: /* 7 */ | ||
| 3148 | if (val & 0xffffffff00000000ULL) { | ||
| 3149 | kvm_inject_gp(vcpu, 0); | ||
| 3150 | return 1; | ||
| 3151 | } | ||
| 3152 | vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; | ||
| 3153 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { | ||
| 3154 | vmcs_writel(GUEST_DR7, vcpu->arch.dr7); | ||
| 3155 | vcpu->arch.switch_db_regs = | ||
| 3156 | (val & DR7_BP_EN_MASK); | ||
| 3157 | } | ||
| 3158 | break; | ||
| 3159 | } | ||
| 3160 | } | ||
| 3161 | skip_emulated_instruction(vcpu); | 3215 | skip_emulated_instruction(vcpu); |
| 3162 | return 1; | 3216 | return 1; |
| 3163 | } | 3217 | } |
| 3164 | 3218 | ||
| 3219 | static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) | ||
| 3220 | { | ||
| 3221 | vmcs_writel(GUEST_DR7, val); | ||
| 3222 | } | ||
| 3223 | |||
| 3165 | static int handle_cpuid(struct kvm_vcpu *vcpu) | 3224 | static int handle_cpuid(struct kvm_vcpu *vcpu) |
| 3166 | { | 3225 | { |
| 3167 | kvm_emulate_cpuid(vcpu); | 3226 | kvm_emulate_cpuid(vcpu); |
| @@ -3293,6 +3352,8 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) | |||
| 3293 | { | 3352 | { |
| 3294 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3353 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 3295 | unsigned long exit_qualification; | 3354 | unsigned long exit_qualification; |
| 3355 | bool has_error_code = false; | ||
| 3356 | u32 error_code = 0; | ||
| 3296 | u16 tss_selector; | 3357 | u16 tss_selector; |
| 3297 | int reason, type, idt_v; | 3358 | int reason, type, idt_v; |
| 3298 | 3359 | ||
| @@ -3315,6 +3376,13 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) | |||
| 3315 | kvm_clear_interrupt_queue(vcpu); | 3376 | kvm_clear_interrupt_queue(vcpu); |
| 3316 | break; | 3377 | break; |
| 3317 | case INTR_TYPE_HARD_EXCEPTION: | 3378 | case INTR_TYPE_HARD_EXCEPTION: |
| 3379 | if (vmx->idt_vectoring_info & | ||
| 3380 | VECTORING_INFO_DELIVER_CODE_MASK) { | ||
| 3381 | has_error_code = true; | ||
| 3382 | error_code = | ||
| 3383 | vmcs_read32(IDT_VECTORING_ERROR_CODE); | ||
| 3384 | } | ||
| 3385 | /* fall through */ | ||
| 3318 | case INTR_TYPE_SOFT_EXCEPTION: | 3386 | case INTR_TYPE_SOFT_EXCEPTION: |
| 3319 | kvm_clear_exception_queue(vcpu); | 3387 | kvm_clear_exception_queue(vcpu); |
| 3320 | break; | 3388 | break; |
| @@ -3329,8 +3397,13 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) | |||
| 3329 | type != INTR_TYPE_NMI_INTR)) | 3397 | type != INTR_TYPE_NMI_INTR)) |
| 3330 | skip_emulated_instruction(vcpu); | 3398 | skip_emulated_instruction(vcpu); |
| 3331 | 3399 | ||
| 3332 | if (!kvm_task_switch(vcpu, tss_selector, reason)) | 3400 | if (kvm_task_switch(vcpu, tss_selector, reason, |
| 3401 | has_error_code, error_code) == EMULATE_FAIL) { | ||
| 3402 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | ||
| 3403 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | ||
| 3404 | vcpu->run->internal.ndata = 0; | ||
| 3333 | return 0; | 3405 | return 0; |
| 3406 | } | ||
| 3334 | 3407 | ||
| 3335 | /* clear all local breakpoint enable flags */ | 3408 | /* clear all local breakpoint enable flags */ |
| 3336 | vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~55); | 3409 | vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~55); |
| @@ -3575,7 +3648,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) | |||
| 3575 | u32 exit_reason = vmx->exit_reason; | 3648 | u32 exit_reason = vmx->exit_reason; |
| 3576 | u32 vectoring_info = vmx->idt_vectoring_info; | 3649 | u32 vectoring_info = vmx->idt_vectoring_info; |
| 3577 | 3650 | ||
| 3578 | trace_kvm_exit(exit_reason, kvm_rip_read(vcpu)); | 3651 | trace_kvm_exit(exit_reason, vcpu); |
| 3579 | 3652 | ||
| 3580 | /* If guest state is invalid, start emulating */ | 3653 | /* If guest state is invalid, start emulating */ |
| 3581 | if (vmx->emulation_required && emulate_invalid_guest_state) | 3654 | if (vmx->emulation_required && emulate_invalid_guest_state) |
| @@ -3660,8 +3733,11 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx) | |||
| 3660 | 3733 | ||
| 3661 | /* We need to handle NMIs before interrupts are enabled */ | 3734 | /* We need to handle NMIs before interrupts are enabled */ |
| 3662 | if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && | 3735 | if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && |
| 3663 | (exit_intr_info & INTR_INFO_VALID_MASK)) | 3736 | (exit_intr_info & INTR_INFO_VALID_MASK)) { |
| 3737 | kvm_before_handle_nmi(&vmx->vcpu); | ||
| 3664 | asm("int $2"); | 3738 | asm("int $2"); |
| 3739 | kvm_after_handle_nmi(&vmx->vcpu); | ||
| 3740 | } | ||
| 3665 | 3741 | ||
| 3666 | idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; | 3742 | idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; |
| 3667 | 3743 | ||
| @@ -3921,10 +3997,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) | |||
| 3921 | { | 3997 | { |
| 3922 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3998 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 3923 | 3999 | ||
| 3924 | spin_lock(&vmx_vpid_lock); | 4000 | free_vpid(vmx); |
| 3925 | if (vmx->vpid != 0) | ||
| 3926 | __clear_bit(vmx->vpid, vmx_vpid_bitmap); | ||
| 3927 | spin_unlock(&vmx_vpid_lock); | ||
| 3928 | vmx_free_vmcs(vcpu); | 4001 | vmx_free_vmcs(vcpu); |
| 3929 | kfree(vmx->guest_msrs); | 4002 | kfree(vmx->guest_msrs); |
| 3930 | kvm_vcpu_uninit(vcpu); | 4003 | kvm_vcpu_uninit(vcpu); |
| @@ -3986,6 +4059,7 @@ free_msrs: | |||
| 3986 | uninit_vcpu: | 4059 | uninit_vcpu: |
| 3987 | kvm_vcpu_uninit(&vmx->vcpu); | 4060 | kvm_vcpu_uninit(&vmx->vcpu); |
| 3988 | free_vcpu: | 4061 | free_vcpu: |
| 4062 | free_vpid(vmx); | ||
| 3989 | kmem_cache_free(kvm_vcpu_cache, vmx); | 4063 | kmem_cache_free(kvm_vcpu_cache, vmx); |
| 3990 | return ERR_PTR(err); | 4064 | return ERR_PTR(err); |
| 3991 | } | 4065 | } |
| @@ -4116,6 +4190,10 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) | |||
| 4116 | } | 4190 | } |
| 4117 | } | 4191 | } |
| 4118 | 4192 | ||
| 4193 | static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) | ||
| 4194 | { | ||
| 4195 | } | ||
| 4196 | |||
| 4119 | static struct kvm_x86_ops vmx_x86_ops = { | 4197 | static struct kvm_x86_ops vmx_x86_ops = { |
| 4120 | .cpu_has_kvm_support = cpu_has_kvm_support, | 4198 | .cpu_has_kvm_support = cpu_has_kvm_support, |
| 4121 | .disabled_by_bios = vmx_disabled_by_bios, | 4199 | .disabled_by_bios = vmx_disabled_by_bios, |
| @@ -4152,6 +4230,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
| 4152 | .set_idt = vmx_set_idt, | 4230 | .set_idt = vmx_set_idt, |
| 4153 | .get_gdt = vmx_get_gdt, | 4231 | .get_gdt = vmx_get_gdt, |
| 4154 | .set_gdt = vmx_set_gdt, | 4232 | .set_gdt = vmx_set_gdt, |
| 4233 | .set_dr7 = vmx_set_dr7, | ||
| 4155 | .cache_reg = vmx_cache_reg, | 4234 | .cache_reg = vmx_cache_reg, |
| 4156 | .get_rflags = vmx_get_rflags, | 4235 | .get_rflags = vmx_get_rflags, |
| 4157 | .set_rflags = vmx_set_rflags, | 4236 | .set_rflags = vmx_set_rflags, |
| @@ -4187,6 +4266,8 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
| 4187 | .cpuid_update = vmx_cpuid_update, | 4266 | .cpuid_update = vmx_cpuid_update, |
| 4188 | 4267 | ||
| 4189 | .rdtscp_supported = vmx_rdtscp_supported, | 4268 | .rdtscp_supported = vmx_rdtscp_supported, |
| 4269 | |||
| 4270 | .set_supported_cpuid = vmx_set_supported_cpuid, | ||
| 4190 | }; | 4271 | }; |
| 4191 | 4272 | ||
| 4192 | static int __init vmx_init(void) | 4273 | static int __init vmx_init(void) |
| @@ -4234,7 +4315,8 @@ static int __init vmx_init(void) | |||
| 4234 | 4315 | ||
| 4235 | set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ | 4316 | set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ |
| 4236 | 4317 | ||
| 4237 | r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE); | 4318 | r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), |
| 4319 | __alignof__(struct vcpu_vmx), THIS_MODULE); | ||
| 4238 | if (r) | 4320 | if (r) |
| 4239 | goto out3; | 4321 | goto out3; |
| 4240 | 4322 | ||
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3c4ca98ad27f..05d571f6f196 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
| @@ -40,8 +40,9 @@ | |||
| 40 | #include <linux/user-return-notifier.h> | 40 | #include <linux/user-return-notifier.h> |
| 41 | #include <linux/srcu.h> | 41 | #include <linux/srcu.h> |
| 42 | #include <linux/slab.h> | 42 | #include <linux/slab.h> |
| 43 | #include <linux/perf_event.h> | ||
| 43 | #include <trace/events/kvm.h> | 44 | #include <trace/events/kvm.h> |
| 44 | #undef TRACE_INCLUDE_FILE | 45 | |
| 45 | #define CREATE_TRACE_POINTS | 46 | #define CREATE_TRACE_POINTS |
| 46 | #include "trace.h" | 47 | #include "trace.h" |
| 47 | 48 | ||
| @@ -223,34 +224,6 @@ static void drop_user_return_notifiers(void *ignore) | |||
| 223 | kvm_on_user_return(&smsr->urn); | 224 | kvm_on_user_return(&smsr->urn); |
| 224 | } | 225 | } |
| 225 | 226 | ||
| 226 | unsigned long segment_base(u16 selector) | ||
| 227 | { | ||
| 228 | struct descriptor_table gdt; | ||
| 229 | struct desc_struct *d; | ||
| 230 | unsigned long table_base; | ||
| 231 | unsigned long v; | ||
| 232 | |||
| 233 | if (selector == 0) | ||
| 234 | return 0; | ||
| 235 | |||
| 236 | kvm_get_gdt(&gdt); | ||
| 237 | table_base = gdt.base; | ||
| 238 | |||
| 239 | if (selector & 4) { /* from ldt */ | ||
| 240 | u16 ldt_selector = kvm_read_ldt(); | ||
| 241 | |||
| 242 | table_base = segment_base(ldt_selector); | ||
| 243 | } | ||
| 244 | d = (struct desc_struct *)(table_base + (selector & ~7)); | ||
| 245 | v = get_desc_base(d); | ||
| 246 | #ifdef CONFIG_X86_64 | ||
| 247 | if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11)) | ||
| 248 | v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32; | ||
| 249 | #endif | ||
| 250 | return v; | ||
| 251 | } | ||
| 252 | EXPORT_SYMBOL_GPL(segment_base); | ||
| 253 | |||
| 254 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) | 227 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) |
| 255 | { | 228 | { |
| 256 | if (irqchip_in_kernel(vcpu->kvm)) | 229 | if (irqchip_in_kernel(vcpu->kvm)) |
| @@ -292,7 +265,8 @@ static int exception_class(int vector) | |||
| 292 | } | 265 | } |
| 293 | 266 | ||
| 294 | static void kvm_multiple_exception(struct kvm_vcpu *vcpu, | 267 | static void kvm_multiple_exception(struct kvm_vcpu *vcpu, |
| 295 | unsigned nr, bool has_error, u32 error_code) | 268 | unsigned nr, bool has_error, u32 error_code, |
| 269 | bool reinject) | ||
| 296 | { | 270 | { |
| 297 | u32 prev_nr; | 271 | u32 prev_nr; |
| 298 | int class1, class2; | 272 | int class1, class2; |
| @@ -303,6 +277,7 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, | |||
| 303 | vcpu->arch.exception.has_error_code = has_error; | 277 | vcpu->arch.exception.has_error_code = has_error; |
| 304 | vcpu->arch.exception.nr = nr; | 278 | vcpu->arch.exception.nr = nr; |
| 305 | vcpu->arch.exception.error_code = error_code; | 279 | vcpu->arch.exception.error_code = error_code; |
| 280 | vcpu->arch.exception.reinject = reinject; | ||
| 306 | return; | 281 | return; |
| 307 | } | 282 | } |
| 308 | 283 | ||
| @@ -331,10 +306,16 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, | |||
| 331 | 306 | ||
| 332 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr) | 307 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr) |
| 333 | { | 308 | { |
| 334 | kvm_multiple_exception(vcpu, nr, false, 0); | 309 | kvm_multiple_exception(vcpu, nr, false, 0, false); |
| 335 | } | 310 | } |
| 336 | EXPORT_SYMBOL_GPL(kvm_queue_exception); | 311 | EXPORT_SYMBOL_GPL(kvm_queue_exception); |
| 337 | 312 | ||
| 313 | void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr) | ||
| 314 | { | ||
| 315 | kvm_multiple_exception(vcpu, nr, false, 0, true); | ||
| 316 | } | ||
| 317 | EXPORT_SYMBOL_GPL(kvm_requeue_exception); | ||
| 318 | |||
| 338 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr, | 319 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr, |
| 339 | u32 error_code) | 320 | u32 error_code) |
| 340 | { | 321 | { |
| @@ -351,10 +332,16 @@ EXPORT_SYMBOL_GPL(kvm_inject_nmi); | |||
| 351 | 332 | ||
| 352 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) | 333 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) |
| 353 | { | 334 | { |
| 354 | kvm_multiple_exception(vcpu, nr, true, error_code); | 335 | kvm_multiple_exception(vcpu, nr, true, error_code, false); |
| 355 | } | 336 | } |
| 356 | EXPORT_SYMBOL_GPL(kvm_queue_exception_e); | 337 | EXPORT_SYMBOL_GPL(kvm_queue_exception_e); |
| 357 | 338 | ||
| 339 | void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) | ||
| 340 | { | ||
| 341 | kvm_multiple_exception(vcpu, nr, true, error_code, true); | ||
| 342 | } | ||
| 343 | EXPORT_SYMBOL_GPL(kvm_requeue_exception_e); | ||
| 344 | |||
| 358 | /* | 345 | /* |
| 359 | * Checks if cpl <= required_cpl; if true, return true. Otherwise queue | 346 | * Checks if cpl <= required_cpl; if true, return true. Otherwise queue |
| 360 | * a #GP and return false. | 347 | * a #GP and return false. |
| @@ -475,7 +462,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
| 475 | } | 462 | } |
| 476 | 463 | ||
| 477 | kvm_x86_ops->set_cr0(vcpu, cr0); | 464 | kvm_x86_ops->set_cr0(vcpu, cr0); |
| 478 | vcpu->arch.cr0 = cr0; | ||
| 479 | 465 | ||
| 480 | kvm_mmu_reset_context(vcpu); | 466 | kvm_mmu_reset_context(vcpu); |
| 481 | return; | 467 | return; |
| @@ -484,7 +470,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cr0); | |||
| 484 | 470 | ||
| 485 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) | 471 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) |
| 486 | { | 472 | { |
| 487 | kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0ful) | (msw & 0x0f)); | 473 | kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f)); |
| 488 | } | 474 | } |
| 489 | EXPORT_SYMBOL_GPL(kvm_lmsw); | 475 | EXPORT_SYMBOL_GPL(kvm_lmsw); |
| 490 | 476 | ||
| @@ -516,7 +502,6 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
| 516 | } | 502 | } |
| 517 | kvm_x86_ops->set_cr4(vcpu, cr4); | 503 | kvm_x86_ops->set_cr4(vcpu, cr4); |
| 518 | vcpu->arch.cr4 = cr4; | 504 | vcpu->arch.cr4 = cr4; |
| 519 | vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) && !tdp_enabled; | ||
| 520 | kvm_mmu_reset_context(vcpu); | 505 | kvm_mmu_reset_context(vcpu); |
| 521 | } | 506 | } |
| 522 | EXPORT_SYMBOL_GPL(kvm_set_cr4); | 507 | EXPORT_SYMBOL_GPL(kvm_set_cr4); |
| @@ -591,6 +576,80 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) | |||
| 591 | } | 576 | } |
| 592 | EXPORT_SYMBOL_GPL(kvm_get_cr8); | 577 | EXPORT_SYMBOL_GPL(kvm_get_cr8); |
| 593 | 578 | ||
| 579 | int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) | ||
| 580 | { | ||
| 581 | switch (dr) { | ||
| 582 | case 0 ... 3: | ||
| 583 | vcpu->arch.db[dr] = val; | ||
| 584 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) | ||
| 585 | vcpu->arch.eff_db[dr] = val; | ||
| 586 | break; | ||
| 587 | case 4: | ||
| 588 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | ||
| 589 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
| 590 | return 1; | ||
| 591 | } | ||
| 592 | /* fall through */ | ||
| 593 | case 6: | ||
| 594 | if (val & 0xffffffff00000000ULL) { | ||
| 595 | kvm_inject_gp(vcpu, 0); | ||
| 596 | return 1; | ||
| 597 | } | ||
| 598 | vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; | ||
| 599 | break; | ||
| 600 | case 5: | ||
| 601 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | ||
| 602 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
| 603 | return 1; | ||
| 604 | } | ||
| 605 | /* fall through */ | ||
| 606 | default: /* 7 */ | ||
| 607 | if (val & 0xffffffff00000000ULL) { | ||
| 608 | kvm_inject_gp(vcpu, 0); | ||
| 609 | return 1; | ||
| 610 | } | ||
| 611 | vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; | ||
| 612 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { | ||
| 613 | kvm_x86_ops->set_dr7(vcpu, vcpu->arch.dr7); | ||
| 614 | vcpu->arch.switch_db_regs = (val & DR7_BP_EN_MASK); | ||
| 615 | } | ||
| 616 | break; | ||
| 617 | } | ||
| 618 | |||
| 619 | return 0; | ||
| 620 | } | ||
| 621 | EXPORT_SYMBOL_GPL(kvm_set_dr); | ||
| 622 | |||
| 623 | int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) | ||
| 624 | { | ||
| 625 | switch (dr) { | ||
| 626 | case 0 ... 3: | ||
| 627 | *val = vcpu->arch.db[dr]; | ||
| 628 | break; | ||
| 629 | case 4: | ||
| 630 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | ||
| 631 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
| 632 | return 1; | ||
| 633 | } | ||
| 634 | /* fall through */ | ||
| 635 | case 6: | ||
| 636 | *val = vcpu->arch.dr6; | ||
| 637 | break; | ||
| 638 | case 5: | ||
| 639 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | ||
| 640 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
| 641 | return 1; | ||
| 642 | } | ||
| 643 | /* fall through */ | ||
| 644 | default: /* 7 */ | ||
| 645 | *val = vcpu->arch.dr7; | ||
| 646 | break; | ||
| 647 | } | ||
| 648 | |||
| 649 | return 0; | ||
| 650 | } | ||
| 651 | EXPORT_SYMBOL_GPL(kvm_get_dr); | ||
| 652 | |||
| 594 | static inline u32 bit(int bitno) | 653 | static inline u32 bit(int bitno) |
| 595 | { | 654 | { |
| 596 | return 1 << (bitno & 31); | 655 | return 1 << (bitno & 31); |
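kvm_set_dr() and kvm_get_dr() above centralize the debug-register rules that handle_dr() in vmx.c used to open-code: DR4/DR5 alias DR6/DR7 only while CR4.DE is clear (otherwise the access raises #UD), and DR6/DR7 writes with any of the upper 32 bits set take a #GP. A compact stand-alone sketch of just that validation, with a made-up dbg_state struct standing in for the vcpu:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct dbg_state {
        uint64_t db[4];     /* DR0-DR3 */
        uint64_t dr6, dr7;
        bool     cr4_de;    /* CR4.DE: debugging extensions */
    };

    enum dr_result { DR_OK, DR_UD, DR_GP };

    static enum dr_result set_dr(struct dbg_state *s, int dr, uint64_t val)
    {
        switch (dr) {
        case 0: case 1: case 2: case 3:
            s->db[dr] = val;
            return DR_OK;
        case 4: case 5:
            if (s->cr4_de)
                return DR_UD;           /* alias not allowed with CR4.DE=1 */
            dr += 2;                    /* treat as DR6/DR7 */
            /* fall through */
        case 6: case 7:
            if (val >> 32)
                return DR_GP;           /* upper 32 bits must be zero */
            if (dr == 6)
                s->dr6 = val;
            else
                s->dr7 = val;
            return DR_OK;
        }
        return DR_GP;
    }

    int main(void)
    {
        struct dbg_state s = { .cr4_de = true };
        printf("%d\n", set_dr(&s, 4, 0));   /* prints 1 (DR_UD) */
        return 0;
    }

The kernel version additionally masks DR6/DR7 with their fixed and volatile bit sets and forwards DR7 to the new set_dr7 hook; the sketch keeps only the alias and range checks.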
| @@ -605,9 +664,10 @@ static inline u32 bit(int bitno) | |||
| 605 | * kvm-specific. Those are put in the beginning of the list. | 664 | * kvm-specific. Those are put in the beginning of the list. |
| 606 | */ | 665 | */ |
| 607 | 666 | ||
| 608 | #define KVM_SAVE_MSRS_BEGIN 5 | 667 | #define KVM_SAVE_MSRS_BEGIN 7 |
| 609 | static u32 msrs_to_save[] = { | 668 | static u32 msrs_to_save[] = { |
| 610 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, | 669 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, |
| 670 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, | ||
| 611 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, | 671 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, |
| 612 | HV_X64_MSR_APIC_ASSIST_PAGE, | 672 | HV_X64_MSR_APIC_ASSIST_PAGE, |
| 613 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, | 673 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, |
| @@ -624,48 +684,42 @@ static u32 emulated_msrs[] = { | |||
| 624 | MSR_IA32_MISC_ENABLE, | 684 | MSR_IA32_MISC_ENABLE, |
| 625 | }; | 685 | }; |
| 626 | 686 | ||
| 627 | static void set_efer(struct kvm_vcpu *vcpu, u64 efer) | 687 | static int set_efer(struct kvm_vcpu *vcpu, u64 efer) |
| 628 | { | 688 | { |
| 629 | if (efer & efer_reserved_bits) { | 689 | if (efer & efer_reserved_bits) |
| 630 | kvm_inject_gp(vcpu, 0); | 690 | return 1; |
| 631 | return; | ||
| 632 | } | ||
| 633 | 691 | ||
| 634 | if (is_paging(vcpu) | 692 | if (is_paging(vcpu) |
| 635 | && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) { | 693 | && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) |
| 636 | kvm_inject_gp(vcpu, 0); | 694 | return 1; |
| 637 | return; | ||
| 638 | } | ||
| 639 | 695 | ||
| 640 | if (efer & EFER_FFXSR) { | 696 | if (efer & EFER_FFXSR) { |
| 641 | struct kvm_cpuid_entry2 *feat; | 697 | struct kvm_cpuid_entry2 *feat; |
| 642 | 698 | ||
| 643 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | 699 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); |
| 644 | if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) { | 700 | if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) |
| 645 | kvm_inject_gp(vcpu, 0); | 701 | return 1; |
| 646 | return; | ||
| 647 | } | ||
| 648 | } | 702 | } |
| 649 | 703 | ||
| 650 | if (efer & EFER_SVME) { | 704 | if (efer & EFER_SVME) { |
| 651 | struct kvm_cpuid_entry2 *feat; | 705 | struct kvm_cpuid_entry2 *feat; |
| 652 | 706 | ||
| 653 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | 707 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); |
| 654 | if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) { | 708 | if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) |
| 655 | kvm_inject_gp(vcpu, 0); | 709 | return 1; |
| 656 | return; | ||
| 657 | } | ||
| 658 | } | 710 | } |
| 659 | 711 | ||
| 660 | kvm_x86_ops->set_efer(vcpu, efer); | ||
| 661 | |||
| 662 | efer &= ~EFER_LMA; | 712 | efer &= ~EFER_LMA; |
| 663 | efer |= vcpu->arch.efer & EFER_LMA; | 713 | efer |= vcpu->arch.efer & EFER_LMA; |
| 664 | 714 | ||
| 715 | kvm_x86_ops->set_efer(vcpu, efer); | ||
| 716 | |||
| 665 | vcpu->arch.efer = efer; | 717 | vcpu->arch.efer = efer; |
| 666 | 718 | ||
| 667 | vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; | 719 | vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; |
| 668 | kvm_mmu_reset_context(vcpu); | 720 | kvm_mmu_reset_context(vcpu); |
| 721 | |||
| 722 | return 0; | ||
| 669 | } | 723 | } |
| 670 | 724 | ||
| 671 | void kvm_enable_efer_bits(u64 mask) | 725 | void kvm_enable_efer_bits(u64 mask) |
| @@ -695,14 +749,22 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) | |||
| 695 | 749 | ||
| 696 | static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) | 750 | static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) |
| 697 | { | 751 | { |
| 698 | static int version; | 752 | int version; |
| 753 | int r; | ||
| 699 | struct pvclock_wall_clock wc; | 754 | struct pvclock_wall_clock wc; |
| 700 | struct timespec boot; | 755 | struct timespec boot; |
| 701 | 756 | ||
| 702 | if (!wall_clock) | 757 | if (!wall_clock) |
| 703 | return; | 758 | return; |
| 704 | 759 | ||
| 705 | version++; | 760 | r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version)); |
| 761 | if (r) | ||
| 762 | return; | ||
| 763 | |||
| 764 | if (version & 1) | ||
| 765 | ++version; /* first time write, random junk */ | ||
| 766 | |||
| 767 | ++version; | ||
| 706 | 768 | ||
| 707 | kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); | 769 | kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); |
| 708 | 770 | ||
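kvm_write_wall_clock() now reads back the version field the guest already sees instead of keeping a static counter, forces it even if it still holds odd first-boot junk, and then bumps it to an odd value before the payload is filled in (the closing even increment happens later in the function, outside this hunk). A guest reads the structure with the usual even/odd seqcount pattern; a hedged sketch of that reader, with a placeholder for the shared mapping:

    #include <stdint.h>

    /* Wall-clock layout as in struct pvclock_wall_clock. */
    struct wall_clock {
        uint32_t version;
        uint32_t sec;
        uint32_t nsec;
    };

    /* Stand-in for the guest mapping of the page the host writes. */
    static volatile struct wall_clock *wc;

    static void read_wall_clock(uint32_t *sec, uint32_t *nsec)
    {
        uint32_t v;

        do {
            v = wc->version;      /* odd means an update is in flight */
            /* a real guest inserts a read barrier here */
            *sec  = wc->sec;
            *nsec = wc->nsec;
        } while ((v & 1) || v != wc->version);
    }

    int main(void)
    {
        static struct wall_clock fake = { .version = 2, .sec = 100, .nsec = 5 };
        uint32_t s, ns;

        wc = &fake;               /* simulate a stable, published snapshot */
        read_wall_clock(&s, &ns);
        return (s == 100 && ns == 5) ? 0 : 1;
    }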
| @@ -795,6 +857,8 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) | |||
| 795 | vcpu->hv_clock.system_time = ts.tv_nsec + | 857 | vcpu->hv_clock.system_time = ts.tv_nsec + |
| 796 | (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset; | 858 | (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset; |
| 797 | 859 | ||
| 860 | vcpu->hv_clock.flags = 0; | ||
| 861 | |||
| 798 | /* | 862 | /* |
| 799 | * The interface expects us to write an even number signaling that the | 863 | * The interface expects us to write an even number signaling that the |
| 800 | * update is finished. Since the guest won't see the intermediate | 864 | * update is finished. Since the guest won't see the intermediate |
| @@ -1086,10 +1150,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
| 1086 | { | 1150 | { |
| 1087 | switch (msr) { | 1151 | switch (msr) { |
| 1088 | case MSR_EFER: | 1152 | case MSR_EFER: |
| 1089 | set_efer(vcpu, data); | 1153 | return set_efer(vcpu, data); |
| 1090 | break; | ||
| 1091 | case MSR_K7_HWCR: | 1154 | case MSR_K7_HWCR: |
| 1092 | data &= ~(u64)0x40; /* ignore flush filter disable */ | 1155 | data &= ~(u64)0x40; /* ignore flush filter disable */ |
| 1156 | data &= ~(u64)0x100; /* ignore ignne emulation enable */ | ||
| 1093 | if (data != 0) { | 1157 | if (data != 0) { |
| 1094 | pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", | 1158 | pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", |
| 1095 | data); | 1159 | data); |
| @@ -1132,10 +1196,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
| 1132 | case MSR_IA32_MISC_ENABLE: | 1196 | case MSR_IA32_MISC_ENABLE: |
| 1133 | vcpu->arch.ia32_misc_enable_msr = data; | 1197 | vcpu->arch.ia32_misc_enable_msr = data; |
| 1134 | break; | 1198 | break; |
| 1199 | case MSR_KVM_WALL_CLOCK_NEW: | ||
| 1135 | case MSR_KVM_WALL_CLOCK: | 1200 | case MSR_KVM_WALL_CLOCK: |
| 1136 | vcpu->kvm->arch.wall_clock = data; | 1201 | vcpu->kvm->arch.wall_clock = data; |
| 1137 | kvm_write_wall_clock(vcpu->kvm, data); | 1202 | kvm_write_wall_clock(vcpu->kvm, data); |
| 1138 | break; | 1203 | break; |
| 1204 | case MSR_KVM_SYSTEM_TIME_NEW: | ||
| 1139 | case MSR_KVM_SYSTEM_TIME: { | 1205 | case MSR_KVM_SYSTEM_TIME: { |
| 1140 | if (vcpu->arch.time_page) { | 1206 | if (vcpu->arch.time_page) { |
| 1141 | kvm_release_page_dirty(vcpu->arch.time_page); | 1207 | kvm_release_page_dirty(vcpu->arch.time_page); |
| @@ -1407,9 +1473,11 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
| 1407 | data = vcpu->arch.efer; | 1473 | data = vcpu->arch.efer; |
| 1408 | break; | 1474 | break; |
| 1409 | case MSR_KVM_WALL_CLOCK: | 1475 | case MSR_KVM_WALL_CLOCK: |
| 1476 | case MSR_KVM_WALL_CLOCK_NEW: | ||
| 1410 | data = vcpu->kvm->arch.wall_clock; | 1477 | data = vcpu->kvm->arch.wall_clock; |
| 1411 | break; | 1478 | break; |
| 1412 | case MSR_KVM_SYSTEM_TIME: | 1479 | case MSR_KVM_SYSTEM_TIME: |
| 1480 | case MSR_KVM_SYSTEM_TIME_NEW: | ||
| 1413 | data = vcpu->arch.time; | 1481 | data = vcpu->arch.time; |
| 1414 | break; | 1482 | break; |
| 1415 | case MSR_IA32_P5_MC_ADDR: | 1483 | case MSR_IA32_P5_MC_ADDR: |
| @@ -1548,6 +1616,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
| 1548 | case KVM_CAP_HYPERV_VAPIC: | 1616 | case KVM_CAP_HYPERV_VAPIC: |
| 1549 | case KVM_CAP_HYPERV_SPIN: | 1617 | case KVM_CAP_HYPERV_SPIN: |
| 1550 | case KVM_CAP_PCI_SEGMENT: | 1618 | case KVM_CAP_PCI_SEGMENT: |
| 1619 | case KVM_CAP_DEBUGREGS: | ||
| 1551 | case KVM_CAP_X86_ROBUST_SINGLESTEP: | 1620 | case KVM_CAP_X86_ROBUST_SINGLESTEP: |
| 1552 | r = 1; | 1621 | r = 1; |
| 1553 | break; | 1622 | break; |
| @@ -1712,6 +1781,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, | |||
| 1712 | if (copy_from_user(cpuid_entries, entries, | 1781 | if (copy_from_user(cpuid_entries, entries, |
| 1713 | cpuid->nent * sizeof(struct kvm_cpuid_entry))) | 1782 | cpuid->nent * sizeof(struct kvm_cpuid_entry))) |
| 1714 | goto out_free; | 1783 | goto out_free; |
| 1784 | vcpu_load(vcpu); | ||
| 1715 | for (i = 0; i < cpuid->nent; i++) { | 1785 | for (i = 0; i < cpuid->nent; i++) { |
| 1716 | vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function; | 1786 | vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function; |
| 1717 | vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax; | 1787 | vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax; |
| @@ -1729,6 +1799,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, | |||
| 1729 | r = 0; | 1799 | r = 0; |
| 1730 | kvm_apic_set_version(vcpu); | 1800 | kvm_apic_set_version(vcpu); |
| 1731 | kvm_x86_ops->cpuid_update(vcpu); | 1801 | kvm_x86_ops->cpuid_update(vcpu); |
| 1802 | vcpu_put(vcpu); | ||
| 1732 | 1803 | ||
| 1733 | out_free: | 1804 | out_free: |
| 1734 | vfree(cpuid_entries); | 1805 | vfree(cpuid_entries); |
| @@ -1749,9 +1820,11 @@ static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, | |||
| 1749 | if (copy_from_user(&vcpu->arch.cpuid_entries, entries, | 1820 | if (copy_from_user(&vcpu->arch.cpuid_entries, entries, |
| 1750 | cpuid->nent * sizeof(struct kvm_cpuid_entry2))) | 1821 | cpuid->nent * sizeof(struct kvm_cpuid_entry2))) |
| 1751 | goto out; | 1822 | goto out; |
| 1823 | vcpu_load(vcpu); | ||
| 1752 | vcpu->arch.cpuid_nent = cpuid->nent; | 1824 | vcpu->arch.cpuid_nent = cpuid->nent; |
| 1753 | kvm_apic_set_version(vcpu); | 1825 | kvm_apic_set_version(vcpu); |
| 1754 | kvm_x86_ops->cpuid_update(vcpu); | 1826 | kvm_x86_ops->cpuid_update(vcpu); |
| 1827 | vcpu_put(vcpu); | ||
| 1755 | return 0; | 1828 | return 0; |
| 1756 | 1829 | ||
| 1757 | out: | 1830 | out: |
| @@ -1764,6 +1837,7 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, | |||
| 1764 | { | 1837 | { |
| 1765 | int r; | 1838 | int r; |
| 1766 | 1839 | ||
| 1840 | vcpu_load(vcpu); | ||
| 1767 | r = -E2BIG; | 1841 | r = -E2BIG; |
| 1768 | if (cpuid->nent < vcpu->arch.cpuid_nent) | 1842 | if (cpuid->nent < vcpu->arch.cpuid_nent) |
| 1769 | goto out; | 1843 | goto out; |
| @@ -1775,6 +1849,7 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, | |||
| 1775 | 1849 | ||
| 1776 | out: | 1850 | out: |
| 1777 | cpuid->nent = vcpu->arch.cpuid_nent; | 1851 | cpuid->nent = vcpu->arch.cpuid_nent; |
| 1852 | vcpu_put(vcpu); | ||
| 1778 | return r; | 1853 | return r; |
| 1779 | } | 1854 | } |
| 1780 | 1855 | ||
| @@ -1905,6 +1980,24 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
| 1905 | } | 1980 | } |
| 1906 | break; | 1981 | break; |
| 1907 | } | 1982 | } |
| 1983 | case KVM_CPUID_SIGNATURE: { | ||
| 1984 | char signature[12] = "KVMKVMKVM\0\0"; | ||
| 1985 | u32 *sigptr = (u32 *)signature; | ||
| 1986 | entry->eax = 0; | ||
| 1987 | entry->ebx = sigptr[0]; | ||
| 1988 | entry->ecx = sigptr[1]; | ||
| 1989 | entry->edx = sigptr[2]; | ||
| 1990 | break; | ||
| 1991 | } | ||
| 1992 | case KVM_CPUID_FEATURES: | ||
| 1993 | entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) | | ||
| 1994 | (1 << KVM_FEATURE_NOP_IO_DELAY) | | ||
| 1995 | (1 << KVM_FEATURE_CLOCKSOURCE2) | | ||
| 1996 | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); | ||
| 1997 | entry->ebx = 0; | ||
| 1998 | entry->ecx = 0; | ||
| 1999 | entry->edx = 0; | ||
| 2000 | break; | ||
| 1908 | case 0x80000000: | 2001 | case 0x80000000: |
| 1909 | entry->eax = min(entry->eax, 0x8000001a); | 2002 | entry->eax = min(entry->eax, 0x8000001a); |
| 1910 | break; | 2003 | break; |
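The new KVM_CPUID_SIGNATURE and KVM_CPUID_FEATURES cases export the "KVMKVMKVM" signature and the paravirtual feature bits (including the new CLOCKSOURCE2 and CLOCKSOURCE_STABLE_BIT flags) through the hypervisor CPUID range at 0x40000000/0x40000001; this is how a guest discovers it runs on KVM and which kvmclock MSRs to use. A user-space sketch of the detection side, using GCC's cpuid.h (run inside a KVM guest to see the signature):

    #include <cpuid.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;
        char sig[13] = { 0 };

        /* Hypervisor vendor leaf; KVM answers at 0x40000000.  __cpuid() is
         * used directly because __get_cpuid() rejects leaves above the
         * highest standard leaf. */
        __cpuid(0x40000000, eax, ebx, ecx, edx);

        memcpy(sig + 0, &ebx, 4);
        memcpy(sig + 4, &ecx, 4);
        memcpy(sig + 8, &edx, 4);
        printf("hypervisor signature: %s\n", sig);

        if (strcmp(sig, "KVMKVMKVM") == 0) {
            __cpuid(0x40000001, eax, ebx, ecx, edx);
            printf("KVM feature bits: 0x%x\n", eax);
        }
        return 0;
    }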
| @@ -1913,6 +2006,9 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
| 1913 | entry->ecx &= kvm_supported_word6_x86_features; | 2006 | entry->ecx &= kvm_supported_word6_x86_features; |
| 1914 | break; | 2007 | break; |
| 1915 | } | 2008 | } |
| 2009 | |||
| 2010 | kvm_x86_ops->set_supported_cpuid(function, entry); | ||
| 2011 | |||
| 1916 | put_cpu(); | 2012 | put_cpu(); |
| 1917 | } | 2013 | } |
| 1918 | 2014 | ||
| @@ -1948,6 +2044,23 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, | |||
| 1948 | for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func) | 2044 | for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func) |
| 1949 | do_cpuid_ent(&cpuid_entries[nent], func, 0, | 2045 | do_cpuid_ent(&cpuid_entries[nent], func, 0, |
| 1950 | &nent, cpuid->nent); | 2046 | &nent, cpuid->nent); |
| 2047 | |||
| 2048 | |||
| 2049 | |||
| 2050 | r = -E2BIG; | ||
| 2051 | if (nent >= cpuid->nent) | ||
| 2052 | goto out_free; | ||
| 2053 | |||
| 2054 | do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_SIGNATURE, 0, &nent, | ||
| 2055 | cpuid->nent); | ||
| 2056 | |||
| 2057 | r = -E2BIG; | ||
| 2058 | if (nent >= cpuid->nent) | ||
| 2059 | goto out_free; | ||
| 2060 | |||
| 2061 | do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_FEATURES, 0, &nent, | ||
| 2062 | cpuid->nent); | ||
| 2063 | |||
| 1951 | r = -E2BIG; | 2064 | r = -E2BIG; |
| 1952 | if (nent >= cpuid->nent) | 2065 | if (nent >= cpuid->nent) |
| 1953 | goto out_free; | 2066 | goto out_free; |
| @@ -2027,6 +2140,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, | |||
| 2027 | int r; | 2140 | int r; |
| 2028 | unsigned bank_num = mcg_cap & 0xff, bank; | 2141 | unsigned bank_num = mcg_cap & 0xff, bank; |
| 2029 | 2142 | ||
| 2143 | vcpu_load(vcpu); | ||
| 2030 | r = -EINVAL; | 2144 | r = -EINVAL; |
| 2031 | if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) | 2145 | if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) |
| 2032 | goto out; | 2146 | goto out; |
| @@ -2041,6 +2155,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, | |||
| 2041 | for (bank = 0; bank < bank_num; bank++) | 2155 | for (bank = 0; bank < bank_num; bank++) |
| 2042 | vcpu->arch.mce_banks[bank*4] = ~(u64)0; | 2156 | vcpu->arch.mce_banks[bank*4] = ~(u64)0; |
| 2043 | out: | 2157 | out: |
| 2158 | vcpu_put(vcpu); | ||
| 2044 | return r; | 2159 | return r; |
| 2045 | } | 2160 | } |
| 2046 | 2161 | ||
| @@ -2100,14 +2215,20 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | |||
| 2100 | { | 2215 | { |
| 2101 | vcpu_load(vcpu); | 2216 | vcpu_load(vcpu); |
| 2102 | 2217 | ||
| 2103 | events->exception.injected = vcpu->arch.exception.pending; | 2218 | events->exception.injected = |
| 2219 | vcpu->arch.exception.pending && | ||
| 2220 | !kvm_exception_is_soft(vcpu->arch.exception.nr); | ||
| 2104 | events->exception.nr = vcpu->arch.exception.nr; | 2221 | events->exception.nr = vcpu->arch.exception.nr; |
| 2105 | events->exception.has_error_code = vcpu->arch.exception.has_error_code; | 2222 | events->exception.has_error_code = vcpu->arch.exception.has_error_code; |
| 2106 | events->exception.error_code = vcpu->arch.exception.error_code; | 2223 | events->exception.error_code = vcpu->arch.exception.error_code; |
| 2107 | 2224 | ||
| 2108 | events->interrupt.injected = vcpu->arch.interrupt.pending; | 2225 | events->interrupt.injected = |
| 2226 | vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft; | ||
| 2109 | events->interrupt.nr = vcpu->arch.interrupt.nr; | 2227 | events->interrupt.nr = vcpu->arch.interrupt.nr; |
| 2110 | events->interrupt.soft = vcpu->arch.interrupt.soft; | 2228 | events->interrupt.soft = 0; |
| 2229 | events->interrupt.shadow = | ||
| 2230 | kvm_x86_ops->get_interrupt_shadow(vcpu, | ||
| 2231 | KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI); | ||
| 2111 | 2232 | ||
| 2112 | events->nmi.injected = vcpu->arch.nmi_injected; | 2233 | events->nmi.injected = vcpu->arch.nmi_injected; |
| 2113 | events->nmi.pending = vcpu->arch.nmi_pending; | 2234 | events->nmi.pending = vcpu->arch.nmi_pending; |
| @@ -2116,7 +2237,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | |||
| 2116 | events->sipi_vector = vcpu->arch.sipi_vector; | 2237 | events->sipi_vector = vcpu->arch.sipi_vector; |
| 2117 | 2238 | ||
| 2118 | events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING | 2239 | events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING |
| 2119 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR); | 2240 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR |
| 2241 | | KVM_VCPUEVENT_VALID_SHADOW); | ||
| 2120 | 2242 | ||
| 2121 | vcpu_put(vcpu); | 2243 | vcpu_put(vcpu); |
| 2122 | } | 2244 | } |
| @@ -2125,7 +2247,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
| 2125 | struct kvm_vcpu_events *events) | 2247 | struct kvm_vcpu_events *events) |
| 2126 | { | 2248 | { |
| 2127 | if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING | 2249 | if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING |
| 2128 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR)) | 2250 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR |
| 2251 | | KVM_VCPUEVENT_VALID_SHADOW)) | ||
| 2129 | return -EINVAL; | 2252 | return -EINVAL; |
| 2130 | 2253 | ||
| 2131 | vcpu_load(vcpu); | 2254 | vcpu_load(vcpu); |
| @@ -2140,6 +2263,9 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
| 2140 | vcpu->arch.interrupt.soft = events->interrupt.soft; | 2263 | vcpu->arch.interrupt.soft = events->interrupt.soft; |
| 2141 | if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm)) | 2264 | if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm)) |
| 2142 | kvm_pic_clear_isr_ack(vcpu->kvm); | 2265 | kvm_pic_clear_isr_ack(vcpu->kvm); |
| 2266 | if (events->flags & KVM_VCPUEVENT_VALID_SHADOW) | ||
| 2267 | kvm_x86_ops->set_interrupt_shadow(vcpu, | ||
| 2268 | events->interrupt.shadow); | ||
| 2143 | 2269 | ||
| 2144 | vcpu->arch.nmi_injected = events->nmi.injected; | 2270 | vcpu->arch.nmi_injected = events->nmi.injected; |
| 2145 | if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) | 2271 | if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) |
| @@ -2154,6 +2280,36 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
| 2154 | return 0; | 2280 | return 0; |
| 2155 | } | 2281 | } |
| 2156 | 2282 | ||
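The hunks above extend the KVM_GET/SET_VCPU_EVENTS state with the interrupt-shadow bits reported by get_interrupt_shadow(), advertised through the new KVM_VCPUEVENT_VALID_SHADOW flag so that older userspace keeps working unchanged. A hedged user-space sketch of a save/restore path that honors the flag; constants and the struct layout are assumed to come from <linux/kvm.h>, and the two vcpu file descriptors are hypothetical:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Copy pending event state from one vcpu to another, only trusting the
     * shadow field when the kernel says it is valid. */
    int migrate_vcpu_events(int src_vcpu_fd, int dst_vcpu_fd)
    {
        struct kvm_vcpu_events ev;

        if (ioctl(src_vcpu_fd, KVM_GET_VCPU_EVENTS, &ev) < 0)
            return -1;
        if (!(ev.flags & KVM_VCPUEVENT_VALID_SHADOW))
            ev.interrupt.shadow = 0;    /* older kernel: shadow state not reported */
        /* only ask the destination to honor flag bits it understands */
        ev.flags &= KVM_VCPUEVENT_VALID_NMI_PENDING |
                    KVM_VCPUEVENT_VALID_SIPI_VECTOR |
                    KVM_VCPUEVENT_VALID_SHADOW;
        return ioctl(dst_vcpu_fd, KVM_SET_VCPU_EVENTS, &ev);
    }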
| 2283 | static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, | ||
| 2284 | struct kvm_debugregs *dbgregs) | ||
| 2285 | { | ||
| 2286 | vcpu_load(vcpu); | ||
| 2287 | |||
| 2288 | memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); | ||
| 2289 | dbgregs->dr6 = vcpu->arch.dr6; | ||
| 2290 | dbgregs->dr7 = vcpu->arch.dr7; | ||
| 2291 | dbgregs->flags = 0; | ||
| 2292 | |||
| 2293 | vcpu_put(vcpu); | ||
| 2294 | } | ||
| 2295 | |||
| 2296 | static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, | ||
| 2297 | struct kvm_debugregs *dbgregs) | ||
| 2298 | { | ||
| 2299 | if (dbgregs->flags) | ||
| 2300 | return -EINVAL; | ||
| 2301 | |||
| 2302 | vcpu_load(vcpu); | ||
| 2303 | |||
| 2304 | memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); | ||
| 2305 | vcpu->arch.dr6 = dbgregs->dr6; | ||
| 2306 | vcpu->arch.dr7 = dbgregs->dr7; | ||
| 2307 | |||
| 2308 | vcpu_put(vcpu); | ||
| 2309 | |||
| 2310 | return 0; | ||
| 2311 | } | ||
| 2312 | |||
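kvm_vcpu_ioctl_x86_get_debugregs()/..._set_debugregs() above expose the guest's DR0-DR3, DR6 and DR7 to userspace; the flags word is reserved and must be zero, which is why the setter rejects anything else. A hedged sketch of how a VMM might drive the matching ioctls wired up in kvm_arch_vcpu_ioctl() below, assuming struct kvm_debugregs and the KVM_GET/SET_DEBUGREGS numbers from <linux/kvm.h> (behind KVM_CAP_DEBUGREGS) and an already-open vcpu file descriptor:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>
    #include <stdio.h>

    /* Dump the guest debug registers, then write them straight back. */
    int dump_guest_debugregs(int vcpu_fd)
    {
        struct kvm_debugregs dbg;

        if (ioctl(vcpu_fd, KVM_GET_DEBUGREGS, &dbg) < 0)
            return -1;
        for (int i = 0; i < 4; i++)
            printf("db%d = %#llx\n", i, (unsigned long long)dbg.db[i]);
        printf("dr6 = %#llx dr7 = %#llx\n",
               (unsigned long long)dbg.dr6, (unsigned long long)dbg.dr7);

        dbg.flags = 0;                  /* must be zero, as checked in the setter above */
        return ioctl(vcpu_fd, KVM_SET_DEBUGREGS, &dbg);
    }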
| 2157 | long kvm_arch_vcpu_ioctl(struct file *filp, | 2313 | long kvm_arch_vcpu_ioctl(struct file *filp, |
| 2158 | unsigned int ioctl, unsigned long arg) | 2314 | unsigned int ioctl, unsigned long arg) |
| 2159 | { | 2315 | { |
| @@ -2308,7 +2464,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
| 2308 | r = -EFAULT; | 2464 | r = -EFAULT; |
| 2309 | if (copy_from_user(&mce, argp, sizeof mce)) | 2465 | if (copy_from_user(&mce, argp, sizeof mce)) |
| 2310 | goto out; | 2466 | goto out; |
| 2467 | vcpu_load(vcpu); | ||
| 2311 | r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); | 2468 | r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); |
| 2469 | vcpu_put(vcpu); | ||
| 2312 | break; | 2470 | break; |
| 2313 | } | 2471 | } |
| 2314 | case KVM_GET_VCPU_EVENTS: { | 2472 | case KVM_GET_VCPU_EVENTS: { |
| @@ -2332,6 +2490,29 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
| 2332 | r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events); | 2490 | r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events); |
| 2333 | break; | 2491 | break; |
| 2334 | } | 2492 | } |
| 2493 | case KVM_GET_DEBUGREGS: { | ||
| 2494 | struct kvm_debugregs dbgregs; | ||
| 2495 | |||
| 2496 | kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs); | ||
| 2497 | |||
| 2498 | r = -EFAULT; | ||
| 2499 | if (copy_to_user(argp, &dbgregs, | ||
| 2500 | sizeof(struct kvm_debugregs))) | ||
| 2501 | break; | ||
| 2502 | r = 0; | ||
| 2503 | break; | ||
| 2504 | } | ||
| 2505 | case KVM_SET_DEBUGREGS: { | ||
| 2506 | struct kvm_debugregs dbgregs; | ||
| 2507 | |||
| 2508 | r = -EFAULT; | ||
| 2509 | if (copy_from_user(&dbgregs, argp, | ||
| 2510 | sizeof(struct kvm_debugregs))) | ||
| 2511 | break; | ||
| 2512 | |||
| 2513 | r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs); | ||
| 2514 | break; | ||
| 2515 | } | ||
| 2335 | default: | 2516 | default: |
| 2336 | r = -EINVAL; | 2517 | r = -EINVAL; |
| 2337 | } | 2518 | } |
| @@ -2385,7 +2566,7 @@ gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn) | |||
| 2385 | struct kvm_mem_alias *alias; | 2566 | struct kvm_mem_alias *alias; |
| 2386 | struct kvm_mem_aliases *aliases; | 2567 | struct kvm_mem_aliases *aliases; |
| 2387 | 2568 | ||
| 2388 | aliases = rcu_dereference(kvm->arch.aliases); | 2569 | aliases = kvm_aliases(kvm); |
| 2389 | 2570 | ||
| 2390 | for (i = 0; i < aliases->naliases; ++i) { | 2571 | for (i = 0; i < aliases->naliases; ++i) { |
| 2391 | alias = &aliases->aliases[i]; | 2572 | alias = &aliases->aliases[i]; |
| @@ -2404,7 +2585,7 @@ gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) | |||
| 2404 | struct kvm_mem_alias *alias; | 2585 | struct kvm_mem_alias *alias; |
| 2405 | struct kvm_mem_aliases *aliases; | 2586 | struct kvm_mem_aliases *aliases; |
| 2406 | 2587 | ||
| 2407 | aliases = rcu_dereference(kvm->arch.aliases); | 2588 | aliases = kvm_aliases(kvm); |
| 2408 | 2589 | ||
| 2409 | for (i = 0; i < aliases->naliases; ++i) { | 2590 | for (i = 0; i < aliases->naliases; ++i) { |
| 2410 | alias = &aliases->aliases[i]; | 2591 | alias = &aliases->aliases[i]; |
| @@ -2799,11 +2980,13 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
| 2799 | r = -EFAULT; | 2980 | r = -EFAULT; |
| 2800 | if (copy_from_user(&irq_event, argp, sizeof irq_event)) | 2981 | if (copy_from_user(&irq_event, argp, sizeof irq_event)) |
| 2801 | goto out; | 2982 | goto out; |
| 2983 | r = -ENXIO; | ||
| 2802 | if (irqchip_in_kernel(kvm)) { | 2984 | if (irqchip_in_kernel(kvm)) { |
| 2803 | __s32 status; | 2985 | __s32 status; |
| 2804 | status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, | 2986 | status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, |
| 2805 | irq_event.irq, irq_event.level); | 2987 | irq_event.irq, irq_event.level); |
| 2806 | if (ioctl == KVM_IRQ_LINE_STATUS) { | 2988 | if (ioctl == KVM_IRQ_LINE_STATUS) { |
| 2989 | r = -EFAULT; | ||
| 2807 | irq_event.status = status; | 2990 | irq_event.status = status; |
| 2808 | if (copy_to_user(argp, &irq_event, | 2991 | if (copy_to_user(argp, &irq_event, |
| 2809 | sizeof irq_event)) | 2992 | sizeof irq_event)) |
| @@ -3019,6 +3202,18 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) | |||
| 3019 | return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); | 3202 | return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); |
| 3020 | } | 3203 | } |
| 3021 | 3204 | ||
| 3205 | static void kvm_set_segment(struct kvm_vcpu *vcpu, | ||
| 3206 | struct kvm_segment *var, int seg) | ||
| 3207 | { | ||
| 3208 | kvm_x86_ops->set_segment(vcpu, var, seg); | ||
| 3209 | } | ||
| 3210 | |||
| 3211 | void kvm_get_segment(struct kvm_vcpu *vcpu, | ||
| 3212 | struct kvm_segment *var, int seg) | ||
| 3213 | { | ||
| 3214 | kvm_x86_ops->get_segment(vcpu, var, seg); | ||
| 3215 | } | ||
| 3216 | |||
| 3022 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) | 3217 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) |
| 3023 | { | 3218 | { |
| 3024 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | 3219 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; |
| @@ -3099,14 +3294,17 @@ static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes, | |||
| 3099 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error); | 3294 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error); |
| 3100 | } | 3295 | } |
| 3101 | 3296 | ||
| 3102 | static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes, | 3297 | static int kvm_write_guest_virt_system(gva_t addr, void *val, |
| 3103 | struct kvm_vcpu *vcpu, u32 *error) | 3298 | unsigned int bytes, |
| 3299 | struct kvm_vcpu *vcpu, | ||
| 3300 | u32 *error) | ||
| 3104 | { | 3301 | { |
| 3105 | void *data = val; | 3302 | void *data = val; |
| 3106 | int r = X86EMUL_CONTINUE; | 3303 | int r = X86EMUL_CONTINUE; |
| 3107 | 3304 | ||
| 3108 | while (bytes) { | 3305 | while (bytes) { |
| 3109 | gpa_t gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error); | 3306 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr, |
| 3307 | PFERR_WRITE_MASK, error); | ||
| 3110 | unsigned offset = addr & (PAGE_SIZE-1); | 3308 | unsigned offset = addr & (PAGE_SIZE-1); |
| 3111 | unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); | 3309 | unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); |
| 3112 | int ret; | 3310 | int ret; |
| @@ -3129,7 +3327,6 @@ out: | |||
| 3129 | return r; | 3327 | return r; |
| 3130 | } | 3328 | } |
| 3131 | 3329 | ||
| 3132 | |||
| 3133 | static int emulator_read_emulated(unsigned long addr, | 3330 | static int emulator_read_emulated(unsigned long addr, |
| 3134 | void *val, | 3331 | void *val, |
| 3135 | unsigned int bytes, | 3332 | unsigned int bytes, |
| @@ -3232,9 +3429,9 @@ mmio: | |||
| 3232 | } | 3429 | } |
| 3233 | 3430 | ||
| 3234 | int emulator_write_emulated(unsigned long addr, | 3431 | int emulator_write_emulated(unsigned long addr, |
| 3235 | const void *val, | 3432 | const void *val, |
| 3236 | unsigned int bytes, | 3433 | unsigned int bytes, |
| 3237 | struct kvm_vcpu *vcpu) | 3434 | struct kvm_vcpu *vcpu) |
| 3238 | { | 3435 | { |
| 3239 | /* Crossing a page boundary? */ | 3436 | /* Crossing a page boundary? */ |
| 3240 | if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { | 3437 | if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { |
| @@ -3252,45 +3449,150 @@ int emulator_write_emulated(unsigned long addr, | |||
| 3252 | } | 3449 | } |
| 3253 | EXPORT_SYMBOL_GPL(emulator_write_emulated); | 3450 | EXPORT_SYMBOL_GPL(emulator_write_emulated); |
| 3254 | 3451 | ||
| 3452 | #define CMPXCHG_TYPE(t, ptr, old, new) \ | ||
| 3453 | (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old)) | ||
| 3454 | |||
| 3455 | #ifdef CONFIG_X86_64 | ||
| 3456 | # define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new) | ||
| 3457 | #else | ||
| 3458 | # define CMPXCHG64(ptr, old, new) \ | ||
| 3459 | (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old)) | ||
| 3460 | #endif | ||
| 3461 | |||
| 3255 | static int emulator_cmpxchg_emulated(unsigned long addr, | 3462 | static int emulator_cmpxchg_emulated(unsigned long addr, |
| 3256 | const void *old, | 3463 | const void *old, |
| 3257 | const void *new, | 3464 | const void *new, |
| 3258 | unsigned int bytes, | 3465 | unsigned int bytes, |
| 3259 | struct kvm_vcpu *vcpu) | 3466 | struct kvm_vcpu *vcpu) |
| 3260 | { | 3467 | { |
| 3261 | printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); | 3468 | gpa_t gpa; |
| 3262 | #ifndef CONFIG_X86_64 | 3469 | struct page *page; |
| 3263 | /* guests cmpxchg8b have to be emulated atomically */ | 3470 | char *kaddr; |
| 3264 | if (bytes == 8) { | 3471 | bool exchanged; |
| 3265 | gpa_t gpa; | ||
| 3266 | struct page *page; | ||
| 3267 | char *kaddr; | ||
| 3268 | u64 val; | ||
| 3269 | 3472 | ||
| 3270 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL); | 3473 | /* guests cmpxchg8b have to be emulated atomically */ |
| 3474 | if (bytes > 8 || (bytes & (bytes - 1))) | ||
| 3475 | goto emul_write; | ||
| 3271 | 3476 | ||
| 3272 | if (gpa == UNMAPPED_GVA || | 3477 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL); |
| 3273 | (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) | ||
| 3274 | goto emul_write; | ||
| 3275 | 3478 | ||
| 3276 | if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK)) | 3479 | if (gpa == UNMAPPED_GVA || |
| 3277 | goto emul_write; | 3480 | (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) |
| 3481 | goto emul_write; | ||
| 3278 | 3482 | ||
| 3279 | val = *(u64 *)new; | 3483 | if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK)) |
| 3484 | goto emul_write; | ||
| 3280 | 3485 | ||
| 3281 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); | 3486 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); |
| 3282 | 3487 | ||
| 3283 | kaddr = kmap_atomic(page, KM_USER0); | 3488 | kaddr = kmap_atomic(page, KM_USER0); |
| 3284 | set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val); | 3489 | kaddr += offset_in_page(gpa); |
| 3285 | kunmap_atomic(kaddr, KM_USER0); | 3490 | switch (bytes) { |
| 3286 | kvm_release_page_dirty(page); | 3491 | case 1: |
| 3492 | exchanged = CMPXCHG_TYPE(u8, kaddr, old, new); | ||
| 3493 | break; | ||
| 3494 | case 2: | ||
| 3495 | exchanged = CMPXCHG_TYPE(u16, kaddr, old, new); | ||
| 3496 | break; | ||
| 3497 | case 4: | ||
| 3498 | exchanged = CMPXCHG_TYPE(u32, kaddr, old, new); | ||
| 3499 | break; | ||
| 3500 | case 8: | ||
| 3501 | exchanged = CMPXCHG64(kaddr, old, new); | ||
| 3502 | break; | ||
| 3503 | default: | ||
| 3504 | BUG(); | ||
| 3287 | } | 3505 | } |
| 3506 | kunmap_atomic(kaddr, KM_USER0); | ||
| 3507 | kvm_release_page_dirty(page); | ||
| 3508 | |||
| 3509 | if (!exchanged) | ||
| 3510 | return X86EMUL_CMPXCHG_FAILED; | ||
| 3511 | |||
| 3512 | kvm_mmu_pte_write(vcpu, gpa, new, bytes, 1); | ||
| 3513 | |||
| 3514 | return X86EMUL_CONTINUE; | ||
| 3515 | |||
| 3288 | emul_write: | 3516 | emul_write: |
| 3289 | #endif | 3517 | printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); |
| 3290 | 3518 | ||
| 3291 | return emulator_write_emulated(addr, new, bytes, vcpu); | 3519 | return emulator_write_emulated(addr, new, bytes, vcpu); |
| 3292 | } | 3520 | } |
| 3293 | 3521 | ||
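The rewritten emulator_cmpxchg_emulated() above replaces the old "always emulate the exchange as a plain write" path: it maps the guest page and performs a real compare-and-exchange of 1, 2, 4 or 8 bytes through the CMPXCHG_TYPE/CMPXCHG64 macros, falling back to a write only for odd sizes or cross-page accesses. A self-contained user-space analogue of the size dispatch, using GCC's __sync builtins rather than the kernel's cmpxchg(); the helper name is made up for illustration:

    #include <stdint.h>
    #include <stdio.h>

    /* Returns 1 if *ptr matched *old and was replaced by *new, 0 otherwise. */
    static int cmpxchg_sized(void *ptr, const void *old, const void *new, unsigned bytes)
    {
        switch (bytes) {
        case 1: return __sync_bool_compare_and_swap((uint8_t  *)ptr, *(const uint8_t  *)old, *(const uint8_t  *)new);
        case 2: return __sync_bool_compare_and_swap((uint16_t *)ptr, *(const uint16_t *)old, *(const uint16_t *)new);
        case 4: return __sync_bool_compare_and_swap((uint32_t *)ptr, *(const uint32_t *)old, *(const uint32_t *)new);
        case 8: return __sync_bool_compare_and_swap((uint64_t *)ptr, *(const uint64_t *)old, *(const uint64_t *)new);
        default: return 0;              /* only power-of-two sizes up to 8, as in the hunk */
        }
    }

    int main(void)
    {
        uint32_t mem = 0x1234, old = 0x1234, new = 0x5678;

        printf("exchanged=%d mem=%#x\n",
               cmpxchg_sized(&mem, &old, &new, sizeof(mem)), mem);
        return 0;
    }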
| 3522 | static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) | ||
| 3523 | { | ||
| 3524 | /* TODO: String I/O for in kernel device */ | ||
| 3525 | int r; | ||
| 3526 | |||
| 3527 | if (vcpu->arch.pio.in) | ||
| 3528 | r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port, | ||
| 3529 | vcpu->arch.pio.size, pd); | ||
| 3530 | else | ||
| 3531 | r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, | ||
| 3532 | vcpu->arch.pio.port, vcpu->arch.pio.size, | ||
| 3533 | pd); | ||
| 3534 | return r; | ||
| 3535 | } | ||
| 3536 | |||
| 3537 | |||
| 3538 | static int emulator_pio_in_emulated(int size, unsigned short port, void *val, | ||
| 3539 | unsigned int count, struct kvm_vcpu *vcpu) | ||
| 3540 | { | ||
| 3541 | if (vcpu->arch.pio.count) | ||
| 3542 | goto data_avail; | ||
| 3543 | |||
| 3544 | trace_kvm_pio(1, port, size, 1); | ||
| 3545 | |||
| 3546 | vcpu->arch.pio.port = port; | ||
| 3547 | vcpu->arch.pio.in = 1; | ||
| 3548 | vcpu->arch.pio.count = count; | ||
| 3549 | vcpu->arch.pio.size = size; | ||
| 3550 | |||
| 3551 | if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { | ||
| 3552 | data_avail: | ||
| 3553 | memcpy(val, vcpu->arch.pio_data, size * count); | ||
| 3554 | vcpu->arch.pio.count = 0; | ||
| 3555 | return 1; | ||
| 3556 | } | ||
| 3557 | |||
| 3558 | vcpu->run->exit_reason = KVM_EXIT_IO; | ||
| 3559 | vcpu->run->io.direction = KVM_EXIT_IO_IN; | ||
| 3560 | vcpu->run->io.size = size; | ||
| 3561 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | ||
| 3562 | vcpu->run->io.count = count; | ||
| 3563 | vcpu->run->io.port = port; | ||
| 3564 | |||
| 3565 | return 0; | ||
| 3566 | } | ||
| 3567 | |||
| 3568 | static int emulator_pio_out_emulated(int size, unsigned short port, | ||
| 3569 | const void *val, unsigned int count, | ||
| 3570 | struct kvm_vcpu *vcpu) | ||
| 3571 | { | ||
| 3572 | trace_kvm_pio(0, port, size, 1); | ||
| 3573 | |||
| 3574 | vcpu->arch.pio.port = port; | ||
| 3575 | vcpu->arch.pio.in = 0; | ||
| 3576 | vcpu->arch.pio.count = count; | ||
| 3577 | vcpu->arch.pio.size = size; | ||
| 3578 | |||
| 3579 | memcpy(vcpu->arch.pio_data, val, size * count); | ||
| 3580 | |||
| 3581 | if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { | ||
| 3582 | vcpu->arch.pio.count = 0; | ||
| 3583 | return 1; | ||
| 3584 | } | ||
| 3585 | |||
| 3586 | vcpu->run->exit_reason = KVM_EXIT_IO; | ||
| 3587 | vcpu->run->io.direction = KVM_EXIT_IO_OUT; | ||
| 3588 | vcpu->run->io.size = size; | ||
| 3589 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | ||
| 3590 | vcpu->run->io.count = count; | ||
| 3591 | vcpu->run->io.port = port; | ||
| 3592 | |||
| 3593 | return 0; | ||
| 3594 | } | ||
| 3595 | |||
| 3294 | static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) | 3596 | static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) |
| 3295 | { | 3597 | { |
| 3296 | return kvm_x86_ops->get_segment_base(vcpu, seg); | 3598 | return kvm_x86_ops->get_segment_base(vcpu, seg); |
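The new emulator_pio_in_emulated()/emulator_pio_out_emulated() helpers above first try kernel_pio(); only when no in-kernel device claims the port do they fill vcpu->run with a KVM_EXIT_IO exit and bounce to userspace. A hedged sketch of the userspace half of that protocol, assuming the usual mmap'ed kvm_run layout from <linux/kvm.h>; the two device-model handlers are hypothetical placeholders:

    #include <linux/kvm.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static void pio_dev_out(uint16_t port, const void *data, int size)
    {
        printf("out port %#x size %d\n", port, size);      /* placeholder device model */
    }

    static void pio_dev_in(uint16_t port, void *data, int size)
    {
        memset(data, 0xff, size);                          /* placeholder: float the bus */
    }

    /* Called when KVM_RUN returns with run->exit_reason == KVM_EXIT_IO. */
    void handle_kvm_exit_io(struct kvm_run *run)
    {
        /* data sits KVM_PIO_PAGE_OFFSET pages into the kvm_run mapping */
        uint8_t *p = (uint8_t *)run + run->io.data_offset;

        for (uint32_t i = 0; i < run->io.count; i++, p += run->io.size) {
            if (run->io.direction == KVM_EXIT_IO_OUT)
                pio_dev_out(run->io.port, p, run->io.size);
            else
                pio_dev_in(run->io.port, p, run->io.size); /* copied back to the guest on the next KVM_RUN */
        }
    }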
| @@ -3311,14 +3613,14 @@ int emulate_clts(struct kvm_vcpu *vcpu) | |||
| 3311 | 3613 | ||
| 3312 | int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) | 3614 | int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) |
| 3313 | { | 3615 | { |
| 3314 | return kvm_x86_ops->get_dr(ctxt->vcpu, dr, dest); | 3616 | return kvm_get_dr(ctxt->vcpu, dr, dest); |
| 3315 | } | 3617 | } |
| 3316 | 3618 | ||
| 3317 | int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) | 3619 | int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) |
| 3318 | { | 3620 | { |
| 3319 | unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; | 3621 | unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; |
| 3320 | 3622 | ||
| 3321 | return kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask); | 3623 | return kvm_set_dr(ctxt->vcpu, dr, value & mask); |
| 3322 | } | 3624 | } |
| 3323 | 3625 | ||
| 3324 | void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) | 3626 | void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) |
| @@ -3339,12 +3641,167 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) | |||
| 3339 | } | 3641 | } |
| 3340 | EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); | 3642 | EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); |
| 3341 | 3643 | ||
| 3644 | static u64 mk_cr_64(u64 curr_cr, u32 new_val) | ||
| 3645 | { | ||
| 3646 | return (curr_cr & ~((1ULL << 32) - 1)) | new_val; | ||
| 3647 | } | ||
| 3648 | |||
| 3649 | static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu) | ||
| 3650 | { | ||
| 3651 | unsigned long value; | ||
| 3652 | |||
| 3653 | switch (cr) { | ||
| 3654 | case 0: | ||
| 3655 | value = kvm_read_cr0(vcpu); | ||
| 3656 | break; | ||
| 3657 | case 2: | ||
| 3658 | value = vcpu->arch.cr2; | ||
| 3659 | break; | ||
| 3660 | case 3: | ||
| 3661 | value = vcpu->arch.cr3; | ||
| 3662 | break; | ||
| 3663 | case 4: | ||
| 3664 | value = kvm_read_cr4(vcpu); | ||
| 3665 | break; | ||
| 3666 | case 8: | ||
| 3667 | value = kvm_get_cr8(vcpu); | ||
| 3668 | break; | ||
| 3669 | default: | ||
| 3670 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); | ||
| 3671 | return 0; | ||
| 3672 | } | ||
| 3673 | |||
| 3674 | return value; | ||
| 3675 | } | ||
| 3676 | |||
| 3677 | static void emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) | ||
| 3678 | { | ||
| 3679 | switch (cr) { | ||
| 3680 | case 0: | ||
| 3681 | kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); | ||
| 3682 | break; | ||
| 3683 | case 2: | ||
| 3684 | vcpu->arch.cr2 = val; | ||
| 3685 | break; | ||
| 3686 | case 3: | ||
| 3687 | kvm_set_cr3(vcpu, val); | ||
| 3688 | break; | ||
| 3689 | case 4: | ||
| 3690 | kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); | ||
| 3691 | break; | ||
| 3692 | case 8: | ||
| 3693 | kvm_set_cr8(vcpu, val & 0xfUL); | ||
| 3694 | break; | ||
| 3695 | default: | ||
| 3696 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); | ||
| 3697 | } | ||
| 3698 | } | ||
| 3699 | |||
| 3700 | static int emulator_get_cpl(struct kvm_vcpu *vcpu) | ||
| 3701 | { | ||
| 3702 | return kvm_x86_ops->get_cpl(vcpu); | ||
| 3703 | } | ||
| 3704 | |||
| 3705 | static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu) | ||
| 3706 | { | ||
| 3707 | kvm_x86_ops->get_gdt(vcpu, dt); | ||
| 3708 | } | ||
| 3709 | |||
| 3710 | static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg, | ||
| 3711 | struct kvm_vcpu *vcpu) | ||
| 3712 | { | ||
| 3713 | struct kvm_segment var; | ||
| 3714 | |||
| 3715 | kvm_get_segment(vcpu, &var, seg); | ||
| 3716 | |||
| 3717 | if (var.unusable) | ||
| 3718 | return false; | ||
| 3719 | |||
| 3720 | if (var.g) | ||
| 3721 | var.limit >>= 12; | ||
| 3722 | set_desc_limit(desc, var.limit); | ||
| 3723 | set_desc_base(desc, (unsigned long)var.base); | ||
| 3724 | desc->type = var.type; | ||
| 3725 | desc->s = var.s; | ||
| 3726 | desc->dpl = var.dpl; | ||
| 3727 | desc->p = var.present; | ||
| 3728 | desc->avl = var.avl; | ||
| 3729 | desc->l = var.l; | ||
| 3730 | desc->d = var.db; | ||
| 3731 | desc->g = var.g; | ||
| 3732 | |||
| 3733 | return true; | ||
| 3734 | } | ||
| 3735 | |||
| 3736 | static void emulator_set_cached_descriptor(struct desc_struct *desc, int seg, | ||
| 3737 | struct kvm_vcpu *vcpu) | ||
| 3738 | { | ||
| 3739 | struct kvm_segment var; | ||
| 3740 | |||
| 3741 | /* needed to preserve selector */ | ||
| 3742 | kvm_get_segment(vcpu, &var, seg); | ||
| 3743 | |||
| 3744 | var.base = get_desc_base(desc); | ||
| 3745 | var.limit = get_desc_limit(desc); | ||
| 3746 | if (desc->g) | ||
| 3747 | var.limit = (var.limit << 12) | 0xfff; | ||
| 3748 | var.type = desc->type; | ||
| 3749 | var.present = desc->p; | ||
| 3750 | var.dpl = desc->dpl; | ||
| 3751 | var.db = desc->d; | ||
| 3752 | var.s = desc->s; | ||
| 3753 | var.l = desc->l; | ||
| 3754 | var.g = desc->g; | ||
| 3755 | var.avl = desc->avl; | ||
| 3756 | var.present = desc->p; | ||
| 3757 | var.unusable = !var.present; | ||
| 3758 | var.padding = 0; | ||
| 3759 | |||
| 3760 | kvm_set_segment(vcpu, &var, seg); | ||
| 3761 | return; | ||
| 3762 | } | ||
| 3763 | |||
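emulator_get_cached_descriptor()/..._set_cached_descriptor() above translate between struct kvm_segment and the packed struct desc_struct, including the granularity rule: with G=1 the stored 20-bit limit counts 4 KiB pages, so unpacking is (limit << 12) | 0xfff and packing shifts it back down. A tiny standalone illustration of just that conversion:

    #include <stdio.h>
    #include <stdint.h>

    /* Descriptor limit to byte limit: with G=1 the limit is in 4 KiB units. */
    static uint32_t unpack_limit(uint32_t raw_limit, int g)
    {
        return g ? (raw_limit << 12) | 0xfff : raw_limit;
    }

    /* Byte limit back to the packed descriptor form. */
    static uint32_t pack_limit(uint32_t byte_limit, int g)
    {
        return g ? byte_limit >> 12 : byte_limit;
    }

    int main(void)
    {
        uint32_t raw = 0xfffff;                            /* 20-bit limit, G=1 */

        printf("byte limit = %#x\n", unpack_limit(raw, 1));        /* 0xffffffff: 4 GiB - 1 */
        printf("repacked   = %#x\n", pack_limit(unpack_limit(raw, 1), 1));
        return 0;
    }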
| 3764 | static u16 emulator_get_segment_selector(int seg, struct kvm_vcpu *vcpu) | ||
| 3765 | { | ||
| 3766 | struct kvm_segment kvm_seg; | ||
| 3767 | |||
| 3768 | kvm_get_segment(vcpu, &kvm_seg, seg); | ||
| 3769 | return kvm_seg.selector; | ||
| 3770 | } | ||
| 3771 | |||
| 3772 | static void emulator_set_segment_selector(u16 sel, int seg, | ||
| 3773 | struct kvm_vcpu *vcpu) | ||
| 3774 | { | ||
| 3775 | struct kvm_segment kvm_seg; | ||
| 3776 | |||
| 3777 | kvm_get_segment(vcpu, &kvm_seg, seg); | ||
| 3778 | kvm_seg.selector = sel; | ||
| 3779 | kvm_set_segment(vcpu, &kvm_seg, seg); | ||
| 3780 | } | ||
| 3781 | |||
| 3782 | static void emulator_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | ||
| 3783 | { | ||
| 3784 | kvm_x86_ops->set_rflags(vcpu, rflags); | ||
| 3785 | } | ||
| 3786 | |||
| 3342 | static struct x86_emulate_ops emulate_ops = { | 3787 | static struct x86_emulate_ops emulate_ops = { |
| 3343 | .read_std = kvm_read_guest_virt_system, | 3788 | .read_std = kvm_read_guest_virt_system, |
| 3789 | .write_std = kvm_write_guest_virt_system, | ||
| 3344 | .fetch = kvm_fetch_guest_virt, | 3790 | .fetch = kvm_fetch_guest_virt, |
| 3345 | .read_emulated = emulator_read_emulated, | 3791 | .read_emulated = emulator_read_emulated, |
| 3346 | .write_emulated = emulator_write_emulated, | 3792 | .write_emulated = emulator_write_emulated, |
| 3347 | .cmpxchg_emulated = emulator_cmpxchg_emulated, | 3793 | .cmpxchg_emulated = emulator_cmpxchg_emulated, |
| 3794 | .pio_in_emulated = emulator_pio_in_emulated, | ||
| 3795 | .pio_out_emulated = emulator_pio_out_emulated, | ||
| 3796 | .get_cached_descriptor = emulator_get_cached_descriptor, | ||
| 3797 | .set_cached_descriptor = emulator_set_cached_descriptor, | ||
| 3798 | .get_segment_selector = emulator_get_segment_selector, | ||
| 3799 | .set_segment_selector = emulator_set_segment_selector, | ||
| 3800 | .get_gdt = emulator_get_gdt, | ||
| 3801 | .get_cr = emulator_get_cr, | ||
| 3802 | .set_cr = emulator_set_cr, | ||
| 3803 | .cpl = emulator_get_cpl, | ||
| 3804 | .set_rflags = emulator_set_rflags, | ||
| 3348 | }; | 3805 | }; |
| 3349 | 3806 | ||
| 3350 | static void cache_all_regs(struct kvm_vcpu *vcpu) | 3807 | static void cache_all_regs(struct kvm_vcpu *vcpu) |
| @@ -3375,14 +3832,14 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
| 3375 | cache_all_regs(vcpu); | 3832 | cache_all_regs(vcpu); |
| 3376 | 3833 | ||
| 3377 | vcpu->mmio_is_write = 0; | 3834 | vcpu->mmio_is_write = 0; |
| 3378 | vcpu->arch.pio.string = 0; | ||
| 3379 | 3835 | ||
| 3380 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { | 3836 | if (!(emulation_type & EMULTYPE_NO_DECODE)) { |
| 3381 | int cs_db, cs_l; | 3837 | int cs_db, cs_l; |
| 3382 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | 3838 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); |
| 3383 | 3839 | ||
| 3384 | vcpu->arch.emulate_ctxt.vcpu = vcpu; | 3840 | vcpu->arch.emulate_ctxt.vcpu = vcpu; |
| 3385 | vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); | 3841 | vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); |
| 3842 | vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu); | ||
| 3386 | vcpu->arch.emulate_ctxt.mode = | 3843 | vcpu->arch.emulate_ctxt.mode = |
| 3387 | (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : | 3844 | (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : |
| 3388 | (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) | 3845 | (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) |
| @@ -3391,6 +3848,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
| 3391 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; | 3848 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; |
| 3392 | 3849 | ||
| 3393 | r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); | 3850 | r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); |
| 3851 | trace_kvm_emulate_insn_start(vcpu); | ||
| 3394 | 3852 | ||
| 3395 | /* Only allow emulation of specific instructions on #UD | 3853 | /* Only allow emulation of specific instructions on #UD |
| 3396 | * (namely VMMCALL, sysenter, sysexit, syscall)*/ | 3854 | * (namely VMMCALL, sysenter, sysexit, syscall)*/ |
| @@ -3423,6 +3881,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
| 3423 | ++vcpu->stat.insn_emulation; | 3881 | ++vcpu->stat.insn_emulation; |
| 3424 | if (r) { | 3882 | if (r) { |
| 3425 | ++vcpu->stat.insn_emulation_fail; | 3883 | ++vcpu->stat.insn_emulation_fail; |
| 3884 | trace_kvm_emulate_insn_failed(vcpu); | ||
| 3426 | if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) | 3885 | if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) |
| 3427 | return EMULATE_DONE; | 3886 | return EMULATE_DONE; |
| 3428 | return EMULATE_FAIL; | 3887 | return EMULATE_FAIL; |
| @@ -3434,16 +3893,20 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
| 3434 | return EMULATE_DONE; | 3893 | return EMULATE_DONE; |
| 3435 | } | 3894 | } |
| 3436 | 3895 | ||
| 3896 | restart: | ||
| 3437 | r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); | 3897 | r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); |
| 3438 | shadow_mask = vcpu->arch.emulate_ctxt.interruptibility; | 3898 | shadow_mask = vcpu->arch.emulate_ctxt.interruptibility; |
| 3439 | 3899 | ||
| 3440 | if (r == 0) | 3900 | if (r == 0) |
| 3441 | kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask); | 3901 | kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask); |
| 3442 | 3902 | ||
| 3443 | if (vcpu->arch.pio.string) | 3903 | if (vcpu->arch.pio.count) { |
| 3904 | if (!vcpu->arch.pio.in) | ||
| 3905 | vcpu->arch.pio.count = 0; | ||
| 3444 | return EMULATE_DO_MMIO; | 3906 | return EMULATE_DO_MMIO; |
| 3907 | } | ||
| 3445 | 3908 | ||
| 3446 | if ((r || vcpu->mmio_is_write) && run) { | 3909 | if (r || vcpu->mmio_is_write) { |
| 3447 | run->exit_reason = KVM_EXIT_MMIO; | 3910 | run->exit_reason = KVM_EXIT_MMIO; |
| 3448 | run->mmio.phys_addr = vcpu->mmio_phys_addr; | 3911 | run->mmio.phys_addr = vcpu->mmio_phys_addr; |
| 3449 | memcpy(run->mmio.data, vcpu->mmio_data, 8); | 3912 | memcpy(run->mmio.data, vcpu->mmio_data, 8); |
| @@ -3453,222 +3916,41 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
| 3453 | 3916 | ||
| 3454 | if (r) { | 3917 | if (r) { |
| 3455 | if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) | 3918 | if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) |
| 3456 | return EMULATE_DONE; | 3919 | goto done; |
| 3457 | if (!vcpu->mmio_needed) { | 3920 | if (!vcpu->mmio_needed) { |
| 3921 | ++vcpu->stat.insn_emulation_fail; | ||
| 3922 | trace_kvm_emulate_insn_failed(vcpu); | ||
| 3458 | kvm_report_emulation_failure(vcpu, "mmio"); | 3923 | kvm_report_emulation_failure(vcpu, "mmio"); |
| 3459 | return EMULATE_FAIL; | 3924 | return EMULATE_FAIL; |
| 3460 | } | 3925 | } |
| 3461 | return EMULATE_DO_MMIO; | 3926 | return EMULATE_DO_MMIO; |
| 3462 | } | 3927 | } |
| 3463 | 3928 | ||
| 3464 | kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); | ||
| 3465 | |||
| 3466 | if (vcpu->mmio_is_write) { | 3929 | if (vcpu->mmio_is_write) { |
| 3467 | vcpu->mmio_needed = 0; | 3930 | vcpu->mmio_needed = 0; |
| 3468 | return EMULATE_DO_MMIO; | 3931 | return EMULATE_DO_MMIO; |
| 3469 | } | 3932 | } |
| 3470 | 3933 | ||
| 3471 | return EMULATE_DONE; | 3934 | done: |
| 3472 | } | 3935 | if (vcpu->arch.exception.pending) |
| 3473 | EXPORT_SYMBOL_GPL(emulate_instruction); | 3936 | vcpu->arch.emulate_ctxt.restart = false; |
| 3474 | |||
| 3475 | static int pio_copy_data(struct kvm_vcpu *vcpu) | ||
| 3476 | { | ||
| 3477 | void *p = vcpu->arch.pio_data; | ||
| 3478 | gva_t q = vcpu->arch.pio.guest_gva; | ||
| 3479 | unsigned bytes; | ||
| 3480 | int ret; | ||
| 3481 | u32 error_code; | ||
| 3482 | |||
| 3483 | bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count; | ||
| 3484 | if (vcpu->arch.pio.in) | ||
| 3485 | ret = kvm_write_guest_virt(q, p, bytes, vcpu, &error_code); | ||
| 3486 | else | ||
| 3487 | ret = kvm_read_guest_virt(q, p, bytes, vcpu, &error_code); | ||
| 3488 | |||
| 3489 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
| 3490 | kvm_inject_page_fault(vcpu, q, error_code); | ||
| 3491 | |||
| 3492 | return ret; | ||
| 3493 | } | ||
| 3494 | |||
| 3495 | int complete_pio(struct kvm_vcpu *vcpu) | ||
| 3496 | { | ||
| 3497 | struct kvm_pio_request *io = &vcpu->arch.pio; | ||
| 3498 | long delta; | ||
| 3499 | int r; | ||
| 3500 | unsigned long val; | ||
| 3501 | |||
| 3502 | if (!io->string) { | ||
| 3503 | if (io->in) { | ||
| 3504 | val = kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
| 3505 | memcpy(&val, vcpu->arch.pio_data, io->size); | ||
| 3506 | kvm_register_write(vcpu, VCPU_REGS_RAX, val); | ||
| 3507 | } | ||
| 3508 | } else { | ||
| 3509 | if (io->in) { | ||
| 3510 | r = pio_copy_data(vcpu); | ||
| 3511 | if (r) | ||
| 3512 | goto out; | ||
| 3513 | } | ||
| 3514 | |||
| 3515 | delta = 1; | ||
| 3516 | if (io->rep) { | ||
| 3517 | delta *= io->cur_count; | ||
| 3518 | /* | ||
| 3519 | * The size of the register should really depend on | ||
| 3520 | * current address size. | ||
| 3521 | */ | ||
| 3522 | val = kvm_register_read(vcpu, VCPU_REGS_RCX); | ||
| 3523 | val -= delta; | ||
| 3524 | kvm_register_write(vcpu, VCPU_REGS_RCX, val); | ||
| 3525 | } | ||
| 3526 | if (io->down) | ||
| 3527 | delta = -delta; | ||
| 3528 | delta *= io->size; | ||
| 3529 | if (io->in) { | ||
| 3530 | val = kvm_register_read(vcpu, VCPU_REGS_RDI); | ||
| 3531 | val += delta; | ||
| 3532 | kvm_register_write(vcpu, VCPU_REGS_RDI, val); | ||
| 3533 | } else { | ||
| 3534 | val = kvm_register_read(vcpu, VCPU_REGS_RSI); | ||
| 3535 | val += delta; | ||
| 3536 | kvm_register_write(vcpu, VCPU_REGS_RSI, val); | ||
| 3537 | } | ||
| 3538 | } | ||
| 3539 | out: | ||
| 3540 | io->count -= io->cur_count; | ||
| 3541 | io->cur_count = 0; | ||
| 3542 | |||
| 3543 | return 0; | ||
| 3544 | } | ||
| 3545 | |||
| 3546 | static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) | ||
| 3547 | { | ||
| 3548 | /* TODO: String I/O for in kernel device */ | ||
| 3549 | int r; | ||
| 3550 | |||
| 3551 | if (vcpu->arch.pio.in) | ||
| 3552 | r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port, | ||
| 3553 | vcpu->arch.pio.size, pd); | ||
| 3554 | else | ||
| 3555 | r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, | ||
| 3556 | vcpu->arch.pio.port, vcpu->arch.pio.size, | ||
| 3557 | pd); | ||
| 3558 | return r; | ||
| 3559 | } | ||
| 3560 | |||
| 3561 | static int pio_string_write(struct kvm_vcpu *vcpu) | ||
| 3562 | { | ||
| 3563 | struct kvm_pio_request *io = &vcpu->arch.pio; | ||
| 3564 | void *pd = vcpu->arch.pio_data; | ||
| 3565 | int i, r = 0; | ||
| 3566 | |||
| 3567 | for (i = 0; i < io->cur_count; i++) { | ||
| 3568 | if (kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, | ||
| 3569 | io->port, io->size, pd)) { | ||
| 3570 | r = -EOPNOTSUPP; | ||
| 3571 | break; | ||
| 3572 | } | ||
| 3573 | pd += io->size; | ||
| 3574 | } | ||
| 3575 | return r; | ||
| 3576 | } | ||
| 3577 | |||
| 3578 | int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port) | ||
| 3579 | { | ||
| 3580 | unsigned long val; | ||
| 3581 | 3937 | ||
| 3582 | trace_kvm_pio(!in, port, size, 1); | 3938 | if (vcpu->arch.emulate_ctxt.restart) |
| 3939 | goto restart; | ||
| 3583 | 3940 | ||
| 3584 | vcpu->run->exit_reason = KVM_EXIT_IO; | 3941 | return EMULATE_DONE; |
| 3585 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; | ||
| 3586 | vcpu->run->io.size = vcpu->arch.pio.size = size; | ||
| 3587 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | ||
| 3588 | vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = 1; | ||
| 3589 | vcpu->run->io.port = vcpu->arch.pio.port = port; | ||
| 3590 | vcpu->arch.pio.in = in; | ||
| 3591 | vcpu->arch.pio.string = 0; | ||
| 3592 | vcpu->arch.pio.down = 0; | ||
| 3593 | vcpu->arch.pio.rep = 0; | ||
| 3594 | |||
| 3595 | if (!vcpu->arch.pio.in) { | ||
| 3596 | val = kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
| 3597 | memcpy(vcpu->arch.pio_data, &val, 4); | ||
| 3598 | } | ||
| 3599 | |||
| 3600 | if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { | ||
| 3601 | complete_pio(vcpu); | ||
| 3602 | return 1; | ||
| 3603 | } | ||
| 3604 | return 0; | ||
| 3605 | } | 3942 | } |
| 3606 | EXPORT_SYMBOL_GPL(kvm_emulate_pio); | 3943 | EXPORT_SYMBOL_GPL(emulate_instruction); |
| 3607 | 3944 | ||
| 3608 | int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, | 3945 | int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) |
| 3609 | int size, unsigned long count, int down, | ||
| 3610 | gva_t address, int rep, unsigned port) | ||
| 3611 | { | 3946 | { |
| 3612 | unsigned now, in_page; | 3947 | unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); |
| 3613 | int ret = 0; | 3948 | int ret = emulator_pio_out_emulated(size, port, &val, 1, vcpu); |
| 3614 | 3949 | /* do not return to emulator after return from userspace */ | |
| 3615 | trace_kvm_pio(!in, port, size, count); | 3950 | vcpu->arch.pio.count = 0; |
| 3616 | |||
| 3617 | vcpu->run->exit_reason = KVM_EXIT_IO; | ||
| 3618 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; | ||
| 3619 | vcpu->run->io.size = vcpu->arch.pio.size = size; | ||
| 3620 | vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; | ||
| 3621 | vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = count; | ||
| 3622 | vcpu->run->io.port = vcpu->arch.pio.port = port; | ||
| 3623 | vcpu->arch.pio.in = in; | ||
| 3624 | vcpu->arch.pio.string = 1; | ||
| 3625 | vcpu->arch.pio.down = down; | ||
| 3626 | vcpu->arch.pio.rep = rep; | ||
| 3627 | |||
| 3628 | if (!count) { | ||
| 3629 | kvm_x86_ops->skip_emulated_instruction(vcpu); | ||
| 3630 | return 1; | ||
| 3631 | } | ||
| 3632 | |||
| 3633 | if (!down) | ||
| 3634 | in_page = PAGE_SIZE - offset_in_page(address); | ||
| 3635 | else | ||
| 3636 | in_page = offset_in_page(address) + size; | ||
| 3637 | now = min(count, (unsigned long)in_page / size); | ||
| 3638 | if (!now) | ||
| 3639 | now = 1; | ||
| 3640 | if (down) { | ||
| 3641 | /* | ||
| 3642 | * String I/O in reverse. Yuck. Kill the guest, fix later. | ||
| 3643 | */ | ||
| 3644 | pr_unimpl(vcpu, "guest string pio down\n"); | ||
| 3645 | kvm_inject_gp(vcpu, 0); | ||
| 3646 | return 1; | ||
| 3647 | } | ||
| 3648 | vcpu->run->io.count = now; | ||
| 3649 | vcpu->arch.pio.cur_count = now; | ||
| 3650 | |||
| 3651 | if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count) | ||
| 3652 | kvm_x86_ops->skip_emulated_instruction(vcpu); | ||
| 3653 | |||
| 3654 | vcpu->arch.pio.guest_gva = address; | ||
| 3655 | |||
| 3656 | if (!vcpu->arch.pio.in) { | ||
| 3657 | /* string PIO write */ | ||
| 3658 | ret = pio_copy_data(vcpu); | ||
| 3659 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
| 3660 | return 1; | ||
| 3661 | if (ret == 0 && !pio_string_write(vcpu)) { | ||
| 3662 | complete_pio(vcpu); | ||
| 3663 | if (vcpu->arch.pio.count == 0) | ||
| 3664 | ret = 1; | ||
| 3665 | } | ||
| 3666 | } | ||
| 3667 | /* no string PIO read support yet */ | ||
| 3668 | |||
| 3669 | return ret; | 3951 | return ret; |
| 3670 | } | 3952 | } |
| 3671 | EXPORT_SYMBOL_GPL(kvm_emulate_pio_string); | 3953 | EXPORT_SYMBOL_GPL(kvm_fast_pio_out); |
| 3672 | 3954 | ||
| 3673 | static void bounce_off(void *info) | 3955 | static void bounce_off(void *info) |
| 3674 | { | 3956 | { |
| @@ -3743,6 +4025,51 @@ static void kvm_timer_init(void) | |||
| 3743 | } | 4025 | } |
| 3744 | } | 4026 | } |
| 3745 | 4027 | ||
| 4028 | static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu); | ||
| 4029 | |||
| 4030 | static int kvm_is_in_guest(void) | ||
| 4031 | { | ||
| 4032 | return percpu_read(current_vcpu) != NULL; | ||
| 4033 | } | ||
| 4034 | |||
| 4035 | static int kvm_is_user_mode(void) | ||
| 4036 | { | ||
| 4037 | int user_mode = 3; | ||
| 4038 | |||
| 4039 | if (percpu_read(current_vcpu)) | ||
| 4040 | user_mode = kvm_x86_ops->get_cpl(percpu_read(current_vcpu)); | ||
| 4041 | |||
| 4042 | return user_mode != 0; | ||
| 4043 | } | ||
| 4044 | |||
| 4045 | static unsigned long kvm_get_guest_ip(void) | ||
| 4046 | { | ||
| 4047 | unsigned long ip = 0; | ||
| 4048 | |||
| 4049 | if (percpu_read(current_vcpu)) | ||
| 4050 | ip = kvm_rip_read(percpu_read(current_vcpu)); | ||
| 4051 | |||
| 4052 | return ip; | ||
| 4053 | } | ||
| 4054 | |||
| 4055 | static struct perf_guest_info_callbacks kvm_guest_cbs = { | ||
| 4056 | .is_in_guest = kvm_is_in_guest, | ||
| 4057 | .is_user_mode = kvm_is_user_mode, | ||
| 4058 | .get_guest_ip = kvm_get_guest_ip, | ||
| 4059 | }; | ||
| 4060 | |||
| 4061 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu) | ||
| 4062 | { | ||
| 4063 | percpu_write(current_vcpu, vcpu); | ||
| 4064 | } | ||
| 4065 | EXPORT_SYMBOL_GPL(kvm_before_handle_nmi); | ||
| 4066 | |||
| 4067 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu) | ||
| 4068 | { | ||
| 4069 | percpu_write(current_vcpu, NULL); | ||
| 4070 | } | ||
| 4071 | EXPORT_SYMBOL_GPL(kvm_after_handle_nmi); | ||
| 4072 | |||
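The block above lets perf attribute NMI samples to a guest: kvm_before_handle_nmi()/kvm_after_handle_nmi() publish the running vcpu in a per-CPU variable, and kvm_guest_cbs hands perf three callbacks to query it. A rough user-space analogue of the same pattern, with thread-local storage standing in for the per-CPU variable; every name here is illustrative only:

    #include <stdio.h>
    #include <stddef.h>

    struct guest_info_callbacks {
        int (*is_in_guest)(void);
        unsigned long (*get_guest_ip)(void);
    };

    static __thread void *current_vcpu;                    /* stands in for the per-CPU variable */

    static int demo_is_in_guest(void) { return current_vcpu != NULL; }
    static unsigned long demo_get_guest_ip(void) { return current_vcpu ? 0x1000 : 0; }

    static struct guest_info_callbacks demo_cbs = {
        .is_in_guest  = demo_is_in_guest,
        .get_guest_ip = demo_get_guest_ip,
    };

    int main(void)
    {
        int dummy;

        current_vcpu = &dummy;                             /* kvm_before_handle_nmi() equivalent */
        printf("in_guest=%d ip=%#lx\n", demo_cbs.is_in_guest(), demo_cbs.get_guest_ip());
        current_vcpu = NULL;                               /* kvm_after_handle_nmi() equivalent */
        return 0;
    }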
| 3746 | int kvm_arch_init(void *opaque) | 4073 | int kvm_arch_init(void *opaque) |
| 3747 | { | 4074 | { |
| 3748 | int r; | 4075 | int r; |
| @@ -3779,6 +4106,8 @@ int kvm_arch_init(void *opaque) | |||
| 3779 | 4106 | ||
| 3780 | kvm_timer_init(); | 4107 | kvm_timer_init(); |
| 3781 | 4108 | ||
| 4109 | perf_register_guest_info_callbacks(&kvm_guest_cbs); | ||
| 4110 | |||
| 3782 | return 0; | 4111 | return 0; |
| 3783 | 4112 | ||
| 3784 | out: | 4113 | out: |
| @@ -3787,6 +4116,8 @@ out: | |||
| 3787 | 4116 | ||
| 3788 | void kvm_arch_exit(void) | 4117 | void kvm_arch_exit(void) |
| 3789 | { | 4118 | { |
| 4119 | perf_unregister_guest_info_callbacks(&kvm_guest_cbs); | ||
| 4120 | |||
| 3790 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) | 4121 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) |
| 3791 | cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block, | 4122 | cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block, |
| 3792 | CPUFREQ_TRANSITION_NOTIFIER); | 4123 | CPUFREQ_TRANSITION_NOTIFIER); |
| @@ -3942,85 +4273,20 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu) | |||
| 3942 | return emulator_write_emulated(rip, instruction, 3, vcpu); | 4273 | return emulator_write_emulated(rip, instruction, 3, vcpu); |
| 3943 | } | 4274 | } |
| 3944 | 4275 | ||
| 3945 | static u64 mk_cr_64(u64 curr_cr, u32 new_val) | ||
| 3946 | { | ||
| 3947 | return (curr_cr & ~((1ULL << 32) - 1)) | new_val; | ||
| 3948 | } | ||
| 3949 | |||
| 3950 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) | 4276 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) |
| 3951 | { | 4277 | { |
| 3952 | struct descriptor_table dt = { limit, base }; | 4278 | struct desc_ptr dt = { limit, base }; |
| 3953 | 4279 | ||
| 3954 | kvm_x86_ops->set_gdt(vcpu, &dt); | 4280 | kvm_x86_ops->set_gdt(vcpu, &dt); |
| 3955 | } | 4281 | } |
| 3956 | 4282 | ||
| 3957 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) | 4283 | void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) |
| 3958 | { | 4284 | { |
| 3959 | struct descriptor_table dt = { limit, base }; | 4285 | struct desc_ptr dt = { limit, base }; |
| 3960 | 4286 | ||
| 3961 | kvm_x86_ops->set_idt(vcpu, &dt); | 4287 | kvm_x86_ops->set_idt(vcpu, &dt); |
| 3962 | } | 4288 | } |
| 3963 | 4289 | ||
| 3964 | void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, | ||
| 3965 | unsigned long *rflags) | ||
| 3966 | { | ||
| 3967 | kvm_lmsw(vcpu, msw); | ||
| 3968 | *rflags = kvm_get_rflags(vcpu); | ||
| 3969 | } | ||
| 3970 | |||
| 3971 | unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) | ||
| 3972 | { | ||
| 3973 | unsigned long value; | ||
| 3974 | |||
| 3975 | switch (cr) { | ||
| 3976 | case 0: | ||
| 3977 | value = kvm_read_cr0(vcpu); | ||
| 3978 | break; | ||
| 3979 | case 2: | ||
| 3980 | value = vcpu->arch.cr2; | ||
| 3981 | break; | ||
| 3982 | case 3: | ||
| 3983 | value = vcpu->arch.cr3; | ||
| 3984 | break; | ||
| 3985 | case 4: | ||
| 3986 | value = kvm_read_cr4(vcpu); | ||
| 3987 | break; | ||
| 3988 | case 8: | ||
| 3989 | value = kvm_get_cr8(vcpu); | ||
| 3990 | break; | ||
| 3991 | default: | ||
| 3992 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); | ||
| 3993 | return 0; | ||
| 3994 | } | ||
| 3995 | |||
| 3996 | return value; | ||
| 3997 | } | ||
| 3998 | |||
| 3999 | void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, | ||
| 4000 | unsigned long *rflags) | ||
| 4001 | { | ||
| 4002 | switch (cr) { | ||
| 4003 | case 0: | ||
| 4004 | kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); | ||
| 4005 | *rflags = kvm_get_rflags(vcpu); | ||
| 4006 | break; | ||
| 4007 | case 2: | ||
| 4008 | vcpu->arch.cr2 = val; | ||
| 4009 | break; | ||
| 4010 | case 3: | ||
| 4011 | kvm_set_cr3(vcpu, val); | ||
| 4012 | break; | ||
| 4013 | case 4: | ||
| 4014 | kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); | ||
| 4015 | break; | ||
| 4016 | case 8: | ||
| 4017 | kvm_set_cr8(vcpu, val & 0xfUL); | ||
| 4018 | break; | ||
| 4019 | default: | ||
| 4020 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); | ||
| 4021 | } | ||
| 4022 | } | ||
| 4023 | |||
| 4024 | static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) | 4290 | static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) |
| 4025 | { | 4291 | { |
| 4026 | struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i]; | 4292 | struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i]; |
| @@ -4084,9 +4350,13 @@ int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) | |||
| 4084 | { | 4350 | { |
| 4085 | struct kvm_cpuid_entry2 *best; | 4351 | struct kvm_cpuid_entry2 *best; |
| 4086 | 4352 | ||
| 4353 | best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0); | ||
| 4354 | if (!best || best->eax < 0x80000008) | ||
| 4355 | goto not_found; | ||
| 4087 | best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); | 4356 | best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); |
| 4088 | if (best) | 4357 | if (best) |
| 4089 | return best->eax & 0xff; | 4358 | return best->eax & 0xff; |
| 4359 | not_found: | ||
| 4090 | return 36; | 4360 | return 36; |
| 4091 | } | 4361 | } |
| 4092 | 4362 | ||
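The cpuid_maxphyaddr() change above adds a guard: leaf 0x80000008 is consulted only if leaf 0x80000000 reports it exists, otherwise the architectural default of 36 bits is used. The same lookup against the host CPU, as a minimal sketch with GCC's <cpuid.h> (KVM itself reads the guest's cached cpuid entries instead):

    #include <stdio.h>
    #include <cpuid.h>

    static int maxphyaddr(void)
    {
        unsigned int eax, ebx, ecx, edx;

        /* guard: is the extended leaf actually advertised? */
        if (!__get_cpuid(0x80000000, &eax, &ebx, &ecx, &edx) || eax < 0x80000008)
            return 36;                                     /* architectural default */
        if (!__get_cpuid(0x80000008, &eax, &ebx, &ecx, &edx))
            return 36;
        return eax & 0xff;                                 /* bits 7:0 = physical address width */
    }

    int main(void)
    {
        printf("MAXPHYADDR = %d bits\n", maxphyaddr());
        return 0;
    }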
| @@ -4200,9 +4470,13 @@ static void inject_pending_event(struct kvm_vcpu *vcpu) | |||
| 4200 | { | 4470 | { |
| 4201 | /* try to reinject previous events if any */ | 4471 | /* try to reinject previous events if any */ |
| 4202 | if (vcpu->arch.exception.pending) { | 4472 | if (vcpu->arch.exception.pending) { |
| 4473 | trace_kvm_inj_exception(vcpu->arch.exception.nr, | ||
| 4474 | vcpu->arch.exception.has_error_code, | ||
| 4475 | vcpu->arch.exception.error_code); | ||
| 4203 | kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr, | 4476 | kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr, |
| 4204 | vcpu->arch.exception.has_error_code, | 4477 | vcpu->arch.exception.has_error_code, |
| 4205 | vcpu->arch.exception.error_code); | 4478 | vcpu->arch.exception.error_code, |
| 4479 | vcpu->arch.exception.reinject); | ||
| 4206 | return; | 4480 | return; |
| 4207 | } | 4481 | } |
| 4208 | 4482 | ||
| @@ -4432,7 +4706,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
| 4432 | } | 4706 | } |
| 4433 | 4707 | ||
| 4434 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); | 4708 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); |
| 4435 | post_kvm_run_save(vcpu); | ||
| 4436 | 4709 | ||
| 4437 | vapic_exit(vcpu); | 4710 | vapic_exit(vcpu); |
| 4438 | 4711 | ||
| @@ -4460,26 +4733,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 4460 | if (!irqchip_in_kernel(vcpu->kvm)) | 4733 | if (!irqchip_in_kernel(vcpu->kvm)) |
| 4461 | kvm_set_cr8(vcpu, kvm_run->cr8); | 4734 | kvm_set_cr8(vcpu, kvm_run->cr8); |
| 4462 | 4735 | ||
| 4463 | if (vcpu->arch.pio.cur_count) { | 4736 | if (vcpu->arch.pio.count || vcpu->mmio_needed || |
| 4464 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | 4737 | vcpu->arch.emulate_ctxt.restart) { |
| 4465 | r = complete_pio(vcpu); | 4738 | if (vcpu->mmio_needed) { |
| 4466 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | 4739 | memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); |
| 4467 | if (r) | 4740 | vcpu->mmio_read_completed = 1; |
| 4468 | goto out; | 4741 | vcpu->mmio_needed = 0; |
| 4469 | } | 4742 | } |
| 4470 | if (vcpu->mmio_needed) { | ||
| 4471 | memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); | ||
| 4472 | vcpu->mmio_read_completed = 1; | ||
| 4473 | vcpu->mmio_needed = 0; | ||
| 4474 | |||
| 4475 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | 4743 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); |
| 4476 | r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0, | 4744 | r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE); |
| 4477 | EMULTYPE_NO_DECODE); | ||
| 4478 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | 4745 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); |
| 4479 | if (r == EMULATE_DO_MMIO) { | 4746 | if (r == EMULATE_DO_MMIO) { |
| 4480 | /* | ||
| 4481 | * Read-modify-write. Back to userspace. | ||
| 4482 | */ | ||
| 4483 | r = 0; | 4747 | r = 0; |
| 4484 | goto out; | 4748 | goto out; |
| 4485 | } | 4749 | } |
| @@ -4491,6 +4755,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 4491 | r = __vcpu_run(vcpu); | 4755 | r = __vcpu_run(vcpu); |
| 4492 | 4756 | ||
| 4493 | out: | 4757 | out: |
| 4758 | post_kvm_run_save(vcpu); | ||
| 4494 | if (vcpu->sigset_active) | 4759 | if (vcpu->sigset_active) |
| 4495 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | 4760 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); |
| 4496 | 4761 | ||
| @@ -4562,12 +4827,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
| 4562 | return 0; | 4827 | return 0; |
| 4563 | } | 4828 | } |
| 4564 | 4829 | ||
| 4565 | void kvm_get_segment(struct kvm_vcpu *vcpu, | ||
| 4566 | struct kvm_segment *var, int seg) | ||
| 4567 | { | ||
| 4568 | kvm_x86_ops->get_segment(vcpu, var, seg); | ||
| 4569 | } | ||
| 4570 | |||
| 4571 | void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) | 4830 | void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) |
| 4572 | { | 4831 | { |
| 4573 | struct kvm_segment cs; | 4832 | struct kvm_segment cs; |
| @@ -4581,7 +4840,7 @@ EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits); | |||
| 4581 | int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | 4840 | int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, |
| 4582 | struct kvm_sregs *sregs) | 4841 | struct kvm_sregs *sregs) |
| 4583 | { | 4842 | { |
| 4584 | struct descriptor_table dt; | 4843 | struct desc_ptr dt; |
| 4585 | 4844 | ||
| 4586 | vcpu_load(vcpu); | 4845 | vcpu_load(vcpu); |
| 4587 | 4846 | ||
| @@ -4596,11 +4855,11 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
| 4596 | kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); | 4855 | kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); |
| 4597 | 4856 | ||
| 4598 | kvm_x86_ops->get_idt(vcpu, &dt); | 4857 | kvm_x86_ops->get_idt(vcpu, &dt); |
| 4599 | sregs->idt.limit = dt.limit; | 4858 | sregs->idt.limit = dt.size; |
| 4600 | sregs->idt.base = dt.base; | 4859 | sregs->idt.base = dt.address; |
| 4601 | kvm_x86_ops->get_gdt(vcpu, &dt); | 4860 | kvm_x86_ops->get_gdt(vcpu, &dt); |
| 4602 | sregs->gdt.limit = dt.limit; | 4861 | sregs->gdt.limit = dt.size; |
| 4603 | sregs->gdt.base = dt.base; | 4862 | sregs->gdt.base = dt.address; |
| 4604 | 4863 | ||
| 4605 | sregs->cr0 = kvm_read_cr0(vcpu); | 4864 | sregs->cr0 = kvm_read_cr0(vcpu); |
| 4606 | sregs->cr2 = vcpu->arch.cr2; | 4865 | sregs->cr2 = vcpu->arch.cr2; |
| @@ -4639,563 +4898,33 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, | |||
| 4639 | return 0; | 4898 | return 0; |
| 4640 | } | 4899 | } |
| 4641 | 4900 | ||
| 4642 | static void kvm_set_segment(struct kvm_vcpu *vcpu, | 4901 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, |
| 4643 | struct kvm_segment *var, int seg) | 4902 | bool has_error_code, u32 error_code) |
| 4644 | { | ||
| 4645 | kvm_x86_ops->set_segment(vcpu, var, seg); | ||
| 4646 | } | ||
| 4647 | |||
| 4648 | static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector, | ||
| 4649 | struct kvm_segment *kvm_desct) | ||
| 4650 | { | ||
| 4651 | kvm_desct->base = get_desc_base(seg_desc); | ||
| 4652 | kvm_desct->limit = get_desc_limit(seg_desc); | ||
| 4653 | if (seg_desc->g) { | ||
| 4654 | kvm_desct->limit <<= 12; | ||
| 4655 | kvm_desct->limit |= 0xfff; | ||
| 4656 | } | ||
| 4657 | kvm_desct->selector = selector; | ||
| 4658 | kvm_desct->type = seg_desc->type; | ||
| 4659 | kvm_desct->present = seg_desc->p; | ||
| 4660 | kvm_desct->dpl = seg_desc->dpl; | ||
| 4661 | kvm_desct->db = seg_desc->d; | ||
| 4662 | kvm_desct->s = seg_desc->s; | ||
| 4663 | kvm_desct->l = seg_desc->l; | ||
| 4664 | kvm_desct->g = seg_desc->g; | ||
| 4665 | kvm_desct->avl = seg_desc->avl; | ||
| 4666 | if (!selector) | ||
| 4667 | kvm_desct->unusable = 1; | ||
| 4668 | else | ||
| 4669 | kvm_desct->unusable = 0; | ||
| 4670 | kvm_desct->padding = 0; | ||
| 4671 | } | ||
| 4672 | |||
| 4673 | static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu, | ||
| 4674 | u16 selector, | ||
| 4675 | struct descriptor_table *dtable) | ||
| 4676 | { | ||
| 4677 | if (selector & 1 << 2) { | ||
| 4678 | struct kvm_segment kvm_seg; | ||
| 4679 | |||
| 4680 | kvm_get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR); | ||
| 4681 | |||
| 4682 | if (kvm_seg.unusable) | ||
| 4683 | dtable->limit = 0; | ||
| 4684 | else | ||
| 4685 | dtable->limit = kvm_seg.limit; | ||
| 4686 | dtable->base = kvm_seg.base; | ||
| 4687 | } | ||
| 4688 | else | ||
| 4689 | kvm_x86_ops->get_gdt(vcpu, dtable); | ||
| 4690 | } | ||
| 4691 | |||
| 4692 | /* allowed just for 8 bytes segments */ | ||
| 4693 | static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | ||
| 4694 | struct desc_struct *seg_desc) | ||
| 4695 | { | ||
| 4696 | struct descriptor_table dtable; | ||
| 4697 | u16 index = selector >> 3; | ||
| 4698 | int ret; | ||
| 4699 | u32 err; | ||
| 4700 | gva_t addr; | ||
| 4701 | |||
| 4702 | get_segment_descriptor_dtable(vcpu, selector, &dtable); | ||
| 4703 | |||
| 4704 | if (dtable.limit < index * 8 + 7) { | ||
| 4705 | kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); | ||
| 4706 | return X86EMUL_PROPAGATE_FAULT; | ||
| 4707 | } | ||
| 4708 | addr = dtable.base + index * 8; | ||
| 4709 | ret = kvm_read_guest_virt_system(addr, seg_desc, sizeof(*seg_desc), | ||
| 4710 | vcpu, &err); | ||
| 4711 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
| 4712 | kvm_inject_page_fault(vcpu, addr, err); | ||
| 4713 | |||
| 4714 | return ret; | ||
| 4715 | } | ||
| 4716 | |||
| 4717 | /* allowed just for 8 bytes segments */ | ||
| 4718 | static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | ||
| 4719 | struct desc_struct *seg_desc) | ||
| 4720 | { | ||
| 4721 | struct descriptor_table dtable; | ||
| 4722 | u16 index = selector >> 3; | ||
| 4723 | |||
| 4724 | get_segment_descriptor_dtable(vcpu, selector, &dtable); | ||
| 4725 | |||
| 4726 | if (dtable.limit < index * 8 + 7) | ||
| 4727 | return 1; | ||
| 4728 | return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu, NULL); | ||
| 4729 | } | ||
| 4730 | |||
| 4731 | static gpa_t get_tss_base_addr_write(struct kvm_vcpu *vcpu, | ||
| 4732 | struct desc_struct *seg_desc) | ||
| 4733 | { | ||
| 4734 | u32 base_addr = get_desc_base(seg_desc); | ||
| 4735 | |||
| 4736 | return kvm_mmu_gva_to_gpa_write(vcpu, base_addr, NULL); | ||
| 4737 | } | ||
| 4738 | |||
| 4739 | static gpa_t get_tss_base_addr_read(struct kvm_vcpu *vcpu, | ||
| 4740 | struct desc_struct *seg_desc) | ||
| 4741 | { | ||
| 4742 | u32 base_addr = get_desc_base(seg_desc); | ||
| 4743 | |||
| 4744 | return kvm_mmu_gva_to_gpa_read(vcpu, base_addr, NULL); | ||
| 4745 | } | ||
| 4746 | |||
| 4747 | static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) | ||
| 4748 | { | ||
| 4749 | struct kvm_segment kvm_seg; | ||
| 4750 | |||
| 4751 | kvm_get_segment(vcpu, &kvm_seg, seg); | ||
| 4752 | return kvm_seg.selector; | ||
| 4753 | } | ||
| 4754 | |||
| 4755 | static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg) | ||
| 4756 | { | ||
| 4757 | struct kvm_segment segvar = { | ||
| 4758 | .base = selector << 4, | ||
| 4759 | .limit = 0xffff, | ||
| 4760 | .selector = selector, | ||
| 4761 | .type = 3, | ||
| 4762 | .present = 1, | ||
| 4763 | .dpl = 3, | ||
| 4764 | .db = 0, | ||
| 4765 | .s = 1, | ||
| 4766 | .l = 0, | ||
| 4767 | .g = 0, | ||
| 4768 | .avl = 0, | ||
| 4769 | .unusable = 0, | ||
| 4770 | }; | ||
| 4771 | kvm_x86_ops->set_segment(vcpu, &segvar, seg); | ||
| 4772 | return X86EMUL_CONTINUE; | ||
| 4773 | } | ||
| 4774 | |||
| 4775 | static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg) | ||
| 4776 | { | 4903 | { |
| 4777 | return (seg != VCPU_SREG_LDTR) && | 4904 | int cs_db, cs_l, ret; |
| 4778 | (seg != VCPU_SREG_TR) && | 4905 | cache_all_regs(vcpu); |
| 4779 | (kvm_get_rflags(vcpu) & X86_EFLAGS_VM); | ||
| 4780 | } | ||
| 4781 | |||
| 4782 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg) | ||
| 4783 | { | ||
| 4784 | struct kvm_segment kvm_seg; | ||
| 4785 | struct desc_struct seg_desc; | ||
| 4786 | u8 dpl, rpl, cpl; | ||
| 4787 | unsigned err_vec = GP_VECTOR; | ||
| 4788 | u32 err_code = 0; | ||
| 4789 | bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */ | ||
| 4790 | int ret; | ||
| 4791 | 4906 | ||
| 4792 | if (is_vm86_segment(vcpu, seg) || !is_protmode(vcpu)) | 4907 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); |
| 4793 | return kvm_load_realmode_segment(vcpu, selector, seg); | ||
| 4794 | 4908 | ||
| 4795 | /* NULL selector is not valid for TR, CS and SS */ | 4909 | vcpu->arch.emulate_ctxt.vcpu = vcpu; |
| 4796 | if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR) | 4910 | vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); |
| 4797 | && null_selector) | 4911 | vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu); |
| 4798 | goto exception; | 4912 | vcpu->arch.emulate_ctxt.mode = |
| 4913 | (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : | ||
| 4914 | (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) | ||
| 4915 | ? X86EMUL_MODE_VM86 : cs_l | ||
| 4916 | ? X86EMUL_MODE_PROT64 : cs_db | ||
| 4917 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; | ||
| 4799 | 4918 | ||
| 4800 | /* TR should be in GDT only */ | 4919 | ret = emulator_task_switch(&vcpu->arch.emulate_ctxt, &emulate_ops, |
| 4801 | if (seg == VCPU_SREG_TR && (selector & (1 << 2))) | 4920 | tss_selector, reason, has_error_code, |
| 4802 | goto exception; | 4921 | error_code); |
| 4803 | 4922 | ||
| 4804 | ret = load_guest_segment_descriptor(vcpu, selector, &seg_desc); | ||
| 4805 | if (ret) | 4923 | if (ret) |
| 4806 | return ret; | 4924 | return EMULATE_FAIL; |
| 4807 | |||
| 4808 | seg_desct_to_kvm_desct(&seg_desc, selector, &kvm_seg); | ||
| 4809 | |||
| 4810 | if (null_selector) { /* for NULL selector skip all following checks */ | ||
| 4811 | kvm_seg.unusable = 1; | ||
| 4812 | goto load; | ||
| 4813 | } | ||
| 4814 | |||
| 4815 | err_code = selector & 0xfffc; | ||
| 4816 | err_vec = GP_VECTOR; | ||
| 4817 | |||
| 4818 | /* can't load a system descriptor into a segment selector */ | ||
| 4819 | if (seg <= VCPU_SREG_GS && !kvm_seg.s) | ||
| 4820 | goto exception; | ||
| 4821 | |||
| 4822 | if (!kvm_seg.present) { | ||
| 4823 | err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR; | ||
| 4824 | goto exception; | ||
| 4825 | } | ||
| 4826 | |||
| 4827 | rpl = selector & 3; | ||
| 4828 | dpl = kvm_seg.dpl; | ||
| 4829 | cpl = kvm_x86_ops->get_cpl(vcpu); | ||
| 4830 | |||
| 4831 | switch (seg) { | ||
| 4832 | case VCPU_SREG_SS: | ||
| 4833 | /* | ||
| 4834 | * segment is not a writable data segment or segment | ||
| 4835 | * selector's RPL != CPL or DPL != CPL | ||
| 4836 | */ | ||
| 4837 | if (rpl != cpl || (kvm_seg.type & 0xa) != 0x2 || dpl != cpl) | ||
| 4838 | goto exception; | ||
| 4839 | break; | ||
| 4840 | case VCPU_SREG_CS: | ||
| 4841 | if (!(kvm_seg.type & 8)) | ||
| 4842 | goto exception; | ||
| 4843 | |||
| 4844 | if (kvm_seg.type & 4) { | ||
| 4845 | /* conforming */ | ||
| 4846 | if (dpl > cpl) | ||
| 4847 | goto exception; | ||
| 4848 | } else { | ||
| 4849 | /* nonconforming */ | ||
| 4850 | if (rpl > cpl || dpl != cpl) | ||
| 4851 | goto exception; | ||
| 4852 | } | ||
| 4853 | /* CS(RPL) <- CPL */ | ||
| 4854 | selector = (selector & 0xfffc) | cpl; | ||
| 4855 | break; | ||
| 4856 | case VCPU_SREG_TR: | ||
| 4857 | if (kvm_seg.s || (kvm_seg.type != 1 && kvm_seg.type != 9)) | ||
| 4858 | goto exception; | ||
| 4859 | break; | ||
| 4860 | case VCPU_SREG_LDTR: | ||
| 4861 | if (kvm_seg.s || kvm_seg.type != 2) | ||
| 4862 | goto exception; | ||
| 4863 | break; | ||
| 4864 | default: /* DS, ES, FS, or GS */ | ||
| 4865 | /* | ||
| 4866 | * segment is not a data or readable code segment or | ||
| 4867 | * ((segment is a data or nonconforming code segment) | ||
| 4868 | * and (both RPL and CPL > DPL)) | ||
| 4869 | */ | ||
| 4870 | if ((kvm_seg.type & 0xa) == 0x8 || | ||
| 4871 | (((kvm_seg.type & 0xc) != 0xc) && (rpl > dpl && cpl > dpl))) | ||
| 4872 | goto exception; | ||
| 4873 | break; | ||
| 4874 | } | ||
| 4875 | |||
| 4876 | if (!kvm_seg.unusable && kvm_seg.s) { | ||
| 4877 | /* mark segment as accessed */ | ||
| 4878 | kvm_seg.type |= 1; | ||
| 4879 | seg_desc.type |= 1; | ||
| 4880 | save_guest_segment_descriptor(vcpu, selector, &seg_desc); | ||
| 4881 | } | ||
| 4882 | load: | ||
| 4883 | kvm_set_segment(vcpu, &kvm_seg, seg); | ||
| 4884 | return X86EMUL_CONTINUE; | ||
| 4885 | exception: | ||
| 4886 | kvm_queue_exception_e(vcpu, err_vec, err_code); | ||
| 4887 | return X86EMUL_PROPAGATE_FAULT; | ||
| 4888 | } | ||
| 4889 | |||
| 4890 | static void save_state_to_tss32(struct kvm_vcpu *vcpu, | ||
| 4891 | struct tss_segment_32 *tss) | ||
| 4892 | { | ||
| 4893 | tss->cr3 = vcpu->arch.cr3; | ||
| 4894 | tss->eip = kvm_rip_read(vcpu); | ||
| 4895 | tss->eflags = kvm_get_rflags(vcpu); | ||
| 4896 | tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
| 4897 | tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); | ||
| 4898 | tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX); | ||
| 4899 | tss->ebx = kvm_register_read(vcpu, VCPU_REGS_RBX); | ||
| 4900 | tss->esp = kvm_register_read(vcpu, VCPU_REGS_RSP); | ||
| 4901 | tss->ebp = kvm_register_read(vcpu, VCPU_REGS_RBP); | ||
| 4902 | tss->esi = kvm_register_read(vcpu, VCPU_REGS_RSI); | ||
| 4903 | tss->edi = kvm_register_read(vcpu, VCPU_REGS_RDI); | ||
| 4904 | tss->es = get_segment_selector(vcpu, VCPU_SREG_ES); | ||
| 4905 | tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS); | ||
| 4906 | tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); | ||
| 4907 | tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS); | ||
| 4908 | tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS); | ||
| 4909 | tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS); | ||
| 4910 | tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR); | ||
| 4911 | } | ||
| 4912 | |||
| 4913 | static void kvm_load_segment_selector(struct kvm_vcpu *vcpu, u16 sel, int seg) | ||
| 4914 | { | ||
| 4915 | struct kvm_segment kvm_seg; | ||
| 4916 | kvm_get_segment(vcpu, &kvm_seg, seg); | ||
| 4917 | kvm_seg.selector = sel; | ||
| 4918 | kvm_set_segment(vcpu, &kvm_seg, seg); | ||
| 4919 | } | ||
| 4920 | |||
| 4921 | static int load_state_from_tss32(struct kvm_vcpu *vcpu, | ||
| 4922 | struct tss_segment_32 *tss) | ||
| 4923 | { | ||
| 4924 | kvm_set_cr3(vcpu, tss->cr3); | ||
| 4925 | |||
| 4926 | kvm_rip_write(vcpu, tss->eip); | ||
| 4927 | kvm_set_rflags(vcpu, tss->eflags | 2); | ||
| 4928 | |||
| 4929 | kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax); | ||
| 4930 | kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx); | ||
| 4931 | kvm_register_write(vcpu, VCPU_REGS_RDX, tss->edx); | ||
| 4932 | kvm_register_write(vcpu, VCPU_REGS_RBX, tss->ebx); | ||
| 4933 | kvm_register_write(vcpu, VCPU_REGS_RSP, tss->esp); | ||
| 4934 | kvm_register_write(vcpu, VCPU_REGS_RBP, tss->ebp); | ||
| 4935 | kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi); | ||
| 4936 | kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi); | ||
| 4937 | |||
| 4938 | /* | ||
| 4939 | * SDM says that segment selectors are loaded before segment | ||
| 4940 | * descriptors | ||
| 4941 | */ | ||
| 4942 | kvm_load_segment_selector(vcpu, tss->ldt_selector, VCPU_SREG_LDTR); | ||
| 4943 | kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES); | ||
| 4944 | kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS); | ||
| 4945 | kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS); | ||
| 4946 | kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS); | ||
| 4947 | kvm_load_segment_selector(vcpu, tss->fs, VCPU_SREG_FS); | ||
| 4948 | kvm_load_segment_selector(vcpu, tss->gs, VCPU_SREG_GS); | ||
| 4949 | |||
| 4950 | /* | ||
| 4951 | * Now load segment descriptors. If a fault happens at this stage, | ||
| 4952 | * it is handled in the context of the new task | ||
| 4953 | */ | ||
| 4954 | if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, VCPU_SREG_LDTR)) | ||
| 4955 | return 1; | ||
| 4956 | |||
| 4957 | if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES)) | ||
| 4958 | return 1; | ||
| 4959 | |||
| 4960 | if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS)) | ||
| 4961 | return 1; | ||
| 4962 | |||
| 4963 | if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS)) | ||
| 4964 | return 1; | ||
| 4965 | |||
| 4966 | if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS)) | ||
| 4967 | return 1; | ||
| 4968 | |||
| 4969 | if (kvm_load_segment_descriptor(vcpu, tss->fs, VCPU_SREG_FS)) | ||
| 4970 | return 1; | ||
| 4971 | |||
| 4972 | if (kvm_load_segment_descriptor(vcpu, tss->gs, VCPU_SREG_GS)) | ||
| 4973 | return 1; | ||
| 4974 | return 0; | ||
| 4975 | } | ||
| 4976 | |||
| 4977 | static void save_state_to_tss16(struct kvm_vcpu *vcpu, | ||
| 4978 | struct tss_segment_16 *tss) | ||
| 4979 | { | ||
| 4980 | tss->ip = kvm_rip_read(vcpu); | ||
| 4981 | tss->flag = kvm_get_rflags(vcpu); | ||
| 4982 | tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
| 4983 | tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX); | ||
| 4984 | tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX); | ||
| 4985 | tss->bx = kvm_register_read(vcpu, VCPU_REGS_RBX); | ||
| 4986 | tss->sp = kvm_register_read(vcpu, VCPU_REGS_RSP); | ||
| 4987 | tss->bp = kvm_register_read(vcpu, VCPU_REGS_RBP); | ||
| 4988 | tss->si = kvm_register_read(vcpu, VCPU_REGS_RSI); | ||
| 4989 | tss->di = kvm_register_read(vcpu, VCPU_REGS_RDI); | ||
| 4990 | |||
| 4991 | tss->es = get_segment_selector(vcpu, VCPU_SREG_ES); | ||
| 4992 | tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS); | ||
| 4993 | tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); | ||
| 4994 | tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS); | ||
| 4995 | tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR); | ||
| 4996 | } | ||
| 4997 | |||
| 4998 | static int load_state_from_tss16(struct kvm_vcpu *vcpu, | ||
| 4999 | struct tss_segment_16 *tss) | ||
| 5000 | { | ||
| 5001 | kvm_rip_write(vcpu, tss->ip); | ||
| 5002 | kvm_set_rflags(vcpu, tss->flag | 2); | ||
| 5003 | kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax); | ||
| 5004 | kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx); | ||
| 5005 | kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx); | ||
| 5006 | kvm_register_write(vcpu, VCPU_REGS_RBX, tss->bx); | ||
| 5007 | kvm_register_write(vcpu, VCPU_REGS_RSP, tss->sp); | ||
| 5008 | kvm_register_write(vcpu, VCPU_REGS_RBP, tss->bp); | ||
| 5009 | kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si); | ||
| 5010 | kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di); | ||
| 5011 | |||
| 5012 | /* | ||
| 5013 | * SDM says that segment selectors are loaded before segment | ||
| 5014 | * descriptors | ||
| 5015 | */ | ||
| 5016 | kvm_load_segment_selector(vcpu, tss->ldt, VCPU_SREG_LDTR); | ||
| 5017 | kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES); | ||
| 5018 | kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS); | ||
| 5019 | kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS); | ||
| 5020 | kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS); | ||
| 5021 | |||
| 5022 | /* | ||
| 5023 | * Now load segment descriptors. If a fault happens at this stage, | ||
| 5024 | * it is handled in the context of the new task | ||
| 5025 | */ | ||
| 5026 | if (kvm_load_segment_descriptor(vcpu, tss->ldt, VCPU_SREG_LDTR)) | ||
| 5027 | return 1; | ||
| 5028 | |||
| 5029 | if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES)) | ||
| 5030 | return 1; | ||
| 5031 | |||
| 5032 | if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS)) | ||
| 5033 | return 1; | ||
| 5034 | |||
| 5035 | if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS)) | ||
| 5036 | return 1; | ||
| 5037 | |||
| 5038 | if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS)) | ||
| 5039 | return 1; | ||
| 5040 | return 0; | ||
| 5041 | } | ||
| 5042 | |||
| 5043 | static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, | ||
| 5044 | u16 old_tss_sel, u32 old_tss_base, | ||
| 5045 | struct desc_struct *nseg_desc) | ||
| 5046 | { | ||
| 5047 | struct tss_segment_16 tss_segment_16; | ||
| 5048 | int ret = 0; | ||
| 5049 | |||
| 5050 | if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_16, | ||
| 5051 | sizeof tss_segment_16)) | ||
| 5052 | goto out; | ||
| 5053 | |||
| 5054 | save_state_to_tss16(vcpu, &tss_segment_16); | ||
| 5055 | |||
| 5056 | if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_16, | ||
| 5057 | sizeof tss_segment_16)) | ||
| 5058 | goto out; | ||
| 5059 | |||
| 5060 | if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc), | ||
| 5061 | &tss_segment_16, sizeof tss_segment_16)) | ||
| 5062 | goto out; | ||
| 5063 | |||
| 5064 | if (old_tss_sel != 0xffff) { | ||
| 5065 | tss_segment_16.prev_task_link = old_tss_sel; | ||
| 5066 | 4925 | ||
| 5067 | if (kvm_write_guest(vcpu->kvm, | 4926 | kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); |
| 5068 | get_tss_base_addr_write(vcpu, nseg_desc), | 4927 | return EMULATE_DONE; |
| 5069 | &tss_segment_16.prev_task_link, | ||
| 5070 | sizeof tss_segment_16.prev_task_link)) | ||
| 5071 | goto out; | ||
| 5072 | } | ||
| 5073 | |||
| 5074 | if (load_state_from_tss16(vcpu, &tss_segment_16)) | ||
| 5075 | goto out; | ||
| 5076 | |||
| 5077 | ret = 1; | ||
| 5078 | out: | ||
| 5079 | return ret; | ||
| 5080 | } | ||
| 5081 | |||
| 5082 | static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, | ||
| 5083 | u16 old_tss_sel, u32 old_tss_base, | ||
| 5084 | struct desc_struct *nseg_desc) | ||
| 5085 | { | ||
| 5086 | struct tss_segment_32 tss_segment_32; | ||
| 5087 | int ret = 0; | ||
| 5088 | |||
| 5089 | if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_32, | ||
| 5090 | sizeof tss_segment_32)) | ||
| 5091 | goto out; | ||
| 5092 | |||
| 5093 | save_state_to_tss32(vcpu, &tss_segment_32); | ||
| 5094 | |||
| 5095 | if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_32, | ||
| 5096 | sizeof tss_segment_32)) | ||
| 5097 | goto out; | ||
| 5098 | |||
| 5099 | if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc), | ||
| 5100 | &tss_segment_32, sizeof tss_segment_32)) | ||
| 5101 | goto out; | ||
| 5102 | |||
| 5103 | if (old_tss_sel != 0xffff) { | ||
| 5104 | tss_segment_32.prev_task_link = old_tss_sel; | ||
| 5105 | |||
| 5106 | if (kvm_write_guest(vcpu->kvm, | ||
| 5107 | get_tss_base_addr_write(vcpu, nseg_desc), | ||
| 5108 | &tss_segment_32.prev_task_link, | ||
| 5109 | sizeof tss_segment_32.prev_task_link)) | ||
| 5110 | goto out; | ||
| 5111 | } | ||
| 5112 | |||
| 5113 | if (load_state_from_tss32(vcpu, &tss_segment_32)) | ||
| 5114 | goto out; | ||
| 5115 | |||
| 5116 | ret = 1; | ||
| 5117 | out: | ||
| 5118 | return ret; | ||
| 5119 | } | ||
| 5120 | |||
| 5121 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | ||
| 5122 | { | ||
| 5123 | struct kvm_segment tr_seg; | ||
| 5124 | struct desc_struct cseg_desc; | ||
| 5125 | struct desc_struct nseg_desc; | ||
| 5126 | int ret = 0; | ||
| 5127 | u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR); | ||
| 5128 | u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR); | ||
| 5129 | u32 desc_limit; | ||
| 5130 | |||
| 5131 | old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL); | ||
| 5132 | |||
| 5133 | /* FIXME: Handle errors. Failure to read either TSS or its | ||
| 5134 | * descriptor should generate a page fault. | ||
| 5135 | */ | ||
| 5136 | if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc)) | ||
| 5137 | goto out; | ||
| 5138 | |||
| 5139 | if (load_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc)) | ||
| 5140 | goto out; | ||
| 5141 | |||
| 5142 | if (reason != TASK_SWITCH_IRET) { | ||
| 5143 | int cpl; | ||
| 5144 | |||
| 5145 | cpl = kvm_x86_ops->get_cpl(vcpu); | ||
| 5146 | if ((tss_selector & 3) > nseg_desc.dpl || cpl > nseg_desc.dpl) { | ||
| 5147 | kvm_queue_exception_e(vcpu, GP_VECTOR, 0); | ||
| 5148 | return 1; | ||
| 5149 | } | ||
| 5150 | } | ||
| 5151 | |||
| 5152 | desc_limit = get_desc_limit(&nseg_desc); | ||
| 5153 | if (!nseg_desc.p || | ||
| 5154 | ((desc_limit < 0x67 && (nseg_desc.type & 8)) || | ||
| 5155 | desc_limit < 0x2b)) { | ||
| 5156 | kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc); | ||
| 5157 | return 1; | ||
| 5158 | } | ||
| 5159 | |||
| 5160 | if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { | ||
| 5161 | cseg_desc.type &= ~(1 << 1); /* clear the busy (B) flag */ | ||
| 5162 | save_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc); | ||
| 5163 | } | ||
| 5164 | |||
| 5165 | if (reason == TASK_SWITCH_IRET) { | ||
| 5166 | u32 eflags = kvm_get_rflags(vcpu); | ||
| 5167 | kvm_set_rflags(vcpu, eflags & ~X86_EFLAGS_NT); | ||
| 5168 | } | ||
| 5169 | |||
| 5170 | /* set back link to prev task only if NT bit is set in eflags; | ||
| 5171 | note that old_tss_sel is not used after this point */ | ||
| 5172 | if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) | ||
| 5173 | old_tss_sel = 0xffff; | ||
| 5174 | |||
| 5175 | if (nseg_desc.type & 8) | ||
| 5176 | ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel, | ||
| 5177 | old_tss_base, &nseg_desc); | ||
| 5178 | else | ||
| 5179 | ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_sel, | ||
| 5180 | old_tss_base, &nseg_desc); | ||
| 5181 | |||
| 5182 | if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { | ||
| 5183 | u32 eflags = kvm_get_rflags(vcpu); | ||
| 5184 | kvm_set_rflags(vcpu, eflags | X86_EFLAGS_NT); | ||
| 5185 | } | ||
| 5186 | |||
| 5187 | if (reason != TASK_SWITCH_IRET) { | ||
| 5188 | nseg_desc.type |= (1 << 1); | ||
| 5189 | save_guest_segment_descriptor(vcpu, tss_selector, | ||
| 5190 | &nseg_desc); | ||
| 5191 | } | ||
| 5192 | |||
| 5193 | kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0(vcpu) | X86_CR0_TS); | ||
| 5194 | seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg); | ||
| 5195 | tr_seg.type = 11; | ||
| 5196 | kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR); | ||
| 5197 | out: | ||
| 5198 | return ret; | ||
| 5199 | } | 4928 | } |
| 5200 | EXPORT_SYMBOL_GPL(kvm_task_switch); | 4929 | EXPORT_SYMBOL_GPL(kvm_task_switch); |
| 5201 | 4930 | ||
| @@ -5204,15 +4933,15 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
| 5204 | { | 4933 | { |
| 5205 | int mmu_reset_needed = 0; | 4934 | int mmu_reset_needed = 0; |
| 5206 | int pending_vec, max_bits; | 4935 | int pending_vec, max_bits; |
| 5207 | struct descriptor_table dt; | 4936 | struct desc_ptr dt; |
| 5208 | 4937 | ||
| 5209 | vcpu_load(vcpu); | 4938 | vcpu_load(vcpu); |
| 5210 | 4939 | ||
| 5211 | dt.limit = sregs->idt.limit; | 4940 | dt.size = sregs->idt.limit; |
| 5212 | dt.base = sregs->idt.base; | 4941 | dt.address = sregs->idt.base; |
| 5213 | kvm_x86_ops->set_idt(vcpu, &dt); | 4942 | kvm_x86_ops->set_idt(vcpu, &dt); |
| 5214 | dt.limit = sregs->gdt.limit; | 4943 | dt.size = sregs->gdt.limit; |
| 5215 | dt.base = sregs->gdt.base; | 4944 | dt.address = sregs->gdt.base; |
| 5216 | kvm_x86_ops->set_gdt(vcpu, &dt); | 4945 | kvm_x86_ops->set_gdt(vcpu, &dt); |
| 5217 | 4946 | ||
| 5218 | vcpu->arch.cr2 = sregs->cr2; | 4947 | vcpu->arch.cr2 = sregs->cr2; |
| @@ -5311,11 +5040,9 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | |||
| 5311 | vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); | 5040 | vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); |
| 5312 | } | 5041 | } |
| 5313 | 5042 | ||
| 5314 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { | 5043 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) |
| 5315 | vcpu->arch.singlestep_cs = | 5044 | vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) + |
| 5316 | get_segment_selector(vcpu, VCPU_SREG_CS); | 5045 | get_segment_base(vcpu, VCPU_SREG_CS); |
| 5317 | vcpu->arch.singlestep_rip = kvm_rip_read(vcpu); | ||
| 5318 | } | ||
| 5319 | 5046 | ||
| 5320 | /* | 5047 | /* |
| 5321 | * Trigger an rflags update that will inject or remove the trace | 5048 | * Trigger an rflags update that will inject or remove the trace |
| @@ -5806,13 +5533,22 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) | |||
| 5806 | return kvm_x86_ops->interrupt_allowed(vcpu); | 5533 | return kvm_x86_ops->interrupt_allowed(vcpu); |
| 5807 | } | 5534 | } |
| 5808 | 5535 | ||
| 5536 | bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip) | ||
| 5537 | { | ||
| 5538 | unsigned long current_rip = kvm_rip_read(vcpu) + | ||
| 5539 | get_segment_base(vcpu, VCPU_SREG_CS); | ||
| 5540 | |||
| 5541 | return current_rip == linear_rip; | ||
| 5542 | } | ||
| 5543 | EXPORT_SYMBOL_GPL(kvm_is_linear_rip); | ||
| 5544 | |||
| 5809 | unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) | 5545 | unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) |
| 5810 | { | 5546 | { |
| 5811 | unsigned long rflags; | 5547 | unsigned long rflags; |
| 5812 | 5548 | ||
| 5813 | rflags = kvm_x86_ops->get_rflags(vcpu); | 5549 | rflags = kvm_x86_ops->get_rflags(vcpu); |
| 5814 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | 5550 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) |
| 5815 | rflags &= ~(unsigned long)(X86_EFLAGS_TF | X86_EFLAGS_RF); | 5551 | rflags &= ~X86_EFLAGS_TF; |
| 5816 | return rflags; | 5552 | return rflags; |
| 5817 | } | 5553 | } |
| 5818 | EXPORT_SYMBOL_GPL(kvm_get_rflags); | 5554 | EXPORT_SYMBOL_GPL(kvm_get_rflags); |
| @@ -5820,10 +5556,8 @@ EXPORT_SYMBOL_GPL(kvm_get_rflags); | |||
| 5820 | void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | 5556 | void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) |
| 5821 | { | 5557 | { |
| 5822 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && | 5558 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && |
| 5823 | vcpu->arch.singlestep_cs == | 5559 | kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip)) |
| 5824 | get_segment_selector(vcpu, VCPU_SREG_CS) && | 5560 | rflags |= X86_EFLAGS_TF; |
| 5825 | vcpu->arch.singlestep_rip == kvm_rip_read(vcpu)) | ||
| 5826 | rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF; | ||
| 5827 | kvm_x86_ops->set_rflags(vcpu, rflags); | 5561 | kvm_x86_ops->set_rflags(vcpu, rflags); |
| 5828 | } | 5562 | } |
| 5829 | EXPORT_SYMBOL_GPL(kvm_set_rflags); | 5563 | EXPORT_SYMBOL_GPL(kvm_set_rflags); |
| @@ -5839,3 +5573,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject); | |||
| 5839 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit); | 5573 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit); |
| 5840 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); | 5574 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); |
| 5841 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); | 5575 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); |
| 5576 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts); | ||
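Side note on the replacement task-switch path above: everything is now handed to the x86 emulator, and the emulator mode is chosen by a fairly dense nested conditional expression. Purely as a reading aid, the same selection logic is restated below as a chain of ifs. This is an illustrative sketch, not part of the patch; the helper name is invented, while the mode constants and the cs_db/cs_l bits are the ones used in the hunk above.

```c
/*
 * Illustrative restatement of the mode selection in the new
 * kvm_task_switch() path; not part of the patch.
 */
static int pick_emul_mode(int protmode, int vm86, int cs_l, int cs_db)
{
	if (!protmode)
		return X86EMUL_MODE_REAL;	/* CR0.PE clear: real mode */
	if (vm86)
		return X86EMUL_MODE_VM86;	/* EFLAGS.VM set */
	if (cs_l)
		return X86EMUL_MODE_PROT64;	/* long-mode code segment */
	if (cs_db)
		return X86EMUL_MODE_PROT32;	/* CS.D set: 32-bit protected */
	return X86EMUL_MODE_PROT16;		/* otherwise 16-bit protected */
}
```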
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 2d101639bd8d..f4b54458285b 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
| @@ -65,4 +65,14 @@ static inline int is_paging(struct kvm_vcpu *vcpu) | |||
| 65 | return kvm_read_cr0_bits(vcpu, X86_CR0_PG); | 65 | return kvm_read_cr0_bits(vcpu, X86_CR0_PG); |
| 66 | } | 66 | } |
| 67 | 67 | ||
| 68 | static inline struct kvm_mem_aliases *kvm_aliases(struct kvm *kvm) | ||
| 69 | { | ||
| 70 | return rcu_dereference_check(kvm->arch.aliases, | ||
| 71 | srcu_read_lock_held(&kvm->srcu) | ||
| 72 | || lockdep_is_held(&kvm->slots_lock)); | ||
| 73 | } | ||
| 74 | |||
| 75 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); | ||
| 76 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); | ||
| 77 | |||
| 68 | #endif | 78 | #endif |
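The new kvm_aliases() helper encodes its two legitimate calling contexts in the rcu_dereference_check() condition: either inside an SRCU read-side critical section on kvm->srcu, or with slots_lock held. A hedged sketch of the first pattern follows; the walk itself is elided, only the locking shape matters, and the function name is made up for illustration.

```c
/*
 * Sketch only: how a reader is expected to call kvm_aliases().
 * Holding the SRCU read lock satisfies the rcu_dereference_check()
 * condition shown in the hunk above.
 */
static void walk_aliases_example(struct kvm *kvm)
{
	int idx = srcu_read_lock(&kvm->srcu);
	struct kvm_mem_aliases *aliases = kvm_aliases(kvm);

	/* ... read-only use of aliases ... */
	(void)aliases;

	srcu_read_unlock(&kvm->srcu, idx);
}
```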
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 2bdf628066bd..9257510b4836 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
| @@ -1390,7 +1390,6 @@ __init void lguest_init(void) | |||
| 1390 | #endif | 1390 | #endif |
| 1391 | #ifdef CONFIG_ACPI | 1391 | #ifdef CONFIG_ACPI |
| 1392 | acpi_disabled = 1; | 1392 | acpi_disabled = 1; |
| 1393 | acpi_ht = 0; | ||
| 1394 | #endif | 1393 | #endif |
| 1395 | 1394 | ||
| 1396 | /* | 1395 | /* |
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 419386c24b82..f871e04b6965 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile | |||
| @@ -20,17 +20,18 @@ lib-y := delay.o | |||
| 20 | lib-y += thunk_$(BITS).o | 20 | lib-y += thunk_$(BITS).o |
| 21 | lib-y += usercopy_$(BITS).o getuser.o putuser.o | 21 | lib-y += usercopy_$(BITS).o getuser.o putuser.o |
| 22 | lib-y += memcpy_$(BITS).o | 22 | lib-y += memcpy_$(BITS).o |
| 23 | lib-$(CONFIG_KPROBES) += insn.o inat.o | 23 | lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o |
| 24 | 24 | ||
| 25 | obj-y += msr.o msr-reg.o msr-reg-export.o | 25 | obj-y += msr.o msr-reg.o msr-reg-export.o |
| 26 | 26 | ||
| 27 | ifeq ($(CONFIG_X86_32),y) | 27 | ifeq ($(CONFIG_X86_32),y) |
| 28 | obj-y += atomic64_32.o | 28 | obj-y += atomic64_32.o |
| 29 | lib-y += atomic64_cx8_32.o | ||
| 29 | lib-y += checksum_32.o | 30 | lib-y += checksum_32.o |
| 30 | lib-y += strstr_32.o | 31 | lib-y += strstr_32.o |
| 31 | lib-y += semaphore_32.o string_32.o | 32 | lib-y += semaphore_32.o string_32.o |
| 32 | ifneq ($(CONFIG_X86_CMPXCHG64),y) | 33 | ifneq ($(CONFIG_X86_CMPXCHG64),y) |
| 33 | lib-y += cmpxchg8b_emu.o | 34 | lib-y += cmpxchg8b_emu.o atomic64_386_32.o |
| 34 | endif | 35 | endif |
| 35 | lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o | 36 | lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o |
| 36 | else | 37 | else |
diff --git a/arch/x86/lib/atomic64_32.c b/arch/x86/lib/atomic64_32.c index 824fa0be55a3..540179e8e9fa 100644 --- a/arch/x86/lib/atomic64_32.c +++ b/arch/x86/lib/atomic64_32.c | |||
| @@ -6,225 +6,54 @@ | |||
| 6 | #include <asm/cmpxchg.h> | 6 | #include <asm/cmpxchg.h> |
| 7 | #include <asm/atomic.h> | 7 | #include <asm/atomic.h> |
| 8 | 8 | ||
| 9 | static noinline u64 cmpxchg8b(u64 *ptr, u64 old, u64 new) | 9 | long long atomic64_read_cx8(long long, const atomic64_t *v); |
| 10 | { | 10 | EXPORT_SYMBOL(atomic64_read_cx8); |
| 11 | u32 low = new; | 11 | long long atomic64_set_cx8(long long, const atomic64_t *v); |
| 12 | u32 high = new >> 32; | 12 | EXPORT_SYMBOL(atomic64_set_cx8); |
| 13 | 13 | long long atomic64_xchg_cx8(long long, unsigned high); | |
| 14 | asm volatile( | 14 | EXPORT_SYMBOL(atomic64_xchg_cx8); |
| 15 | LOCK_PREFIX "cmpxchg8b %1\n" | 15 | long long atomic64_add_return_cx8(long long a, atomic64_t *v); |
| 16 | : "+A" (old), "+m" (*ptr) | 16 | EXPORT_SYMBOL(atomic64_add_return_cx8); |
| 17 | : "b" (low), "c" (high) | 17 | long long atomic64_sub_return_cx8(long long a, atomic64_t *v); |
| 18 | ); | 18 | EXPORT_SYMBOL(atomic64_sub_return_cx8); |
| 19 | return old; | 19 | long long atomic64_inc_return_cx8(long long a, atomic64_t *v); |
| 20 | } | 20 | EXPORT_SYMBOL(atomic64_inc_return_cx8); |
| 21 | 21 | long long atomic64_dec_return_cx8(long long a, atomic64_t *v); | |
| 22 | u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old_val, u64 new_val) | 22 | EXPORT_SYMBOL(atomic64_dec_return_cx8); |
| 23 | { | 23 | long long atomic64_dec_if_positive_cx8(atomic64_t *v); |
| 24 | return cmpxchg8b(&ptr->counter, old_val, new_val); | 24 | EXPORT_SYMBOL(atomic64_dec_if_positive_cx8); |
| 25 | } | 25 | int atomic64_inc_not_zero_cx8(atomic64_t *v); |
| 26 | EXPORT_SYMBOL(atomic64_cmpxchg); | 26 | EXPORT_SYMBOL(atomic64_inc_not_zero_cx8); |
| 27 | 27 | int atomic64_add_unless_cx8(atomic64_t *v, long long a, long long u); | |
| 28 | /** | 28 | EXPORT_SYMBOL(atomic64_add_unless_cx8); |
| 29 | * atomic64_xchg - xchg atomic64 variable | 29 | |
| 30 | * @ptr: pointer to type atomic64_t | 30 | #ifndef CONFIG_X86_CMPXCHG64 |
| 31 | * @new_val: value to assign | 31 | long long atomic64_read_386(long long, const atomic64_t *v); |
| 32 | * | 32 | EXPORT_SYMBOL(atomic64_read_386); |
| 33 | * Atomically xchgs the value of @ptr to @new_val and returns | 33 | long long atomic64_set_386(long long, const atomic64_t *v); |
| 34 | * the old value. | 34 | EXPORT_SYMBOL(atomic64_set_386); |
| 35 | */ | 35 | long long atomic64_xchg_386(long long, unsigned high); |
| 36 | u64 atomic64_xchg(atomic64_t *ptr, u64 new_val) | 36 | EXPORT_SYMBOL(atomic64_xchg_386); |
| 37 | { | 37 | long long atomic64_add_return_386(long long a, atomic64_t *v); |
| 38 | /* | 38 | EXPORT_SYMBOL(atomic64_add_return_386); |
| 39 | * Try first with a (possibly incorrect) assumption about | 39 | long long atomic64_sub_return_386(long long a, atomic64_t *v); |
| 40 | * what we have there. We'll do two loops most likely, | 40 | EXPORT_SYMBOL(atomic64_sub_return_386); |
| 41 | * but we'll get an ownership MESI transaction straight away | 41 | long long atomic64_inc_return_386(long long a, atomic64_t *v); |
| 42 | * instead of a read transaction followed by a | 42 | EXPORT_SYMBOL(atomic64_inc_return_386); |
| 43 | * flush-for-ownership transaction: | 43 | long long atomic64_dec_return_386(long long a, atomic64_t *v); |
| 44 | */ | 44 | EXPORT_SYMBOL(atomic64_dec_return_386); |
| 45 | u64 old_val, real_val = 0; | 45 | long long atomic64_add_386(long long a, atomic64_t *v); |
| 46 | 46 | EXPORT_SYMBOL(atomic64_add_386); | |
| 47 | do { | 47 | long long atomic64_sub_386(long long a, atomic64_t *v); |
| 48 | old_val = real_val; | 48 | EXPORT_SYMBOL(atomic64_sub_386); |
| 49 | 49 | long long atomic64_inc_386(long long a, atomic64_t *v); | |
| 50 | real_val = atomic64_cmpxchg(ptr, old_val, new_val); | 50 | EXPORT_SYMBOL(atomic64_inc_386); |
| 51 | 51 | long long atomic64_dec_386(long long a, atomic64_t *v); | |
| 52 | } while (real_val != old_val); | 52 | EXPORT_SYMBOL(atomic64_dec_386); |
| 53 | 53 | long long atomic64_dec_if_positive_386(atomic64_t *v); | |
| 54 | return old_val; | 54 | EXPORT_SYMBOL(atomic64_dec_if_positive_386); |
| 55 | } | 55 | int atomic64_inc_not_zero_386(atomic64_t *v); |
| 56 | EXPORT_SYMBOL(atomic64_xchg); | 56 | EXPORT_SYMBOL(atomic64_inc_not_zero_386); |
| 57 | 57 | int atomic64_add_unless_386(atomic64_t *v, long long a, long long u); | |
| 58 | /** | 58 | EXPORT_SYMBOL(atomic64_add_unless_386); |
| 59 | * atomic64_set - set atomic64 variable | 59 | #endif |
| 60 | * @ptr: pointer to type atomic64_t | ||
| 61 | * @new_val: value to assign | ||
| 62 | * | ||
| 63 | * Atomically sets the value of @ptr to @new_val. | ||
| 64 | */ | ||
| 65 | void atomic64_set(atomic64_t *ptr, u64 new_val) | ||
| 66 | { | ||
| 67 | atomic64_xchg(ptr, new_val); | ||
| 68 | } | ||
| 69 | EXPORT_SYMBOL(atomic64_set); | ||
| 70 | |||
| 71 | /** | ||
| 72 | EXPORT_SYMBOL(atomic64_read); | ||
| 73 | * atomic64_add_return - add and return | ||
| 74 | * @delta: integer value to add | ||
| 75 | * @ptr: pointer to type atomic64_t | ||
| 76 | * | ||
| 77 | * Atomically adds @delta to @ptr and returns @delta + *@ptr | ||
| 78 | */ | ||
| 79 | noinline u64 atomic64_add_return(u64 delta, atomic64_t *ptr) | ||
| 80 | { | ||
| 81 | /* | ||
| 82 | * Try first with a (possibly incorrect) assumption about | ||
| 83 | * what we have there. We'll do two loops most likely, | ||
| 84 | * but we'll get an ownership MESI transaction straight away | ||
| 85 | * instead of a read transaction followed by a | ||
| 86 | * flush-for-ownership transaction: | ||
| 87 | */ | ||
| 88 | u64 old_val, new_val, real_val = 0; | ||
| 89 | |||
| 90 | do { | ||
| 91 | old_val = real_val; | ||
| 92 | new_val = old_val + delta; | ||
| 93 | |||
| 94 | real_val = atomic64_cmpxchg(ptr, old_val, new_val); | ||
| 95 | |||
| 96 | } while (real_val != old_val); | ||
| 97 | |||
| 98 | return new_val; | ||
| 99 | } | ||
| 100 | EXPORT_SYMBOL(atomic64_add_return); | ||
| 101 | |||
| 102 | u64 atomic64_sub_return(u64 delta, atomic64_t *ptr) | ||
| 103 | { | ||
| 104 | return atomic64_add_return(-delta, ptr); | ||
| 105 | } | ||
| 106 | EXPORT_SYMBOL(atomic64_sub_return); | ||
| 107 | |||
| 108 | u64 atomic64_inc_return(atomic64_t *ptr) | ||
| 109 | { | ||
| 110 | return atomic64_add_return(1, ptr); | ||
| 111 | } | ||
| 112 | EXPORT_SYMBOL(atomic64_inc_return); | ||
| 113 | |||
| 114 | u64 atomic64_dec_return(atomic64_t *ptr) | ||
| 115 | { | ||
| 116 | return atomic64_sub_return(1, ptr); | ||
| 117 | } | ||
| 118 | EXPORT_SYMBOL(atomic64_dec_return); | ||
| 119 | |||
| 120 | /** | ||
| 121 | * atomic64_add - add integer to atomic64 variable | ||
| 122 | * @delta: integer value to add | ||
| 123 | * @ptr: pointer to type atomic64_t | ||
| 124 | * | ||
| 125 | * Atomically adds @delta to @ptr. | ||
| 126 | */ | ||
| 127 | void atomic64_add(u64 delta, atomic64_t *ptr) | ||
| 128 | { | ||
| 129 | atomic64_add_return(delta, ptr); | ||
| 130 | } | ||
| 131 | EXPORT_SYMBOL(atomic64_add); | ||
| 132 | |||
| 133 | /** | ||
| 134 | * atomic64_sub - subtract the atomic64 variable | ||
| 135 | * @delta: integer value to subtract | ||
| 136 | * @ptr: pointer to type atomic64_t | ||
| 137 | * | ||
| 138 | * Atomically subtracts @delta from @ptr. | ||
| 139 | */ | ||
| 140 | void atomic64_sub(u64 delta, atomic64_t *ptr) | ||
| 141 | { | ||
| 142 | atomic64_add(-delta, ptr); | ||
| 143 | } | ||
| 144 | EXPORT_SYMBOL(atomic64_sub); | ||
| 145 | |||
| 146 | /** | ||
| 147 | * atomic64_sub_and_test - subtract value from variable and test result | ||
| 148 | * @delta: integer value to subtract | ||
| 149 | * @ptr: pointer to type atomic64_t | ||
| 150 | * | ||
| 151 | * Atomically subtracts @delta from @ptr and returns | ||
| 152 | * true if the result is zero, or false for all | ||
| 153 | * other cases. | ||
| 154 | */ | ||
| 155 | int atomic64_sub_and_test(u64 delta, atomic64_t *ptr) | ||
| 156 | { | ||
| 157 | u64 new_val = atomic64_sub_return(delta, ptr); | ||
| 158 | |||
| 159 | return new_val == 0; | ||
| 160 | } | ||
| 161 | EXPORT_SYMBOL(atomic64_sub_and_test); | ||
| 162 | |||
| 163 | /** | ||
| 164 | * atomic64_inc - increment atomic64 variable | ||
| 165 | * @ptr: pointer to type atomic64_t | ||
| 166 | * | ||
| 167 | * Atomically increments @ptr by 1. | ||
| 168 | */ | ||
| 169 | void atomic64_inc(atomic64_t *ptr) | ||
| 170 | { | ||
| 171 | atomic64_add(1, ptr); | ||
| 172 | } | ||
| 173 | EXPORT_SYMBOL(atomic64_inc); | ||
| 174 | |||
| 175 | /** | ||
| 176 | * atomic64_dec - decrement atomic64 variable | ||
| 177 | * @ptr: pointer to type atomic64_t | ||
| 178 | * | ||
| 179 | * Atomically decrements @ptr by 1. | ||
| 180 | */ | ||
| 181 | void atomic64_dec(atomic64_t *ptr) | ||
| 182 | { | ||
| 183 | atomic64_sub(1, ptr); | ||
| 184 | } | ||
| 185 | EXPORT_SYMBOL(atomic64_dec); | ||
| 186 | |||
| 187 | /** | ||
| 188 | * atomic64_dec_and_test - decrement and test | ||
| 189 | * @ptr: pointer to type atomic64_t | ||
| 190 | * | ||
| 191 | * Atomically decrements @ptr by 1 and | ||
| 192 | * returns true if the result is 0, or false for all other | ||
| 193 | * cases. | ||
| 194 | */ | ||
| 195 | int atomic64_dec_and_test(atomic64_t *ptr) | ||
| 196 | { | ||
| 197 | return atomic64_sub_and_test(1, ptr); | ||
| 198 | } | ||
| 199 | EXPORT_SYMBOL(atomic64_dec_and_test); | ||
| 200 | |||
| 201 | /** | ||
| 202 | * atomic64_inc_and_test - increment and test | ||
| 203 | * @ptr: pointer to type atomic64_t | ||
| 204 | * | ||
| 205 | * Atomically increments @ptr by 1 | ||
| 206 | * and returns true if the result is zero, or false for all | ||
| 207 | * other cases. | ||
| 208 | */ | ||
| 209 | int atomic64_inc_and_test(atomic64_t *ptr) | ||
| 210 | { | ||
| 211 | return atomic64_sub_and_test(-1, ptr); | ||
| 212 | } | ||
| 213 | EXPORT_SYMBOL(atomic64_inc_and_test); | ||
| 214 | |||
| 215 | /** | ||
| 216 | * atomic64_add_negative - add and test if negative | ||
| 217 | * @delta: integer value to add | ||
| 218 | * @ptr: pointer to type atomic64_t | ||
| 219 | * | ||
| 220 | * Atomically adds @delta to @ptr and returns true | ||
| 221 | * if the result is negative, or false when | ||
| 222 | * result is greater than or equal to zero. | ||
| 223 | */ | ||
| 224 | int atomic64_add_negative(u64 delta, atomic64_t *ptr) | ||
| 225 | { | ||
| 226 | s64 new_val = atomic64_add_return(delta, ptr); | ||
| 227 | |||
| 228 | return new_val < 0; | ||
| 229 | } | ||
| 230 | EXPORT_SYMBOL(atomic64_add_negative); | ||
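The C implementations deleted above all reduce to the same compare-and-swap retry loop around cmpxchg8b; the new *_cx8 assembly in the next file keeps that algorithm while avoiding the call and register-shuffling overhead. For reference, here is the loop in plain user-space C with a GCC builtin. This is a sketch of the algorithm only, not kernel code.

```c
#include <stdint.h>

/*
 * The retry loop behind atomic64_add_return(): snapshot, compute,
 * compare-and-swap, and start over if another CPU changed the value
 * in between.
 */
static int64_t atomic64_add_return_sketch(int64_t delta, int64_t *v)
{
	int64_t old, new;

	do {
		old = *v;		/* racy snapshot; the CAS validates it */
		new = old + delta;
	} while (__sync_val_compare_and_swap(v, old, new) != old);

	return new;
}
```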
diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S new file mode 100644 index 000000000000..4a5979aa6883 --- /dev/null +++ b/arch/x86/lib/atomic64_386_32.S | |||
| @@ -0,0 +1,174 @@ | |||
| 1 | /* | ||
| 2 | * atomic64_t for 386/486 | ||
| 3 | * | ||
| 4 | * Copyright © 2010 Luca Barbieri | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify | ||
| 7 | * it under the terms of the GNU General Public License as published by | ||
| 8 | * the Free Software Foundation; either version 2 of the License, or | ||
| 9 | * (at your option) any later version. | ||
| 10 | */ | ||
| 11 | |||
| 12 | #include <linux/linkage.h> | ||
| 13 | #include <asm/alternative-asm.h> | ||
| 14 | #include <asm/dwarf2.h> | ||
| 15 | |||
| 16 | /* if you want SMP support, implement these with real spinlocks */ | ||
| 17 | .macro LOCK reg | ||
| 18 | pushfl | ||
| 19 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 20 | cli | ||
| 21 | .endm | ||
| 22 | |||
| 23 | .macro UNLOCK reg | ||
| 24 | popfl | ||
| 25 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 26 | .endm | ||
| 27 | |||
| 28 | .macro BEGIN func reg | ||
| 29 | $v = \reg | ||
| 30 | |||
| 31 | ENTRY(atomic64_\func\()_386) | ||
| 32 | CFI_STARTPROC | ||
| 33 | LOCK $v | ||
| 34 | |||
| 35 | .macro RETURN | ||
| 36 | UNLOCK $v | ||
| 37 | ret | ||
| 38 | .endm | ||
| 39 | |||
| 40 | .macro END_ | ||
| 41 | CFI_ENDPROC | ||
| 42 | ENDPROC(atomic64_\func\()_386) | ||
| 43 | .purgem RETURN | ||
| 44 | .purgem END_ | ||
| 45 | .purgem END | ||
| 46 | .endm | ||
| 47 | |||
| 48 | .macro END | ||
| 49 | RETURN | ||
| 50 | END_ | ||
| 51 | .endm | ||
| 52 | .endm | ||
| 53 | |||
| 54 | BEGIN read %ecx | ||
| 55 | movl ($v), %eax | ||
| 56 | movl 4($v), %edx | ||
| 57 | END | ||
| 58 | |||
| 59 | BEGIN set %esi | ||
| 60 | movl %ebx, ($v) | ||
| 61 | movl %ecx, 4($v) | ||
| 62 | END | ||
| 63 | |||
| 64 | BEGIN xchg %esi | ||
| 65 | movl ($v), %eax | ||
| 66 | movl 4($v), %edx | ||
| 67 | movl %ebx, ($v) | ||
| 68 | movl %ecx, 4($v) | ||
| 69 | END | ||
| 70 | |||
| 71 | BEGIN add %ecx | ||
| 72 | addl %eax, ($v) | ||
| 73 | adcl %edx, 4($v) | ||
| 74 | END | ||
| 75 | |||
| 76 | BEGIN add_return %ecx | ||
| 77 | addl ($v), %eax | ||
| 78 | adcl 4($v), %edx | ||
| 79 | movl %eax, ($v) | ||
| 80 | movl %edx, 4($v) | ||
| 81 | END | ||
| 82 | |||
| 83 | BEGIN sub %ecx | ||
| 84 | subl %eax, ($v) | ||
| 85 | sbbl %edx, 4($v) | ||
| 86 | END | ||
| 87 | |||
| 88 | BEGIN sub_return %ecx | ||
| 89 | negl %edx | ||
| 90 | negl %eax | ||
| 91 | sbbl $0, %edx | ||
| 92 | addl ($v), %eax | ||
| 93 | adcl 4($v), %edx | ||
| 94 | movl %eax, ($v) | ||
| 95 | movl %edx, 4($v) | ||
| 96 | END | ||
| 97 | |||
| 98 | BEGIN inc %esi | ||
| 99 | addl $1, ($v) | ||
| 100 | adcl $0, 4($v) | ||
| 101 | END | ||
| 102 | |||
| 103 | BEGIN inc_return %esi | ||
| 104 | movl ($v), %eax | ||
| 105 | movl 4($v), %edx | ||
| 106 | addl $1, %eax | ||
| 107 | adcl $0, %edx | ||
| 108 | movl %eax, ($v) | ||
| 109 | movl %edx, 4($v) | ||
| 110 | END | ||
| 111 | |||
| 112 | BEGIN dec %esi | ||
| 113 | subl $1, ($v) | ||
| 114 | sbbl $0, 4($v) | ||
| 115 | END | ||
| 116 | |||
| 117 | BEGIN dec_return %esi | ||
| 118 | movl ($v), %eax | ||
| 119 | movl 4($v), %edx | ||
| 120 | subl $1, %eax | ||
| 121 | sbbl $0, %edx | ||
| 122 | movl %eax, ($v) | ||
| 123 | movl %edx, 4($v) | ||
| 124 | END | ||
| 125 | |||
| 126 | BEGIN add_unless %ecx | ||
| 127 | addl %eax, %esi | ||
| 128 | adcl %edx, %edi | ||
| 129 | addl ($v), %eax | ||
| 130 | adcl 4($v), %edx | ||
| 131 | cmpl %eax, %esi | ||
| 132 | je 3f | ||
| 133 | 1: | ||
| 134 | movl %eax, ($v) | ||
| 135 | movl %edx, 4($v) | ||
| 136 | movl $1, %eax | ||
| 137 | 2: | ||
| 138 | RETURN | ||
| 139 | 3: | ||
| 140 | cmpl %edx, %edi | ||
| 141 | jne 1b | ||
| 142 | xorl %eax, %eax | ||
| 143 | jmp 2b | ||
| 144 | END_ | ||
| 145 | |||
| 146 | BEGIN inc_not_zero %esi | ||
| 147 | movl ($v), %eax | ||
| 148 | movl 4($v), %edx | ||
| 149 | testl %eax, %eax | ||
| 150 | je 3f | ||
| 151 | 1: | ||
| 152 | addl $1, %eax | ||
| 153 | adcl $0, %edx | ||
| 154 | movl %eax, ($v) | ||
| 155 | movl %edx, 4($v) | ||
| 156 | movl $1, %eax | ||
| 157 | 2: | ||
| 158 | RETURN | ||
| 159 | 3: | ||
| 160 | testl %edx, %edx | ||
| 161 | jne 1b | ||
| 162 | jmp 2b | ||
| 163 | END_ | ||
| 164 | |||
| 165 | BEGIN dec_if_positive %esi | ||
| 166 | movl ($v), %eax | ||
| 167 | movl 4($v), %edx | ||
| 168 | subl $1, %eax | ||
| 169 | sbbl $0, %edx | ||
| 170 | js 1f | ||
| 171 | movl %eax, ($v) | ||
| 172 | movl %edx, 4($v) | ||
| 173 | 1: | ||
| 174 | END | ||
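The _386 variants above take the LOCK/UNLOCK macros at their word: pushfl/cli before the operation and popfl afterwards, i.e. a plain read-modify-write with interrupts disabled. That is only correct because this fallback is never used on SMP, as the comment at the top of the file says. Expressed in C it would look roughly like the sketch below; local_irq_save()/local_irq_restore() are the kernel equivalents of the pushfl/cli ... popfl pair, and the function name is invented.

```c
#include <linux/irqflags.h>

/*
 * Sketch of what atomic64_add_return_386 does: a plain 64-bit
 * read-modify-write made "atomic" by disabling interrupts, valid
 * only on uniprocessor 386/486-class machines.
 */
static long long atomic64_add_return_386_sketch(long long a, long long *v)
{
	unsigned long flags;
	long long ret;

	local_irq_save(flags);		/* pushfl; cli */
	*v += a;
	ret = *v;
	local_irq_restore(flags);	/* popfl */

	return ret;
}
```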
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S new file mode 100644 index 000000000000..71e080de3352 --- /dev/null +++ b/arch/x86/lib/atomic64_cx8_32.S | |||
| @@ -0,0 +1,224 @@ | |||
| 1 | /* | ||
| 2 | * atomic64_t for 586+ | ||
| 3 | * | ||
| 4 | * Copyright © 2010 Luca Barbieri | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify | ||
| 7 | * it under the terms of the GNU General Public License as published by | ||
| 8 | * the Free Software Foundation; either version 2 of the License, or | ||
| 9 | * (at your option) any later version. | ||
| 10 | */ | ||
| 11 | |||
| 12 | #include <linux/linkage.h> | ||
| 13 | #include <asm/alternative-asm.h> | ||
| 14 | #include <asm/dwarf2.h> | ||
| 15 | |||
| 16 | .macro SAVE reg | ||
| 17 | pushl %\reg | ||
| 18 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 19 | CFI_REL_OFFSET \reg, 0 | ||
| 20 | .endm | ||
| 21 | |||
| 22 | .macro RESTORE reg | ||
| 23 | popl %\reg | ||
| 24 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 25 | CFI_RESTORE \reg | ||
| 26 | .endm | ||
| 27 | |||
| 28 | .macro read64 reg | ||
| 29 | movl %ebx, %eax | ||
| 30 | movl %ecx, %edx | ||
| 31 | /* we need LOCK_PREFIX since otherwise cmpxchg8b always does the write */ | ||
| 32 | LOCK_PREFIX | ||
| 33 | cmpxchg8b (\reg) | ||
| 34 | .endm | ||
| 35 | |||
| 36 | ENTRY(atomic64_read_cx8) | ||
| 37 | CFI_STARTPROC | ||
| 38 | |||
| 39 | read64 %ecx | ||
| 40 | ret | ||
| 41 | CFI_ENDPROC | ||
| 42 | ENDPROC(atomic64_read_cx8) | ||
| 43 | |||
| 44 | ENTRY(atomic64_set_cx8) | ||
| 45 | CFI_STARTPROC | ||
| 46 | |||
| 47 | 1: | ||
| 48 | /* we don't need LOCK_PREFIX since aligned 64-bit writes | ||
| 49 | * are atomic on 586 and newer */ | ||
| 50 | cmpxchg8b (%esi) | ||
| 51 | jne 1b | ||
| 52 | |||
| 53 | ret | ||
| 54 | CFI_ENDPROC | ||
| 55 | ENDPROC(atomic64_set_cx8) | ||
| 56 | |||
| 57 | ENTRY(atomic64_xchg_cx8) | ||
| 58 | CFI_STARTPROC | ||
| 59 | |||
| 60 | movl %ebx, %eax | ||
| 61 | movl %ecx, %edx | ||
| 62 | 1: | ||
| 63 | LOCK_PREFIX | ||
| 64 | cmpxchg8b (%esi) | ||
| 65 | jne 1b | ||
| 66 | |||
| 67 | ret | ||
| 68 | CFI_ENDPROC | ||
| 69 | ENDPROC(atomic64_xchg_cx8) | ||
| 70 | |||
| 71 | .macro addsub_return func ins insc | ||
| 72 | ENTRY(atomic64_\func\()_return_cx8) | ||
| 73 | CFI_STARTPROC | ||
| 74 | SAVE ebp | ||
| 75 | SAVE ebx | ||
| 76 | SAVE esi | ||
| 77 | SAVE edi | ||
| 78 | |||
| 79 | movl %eax, %esi | ||
| 80 | movl %edx, %edi | ||
| 81 | movl %ecx, %ebp | ||
| 82 | |||
| 83 | read64 %ebp | ||
| 84 | 1: | ||
| 85 | movl %eax, %ebx | ||
| 86 | movl %edx, %ecx | ||
| 87 | \ins\()l %esi, %ebx | ||
| 88 | \insc\()l %edi, %ecx | ||
| 89 | LOCK_PREFIX | ||
| 90 | cmpxchg8b (%ebp) | ||
| 91 | jne 1b | ||
| 92 | |||
| 93 | 10: | ||
| 94 | movl %ebx, %eax | ||
| 95 | movl %ecx, %edx | ||
| 96 | RESTORE edi | ||
| 97 | RESTORE esi | ||
| 98 | RESTORE ebx | ||
| 99 | RESTORE ebp | ||
| 100 | ret | ||
| 101 | CFI_ENDPROC | ||
| 102 | ENDPROC(atomic64_\func\()_return_cx8) | ||
| 103 | .endm | ||
| 104 | |||
| 105 | addsub_return add add adc | ||
| 106 | addsub_return sub sub sbb | ||
| 107 | |||
| 108 | .macro incdec_return func ins insc | ||
| 109 | ENTRY(atomic64_\func\()_return_cx8) | ||
| 110 | CFI_STARTPROC | ||
| 111 | SAVE ebx | ||
| 112 | |||
| 113 | read64 %esi | ||
| 114 | 1: | ||
| 115 | movl %eax, %ebx | ||
| 116 | movl %edx, %ecx | ||
| 117 | \ins\()l $1, %ebx | ||
| 118 | \insc\()l $0, %ecx | ||
| 119 | LOCK_PREFIX | ||
| 120 | cmpxchg8b (%esi) | ||
| 121 | jne 1b | ||
| 122 | |||
| 123 | 10: | ||
| 124 | movl %ebx, %eax | ||
| 125 | movl %ecx, %edx | ||
| 126 | RESTORE ebx | ||
| 127 | ret | ||
| 128 | CFI_ENDPROC | ||
| 129 | ENDPROC(atomic64_\func\()_return_cx8) | ||
| 130 | .endm | ||
| 131 | |||
| 132 | incdec_return inc add adc | ||
| 133 | incdec_return dec sub sbb | ||
| 134 | |||
| 135 | ENTRY(atomic64_dec_if_positive_cx8) | ||
| 136 | CFI_STARTPROC | ||
| 137 | SAVE ebx | ||
| 138 | |||
| 139 | read64 %esi | ||
| 140 | 1: | ||
| 141 | movl %eax, %ebx | ||
| 142 | movl %edx, %ecx | ||
| 143 | subl $1, %ebx | ||
| 144 | sbb $0, %ecx | ||
| 145 | js 2f | ||
| 146 | LOCK_PREFIX | ||
| 147 | cmpxchg8b (%esi) | ||
| 148 | jne 1b | ||
| 149 | |||
| 150 | 2: | ||
| 151 | movl %ebx, %eax | ||
| 152 | movl %ecx, %edx | ||
| 153 | RESTORE ebx | ||
| 154 | ret | ||
| 155 | CFI_ENDPROC | ||
| 156 | ENDPROC(atomic64_dec_if_positive_cx8) | ||
| 157 | |||
| 158 | ENTRY(atomic64_add_unless_cx8) | ||
| 159 | CFI_STARTPROC | ||
| 160 | SAVE ebp | ||
| 161 | SAVE ebx | ||
| 162 | /* these SAVEs just push the two parameters onto the stack */ | ||
| 163 | SAVE edi | ||
| 164 | SAVE esi | ||
| 165 | |||
| 166 | movl %ecx, %ebp | ||
| 167 | movl %eax, %esi | ||
| 168 | movl %edx, %edi | ||
| 169 | |||
| 170 | read64 %ebp | ||
| 171 | 1: | ||
| 172 | cmpl %eax, 0(%esp) | ||
| 173 | je 4f | ||
| 174 | 2: | ||
| 175 | movl %eax, %ebx | ||
| 176 | movl %edx, %ecx | ||
| 177 | addl %esi, %ebx | ||
| 178 | adcl %edi, %ecx | ||
| 179 | LOCK_PREFIX | ||
| 180 | cmpxchg8b (%ebp) | ||
| 181 | jne 1b | ||
| 182 | |||
| 183 | movl $1, %eax | ||
| 184 | 3: | ||
| 185 | addl $8, %esp | ||
| 186 | CFI_ADJUST_CFA_OFFSET -8 | ||
| 187 | RESTORE ebx | ||
| 188 | RESTORE ebp | ||
| 189 | ret | ||
| 190 | 4: | ||
| 191 | cmpl %edx, 4(%esp) | ||
| 192 | jne 2b | ||
| 193 | xorl %eax, %eax | ||
| 194 | jmp 3b | ||
| 195 | CFI_ENDPROC | ||
| 196 | ENDPROC(atomic64_add_unless_cx8) | ||
| 197 | |||
| 198 | ENTRY(atomic64_inc_not_zero_cx8) | ||
| 199 | CFI_STARTPROC | ||
| 200 | SAVE ebx | ||
| 201 | |||
| 202 | read64 %esi | ||
| 203 | 1: | ||
| 204 | testl %eax, %eax | ||
| 205 | je 4f | ||
| 206 | 2: | ||
| 207 | movl %eax, %ebx | ||
| 208 | movl %edx, %ecx | ||
| 209 | addl $1, %ebx | ||
| 210 | adcl $0, %ecx | ||
| 211 | LOCK_PREFIX | ||
| 212 | cmpxchg8b (%esi) | ||
| 213 | jne 1b | ||
| 214 | |||
| 215 | movl $1, %eax | ||
| 216 | 3: | ||
| 217 | RESTORE ebx | ||
| 218 | ret | ||
| 219 | 4: | ||
| 220 | testl %edx, %edx | ||
| 221 | jne 2b | ||
| 222 | jmp 3b | ||
| 223 | CFI_ENDPROC | ||
| 224 | ENDPROC(atomic64_inc_not_zero_cx8) | ||
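The read64 macro at the top of this file is the classic cmpxchg8b read trick: EDX:EAX is preloaded from ECX:EBX, and a locked cmpxchg8b then either stores that same value back or fails the compare; in both cases the instruction leaves the current 64-bit contents in EDX:EAX, giving an atomic read on a 32-bit CPU. The same idea expressed in C, as a sketch using a GCC builtin with 0 as the throwaway compare value:

```c
#include <stdint.h>

/*
 * Atomic 64-bit read via compare-and-swap: if *v happens to be 0 we
 * store 0 back (a no-op), otherwise the compare fails -- either way
 * the builtin returns the value that was in memory, read atomically.
 */
static int64_t atomic64_read_sketch(int64_t *v)
{
	return __sync_val_compare_and_swap(v, (int64_t)0, (int64_t)0);
}
```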
diff --git a/arch/x86/math-emu/fpu_aux.c b/arch/x86/math-emu/fpu_aux.c index aa0987088774..dc8adad10a2f 100644 --- a/arch/x86/math-emu/fpu_aux.c +++ b/arch/x86/math-emu/fpu_aux.c | |||
| @@ -30,10 +30,10 @@ static void fclex(void) | |||
| 30 | } | 30 | } |
| 31 | 31 | ||
| 32 | /* Needs to be externally visible */ | 32 | /* Needs to be externally visible */ |
| 33 | void finit_task(struct task_struct *tsk) | 33 | void finit_soft_fpu(struct i387_soft_struct *soft) |
| 34 | { | 34 | { |
| 35 | struct i387_soft_struct *soft = &tsk->thread.xstate->soft; | ||
| 36 | struct address *oaddr, *iaddr; | 35 | struct address *oaddr, *iaddr; |
| 36 | memset(soft, 0, sizeof(*soft)); | ||
| 37 | soft->cwd = 0x037f; | 37 | soft->cwd = 0x037f; |
| 38 | soft->swd = 0; | 38 | soft->swd = 0; |
| 39 | soft->ftop = 0; /* We don't keep top in the status word internally. */ | 39 | soft->ftop = 0; /* We don't keep top in the status word internally. */ |
| @@ -52,7 +52,7 @@ void finit_task(struct task_struct *tsk) | |||
| 52 | 52 | ||
| 53 | void finit(void) | 53 | void finit(void) |
| 54 | { | 54 | { |
| 55 | finit_task(current); | 55 | finit_soft_fpu(¤t->thread.fpu.state->soft); |
| 56 | } | 56 | } |
| 57 | 57 | ||
| 58 | /* | 58 | /* |
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index 5d87f586f8d7..7718541541d4 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c | |||
| @@ -681,7 +681,7 @@ int fpregs_soft_set(struct task_struct *target, | |||
| 681 | unsigned int pos, unsigned int count, | 681 | unsigned int pos, unsigned int count, |
| 682 | const void *kbuf, const void __user *ubuf) | 682 | const void *kbuf, const void __user *ubuf) |
| 683 | { | 683 | { |
| 684 | struct i387_soft_struct *s387 = &target->thread.xstate->soft; | 684 | struct i387_soft_struct *s387 = &target->thread.fpu.state->soft; |
| 685 | void *space = s387->st_space; | 685 | void *space = s387->st_space; |
| 686 | int ret; | 686 | int ret; |
| 687 | int offset, other, i, tags, regnr, tag, newtop; | 687 | int offset, other, i, tags, regnr, tag, newtop; |
| @@ -733,7 +733,7 @@ int fpregs_soft_get(struct task_struct *target, | |||
| 733 | unsigned int pos, unsigned int count, | 733 | unsigned int pos, unsigned int count, |
| 734 | void *kbuf, void __user *ubuf) | 734 | void *kbuf, void __user *ubuf) |
| 735 | { | 735 | { |
| 736 | struct i387_soft_struct *s387 = &target->thread.xstate->soft; | 736 | struct i387_soft_struct *s387 = &target->thread.fpu.state->soft; |
| 737 | const void *space = s387->st_space; | 737 | const void *space = s387->st_space; |
| 738 | int ret; | 738 | int ret; |
| 739 | int offset = (S387->ftop & 7) * 10, other = 80 - offset; | 739 | int offset = (S387->ftop & 7) * 10, other = 80 - offset; |
diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h index 50fa0ec2c8a5..2c614410a5f3 100644 --- a/arch/x86/math-emu/fpu_system.h +++ b/arch/x86/math-emu/fpu_system.h | |||
| @@ -31,7 +31,7 @@ | |||
| 31 | #define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \ | 31 | #define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \ |
| 32 | == (1 << 10)) | 32 | == (1 << 10)) |
| 33 | 33 | ||
| 34 | #define I387 (current->thread.xstate) | 34 | #define I387 (current->thread.fpu.state) |
| 35 | #define FPU_info (I387->soft.info) | 35 | #define FPU_info (I387->soft.info) |
| 36 | 36 | ||
| 37 | #define FPU_CS (*(unsigned short *) &(FPU_info->regs->cs)) | 37 | #define FPU_CS (*(unsigned short *) &(FPU_info->regs->cs)) |
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 06630d26e56d..a4c768397baa 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile | |||
| @@ -6,6 +6,7 @@ nostackp := $(call cc-option, -fno-stack-protector) | |||
| 6 | CFLAGS_physaddr.o := $(nostackp) | 6 | CFLAGS_physaddr.o := $(nostackp) |
| 7 | CFLAGS_setup_nx.o := $(nostackp) | 7 | CFLAGS_setup_nx.o := $(nostackp) |
| 8 | 8 | ||
| 9 | obj-$(CONFIG_X86_PAT) += pat_rbtree.o | ||
| 9 | obj-$(CONFIG_SMP) += tlb.o | 10 | obj-$(CONFIG_SMP) += tlb.o |
| 10 | 11 | ||
| 11 | obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o | 12 | obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o |
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 8948f47fde05..a7bcc23ef96c 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
| @@ -33,9 +33,6 @@ int numa_off __initdata; | |||
| 33 | static unsigned long __initdata nodemap_addr; | 33 | static unsigned long __initdata nodemap_addr; |
| 34 | static unsigned long __initdata nodemap_size; | 34 | static unsigned long __initdata nodemap_size; |
| 35 | 35 | ||
| 36 | DEFINE_PER_CPU(int, node_number) = 0; | ||
| 37 | EXPORT_PER_CPU_SYMBOL(node_number); | ||
| 38 | |||
| 39 | /* | 36 | /* |
| 40 | * Map cpu index to node index | 37 | * Map cpu index to node index |
| 41 | */ | 38 | */ |
| @@ -809,7 +806,7 @@ void __cpuinit numa_set_node(int cpu, int node) | |||
| 809 | per_cpu(x86_cpu_to_node_map, cpu) = node; | 806 | per_cpu(x86_cpu_to_node_map, cpu) = node; |
| 810 | 807 | ||
| 811 | if (node != NUMA_NO_NODE) | 808 | if (node != NUMA_NO_NODE) |
| 812 | per_cpu(node_number, cpu) = node; | 809 | set_cpu_numa_node(cpu, node); |
| 813 | } | 810 | } |
| 814 | 811 | ||
| 815 | void __cpuinit numa_clear_node(int cpu) | 812 | void __cpuinit numa_clear_node(int cpu) |
| @@ -867,7 +864,7 @@ void __cpuinit numa_remove_cpu(int cpu) | |||
| 867 | numa_set_cpumask(cpu, 0); | 864 | numa_set_cpumask(cpu, 0); |
| 868 | } | 865 | } |
| 869 | 866 | ||
| 870 | int cpu_to_node(int cpu) | 867 | int __cpu_to_node(int cpu) |
| 871 | { | 868 | { |
| 872 | if (early_per_cpu_ptr(x86_cpu_to_node_map)) { | 869 | if (early_per_cpu_ptr(x86_cpu_to_node_map)) { |
| 873 | printk(KERN_WARNING | 870 | printk(KERN_WARNING |
| @@ -877,7 +874,7 @@ int cpu_to_node(int cpu) | |||
| 877 | } | 874 | } |
| 878 | return per_cpu(x86_cpu_to_node_map, cpu); | 875 | return per_cpu(x86_cpu_to_node_map, cpu); |
| 879 | } | 876 | } |
| 880 | EXPORT_SYMBOL(cpu_to_node); | 877 | EXPORT_SYMBOL(__cpu_to_node); |
| 881 | 878 | ||
| 882 | /* | 879 | /* |
| 883 | * Same function as cpu_to_node() but used if called before the | 880 | * Same function as cpu_to_node() but used if called before the |
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 28195c350b97..532e7933d606 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
| @@ -997,7 +997,8 @@ out_err: | |||
| 997 | } | 997 | } |
| 998 | EXPORT_SYMBOL(set_memory_uc); | 998 | EXPORT_SYMBOL(set_memory_uc); |
| 999 | 999 | ||
| 1000 | int set_memory_array_uc(unsigned long *addr, int addrinarray) | 1000 | int _set_memory_array(unsigned long *addr, int addrinarray, |
| 1001 | unsigned long new_type) | ||
| 1001 | { | 1002 | { |
| 1002 | int i, j; | 1003 | int i, j; |
| 1003 | int ret; | 1004 | int ret; |
| @@ -1007,13 +1008,19 @@ int set_memory_array_uc(unsigned long *addr, int addrinarray) | |||
| 1007 | */ | 1008 | */ |
| 1008 | for (i = 0; i < addrinarray; i++) { | 1009 | for (i = 0; i < addrinarray; i++) { |
| 1009 | ret = reserve_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE, | 1010 | ret = reserve_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE, |
| 1010 | _PAGE_CACHE_UC_MINUS, NULL); | 1011 | new_type, NULL); |
| 1011 | if (ret) | 1012 | if (ret) |
| 1012 | goto out_free; | 1013 | goto out_free; |
| 1013 | } | 1014 | } |
| 1014 | 1015 | ||
| 1015 | ret = change_page_attr_set(addr, addrinarray, | 1016 | ret = change_page_attr_set(addr, addrinarray, |
| 1016 | __pgprot(_PAGE_CACHE_UC_MINUS), 1); | 1017 | __pgprot(_PAGE_CACHE_UC_MINUS), 1); |
| 1018 | |||
| 1019 | if (!ret && new_type == _PAGE_CACHE_WC) | ||
| 1020 | ret = change_page_attr_set_clr(addr, addrinarray, | ||
| 1021 | __pgprot(_PAGE_CACHE_WC), | ||
| 1022 | __pgprot(_PAGE_CACHE_MASK), | ||
| 1023 | 0, CPA_ARRAY, NULL); | ||
| 1017 | if (ret) | 1024 | if (ret) |
| 1018 | goto out_free; | 1025 | goto out_free; |
| 1019 | 1026 | ||
| @@ -1025,8 +1032,19 @@ out_free: | |||
| 1025 | 1032 | ||
| 1026 | return ret; | 1033 | return ret; |
| 1027 | } | 1034 | } |
| 1035 | |||
| 1036 | int set_memory_array_uc(unsigned long *addr, int addrinarray) | ||
| 1037 | { | ||
| 1038 | return _set_memory_array(addr, addrinarray, _PAGE_CACHE_UC_MINUS); | ||
| 1039 | } | ||
| 1028 | EXPORT_SYMBOL(set_memory_array_uc); | 1040 | EXPORT_SYMBOL(set_memory_array_uc); |
| 1029 | 1041 | ||
| 1042 | int set_memory_array_wc(unsigned long *addr, int addrinarray) | ||
| 1043 | { | ||
| 1044 | return _set_memory_array(addr, addrinarray, _PAGE_CACHE_WC); | ||
| 1045 | } | ||
| 1046 | EXPORT_SYMBOL(set_memory_array_wc); | ||
| 1047 | |||
| 1030 | int _set_memory_wc(unsigned long addr, int numpages) | 1048 | int _set_memory_wc(unsigned long addr, int numpages) |
| 1031 | { | 1049 | { |
| 1032 | int ret; | 1050 | int ret; |
| @@ -1153,26 +1171,34 @@ int set_pages_uc(struct page *page, int numpages) | |||
| 1153 | } | 1171 | } |
| 1154 | EXPORT_SYMBOL(set_pages_uc); | 1172 | EXPORT_SYMBOL(set_pages_uc); |
| 1155 | 1173 | ||
| 1156 | int set_pages_array_uc(struct page **pages, int addrinarray) | 1174 | static int _set_pages_array(struct page **pages, int addrinarray, |
| 1175 | unsigned long new_type) | ||
| 1157 | { | 1176 | { |
| 1158 | unsigned long start; | 1177 | unsigned long start; |
| 1159 | unsigned long end; | 1178 | unsigned long end; |
| 1160 | int i; | 1179 | int i; |
| 1161 | int free_idx; | 1180 | int free_idx; |
| 1181 | int ret; | ||
| 1162 | 1182 | ||
| 1163 | for (i = 0; i < addrinarray; i++) { | 1183 | for (i = 0; i < addrinarray; i++) { |
| 1164 | if (PageHighMem(pages[i])) | 1184 | if (PageHighMem(pages[i])) |
| 1165 | continue; | 1185 | continue; |
| 1166 | start = page_to_pfn(pages[i]) << PAGE_SHIFT; | 1186 | start = page_to_pfn(pages[i]) << PAGE_SHIFT; |
| 1167 | end = start + PAGE_SIZE; | 1187 | end = start + PAGE_SIZE; |
| 1168 | if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL)) | 1188 | if (reserve_memtype(start, end, new_type, NULL)) |
| 1169 | goto err_out; | 1189 | goto err_out; |
| 1170 | } | 1190 | } |
| 1171 | 1191 | ||
| 1172 | if (cpa_set_pages_array(pages, addrinarray, | 1192 | ret = cpa_set_pages_array(pages, addrinarray, |
| 1173 | __pgprot(_PAGE_CACHE_UC_MINUS)) == 0) { | 1193 | __pgprot(_PAGE_CACHE_UC_MINUS)); |
| 1174 | return 0; /* Success */ | 1194 | if (!ret && new_type == _PAGE_CACHE_WC) |
| 1175 | } | 1195 | ret = change_page_attr_set_clr(NULL, addrinarray, |
| 1196 | __pgprot(_PAGE_CACHE_WC), | ||
| 1197 | __pgprot(_PAGE_CACHE_MASK), | ||
| 1198 | 0, CPA_PAGES_ARRAY, pages); | ||
| 1199 | if (ret) | ||
| 1200 | goto err_out; | ||
| 1201 | return 0; /* Success */ | ||
| 1176 | err_out: | 1202 | err_out: |
| 1177 | free_idx = i; | 1203 | free_idx = i; |
| 1178 | for (i = 0; i < free_idx; i++) { | 1204 | for (i = 0; i < free_idx; i++) { |
| @@ -1184,8 +1210,19 @@ err_out: | |||
| 1184 | } | 1210 | } |
| 1185 | return -EINVAL; | 1211 | return -EINVAL; |
| 1186 | } | 1212 | } |
| 1213 | |||
| 1214 | int set_pages_array_uc(struct page **pages, int addrinarray) | ||
| 1215 | { | ||
| 1216 | return _set_pages_array(pages, addrinarray, _PAGE_CACHE_UC_MINUS); | ||
| 1217 | } | ||
| 1187 | EXPORT_SYMBOL(set_pages_array_uc); | 1218 | EXPORT_SYMBOL(set_pages_array_uc); |
| 1188 | 1219 | ||
| 1220 | int set_pages_array_wc(struct page **pages, int addrinarray) | ||
| 1221 | { | ||
| 1222 | return _set_pages_array(pages, addrinarray, _PAGE_CACHE_WC); | ||
| 1223 | } | ||
| 1224 | EXPORT_SYMBOL(set_pages_array_wc); | ||
| 1225 | |||
| 1189 | int set_pages_wb(struct page *page, int numpages) | 1226 | int set_pages_wb(struct page *page, int numpages) |
| 1190 | { | 1227 | { |
| 1191 | unsigned long addr = (unsigned long)page_address(page); | 1228 | unsigned long addr = (unsigned long)page_address(page); |
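The set_memory_array_wc() and set_pages_array_wc() entry points added above follow the same reserve-memtype-then-change pattern as the existing _uc variants, so a caller that flips a batch of pages to write-combining is expected to put them back to write-back before freeing them. A hedged usage sketch is below; error unwinding is trimmed, the function name is invented, and set_pages_array_wb() is assumed to be the pre-existing write-back counterpart in this file.

```c
#include <linux/mm.h>
#include <asm/cacheflush.h>

/* Sketch of the intended calling pattern for the new WC array API. */
static int map_pages_wc_example(struct page **pages, int count)
{
	int ret;

	ret = set_pages_array_wc(pages, count);	/* reserves a WC memtype per page */
	if (ret)
		return ret;

	/* ... access the pages through a write-combining mapping ... */

	return set_pages_array_wb(pages, count);	/* restore WB before freeing */
}
```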
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index edc8b95afc1a..acc15b23b743 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c | |||
| @@ -30,6 +30,8 @@ | |||
| 30 | #include <asm/pat.h> | 30 | #include <asm/pat.h> |
| 31 | #include <asm/io.h> | 31 | #include <asm/io.h> |
| 32 | 32 | ||
| 33 | #include "pat_internal.h" | ||
| 34 | |||
| 33 | #ifdef CONFIG_X86_PAT | 35 | #ifdef CONFIG_X86_PAT |
| 34 | int __read_mostly pat_enabled = 1; | 36 | int __read_mostly pat_enabled = 1; |
| 35 | 37 | ||
| @@ -53,19 +55,15 @@ static inline void pat_disable(const char *reason) | |||
| 53 | #endif | 55 | #endif |
| 54 | 56 | ||
| 55 | 57 | ||
| 56 | static int debug_enable; | 58 | int pat_debug_enable; |
| 57 | 59 | ||
| 58 | static int __init pat_debug_setup(char *str) | 60 | static int __init pat_debug_setup(char *str) |
| 59 | { | 61 | { |
| 60 | debug_enable = 1; | 62 | pat_debug_enable = 1; |
| 61 | return 0; | 63 | return 0; |
| 62 | } | 64 | } |
| 63 | __setup("debugpat", pat_debug_setup); | 65 | __setup("debugpat", pat_debug_setup); |
| 64 | 66 | ||
| 65 | #define dprintk(fmt, arg...) \ | ||
| 66 | do { if (debug_enable) printk(KERN_INFO fmt, ##arg); } while (0) | ||
| 67 | |||
| 68 | |||
| 69 | static u64 __read_mostly boot_pat_state; | 67 | static u64 __read_mostly boot_pat_state; |
| 70 | 68 | ||
| 71 | enum { | 69 | enum { |
| @@ -132,84 +130,7 @@ void pat_init(void) | |||
| 132 | 130 | ||
| 133 | #undef PAT | 131 | #undef PAT |
| 134 | 132 | ||
| 135 | static char *cattr_name(unsigned long flags) | 133 | static DEFINE_SPINLOCK(memtype_lock); /* protects memtype accesses */ |
| 136 | { | ||
| 137 | switch (flags & _PAGE_CACHE_MASK) { | ||
| 138 | case _PAGE_CACHE_UC: return "uncached"; | ||
| 139 | case _PAGE_CACHE_UC_MINUS: return "uncached-minus"; | ||
| 140 | case _PAGE_CACHE_WB: return "write-back"; | ||
| 141 | case _PAGE_CACHE_WC: return "write-combining"; | ||
| 142 | default: return "broken"; | ||
| 143 | } | ||
| 144 | } | ||
| 145 | |||
| 146 | /* | ||
| 147 | * The global memtype list keeps track of memory type for specific | ||
| 148 | * physical memory areas. Conflicting memory types in different | ||
| 149 | * mappings can cause CPU cache corruption. To avoid this we keep track. | ||
| 150 | * | ||
| 151 | * The list is sorted based on starting address and can contain multiple | ||
| 152 | * entries for each address (this allows reference counting for overlapping | ||
| 153 | * areas). All the aliases have the same cache attributes of course. | ||
| 154 | * Zero attributes are represented as holes. | ||
| 155 | * | ||
| 156 | * The data structure is a list that is also organized as an rbtree | ||
| 157 | * sorted on the start address of memtype range. | ||
| 158 | * | ||
| 159 | * memtype_lock protects both the linear list and rbtree. | ||
| 160 | */ | ||
| 161 | |||
| 162 | struct memtype { | ||
| 163 | u64 start; | ||
| 164 | u64 end; | ||
| 165 | unsigned long type; | ||
| 166 | struct list_head nd; | ||
| 167 | struct rb_node rb; | ||
| 168 | }; | ||
| 169 | |||
| 170 | static struct rb_root memtype_rbroot = RB_ROOT; | ||
| 171 | static LIST_HEAD(memtype_list); | ||
| 172 | static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */ | ||
| 173 | |||
| 174 | static struct memtype *memtype_rb_search(struct rb_root *root, u64 start) | ||
| 175 | { | ||
| 176 | struct rb_node *node = root->rb_node; | ||
| 177 | struct memtype *last_lower = NULL; | ||
| 178 | |||
| 179 | while (node) { | ||
| 180 | struct memtype *data = container_of(node, struct memtype, rb); | ||
| 181 | |||
| 182 | if (data->start < start) { | ||
| 183 | last_lower = data; | ||
| 184 | node = node->rb_right; | ||
| 185 | } else if (data->start > start) { | ||
| 186 | node = node->rb_left; | ||
| 187 | } else | ||
| 188 | return data; | ||
| 189 | } | ||
| 190 | |||
| 191 | /* Will return NULL if there is no entry with its start <= start */ | ||
| 192 | return last_lower; | ||
| 193 | } | ||
| 194 | |||
| 195 | static void memtype_rb_insert(struct rb_root *root, struct memtype *data) | ||
| 196 | { | ||
| 197 | struct rb_node **new = &(root->rb_node); | ||
| 198 | struct rb_node *parent = NULL; | ||
| 199 | |||
| 200 | while (*new) { | ||
| 201 | struct memtype *this = container_of(*new, struct memtype, rb); | ||
| 202 | |||
| 203 | parent = *new; | ||
| 204 | if (data->start <= this->start) | ||
| 205 | new = &((*new)->rb_left); | ||
| 206 | else if (data->start > this->start) | ||
| 207 | new = &((*new)->rb_right); | ||
| 208 | } | ||
| 209 | |||
| 210 | rb_link_node(&data->rb, parent, new); | ||
| 211 | rb_insert_color(&data->rb, root); | ||
| 212 | } | ||
| 213 | 134 | ||
| 214 | /* | 135 | /* |
| 215 | * Does intersection of PAT memory type and MTRR memory type and returns | 136 | * Does intersection of PAT memory type and MTRR memory type and returns |
| @@ -237,33 +158,6 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type) | |||
| 237 | return req_type; | 158 | return req_type; |
| 238 | } | 159 | } |
| 239 | 160 | ||
| 240 | static int | ||
| 241 | chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type) | ||
| 242 | { | ||
| 243 | if (new->type != entry->type) { | ||
| 244 | if (type) { | ||
| 245 | new->type = entry->type; | ||
| 246 | *type = entry->type; | ||
| 247 | } else | ||
| 248 | goto conflict; | ||
| 249 | } | ||
| 250 | |||
| 251 | /* check overlaps with more than one entry in the list */ | ||
| 252 | list_for_each_entry_continue(entry, &memtype_list, nd) { | ||
| 253 | if (new->end <= entry->start) | ||
| 254 | break; | ||
| 255 | else if (new->type != entry->type) | ||
| 256 | goto conflict; | ||
| 257 | } | ||
| 258 | return 0; | ||
| 259 | |||
| 260 | conflict: | ||
| 261 | printk(KERN_INFO "%s:%d conflicting memory types " | ||
| 262 | "%Lx-%Lx %s<->%s\n", current->comm, current->pid, new->start, | ||
| 263 | new->end, cattr_name(new->type), cattr_name(entry->type)); | ||
| 264 | return -EBUSY; | ||
| 265 | } | ||
| 266 | |||
| 267 | static int pat_pagerange_is_ram(unsigned long start, unsigned long end) | 161 | static int pat_pagerange_is_ram(unsigned long start, unsigned long end) |
| 268 | { | 162 | { |
| 269 | int ram_page = 0, not_rampage = 0; | 163 | int ram_page = 0, not_rampage = 0; |
| @@ -296,8 +190,6 @@ static int pat_pagerange_is_ram(unsigned long start, unsigned long end) | |||
| 296 | * Here we do two pass: | 190 | * Here we do two pass: |
| 297 | * - Find the memtype of all the pages in the range, look for any conflicts | 191 | * - Find the memtype of all the pages in the range, look for any conflicts |
| 298 | * - In case of no conflicts, set the new memtype for pages in the range | 192 | * - In case of no conflicts, set the new memtype for pages in the range |
| 299 | * | ||
| 300 | * Caller must hold memtype_lock for atomicity. | ||
| 301 | */ | 193 | */ |
| 302 | static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type, | 194 | static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type, |
| 303 | unsigned long *new_type) | 195 | unsigned long *new_type) |
| @@ -364,9 +256,8 @@ static int free_ram_pages_type(u64 start, u64 end) | |||
| 364 | int reserve_memtype(u64 start, u64 end, unsigned long req_type, | 256 | int reserve_memtype(u64 start, u64 end, unsigned long req_type, |
| 365 | unsigned long *new_type) | 257 | unsigned long *new_type) |
| 366 | { | 258 | { |
| 367 | struct memtype *new, *entry; | 259 | struct memtype *new; |
| 368 | unsigned long actual_type; | 260 | unsigned long actual_type; |
| 369 | struct list_head *where; | ||
| 370 | int is_range_ram; | 261 | int is_range_ram; |
| 371 | int err = 0; | 262 | int err = 0; |
| 372 | 263 | ||
| @@ -404,9 +295,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
| 404 | is_range_ram = pat_pagerange_is_ram(start, end); | 295 | is_range_ram = pat_pagerange_is_ram(start, end); |
| 405 | if (is_range_ram == 1) { | 296 | if (is_range_ram == 1) { |
| 406 | 297 | ||
| 407 | spin_lock(&memtype_lock); | ||
| 408 | err = reserve_ram_pages_type(start, end, req_type, new_type); | 298 | err = reserve_ram_pages_type(start, end, req_type, new_type); |
| 409 | spin_unlock(&memtype_lock); | ||
| 410 | 299 | ||
| 411 | return err; | 300 | return err; |
| 412 | } else if (is_range_ram < 0) { | 301 | } else if (is_range_ram < 0) { |
| @@ -423,42 +312,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
| 423 | 312 | ||
| 424 | spin_lock(&memtype_lock); | 313 | spin_lock(&memtype_lock); |
| 425 | 314 | ||
| 426 | /* Search for existing mapping that overlaps the current range */ | 315 | err = rbt_memtype_check_insert(new, new_type); |
| 427 | where = NULL; | ||
| 428 | list_for_each_entry(entry, &memtype_list, nd) { | ||
| 429 | if (end <= entry->start) { | ||
| 430 | where = entry->nd.prev; | ||
| 431 | break; | ||
| 432 | } else if (start <= entry->start) { /* end > entry->start */ | ||
| 433 | err = chk_conflict(new, entry, new_type); | ||
| 434 | if (!err) { | ||
| 435 | dprintk("Overlap at 0x%Lx-0x%Lx\n", | ||
| 436 | entry->start, entry->end); | ||
| 437 | where = entry->nd.prev; | ||
| 438 | } | ||
| 439 | break; | ||
| 440 | } else if (start < entry->end) { /* start > entry->start */ | ||
| 441 | err = chk_conflict(new, entry, new_type); | ||
| 442 | if (!err) { | ||
| 443 | dprintk("Overlap at 0x%Lx-0x%Lx\n", | ||
| 444 | entry->start, entry->end); | ||
| 445 | |||
| 446 | /* | ||
| 447 | * Move to right position in the linked | ||
| 448 | * list to add this new entry | ||
| 449 | */ | ||
| 450 | list_for_each_entry_continue(entry, | ||
| 451 | &memtype_list, nd) { | ||
| 452 | if (start <= entry->start) { | ||
| 453 | where = entry->nd.prev; | ||
| 454 | break; | ||
| 455 | } | ||
| 456 | } | ||
| 457 | } | ||
| 458 | break; | ||
| 459 | } | ||
| 460 | } | ||
| 461 | |||
| 462 | if (err) { | 316 | if (err) { |
| 463 | printk(KERN_INFO "reserve_memtype failed 0x%Lx-0x%Lx, " | 317 | printk(KERN_INFO "reserve_memtype failed 0x%Lx-0x%Lx, " |
| 464 | "track %s, req %s\n", | 318 | "track %s, req %s\n", |
| @@ -469,13 +323,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
| 469 | return err; | 323 | return err; |
| 470 | } | 324 | } |
| 471 | 325 | ||
| 472 | if (where) | ||
| 473 | list_add(&new->nd, where); | ||
| 474 | else | ||
| 475 | list_add_tail(&new->nd, &memtype_list); | ||
| 476 | |||
| 477 | memtype_rb_insert(&memtype_rbroot, new); | ||
| 478 | |||
| 479 | spin_unlock(&memtype_lock); | 326 | spin_unlock(&memtype_lock); |
| 480 | 327 | ||
| 481 | dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", | 328 | dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", |
| @@ -487,9 +334,9 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
| 487 | 334 | ||
| 488 | int free_memtype(u64 start, u64 end) | 335 | int free_memtype(u64 start, u64 end) |
| 489 | { | 336 | { |
| 490 | struct memtype *entry, *saved_entry; | ||
| 491 | int err = -EINVAL; | 337 | int err = -EINVAL; |
| 492 | int is_range_ram; | 338 | int is_range_ram; |
| 339 | struct memtype *entry; | ||
| 493 | 340 | ||
| 494 | if (!pat_enabled) | 341 | if (!pat_enabled) |
| 495 | return 0; | 342 | return 0; |
| @@ -501,9 +348,7 @@ int free_memtype(u64 start, u64 end) | |||
| 501 | is_range_ram = pat_pagerange_is_ram(start, end); | 348 | is_range_ram = pat_pagerange_is_ram(start, end); |
| 502 | if (is_range_ram == 1) { | 349 | if (is_range_ram == 1) { |
| 503 | 350 | ||
| 504 | spin_lock(&memtype_lock); | ||
| 505 | err = free_ram_pages_type(start, end); | 351 | err = free_ram_pages_type(start, end); |
| 506 | spin_unlock(&memtype_lock); | ||
| 507 | 352 | ||
| 508 | return err; | 353 | return err; |
| 509 | } else if (is_range_ram < 0) { | 354 | } else if (is_range_ram < 0) { |
| @@ -511,56 +356,20 @@ int free_memtype(u64 start, u64 end) | |||
| 511 | } | 356 | } |
| 512 | 357 | ||
| 513 | spin_lock(&memtype_lock); | 358 | spin_lock(&memtype_lock); |
| 514 | 359 | entry = rbt_memtype_erase(start, end); | |
| 515 | entry = memtype_rb_search(&memtype_rbroot, start); | ||
| 516 | if (unlikely(entry == NULL)) | ||
| 517 | goto unlock_ret; | ||
| 518 | |||
| 519 | /* | ||
| 520 | * Saved entry points to an entry with start same or less than what | ||
| 521 | * we searched for. Now go through the list in both directions to look | ||
| 522 | * for the entry that matches with both start and end, with list stored | ||
| 523 | * in sorted start address | ||
| 524 | */ | ||
| 525 | saved_entry = entry; | ||
| 526 | list_for_each_entry_from(entry, &memtype_list, nd) { | ||
| 527 | if (entry->start == start && entry->end == end) { | ||
| 528 | rb_erase(&entry->rb, &memtype_rbroot); | ||
| 529 | list_del(&entry->nd); | ||
| 530 | kfree(entry); | ||
| 531 | err = 0; | ||
| 532 | break; | ||
| 533 | } else if (entry->start > start) { | ||
| 534 | break; | ||
| 535 | } | ||
| 536 | } | ||
| 537 | |||
| 538 | if (!err) | ||
| 539 | goto unlock_ret; | ||
| 540 | |||
| 541 | entry = saved_entry; | ||
| 542 | list_for_each_entry_reverse(entry, &memtype_list, nd) { | ||
| 543 | if (entry->start == start && entry->end == end) { | ||
| 544 | rb_erase(&entry->rb, &memtype_rbroot); | ||
| 545 | list_del(&entry->nd); | ||
| 546 | kfree(entry); | ||
| 547 | err = 0; | ||
| 548 | break; | ||
| 549 | } else if (entry->start < start) { | ||
| 550 | break; | ||
| 551 | } | ||
| 552 | } | ||
| 553 | unlock_ret: | ||
| 554 | spin_unlock(&memtype_lock); | 360 | spin_unlock(&memtype_lock); |
| 555 | 361 | ||
| 556 | if (err) { | 362 | if (!entry) { |
| 557 | printk(KERN_INFO "%s:%d freeing invalid memtype %Lx-%Lx\n", | 363 | printk(KERN_INFO "%s:%d freeing invalid memtype %Lx-%Lx\n", |
| 558 | current->comm, current->pid, start, end); | 364 | current->comm, current->pid, start, end); |
| 365 | return -EINVAL; | ||
| 559 | } | 366 | } |
| 560 | 367 | ||
| 368 | kfree(entry); | ||
| 369 | |||
| 561 | dprintk("free_memtype request 0x%Lx-0x%Lx\n", start, end); | 370 | dprintk("free_memtype request 0x%Lx-0x%Lx\n", start, end); |
| 562 | 371 | ||
| 563 | return err; | 372 | return 0; |
| 564 | } | 373 | } |
| 565 | 374 | ||
| 566 | 375 | ||
| @@ -583,10 +392,8 @@ static unsigned long lookup_memtype(u64 paddr) | |||
| 583 | 392 | ||
| 584 | if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) { | 393 | if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) { |
| 585 | struct page *page; | 394 | struct page *page; |
| 586 | spin_lock(&memtype_lock); | ||
| 587 | page = pfn_to_page(paddr >> PAGE_SHIFT); | 395 | page = pfn_to_page(paddr >> PAGE_SHIFT); |
| 588 | rettype = get_page_memtype(page); | 396 | rettype = get_page_memtype(page); |
| 589 | spin_unlock(&memtype_lock); | ||
| 590 | /* | 397 | /* |
| 591 | * -1 from get_page_memtype() implies RAM page is in its | 398 | * -1 from get_page_memtype() implies RAM page is in its |
| 592 | * default state and not reserved, and hence of type WB | 399 | * default state and not reserved, and hence of type WB |
| @@ -599,7 +406,7 @@ static unsigned long lookup_memtype(u64 paddr) | |||
| 599 | 406 | ||
| 600 | spin_lock(&memtype_lock); | 407 | spin_lock(&memtype_lock); |
| 601 | 408 | ||
| 602 | entry = memtype_rb_search(&memtype_rbroot, paddr); | 409 | entry = rbt_memtype_lookup(paddr); |
| 603 | if (entry != NULL) | 410 | if (entry != NULL) |
| 604 | rettype = entry->type; | 411 | rettype = entry->type; |
| 605 | else | 412 | else |
| @@ -936,29 +743,25 @@ EXPORT_SYMBOL_GPL(pgprot_writecombine); | |||
| 936 | 743 | ||
| 937 | #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) | 744 | #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) |
| 938 | 745 | ||
| 939 | /* get Nth element of the linked list */ | ||
| 940 | static struct memtype *memtype_get_idx(loff_t pos) | 746 | static struct memtype *memtype_get_idx(loff_t pos) |
| 941 | { | 747 | { |
| 942 | struct memtype *list_node, *print_entry; | 748 | struct memtype *print_entry; |
| 943 | int i = 1; | 749 | int ret; |
| 944 | 750 | ||
| 945 | print_entry = kmalloc(sizeof(struct memtype), GFP_KERNEL); | 751 | print_entry = kzalloc(sizeof(struct memtype), GFP_KERNEL); |
| 946 | if (!print_entry) | 752 | if (!print_entry) |
| 947 | return NULL; | 753 | return NULL; |
| 948 | 754 | ||
| 949 | spin_lock(&memtype_lock); | 755 | spin_lock(&memtype_lock); |
| 950 | list_for_each_entry(list_node, &memtype_list, nd) { | 756 | ret = rbt_memtype_copy_nth_element(print_entry, pos); |
| 951 | if (pos == i) { | ||
| 952 | *print_entry = *list_node; | ||
| 953 | spin_unlock(&memtype_lock); | ||
| 954 | return print_entry; | ||
| 955 | } | ||
| 956 | ++i; | ||
| 957 | } | ||
| 958 | spin_unlock(&memtype_lock); | 757 | spin_unlock(&memtype_lock); |
| 959 | kfree(print_entry); | ||
| 960 | 758 | ||
| 961 | return NULL; | 759 | if (!ret) { |
| 760 | return print_entry; | ||
| 761 | } else { | ||
| 762 | kfree(print_entry); | ||
| 763 | return NULL; | ||
| 764 | } | ||
| 962 | } | 765 | } |
| 963 | 766 | ||
| 964 | static void *memtype_seq_start(struct seq_file *seq, loff_t *pos) | 767 | static void *memtype_seq_start(struct seq_file *seq, loff_t *pos) |
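None of this changes the reserve_memtype()/free_memtype() interface; only the bookkeeping behind memtype_lock moves from the list walk to the rbt_* helpers. A hedged sketch of a typical caller, which works the same before and after this patch (the wrapper function and its use of ioremap_nocache() are illustrative):

```c
/* Illustrative wrapper only -- not part of the patch; assumes the usual
 * <asm/io.h> / <asm/pat.h> context. */
static void __iomem *example_map_uc_minus(resource_size_t phys,
					  unsigned long size)
{
	unsigned long new_type;
	void __iomem *vaddr;

	/* Ask PAT to track the range; a non-zero return means a
	 * conflicting type is already recorded for an overlapping region. */
	if (reserve_memtype(phys, phys + size, _PAGE_CACHE_UC_MINUS,
			    &new_type))
		return NULL;

	vaddr = ioremap_nocache(phys, size);
	if (!vaddr)
		free_memtype(phys, phys + size);   /* drop the reservation */

	return vaddr;
}
```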
diff --git a/arch/x86/mm/pat_internal.h b/arch/x86/mm/pat_internal.h new file mode 100644 index 000000000000..77e5ba153fac --- /dev/null +++ b/arch/x86/mm/pat_internal.h | |||
| @@ -0,0 +1,46 @@ | |||
| 1 | #ifndef __PAT_INTERNAL_H_ | ||
| 2 | #define __PAT_INTERNAL_H_ | ||
| 3 | |||
| 4 | extern int pat_debug_enable; | ||
| 5 | |||
| 6 | #define dprintk(fmt, arg...) \ | ||
| 7 | do { if (pat_debug_enable) printk(KERN_INFO fmt, ##arg); } while (0) | ||
| 8 | |||
| 9 | struct memtype { | ||
| 10 | u64 start; | ||
| 11 | u64 end; | ||
| 12 | u64 subtree_max_end; | ||
| 13 | unsigned long type; | ||
| 14 | struct rb_node rb; | ||
| 15 | }; | ||
| 16 | |||
| 17 | static inline char *cattr_name(unsigned long flags) | ||
| 18 | { | ||
| 19 | switch (flags & _PAGE_CACHE_MASK) { | ||
| 20 | case _PAGE_CACHE_UC: return "uncached"; | ||
| 21 | case _PAGE_CACHE_UC_MINUS: return "uncached-minus"; | ||
| 22 | case _PAGE_CACHE_WB: return "write-back"; | ||
| 23 | case _PAGE_CACHE_WC: return "write-combining"; | ||
| 24 | default: return "broken"; | ||
| 25 | } | ||
| 26 | } | ||
| 27 | |||
| 28 | #ifdef CONFIG_X86_PAT | ||
| 29 | extern int rbt_memtype_check_insert(struct memtype *new, | ||
| 30 | unsigned long *new_type); | ||
| 31 | extern struct memtype *rbt_memtype_erase(u64 start, u64 end); | ||
| 32 | extern struct memtype *rbt_memtype_lookup(u64 addr); | ||
| 33 | extern int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos); | ||
| 34 | #else | ||
| 35 | static inline int rbt_memtype_check_insert(struct memtype *new, | ||
| 36 | unsigned long *new_type) | ||
| 37 | { return 0; } | ||
| 38 | static inline struct memtype *rbt_memtype_erase(u64 start, u64 end) | ||
| 39 | { return NULL; } | ||
| 40 | static inline struct memtype *rbt_memtype_lookup(u64 addr) | ||
| 41 | { return NULL; } | ||
| 42 | static inline int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos) | ||
| 43 | { return 0; } | ||
| 44 | #endif | ||
| 45 | |||
| 46 | #endif /* __PAT_INTERNAL_H_ */ | ||
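Moving dprintk() and pat_debug_enable into this header lets pat.c and the new pat_rbtree.c share one debug switch, still controlled by the existing debugpat boot parameter. A tiny usage sketch (the function is illustrative):

```c
/* Illustrative only: any file that includes pat_internal.h can trace
 * through the shared switch; boot with "debugpat" to see the output. */
static void example_trace(u64 start, u64 end)
{
	dprintk("PAT: looking at 0x%Lx-0x%Lx\n", start, end);
}
```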
diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c new file mode 100644 index 000000000000..f537087bb740 --- /dev/null +++ b/arch/x86/mm/pat_rbtree.c | |||
| @@ -0,0 +1,274 @@ | |||
| 1 | /* | ||
| 2 | * Handle caching attributes in page tables (PAT) | ||
| 3 | * | ||
| 4 | * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> | ||
| 5 | * Suresh B Siddha <suresh.b.siddha@intel.com> | ||
| 6 | * | ||
| 7 | * Interval tree (augmented rbtree) used to store the PAT memory type | ||
| 8 | * reservations. | ||
| 9 | */ | ||
| 10 | |||
| 11 | #include <linux/seq_file.h> | ||
| 12 | #include <linux/debugfs.h> | ||
| 13 | #include <linux/kernel.h> | ||
| 14 | #include <linux/module.h> | ||
| 15 | #include <linux/rbtree.h> | ||
| 16 | #include <linux/sched.h> | ||
| 17 | #include <linux/gfp.h> | ||
| 18 | |||
| 19 | #include <asm/pgtable.h> | ||
| 20 | #include <asm/pat.h> | ||
| 21 | |||
| 22 | #include "pat_internal.h" | ||
| 23 | |||
| 24 | /* | ||
| 25 | * The memtype tree keeps track of memory type for specific | ||
| 26 | * physical memory areas. Without proper tracking, conflicting memory | ||
| 27 | * types in different mappings can cause CPU cache corruption. | ||
| 28 | * | ||
| 29 | * The tree is an interval tree (augmented rbtree) with tree ordered | ||
| 30 | * on starting address. Tree can contain multiple entries for | ||
| 31 | * different regions which overlap. All the aliases have the same | ||
| 32 | * cache attributes of course. | ||
| 33 | * | ||
| 34 | * memtype_lock protects the rbtree. | ||
| 35 | */ | ||
| 36 | |||
| 37 | static void memtype_rb_augment_cb(struct rb_node *node); | ||
| 38 | static struct rb_root memtype_rbroot = RB_AUGMENT_ROOT(&memtype_rb_augment_cb); | ||
| 39 | |||
| 40 | static int is_node_overlap(struct memtype *node, u64 start, u64 end) | ||
| 41 | { | ||
| 42 | if (node->start >= end || node->end <= start) | ||
| 43 | return 0; | ||
| 44 | |||
| 45 | return 1; | ||
| 46 | } | ||
| 47 | |||
| 48 | static u64 get_subtree_max_end(struct rb_node *node) | ||
| 49 | { | ||
| 50 | u64 ret = 0; | ||
| 51 | if (node) { | ||
| 52 | struct memtype *data = container_of(node, struct memtype, rb); | ||
| 53 | ret = data->subtree_max_end; | ||
| 54 | } | ||
| 55 | return ret; | ||
| 56 | } | ||
| 57 | |||
| 58 | /* Update 'subtree_max_end' for a node, based on node and its children */ | ||
| 59 | static void update_node_max_end(struct rb_node *node) | ||
| 60 | { | ||
| 61 | struct memtype *data; | ||
| 62 | u64 max_end, child_max_end; | ||
| 63 | |||
| 64 | if (!node) | ||
| 65 | return; | ||
| 66 | |||
| 67 | data = container_of(node, struct memtype, rb); | ||
| 68 | max_end = data->end; | ||
| 69 | |||
| 70 | child_max_end = get_subtree_max_end(node->rb_right); | ||
| 71 | if (child_max_end > max_end) | ||
| 72 | max_end = child_max_end; | ||
| 73 | |||
| 74 | child_max_end = get_subtree_max_end(node->rb_left); | ||
| 75 | if (child_max_end > max_end) | ||
| 76 | max_end = child_max_end; | ||
| 77 | |||
| 78 | data->subtree_max_end = max_end; | ||
| 79 | } | ||
| 80 | |||
| 81 | /* Update 'subtree_max_end' for a node and all its ancestors */ | ||
| 82 | static void update_path_max_end(struct rb_node *node) | ||
| 83 | { | ||
| 84 | u64 old_max_end, new_max_end; | ||
| 85 | |||
| 86 | while (node) { | ||
| 87 | struct memtype *data = container_of(node, struct memtype, rb); | ||
| 88 | |||
| 89 | old_max_end = data->subtree_max_end; | ||
| 90 | update_node_max_end(node); | ||
| 91 | new_max_end = data->subtree_max_end; | ||
| 92 | |||
| 93 | if (new_max_end == old_max_end) | ||
| 94 | break; | ||
| 95 | |||
| 96 | node = rb_parent(node); | ||
| 97 | } | ||
| 98 | } | ||
| 99 | |||
| 100 | /* Find the first (lowest start addr) overlapping range from rb tree */ | ||
| 101 | static struct memtype *memtype_rb_lowest_match(struct rb_root *root, | ||
| 102 | u64 start, u64 end) | ||
| 103 | { | ||
| 104 | struct rb_node *node = root->rb_node; | ||
| 105 | struct memtype *last_lower = NULL; | ||
| 106 | |||
| 107 | while (node) { | ||
| 108 | struct memtype *data = container_of(node, struct memtype, rb); | ||
| 109 | |||
| 110 | if (get_subtree_max_end(node->rb_left) > start) { | ||
| 111 | /* Lowest overlap if any must be on left side */ | ||
| 112 | node = node->rb_left; | ||
| 113 | } else if (is_node_overlap(data, start, end)) { | ||
| 114 | last_lower = data; | ||
| 115 | break; | ||
| 116 | } else if (start >= data->start) { | ||
| 117 | /* Lowest overlap if any must be on right side */ | ||
| 118 | node = node->rb_right; | ||
| 119 | } else { | ||
| 120 | break; | ||
| 121 | } | ||
| 122 | } | ||
| 123 | return last_lower; /* Returns NULL if there is no overlap */ | ||
| 124 | } | ||
| 125 | |||
| 126 | static struct memtype *memtype_rb_exact_match(struct rb_root *root, | ||
| 127 | u64 start, u64 end) | ||
| 128 | { | ||
| 129 | struct memtype *match; | ||
| 130 | |||
| 131 | match = memtype_rb_lowest_match(root, start, end); | ||
| 132 | while (match != NULL && match->start < end) { | ||
| 133 | struct rb_node *node; | ||
| 134 | |||
| 135 | if (match->start == start && match->end == end) | ||
| 136 | return match; | ||
| 137 | |||
| 138 | node = rb_next(&match->rb); | ||
| 139 | if (node) | ||
| 140 | match = container_of(node, struct memtype, rb); | ||
| 141 | else | ||
| 142 | match = NULL; | ||
| 143 | } | ||
| 144 | |||
| 145 | return NULL; /* Returns NULL if there is no exact match */ | ||
| 146 | } | ||
| 147 | |||
| 148 | static int memtype_rb_check_conflict(struct rb_root *root, | ||
| 149 | u64 start, u64 end, | ||
| 150 | unsigned long reqtype, unsigned long *newtype) | ||
| 151 | { | ||
| 152 | struct rb_node *node; | ||
| 153 | struct memtype *match; | ||
| 154 | int found_type = reqtype; | ||
| 155 | |||
| 156 | match = memtype_rb_lowest_match(&memtype_rbroot, start, end); | ||
| 157 | if (match == NULL) | ||
| 158 | goto success; | ||
| 159 | |||
| 160 | if (match->type != found_type && newtype == NULL) | ||
| 161 | goto failure; | ||
| 162 | |||
| 163 | dprintk("Overlap at 0x%Lx-0x%Lx\n", match->start, match->end); | ||
| 164 | found_type = match->type; | ||
| 165 | |||
| 166 | node = rb_next(&match->rb); | ||
| 167 | while (node) { | ||
| 168 | match = container_of(node, struct memtype, rb); | ||
| 169 | |||
| 170 | if (match->start >= end) /* Checked all possible matches */ | ||
| 171 | goto success; | ||
| 172 | |||
| 173 | if (is_node_overlap(match, start, end) && | ||
| 174 | match->type != found_type) { | ||
| 175 | goto failure; | ||
| 176 | } | ||
| 177 | |||
| 178 | node = rb_next(&match->rb); | ||
| 179 | } | ||
| 180 | success: | ||
| 181 | if (newtype) | ||
| 182 | *newtype = found_type; | ||
| 183 | |||
| 184 | return 0; | ||
| 185 | |||
| 186 | failure: | ||
| 187 | printk(KERN_INFO "%s:%d conflicting memory types " | ||
| 188 | "%Lx-%Lx %s<->%s\n", current->comm, current->pid, start, | ||
| 189 | end, cattr_name(found_type), cattr_name(match->type)); | ||
| 190 | return -EBUSY; | ||
| 191 | } | ||
| 192 | |||
| 193 | static void memtype_rb_augment_cb(struct rb_node *node) | ||
| 194 | { | ||
| 195 | if (node) | ||
| 196 | update_path_max_end(node); | ||
| 197 | } | ||
| 198 | |||
| 199 | static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata) | ||
| 200 | { | ||
| 201 | struct rb_node **node = &(root->rb_node); | ||
| 202 | struct rb_node *parent = NULL; | ||
| 203 | |||
| 204 | while (*node) { | ||
| 205 | struct memtype *data = container_of(*node, struct memtype, rb); | ||
| 206 | |||
| 207 | parent = *node; | ||
| 208 | if (newdata->start <= data->start) | ||
| 209 | node = &((*node)->rb_left); | ||
| 210 | else if (newdata->start > data->start) | ||
| 211 | node = &((*node)->rb_right); | ||
| 212 | } | ||
| 213 | |||
| 214 | rb_link_node(&newdata->rb, parent, node); | ||
| 215 | rb_insert_color(&newdata->rb, root); | ||
| 216 | } | ||
| 217 | |||
| 218 | int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type) | ||
| 219 | { | ||
| 220 | int err = 0; | ||
| 221 | |||
| 222 | err = memtype_rb_check_conflict(&memtype_rbroot, new->start, new->end, | ||
| 223 | new->type, ret_type); | ||
| 224 | |||
| 225 | if (!err) { | ||
| 226 | if (ret_type) | ||
| 227 | new->type = *ret_type; | ||
| 228 | |||
| 229 | memtype_rb_insert(&memtype_rbroot, new); | ||
| 230 | } | ||
| 231 | return err; | ||
| 232 | } | ||
| 233 | |||
| 234 | struct memtype *rbt_memtype_erase(u64 start, u64 end) | ||
| 235 | { | ||
| 236 | struct memtype *data; | ||
| 237 | |||
| 238 | data = memtype_rb_exact_match(&memtype_rbroot, start, end); | ||
| 239 | if (!data) | ||
| 240 | goto out; | ||
| 241 | |||
| 242 | rb_erase(&data->rb, &memtype_rbroot); | ||
| 243 | out: | ||
| 244 | return data; | ||
| 245 | } | ||
| 246 | |||
| 247 | struct memtype *rbt_memtype_lookup(u64 addr) | ||
| 248 | { | ||
| 249 | struct memtype *data; | ||
| 250 | data = memtype_rb_lowest_match(&memtype_rbroot, addr, addr + PAGE_SIZE); | ||
| 251 | return data; | ||
| 252 | } | ||
| 253 | |||
| 254 | #if defined(CONFIG_DEBUG_FS) | ||
| 255 | int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos) | ||
| 256 | { | ||
| 257 | struct rb_node *node; | ||
| 258 | int i = 1; | ||
| 259 | |||
| 260 | node = rb_first(&memtype_rbroot); | ||
| 261 | while (node && pos != i) { | ||
| 262 | node = rb_next(node); | ||
| 263 | i++; | ||
| 264 | } | ||
| 265 | |||
| 266 | if (node) { /* pos == i */ | ||
| 267 | struct memtype *this = container_of(node, struct memtype, rb); | ||
| 268 | *out = *this; | ||
| 269 | return 0; | ||
| 270 | } else { | ||
| 271 | return 1; | ||
| 272 | } | ||
| 273 | } | ||
| 274 | #endif | ||
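The tree is an interval tree in the classic augmented-rbtree style: each node caches the maximum end address of its subtree, so memtype_rb_lowest_match() can prune whole subtrees whose ranges all end before the query starts. A hedged sketch of how pat.c feeds it, matching the reserve path above (the wrapper itself is illustrative; memtype_lock lives in pat.c and kzalloc() needs <linux/slab.h>):

```c
/* Illustrative wrapper only -- not part of the patch. */
static int example_track_range(u64 start, u64 end, unsigned long req_type)
{
	struct memtype *new;
	unsigned long ret_type;
	int err;

	new = kzalloc(sizeof(*new), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	new->start = start;
	new->end   = end;
	new->type  = req_type;

	spin_lock(&memtype_lock);		/* defined in pat.c */
	err = rbt_memtype_check_insert(new, &ret_type);
	spin_unlock(&memtype_lock);

	if (err)
		kfree(new);	/* conflict: the node was never linked in */

	/* On success, new->type already holds the possibly-adjusted type. */
	return err;
}
```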
diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c index df3d5c861cda..308e32570d84 100644 --- a/arch/x86/mm/pf_in.c +++ b/arch/x86/mm/pf_in.c | |||
| @@ -34,7 +34,7 @@ | |||
| 34 | /* IA32 Manual 3, 2-1 */ | 34 | /* IA32 Manual 3, 2-1 */ |
| 35 | static unsigned char prefix_codes[] = { | 35 | static unsigned char prefix_codes[] = { |
| 36 | 0xF0, 0xF2, 0xF3, 0x2E, 0x36, 0x3E, 0x26, 0x64, | 36 | 0xF0, 0xF2, 0xF3, 0x2E, 0x36, 0x3E, 0x26, 0x64, |
| 37 | 0x65, 0x2E, 0x3E, 0x66, 0x67 | 37 | 0x65, 0x66, 0x67 |
| 38 | }; | 38 | }; |
| 39 | /* IA32 Manual 3, 3-432*/ | 39 | /* IA32 Manual 3, 3-432*/ |
| 40 | static unsigned int reg_rop[] = { | 40 | static unsigned int reg_rop[] = { |
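The dropped 0x2E and 0x3E bytes were duplicates of the CS/DS segment-override prefixes already listed at the start of the table, so the decoder's behaviour does not change. For context, a table like this is typically consumed by skipping leading prefix bytes before looking at the opcode; a minimal sketch of that step (the helper below is illustrative, not code from pf_in.c):

```c
/* Illustrative helper only -- not part of pf_in.c. */
static unsigned int skip_prefixes(const unsigned char *addr)
{
	unsigned int p = 0;
	int i, found;

	/* An x86 instruction is at most 15 bytes, so bound the scan. */
	while (p < 15) {
		found = 0;
		for (i = 0; i < ARRAY_SIZE(prefix_codes); i++) {
			if (addr[p] == prefix_codes[i]) {
				found = 1;
				break;
			}
		}
		if (!found)
			break;
		p++;
	}
	return p;	/* offset of the first non-prefix byte */
}
```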
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 792854003ed3..cac718499256 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c | |||
| @@ -9,7 +9,6 @@ | |||
| 9 | #include <linux/pagemap.h> | 9 | #include <linux/pagemap.h> |
| 10 | #include <linux/spinlock.h> | 10 | #include <linux/spinlock.h> |
| 11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
| 12 | #include <linux/quicklist.h> | ||
| 13 | 12 | ||
| 14 | #include <asm/system.h> | 13 | #include <asm/system.h> |
| 15 | #include <asm/pgtable.h> | 14 | #include <asm/pgtable.h> |
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 28c68762648f..f9897f7a9ef1 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c | |||
| @@ -363,6 +363,54 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
| 363 | for (i = 0; i < MAX_NUMNODES; i++) | 363 | for (i = 0; i < MAX_NUMNODES; i++) |
| 364 | cutoff_node(i, start, end); | 364 | cutoff_node(i, start, end); |
| 365 | 365 | ||
| 366 | /* | ||
| 367 | * Join together blocks on the same node, holes between | ||
| 368 | * which don't overlap with memory on other nodes. | ||
| 369 | */ | ||
| 370 | for (i = 0; i < num_node_memblks; ++i) { | ||
| 371 | int j, k; | ||
| 372 | |||
| 373 | for (j = i + 1; j < num_node_memblks; ++j) { | ||
| 374 | unsigned long start, end; | ||
| 375 | |||
| 376 | if (memblk_nodeid[i] != memblk_nodeid[j]) | ||
| 377 | continue; | ||
| 378 | start = min(node_memblk_range[i].end, | ||
| 379 | node_memblk_range[j].end); | ||
| 380 | end = max(node_memblk_range[i].start, | ||
| 381 | node_memblk_range[j].start); | ||
| 382 | for (k = 0; k < num_node_memblks; ++k) { | ||
| 383 | if (memblk_nodeid[i] == memblk_nodeid[k]) | ||
| 384 | continue; | ||
| 385 | if (start < node_memblk_range[k].end && | ||
| 386 | end > node_memblk_range[k].start) | ||
| 387 | break; | ||
| 388 | } | ||
| 389 | if (k < num_node_memblks) | ||
| 390 | continue; | ||
| 391 | start = min(node_memblk_range[i].start, | ||
| 392 | node_memblk_range[j].start); | ||
| 393 | end = max(node_memblk_range[i].end, | ||
| 394 | node_memblk_range[j].end); | ||
| 395 | printk(KERN_INFO "SRAT: Node %d " | ||
| 396 | "[%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n", | ||
| 397 | memblk_nodeid[i], | ||
| 398 | node_memblk_range[i].start, | ||
| 399 | node_memblk_range[i].end, | ||
| 400 | node_memblk_range[j].start, | ||
| 401 | node_memblk_range[j].end, | ||
| 402 | start, end); | ||
| 403 | node_memblk_range[i].start = start; | ||
| 404 | node_memblk_range[i].end = end; | ||
| 405 | k = --num_node_memblks - j; | ||
| 406 | memmove(memblk_nodeid + j, memblk_nodeid + j+1, | ||
| 407 | k * sizeof(*memblk_nodeid)); | ||
| 408 | memmove(node_memblk_range + j, node_memblk_range + j+1, | ||
| 409 | k * sizeof(*node_memblk_range)); | ||
| 410 | --j; | ||
| 411 | } | ||
| 412 | } | ||
| 413 | |||
| 366 | memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks, | 414 | memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks, |
| 367 | memblk_nodeid); | 415 | memblk_nodeid); |
| 368 | if (memnode_shift < 0) { | 416 | if (memnode_shift < 0) { |
| @@ -461,7 +509,8 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes) | |||
| 461 | * node, it must now point to the fake node ID. | 509 | * node, it must now point to the fake node ID. |
| 462 | */ | 510 | */ |
| 463 | for (j = 0; j < MAX_LOCAL_APIC; j++) | 511 | for (j = 0; j < MAX_LOCAL_APIC; j++) |
| 464 | if (apicid_to_node[j] == nid) | 512 | if (apicid_to_node[j] == nid && |
| 513 | fake_apicid_to_node[j] == NUMA_NO_NODE) | ||
| 465 | fake_apicid_to_node[j] = i; | 514 | fake_apicid_to_node[j] = i; |
| 466 | } | 515 | } |
| 467 | for (i = 0; i < num_nodes; i++) | 516 | for (i = 0; i < num_nodes; i++) |
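The new loop in acpi_scan_nodes() merges two blocks of the same node only when the hole between them is not claimed by any other node; for example, node 0 owning [0x0,0x1000) and [0x2000,0x3000) collapses to [0x0,0x3000) only if no other node has memory inside [0x1000,0x2000). The decisive test is the inner k-loop; a hedged sketch of it as a stand-alone predicate (the helper name is illustrative):

```c
/* Illustrative helper only -- mirrors the inner k-loop above. */
static int hole_overlaps_other_node(unsigned long hole_start,
				    unsigned long hole_end, int nid)
{
	int k;

	for (k = 0; k < num_node_memblks; k++) {
		if (memblk_nodeid[k] == nid)
			continue;	/* same node: not a conflict */
		if (hole_start < node_memblk_range[k].end &&
		    hole_end > node_memblk_range[k].start)
			return 1;	/* another node owns part of the hole */
	}
	return 0;			/* hole is free: safe to join */
}
```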
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 1fd17cfb956b..d769cda54082 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c | |||
| @@ -238,11 +238,11 @@ static void arch_perfmon_setup_counters(void) | |||
| 238 | if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 && | 238 | if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 && |
| 239 | current_cpu_data.x86_model == 15) { | 239 | current_cpu_data.x86_model == 15) { |
| 240 | eax.split.version_id = 2; | 240 | eax.split.version_id = 2; |
| 241 | eax.split.num_events = 2; | 241 | eax.split.num_counters = 2; |
| 242 | eax.split.bit_width = 40; | 242 | eax.split.bit_width = 40; |
| 243 | } | 243 | } |
| 244 | 244 | ||
| 245 | num_counters = eax.split.num_events; | 245 | num_counters = eax.split.num_counters; |
| 246 | 246 | ||
| 247 | op_arch_perfmon_spec.num_counters = num_counters; | 247 | op_arch_perfmon_spec.num_counters = num_counters; |
| 248 | op_arch_perfmon_spec.num_controls = num_counters; | 248 | op_arch_perfmon_spec.num_controls = num_counters; |
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index b110d97fb925..a0207a7fdf39 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile | |||
| @@ -18,6 +18,8 @@ obj-$(CONFIG_X86_MRST) += mrst.o | |||
| 18 | obj-y += common.o early.o | 18 | obj-y += common.o early.o |
| 19 | obj-y += amd_bus.o bus_numa.o | 19 | obj-y += amd_bus.o bus_numa.o |
| 20 | 20 | ||
| 21 | obj-$(CONFIG_PCI_CNB20LE_QUIRK) += broadcom_bus.o | ||
| 22 | |||
| 21 | ifeq ($(CONFIG_PCI_DEBUG),y) | 23 | ifeq ($(CONFIG_PCI_DEBUG),y) |
| 22 | EXTRA_CFLAGS += -DDEBUG | 24 | EXTRA_CFLAGS += -DDEBUG |
| 23 | endif | 25 | endif |
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 31930fd30ea9..2ec04c424a62 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c | |||
| @@ -207,10 +207,9 @@ get_current_resources(struct acpi_device *device, int busnum, | |||
| 207 | if (!info.res) | 207 | if (!info.res) |
| 208 | goto res_alloc_fail; | 208 | goto res_alloc_fail; |
| 209 | 209 | ||
| 210 | info.name = kmalloc(16, GFP_KERNEL); | 210 | info.name = kasprintf(GFP_KERNEL, "PCI Bus %04x:%02x", domain, busnum); |
| 211 | if (!info.name) | 211 | if (!info.name) |
| 212 | goto name_alloc_fail; | 212 | goto name_alloc_fail; |
| 213 | sprintf(info.name, "PCI Bus %04x:%02x", domain, busnum); | ||
| 214 | 213 | ||
| 215 | info.res_num = 0; | 214 | info.res_num = 0; |
| 216 | acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource, | 215 | acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource, |
| @@ -224,8 +223,11 @@ res_alloc_fail: | |||
| 224 | return; | 223 | return; |
| 225 | } | 224 | } |
| 226 | 225 | ||
| 227 | struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int domain, int busnum) | 226 | struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) |
| 228 | { | 227 | { |
| 228 | struct acpi_device *device = root->device; | ||
| 229 | int domain = root->segment; | ||
| 230 | int busnum = root->secondary.start; | ||
| 229 | struct pci_bus *bus; | 231 | struct pci_bus *bus; |
| 230 | struct pci_sysdata *sd; | 232 | struct pci_sysdata *sd; |
| 231 | int node; | 233 | int node; |
diff --git a/arch/x86/pci/broadcom_bus.c b/arch/x86/pci/broadcom_bus.c new file mode 100644 index 000000000000..0846a5bbbfbd --- /dev/null +++ b/arch/x86/pci/broadcom_bus.c | |||
| @@ -0,0 +1,101 @@ | |||
| 1 | /* | ||
| 2 | * Read address ranges from a Broadcom CNB20LE Host Bridge | ||
| 3 | * | ||
| 4 | * Copyright (c) 2010 Ira W. Snyder <iws@ovro.caltech.edu> | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms of the GNU General Public License as published by the | ||
| 8 | * Free Software Foundation; either version 2 of the License, or (at your | ||
| 9 | * option) any later version. | ||
| 10 | */ | ||
| 11 | |||
| 12 | #include <linux/delay.h> | ||
| 13 | #include <linux/dmi.h> | ||
| 14 | #include <linux/pci.h> | ||
| 15 | #include <linux/init.h> | ||
| 16 | #include <asm/pci_x86.h> | ||
| 17 | |||
| 18 | #include "bus_numa.h" | ||
| 19 | |||
| 20 | static void __devinit cnb20le_res(struct pci_dev *dev) | ||
| 21 | { | ||
| 22 | struct pci_root_info *info; | ||
| 23 | struct resource res; | ||
| 24 | u16 word1, word2; | ||
| 25 | u8 fbus, lbus; | ||
| 26 | int i; | ||
| 27 | |||
| 28 | /* | ||
| 29 | * The x86_pci_root_bus_res_quirks() function already refuses to use | ||
| 30 | * this information if ACPI _CRS was used. Therefore, we don't bother | ||
| 31 | * checking if ACPI is enabled, and just generate the information | ||
| 32 | * for both the ACPI _CRS and no ACPI cases. | ||
| 33 | */ | ||
| 34 | |||
| 35 | info = &pci_root_info[pci_root_num]; | ||
| 36 | pci_root_num++; | ||
| 37 | |||
| 38 | /* read the PCI bus numbers */ | ||
| 39 | pci_read_config_byte(dev, 0x44, &fbus); | ||
| 40 | pci_read_config_byte(dev, 0x45, &lbus); | ||
| 41 | info->bus_min = fbus; | ||
| 42 | info->bus_max = lbus; | ||
| 43 | |||
| 44 | /* | ||
| 45 | * Add the legacy IDE ports on bus 0 | ||
| 46 | * | ||
| 47 | * These do not exist anywhere in the bridge registers, AFAICT. I do | ||
| 48 | * not have the datasheet, so this is the best I can do. | ||
| 49 | */ | ||
| 50 | if (fbus == 0) { | ||
| 51 | update_res(info, 0x01f0, 0x01f7, IORESOURCE_IO, 0); | ||
| 52 | update_res(info, 0x03f6, 0x03f6, IORESOURCE_IO, 0); | ||
| 53 | update_res(info, 0x0170, 0x0177, IORESOURCE_IO, 0); | ||
| 54 | update_res(info, 0x0376, 0x0376, IORESOURCE_IO, 0); | ||
| 55 | update_res(info, 0xffa0, 0xffaf, IORESOURCE_IO, 0); | ||
| 56 | } | ||
| 57 | |||
| 58 | /* read the non-prefetchable memory window */ | ||
| 59 | pci_read_config_word(dev, 0xc0, &word1); | ||
| 60 | pci_read_config_word(dev, 0xc2, &word2); | ||
| 61 | if (word1 != word2) { | ||
| 62 | res.start = (word1 << 16) | 0x0000; | ||
| 63 | res.end = (word2 << 16) | 0xffff; | ||
| 64 | res.flags = IORESOURCE_MEM; | ||
| 65 | update_res(info, res.start, res.end, res.flags, 0); | ||
| 66 | } | ||
| 67 | |||
| 68 | /* read the prefetchable memory window */ | ||
| 69 | pci_read_config_word(dev, 0xc4, &word1); | ||
| 70 | pci_read_config_word(dev, 0xc6, &word2); | ||
| 71 | if (word1 != word2) { | ||
| 72 | res.start = (word1 << 16) | 0x0000; | ||
| 73 | res.end = (word2 << 16) | 0xffff; | ||
| 74 | res.flags = IORESOURCE_MEM | IORESOURCE_PREFETCH; | ||
| 75 | update_res(info, res.start, res.end, res.flags, 0); | ||
| 76 | } | ||
| 77 | |||
| 78 | /* read the IO port window */ | ||
| 79 | pci_read_config_word(dev, 0xd0, &word1); | ||
| 80 | pci_read_config_word(dev, 0xd2, &word2); | ||
| 81 | if (word1 != word2) { | ||
| 82 | res.start = word1; | ||
| 83 | res.end = word2; | ||
| 84 | res.flags = IORESOURCE_IO; | ||
| 85 | update_res(info, res.start, res.end, res.flags, 0); | ||
| 86 | } | ||
| 87 | |||
| 88 | /* print information about this host bridge */ | ||
| 89 | res.start = fbus; | ||
| 90 | res.end = lbus; | ||
| 91 | res.flags = IORESOURCE_BUS; | ||
| 92 | dev_info(&dev->dev, "CNB20LE PCI Host Bridge (domain %04x %pR)\n", | ||
| 93 | pci_domain_nr(dev->bus), &res); | ||
| 94 | |||
| 95 | for (i = 0; i < info->res_num; i++) | ||
| 96 | dev_info(&dev->dev, "host bridge window %pR\n", &info->res[i]); | ||
| 97 | } | ||
| 98 | |||
| 99 | DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_LE, | ||
| 100 | cnb20le_res); | ||
| 101 | |||
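In the new quirk, each memory window is described by a pair of 16-bit registers holding the upper halves of 32-bit base and limit addresses: a base word of 0x8000 and a limit word of 0x8fff decode to 0x80000000-0x8fffffff, and a base equal to its limit is skipped as a disabled window. A minimal sketch of just that decode step (the helper is illustrative):

```c
/* Illustrative helper only -- the same decode the quirk does inline.
 * Returns 1 and fills *start/*end if the window is enabled. */
static int cnb20le_decode_window(u16 base_word, u16 limit_word,
				 u32 *start, u32 *end)
{
	if (base_word == limit_word)
		return 0;		/* treated as disabled, as above */

	*start = ((u32)base_word  << 16) | 0x0000;
	*end   = ((u32)limit_word << 16) | 0xffff;
	return 1;
}
```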
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index cf2e93869c48..215a27ae050d 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c | |||
| @@ -76,7 +76,7 @@ struct pci_ops pci_root_ops = { | |||
| 76 | * This interrupt-safe spinlock protects all accesses to PCI | 76 | * This interrupt-safe spinlock protects all accesses to PCI |
| 77 | * configuration space. | 77 | * configuration space. |
| 78 | */ | 78 | */ |
| 79 | DEFINE_SPINLOCK(pci_config_lock); | 79 | DEFINE_RAW_SPINLOCK(pci_config_lock); |
| 80 | 80 | ||
| 81 | static int __devinit can_skip_ioresource_align(const struct dmi_system_id *d) | 81 | static int __devinit can_skip_ioresource_align(const struct dmi_system_id *d) |
| 82 | { | 82 | { |
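pci_config_lock becomes a raw spinlock so that it keeps spinning rather than turning into a sleeping lock under PREEMPT_RT-style preemption models, since config-space accesses can happen from contexts that must not sleep; all users in the files below switch to the raw_spin_lock_irqsave() variants to match. A minimal sketch of the accessor pattern they share (the function and the inline address formula are illustrative, not code from direct.c):

```c
/* Illustrative accessor only -- shows the locking pattern. */
static int example_conf1_read_dword(unsigned int bus, unsigned int devfn,
				    int reg, u32 *value)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&pci_config_lock, flags);

	/* Standard type-1 configuration address: enable bit, bus,
	 * device/function, dword-aligned register. */
	outl(0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3), 0xCF8);
	*value = inl(0xCFC);

	raw_spin_unlock_irqrestore(&pci_config_lock, flags);
	return 0;
}
```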
diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c index 347d882b3bb3..bd33620b0071 100644 --- a/arch/x86/pci/direct.c +++ b/arch/x86/pci/direct.c | |||
| @@ -27,7 +27,7 @@ static int pci_conf1_read(unsigned int seg, unsigned int bus, | |||
| 27 | return -EINVAL; | 27 | return -EINVAL; |
| 28 | } | 28 | } |
| 29 | 29 | ||
| 30 | spin_lock_irqsave(&pci_config_lock, flags); | 30 | raw_spin_lock_irqsave(&pci_config_lock, flags); |
| 31 | 31 | ||
| 32 | outl(PCI_CONF1_ADDRESS(bus, devfn, reg), 0xCF8); | 32 | outl(PCI_CONF1_ADDRESS(bus, devfn, reg), 0xCF8); |
| 33 | 33 | ||
| @@ -43,7 +43,7 @@ static int pci_conf1_read(unsigned int seg, unsigned int bus, | |||
| 43 | break; | 43 | break; |
| 44 | } | 44 | } |
| 45 | 45 | ||
| 46 | spin_unlock_irqrestore(&pci_config_lock, flags); | 46 | raw_spin_unlock_irqrestore(&pci_config_lock, flags); |
| 47 | 47 | ||
| 48 | return 0; | 48 | return 0; |
| 49 | } | 49 | } |
| @@ -56,7 +56,7 @@ static int pci_conf1_write(unsigned int seg, unsigned int bus, | |||
| 56 | if ((bus > 255) || (devfn > 255) || (reg > 4095)) | 56 | if ((bus > 255) || (devfn > 255) || (reg > 4095)) |
| 57 | return -EINVAL; | 57 | return -EINVAL; |
| 58 | 58 | ||
| 59 | spin_lock_irqsave(&pci_config_lock, flags); | 59 | raw_spin_lock_irqsave(&pci_config_lock, flags); |
| 60 | 60 | ||
| 61 | outl(PCI_CONF1_ADDRESS(bus, devfn, reg), 0xCF8); | 61 | outl(PCI_CONF1_ADDRESS(bus, devfn, reg), 0xCF8); |
| 62 | 62 | ||
| @@ -72,7 +72,7 @@ static int pci_conf1_write(unsigned int seg, unsigned int bus, | |||
| 72 | break; | 72 | break; |
| 73 | } | 73 | } |
| 74 | 74 | ||
| 75 | spin_unlock_irqrestore(&pci_config_lock, flags); | 75 | raw_spin_unlock_irqrestore(&pci_config_lock, flags); |
| 76 | 76 | ||
| 77 | return 0; | 77 | return 0; |
| 78 | } | 78 | } |
| @@ -108,7 +108,7 @@ static int pci_conf2_read(unsigned int seg, unsigned int bus, | |||
| 108 | if (dev & 0x10) | 108 | if (dev & 0x10) |
| 109 | return PCIBIOS_DEVICE_NOT_FOUND; | 109 | return PCIBIOS_DEVICE_NOT_FOUND; |
| 110 | 110 | ||
| 111 | spin_lock_irqsave(&pci_config_lock, flags); | 111 | raw_spin_lock_irqsave(&pci_config_lock, flags); |
| 112 | 112 | ||
| 113 | outb((u8)(0xF0 | (fn << 1)), 0xCF8); | 113 | outb((u8)(0xF0 | (fn << 1)), 0xCF8); |
| 114 | outb((u8)bus, 0xCFA); | 114 | outb((u8)bus, 0xCFA); |
| @@ -127,7 +127,7 @@ static int pci_conf2_read(unsigned int seg, unsigned int bus, | |||
| 127 | 127 | ||
| 128 | outb(0, 0xCF8); | 128 | outb(0, 0xCF8); |
| 129 | 129 | ||
| 130 | spin_unlock_irqrestore(&pci_config_lock, flags); | 130 | raw_spin_unlock_irqrestore(&pci_config_lock, flags); |
| 131 | 131 | ||
| 132 | return 0; | 132 | return 0; |
| 133 | } | 133 | } |
| @@ -147,7 +147,7 @@ static int pci_conf2_write(unsigned int seg, unsigned int bus, | |||
| 147 | if (dev & 0x10) | 147 | if (dev & 0x10) |
| 148 | return PCIBIOS_DEVICE_NOT_FOUND; | 148 | return PCIBIOS_DEVICE_NOT_FOUND; |
| 149 | 149 | ||
| 150 | spin_lock_irqsave(&pci_config_lock, flags); | 150 | raw_spin_lock_irqsave(&pci_config_lock, flags); |
| 151 | 151 | ||
| 152 | outb((u8)(0xF0 | (fn << 1)), 0xCF8); | 152 | outb((u8)(0xF0 | (fn << 1)), 0xCF8); |
| 153 | outb((u8)bus, 0xCFA); | 153 | outb((u8)bus, 0xCFA); |
| @@ -166,7 +166,7 @@ static int pci_conf2_write(unsigned int seg, unsigned int bus, | |||
| 166 | 166 | ||
| 167 | outb(0, 0xCF8); | 167 | outb(0, 0xCF8); |
| 168 | 168 | ||
| 169 | spin_unlock_irqrestore(&pci_config_lock, flags); | 169 | raw_spin_unlock_irqrestore(&pci_config_lock, flags); |
| 170 | 170 | ||
| 171 | return 0; | 171 | return 0; |
| 172 | } | 172 | } |
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 5d362b5ba06f..9810a0f76c91 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c | |||
| @@ -589,8 +589,6 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route | |||
| 589 | case PCI_DEVICE_ID_INTEL_ICH10_1: | 589 | case PCI_DEVICE_ID_INTEL_ICH10_1: |
| 590 | case PCI_DEVICE_ID_INTEL_ICH10_2: | 590 | case PCI_DEVICE_ID_INTEL_ICH10_2: |
| 591 | case PCI_DEVICE_ID_INTEL_ICH10_3: | 591 | case PCI_DEVICE_ID_INTEL_ICH10_3: |
| 592 | case PCI_DEVICE_ID_INTEL_CPT_LPC1: | ||
| 593 | case PCI_DEVICE_ID_INTEL_CPT_LPC2: | ||
| 594 | r->name = "PIIX/ICH"; | 592 | r->name = "PIIX/ICH"; |
| 595 | r->get = pirq_piix_get; | 593 | r->get = pirq_piix_get; |
| 596 | r->set = pirq_piix_set; | 594 | r->set = pirq_piix_set; |
| @@ -605,6 +603,13 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route | |||
| 605 | return 1; | 603 | return 1; |
| 606 | } | 604 | } |
| 607 | 605 | ||
| 606 | if ((device >= PCI_DEVICE_ID_INTEL_CPT_LPC_MIN) && | ||
| 607 | (device <= PCI_DEVICE_ID_INTEL_CPT_LPC_MAX)) { | ||
| 608 | r->name = "PIIX/ICH"; | ||
| 609 | r->get = pirq_piix_get; | ||
| 610 | r->set = pirq_piix_set; | ||
| 611 | return 1; | ||
| 612 | } | ||
| 608 | return 0; | 613 | return 0; |
| 609 | } | 614 | } |
| 610 | 615 | ||
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 39b9ebe8f886..a918553ebc75 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c | |||
| @@ -483,16 +483,17 @@ static void __init pci_mmcfg_reject_broken(int early) | |||
| 483 | list_for_each_entry(cfg, &pci_mmcfg_list, list) { | 483 | list_for_each_entry(cfg, &pci_mmcfg_list, list) { |
| 484 | int valid = 0; | 484 | int valid = 0; |
| 485 | 485 | ||
| 486 | if (!early && !acpi_disabled) | 486 | if (!early && !acpi_disabled) { |
| 487 | valid = is_mmconf_reserved(is_acpi_reserved, cfg, 0); | 487 | valid = is_mmconf_reserved(is_acpi_reserved, cfg, 0); |
| 488 | 488 | ||
| 489 | if (valid) | 489 | if (valid) |
| 490 | continue; | 490 | continue; |
| 491 | 491 | else | |
| 492 | if (!early) | 492 | printk(KERN_ERR FW_BUG PREFIX |
| 493 | printk(KERN_ERR FW_BUG PREFIX | 493 | "MMCONFIG at %pR not reserved in " |
| 494 | "MMCONFIG at %pR not reserved in " | 494 | "ACPI motherboard resources\n", |
| 495 | "ACPI motherboard resources\n", &cfg->res); | 495 | &cfg->res); |
| 496 | } | ||
| 496 | 497 | ||
| 497 | /* Don't try to do this check unless configuration | 498 | /* Don't try to do this check unless configuration |
| 498 | type 1 is available. how about type 2 ?*/ | 499 | type 1 is available. how about type 2 ?*/ |
diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c index 90d5fd476ed4..a3d9c54792ae 100644 --- a/arch/x86/pci/mmconfig_32.c +++ b/arch/x86/pci/mmconfig_32.c | |||
| @@ -64,7 +64,7 @@ err: *value = -1; | |||
| 64 | if (!base) | 64 | if (!base) |
| 65 | goto err; | 65 | goto err; |
| 66 | 66 | ||
| 67 | spin_lock_irqsave(&pci_config_lock, flags); | 67 | raw_spin_lock_irqsave(&pci_config_lock, flags); |
| 68 | 68 | ||
| 69 | pci_exp_set_dev_base(base, bus, devfn); | 69 | pci_exp_set_dev_base(base, bus, devfn); |
| 70 | 70 | ||
| @@ -79,7 +79,7 @@ err: *value = -1; | |||
| 79 | *value = mmio_config_readl(mmcfg_virt_addr + reg); | 79 | *value = mmio_config_readl(mmcfg_virt_addr + reg); |
| 80 | break; | 80 | break; |
| 81 | } | 81 | } |
| 82 | spin_unlock_irqrestore(&pci_config_lock, flags); | 82 | raw_spin_unlock_irqrestore(&pci_config_lock, flags); |
| 83 | 83 | ||
| 84 | return 0; | 84 | return 0; |
| 85 | } | 85 | } |
| @@ -97,7 +97,7 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus, | |||
| 97 | if (!base) | 97 | if (!base) |
| 98 | return -EINVAL; | 98 | return -EINVAL; |
| 99 | 99 | ||
| 100 | spin_lock_irqsave(&pci_config_lock, flags); | 100 | raw_spin_lock_irqsave(&pci_config_lock, flags); |
| 101 | 101 | ||
| 102 | pci_exp_set_dev_base(base, bus, devfn); | 102 | pci_exp_set_dev_base(base, bus, devfn); |
| 103 | 103 | ||
| @@ -112,7 +112,7 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus, | |||
| 112 | mmio_config_writel(mmcfg_virt_addr + reg, value); | 112 | mmio_config_writel(mmcfg_virt_addr + reg, value); |
| 113 | break; | 113 | break; |
| 114 | } | 114 | } |
| 115 | spin_unlock_irqrestore(&pci_config_lock, flags); | 115 | raw_spin_unlock_irqrestore(&pci_config_lock, flags); |
| 116 | 116 | ||
| 117 | return 0; | 117 | return 0; |
| 118 | } | 118 | } |
diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c index 8bf2fcb88d04..7ef3a2735df3 100644 --- a/arch/x86/pci/mrst.c +++ b/arch/x86/pci/mrst.c | |||
| @@ -109,7 +109,7 @@ static int pci_device_update_fixed(struct pci_bus *bus, unsigned int devfn, | |||
| 109 | decode++; | 109 | decode++; |
| 110 | decode = ~(decode - 1); | 110 | decode = ~(decode - 1); |
| 111 | } else { | 111 | } else { |
| 112 | decode = ~0; | 112 | decode = 0; |
| 113 | } | 113 | } |
| 114 | 114 | ||
| 115 | /* | 115 | /* |
| @@ -247,6 +247,10 @@ static void __devinit pci_fixed_bar_fixup(struct pci_dev *dev) | |||
| 247 | u32 size; | 247 | u32 size; |
| 248 | int i; | 248 | int i; |
| 249 | 249 | ||
| 250 | /* Must have extended configuration space */ | ||
| 251 | if (dev->cfg_size < PCIE_CAP_OFFSET + 4) | ||
| 252 | return; | ||
| 253 | |||
| 250 | /* Fixup the BAR sizes for fixed BAR devices and make them unmoveable */ | 254 | /* Fixup the BAR sizes for fixed BAR devices and make them unmoveable */ |
| 251 | offset = fixed_bar_cap(dev->bus, dev->devfn); | 255 | offset = fixed_bar_cap(dev->bus, dev->devfn); |
| 252 | if (!offset || PCI_DEVFN(2, 0) == dev->devfn || | 256 | if (!offset || PCI_DEVFN(2, 0) == dev->devfn || |
diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c index 8223738ad806..5c9e2458df4e 100644 --- a/arch/x86/pci/numaq_32.c +++ b/arch/x86/pci/numaq_32.c | |||
| @@ -37,7 +37,7 @@ static int pci_conf1_mq_read(unsigned int seg, unsigned int bus, | |||
| 37 | if (!value || (bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255)) | 37 | if (!value || (bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255)) |
| 38 | return -EINVAL; | 38 | return -EINVAL; |
| 39 | 39 | ||
| 40 | spin_lock_irqsave(&pci_config_lock, flags); | 40 | raw_spin_lock_irqsave(&pci_config_lock, flags); |
| 41 | 41 | ||
| 42 | write_cf8(bus, devfn, reg); | 42 | write_cf8(bus, devfn, reg); |
| 43 | 43 | ||
| @@ -62,7 +62,7 @@ static int pci_conf1_mq_read(unsigned int seg, unsigned int bus, | |||
| 62 | break; | 62 | break; |
| 63 | } | 63 | } |
| 64 | 64 | ||
| 65 | spin_unlock_irqrestore(&pci_config_lock, flags); | 65 | raw_spin_unlock_irqrestore(&pci_config_lock, flags); |
| 66 | 66 | ||
| 67 | return 0; | 67 | return 0; |
| 68 | } | 68 | } |
| @@ -76,7 +76,7 @@ static int pci_conf1_mq_write(unsigned int seg, unsigned int bus, | |||
| 76 | if ((bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255)) | 76 | if ((bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255)) |
| 77 | return -EINVAL; | 77 | return -EINVAL; |
| 78 | 78 | ||
| 79 | spin_lock_irqsave(&pci_config_lock, flags); | 79 | raw_spin_lock_irqsave(&pci_config_lock, flags); |
| 80 | 80 | ||
| 81 | write_cf8(bus, devfn, reg); | 81 | write_cf8(bus, devfn, reg); |
| 82 | 82 | ||
| @@ -101,7 +101,7 @@ static int pci_conf1_mq_write(unsigned int seg, unsigned int bus, | |||
| 101 | break; | 101 | break; |
| 102 | } | 102 | } |
| 103 | 103 | ||
| 104 | spin_unlock_irqrestore(&pci_config_lock, flags); | 104 | raw_spin_unlock_irqrestore(&pci_config_lock, flags); |
| 105 | 105 | ||
| 106 | return 0; | 106 | return 0; |
| 107 | } | 107 | } |
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c index 59a225c17b84..2492d165096a 100644 --- a/arch/x86/pci/pcbios.c +++ b/arch/x86/pci/pcbios.c | |||
| @@ -162,7 +162,7 @@ static int pci_bios_read(unsigned int seg, unsigned int bus, | |||
| 162 | if (!value || (bus > 255) || (devfn > 255) || (reg > 255)) | 162 | if (!value || (bus > 255) || (devfn > 255) || (reg > 255)) |
| 163 | return -EINVAL; | 163 | return -EINVAL; |
| 164 | 164 | ||
| 165 | spin_lock_irqsave(&pci_config_lock, flags); | 165 | raw_spin_lock_irqsave(&pci_config_lock, flags); |
| 166 | 166 | ||
| 167 | switch (len) { | 167 | switch (len) { |
| 168 | case 1: | 168 | case 1: |
| @@ -213,7 +213,7 @@ static int pci_bios_read(unsigned int seg, unsigned int bus, | |||
| 213 | break; | 213 | break; |
| 214 | } | 214 | } |
| 215 | 215 | ||
| 216 | spin_unlock_irqrestore(&pci_config_lock, flags); | 216 | raw_spin_unlock_irqrestore(&pci_config_lock, flags); |
| 217 | 217 | ||
| 218 | return (int)((result & 0xff00) >> 8); | 218 | return (int)((result & 0xff00) >> 8); |
| 219 | } | 219 | } |
| @@ -228,7 +228,7 @@ static int pci_bios_write(unsigned int seg, unsigned int bus, | |||
| 228 | if ((bus > 255) || (devfn > 255) || (reg > 255)) | 228 | if ((bus > 255) || (devfn > 255) || (reg > 255)) |
| 229 | return -EINVAL; | 229 | return -EINVAL; |
| 230 | 230 | ||
| 231 | spin_lock_irqsave(&pci_config_lock, flags); | 231 | raw_spin_lock_irqsave(&pci_config_lock, flags); |
| 232 | 232 | ||
| 233 | switch (len) { | 233 | switch (len) { |
| 234 | case 1: | 234 | case 1: |
| @@ -269,7 +269,7 @@ static int pci_bios_write(unsigned int seg, unsigned int bus, | |||
| 269 | break; | 269 | break; |
| 270 | } | 270 | } |
| 271 | 271 | ||
| 272 | spin_unlock_irqrestore(&pci_config_lock, flags); | 272 | raw_spin_unlock_irqrestore(&pci_config_lock, flags); |
| 273 | 273 | ||
| 274 | return (int)((result & 0xff00) >> 8); | 274 | return (int)((result & 0xff00) >> 8); |
| 275 | } | 275 | } |
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 32764b8880b5..b3c6c59ed302 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c | |||
| @@ -476,6 +476,7 @@ void xen_timer_resume(void) | |||
| 476 | __init void xen_time_init(void) | 476 | __init void xen_time_init(void) |
| 477 | { | 477 | { |
| 478 | int cpu = smp_processor_id(); | 478 | int cpu = smp_processor_id(); |
| 479 | struct timespec tp; | ||
| 479 | 480 | ||
| 480 | clocksource_register(&xen_clocksource); | 481 | clocksource_register(&xen_clocksource); |
| 481 | 482 | ||
| @@ -487,9 +488,8 @@ __init void xen_time_init(void) | |||
| 487 | } | 488 | } |
| 488 | 489 | ||
| 489 | /* Set initial system time with full resolution */ | 490 | /* Set initial system time with full resolution */ |
| 490 | xen_read_wallclock(&xtime); | 491 | xen_read_wallclock(&tp); |
| 491 | set_normalized_timespec(&wall_to_monotonic, | 492 | do_settimeofday(&tp); |
| 492 | -xtime.tv_sec, -xtime.tv_nsec); | ||
| 493 | 493 | ||
| 494 | setup_force_cpu_cap(X86_FEATURE_TSC); | 494 | setup_force_cpu_cap(X86_FEATURE_TSC); |
| 495 | 495 | ||
